Skip to content

Commit bf5863a

Browse files
Merge pull request #44 from katelynqueen98/master
Fix bug when first cluster is of size one
2 parents 4c3b323 + d14c3f1 commit bf5863a

File tree

1 file changed

+32
-7
lines changed

1 file changed

+32
-7
lines changed

R/super_partition.R

+32-7
Original file line numberDiff line numberDiff line change
@@ -62,18 +62,31 @@ super_partition <- function(full_data,
6262
}
6363

6464
# ensure 0 < threshold < 1
65-
if (0 > threshold | 1 < threshold) stop("Threshold must be between 0 and 1.")
65+
if (0 > threshold | 1 < threshold) {
66+
stop("Threshold must be between 0 and 1.")
67+
}
6668

6769
# ensure no column names contain x
68-
if (any(grepl(x, colnames(full_data)))) stop(paste0("The prefix for new variable names, ", x, ", is contained within existing data column names. Please choose a different prefix to avoid errors."))
70+
if (any(grepl(x, colnames(full_data)))) {
71+
stop(paste0("The prefix for new variable names, ", x, ", is contained within existing data column names. Please choose a different prefix to avoid errors."))
72+
}
6973

7074
# ensure data frame structure
7175
full_data <- as.data.frame(full_data)
7276

7377
# if < cluster_size features, call regular partition
7478
if (ncol(full_data) < cluster_size) {
7579
message(paste0("Using `partition()` since there are < ", cluster_size, "features."))
76-
return(partition(full_data, threshold = threshold))
80+
prt <- partition(
81+
full_data,
82+
threshold = threshold,
83+
partitioner = partitioner,
84+
tolerance = tolerance,
85+
niter = niter,
86+
x = x,
87+
.sep = .sep
88+
)
89+
return(prt)
7790
}
7891

7992
# iteration counters
@@ -183,18 +196,27 @@ super_partition <- function(full_data,
183196
)
184197
}
185198

186-
## first cluster
199+
# if no dimension reduction, use partition instead
200+
if (length(unique(master_cluster$cluster)) == ncol(full_data)) {
201+
if (verbose) message("No dimension reduction occured using Super Partition. Using Partition instead.")
202+
return(partition(full_data, threshold, partitioner, tolerance, niter, x, .sep))
203+
}
204+
205+
## first cluster - always use largest cluster
206+
clust_sizes <- as.data.frame(table(master_cluster$cluster))
207+
first_clust <- which(unique(master_cluster$cluster) == clust_sizes[which.max(clust_sizes$Freq), 1])
208+
187209
# get initial partition to build off
188210
part_master <- partition(
189-
full_data[, which(master_cluster$cluster == unique(master_cluster$cluster)[1])],
211+
full_data[, which(master_cluster$cluster == unique(master_cluster$cluster)[first_clust])],
190212
threshold, partitioner, tolerance, niter, x, .sep
191213
)
192214

193215
# update indices for each module
194216
mod_rows <- grep(x, part_master$mapping_key$variable)
195217
part_master$mapping_key$indices <- full_data_col_numbers(
196218
full_data = full_data,
197-
small_data = full_data[, which(master_cluster$cluster == unique(master_cluster$cluster)[1])],
219+
small_data = full_data[, which(master_cluster$cluster == unique(master_cluster$cluster)[first_clust])],
198220
modules = part_master$mapping_key$indices
199221
)
200222

@@ -208,7 +230,10 @@ super_partition <- function(full_data,
208230
if (progress_bar) pb$tick()
209231

210232
# for each cluster...
211-
for (i in 2:n_iter) {
233+
for (i in seq_len(n_iter)) {
234+
# skip if first cluster
235+
if (i == first_clust) next()
236+
212237
# what to do if cluster is of size one
213238
if (sum(master_cluster$cluster == unique(master_cluster$cluster)[i]) == 1) {
214239
# cbind data to master partition reduced data

0 commit comments

Comments
 (0)