From cb9c48bad523e76eff55711264e2d3a870c63cef Mon Sep 17 00:00:00 2001
From: Katelyn Queen <katelynqueen98@gmail.com>
Date: Fri, 1 Nov 2024 12:11:43 -0700
Subject: [PATCH 1/4] Fix bug when first cluster is of size one

First cluster to be partitioned is now always the largest cluster; if all clusters are of size one, algorithm will stop
---
 R/super_partition.R | 19 ++++++++++++++-----
 1 file changed, 14 insertions(+), 5 deletions(-)

diff --git a/R/super_partition.R b/R/super_partition.R
index f39b601..e485f0d 100644
--- a/R/super_partition.R
+++ b/R/super_partition.R
@@ -182,11 +182,17 @@ super_partition <- function(full_data,
       width = 100
     )
   }
-
-  ## first cluster
+  
+  # if no dimension reduction, stop
+  if (length(unique(master_cluster$cluster)) == ncol(full_data)) stop("No dimension reduction occured. Try a lower threshold.")
+
+  ## first cluster - always use largest cluster
+  clust_sizes   <- as.data.frame(table(master_cluster$cluster))
+  first_clust   <- which(unique(master_cluster$cluster) == clust_sizes[which.max(clust_sizes$Freq), 1])
+  
   # get initial partition to build off
   part_master <- partition(
-    full_data[, which(master_cluster$cluster == unique(master_cluster$cluster)[1])],
+    full_data[, which(master_cluster$cluster == unique(master_cluster$cluster)[first_clust])],
     threshold, partitioner, tolerance, niter, x, .sep
   )
 
@@ -194,7 +200,7 @@ super_partition <- function(full_data,
   mod_rows <- grep(x, part_master$mapping_key$variable)
   part_master$mapping_key$indices <- full_data_col_numbers(
     full_data = full_data,
-    small_data = full_data[, which(master_cluster$cluster == unique(master_cluster$cluster)[1])],
+    small_data = full_data[, which(master_cluster$cluster == unique(master_cluster$cluster)[first_clust])],
     modules = part_master$mapping_key$indices
   )
 
@@ -208,7 +214,10 @@ super_partition <- function(full_data,
   if (progress_bar) pb$tick()
 
   # for each cluster...
-  for (i in 2:n_iter) {
+  for (i in 1:n_iter) {
+    # skip if first cluster
+    if(i == first_clust) next()
+    
     # what to do if cluster is of size one
     if (sum(master_cluster$cluster == unique(master_cluster$cluster)[i]) == 1) {
       # cbind data to master partition reduced data

From a22ce75d666380fab52a5fcfe5ba16f48cb56081 Mon Sep 17 00:00:00 2001
From: Katelyn Queen <katelynqueen98@gmail.com>
Date: Mon, 4 Nov 2024 09:56:06 -0800
Subject: [PATCH 2/4] Use Partition if Super Partition results in no dimension
 reduction

---
 R/super_partition.R | 9 ++++++---
 1 file changed, 6 insertions(+), 3 deletions(-)

diff --git a/R/super_partition.R b/R/super_partition.R
index e485f0d..310eb36 100644
--- a/R/super_partition.R
+++ b/R/super_partition.R
@@ -73,7 +73,7 @@ super_partition <- function(full_data,
   # if < cluster_size features, call regular partition
   if (ncol(full_data) < cluster_size) {
     message(paste0("Using `partition()` since there are < ", cluster_size, "features."))
-    return(partition(full_data, threshold = threshold))
+    return(partition(full_data, threshold, partitioner, tolerance, niter, x, .sep))
   }
 
   # iteration counters
@@ -183,8 +183,11 @@ super_partition <- function(full_data,
     )
   }
   
-  # if no dimension reduction, stop
-  if (length(unique(master_cluster$cluster)) == ncol(full_data)) stop("No dimension reduction occured. Try a lower threshold.")
+  # if no dimension reduction, use partition instead
+  if (length(unique(master_cluster$cluster)) == ncol(full_data)) {
+    if (verbose) ("No dimension reduction occured using Super Partition. Using Partition instead.")
+    return(partition(full_data, threshold, partitioner, tolerance, niter, x, .sep))
+  }
 
   ## first cluster - always use largest cluster
   clust_sizes   <- as.data.frame(table(master_cluster$cluster))

From 3a0044320113ae1db82ccff6dded0bfca38aa33e Mon Sep 17 00:00:00 2001
From: Katelyn Queen <katelynqueen98@gmail.com>
Date: Mon, 4 Nov 2024 10:25:07 -0800
Subject: [PATCH 3/4] Fix typo

---
 R/super_partition.R | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/R/super_partition.R b/R/super_partition.R
index 310eb36..732c60c 100644
--- a/R/super_partition.R
+++ b/R/super_partition.R
@@ -185,7 +185,7 @@ super_partition <- function(full_data,
   
   # if no dimension reduction, use partition instead
   if (length(unique(master_cluster$cluster)) == ncol(full_data)) {
-    if (verbose) ("No dimension reduction occured using Super Partition. Using Partition instead.")
+    if (verbose) message("No dimension reduction occured using Super Partition. Using Partition instead.")
     return(partition(full_data, threshold, partitioner, tolerance, niter, x, .sep))
   }
 

From d14c3f1a21a7aa817cafa7d474bb174a60246db6 Mon Sep 17 00:00:00 2001
From: Malcolm Barrett <malcolmbarrett@gmail.com>
Date: Sun, 10 Nov 2024 13:21:20 -0500
Subject: [PATCH 4/4] clean up

---
 R/super_partition.R | 29 +++++++++++++++++++++--------
 1 file changed, 21 insertions(+), 8 deletions(-)

diff --git a/R/super_partition.R b/R/super_partition.R
index 732c60c..311820f 100644
--- a/R/super_partition.R
+++ b/R/super_partition.R
@@ -62,10 +62,14 @@ super_partition <- function(full_data,
   }
 
   # ensure 0 < threshold < 1
-  if (0 > threshold | 1 < threshold) stop("Threshold must be between 0 and 1.")
+  if (0 > threshold | 1 < threshold) {
+    stop("Threshold must be between 0 and 1.")
+  }
 
   # ensure no column names contain x
-  if (any(grepl(x, colnames(full_data)))) stop(paste0("The prefix for new variable names, ", x, ", is contained within existing data column names. Please choose a different prefix to avoid errors."))
+  if (any(grepl(x, colnames(full_data)))) {
+    stop(paste0("The prefix for new variable names, ", x, ", is contained within existing data column names. Please choose a different prefix to avoid errors."))
+  }
 
   # ensure data frame structure
   full_data <- as.data.frame(full_data)
@@ -73,7 +77,16 @@ super_partition <- function(full_data,
   # if < cluster_size features, call regular partition
   if (ncol(full_data) < cluster_size) {
     message(paste0("Using `partition()` since there are < ", cluster_size, "features."))
-    return(partition(full_data, threshold, partitioner, tolerance, niter, x, .sep))
+    prt <- partition(
+      full_data,
+      threshold = threshold,
+      partitioner = partitioner,
+      tolerance = tolerance,
+      niter = niter,
+      x = x,
+      .sep = .sep
+    )
+    return(prt)
   }
 
   # iteration counters
@@ -182,7 +195,7 @@ super_partition <- function(full_data,
       width = 100
     )
   }
-  
+
   # if no dimension reduction, use partition instead
   if (length(unique(master_cluster$cluster)) == ncol(full_data)) {
     if (verbose) message("No dimension reduction occured using Super Partition. Using Partition instead.")
@@ -192,7 +205,7 @@ super_partition <- function(full_data,
   ## first cluster - always use largest cluster
   clust_sizes   <- as.data.frame(table(master_cluster$cluster))
   first_clust   <- which(unique(master_cluster$cluster) == clust_sizes[which.max(clust_sizes$Freq), 1])
-  
+
   # get initial partition to build off
   part_master <- partition(
     full_data[, which(master_cluster$cluster == unique(master_cluster$cluster)[first_clust])],
@@ -217,10 +230,10 @@ super_partition <- function(full_data,
   if (progress_bar) pb$tick()
 
   # for each cluster...
-  for (i in 1:n_iter) {
+  for (i in seq_len(n_iter)) {
     # skip if first cluster
-    if(i == first_clust) next()
-    
+    if (i == first_clust) next()
+
     # what to do if cluster is of size one
     if (sum(master_cluster$cluster == unique(master_cluster$cluster)[i]) == 1) {
       # cbind data to master partition reduced data