51
51
NNS.SD.cluster <- function (data , degree = 1 , type = " discrete" , min_cluster = 1 , dendrogram = FALSE ) {
52
52
clusters <- list ()
53
53
iteration <- 1
54
+ n <- ncol(data )
55
+
56
+ if (is.null(colnames(data ))) colnames(data ) <- paste0(" X_" ,1 : ncol(data ))
57
+ original_names <- colnames(data )
54
58
55
59
# Ensure the input data is a matrix
56
60
remaining_data <- as.matrix(data )
57
61
62
+
58
63
# Continue clustering until the number of remaining columns is less than or equal to min_cluster
59
64
while (ncol(remaining_data ) > min_cluster ) {
60
65
# Use the original NNS.SD.efficient.set call as provided
@@ -97,21 +102,29 @@ NNS.SD.cluster <- function(data, degree = 1, type = "discrete", min_cluster = 1,
97
102
98
103
# Flatten the clusters into a single vector and generate cluster labels
99
104
all_vars <- unlist(clusters )
105
+
106
+
107
+
100
108
cluster_labels <- unlist(lapply(seq_along(clusters ), function (i ) rep(i , length(clusters [[i ]]))))
101
109
102
110
103
111
if (dendrogram ){
104
112
# Ensure there are at least two variables for hierarchical clustering
105
113
if (length(all_vars ) < 2 ) {
106
114
warning(" Not enough variables for hierarchical clustering. Returning clusters only." )
107
- return (list (" Clusters" = clusters , " Dendrogram " = NULL ))
115
+ return (list (" Clusters" = clusters , " Order " = NULL ))
108
116
}
109
117
110
- # Create a distance matrix based on cluster labels
111
- dist_matrix <- as.dist(outer(cluster_labels , cluster_labels , function (a , b ) abs(a - b )))
112
- # For a "dist" object, assign labels using the Labels attribute instead of rownames.
118
+ # Use the extraction order inherent in all_vars as a tie-breaker.
119
+ extraction_order <- seq_along(all_vars )
120
+
121
+ epsilon <- 1e-3 # small tie-breaker weight
122
+ dist_matrix <- as.dist(
123
+ outer(cluster_labels , cluster_labels , function (a , b ) n * abs(a - b )) +
124
+ epsilon * outer(extraction_order , extraction_order , function (i , j ) abs(i - j ))
125
+ )
113
126
attr(dist_matrix , " Labels" ) <- all_vars
114
-
127
+
115
128
# Perform hierarchical clustering
116
129
hc <- hclust(dist_matrix , method = " complete" )
117
130
@@ -121,7 +134,8 @@ NNS.SD.cluster <- function(data, degree = 1, type = "discrete", min_cluster = 1,
121
134
ylab = " SD Distance" ,
122
135
sub = " "
123
136
)
124
-
137
+
138
+ hc $ order <- match(hc $ labels , original_names )
125
139
126
140
return (list (" Clusters" = clusters , " Dendrogram" = hc ))
127
141
} else return (list (" Clusters" = clusters ))
0 commit comments