@@ -6,6 +6,7 @@ impure function KMeans "k-means clustering algorithm"
66 input Integer n_samples "Number of samples" ;
77 input Integer n_features "Number of features" ;
88 input Real relTol= 1e-5 "Relative tolerance on cluster positions" ;
9+ input Real absTol= 1e-8 "Absolute tolerance on cluster positions" ;
910 input Integer max_iter= 500 "Maximum number of k-means iterations" ;
1011 input Integer n_init= 10 "Number of runs with randomized centroid seeds" ;
1112 input Integer n_cluster_size= 0 "Length of the cluster_size output vector" ;
@@ -16,7 +17,8 @@ impure function KMeans "k-means clustering algorithm"
1617protected
1718 Real old_centroids[n_clusters,n_features] "Previous iteration centroids" ;
1819 Real new_centroids[n_clusters,n_features] "Next iteration centroids" ;
19- Real delta_centroids "Maximum relative displacement of cluster centroids between two k-means iterations" ;
20+ Real relDelta_centroids "Maximum relative displacement of cluster centroids between two k-means iterations" ;
21+ Real absDelta_centroids "Maximum absolute displacement of cluster centroids between two k-means iterations" ;
2022 Integer new_labels[n_samples] "Next iteration cluster labels" ;
2123 Real new_inertia "Inertia of the samples during the current run" ;
2224 Real inertia "Minimum inertia of the samples since first run" ;
@@ -31,30 +33,30 @@ algorithm
3133 id := Modelica.Math.Random.Utilities.initializeImpureRandom(seed);
3234
3335 // ---- Perform n_init successive runs of the k-means algorithm
34- for run in 1 :n_init loop
36+ inertia := Modelica.Constants.inf;
37+ for run in 1 :n_init loop
3538 // ---- Select initial centroids at random
3639 // Select 3 non-repeated data points in the data set
3740 n := Modelica.Math.Random.Utilities.impureRandomInteger(id,1 ,n_samples);
3841 old_centroids[1 ,:] := data[n,:];
3942 for i in 2 :n_clusters loop
40- n := Modelica.Math.Random.Utilities.impureRandomInteger(id,1 ,n_samples);
41- old_centroids[i,:] := data[n,:];
42- min_dis := Modelica.Math.Vectors.norm(old_centroids[i,:]- old_centroids[1 ,:], p= 2 )^ 2 ;
43+ min_dis := 0 ;
4344 while min_dis < Modelica.Constants.eps loop
4445 n := Modelica.Math.Random.Utilities.impureRandomInteger(id,1 ,n_samples);
4546 old_centroids[i,:] := data[n,:];
4647 min_dis := Modelica.Math.Vectors.norm(old_centroids[i,:]- old_centroids[1 ,:], p= 2 )^ 2 ;
47- for j in 1 :i- 1 loop
48- dis := Modelica.Math.Vectors.norm(old_centroids[j ,:]- old_centroids[i ,:], p= 2 )^ 2 ;
48+ for j in 2 :i- 1 loop
49+ dis := Modelica.Math.Vectors.norm(old_centroids[i ,:]- old_centroids[j ,:], p= 2 )^ 2 ;
4950 min_dis := min (dis, min_dis);
5051 end for ;
5152 end while ;
5253 end for ;
5354
5455 // ---- k-means iterations
5556 k_iter := 0 ;
56- delta_centroids := 2 * relTol;
57- while k_iter < max_iter and delta_centroids > relTol loop
57+ relDelta_centroids := 2 * relTol;
58+ absDelta_centroids := 2 * absTol;
59+ while k_iter < max_iter and (relDelta_centroids > relTol or absDelta_centroids > absTol) loop
5860 k_iter := k_iter + 1 ;
5961
6062 // Find centroid closest to each data point
@@ -71,7 +73,8 @@ algorithm
7173 end for ;
7274
7375 // Re-evaluate position of the centroids
74- delta_centroids := 0 ;
76+ relDelta_centroids := 0 ;
77+ absDelta_centroids := 0 ;
7578 for j in 1 :n_clusters loop
7679 n := sum (if new_labels[i]== j then 1 else 0 for i in 1 :n_samples);
7780 new_centroids[j,:] := zeros (n_features);
@@ -84,7 +87,8 @@ algorithm
8487 else
8588 new_centroids[j,:] := old_centroids[j,:];
8689 end if ;
87- delta_centroids := max (delta_centroids, sum ((new_centroids[j,:] - old_centroids[j,:])./ old_centroids[j,:]));
90+ relDelta_centroids := max (relDelta_centroids, sum (abs (new_centroids[j,:] - old_centroids[j,:]) ./ (abs (old_centroids[j,:]) .+ Modelica.Constants.eps)));
91+ absDelta_centroids := max (absDelta_centroids, sum (abs (new_centroids[j,:] - old_centroids[j,:])));
8892 end for ;
8993 old_centroids := new_centroids;
9094 end while ;
@@ -93,7 +97,7 @@ algorithm
9397 new_inertia := 0 ;
9498 for i in 1 :n_samples loop
9599 dis := Modelica.Math.Vectors.norm(data[i,:]- centroids[new_labels[i],:], p= 2 )^ 2 ;
96- new_inertia := inertia + dis;
100+ new_inertia := new_inertia + dis;
97101 end for ;
98102
99103 // Keep run results if inertia is minimum
@@ -129,6 +133,15 @@ modifying the constant <code>seed</code>.
129133</html>" , revisions="<html>
130134<ul>
131135<li>
136+ March 18, 2025 by Massimo Cimmino<br/>
137+ Added absolute tolerance. The algorithm stops when any of the relative and
138+ absolute tolerances is satisfied. This fixes errors that occur when a centroid
139+ has a value close to zero on any of its axes. See
140+ <a href=\" https://github.com/ibpsa/modelica-ibpsa/issues/1985\">#1985</a>.
141+ Fixed the initial selection of centroids to avoid repeated centroids. See also
142+ <a href=\" https://github.com/ibpsa/modelica-ibpsa/issues/1976\">#1976</a>.
143+ </li>
144+ <li>
132145February 1, 2023, by Michael Wetter:<br/>
133146Added <code>impure</code> declaration which is needed for compliance with the Modelica Language Specification,
134147and is required by Optimica.
0 commit comments