@@ -6,6 +6,7 @@ impure function KMeans "k-means clustering algorithm"
66 input Integer n_samples "Number of samples" ;
77 input Integer n_features "Number of features" ;
88 input Real relTol= 1e-5 "Relative tolerance on cluster positions" ;
9+ input Real absTol= 1e-8 "Absolute tolerance on cluster positions" ;
910 input Integer max_iter= 500 "Maximum number of k-means iterations" ;
1011 input Integer n_init= 10 "Number of runs with randomized centroid seeds" ;
1112 input Integer n_cluster_size= 0 "Length of the cluster_size output vector" ;
@@ -16,7 +17,8 @@ impure function KMeans "k-means clustering algorithm"
1617protected
1718 Real old_centroids[n_clusters,n_features] "Previous iteration centroids" ;
1819 Real new_centroids[n_clusters,n_features] "Next iteration centroids" ;
19- Real delta_centroids "Maximum relative displacement of cluster centroids between two k-means iterations" ;
20+ Real relDelta_centroids "Maximum relative displacement of cluster centroids between two k-means iterations" ;
21+ Real absDelta_centroids "Maximum absolute displacement of cluster centroids between two k-means iterations" ;
2022 Integer new_labels[n_samples] "Next iteration cluster labels" ;
2123 Real new_inertia "Inertia of the samples during the current run" ;
2224 Real inertia "Minimum inertia of the samples since first run" ;
@@ -31,7 +33,8 @@ algorithm
3133 id := Modelica.Math.Random.Utilities.initializeImpureRandom(seed);
3234
3335 // ---- Perform n_init successive runs of the k-means algorithm
34- for run in 1 :n_init loop
36+ inertia := 0 .;
37+ for run in 1 :n_init loop
3538 // ---- Select initial centroids at random
3639 // Select 3 non-repeated data points in the data set
3740 n := Modelica.Math.Random.Utilities.impureRandomInteger(id,1 ,n_samples);
@@ -53,8 +56,9 @@ algorithm
5356
5457 // ---- k-means iterations
5558 k_iter := 0 ;
56- delta_centroids := 2 * relTol;
57- while k_iter < max_iter and delta_centroids > relTol loop
59+ relDelta_centroids := 2 * relTol;
60+ absDelta_centroids := 2 * absTol;
61+ while k_iter < max_iter and (relDelta_centroids > relTol or absDelta_centroids > absTol) loop
5862 k_iter := k_iter + 1 ;
5963
6064 // Find centroid closest to each data point
@@ -71,7 +75,8 @@ algorithm
7175 end for ;
7276
7377 // Re-evaluate position of the centroids
74- delta_centroids := 0 ;
78+ relDelta_centroids := 0 ;
79+ absDelta_centroids := 0 ;
7580 for j in 1 :n_clusters loop
7681 n := sum (if new_labels[i]== j then 1 else 0 for i in 1 :n_samples);
7782 new_centroids[j,:] := zeros (n_features);
@@ -84,7 +89,8 @@ algorithm
8489 else
8590 new_centroids[j,:] := old_centroids[j,:];
8691 end if ;
87- delta_centroids := max (delta_centroids, sum ((new_centroids[j,:] - old_centroids[j,:])./ old_centroids[j,:]));
92+ relDelta_centroids := max (relDelta_centroids, sum (abs (new_centroids[j,:] - old_centroids[j,:]) ./ (old_centroids[j,:] .+ Modelica.Constants.eps)));
93+ absDelta_centroids := max (absDelta_centroids, sum (abs (new_centroids[j,:] - old_centroids[j,:])));
8894 end for ;
8995 old_centroids := new_centroids;
9096 end while ;
@@ -129,6 +135,13 @@ modifying the constant <code>seed</code>.
129135</html>" , revisions="<html>
130136<ul>
131137<li>
138+ March 18, 2025, by Massimo Cimmino:<br/>
139+ Added absolute tolerance. The algorithm stops when both the relative and
140+ absolute tolerances are satisfied. This fixes errors that occur when a centroid
141+ has a value close to zero on any of its axes. See
142+ <a href=\"https://github.com/ibpsa/modelica-ibpsa/issues/1985\">#1985</a>.
143+ </li>
144+ <li>
132145February 1, 2023, by Michael Wetter:<br/>
133146Added <code>impure</code> declaration which is needed for compliance with the Modelica Language Specification,
134147and is required by Optimica.
0 commit comments