Optimize and fix sample weight checks for Kmeans predict (uxlfoundation#2042)

md-shafiul-alam · md.shafiul.alam · web-flow · commit 5b149cb097b2 · 2024-09-13T11:07:25.000-04:00
* sample weight modification

* refactor

* ci fix

* refactor

* refactor

* ci fix

* add back the checks with version check

* refactor

* ci fix

* lint

* ci fix

---------

Co-authored-by: md.shafiul.alam <mdshafiu@sdpdal630226.jf.intel.com>
diff --git a/sklearnex/cluster/k_means.py b/sklearnex/cluster/k_means.py
@@ -109,14 +109,7 @@ def _onedal_fit_supported(self, method_name, X, y=None, sample_weight=None):
                 _is_csr(X) and daal_check_version((2024, "P", 700))
             ) or not issparse(X)
 
-            _acceptable_sample_weights = True
-            if sample_weight is not None or not isinstance(sample_weight, numbers.Number):
-                sample_weight = _check_sample_weight(
-                    sample_weight, X, dtype=X.dtype if hasattr(X, "dtype") else None
-                )
-                _acceptable_sample_weights = np.allclose(
-                    sample_weight, np.ones_like(sample_weight)
-                )
+            _acceptable_sample_weights = self._validate_sample_weight(sample_weight, X)
 
             patching_status.and_conditions(
                 [
@@ -127,7 +120,7 @@ def _onedal_fit_supported(self, method_name, X, y=None, sample_weight=None):
                     (correct_count, "n_clusters is smaller than number of samples"),
                     (
                         _acceptable_sample_weights,
-                        "oneDAL doesn't support sample_weight, either None or ones are acceptable",
+                        "oneDAL doesn't support sample_weight. Accepted options are None, constant, or equal weights.",
                     ),
                     (
                         is_data_supported,
@@ -161,6 +154,9 @@ def _onedal_fit(self, X, _, sample_weight, queue=None):
                 X,
                 accept_sparse="csr",
                 dtype=[np.float64, np.float32],
+                order="C",
+                copy=self.copy_x,
+                accept_large_sparse=False,
             )
 
             if sklearn_check_version("1.2"):
@@ -176,6 +172,22 @@ def _onedal_fit(self, X, _, sample_weight, queue=None):
 
             self._save_attributes()
 
+        def _validate_sample_weight(self, sample_weight, X):
+            if sample_weight is None:
+                return True
+            elif isinstance(sample_weight, numbers.Number):
+                return True
+            else:
+                sample_weight = _check_sample_weight(
+                    sample_weight,
+                    X,
+                    dtype=X.dtype if hasattr(X, "dtype") else None,
+                )
+                if np.all(sample_weight == sample_weight[0]):
+                    return True
+                else:
+                    return False
+
         def _onedal_predict_supported(self, method_name, X, sample_weight=None):
             class_name = self.__class__.__name__
             is_data_supported = (
@@ -194,12 +206,9 @@ def _onedal_predict_supported(self, method_name, X, sample_weight=None):
                 )
 
             _acceptable_sample_weights = True
-            if sample_weight is not None or not isinstance(sample_weight, numbers.Number):
-                sample_weight = _check_sample_weight(
-                    sample_weight, X, dtype=X.dtype if hasattr(X, "dtype") else None
-                )
-                _acceptable_sample_weights = np.allclose(
-                    sample_weight, np.ones_like(sample_weight)
+            if not sklearn_check_version("1.5"):
+                _acceptable_sample_weights = self._validate_sample_weight(
+                    sample_weight, X
                 )
 
             patching_status.and_conditions(
@@ -214,7 +223,7 @@ def _onedal_predict_supported(self, method_name, X, sample_weight=None):
                     ),
                     (
                         _acceptable_sample_weights,
-                        "oneDAL doesn't support sample_weight, None or ones are acceptable",
+                        "oneDAL doesn't support sample_weight. Acceptable options are None, constant, or equal weights.",
                     ),
                 ]
             )