ENH update code to check response values of an estimator (scikit-learn#33126)

qbarthelemy · AnneBeyer · web-flow · commit 73b45d8e0034 · 2026-02-05T13:33:18.000+01:00
Co-authored-by: Anne Beyer <anne.beyer@mailbox.org>
diff --git a/doc/whats_new/upcoming_changes/sklearn.utils/33126.enhancement.rst b/doc/whats_new/upcoming_changes/sklearn.utils/33126.enhancement.rst
@@ -0,0 +1,3 @@
+- ``sklearn.utils._response._get_response_values`` now provides a clearer error message
+  when the estimator does not implement the given ``response_method``.
+  By :user:`Quentin Barthélemy <qbarthelemy>`.
diff --git a/sklearn/inspection/_plot/tests/test_boundary_decision_display.py b/sklearn/inspection/_plot/tests/test_boundary_decision_display.py
@@ -397,20 +397,6 @@ def test_multioutput_regressor_error(pyplot):
         DecisionBoundaryDisplay.from_estimator(tree, X, response_method="predict")
 
 
-@pytest.mark.parametrize(
-    "response_method",
-    ["predict_proba", "decision_function", ["predict_proba", "predict"]],
-)
-def test_regressor_unsupported_response(pyplot, response_method):
-    """Check that we can display the decision boundary for a regressor."""
-    X, y = load_diabetes(return_X_y=True)
-    X = X[:, :2]
-    tree = DecisionTreeRegressor().fit(X, y)
-    err_msg = "should either be a classifier to be used with response_method"
-    with pytest.raises(ValueError, match=err_msg):
-        DecisionBoundaryDisplay.from_estimator(tree, X, response_method=response_method)
-
-
 @pytest.mark.filterwarnings(
     # We expect to raise the following warning because the classifier is fit on a
     # NumPy array
diff --git a/sklearn/utils/_response.py b/sklearn/utils/_response.py
@@ -120,7 +120,8 @@ def _get_response_values(
     pos_label=None,
     return_response_method_used=False,
 ):
-    """Compute the response values of a classifier, an outlier detector, or a regressor.
+    """Compute the response values of a classifier, an outlier detector, a regressor
+    or a clusterer.
 
     The response values are predictions such that it follows the following shape:
 
@@ -129,8 +130,8 @@ def _get_response_values(
         - with response_method="predict", it is a 1d array of shape `(n_samples,)`;
         - otherwise, it is a 2d array of shape `(n_samples, n_classes)`;
     - for multilabel classification, it is a 2d array of shape `(n_samples, n_outputs)`;
-    - for outlier detection, it is a 1d array of shape `(n_samples,)`;
-    - for regression, it is a 1d array of shape `(n_samples,)`.
+    - for outlier detection, regression or clustering, it is a 1d array of shape
+      `(n_samples,)`.
 
     If `estimator` is a binary classifier, also return the label for the
     effective positive class.
@@ -142,9 +143,9 @@ def _get_response_values(
     Parameters
     ----------
     estimator : estimator instance
-        Fitted classifier, outlier detector, or regressor or a
+        Fitted classifier, outlier detector, regressor, clusterer or a
         fitted :class:`~sklearn.pipeline.Pipeline` in which the last estimator is a
-        classifier, an outlier detector, or a regressor.
+        classifier, an outlier detector, a regressor or a clusterer.
 
     X : {array-like, sparse matrix} of shape (n_samples, n_features)
         Input values.
@@ -180,8 +181,8 @@ def _get_response_values(
 
     pos_label : int, float, bool, str or None
         The class considered as the positive class when computing
-        the metrics. Returns `None` if `estimator` is a regressor or an outlier
-        detector.
+        the metrics. Returns `None` if `estimator` is a regressor, an outlier
+        detector or a clusterer.
 
     response_method_used : str
         The response method used to compute the response values. Only returned
@@ -194,13 +195,10 @@ def _get_response_values(
     ValueError
         If `pos_label` is not a valid label.
         If the shape of `y_pred` is not consistent for binary classifier.
-        If the response method can be applied to a classifier only and
-        `estimator` is a regressor.
     """
-    from sklearn.base import is_classifier, is_outlier_detector
+    prediction_method = _check_response_method(estimator, response_method)
 
     if is_classifier(estimator):
-        prediction_method = _check_response_method(estimator, response_method)
         classes = estimator.classes_
         target_type = type_of_target(classes)
 
@@ -229,18 +227,7 @@ def _get_response_values(
                 classes=classes,
                 pos_label=pos_label,
             )
-    elif is_outlier_detector(estimator):
-        prediction_method = _check_response_method(estimator, response_method)
-        y_pred, pos_label = prediction_method(X), None
-    else:  # estimator is a regressor
-        if response_method != "predict":
-            raise ValueError(
-                f"{estimator.__class__.__name__} should either be a classifier to be "
-                f"used with response_method={response_method} or the response_method "
-                "should be 'predict'. Got a regressor with response_method="
-                f"{response_method} instead."
-            )
-        prediction_method = estimator.predict
+    else:
         y_pred, pos_label = prediction_method(X), None
 
     if return_response_method_used:
diff --git a/sklearn/utils/tests/test_response.py b/sklearn/utils/tests/test_response.py
@@ -4,21 +4,17 @@
 import pytest
 
 from sklearn.base import clone
+from sklearn.cluster import DBSCAN, KMeans
 from sklearn.datasets import (
     load_iris,
     make_classification,
     make_multilabel_classification,
-    make_regression,
 )
 from sklearn.ensemble import IsolationForest
-from sklearn.linear_model import (
-    LinearRegression,
-    LogisticRegression,
-)
+from sklearn.linear_model import LinearRegression, LogisticRegression
 from sklearn.multioutput import ClassifierChain
 from sklearn.preprocessing import scale
 from sklearn.tree import DecisionTreeClassifier, DecisionTreeRegressor
-from sklearn.utils._mocking import _MockEstimatorOnOffPrediction
 from sklearn.utils._response import _get_response_values, _get_response_values_binary
 from sklearn.utils._testing import assert_allclose, assert_array_equal
 
@@ -29,56 +25,59 @@
 
 
 @pytest.mark.parametrize(
-    "response_method", ["decision_function", "predict_proba", "predict_log_proba"]
+    "estimator, response_method",
+    [
+        (DecisionTreeRegressor(), "predict_proba"),
+        (DecisionTreeRegressor(), ["predict_proba", "decision_function"]),
+        (KMeans(n_clusters=2, n_init=1), "predict_proba"),
+        (KMeans(n_clusters=2, n_init=1), ["predict_proba", "decision_function"]),
+        (DBSCAN(), "predict"),
+        (IsolationForest(random_state=0), "predict_proba"),
+        (IsolationForest(random_state=0), ["predict_proba", "score"]),
+    ],
 )
-def test_get_response_values_regressor_error(response_method):
-    """Check the error message with regressor an not supported response
-    method."""
-    my_estimator = _MockEstimatorOnOffPrediction(response_methods=[response_method])
-    X = "mocking_data", "mocking_target"
-    err_msg = f"{my_estimator.__class__.__name__} should either be a classifier"
-    with pytest.raises(ValueError, match=err_msg):
-        _get_response_values(my_estimator, X, response_method=response_method)
-
-
-@pytest.mark.parametrize("return_response_method_used", [True, False])
-def test_get_response_values_regressor(return_response_method_used):
-    """Check the behaviour of `_get_response_values` with regressor."""
-    X, y = make_regression(n_samples=10, random_state=0)
-    regressor = LinearRegression().fit(X, y)
-    results = _get_response_values(
-        regressor,
-        X,
-        response_method="predict",
-        return_response_method_used=return_response_method_used,
-    )
-    assert_array_equal(results[0], regressor.predict(X))
-    assert results[1] is None
-    if return_response_method_used:
-        assert results[2] == "predict"
+def test_estimator_unsupported_response(pyplot, estimator, response_method):
+    """Check the error message raised for an unsupported response method."""
+    X, y = np.random.RandomState(0).randn(10, 2), np.array([0, 1] * 5)
+    estimator.fit(X, y)
+    err_msg = "has none of the following attributes:"
+    with pytest.raises(AttributeError, match=err_msg):
+        _get_response_values(
+            estimator,
+            X,
+            response_method=response_method,
+        )
 
 
 @pytest.mark.parametrize(
-    "response_method",
-    ["predict", "decision_function", ["decision_function", "predict"]],
+    "estimator, response_method",
+    [
+        (LinearRegression(), "predict"),
+        (KMeans(n_clusters=2, n_init=1), "predict"),
+        (KMeans(n_clusters=2, n_init=1), "score"),
+        (KMeans(n_clusters=2, n_init=1), ["predict", "score"]),
+        (IsolationForest(random_state=0), "predict"),
+        (IsolationForest(random_state=0), "decision_function"),
+        (IsolationForest(random_state=0), ["decision_function", "predict"]),
+    ],
 )
 @pytest.mark.parametrize("return_response_method_used", [True, False])
-def test_get_response_values_outlier_detection(
-    response_method, return_response_method_used
+def test_estimator_get_response_values(
+    estimator, response_method, return_response_method_used
 ):
-    """Check the behaviour of `_get_response_values` with outlier detector."""
-    X, y = make_classification(n_samples=50, random_state=0)
-    outlier_detector = IsolationForest(random_state=0).fit(X, y)
+    """Check the behaviour of `_get_response_values`."""
+    X, y = np.random.RandomState(0).randn(10, 2), np.array([0, 1] * 5)
+    estimator.fit(X, y)
     results = _get_response_values(
-        outlier_detector,
+        estimator,
         X,
         response_method=response_method,
         return_response_method_used=return_response_method_used,
     )
     chosen_response_method = (
         response_method[0] if isinstance(response_method, list) else response_method
     )
-    prediction_method = getattr(outlier_detector, chosen_response_method)
+    prediction_method = getattr(estimator, chosen_response_method)
     assert_array_equal(results[0], prediction_method(X))
     assert results[1] is None
     if return_response_method_used:
@@ -417,6 +416,8 @@ def test_response_values_type_of_target_on_classes_no_warning():
         (IsolationForest(), "predict", "multiclass", (10,)),
         (DecisionTreeRegressor(), "predict", "binary", (10,)),
         (DecisionTreeRegressor(), "predict", "multiclass", (10,)),
+        (KMeans(n_clusters=2, n_init=1), "predict", "binary", (10,)),
+        (KMeans(n_clusters=2, n_init=1), "predict", "multiclass", (10,)),
     ],
 )
 def test_response_values_output_shape_(
@@ -430,8 +431,8 @@ def test_response_values_output_shape_(
         - with response_method="predict", it is a 1d array of shape `(n_samples,)`;
         - otherwise, it is a 2d array of shape `(n_samples, n_classes)`;
     - for multilabel classification, it is a 2d array of shape `(n_samples, n_outputs)`;
-    - for outlier detection, it is a 1d array of shape `(n_samples,)`;
-    - for regression, it is a 1d array of shape `(n_samples,)`.
+    - for outlier detection, regression and clustering,
+      it is a 1d array of shape `(n_samples,)`.
     """
     X = np.random.RandomState(0).randn(10, 2)
     if target_type == "binary":

Original file line number	Diff line number	Diff line change
`@@ -0,0 +1,3 @@`
	`1`	+- ``sklearn.utils._response._get_response_values`` now provides a clearer error message
	`2`	+ when estimator does not implement the given ``response_method``.
	`3`	+ By :user:`Quentin Barthélemy <qbarthelemy>`.