Skip to content

Commit 73b45d8

Browse files
ENH update code to check response values of an estimator (scikit-learn#33126)
Co-authored-by: Anne Beyer <anne.beyer@mailbox.org>
1 parent c27576b commit 73b45d8

4 files changed

Lines changed: 57 additions & 80 deletions

File tree

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
- ``sklearn.utils._response._get_response_values`` now provides a clearer error message
2+
when the estimator does not implement the given ``response_method``.
3+
By :user:`Quentin Barthélemy <qbarthelemy>`.

sklearn/inspection/_plot/tests/test_boundary_decision_display.py

Lines changed: 0 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -397,20 +397,6 @@ def test_multioutput_regressor_error(pyplot):
397397
DecisionBoundaryDisplay.from_estimator(tree, X, response_method="predict")
398398

399399

400-
@pytest.mark.parametrize(
401-
"response_method",
402-
["predict_proba", "decision_function", ["predict_proba", "predict"]],
403-
)
404-
def test_regressor_unsupported_response(pyplot, response_method):
405-
"""Check that we can display the decision boundary for a regressor."""
406-
X, y = load_diabetes(return_X_y=True)
407-
X = X[:, :2]
408-
tree = DecisionTreeRegressor().fit(X, y)
409-
err_msg = "should either be a classifier to be used with response_method"
410-
with pytest.raises(ValueError, match=err_msg):
411-
DecisionBoundaryDisplay.from_estimator(tree, X, response_method=response_method)
412-
413-
414400
@pytest.mark.filterwarnings(
415401
# We expect to raise the following warning because the classifier is fit on a
416402
# NumPy array

sklearn/utils/_response.py

Lines changed: 10 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -120,7 +120,8 @@ def _get_response_values(
120120
pos_label=None,
121121
return_response_method_used=False,
122122
):
123-
"""Compute the response values of a classifier, an outlier detector, or a regressor.
123+
"""Compute the response values of a classifier, an outlier detector, a regressor
124+
or a clusterer.
124125
125126
The response values are predictions with the following shape:
126127
@@ -129,8 +130,8 @@ def _get_response_values(
129130
- with response_method="predict", it is a 1d array of shape `(n_samples,)`;
130131
- otherwise, it is a 2d array of shape `(n_samples, n_classes)`;
131132
- for multilabel classification, it is a 2d array of shape `(n_samples, n_outputs)`;
132-
- for outlier detection, it is a 1d array of shape `(n_samples,)`;
133-
- for regression, it is a 1d array of shape `(n_samples,)`.
133+
- for an outlier detector, a regressor or a clusterer, it is a 1d array of shape
134+
`(n_samples,)`.
134135
135136
If `estimator` is a binary classifier, also return the label for the
136137
effective positive class.
@@ -142,9 +143,9 @@ def _get_response_values(
142143
Parameters
143144
----------
144145
estimator : estimator instance
145-
Fitted classifier, outlier detector, or regressor or a
146+
Fitted classifier, outlier detector, regressor, clusterer or a
146147
fitted :class:`~sklearn.pipeline.Pipeline` in which the last estimator is a
147-
classifier, an outlier detector, or a regressor.
148+
classifier, an outlier detector, a regressor or a clusterer.
148149
149150
X : {array-like, sparse matrix} of shape (n_samples, n_features)
150151
Input values.
@@ -180,8 +181,8 @@ def _get_response_values(
180181
181182
pos_label : int, float, bool, str or None
182183
The class considered as the positive class when computing
183-
the metrics. Returns `None` if `estimator` is a regressor or an outlier
184-
detector.
184+
the metrics. Returns `None` if `estimator` is a regressor, an outlier
185+
detector or a clusterer.
185186
186187
response_method_used : str
187188
The response method used to compute the response values. Only returned
@@ -194,13 +195,10 @@ def _get_response_values(
194195
ValueError
195196
If `pos_label` is not a valid label.
196197
If the shape of `y_pred` is not consistent for a binary classifier.
197-
If the response method can be applied to a classifier only and
198-
`estimator` is a regressor.
199198
"""
200-
from sklearn.base import is_classifier, is_outlier_detector
199+
prediction_method = _check_response_method(estimator, response_method)
201200

202201
if is_classifier(estimator):
203-
prediction_method = _check_response_method(estimator, response_method)
204202
classes = estimator.classes_
205203
target_type = type_of_target(classes)
206204

@@ -229,18 +227,7 @@ def _get_response_values(
229227
classes=classes,
230228
pos_label=pos_label,
231229
)
232-
elif is_outlier_detector(estimator):
233-
prediction_method = _check_response_method(estimator, response_method)
234-
y_pred, pos_label = prediction_method(X), None
235-
else: # estimator is a regressor
236-
if response_method != "predict":
237-
raise ValueError(
238-
f"{estimator.__class__.__name__} should either be a classifier to be "
239-
f"used with response_method={response_method} or the response_method "
240-
"should be 'predict'. Got a regressor with response_method="
241-
f"{response_method} instead."
242-
)
243-
prediction_method = estimator.predict
230+
else:
244231
y_pred, pos_label = prediction_method(X), None
245232

246233
if return_response_method_used:

sklearn/utils/tests/test_response.py

Lines changed: 44 additions & 43 deletions
Original file line numberDiff line numberDiff line change
@@ -4,21 +4,17 @@
44
import pytest
55

66
from sklearn.base import clone
7+
from sklearn.cluster import DBSCAN, KMeans
78
from sklearn.datasets import (
89
load_iris,
910
make_classification,
1011
make_multilabel_classification,
11-
make_regression,
1212
)
1313
from sklearn.ensemble import IsolationForest
14-
from sklearn.linear_model import (
15-
LinearRegression,
16-
LogisticRegression,
17-
)
14+
from sklearn.linear_model import LinearRegression, LogisticRegression
1815
from sklearn.multioutput import ClassifierChain
1916
from sklearn.preprocessing import scale
2017
from sklearn.tree import DecisionTreeClassifier, DecisionTreeRegressor
21-
from sklearn.utils._mocking import _MockEstimatorOnOffPrediction
2218
from sklearn.utils._response import _get_response_values, _get_response_values_binary
2319
from sklearn.utils._testing import assert_allclose, assert_array_equal
2420

@@ -29,56 +25,59 @@
2925

3026

3127
@pytest.mark.parametrize(
32-
"response_method", ["decision_function", "predict_proba", "predict_log_proba"]
28+
"estimator, response_method",
29+
[
30+
(DecisionTreeRegressor(), "predict_proba"),
31+
(DecisionTreeRegressor(), ["predict_proba", "decision_function"]),
32+
(KMeans(n_clusters=2, n_init=1), "predict_proba"),
33+
(KMeans(n_clusters=2, n_init=1), ["predict_proba", "decision_function"]),
34+
(DBSCAN(), "predict"),
35+
(IsolationForest(random_state=0), "predict_proba"),
36+
(IsolationForest(random_state=0), ["predict_proba", "score"]),
37+
],
3338
)
34-
def test_get_response_values_regressor_error(response_method):
35-
"""Check the error message with regressor an not supported response
36-
method."""
37-
my_estimator = _MockEstimatorOnOffPrediction(response_methods=[response_method])
38-
X = "mocking_data", "mocking_target"
39-
err_msg = f"{my_estimator.__class__.__name__} should either be a classifier"
40-
with pytest.raises(ValueError, match=err_msg):
41-
_get_response_values(my_estimator, X, response_method=response_method)
42-
43-
44-
@pytest.mark.parametrize("return_response_method_used", [True, False])
45-
def test_get_response_values_regressor(return_response_method_used):
46-
"""Check the behaviour of `_get_response_values` with regressor."""
47-
X, y = make_regression(n_samples=10, random_state=0)
48-
regressor = LinearRegression().fit(X, y)
49-
results = _get_response_values(
50-
regressor,
51-
X,
52-
response_method="predict",
53-
return_response_method_used=return_response_method_used,
54-
)
55-
assert_array_equal(results[0], regressor.predict(X))
56-
assert results[1] is None
57-
if return_response_method_used:
58-
assert results[2] == "predict"
39+
def test_estimator_unsupported_response(pyplot, estimator, response_method):
40+
"""Check the error message with not supported response method."""
41+
X, y = np.random.RandomState(0).randn(10, 2), np.array([0, 1] * 5)
42+
estimator.fit(X, y)
43+
err_msg = "has none of the following attributes:"
44+
with pytest.raises(AttributeError, match=err_msg):
45+
_get_response_values(
46+
estimator,
47+
X,
48+
response_method=response_method,
49+
)
5950

6051

6152
@pytest.mark.parametrize(
62-
"response_method",
63-
["predict", "decision_function", ["decision_function", "predict"]],
53+
"estimator, response_method",
54+
[
55+
(LinearRegression(), "predict"),
56+
(KMeans(n_clusters=2, n_init=1), "predict"),
57+
(KMeans(n_clusters=2, n_init=1), "score"),
58+
(KMeans(n_clusters=2, n_init=1), ["predict", "score"]),
59+
(IsolationForest(random_state=0), "predict"),
60+
(IsolationForest(random_state=0), "decision_function"),
61+
(IsolationForest(random_state=0), ["decision_function", "predict"]),
62+
],
6463
)
6564
@pytest.mark.parametrize("return_response_method_used", [True, False])
66-
def test_get_response_values_outlier_detection(
67-
response_method, return_response_method_used
65+
def test_estimator_get_response_values(
66+
estimator, response_method, return_response_method_used
6867
):
69-
"""Check the behaviour of `_get_response_values` with outlier detector."""
70-
X, y = make_classification(n_samples=50, random_state=0)
71-
outlier_detector = IsolationForest(random_state=0).fit(X, y)
68+
"""Check the behaviour of `_get_response_values`."""
69+
X, y = np.random.RandomState(0).randn(10, 2), np.array([0, 1] * 5)
70+
estimator.fit(X, y)
7271
results = _get_response_values(
73-
outlier_detector,
72+
estimator,
7473
X,
7574
response_method=response_method,
7675
return_response_method_used=return_response_method_used,
7776
)
7877
chosen_response_method = (
7978
response_method[0] if isinstance(response_method, list) else response_method
8079
)
81-
prediction_method = getattr(outlier_detector, chosen_response_method)
80+
prediction_method = getattr(estimator, chosen_response_method)
8281
assert_array_equal(results[0], prediction_method(X))
8382
assert results[1] is None
8483
if return_response_method_used:
@@ -417,6 +416,8 @@ def test_response_values_type_of_target_on_classes_no_warning():
417416
(IsolationForest(), "predict", "multiclass", (10,)),
418417
(DecisionTreeRegressor(), "predict", "binary", (10,)),
419418
(DecisionTreeRegressor(), "predict", "multiclass", (10,)),
419+
(KMeans(n_clusters=2, n_init=1), "predict", "binary", (10,)),
420+
(KMeans(n_clusters=2, n_init=1), "predict", "multiclass", (10,)),
420421
],
421422
)
422423
def test_response_values_output_shape_(
@@ -430,8 +431,8 @@ def test_response_values_output_shape_(
430431
- with response_method="predict", it is a 1d array of shape `(n_samples,)`;
431432
- otherwise, it is a 2d array of shape `(n_samples, n_classes)`;
432433
- for multilabel classification, it is a 2d array of shape `(n_samples, n_outputs)`;
433-
- for outlier detection, it is a 1d array of shape `(n_samples,)`;
434-
- for regression, it is a 1d array of shape `(n_samples,)`.
434+
- for outlier detection, regression and clustering,
435+
it is a 1d array of shape `(n_samples,)`.
435436
"""
436437
X = np.random.RandomState(0).randn(10, 2)
437438
if target_type == "binary":

0 commit comments

Comments
 (0)