Skip to content

Commit 55afa21

Browse files
Schefflera-Arboricolaglemaitreauguste-probabl
authored andcommitted
test: Include all estimators (with coef_) in test_all_sklearn_estimators (probabl-ai#1575)
closes issue probabl-ai#1364 - incorporated: - [x] sklearn.linear_model.GammaRegressor() - [x] sklearn.linear_model.PoissonRegressor() - [x] sklearn.compose.TransformedTargetRegressor() - [ ] sklearn.cross_decomposition.CCA() - [ ] sklearn.cross_decomposition.PLSCanonical() - [ ] sklearn.linear_model.MultiTaskElasticNet() - [ ] sklearn.linear_model.MultiTaskElasticNetCV() - [ ] sklearn.linear_model.MultiTaskLasso() - [ ] sklearn.linear_model.MultiTaskLassoCV() - [ ] sklearn.svm.OneClassSVM() - [ ] sklearn.linear_model.SGDOneClassSVM() - other changes: - [x] handling estimators that does not support `coef_` and/or `intercept_` when creating `coefficients()` in EstimatorReport - [x] removed sklearn.linear_model.Lasso() --> duplicate entry - [x] updated `_check_has_coef` and `coefficients()` to handle cases of meta estimators (updated tests accordingly) - [x] reorganised the commented models - need feedback on: - [x] is there a better(cleaner) way to get data? -- right now I've created different fixtures for `positive_regression_data`, `multi_regression_data`, `outlier_data` and `clustering_data` -- but all these are only used once in this one test (i.e. `test_all_sklearn_estimators`). - [x] for `TransformedTargetRegressor` should we modify the `_check_has_coef` in `skore/src/skore/utils/_accessor.py` to not just check if the `hasattr(estimator, "coef_")` is true but also check if `hasattr(estimator.regressor_, "coef_")` is also true ([here](https://github.com/probabl-ai/skore/blob/main/skore/src/skore/utils/_accessor.py#L60))? or should we exclude `TransformedTargetRegressor` from this test? - [x] even though the tests passed locally, I'm not sure about how I've incorporated `sklearn.cross_decomposition.CCA()` and `sklearn.cross_decomposition.PLSCanonical()` - [ ] any other feedback that you would like to give! Thanks :) --------- Co-authored-by: Guillaume Lemaitre <[email protected]> Co-authored-by: Auguste Baum <[email protected]>
1 parent 2f3496e commit 55afa21

File tree

5 files changed

+83
-29
lines changed

5 files changed

+83
-29
lines changed

skore/src/skore/sklearn/_estimator/feature_importance_accessor.py

Lines changed: 19 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -203,10 +203,24 @@ def coefficients(self) -> pd.DataFrame:
203203
if isinstance(parent_estimator, Pipeline)
204204
else parent_estimator
205205
)
206-
intercept = np.atleast_2d(estimator.intercept_)
207-
coef = np.atleast_2d(estimator.coef_)
208-
209-
data = np.concatenate([intercept, coef.T])
206+
try:
207+
intercept = np.atleast_2d(estimator.intercept_)
208+
except AttributeError:
209+
# SGDOneClassSVM does not expose `intercept_`
210+
intercept = None
211+
212+
try:
213+
coef = np.atleast_2d(estimator.coef_)
214+
except AttributeError:
215+
# TransformedTargetRegressor() does not expose `coef_`
216+
coef = np.atleast_2d(estimator.regressor_.coef_)
217+
218+
if intercept is None:
219+
data = coef.T
220+
index = list(feature_names)
221+
else:
222+
data = np.concatenate([intercept, coef.T])
223+
index = ["Intercept"] + list(feature_names)
210224

211225
if data.shape[1] == 1:
212226
columns = ["Coefficient"]
@@ -217,7 +231,7 @@ def coefficients(self) -> pd.DataFrame:
217231

218232
df = pd.DataFrame(
219233
data=data,
220-
index=["Intercept"] + list(feature_names),
234+
index=index,
221235
columns=columns,
222236
)
223237

skore/src/skore/utils/_accessor.py

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -59,6 +59,12 @@ def check(accessor: Any) -> bool:
5959
)
6060
if hasattr(estimator, "coef_"):
6161
return True
62+
try: # e.g. TransformedTargetRegressor()
63+
if hasattr(estimator.regressor_, "coef_"):
64+
return True
65+
except AttributeError as msg:
66+
if "object has no attribute 'regressor_'" not in str(msg):
67+
raise
6268
raise AttributeError(
6369
f"Estimator {parent_estimator} is not a supported estimator by "
6470
"the function called."

skore/tests/unit/sklearn/estimator/feature_importance/conftest.py

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,4 @@
1+
import numpy as np
12
import pytest
23
from sklearn.datasets import make_classification, make_regression
34

@@ -7,6 +8,12 @@ def regression_data():
78
return make_regression(n_features=5, random_state=42)
89

910

11+
@pytest.fixture
12+
def positive_regression_data():
13+
X, y = make_regression(n_features=5, random_state=42)
14+
return X, np.abs(y) + 0.1
15+
16+
1017
@pytest.fixture
1118
def classification_data():
1219
return make_classification(n_features=5, random_state=42)

skore/tests/unit/sklearn/estimator/feature_importance/test_coefficients.py

Lines changed: 42 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,7 @@
77
from sklearn.pipeline import Pipeline, make_pipeline
88
from sklearn.preprocessing import StandardScaler
99
from skore import EstimatorReport
10+
from skore.externals._sklearn_compat import get_tags
1011

1112

1213
@pytest.mark.parametrize(
@@ -119,29 +120,25 @@ def test_estimator_report_coefficients_pandas_dataframe(estimator):
119120
[
120121
pytest.param(sklearn.svm.NuSVC(kernel="linear"), id="NuSVC"),
121122
pytest.param(sklearn.svm.NuSVR(kernel="linear"), id="NuSVR"),
122-
# pytest.param(sklearn.svm.OneClassSVM(), id="OneClassSVM"),
123123
pytest.param(sklearn.svm.SVC(kernel="linear"), id="SVC"),
124124
pytest.param(sklearn.svm.SVR(kernel="linear"), id="SVR"),
125125
pytest.param(sklearn.svm.LinearSVC(), id="LinearSVC"),
126126
pytest.param(sklearn.svm.LinearSVR(), id="LinearSVR"),
127-
# pytest.param(sklearn.cross_decomposition.CCA(), id="CCA"),
128-
# pytest.param(sklearn.cross_decomposition.PLSCanonical(), id="PLSCanonical"),
129127
pytest.param(sklearn.cross_decomposition.PLSRegression(), id="PLSRegression"),
130128
pytest.param(
131129
sklearn.discriminant_analysis.LinearDiscriminantAnalysis(),
132130
id="LinearDiscriminantAnalysis",
133131
),
134-
# pytest.param(
135-
# sklearn.compose.TransformedTargetRegressor(),
136-
# id="TransformedTargetRegressor",
137-
# ),
132+
pytest.param(
133+
sklearn.compose.TransformedTargetRegressor(),
134+
id="TransformedTargetRegressor",
135+
),
138136
pytest.param(sklearn.linear_model.ElasticNet(), id="ElasticNet"),
139-
pytest.param(sklearn.linear_model.Lasso(), id="Lasso"),
140137
pytest.param(sklearn.linear_model.ARDRegression(), id="ARDRegression"),
141138
pytest.param(sklearn.linear_model.BayesianRidge(), id="BayesianRidge"),
142139
pytest.param(sklearn.linear_model.ElasticNet(), id="ElasticNet"),
143140
pytest.param(sklearn.linear_model.ElasticNetCV(), id="ElasticNetCV"),
144-
# pytest.param(sklearn.linear_model.GammaRegressor(), id="GammaRegressor"),
141+
pytest.param(sklearn.linear_model.GammaRegressor(), id="GammaRegressor"),
145142
pytest.param(sklearn.linear_model.HuberRegressor(), id="HuberRegressor"),
146143
pytest.param(sklearn.linear_model.Lars(), id="Lars"),
147144
pytest.param(sklearn.linear_model.LarsCV(), id="LarsCV"),
@@ -157,14 +154,6 @@ def test_estimator_report_coefficients_pandas_dataframe(estimator):
157154
pytest.param(
158155
sklearn.linear_model.LogisticRegressionCV(), id="LogisticRegressionCV"
159156
),
160-
# pytest.param(
161-
# sklearn.linear_model.MultiTaskElasticNet(), id="MultiTaskElasticNet"
162-
# ),
163-
# pytest.param(
164-
# sklearn.linear_model.MultiTaskElasticNetCV(), id="MultiTaskElasticNetCV"
165-
# ),
166-
# pytest.param(sklearn.linear_model.MultiTaskLasso(), id="MultiTaskLasso"),
167-
# pytest.param(sklearn.linear_model.MultiTaskLassoCV(), id="MultiTaskLassoCV"),
168157
pytest.param(
169158
sklearn.linear_model.OrthogonalMatchingPursuit(),
170159
id="OrthogonalMatchingPursuit",
@@ -182,44 +171,73 @@ def test_estimator_report_coefficients_pandas_dataframe(estimator):
182171
id="PassiveAggressiveRegressor",
183172
),
184173
pytest.param(sklearn.linear_model.Perceptron(), id="Perceptron"),
185-
# pytest.param(sklearn.linear_model.PoissonRegressor(), id="PoissonRegressor"),
174+
pytest.param(sklearn.linear_model.PoissonRegressor(), id="PoissonRegressor"),
186175
pytest.param(sklearn.linear_model.QuantileRegressor(), id="QuantileRegressor"),
187176
pytest.param(sklearn.linear_model.Ridge(), id="Ridge"),
188177
pytest.param(sklearn.linear_model.RidgeClassifier(), id="RidgeClassifier"),
189178
pytest.param(sklearn.linear_model.RidgeClassifierCV(), id="RidgeClassifierCV"),
190179
pytest.param(sklearn.linear_model.RidgeCV(), id="RidgeCV"),
191180
pytest.param(sklearn.linear_model.SGDClassifier(), id="SGDClassifier"),
192-
# pytest.param(sklearn.linear_model.SGDOneClassSVM(), id="SGDOneClassSVM"),
193181
pytest.param(sklearn.linear_model.SGDRegressor(), id="SGDRegressor"),
194182
pytest.param(sklearn.linear_model.TheilSenRegressor(), id="TheilSenRegressor"),
195183
pytest.param(sklearn.linear_model.TweedieRegressor(), id="TweedieRegressor"),
184+
# The following models would be tested in the future when the `EstimatorReport`
185+
# will have metrics specific to these models:
186+
#
187+
# 1. multi-task
188+
# pytest.param(
189+
# sklearn.linear_model.MultiTaskElasticNet(), id="MultiTaskElasticNet"
190+
# ),
191+
# pytest.param(
192+
# sklearn.linear_model.MultiTaskElasticNetCV(), id="MultiTaskElasticNetCV"
193+
# ),
194+
# pytest.param(sklearn.linear_model.MultiTaskLasso(), id="MultiTaskLasso"),
195+
# pytest.param(sklearn.linear_model.MultiTaskLassoCV(), id="MultiTaskLassoCV"),
196+
# 2. cross_decomposition
197+
# pytest.param(sklearn.cross_decomposition.CCA(), id="CCA"),
198+
# pytest.param(sklearn.cross_decomposition.PLSCanonical(), id="PLSCanonical"),
199+
# 3. outlier detectors
200+
# pytest.param(sklearn.linear_model.SGDOneClassSVM(), id="SGDOneClassSVM"),
201+
# pytest.param(sklearn.svm.OneClassSVM(kernel="linear"), id="OneClassSVM"),
196202
],
197203
)
198204
def test_all_sklearn_estimators(
199-
request, estimator, regression_data, classification_data
205+
request,
206+
estimator,
207+
regression_data,
208+
positive_regression_data,
209+
classification_data,
200210
):
201211
"""Check that `coefficients` is supported for every sklearn estimator."""
202212
if is_classifier(estimator):
203213
X, y = classification_data
204214
elif is_regressor(estimator):
205-
X, y = regression_data
215+
if get_tags(estimator).target_tags.positive_only:
216+
X, y = positive_regression_data
217+
else:
218+
X, y = regression_data
206219
else:
207-
raise Exception("Estimator is neither a classifier nor a regressor")
220+
raise Exception("Estimator not in ['classifier', 'regressor']")
208221

209222
estimator.fit(X, y)
210223

211224
report = EstimatorReport(estimator)
212225
result = report.feature_importance.coefficients()
213226

214-
assert result.shape == (6, 1)
215-
assert result.index.tolist() == [
227+
rows = [
216228
"Intercept",
217229
"Feature #0",
218230
"Feature #1",
219231
"Feature #2",
220232
"Feature #3",
221233
"Feature #4",
222234
]
235+
if result.shape == (5, 1): # for TransformedTargetRegressor()
236+
assert rows[1:] == result.index.tolist()
237+
else:
238+
assert result.shape == (6, 1)
239+
assert rows == result.index.tolist()
240+
223241
assert result.columns.tolist() == ["Coefficient"]
224242

225243

skore/tests/unit/utils/test_accessors.py

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -82,6 +82,10 @@ class Estimator:
8282
def __init__(self):
8383
self.coef_ = 0
8484

85+
class MetaEstimator:
86+
def __init__(self):
87+
self.regressor_ = Estimator()
88+
8589
parent = MockParent(Estimator())
8690
accessor = MockAccessor(parent)
8791

@@ -92,6 +96,11 @@ def __init__(self):
9296

9397
assert _check_has_coef()(accessor)
9498

99+
parent = MockParent(MetaEstimator())
100+
accessor = MockAccessor(parent)
101+
102+
assert _check_has_coef()(accessor)
103+
95104
parent = MockParent(estimator="hello")
96105
accessor = MockAccessor(parent)
97106

0 commit comments

Comments
 (0)