Description
Describe the bug
I am attempting to use cuml.accel with skore to produce an EstimatorReport, which provides information about feature importance, recommended metrics, etc. to help better understand an estimator.
If I create a RandomForestClassifier (which is accelerated by cuml.accel) and call EstimatorReport.metrics.report_metrics, the report fails with an AttributeError:
%load_ext cuml.accel
from sklearn.datasets import load_breast_cancer
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split
from skore import EstimatorReport
X, y = load_breast_cancer(return_X_y=True, as_frame=True)
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=0)
rf = RandomForestClassifier(random_state=0)
rf_report = EstimatorReport(
rf, X_train=X_train, X_test=X_test, y_train=y_train, y_test=y_test
)
rf_report.metrics.report_metrics(pos_label=1)
---------------------------------------------------------------------------
AttributeError Traceback (most recent call last)
Cell In[1], line 17
11 rf = RandomForestClassifier(random_state=0)
13 rf_report = EstimatorReport(
14 rf, X_train=X_train, X_test=X_test, y_train=y_train, y_test=y_test
15 )
---> 17 rf_report.metrics.report_metrics(pos_label=1)
File /raid/btepera/miniforge3/envs/rapids-25.04/lib/python3.12/site-packages/skore/sklearn/_estimator/metrics_accessor.py:272, in _MetricsAccessor.report_metrics(self, data_source, X, y, scoring, scoring_names, scoring_kwargs, pos_label, indicator_favorability, flat_index)
267 else:
268 raise ValueError(
269 f"Invalid type of metric: {type(metric)} for {metric!r}"
270 )
--> 272 score = metric_fn(data_source=data_source, X=X, y=y, **metrics_kwargs)
274 index: Union[pd.Index, pd.MultiIndex, list[str], None]
275 score_array: NDArray
File /raid/btepera/miniforge3/envs/rapids-25.04/lib/python3.12/site-packages/skore/sklearn/_estimator/metrics_accessor.py:755, in _MetricsAccessor._precision(self, data_source, data_source_hash, X, y, average, pos_label)
750 if self._parent._ml_task == "binary-classification" and pos_label is not None:
751 # if `pos_label` is specified by our user, then we can safely report only
752 # the statistics of the positive class
753 average = "binary"
--> 755 result = self._compute_metric_scores(
756 metrics.precision_score,
757 X=X,
758 y_true=y,
759 data_source=data_source,
760 data_source_hash=data_source_hash,
761 response_method="predict",
762 pos_label=pos_label,
763 average=average,
764 )
765 if self._parent._ml_task == "binary-classification" and (
766 pos_label is not None or average is not None
767 ):
768 return cast(float, result)
File /raid/btepera/miniforge3/envs/rapids-25.04/lib/python3.12/site-packages/skore/sklearn/_estimator/metrics_accessor.py:423, in _MetricsAccessor._compute_metric_scores(self, metric_fn, X, y_true, response_method, data_source, data_source_hash, pos_label, **metric_kwargs)
420 if "pos_label" in metric_params:
421 kwargs.update(pos_label=pos_label)
--> 423 y_pred = _get_cached_response_values(
424 cache=self._parent._cache,
425 estimator_hash=self._parent._hash,
426 estimator=self._parent.estimator_,
427 X=X,
428 response_method=response_method,
429 pos_label=pos_label,
430 data_source=data_source,
431 data_source_hash=data_source_hash,
432 )
434 score = metric_fn(y_true, y_pred, **kwargs)
436 if isinstance(score, np.ndarray):
File /raid/btepera/miniforge3/envs/rapids-25.04/lib/python3.12/site-packages/skore/sklearn/_base.py:397, in _get_cached_response_values(cache, estimator_hash, estimator, X, response_method, pos_label, data_source, data_source_hash)
394 return cached_predictions
396 with MeasureTime() as predict_time:
--> 397 predictions, _ = _get_response_values(
398 estimator,
399 X=X,
400 response_method=prediction_method,
401 pos_label=pos_label,
402 return_response_method_used=False,
403 )
405 cache[cache_key] = predictions
407 predict_time_cache_key: tuple[Any, ...] = (
408 estimator_hash,
409 data_source,
410 data_source_hash,
411 "predict_time",
412 )
File /raid/btepera/miniforge3/envs/rapids-25.04/lib/python3.12/site-packages/sklearn/utils/_response.py:202, in _get_response_values(estimator, X, response_method, pos_label, return_response_method_used)
200 if is_classifier(estimator):
201 prediction_method = _check_response_method(estimator, response_method)
--> 202 classes = estimator.classes_
203 target_type = type_of_target(classes)
205 if target_type in ("binary", "multiclass"):
File base.pyx:865, in cuml.internals.base.UniversalBase.__getattr__()
File base.pyx:833, in cuml.internals.base.UniversalBase.__getattr__()
File base.pyx:388, in cuml.internals.base.Base.__getattr__()
AttributeError: classes_
My expectation would be that either the same attributes that exist on the sklearn class should also exist on the corresponding cuml estimator class, or, if those attributes don't exist for the cuml estimator, we should be able to fall back to a sklearn estimator and access the attribute that way rather than erroring out (a rough sketch of such a fallback is below).
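For illustration only, here is a minimal user-side sketch of that fallback, continuing from the reproduction above. This is not skore's actual code path; it assumes rf_report.estimator_ is the fitted estimator held by the report (as seen in the traceback) and that y_train contains every class observed during training:
import numpy as np
# Hypothetical fallback sketch: recover the class labels when the accelerated
# estimator does not expose `classes_`, instead of raising an AttributeError.
fitted = rf_report.estimator_
classes = getattr(fitted, "classes_", None)
if classes is None:
    # Assumption: y_train holds all observed class labels.
    classes = np.unique(y_train)
print(classes)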
Relevant package versions:
cuml 25.4.0a168
scikit-learn 1.6.1
skore 0.8.2