diff --git a/skore/pyproject.toml b/skore/pyproject.toml
index 94bec5770a..4113fa67a3 100644
--- a/skore/pyproject.toml
+++ b/skore/pyproject.toml
@@ -159,9 +159,16 @@ convention = "numpy"
 "tests/*" = ["D"]

 [tool.mypy]
-ignore_missing_imports = true
-exclude = ["src/skore/_externals/.*", "hatch/*", "tests/*"]
+exclude = ["src/skore/_externals/", "hatch/", "tests/"]

 [[tool.mypy.overrides]]
-module = ["sklearn.*"]
 ignore_missing_imports = true
+module = [
+    "ipywidgets.*",
+    "joblib.*",
+    "pandas.*",
+    "plotly.*",
+    "seaborn.*",
+    "sklearn.*",
+    "skrub.*",
+]
diff --git a/skore/src/skore/_sklearn/_comparison/report.py b/skore/src/skore/_sklearn/_comparison/report.py
index 20ef23d11d..d889ba8312 100644
--- a/skore/src/skore/_sklearn/_comparison/report.py
+++ b/skore/src/skore/_sklearn/_comparison/report.py
@@ -342,7 +342,7 @@ def get_predictions(
         ] = "predict",
         X: ArrayLike | None = None,
         pos_label: PositiveLabel | None = _DEFAULT,
-    ) -> list[ArrayLike]:
+    ) -> list[ArrayLike] | list[list[ArrayLike]]:
         """Get predictions from the underlying reports.

         This method has the advantage to reload from the cache if the predictions
@@ -406,7 +406,7 @@ def get_predictions(
         >>> print([split_predictions.shape for split_predictions in predictions])
         [(25,), (25,)]
         """
-        return [
+        return [  # type: ignore
             report.get_predictions(
                 data_source=data_source,
                 response_method=response_method,
diff --git a/skore/src/skore/_sklearn/_cross_validation/data_accessor.py b/skore/src/skore/_sklearn/_cross_validation/data_accessor.py
index d78396156f..bd1d9ed259 100644
--- a/skore/src/skore/_sklearn/_cross_validation/data_accessor.py
+++ b/skore/src/skore/_sklearn/_cross_validation/data_accessor.py
@@ -42,7 +42,7 @@ def _retrieve_data_as_frame(
         y = self._parent.y

         if not sbd.is_dataframe(X):
-            X = pd.DataFrame(X, columns=[f"Feature {i}" for i in range(X.shape[1])])
+            X = pd.DataFrame(X, columns=[f"Feature {i}" for i in range(X.shape[1])])  # type: ignore

         if with_y:
             if y is None:
@@ -52,10 +52,10 @@ def _retrieve_data_as_frame(
                 name = y.name if y.name is not None else "Target"
                 y = y.to_frame(name=name)
             elif not sbd.is_dataframe(y):
-                if y.ndim == 1:
+                if y.ndim == 1:  # type: ignore
                     columns = ["Target"]
                 else:
-                    columns = [f"Target {i}" for i in range(y.shape[1])]
+                    columns = [f"Target {i}" for i in range(y.shape[1])]  # type: ignore
                 y = pd.DataFrame(y, columns=columns)

         return X, y
diff --git a/skore/src/skore/_sklearn/_cross_validation/metrics_accessor.py b/skore/src/skore/_sklearn/_cross_validation/metrics_accessor.py
index 34f1c7d30e..077fc477c8 100644
--- a/skore/src/skore/_sklearn/_cross_validation/metrics_accessor.py
+++ b/skore/src/skore/_sklearn/_cross_validation/metrics_accessor.py
@@ -1143,16 +1143,17 @@ def _get_display(
             X, y, _ = report.metrics._get_X_y_and_data_source_hash(
                 data_source=data_source
             )
+
             y_true.append(
                 YPlotData(
                     estimator_name=self._parent.estimator_name_,
                     split=report_idx,
-                    y=y,
+                    y=cast(ArrayLike, y),
                 )
             )
             results = _get_cached_response_values(
                 cache=report._cache,
-                estimator_hash=report._hash,
+                estimator_hash=int(report._hash),
                 estimator=report._estimator,
                 X=X,
                 response_method=response_method,
diff --git a/skore/src/skore/_sklearn/_estimator/feature_importance_accessor.py b/skore/src/skore/_sklearn/_estimator/feature_importance_accessor.py
index 785fd95360..77b136acbe 100644
--- a/skore/src/skore/_sklearn/_estimator/feature_importance_accessor.py
+++ b/skore/src/skore/_sklearn/_estimator/feature_importance_accessor.py
@@ -547,7 +547,7 @@ def _feature_permutation(
         feature_names = (
             self._parent.estimator_.feature_names_in_
             if hasattr(self._parent.estimator_, "feature_names_in_")
-            else [f"Feature #{i}" for i in range(X_.shape[1])]
+            else [f"Feature #{i}" for i in range(X_.shape[1])]  # type: ignore
         )

         # If there is more than one metric
diff --git a/skore/src/skore/_sklearn/_estimator/metrics_accessor.py b/skore/src/skore/_sklearn/_estimator/metrics_accessor.py
index 0b61753096..0f7eb03c6a 100644
--- a/skore/src/skore/_sklearn/_estimator/metrics_accessor.py
+++ b/skore/src/skore/_sklearn/_estimator/metrics_accessor.py
@@ -482,7 +482,7 @@ def _compute_metric_scores(

         results = _get_cached_response_values(
             cache=self._parent._cache,
-            estimator_hash=self._parent._hash,
+            estimator_hash=int(self._parent._hash),
             estimator=self._parent.estimator_,
             X=X,
             response_method=response_method,
@@ -1674,7 +1674,7 @@ def _get_display(
         else:
             results = _get_cached_response_values(
                 cache=self._parent._cache,
-                estimator_hash=self._parent._hash,
+                estimator_hash=int(self._parent._hash),
                 estimator=self._parent.estimator_,
                 X=X,
                 response_method=response_method,
diff --git a/skore/src/skore/_sklearn/_estimator/report.py b/skore/src/skore/_sklearn/_estimator/report.py
index 574dc2751d..9670f30e87 100644
--- a/skore/src/skore/_sklearn/_estimator/report.py
+++ b/skore/src/skore/_sklearn/_estimator/report.py
@@ -389,7 +389,7 @@ def get_predictions(

         results = _get_cached_response_values(
             cache=self._cache,
-            estimator_hash=self._hash,
+            estimator_hash=int(self._hash),
             estimator=self._estimator,
             X=X_,
             response_method=response_method,
diff --git a/skore/src/skore/_sklearn/_plot/metrics/precision_recall_curve.py b/skore/src/skore/_sklearn/_plot/metrics/precision_recall_curve.py
index b2aba04adb..4eff98cf4a 100644
--- a/skore/src/skore/_sklearn/_plot/metrics/precision_recall_curve.py
+++ b/skore/src/skore/_sklearn/_plot/metrics/precision_recall_curve.py
@@ -865,17 +865,19 @@ def _compute_data_for_display(
         ):
             label_binarizer = LabelBinarizer().fit(est.classes_)
             y_true_onehot_i: NDArray = label_binarizer.transform(y_true_i.y)
+            y_pred_i_y = cast(NDArray, y_pred_i.y)
+
             for class_idx, class_ in enumerate(est.classes_):
                 precision_class_i, recall_class_i, thresholds_class_i = (
                     precision_recall_curve(
                         y_true_onehot_i[:, class_idx],
-                        y_pred_i.y[:, class_idx],
+                        y_pred_i_y[:, class_idx],
                         pos_label=None,
                         drop_intermediate=drop_intermediate,
                     )
                 )
                 average_precision_class_i = average_precision_score(
-                    y_true_onehot_i[:, class_idx], y_pred_i.y[:, class_idx]
+                    y_true_onehot_i[:, class_idx], y_pred_i_y[:, class_idx]
                 )

                 for precision, recall, threshold in zip(
diff --git a/skore/src/skore/_sklearn/_plot/metrics/prediction_error.py b/skore/src/skore/_sklearn/_plot/metrics/prediction_error.py
index 546e09cc8e..031ddebbf9 100644
--- a/skore/src/skore/_sklearn/_plot/metrics/prediction_error.py
+++ b/skore/src/skore/_sklearn/_plot/metrics/prediction_error.py
@@ -1,6 +1,6 @@
 import numbers
 from collections import namedtuple
-from typing import Any, Literal
+from typing import Any, Literal, cast

 import matplotlib.pyplot as plt
 import numpy as np
@@ -265,7 +265,7 @@ def _plot_single_estimator(
         self.ax_.legend(handles, labels, loc="lower right")
         self.ax_.set_title(f"Prediction Error for {estimator_name}")

-        return scatter
+        return cast(list[Artist], scatter)

     def _plot_cross_validated_estimator(
         self,
@@ -352,7 +352,7 @@ def _plot_cross_validated_estimator(
         self.ax_.legend(handles, labels, loc="lower right", title=legend_title)
         self.ax_.set_title(f"Prediction Error for {estimator_name}")

-        return scatter
+        return cast(list[Artist], scatter)

     def _plot_comparison_estimator(
         self,
@@ -435,7 +435,7 @@ def _plot_comparison_estimator(
         self.ax_.legend(handles, labels, loc="lower right", title=legend_title)
         self.ax_.set_title("Prediction Error")

-        return scatter
+        return cast(list[Artist], scatter)

     def _plot_comparison_cross_validation(
         self,
@@ -518,7 +518,7 @@ def _plot_comparison_cross_validation(
         self.ax_.legend(handles, labels, loc="lower right", title=legend_title)
         self.ax_.set_title("Prediction Error")

-        return scatter
+        return cast(list[Artist], scatter)

     @DisplayMixin.style_plot
     def plot(
@@ -824,9 +824,9 @@ def _compute_data_for_display(
                     }
                 )
         else:
-            y_true_sample = y_true_i.y
-            y_pred_sample = y_pred_i.y
-            residuals_sample = y_true_i.y - y_pred_i.y
+            y_true_sample = cast(np.typing.NDArray, y_true_i.y)
+            y_pred_sample = cast(np.typing.NDArray, y_pred_i.y)
+            residuals_sample = y_true_sample - y_pred_sample

             for y_true_sample_i, y_pred_sample_i, residuals_sample_i in zip(
                 y_true_sample, y_pred_sample, residuals_sample, strict=False
diff --git a/skore/src/skore/_sklearn/_plot/metrics/roc_curve.py b/skore/src/skore/_sklearn/_plot/metrics/roc_curve.py
index 96dc0031ef..75bdfcb127 100644
--- a/skore/src/skore/_sklearn/_plot/metrics/roc_curve.py
+++ b/skore/src/skore/_sklearn/_plot/metrics/roc_curve.py
@@ -148,6 +148,8 @@ def __init__(
         self.ml_task = ml_task
         self.report_type = report_type

+        self.chance_level_: Line2D | list[Line2D] | None
+
     def _plot_single_estimator(
         self,
         *,
@@ -947,10 +949,12 @@ def _compute_data_for_display(
         ):
             label_binarizer = LabelBinarizer().fit(est.classes_)
             y_true_onehot_i: NDArray = label_binarizer.transform(y_true_i.y)
+            y_pred_i_y = cast(NDArray, y_pred_i.y)
+
             for class_idx, class_ in enumerate(est.classes_):
                 fpr_class_i, tpr_class_i, thresholds_class_i = roc_curve(
                     y_true_onehot_i[:, class_idx],
-                    y_pred_i.y[:, class_idx],
+                    y_pred_i_y[:, class_idx],
                     pos_label=None,
                     drop_intermediate=drop_intermediate,
                 )
diff --git a/skore/src/skore/_sklearn/train_test_split/train_test_split.py b/skore/src/skore/_sklearn/train_test_split/train_test_split.py
index e291d081cb..87f47eeca2 100644
--- a/skore/src/skore/_sklearn/train_test_split/train_test_split.py
+++ b/skore/src/skore/_sklearn/train_test_split/train_test_split.py
@@ -207,6 +207,8 @@ class labels.
     if y is None and len(arrays) >= 2:
         y = arrays[-1]

+    y_labels: np.ndarray | None
+
     if y is not None:
         y_labels = np.unique(y)
         y_test = (
diff --git a/skore/src/skore/project/widget.py b/skore/src/skore/project/widget.py
index 224a222717..6a3ba4395b 100644
--- a/skore/src/skore/project/widget.py
+++ b/skore/src/skore/project/widget.py
@@ -207,7 +207,7 @@ def _filter_dataframe(self, ml_task: str, report_type: str) -> pd.DataFrame:
         df.columns = [col.removesuffix("_mean") for col in df.columns]
         return df

-    def _get_datasets(self, ml_task: str, report_type: str) -> np.ndarray:
+    def _get_datasets(self, ml_task: str, report_type: str) -> list[str]:
         """Get the unique datasets from the filtered dataframe.

         Parameters
@@ -219,7 +219,7 @@ def _get_datasets(self, ml_task: str, report_type: str) -> np.ndarray:

         Returns
         -------
-        np.ndarray
+        list[str]
             The unique datasets.
         """
         return self._filter_dataframe(ml_task, report_type)["dataset"].unique()