probabl-ai
diff --git a/skore/src/skore/_sklearn/_base.py b/skore/src/skore/_sklearn/_base.py
Lines changed: 3 additions & 2 deletions
diff --git a/skore/src/skore/_sklearn/_comparison/metrics_accessor.py b/skore/src/skore/_sklearn/_comparison/metrics_accessor.py
Lines changed: 4 additions & 0 deletions
diff --git a/skore/src/skore/_sklearn/_comparison/report.py b/skore/src/skore/_sklearn/_comparison/report.py
Lines changed: 4 additions & 2 deletions
diff --git a/skore/src/skore/_sklearn/_cross_validation/metrics_accessor.py b/skore/src/skore/_sklearn/_cross_validation/metrics_accessor.py
Lines changed: 3 additions & 1 deletion
diff --git a/skore/src/skore/_sklearn/_cross_validation/report.py b/skore/src/skore/_sklearn/_cross_validation/report.py
Lines changed: 4 additions & 2 deletions
diff --git a/skore/src/skore/_sklearn/_estimator/metrics_accessor.py b/skore/src/skore/_sklearn/_estimator/metrics_accessor.py
Lines changed: 102 additions & 41 deletions
diff --git a/skore/src/skore/_sklearn/_estimator/report.py b/skore/src/skore/_sklearn/_estimator/report.py
Lines changed: 3 additions & 2 deletions
@@ -14,6 +14,7 @@
 
 from skore._externals._sklearn_compat import is_clusterer
 from skore._sklearn.types import PositiveLabel
+from skore._utils._cache import Cache
 from skore._utils._measure_time import MeasureTime
 
 
@@ -120,7 +121,7 @@ class _BaseReport(_HelpMixin):
     _X_test: ArrayLike | None
     _y_train: ArrayLike | None
     _y_test: ArrayLike | None
-    _cache: dict[tuple[Any, ...], Any]
+    _cache: Cache
     estimator_: BaseEstimator
 
     def _get_help_panel_title(self) -> str:
@@ -323,7 +324,7 @@ def _get_X_y_and_data_source_hash(
 
 def _get_cached_response_values(
     *,
-    cache: dict[tuple[Any, ...], Any],
+    cache: Cache,
     estimator_hash: int,
     estimator: BaseEstimator,
     X: ArrayLike | None,
 
@@ -1224,6 +1224,7 @@ def _get_display(
                     y_true.append(
                         YPlotData(
                             estimator_name=report_name,
+                            data_source=data_source,
                             split=None,
                             y=report_y,
                         )
@@ -1245,6 +1246,7 @@ def _get_display(
                             y_pred.append(
                                 YPlotData(
                                     estimator_name=report_name,
+                                    data_source=data_source,
                                     split=None,
                                     y=value,
                                 )
@@ -1278,6 +1280,7 @@ def _get_display(
                         y_true.append(
                             YPlotData(
                                 estimator_name=report_name,
+                                data_source=data_source,
                                 split=split,
                                 y=report_y,
                             )
@@ -1300,6 +1303,7 @@ def _get_display(
                                 y_pred.append(
                                     YPlotData(
                                         estimator_name=report_name,
+                                        data_source=data_source,
                                         split=split,
                                         y=value,
                                     )
 
@@ -14,6 +14,7 @@
 from skore._sklearn._cross_validation.report import CrossValidationReport
 from skore._sklearn._estimator.report import EstimatorReport
 from skore._sklearn.types import _DEFAULT, PositiveLabel
+from skore._utils._cache import Cache
 from skore._utils._progress_bar import progress_decorator
 
 if TYPE_CHECKING:
@@ -251,7 +252,7 @@ def __init__(
         self._hash = self._rng.integers(
             low=np.iinfo(np.int64).min, high=np.iinfo(np.int64).max
         )
-        self._cache: dict[tuple[Any, ...], Any] = {}
+        self._cache = Cache()
         self._ml_task = next(iter(self.reports_.values()))._ml_task  # type: ignore
 
     def clear_cache(self) -> None:
@@ -277,7 +278,8 @@ def clear_cache(self) -> None:
         """
         for report in self.reports_.values():
             report.clear_cache()
-        self._cache = {}
+
+        self._cache = Cache()
 
     @progress_decorator(description="Estimator predictions")
     def cache_predictions(
 
@@ -1129,7 +1129,7 @@ def _get_display(
         total_estimators = len(self._parent.estimator_reports_)
         progress.update(main_task, total=total_estimators)
 
-        if cache_key in self._parent._cache:
+        if cache_key and cache_key in self._parent._cache:
             display = self._parent._cache[cache_key]
         else:
             y_true: list[YPlotData] = []
@@ -1145,6 +1145,7 @@ def _get_display(
                 y_true.append(
                     YPlotData(
                         estimator_name=self._parent.estimator_name_,
+                        data_source=data_source,
                         split=report_idx,
                         y=cast(ArrayLike, y),
                     )
@@ -1166,6 +1167,7 @@ def _get_display(
                         y_pred.append(
                             YPlotData(
                                 estimator_name=self._parent.estimator_name_,
+                                data_source=data_source,
                                 split=report_idx,
                                 y=value,
                             )
 
@@ -17,6 +17,7 @@
 from skore._sklearn._estimator.report import EstimatorReport
 from skore._sklearn.find_ml_task import _find_ml_task
 from skore._sklearn.types import _DEFAULT, PositiveLabel, SKLearnCrossValidator
+from skore._utils._cache import Cache
 from skore._utils._fixes import _validate_joblib_parallel_params
 from skore._utils._parallel import Parallel, delayed
 from skore._utils._progress_bar import progress_decorator
@@ -177,7 +178,7 @@ def __init__(
         self._hash = self._rng.integers(
             low=np.iinfo(np.int64).min, high=np.iinfo(np.int64).max
         )
-        self._cache: dict[tuple[Any, ...], Any] = {}
+        self._cache = Cache()
         self._ml_task = _find_ml_task(
             y, estimator=self.estimator_reports_[0]._estimator
         )
@@ -296,7 +297,8 @@ def clear_cache(self) -> None:
         """
         for report in self.estimator_reports_:
             report.clear_cache()
-        self._cache = {}
+
+        self._cache = Cache()
 
     @progress_decorator(description="Cross-validation predictions")
     def cache_predictions(
 
@@ -28,6 +28,7 @@
 )
 from skore._sklearn.types import (
     _DEFAULT,
+    DataSource,
     PositiveLabel,
     Scoring,
     YPlotData,
@@ -40,8 +41,6 @@
 )
 from skore._utils._index import flatten_multi_index
 
-DataSource = Literal["test", "train", "X_y"]
-
 
 class _MetricsAccessor(
     _BaseMetricsAccessor, _BaseAccessor["EstimatorReport"], DirNamesMixin
@@ -1645,7 +1644,7 @@ def _get_display(
         *,
         X: ArrayLike | None,
         y: ArrayLike | None,
-        data_source: DataSource,
+        data_source: DataSource | Literal["both"],
         response_method: str | list[str] | tuple[str, ...],
         display_class: type[
             RocCurveDisplay
@@ -1670,12 +1669,13 @@ def _get_display(
         y : array-like of shape (n_samples,)
             The target.
 
-        data_source : {"test", "train", "X_y"}, default="test"
+        data_source : {"test", "train", "X_y", "both"}, default="test"
             The data source to use.
 
             - "test" : use the test set provided when creating the report.
             - "train" : use the train set provided when creating the report.
             - "X_y" : use the provided `X` and `y` to compute the metric.
+            - "both" : use both the train set and the test set to compute the metric.
 
         response_method : str, list of str or tuple of str
             The response method.
@@ -1691,11 +1691,98 @@ def _get_display(
         display : display_class
             The display.
         """
-        X, y, data_source_hash = self._get_X_y_and_data_source_hash(
-            data_source=data_source, X=X, y=y
-        )
-        assert y is not None, "y must be provided"
+        pos_label = display_kwargs.get("pos_label")
+
+        def get_ys(
+            *,
+            X,
+            y_true,
+            data_source,
+            data_source_hash,
+            cache=self._parent._cache,  # defaults are evaluated once, at definition time
+            estimator_hash=int(self._parent._hash),
+            estimator=self._parent.estimator_,
+            estimator_name=self._parent.estimator_name_,
+            response_method=response_method,
+            pos_label=pos_label,
+        ) -> tuple[list[YPlotData], list[YPlotData]]:
+            """Get predictions and format y_true and y_pred using YPlotData."""
+            results = _get_cached_response_values(
+                cache=cache,
+                estimator_hash=estimator_hash,
+                estimator=estimator,
+                X=X,
+                response_method=response_method,
+                pos_label=pos_label,
+                data_source=data_source,
+                data_source_hash=data_source_hash,
+            )
+            for key, value, is_cached in results:
+                key = cast(tuple[Any, ...], key)
+                if not is_cached:  # freshly computed value: store it in the cache
+                    cache[key] = value
+                if key[-1] != "predict_time":  # every entry but the timing one
+                    y_pred = value  # the last such value is the one kept
+
+            y_true = [  # rebinds the `y_true` parameter to a one-element list
+                YPlotData(
+                    estimator_name=estimator_name,
+                    data_source=data_source,
+                    split=None,
+                    y=y_true,
+                )
+            ]
+            y_pred = [  # same one-element wrapping for the predictions
+                YPlotData(
+                    estimator_name=estimator_name,
+                    data_source=data_source,
+                    split=None,
+                    y=y_pred,
+                )
+            ]
+            return y_true, y_pred
+
+        if data_source == "both":
+            X_train, y_train, data_source_hash_train = (
+                self._get_X_y_and_data_source_hash(data_source="train", X=X, y=y)
+            )
+            assert y_train is not None, "y must be provided"  # checked before predicting
+            y_train_true, y_train_pred = get_ys(
+                X=X_train,
+                y_true=y_train,
+                data_source="train",
+                data_source_hash=data_source_hash_train,
+            )
+
+            X_test, y_test, data_source_hash_test = self._get_X_y_and_data_source_hash(
+                data_source="test", X=X, y=y
+            )
+            assert y_test is not None, "y must be provided"  # checked before predicting
+            y_test_true, y_test_pred = get_ys(
+                X=X_test,
+                y_true=y_test,
+                data_source="test",
+                data_source_hash=data_source_hash_test,
+            )
 
+            y_true = y_train_true + y_test_true
+            y_pred = y_train_pred + y_test_pred
+            data_source_hash = None
+        else:
+            X, y_data_source, data_source_hash = self._get_X_y_and_data_source_hash(
+                data_source=data_source, X=X, y=y
+            )
+            # Check the raw target: the lists built by `get_ys` are never None.
+            assert y_data_source is not None, "y must be provided"
+
+            y_true, y_pred = get_ys(
+                X=X,
+                y_true=y_data_source,
+                data_source=data_source,
+                data_source_hash=data_source_hash,
+            )
+
+        # Compute cache key
         if "seed" in display_kwargs and display_kwargs["seed"] is None:
             cache_key = None
         else:
@@ -1711,40 +1798,12 @@ def _get_display(
                 cache_key_parts.append(data_source)
             cache_key = tuple(cache_key_parts)
 
-        if cache_key in self._parent._cache:
+        if cache_key and cache_key in self._parent._cache:
             display = self._parent._cache[cache_key]
         else:
-            results = _get_cached_response_values(
-                cache=self._parent._cache,
-                estimator_hash=int(self._parent._hash),
-                estimator=self._parent.estimator_,
-                X=X,
-                response_method=response_method,
-                pos_label=display_kwargs.get("pos_label"),
-                data_source=data_source,
-                data_source_hash=data_source_hash,
-            )
-            for key, value, is_cached in results:
-                if not is_cached:
-                    self._parent._cache[cast(tuple[Any, ...], key)] = value
-                if cast(tuple[Any, ...], key)[-1] != "predict_time":
-                    y_pred = value
-
             display = display_class._compute_data_for_display(
-                y_true=[
-                    YPlotData(
-                        estimator_name=self._parent.estimator_name_,
-                        split=None,
-                        y=y,
-                    )
-                ],
-                y_pred=[
-                    YPlotData(
-                        estimator_name=self._parent.estimator_name_,
-                        split=None,
-                        y=y_pred,
-                    )
-                ],
+                y_true=y_true,
+                y_pred=y_pred,
                 report_type="estimator",
                 estimators=[self._parent.estimator_],
                 ml_task=self._parent._ml_task,
@@ -1767,7 +1826,7 @@ def _get_display(
     def roc(
         self,
         *,
-        data_source: DataSource = "test",
+        data_source: DataSource | Literal["both"] = "test",
         X: ArrayLike | None = None,
         y: ArrayLike | None = None,
         pos_label: PositiveLabel | None = _DEFAULT,
@@ -1776,12 +1835,14 @@ def roc(
 
         Parameters
         ----------
-        data_source : {"test", "train", "X_y"}, default="test"
+        data_source : {"test", "train", "X_y", "both"}, default="test"
             The data source to use.
 
             - "test" : use the test set provided when creating the report.
             - "train" : use the train set provided when creating the report.
             - "X_y" : use the provided `X` and `y` to compute the metric.
+            - "both" : use both the train and test sets to compute the metrics and
+              present them side-by-side.
 
         X : array-like of shape (n_samples, n_features), default=None
             New data on which to compute the metric. By default, we use the validation
 
@@ -18,6 +18,7 @@
 from skore._sklearn._base import _BaseReport, _get_cached_response_values
 from skore._sklearn.find_ml_task import _find_ml_task
 from skore._sklearn.types import _DEFAULT, PositiveLabel
+from skore._utils._cache import Cache
 from skore._utils._fixes import _validate_joblib_parallel_params
 from skore._utils._measure_time import MeasureTime
 from skore._utils._parallel import Parallel, delayed
@@ -186,7 +187,7 @@ def _initialize_state(self) -> None:
         self._hash = self._rng.integers(
             low=np.iinfo(np.int64).min, high=np.iinfo(np.int64).max
         )
-        self._cache: dict[tuple[Any, ...], Any] = {}
+        self._cache = Cache()
         self._ml_task = _find_ml_task(self._y_test, estimator=self._estimator)
 
     # NOTE:
@@ -214,7 +215,7 @@ def clear_cache(self) -> None:
         >>> report._cache
         {}
         """
-        self._cache = {}
+        self._cache = Cache()
 
     @progress_decorator(description="Caching predictions")
     def cache_predictions(