76 changes: 74 additions & 2 deletions skore/src/skore/_sklearn/_estimator/metrics_accessor.py
@@ -63,7 +63,7 @@ def __init__(self, parent: EstimatorReport) -> None:
def summarize(
self,
*,
data_source: DataSource = "test",
data_source: Literal["test", "train", "X_y", "all"] = "test",
X: ArrayLike | None = None,
y: ArrayLike | None = None,
scoring: Scoring | list[Scoring] | None = None,
@@ -77,12 +77,13 @@ def summarize(

Parameters
----------
data_source : {"test", "train", "X_y"}, default="test"
data_source : {"test", "train", "X_y", "all"}, default="test"
The data source to use.

- "test" : use the test set provided when creating the report.
- "train" : use the train set provided when creating the report.
- "X_y" : use the provided `X` and `y` to compute the metric.
- "all" : use both the train and test sets to compute the metrics.

X : array-like of shape (n_samples, n_features), default=None
New data on which to compute the metric. By default, we use the validation
@@ -160,7 +161,66 @@ class is set to the one provided when creating the report. If `None`,
LogisticRegression Favorability
Metric Label / Average
F1 Score 1 0.96... (↗︎)
>>> report.metrics.summarize(
... indicator_favorability=True,
... data_source="all"
... ).frame().drop(["Fit time (s)", "Predict time (s)"])
LogisticRegression (train) ... Favorability
Metric ...
Precision 0.962963 ... (↗︎)
Recall 0.973783 ... (↗︎)
ROC AUC 0.994300 ... (↗︎)
Brier score 0.027365 ... (↘︎)
>>> # Using scikit-learn metrics
>>> report.metrics.summarize(
... scoring=["f1"],
... indicator_favorability=True,
... ).frame()
LogisticRegression Favorability
Metric Label / Average
F1 Score 1 0.96... (↗︎)
"""
if data_source == "all":
train_summary = self.summarize(
data_source="train",
X=X,
y=y,
scoring=scoring,
scoring_names=scoring_names,
scoring_kwargs=scoring_kwargs,
pos_label=pos_label,
indicator_favorability=indicator_favorability,
flat_index=flat_index,
)
test_summary = self.summarize(
data_source="test",
X=X,
y=y,
scoring=scoring,
scoring_names=scoring_names,
scoring_kwargs=scoring_kwargs,
pos_label=pos_label,
indicator_favorability=indicator_favorability,
flat_index=flat_index,
)
# Suffix the column names to distinguish the train and test results.
train_df = train_summary.frame().add_suffix(" (train)")
test_df = test_summary.frame().add_suffix(" (test)")
combined = pd.concat([train_df, test_df], axis=1)
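# The favorability indicator does not depend on the data source, so keep
# a single column instead of the suffixed train and test duplicates.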
if indicator_favorability:
combined["Favorability"] = train_df["Favorability (train)"]
combined.drop(
columns=["Favorability (train)", "Favorability (test)"],
inplace=True,
)
return MetricsSummaryDisplay(summarize_data=combined)

if pos_label is _DEFAULT:
pos_label = self._parent.pos_label

@@ -429,6 +489,18 @@ class is set to the one provided when creating the report. If `None`,
)
return MetricsSummaryDisplay(summarize_data=results)

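# Single data source: delegate the computation to `_summarize`.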
return self._summarize(
data_source=data_source,
X=X,
y=y,
scoring=scoring,
scoring_names=scoring_names,
scoring_kwargs=scoring_kwargs,
pos_label=pos_label,
indicator_favorability=indicator_favorability,
flat_index=flat_index,
)

def _compute_metric_scores(
self,
metric_fn: Callable,