4 changes: 4 additions & 0 deletions skore/src/skore/__init__.py
@@ -21,6 +21,9 @@
train_test_split,
)
from skore._sklearn._plot.base import Display
from skore._sklearn._plot.metrics.feature_importance_coefficients_display import (
FeatureImportanceCoefficientsDisplay,
)
from skore._utils._patch import setup_jupyter_display
from skore._utils._show_versions import show_versions
from skore.project import Project
@@ -41,6 +44,7 @@
"get_config",
"set_config",
"TableReportDisplay",
"FeatureImportanceCoefficientsDisplay",
]

logger = logging.getLogger(__name__)
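With the re-export above, the new display is importable from the package root. A minimal sketch of constructing one by hand (in practice the display is returned by `report.feature_importance.coefficients()`; the hand-built frame here is purely illustrative):

```python
import pandas as pd

from skore import FeatureImportanceCoefficientsDisplay

# Illustrative frame only: displays are normally created by the
# `feature_importance.coefficients()` accessors, not by hand.
coefs = pd.DataFrame(
    {"Coefficient": [152.4, 21.2, -45.1]},
    index=["Intercept", "Feature #0", "Feature #1"],
)
display = FeatureImportanceCoefficientsDisplay("estimator", coefs)
display.plot()  # horizontal bar chart titled "Coefficients"
```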
34 changes: 22 additions & 12 deletions skore/src/skore/_sklearn/_comparison/feature_importance_accessor.py
@@ -10,8 +10,8 @@
from skore._sklearn._base import _BaseAccessor
from skore._sklearn._cross_validation import CrossValidationReport
from skore._sklearn._estimator import EstimatorReport
from skore._sklearn._plot.metrics.feature_importance_display import (
FeatureImportanceDisplay,
from skore._sklearn._plot.metrics.feature_importance_coefficients_display import (
FeatureImportanceCoefficientsDisplay,
)
from skore._utils._accessor import _check_comparison_report_sub_estimators_have_coef

@@ -29,20 +29,25 @@ def __init__(self, parent: ComparisonReport) -> None:
super().__init__(parent)

@available_if(_check_comparison_report_sub_estimators_have_coef())
def coefficients(self) -> FeatureImportanceDisplay:
def coefficients(self) -> FeatureImportanceCoefficientsDisplay:
"""Retrieve the coefficients for each report, including the intercepts.

If the compared reports are `EstimatorReport`s, the coefficients from each
report's estimator are returned as a single-column DataFrame.
If the compared reports are :class:`EstimatorReport` instances, the coefficients
from each report's estimator are returned as a single-column DataFrame.

If the compared reports are `CrossValidationReport`s, the coefficients
across all cross-validation splits are retained and the columns are prefixed
with the corresponding estimator name to distinguish them.
If the compared reports are :class:`CrossValidationReport` instances, the
coefficients across all cross-validation splits are retained and the columns are
prefixed with the corresponding estimator name to distinguish them.

Comparison reports with the same features are put under one key and are plotted
together.
When some reports share the same features and others do not, those with the same
features are plotted together.
together. When some reports share the same features and others do not, those
with the same features are plotted together.

Returns
-------
:class:`FeatureImportanceCoefficientsDisplay`
The feature importance display containing model coefficients and
intercept.
"""
similar_reports = defaultdict(list)

@@ -89,7 +94,12 @@ def coefficients(self) -> FeatureImportanceDisplay:
else:
raise TypeError(f"Unexpected report type: {self._parent._reports_type}")

return FeatureImportanceDisplay(self._parent, coef_frames)
return FeatureImportanceCoefficientsDisplay(
"comparison-estimator"
if self._parent._reports_type == "EstimatorReport"
else "comparison-cross-validation",
coef_frames,
)

####################################################################################
# Methods related to the help tree
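For context, a hedged end-to-end sketch of the comparison path (assuming `ComparisonReport` accepts a dict of reports, as elsewhere in skore; model names are illustrative):

```python
from sklearn.datasets import load_diabetes
from sklearn.linear_model import Lasso, Ridge

from skore import ComparisonReport, EstimatorReport, train_test_split

X, y = load_diabetes(return_X_y=True)
split_data = train_test_split(X=X, y=y, random_state=0, as_dict=True)

report = ComparisonReport(
    reports={
        "ridge": EstimatorReport(Ridge(), **split_data),
        "lasso": EstimatorReport(Lasso(), **split_data),
    }
)

display = report.feature_importance.coefficients()
display.frame()  # one column per compared model, features as the index
display.plot()   # bar charts, one subplot per group of shared features
```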
1 change: 1 addition & 0 deletions skore/src/skore/_sklearn/_comparison/report.py
@@ -103,6 +103,7 @@ class ComparisonReport(_BaseReport, DirNamesMixin):

_ACCESSOR_CONFIG: dict[str, dict[str, str]] = {
"metrics": {"name": "metrics"},
"feature_importance": {"name": "feature_importance"},
}
metrics: _MetricsAccessor
feature_importance: _FeatureImportanceAccessor
skore/src/skore/_sklearn/_cross_validation/feature_importance_accessor.py
@@ -6,8 +6,8 @@
from skore._externals._pandas_accessors import DirNamesMixin
from skore._sklearn._base import _BaseAccessor
from skore._sklearn._cross_validation.report import CrossValidationReport
from skore._sklearn._plot.metrics.feature_importance_display import (
FeatureImportanceDisplay,
from skore._sklearn._plot.metrics.feature_importance_coefficients_display import (
FeatureImportanceCoefficientsDisplay,
)
from skore._utils._accessor import _check_cross_validation_sub_estimator_has_coef

@@ -22,9 +22,15 @@ def __init__(self, parent: CrossValidationReport) -> None:
super().__init__(parent)

@available_if(_check_cross_validation_sub_estimator_has_coef())
def coefficients(self) -> FeatureImportanceDisplay:
def coefficients(self) -> FeatureImportanceCoefficientsDisplay:
"""Retrieve the coefficients across splits, including the intercept.

Returns
-------
:class:`FeatureImportanceCoefficientsDisplay`
The feature importance display containing model coefficients and
intercept.

Examples
--------
>>> from sklearn.datasets import make_regression
@@ -34,15 +40,16 @@ def coefficients(self) -> FeatureImportanceDisplay:
>>> report = CrossValidationReport(
...     estimator=Ridge(), X=X, y=y, splitter=5, n_jobs=4
... )
>>> report.feature_importance.coefficients().frame()
>>> display = report.feature_importance.coefficients()
>>> display.frame()
Intercept Feature #0 Feature #1 Feature #2
Split index
0 0.064837 74.100966 27.309656 17.367865
1 0.030257 74.276481 27.571421 17.392395
2 0.000084 74.107126 27.614821 17.277730
3 0.145613 74.207645 27.523667 17.391055
4 0.033695 74.259575 27.599610 17.390481
>>> report.feature_importance.coefficients().plot() # shows plot
>>> display.plot() # shows plot
"""
combined = pd.concat(
{
@@ -58,7 +65,7 @@
).T
combined.index.name = "Split index"

return FeatureImportanceDisplay(self._parent, combined)
return FeatureImportanceCoefficientsDisplay("cross-validation", combined)

####################################################################################
# Methods related to the help tree
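The accessor above concatenates each split's coefficient Series and transposes, giving one row per split. A standalone sketch of the equivalent reshaping (synthetic numbers, illustrative term names):

```python
import numpy as np
import pandas as pd

rng = np.random.default_rng(0)
terms = ["Intercept", "Feature #0", "Feature #1"]

# One coefficient Series per CV split, keyed by split index.
per_split = {i: pd.Series(rng.normal(size=3), index=terms) for i in range(5)}

combined = pd.concat(per_split, axis=1).T  # rows: splits, columns: terms
combined.index.name = "Split index"
print(combined)
```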
1 change: 1 addition & 0 deletions skore/src/skore/_sklearn/_cross_validation/report.py
@@ -143,6 +143,7 @@ class CrossValidationReport(_BaseReport, DirNamesMixin):

_ACCESSOR_CONFIG: dict[str, dict[str, str]] = {
"metrics": {"name": "metrics"},
"feature_importance": {"name": "feature_importance"},
}
metrics: _MetricsAccessor
feature_importance: _FeatureImportanceAccessor
19 changes: 13 additions & 6 deletions skore/src/skore/_sklearn/_estimator/feature_importance_accessor.py
@@ -17,8 +17,8 @@
from skore._externals._pandas_accessors import DirNamesMixin
from skore._sklearn._base import _BaseAccessor
from skore._sklearn._estimator.report import EstimatorReport
from skore._sklearn._plot.metrics.feature_importance_display import (
FeatureImportanceDisplay,
from skore._sklearn._plot.metrics.feature_importance_coefficients_display import (
FeatureImportanceCoefficientsDisplay,
)
from skore._sklearn.types import Aggregate
from skore._utils._accessor import (
@@ -158,9 +158,15 @@ def __init__(self, parent: EstimatorReport) -> None:
super().__init__(parent)

@available_if(_check_estimator_has_coef())
def coefficients(self) -> FeatureImportanceDisplay:
def coefficients(self) -> FeatureImportanceCoefficientsDisplay:
"""Retrieve the coefficients of a linear model, including the intercept.

Returns
-------
:class:`FeatureImportanceCoefficientsDisplay`
The feature importance display containing model coefficients and
intercept.

Examples
--------
>>> from sklearn.datasets import load_diabetes
@@ -171,7 +177,8 @@ def coefficients(self) -> FeatureImportanceDisplay:
>>> split_data = train_test_split(X=X, y=y, random_state=0, as_dict=True)
>>> regressor = Ridge()
>>> report = EstimatorReport(regressor, **split_data)
>>> report.feature_importance.coefficients().frame()
>>> display = report.feature_importance.coefficients()
>>> display.frame()
Coefficient
Intercept 152.4...
Feature #0 21.2...
@@ -184,7 +191,7 @@ def coefficients(self) -> FeatureImportanceDisplay:
Feature #7 112.6...
Feature #8 250.5...
Feature #9 99.5...
>>> report.feature_importance.coefficients().plot() # shows plot
>>> display.plot() # shows plot
"""
parent_estimator = self._parent.estimator_

@@ -239,7 +246,7 @@ def coefficients(self) -> FeatureImportanceDisplay:
columns=columns,
)

return FeatureImportanceDisplay(self._parent, df)
return FeatureImportanceCoefficientsDisplay("estimator", df)

@available_if(_check_has_feature_importances())
def mean_decrease_impurity(self):
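A short usage sketch on the estimator path: the display wraps a single-column "Coefficient" frame, so ordinary pandas operations apply to `frame()` (the ranking below is illustrative, not part of the API):

```python
from sklearn.datasets import load_diabetes
from sklearn.linear_model import Ridge

from skore import EstimatorReport, train_test_split

X, y = load_diabetes(return_X_y=True)
split_data = train_test_split(X=X, y=y, random_state=0, as_dict=True)
report = EstimatorReport(Ridge(), **split_data)

frame = report.feature_importance.coefficients().frame()
# Rank terms by absolute coefficient magnitude (intercept included).
print(frame["Coefficient"].abs().sort_values(ascending=False).head())
```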
171 changes: 171 additions & 0 deletions skore/src/skore/_sklearn/_plot/metrics/feature_importance_coefficients_display.py
@@ -0,0 +1,171 @@
import matplotlib.pyplot as plt

from skore._sklearn._plot.base import DisplayMixin


class FeatureImportanceCoefficientsDisplay(DisplayMixin):
    """Feature importance display.

    Each report type produces its own output frame and plot.

    Parameters
    ----------
    report_type : {"estimator", "cross-validation", "comparison-estimator", \
            "comparison-cross-validation"}
        Report type from which the display is created.

    coefficients : DataFrame | list[DataFrame]
        The coefficients data to display.

    Attributes
    ----------
    ax_ : matplotlib Axes or ndarray of Axes
        The axes on which the coefficients are drawn; comparison reports
        produce one axes per compared report.

    figure_ : matplotlib Figure
        Figure containing the plot.

    Examples
    --------
    >>> from sklearn.datasets import load_diabetes
    >>> from sklearn.linear_model import LinearRegression
    >>> from skore import train_test_split
    >>> from skore import EstimatorReport
    >>> X, y = load_diabetes(return_X_y=True)
    >>> split_data = train_test_split(
    ...     X=X, y=y, random_state=0, as_dict=True, shuffle=False
    ... )
    >>> report = EstimatorReport(LinearRegression(), **split_data)
    >>> display = report.feature_importance.coefficients()
    >>> display.plot()
    >>> display.frame()
                Coefficient
    Intercept    151.487952
    Feature #0   -11.861904
    Feature #1  -238.445509
    Feature #2   505.395493
    Feature #3   298.977119
    ...                 ...
    """

    def __init__(self, report_type, coefficients):
        self.report_type = report_type
        self.coefficients = coefficients

    def frame(self):
        """Return coefficients as a DataFrame.

        Returns
        -------
        pd.DataFrame
            The structure of the returned frame depends on the underlying
            report type:

            - If an :class:`EstimatorReport`, a single column "Coefficient",
              with the index being the feature names.

            - If a :class:`CrossValidationReport`, the columns are the feature
              names, and the index is the respective split number.

            - If a :class:`ComparisonReport`, the columns are the models passed
              in the report, with the index being the feature names.
        """
        if self.report_type == "estimator":
            return self._frame_estimator_report()
        elif self.report_type == "cross-validation":
            return self._frame_cross_validation_report()
        else:
            return self._frame_comparison_report()

    def _frame_estimator_report(self):
        return self.coefficients

    def _frame_cross_validation_report(self):
        return self.coefficients

    def _frame_comparison_report(self):
        import pandas as pd

        return pd.concat(self.coefficients, axis=1)

    @DisplayMixin.style_plot
    def plot(self, **kwargs) -> None:
        """Plot the coefficients of linear models.

        Parameters
        ----------
        **kwargs : dict
            Additional keyword arguments passed to the underlying plotting
            backend.
        """
        return self._plot(**kwargs)

    def _style_plot_matplotlib(self, ax, title=None, legend=True):
        if title:
            ax.set_title(title)
        if legend:
            ax.legend(loc="best", bbox_to_anchor=(1, 1), borderaxespad=1)
        ax.grid(False)
        for spine in ["top", "right", "left"]:
            ax.spines[spine].set_visible(False)
        ax.tick_params(axis="y", length=0)

    def _plot_matplotlib(self, **kwargs):
        if self.report_type == "estimator":
            return self._plot_estimator_report()
        elif self.report_type == "cross-validation":
            return self._plot_cross_validation_report()
        elif self.report_type == "comparison-estimator":
            return self._plot_comparison_report_estimator()
        elif self.report_type == "comparison-cross-validation":
            return self._plot_comparison_report_cross_validation()
        else:
            raise TypeError(f"Unexpected report type: {self.report_type!r}")

    def _plot_estimator_report(self):
        self.figure_, self.ax_ = plt.subplots()
        self.coefficients.plot.barh(ax=self.ax_)
        self._style_plot_matplotlib(self.ax_, title="Coefficients")
        self.figure_.tight_layout()
        plt.show()

    def _plot_cross_validation_report(self):
        self.figure_, self.ax_ = plt.subplots()
        self.coefficients.boxplot(ax=self.ax_, vert=False)
        self._style_plot_matplotlib(
            self.ax_, title="Coefficient variance across CV splits", legend=None
        )
        self.figure_.tight_layout()
        plt.show()

    def _plot_comparison_report_estimator(self):
        self.figure_, self.ax_ = plt.subplots(
            nrows=1,
            ncols=len(self.coefficients),
            figsize=(5 * len(self.coefficients), 6),
            squeeze=False,
        )
        self.ax_ = self.ax_.flatten()
        self.figure_.suptitle("Coefficients")
        for ax, coef_frame in zip(self.ax_, self.coefficients, strict=False):
            coef_frame.plot.barh(ax=ax)
            self._style_plot_matplotlib(ax, title=None)
        self.figure_.tight_layout()
        plt.show()

    def _plot_comparison_report_cross_validation(self):
        self.figure_, self.ax_ = plt.subplots(
            nrows=1,
            ncols=len(self.coefficients),
            figsize=(5 * len(self.coefficients), 6),
            squeeze=False,
        )
        self.ax_ = self.ax_.flatten()
        for ax, coef_frame in zip(self.ax_, self.coefficients, strict=False):
            coef_frame.boxplot(ax=ax, vert=False)
            model_name = coef_frame.columns[0].split("__")[0]
            self._style_plot_matplotlib(
                ax,
                title=f"{model_name} Coefficients across splits",
                legend=None,
            )
        self.figure_.tight_layout()
        plt.show()
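To see the comparison branches in isolation, a hand-built sketch (frames and model names are illustrative; real displays come from a `ComparisonReport`):

```python
import pandas as pd

from skore import FeatureImportanceCoefficientsDisplay

ridge = pd.DataFrame({"Ridge": [152.4, 21.2]}, index=["Intercept", "Feature #0"])
lasso = pd.DataFrame({"Lasso": [150.1, 0.0]}, index=["Intercept", "Feature #0"])

display = FeatureImportanceCoefficientsDisplay("comparison-estimator", [ridge, lasso])
print(display.frame())  # pd.concat(..., axis=1): one column per model
display.plot()          # one bar chart per frame under a shared suptitle
```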