@@ -10,8 +10,8 @@
from skore._sklearn._base import _BaseAccessor
from skore._sklearn._cross_validation import CrossValidationReport
from skore._sklearn._estimator import EstimatorReport
from skore._sklearn._plot.metrics.feature_importance_display import (
FeatureImportanceDisplay,
from skore._sklearn._plot.metrics.feature_importance_coefficients_display import (
FeatureImportanceCoefficientsDisplay,
)
from skore._utils._accessor import _check_comparison_report_sub_estimators_have_coef

@@ -29,21 +29,27 @@ def __init__(self, parent: ComparisonReport) -> None:
super().__init__(parent)

@available_if(_check_comparison_report_sub_estimators_have_coef())
def coefficients(self) -> FeatureImportanceDisplay:
def coefficients(self) -> FeatureImportanceCoefficientsDisplay:
"""Retrieve the coefficients for each report, including the intercepts.

If the compared reports are `EstimatorReport`s, the coefficients from each
If the compared reports are ``EstimatorReport``\s, the coefficients from each
report's estimator are returned as a single-column DataFrame.

If the compared reports are `CrossValidationReport`s, the coefficients
If the compared reports are ``CrossValidationReport``\s, the coefficients
across all cross-validation splits are retained and the columns are prefixed
with the corresponding estimator name to distinguish them.

Comparison reports with the same features are put under one key and are plotted
together.
When some reports share the same features and others do not, those with the same
features are plotted together.
"""

Returns
-------
FeatureImportanceCoefficientsDisplay
The feature importance display containing model coefficients and
intercept.
""" # noqa: D301
similar_reports = defaultdict(list)

for name, report in self._parent.reports_.items():
@@ -89,7 +95,12 @@ def coefficients(self) -> FeatureImportanceDisplay:
else:
raise TypeError(f"Unexpected report type: {self._parent._reports_type}")

return FeatureImportanceDisplay(self._parent, coef_frames)
return FeatureImportanceCoefficientsDisplay(
"comparison-estimator"
if self._parent._reports_type == "EstimatorReport"
else "comparison-cross-validation",
coef_frames,
)

####################################################################################
# Methods related to the help tree
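
For orientation, a hedged sketch of the grouping behaviour described in the
docstring above; `_group_by_feature_index` and its inputs are hypothetical,
not part of skore:

    from collections import defaultdict

    import pandas as pd

    def _group_by_feature_index(
        frames: dict[str, pd.DataFrame],
    ) -> list[list[pd.DataFrame]]:
        """Group single-column coefficient frames sharing the same feature index."""
        groups: dict[tuple, list[pd.DataFrame]] = defaultdict(list)
        for name, frame in frames.items():
            # Frames with identical feature names land under one key, so they
            # can later be drawn together on a shared set of axes.
            groups[tuple(frame.index)].append(frame.rename(columns=lambda _: name))
        return list(groups.values())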
@@ -6,8 +6,8 @@
from skore._externals._pandas_accessors import DirNamesMixin
from skore._sklearn._base import _BaseAccessor
from skore._sklearn._cross_validation.report import CrossValidationReport
from skore._sklearn._plot.metrics.feature_importance_display import (
FeatureImportanceDisplay,
from skore._sklearn._plot.metrics.feature_importance_coefficients_display import (
FeatureImportanceCoefficientsDisplay,
)
from skore._utils._accessor import _check_cross_validation_sub_estimator_has_coef

@@ -22,9 +22,15 @@ def __init__(self, parent: CrossValidationReport) -> None:
super().__init__(parent)

@available_if(_check_cross_validation_sub_estimator_has_coef())
def coefficients(self) -> FeatureImportanceDisplay:
def coefficients(self) -> FeatureImportanceCoefficientsDisplay:
"""Retrieve the coefficients across splits, including the intercept.

Returns
-------
FeatureImportanceCoefficientsDisplay
The feature importance display containing model coefficients and
intercept.

Examples
--------
>>> from sklearn.datasets import make_regression
@@ -34,15 +40,16 @@ def coefficients(self) -> FeatureImportanceDisplay:
>>> report = CrossValidationReport(
...     estimator=Ridge(), X=X, y=y, splitter=5, n_jobs=4
... )
>>> report.feature_importance.coefficients().frame()
>>> display = report.feature_importance.coefficients()
>>> display.frame()
Intercept Feature #0 Feature #1 Feature #2
Split index
0 0.064837 74.100966 27.309656 17.367865
1 0.030257 74.276481 27.571421 17.392395
2 0.000084 74.107126 27.614821 17.277730
3 0.145613 74.207645 27.523667 17.391055
4 0.033695 74.259575 27.599610 17.390481
>>> report.feature_importance.coefficients().plot() # shows plot
>>> display.plot() # shows plot
"""
combined = pd.concat(
{
@@ -58,7 +65,7 @@ def coefficients(self) -> FeatureImportanceDisplay:
).T
combined.index.name = "Split index"

return FeatureImportanceDisplay(self._parent, combined)
return FeatureImportanceCoefficientsDisplay("cross-validation", combined)

####################################################################################
# Methods related to the help tree
19 changes: 13 additions & 6 deletions skore/src/skore/_sklearn/_estimator/feature_importance_accessor.py
@@ -17,8 +17,8 @@
from skore._externals._pandas_accessors import DirNamesMixin
from skore._sklearn._base import _BaseAccessor
from skore._sklearn._estimator.report import EstimatorReport
from skore._sklearn._plot.metrics.feature_importance_display import (
FeatureImportanceDisplay,
from skore._sklearn._plot.metrics.feature_importance_coefficients_display import (
FeatureImportanceCoefficientsDisplay,
)
from skore._sklearn.types import Aggregate
from skore._utils._accessor import (
@@ -158,9 +158,15 @@ def __init__(self, parent: EstimatorReport) -> None:
super().__init__(parent)

@available_if(_check_estimator_has_coef())
def coefficients(self) -> FeatureImportanceDisplay:
def coefficients(self) -> FeatureImportanceCoefficientsDisplay:
"""Retrieve the coefficients of a linear model, including the intercept.

Returns
-------
FeatureImportanceCoefficientsDisplay
The feature importance display containing model coefficients and
intercept.

Examples
--------
>>> from sklearn.datasets import load_diabetes
@@ -171,7 +177,8 @@ def coefficients(self) -> FeatureImportanceDisplay:
>>> split_data = train_test_split(X=X, y=y, random_state=0, as_dict=True)
>>> regressor = Ridge()
>>> report = EstimatorReport(regressor, **split_data)
>>> report.feature_importance.coefficients().frame()
>>> display = report.feature_importance.coefficients()
>>> display.frame()
Coefficient
Intercept 152.4...
Feature #0 21.2...
@@ -184,7 +191,7 @@ def coefficients(self) -> FeatureImportanceDisplay:
Feature #7 112.6...
Feature #8 250.5...
Feature #9 99.5...
>>> report.feature_importance.coefficients().plot() # shows plot
>>> display.plot() # shows plot
"""
parent_estimator = self._parent.estimator_

@@ -239,7 +246,7 @@ def coefficients(self) -> FeatureImportanceDisplay:
columns=columns,
)

return FeatureImportanceDisplay(self._parent, df)
return FeatureImportanceCoefficientsDisplay("estimator", df)

@available_if(_check_has_feature_importances())
def mean_decrease_impurity(self):
@@ -3,14 +3,15 @@
from skore._sklearn._plot.base import DisplayMixin


class FeatureImportanceDisplay(DisplayMixin):
class FeatureImportanceCoefficientsDisplay(DisplayMixin):
"""Feature importance display.

Each report type produces its own output frame and plot.

Parameters
----------
parent : EstimatorReport | CrossValidationReport | ComparisonReport
parent : {"estimator", "cross-validation", "comparison-estimator",
"comparison-cross-validation"}
Report type from which the display is created.

coefficient_data : DataFrame | list[DataFrame]
@@ -76,16 +77,12 @@ def frame(self):
- If a ``ComparisonReport``, the columns are the
models passed in the report, with the index being the feature names.
"""
from skore import ComparisonReport, CrossValidationReport, EstimatorReport

if isinstance(self._parent, EstimatorReport):
if self._parent == "estimator":
return self._frame_estimator_report()
elif isinstance(self._parent, CrossValidationReport):
elif self._parent == "cross-validation":
return self._frame_cross_validation_report()
elif isinstance(self._parent, ComparisonReport):
return self._frame_comparison_report()
else:
raise TypeError(f"Unrecognised report type: {self._parent}")
return self._frame_comparison_report()

def _frame_estimator_report(self):
return self._coefficient_data
@@ -110,53 +107,71 @@ def plot(self, **kwargs) -> None:
return self._plot(**kwargs)

def _plot_matplotlib(self, **kwargs):
from skore._sklearn._comparison import ComparisonReport
from skore._sklearn._cross_validation import CrossValidationReport
from skore._sklearn._estimator import EstimatorReport

if isinstance(self._parent, EstimatorReport):
if self._parent == "estimator":
return self._plot_estimator_report()
elif isinstance(self._parent, CrossValidationReport):
elif self._parent == "cross-validation":
return self._plot_cross_validation_report()
elif isinstance(self._parent, ComparisonReport):
return self._plot_comparison_report()
else:
raise TypeError(f"Unrecognised report type: {self._parent}")
return self._plot_comparison_report()

def _plot_estimator_report(self):
self.figure_, self.ax_ = plt.subplots()
self._coefficient_data.plot.bar(ax=self.ax_)
self.ax_.set_title(f"{self._parent.estimator_name_} Coefficients")
self._coefficient_data.plot.barh(ax=self.ax_)
self.ax_.set_title("Coefficients")
self.ax_.legend(loc="best", bbox_to_anchor=(1, 1), borderaxespad=1)
self.ax_.grid(False)
self.ax_.spines["top"].set_visible(False)
self.ax_.spines["right"].set_visible(False)
self.ax_.spines["left"].set_visible(False)
self.ax_.tick_params(axis="y", length=0)
self.figure_.tight_layout()
plt.show()

def _plot_cross_validation_report(self):
self.figure_, self.ax_ = plt.subplots()
self._coefficient_data.boxplot(ax=self.ax_)
self._coefficient_data.boxplot(ax=self.ax_, vert=False)
self.ax_.set_title("Coefficient variance across CV splits")
self.ax_.grid(False)
self.ax_.spines["top"].set_visible(False)
self.ax_.spines["right"].set_visible(False)
self.ax_.spines["left"].set_visible(False)
self.ax_.tick_params(axis="y", length=0)
self.figure_.tight_layout()
plt.show()

def _plot_comparison_report(self):
if self._parent._reports_type == "EstimatorReport":
for coef_frame in self._coefficient_data:
self.figure_, self.ax_ = plt.subplots()
coef_frame.plot.bar(ax=self.ax_)
self.ax_.legend(loc="best", bbox_to_anchor=(1, 1), borderaxespad=1)

self.ax_.set_title("Coefficients")
self.figure_.tight_layout()
plt.show()
elif self._parent._reports_type == "CrossValidationReport":
for coef_frame in self._coefficient_data:
self.figure_, self.ax_ = plt.subplots()
coef_frame.boxplot(ax=self.ax_)
self.ax_.set_title(
self.figure_, self.ax_ = plt.subplots(
nrows=1,
ncols=len(self._coefficient_data),
figsize=(5 * len(self._coefficient_data), 6),
squeeze=False,
)
self.ax_ = self.ax_.flatten()

if self._parent == "comparison-estimator":
self.figure_.suptitle("Coefficients")
for ax, coef_frame in zip(self.ax_, self._coefficient_data, strict=False):
coef_frame.plot.barh(ax=ax)
ax.legend(loc="best", bbox_to_anchor=(1, 1), borderaxespad=1)
ax.grid(False)
ax.spines["top"].set_visible(False)
ax.spines["right"].set_visible(False)
ax.spines["left"].set_visible(False)
ax.tick_params(axis="y", length=0)

elif self._parent == "comparison-cross-validation":
for ax, coef_frame in zip(self.ax_, self._coefficient_data, strict=False):
coef_frame.boxplot(ax=ax, vert=False)
ax.set_title(
f"{coef_frame.columns[0].split('__')[0]} Coefficients across splits"
)
plt.xticks(rotation=90)
plt.tight_layout()
plt.show()
ax.grid(False)
ax.spines["top"].set_visible(False)
ax.spines["right"].set_visible(False)
ax.spines["left"].set_visible(False)
ax.tick_params(axis="y", length=0)
else:
raise TypeError(f"Unexpected report type: {type(self._parent.reports_[0])}")
raise TypeError(f"Unexpected report type: {self._parent}")

self.figure_.tight_layout()
plt.show()
13 changes: 13 additions & 0 deletions sphinx/reference/report/comparison_report.rst
@@ -56,3 +56,16 @@ get the common performance metric representations.
ComparisonReport.metrics.rmse
ComparisonReport.metrics.roc_auc
ComparisonReport.metrics.timings

Feature importance
------------------

The `feature_importance` accessor helps you evaluate the importance of the
features used to train your estimator.

.. autosummary::
:toctree: ../api/
:template: autosummary/accessor_method.rst

ComparisonReport.feature_importance.help
ComparisonReport.feature_importance.coefficients
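
A hedged usage sketch; the estimators and data below are illustrative, not
taken from this PR:

.. code-block:: python

    from sklearn.datasets import make_regression
    from sklearn.linear_model import Lasso, Ridge
    from skore import ComparisonReport, EstimatorReport, train_test_split

    X, y = make_regression(n_features=3, random_state=0)
    split_data = train_test_split(X=X, y=y, random_state=0, as_dict=True)
    report = ComparisonReport(
        [EstimatorReport(Ridge(), **split_data), EstimatorReport(Lasso(), **split_data)]
    )
    display = report.feature_importance.coefficients()
    display.frame()  # coefficients per compared model
    display.plot()   # side-by-side horizontal bar charts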
13 changes: 13 additions & 0 deletions sphinx/reference/report/cross_validation_report.rst
@@ -77,3 +77,16 @@ estimator across cross-validation splits.
CrossValidationReport.metrics.roc
CrossValidationReport.metrics.roc_auc
CrossValidationReport.metrics.timings

Feature importance
------------------

The `feature_importance` accessor helps you evaluate the importance of the
features used to train your estimator.

.. autosummary::
:toctree: ../api/
:template: autosummary/accessor_method.rst

CrossValidationReport.feature_importance.help
CrossValidationReport.feature_importance.coefficients
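
A hedged usage sketch, mirroring the doctest added in this PR:

.. code-block:: python

    from sklearn.datasets import make_regression
    from sklearn.linear_model import Ridge
    from skore import CrossValidationReport

    X, y = make_regression(n_features=3, random_state=0)
    report = CrossValidationReport(estimator=Ridge(), X=X, y=y, splitter=5)
    display = report.feature_importance.coefficients()
    display.frame()  # one row of coefficients (plus intercept) per split
    display.plot()   # box plot of coefficient variation across splits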
10 changes: 10 additions & 0 deletions sphinx/user_guide/reporters.rst
@@ -84,6 +84,16 @@ Refer to the :ref:`displays` section for more details regarding the `skore` display
API. Refer to the :ref:`estimator_metrics` section for more details on all the
available metrics in `skore`.

Model interpretability
^^^^^^^^^^^^^^^^^^^^^^

:obj:`EstimatorReport.feature_importance` is the entry point to interpret and explain a
predictive model. This accessor provides methods that return a `skore` :class:`Display`
object. As with other display objects, they expose three methods:
(i) `plot`, which visualizes the information;
(ii) `set_style`, which sets graphical options;
(iii) `frame`, which returns a `pandas.DataFrame` with the underlying data.
A short example follows.
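
The example below is a hedged sketch using the coefficients display; the data
and estimator are illustrative:

.. code-block:: python

    from sklearn.datasets import load_diabetes
    from sklearn.linear_model import Ridge
    from skore import EstimatorReport, train_test_split

    X, y = load_diabetes(return_X_y=True)
    split_data = train_test_split(X=X, y=y, random_state=0, as_dict=True)
    report = EstimatorReport(Ridge(), **split_data)

    display = report.feature_importance.coefficients()
    display.frame()  # the underlying data as a pandas.DataFrame
    display.plot()   # horizontal bar chart of intercept and coefficients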

Caching mechanism
^^^^^^^^^^^^^^^^^
