Replace `_filter_by` with `DataFrame.query`

auguste-probabl · auguste-probabl · commit 47cd9d48aa63 · 2025-05-23T11:15:01.000+02:00
diff --git a/skore/src/skore/sklearn/_plot/metrics/roc_curve.py b/skore/src/skore/sklearn/_plot/metrics/roc_curve.py
@@ -17,7 +17,6 @@
     HelpDisplayMixin,
     _ClassifierCurveDisplayMixin,
     _despine_matplotlib_axis,
-    _filter_by,
     _validate_style_kwargs,
     sample_mpl_colormap,
 )
@@ -222,15 +221,11 @@ def _plot_single_estimator(
             )
 
             for class_idx, class_label in enumerate(labels):
-                roc_curve = _filter_by(
-                    self.roc_curve,
-                    label=class_label,
-                )
+                roc_curve = self.roc_curve.query(f"label == {class_label}")
 
-                roc_auc = _filter_by(
-                    self.roc_auc,
-                    label=class_label,
-                )["roc_auc"].iloc[0]
+                roc_auc = self.roc_auc.query(f"label == {class_label}")["roc_auc"].iloc[
+                    0
+                ]
 
                 roc_curve_kwargs_class = roc_curve_kwargs[class_idx]
 
@@ -315,15 +310,11 @@ def _plot_cross_validated_estimator(
         if self.ml_task == "binary-classification":
             pos_label = cast(PositiveLabel, self.pos_label)
             for split_idx in self.roc_curve["split_index"].unique():
-                roc_curve = _filter_by(
-                    self.roc_curve,
-                    label=pos_label,
-                    split_index=split_idx,
+                roc_curve = self.roc_curve.query(
+                    f"label == {pos_label} & split_index == {split_idx}"
                 )
-                roc_auc = _filter_by(
-                    self.roc_auc,
-                    label=pos_label,
-                    split_index=split_idx,
+                roc_auc = self.roc_auc.query(
+                    f"label == {pos_label} & split_index == {split_idx}"
                 )["roc_auc"].iloc[0]
 
                 line_kwargs_validated = _validate_style_kwargs(
@@ -351,17 +342,14 @@ def _plot_cross_validated_estimator(
             )
 
             for class_idx, class_label in enumerate(labels):
-                roc_auc = _filter_by(
-                    self.roc_auc,
-                    label=class_label,
-                )["roc_auc"].iloc[0]
+                roc_auc = self.roc_auc.query(f"label == {class_label}")["roc_auc"].iloc[
+                    0
+                ]
                 roc_curve_kwargs_class = roc_curve_kwargs[class_idx]
 
                 for split_idx in self.roc_curve["split_index"].unique():
-                    roc_curve_label = _filter_by(
-                        self.roc_curve,
-                        label=class_label,
-                        split_index=split_idx,
+                    roc_curve_label = self.roc_curve.query(
+                        f"label == {class_label} & split_index == {split_idx}"
                     )
 
                     line_kwargs_validated = _validate_style_kwargs(
@@ -448,16 +436,12 @@ def _plot_comparison_estimator(
         if self.ml_task == "binary-classification":
             pos_label = cast(PositiveLabel, self.pos_label)
             for est_idx, est_name in enumerate(estimator_names):
-                roc_curve = _filter_by(
-                    self.roc_curve,
-                    label=pos_label,
-                    estimator_name=est_name,
+                roc_curve = self.roc_curve.query(
+                    f"label == {pos_label} & estimator_name == '{est_name}'"
                 )
 
-                roc_auc = _filter_by(
-                    self.roc_auc,
-                    label=pos_label,
-                    estimator_name=est_name,
+                roc_auc = self.roc_auc.query(
+                    f"label == {pos_label} & estimator_name == '{est_name}'"
                 )["roc_auc"].iloc[0]
 
                 line_kwargs_validated = _validate_style_kwargs(
@@ -485,16 +469,12 @@ def _plot_comparison_estimator(
                 est_color = class_colors[est_idx]
 
                 for class_idx, class_label in enumerate(labels):
-                    roc_curve = _filter_by(
-                        self.roc_curve,
-                        label=class_label,
-                        estimator_name=est_name,
+                    roc_curve = self.roc_curve.query(
+                        f"label == {class_label} & estimator_name == '{est_name}'"
                     )
 
-                    roc_auc = _filter_by(
-                        self.roc_auc,
-                        label=class_label,
-                        estimator_name=est_name,
+                    roc_auc = self.roc_auc.query(
+                        f"label == {class_label} & estimator_name == '{est_name}'"
                     )["roc_auc"].iloc[0]
 
                     class_linestyle = LINESTYLE[(class_idx % len(LINESTYLE))][1]
@@ -580,16 +560,13 @@ def _plot_comparison_cross_validation(
                 10 if len(estimator_names) < 10 else len(estimator_names),
             )
             for report_idx, estimator_name in enumerate(estimator_names):
-                roc_curve = _filter_by(
-                    self.roc_curve,
-                    label=self.pos_label,
-                    estimator_name=estimator_name,
+                roc_curve = self.roc_curve.query(
+                    f"label == {self.pos_label} & estimator_name == '{estimator_name}'"
                 )
 
-                roc_auc = _filter_by(
-                    self.roc_auc,
-                    estimator_name=estimator_name,
-                )["roc_auc"]
+                roc_auc = self.roc_auc.query(f"estimator_name == '{estimator_name}'")[
+                    "roc_auc"
+                ]
 
                 line_kwargs_validated = _validate_style_kwargs(
                     line_kwargs, roc_curve_kwargs[report_idx]
@@ -648,16 +625,12 @@ def _plot_comparison_cross_validation(
                 est_color = colors[est_idx]
 
                 for label_idx, label in enumerate(labels):
-                    roc_curve = _filter_by(
-                        self.roc_curve,
-                        label=label,
-                        estimator_name=estimator_name,
+                    roc_curve = self.roc_curve.query(
+                        f"label == {label} & estimator_name == '{estimator_name}'"
                     )
 
-                    roc_auc = _filter_by(
-                        self.roc_auc,
-                        label=label,
-                        estimator_name=estimator_name,
+                    roc_auc = self.roc_auc.query(
+                        f"label == {label} & estimator_name == '{estimator_name}'"
                     )["roc_auc"]
 
                     line_kwargs_validated = _validate_style_kwargs(
diff --git a/skore/src/skore/sklearn/_plot/utils.py b/skore/src/skore/sklearn/_plot/utils.py
@@ -353,20 +353,3 @@ def sample_mpl_colormap(
     """
     indices = np.linspace(0, 1, n)
     return [cmap(i) for i in indices]
-
-
-def _filter_by(
-    df,
-    label: Optional[PositiveLabel] = None,
-    split_index: Optional[int] = None,
-    estimator_name: Optional[str] = None,
-) -> DataFrame:
-    noop_filter = df.iloc[:, 0].map(lambda _: True)
-    label_filter = (df["label"] == label) if label is not None else True
-    split_number_filter = (
-        (df["split_index"] == split_index) if split_index is not None else True
-    )
-    estimator_name_filter = (
-        (df["estimator_name"] == estimator_name) if estimator_name is not None else True
-    )
-    return df[noop_filter & label_filter & split_number_filter & estimator_name_filter]
diff --git a/skore/tests/unit/sklearn/plot/roc_curve/conftest.py b/skore/tests/unit/sklearn/plot/roc_curve/conftest.py
@@ -2,7 +2,6 @@
 from sklearn.datasets import make_classification
 from sklearn.linear_model import LogisticRegression
 from sklearn.model_selection import train_test_split
-from skore.sklearn._plot.utils import _filter_by
 
 
 @pytest.fixture
@@ -31,17 +30,3 @@ def binary_classification_data_no_split():
 def multiclass_classification_data_no_split():
     X, y = make_classification(n_classes=3, n_clusters_per_class=1, random_state=42)
     return LogisticRegression(), X, y
-
-
-def get_roc_auc(
-    display,
-    label=None,
-    split_index=None,
-    estimator_name=None,
-) -> float:
-    return _filter_by(
-        display.roc_auc,
-        label=label,
-        split_index=split_index,
-        estimator_name=estimator_name,
-    )["roc_auc"].iloc[0]
diff --git a/skore/tests/unit/sklearn/plot/roc_curve/test_comparison_cross_validation.py b/skore/tests/unit/sklearn/plot/roc_curve/test_comparison_cross_validation.py
@@ -9,7 +9,7 @@
 from sklearn.linear_model import LogisticRegression
 from skore import ComparisonReport, CrossValidationReport
 from skore.sklearn._plot.metrics.roc_curve import RocCurveDisplay
-from skore.sklearn._plot.utils import _filter_by, sample_mpl_colormap
+from skore.sklearn._plot.utils import sample_mpl_colormap
 
 
 def test_binary_classification(pyplot):
@@ -37,10 +37,8 @@ def test_binary_classification(pyplot):
     for i, estimator_name in enumerate(report.report_names_):
         roc_curve_mpl = display.lines_[i * n_splits]
         assert isinstance(roc_curve_mpl, Line2D)
-        auc = _filter_by(
-            display.roc_auc,
-            label=pos_label,
-            estimator_name=estimator_name,
+        auc = display.roc_auc.query(
+            f"label == {pos_label} & estimator_name == '{estimator_name}'"
         )["roc_auc"]
 
         assert roc_curve_mpl.get_label() == (
@@ -95,10 +93,8 @@ def test_multiclass(pyplot):
         roc_curve_mpl = display.lines_[i * n_splits]
         assert isinstance(roc_curve_mpl, Line2D)
 
-        auc = _filter_by(
-            display.roc_auc,
-            label=label,
-            estimator_name=estimator_name,
+        auc = display.roc_auc.query(
+            f"label == {label} & estimator_name == '{estimator_name}'"
         )["roc_auc"]
 
         assert roc_curve_mpl.get_label() == (
diff --git a/skore/tests/unit/sklearn/plot/roc_curve/test_comparison_estimator.py b/skore/tests/unit/sklearn/plot/roc_curve/test_comparison_estimator.py
@@ -5,8 +5,6 @@
 from skore.sklearn._plot import RocCurveDisplay
 from skore.sklearn._plot.utils import sample_mpl_colormap
 
-from .conftest import get_roc_auc
-
 
 def test_binary_classification(pyplot, binary_classification_data):
     """Check the attributes and default plotting behaviour of the ROC curve plot with
@@ -63,9 +61,9 @@ def test_binary_classification(pyplot, binary_classification_data):
         zip(report.report_names_, display.lines_)
     ):
         assert isinstance(line, mpl.lines.Line2D)
-        roc_auc_class = get_roc_auc(
-            display, label=display.pos_label, estimator_name=estimator_name
-        )
+        roc_auc_class = display.roc_auc.query(
+            f"label == {display.pos_label} & estimator_name == '{estimator_name}'"
+        )["roc_auc"].iloc[0]
         assert line.get_label() == (f"{estimator_name} (AUC = {roc_auc_class:0.2f})")
         assert mpl.colors.to_rgba(line.get_color()) == expected_colors[idx]
 
@@ -144,11 +142,9 @@ def test_multiclass_classification(pyplot, multiclass_classification_data):
         for class_label_idx, class_label in enumerate(class_labels):
             roc_curve_mpl = display.lines_[idx * len(class_labels) + class_label_idx]
             assert isinstance(roc_curve_mpl, mpl.lines.Line2D)
-            roc_auc_class = get_roc_auc(
-                display,
-                label=class_label,
-                estimator_name=estimator_name,
-            )
+            roc_auc_class = display.roc_auc.query(
+                f"label == {class_label} & estimator_name == '{estimator_name}'"
+            )["roc_auc"].iloc[0]
             assert roc_curve_mpl.get_label() == (
                 f"{estimator_name} - {str(class_label).title()} "
                 f"(AUC = {roc_auc_class:0.2f})"
diff --git a/skore/tests/unit/sklearn/plot/roc_curve/test_cross_validation.py b/skore/tests/unit/sklearn/plot/roc_curve/test_cross_validation.py
@@ -5,8 +5,6 @@
 from skore.sklearn._plot import RocCurveDisplay
 from skore.sklearn._plot.utils import sample_mpl_colormap
 
-from .conftest import get_roc_auc
-
 
 @pytest.mark.parametrize("data_source", ["train", "test", "X_y"])
 def test_binary_classification(
@@ -60,7 +58,9 @@ def test_binary_classification(
     expected_colors = sample_mpl_colormap(pyplot.cm.tab10, 10)
     for split_idx, line in enumerate(display.lines_):
         assert isinstance(line, mpl.lines.Line2D)
-        roc_auc_split = get_roc_auc(display, label=pos_label, split_index=split_idx)
+        roc_auc_split = display.roc_auc.query(
+            f"label == {pos_label} & split_index == {split_idx}"
+        )["roc_auc"].iloc[0]
         assert line.get_label() == (
             f"Estimator of fold #{split_idx + 1} (AUC = {roc_auc_split:0.2f})"
         )
@@ -139,7 +139,9 @@ def test_multiclass_classification(
             roc_curve_mpl = display.lines_[class_label * cv + split_idx]
             assert isinstance(roc_curve_mpl, mpl.lines.Line2D)
             if split_idx == 0:
-                roc_auc_class = get_roc_auc(display, label=class_label)
+                roc_auc_class = display.roc_auc.query(f"label == {class_label}")[
+                    "roc_auc"
+                ].iloc[0]
                 assert roc_curve_mpl.get_label() == (
                     f"{str(class_label).title()} "
                     f"(AUC = {np.mean(roc_auc_class):0.2f}"
diff --git a/skore/tests/unit/sklearn/plot/roc_curve/test_estimator.py b/skore/tests/unit/sklearn/plot/roc_curve/test_estimator.py
@@ -5,8 +5,6 @@
 from skore.sklearn._plot import RocCurveDisplay
 from skore.sklearn._plot.utils import sample_mpl_colormap
 
-from .conftest import get_roc_auc
-
 
 def test_binary_classification(pyplot, binary_classification_data):
     """Check the attributes and default plotting behaviour of the ROC curve plot with
@@ -48,7 +46,10 @@ def test_binary_classification(pyplot, binary_classification_data):
     assert len(display.lines_) == 1
     roc_curve_mpl = display.lines_[0]
     assert isinstance(roc_curve_mpl, mpl.lines.Line2D)
-    assert roc_curve_mpl.get_label() == f"Test set (AUC = {get_roc_auc(display):0.2f})"
+    assert (
+        roc_curve_mpl.get_label()
+        == f"Test set (AUC = {display.roc_auc['roc_auc'].iloc[0]:0.2f})"
+    )
     assert roc_curve_mpl.get_color() == "#1f77b4"  # tab:blue in hex
 
     assert isinstance(display.chance_level_, mpl.lines.Line2D)
@@ -107,7 +108,9 @@ def test_multiclass_classification(pyplot, multiclass_classification_data):
     for class_label, expected_color in zip(estimator.classes_, default_colors):
         roc_curve_mpl = display.lines_[class_label]
         assert isinstance(roc_curve_mpl, mpl.lines.Line2D)
-        roc_auc_class = get_roc_auc(display, label=class_label)
+        roc_auc_class = display.roc_auc.query(f"label == {class_label}")[
+            "roc_auc"
+        ].iloc[0]
         assert roc_curve_mpl.get_label() == (
             f"{str(class_label).title()} - test set (AUC = {roc_auc_class:0.2f})"
         )
@@ -139,12 +142,15 @@ def test_data_source_binary_classification(pyplot, binary_classification_data):
     display.plot()
     assert (
         display.lines_[0].get_label()
-        == f"Train set (AUC = {get_roc_auc(display):0.2f})"
+        == f"Train set (AUC = {display.roc_auc['roc_auc'].iloc[0]:0.2f})"
     )
 
     display = report.metrics.roc(data_source="X_y", X=X_train, y=y_train)
     display.plot()
-    assert display.lines_[0].get_label() == f"AUC = {get_roc_auc(display):0.2f}"
+    assert (
+        display.lines_[0].get_label()
+        == f"AUC = {display.roc_auc['roc_auc'].iloc[0]:0.2f}"
+    )
 
 
 def test_data_source_multiclass_classification(pyplot, multiclass_classification_data):
@@ -156,17 +162,21 @@ def test_data_source_multiclass_classification(pyplot, multiclass_classification
     display = report.metrics.roc(data_source="train")
     display.plot()
     for class_label in estimator.classes_:
+        roc_auc_class = display.roc_auc.query(f"label == {class_label}")[
+            "roc_auc"
+        ].iloc[0]
         assert display.lines_[class_label].get_label() == (
-            f"{str(class_label).title()} - train set "
-            f"(AUC = {get_roc_auc(display, label=class_label):0.2f})"
+            f"{str(class_label).title()} - train set (AUC = {roc_auc_class:0.2f})"
         )
 
     display = report.metrics.roc(data_source="X_y", X=X_train, y=y_train)
     display.plot()
     for class_label in estimator.classes_:
+        roc_auc_class = display.roc_auc.query(f"label == {class_label}")[
+            "roc_auc"
+        ].iloc[0]
         assert display.lines_[class_label].get_label() == (
-            f"{str(class_label).title()} - "
-            f"AUC = {get_roc_auc(display, label=class_label):0.2f}"
+            f"{str(class_label).title()} - AUC = {roc_auc_class:0.2f}"
         )