
Commit cb01d7b

iter
1 parent ecc25fc commit cb01d7b

File tree

3 files changed: +60 additions, -177 deletions

examples/model_evaluation/plot_estimator_report.py

Lines changed: 2 additions & 7 deletions
@@ -430,14 +430,9 @@ def operational_decision_cost(y_true, y_pred, amount):
 plt.show()
 
 # %%
-# The title shows the threshold value used. By default, the threshold closest to
-# the requested value is selected from the available thresholds.
+# Since there are a finite number of thresholds where the predictions change,
+# we plot the confusion matrix associated with the threshold closest to the one provided.
 #
-# We can also compare multiple thresholds side by side:
-cm_threshold_display.plot(threshold=[0.3, 0.5, 0.7])
-plt.show()
-
-# %%
 # The frame method also supports threshold selection:
 cm_threshold_display.frame(threshold=0.7)
 
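As the updated comment explains, the display snaps a requested threshold to the closest of the finitely many thresholds at which the predictions actually change. A minimal standalone sketch of that selection rule, using a made-up array of available thresholds:

import numpy as np

# Hypothetical thresholds at which the predicted labels change.
thresholds = np.array([0.12, 0.34, 0.56, 0.78])
requested = 0.5

# Same selection rule as in the display: take the closest available threshold.
closest = thresholds[np.argmin(np.abs(thresholds - requested))]
print(closest)  # 0.56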
skore/src/skore/_sklearn/_plot/metrics/confusion_matrix.py

Lines changed: 16 additions & 100 deletions
@@ -151,20 +151,6 @@ def _plot_single_estimator(
         heatmap_kwargs : dict, default=None
             Additional keyword arguments to be passed to seaborn's `sns.heatmap`.
         """
-        # Handle multiple thresholds
-        if isinstance(threshold, (list, tuple)):
-            if not self.do_threshold:
-                raise ValueError(
-                    "threshold can only be used with binary classification and "
-                    "when `report.metrics.confusion_matrix(threshold=True)` is used."
-                )
-            self._plot_multiple_thresholds(
-                thresholds=threshold,
-                normalize=normalize,
-                heatmap_kwargs=heatmap_kwargs,
-            )
-            return
-
         if threshold is not None:
             if not self.do_threshold:
                 raise ValueError(
@@ -211,67 +197,6 @@ def _plot_single_estimator(
 
         self.figure_.tight_layout()
 
-    def _plot_multiple_thresholds(
-        self,
-        *,
-        thresholds: list[float],
-        normalize: Literal["true", "pred", "all"] | None = None,
-        heatmap_kwargs: dict | None = None,
-    ) -> None:
-        """
-        Plot multiple confusion matrices for different thresholds.
-
-        Parameters
-        ----------
-        thresholds : list of float
-            The decision thresholds to use.
-
-        normalize : {'true', 'pred', 'all'}, default=None
-            Normalizes confusion matrix over the true (rows), predicted (columns)
-            conditions or all the population. If None, the confusion matrix will not be
-            normalized.
-
-        heatmap_kwargs : dict, default=None
-            Additional keyword arguments to be passed to seaborn's `sns.heatmap`.
-        """
-        n_thresholds = len(thresholds)
-        figsize = (5 * n_thresholds, 4)
-        self.figure_, axes = plt.subplots(1, n_thresholds, figsize=figsize)
-
-        # Handle single threshold case (axes won't be an array)
-        if n_thresholds == 1:
-            axes = [axes]
-
-        heatmap_kwargs_validated = _validate_style_kwargs(
-            {"fmt": ".2f" if normalize else "d", **self._default_heatmap_kwargs},
-            heatmap_kwargs or {},
-        )
-        # Disable colorbar for multi-threshold plots to avoid clutter
-        heatmap_kwargs_validated["cbar"] = False
-
-        normalize_by = "normalized_by_" + normalize if normalize else "count"
-
-        for ax, thresh in zip(axes, thresholds, strict=True):
-            # Find the existing threshold that is closest to the given threshold
-            closest_threshold = self.thresholds_[
-                np.argmin(np.abs(self.thresholds_ - thresh))
-            ]
-            cm = self.confusion_matrix[
-                self.confusion_matrix["threshold"] == closest_threshold
-            ]
-
-            sns.heatmap(
-                cm.pivot(
-                    index="True label", columns="Predicted label", values=normalize_by
-                ),
-                ax=ax,
-                **heatmap_kwargs_validated,
-            )
-            ax.set_title(f"threshold: {closest_threshold:.2f}")
-
-        self.ax_ = axes[-1]  # Set ax_ to the last axes for consistency
-        self.figure_.tight_layout()
-
     @classmethod
     def _compute_data_for_display(
         cls,
@@ -342,30 +267,21 @@ def _compute_data_for_display(
 
         confusion_matrix_records = []
         for cm, threshold_value in zip(cms, thresholds, strict=True):
-            # Compute normalized values with proper handling of zero division
-            with np.errstate(all="ignore"):
-                row_sums = cm.sum(axis=1, keepdims=True)
-                col_sums = cm.sum(axis=0, keepdims=True)
-                total_sum = cm.sum()
-
-                cm_true = np.divide(
-                    cm,
-                    row_sums,
-                    out=np.zeros_like(cm, dtype=float),
-                    where=row_sums != 0,
-                )
-                cm_pred = np.divide(
-                    cm,
-                    col_sums,
-                    out=np.zeros_like(cm, dtype=float),
-                    where=col_sums != 0,
-                )
-                cm_all = np.divide(
-                    cm,
-                    total_sum,
-                    out=np.zeros_like(cm, dtype=float),
-                    where=total_sum != 0,
-                )
+            cm_true = np.divide(
+                cm,
+                cm.sum(axis=1, keepdims=True),
+                where=cm.sum(axis=1, keepdims=True) != 0,
+            )
+            cm_pred = np.divide(
+                cm,
+                cm.sum(axis=0, keepdims=True),
+                where=cm.sum(axis=0, keepdims=True) != 0,
+            )
+            cm_all = np.divide(
+                cm,
+                cm.sum(),
+                where=cm.sum() != 0,
+            )
 
             n_classes = len(display_labels)
             true_labels = np.repeat(display_labels, n_classes)
@@ -438,7 +354,7 @@ def frame(
 
         if threshold is not None and not self.do_threshold:
             raise ValueError(
-                "threshold can only be used with binary classification "
+                "threshold can only be used with binary classification and "
                 "when `report.metrics.confusion_matrix(threshold=True)` is used."
             )
         elif threshold is None and self.do_threshold:

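For reference, here is a self-contained sketch of the `np.divide(..., where=...)` normalization pattern used in `_compute_data_for_display` above, mirroring the checks in the normalization test; the 2x2 counts are made up for illustration:

import numpy as np

# Hypothetical binary confusion matrix counts.
cm = np.array([[40, 10], [5, 45]])
row_sums = cm.sum(axis=1, keepdims=True)
col_sums = cm.sum(axis=0, keepdims=True)

cm_true = np.divide(cm, row_sums, where=row_sums != 0)  # normalize over true labels (rows)
cm_pred = np.divide(cm, col_sums, where=col_sums != 0)  # normalize over predictions (columns)
cm_all = np.divide(cm, cm.sum(), where=cm.sum() != 0)   # normalize over the whole population

assert np.allclose(cm_true.sum(axis=1), 1.0)  # each row sums to 1
assert np.allclose(cm_pred.sum(axis=0), 1.0)  # each column sums to 1
assert np.isclose(cm_all.sum(), 1.0)          # all entries sum to 1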
skore/tests/unit/displays/confusion_matrix/test_estimator.py

Lines changed: 42 additions & 70 deletions
@@ -341,7 +341,8 @@ def test_threshold_display_creation(
 def test_threshold_display_without_threshold(
     pyplot, logistic_binary_classification_with_train_test
 ):
-    """Check that do_threshold is False when threshold=False."""
+    """Check that do_threshold is False when threshold=False and that we raise an error
+    when frame or plot is called with threshold."""
     estimator, X_train, X_test, y_train, y_test = (
         logistic_binary_classification_with_train_test
     )
@@ -357,6 +358,18 @@ def test_threshold_display_without_threshold(
     assert display.do_threshold is False
     assert display.thresholds_ is None
 
+    display = report.metrics.confusion_matrix(threshold=False)
+
+    err_msg = (
+        "threshold can only be used with binary classification and "
+        "when `report.metrics.confusion_matrix\\(threshold=True\\)` is used."
+    )
+    with pytest.raises(ValueError, match=err_msg):
+        display.frame(threshold=0.5)
+
+    with pytest.raises(ValueError, match=err_msg):
+        display.plot(threshold=0.5)
+
 
 def test_plot_with_threshold(pyplot, logistic_binary_classification_with_train_test):
     """Check that we can plot with a specific threshold."""
@@ -375,9 +388,6 @@ def test_plot_with_threshold(pyplot, logistic_binary_classification_with_train_t
     display.plot(threshold=0.3)
     assert "threshold" in display.ax_.get_title().lower()
 
-    display.plot(threshold=0.7)
-    assert "threshold" in display.ax_.get_title().lower()
-
 
 def test_plot_with_default_threshold(
     pyplot, logistic_binary_classification_with_train_test
@@ -394,34 +404,15 @@ def test_plot_with_default_threshold(
         y_test=y_test,
     )
     display = report.metrics.confusion_matrix(threshold=True)
-    display.plot()  # Should use default threshold (0.5)
-
-    # The title should include the threshold
-    assert "threshold" in display.ax_.get_title().lower()
-
-
-def test_threshold_error_without_threshold_support(
-    pyplot, forest_binary_classification_with_train_test
-):
-    """Check that we raise an error when threshold is used without threshold support."""
-    estimator, X_train, X_test, y_train, y_test = (
-        forest_binary_classification_with_train_test
-    )
-    report = EstimatorReport(
-        estimator,
-        X_train=X_train,
-        y_train=y_train,
-        X_test=X_test,
-        y_test=y_test,
-    )
-    display = report.metrics.confusion_matrix(threshold=False)
+    display.plot()
 
-    err_msg = (
-        "threshold can only be used with binary classification and "
-        "when `report.metrics.confusion_matrix\\(threshold=True\\)` is used."
+    closest_threshold = display.thresholds_[
+        np.argmin(np.abs(display.thresholds_ - 0.5))
+    ]
+    assert (
+        display.ax_.get_title()
+        == f"Confusion Matrix (threshold: {closest_threshold:.2f})"
     )
-    with pytest.raises(ValueError, match=err_msg):
-        display.plot(threshold=0.5)
 
 
 def test_frame_with_threshold(logistic_binary_classification_with_train_test):
@@ -463,30 +454,6 @@ def test_frame_all_thresholds(logistic_binary_classification_with_train_test):
     assert len(frame) == len(display.thresholds_)
 
 
-def test_frame_threshold_error_without_threshold_support(
-    forest_binary_classification_with_train_test,
-):
-    """Check that we raise an error when threshold is used without threshold support."""
-    estimator, X_train, X_test, y_train, y_test = (
-        forest_binary_classification_with_train_test
-    )
-    report = EstimatorReport(
-        estimator,
-        X_train=X_train,
-        y_train=y_train,
-        X_test=X_test,
-        y_test=y_test,
-    )
-    display = report.metrics.confusion_matrix(threshold=False)
-
-    err_msg = (
-        "threshold can only be used with binary classification "
-        "when `report.metrics.confusion_matrix\\(threshold=True\\)` is used."
-    )
-    with pytest.raises(ValueError, match=err_msg):
-        display.frame(threshold=0.5)
-
-
 def test_threshold_normalization(
     pyplot, logistic_binary_classification_with_train_test
 ):
@@ -503,26 +470,23 @@ def test_threshold_normalization(
     )
     display = report.metrics.confusion_matrix(threshold=True)
 
-    # Test with normalize="true"
     display.plot(threshold=0.5, normalize="true")
     frame = display.frame(threshold=0.5, normalize="true")
     assert np.allclose(frame.sum(axis=1), np.ones(2))
 
-    # Test with normalize="pred"
     display.plot(threshold=0.5, normalize="pred")
     frame = display.frame(threshold=0.5, normalize="pred")
     assert np.allclose(frame.sum(axis=0), np.ones(2))
 
-    # Test with normalize="all"
     display.plot(threshold=0.5, normalize="all")
     frame = display.frame(threshold=0.5, normalize="all")
     assert np.isclose(frame.sum().sum(), 1.0)
 
 
-def test_plot_with_multiple_thresholds(
+def test_threshold_closest_match(
     pyplot, logistic_binary_classification_with_train_test
 ):
-    """Check that we can plot with multiple thresholds."""
+    """Check that the closest threshold is selected."""
     estimator, X_train, X_test, y_train, y_test = (
         logistic_binary_classification_with_train_test
     )
@@ -535,17 +499,25 @@ def test_plot_with_multiple_thresholds(
     )
     display = report.metrics.confusion_matrix(threshold=True)
 
-    # Plot with multiple thresholds
-    display.plot(threshold=[0.3, 0.5, 0.7])
-
-    # Should have 3 subplots
-    assert len(display.figure_.axes) >= 3
+    # Create a threshold that is not in the list to test the closest match
+    middle_index = len(display.thresholds_) // 2
+    threshold = (
+        display.thresholds_[middle_index] + display.thresholds_[middle_index + 1]
+    ) / 2 - 1e-6
+    closest_threshold = display.thresholds_[middle_index]
+    assert threshold not in display.thresholds_
+    display.plot(threshold=threshold)
+    assert (
+        display.ax_.get_title()
+        == f"Confusion Matrix (threshold: {closest_threshold:.2f})"
+    )
 
 
-def test_threshold_closest_match(
+def test_frame_plot_coincidence_with_threshold(
     pyplot, logistic_binary_classification_with_train_test
 ):
-    """Check that the closest threshold is selected."""
+    """Check that the values in the frame and plot coincide when threshold is
+    provided."""
     estimator, X_train, X_test, y_train, y_test = (
         logistic_binary_classification_with_train_test
     )
@@ -557,7 +529,7 @@ def test_threshold_closest_match(
         y_test=y_test,
     )
     display = report.metrics.confusion_matrix(threshold=True)
-
-    # Even with a threshold not in the list, it should work
-    display.plot(threshold=0.12345)
-    assert display.ax_ is not None
+    frame = display.frame(threshold=0.5)
+    frame_values = frame.values.flatten()
+    display.plot(threshold=0.5)
+    assert np.allclose(frame_values, display.ax_.collections[0].get_array().flatten())
