Add array API support for det_curve

jaffourt · jaffourt · commit fa3f4d156792 · 2025-10-27T13:46:48.000-04:00
diff --git a/doc/modules/array_api.rst b/doc/modules/array_api.rst
@@ -151,6 +151,7 @@ Metrics
 - :func:`sklearn.metrics.d2_brier_score`
 - :func:`sklearn.metrics.d2_log_loss_score`
 - :func:`sklearn.metrics.d2_tweedie_score`
+- :func:`sklearn.metrics.det_curve`
 - :func:`sklearn.metrics.explained_variance_score`
 - :func:`sklearn.metrics.f1_score`
 - :func:`sklearn.metrics.fbeta_score`
diff --git a/sklearn/metrics/_ranking.py b/sklearn/metrics/_ranking.py
@@ -372,15 +372,17 @@ def det_curve(
     >>> thresholds
     array([0.35, 0.4 , 0.8 ])
     """
+
+    xp, _, device = get_namespace_and_device(y_true, y_score)
     fps, tps, thresholds = _binary_clf_curve(
         y_true, y_score, pos_label=pos_label, sample_weight=sample_weight
     )
 
     # add a threshold at inf where the clf always predicts the negative class
     # i.e. tps = fps = 0
-    tps = np.concatenate(([0], tps))
-    fps = np.concatenate(([0], fps))
-    thresholds = np.concatenate(([np.inf], thresholds))
+    tps = xp.concatenate((xp.asarray([0], device=device), tps))
+    fps = xp.concatenate((xp.asarray([0], device=device), fps))
+    thresholds = xp.concatenate((xp.asarray([np.inf], device=device), thresholds))
 
     if drop_intermediate and len(fps) > 2:
         # Drop thresholds where true positives (tp) do not change from the
@@ -389,16 +391,20 @@ def det_curve(
         # false positive rate (fpr) changes, producing horizontal line segments
         # in the transformed (normal deviate) scale. These intermediate points
         # can be dropped to create lighter DET curve plots.
-        optimal_idxs = np.where(
-            np.concatenate(
-                [[True], np.logical_or(np.diff(tps[:-1]), np.diff(tps[1:])), [True]]
+        optimal_idxs = xp.where(
+            xp.concatenate(
+                [
+                    xp.asarray([True], device=device),
+                    xp.logical_or(xp.diff(tps[:-1]), xp.diff(tps[1:])),
+                    xp.asarray([True], device=device),
+                ]
             )
         )[0]
         fps = fps[optimal_idxs]
         tps = tps[optimal_idxs]
         thresholds = thresholds[optimal_idxs]
 
-    if len(np.unique(y_true)) != 2:
+    if len(xp.unique(y_true)) != 2:
         raise ValueError(
             "Only one class is present in y_true. Detection error "
             "tradeoff curve is not defined in that case."
@@ -410,16 +416,20 @@ def det_curve(
 
     # start with false positives zero, which may be at a finite threshold
     first_ind = (
-        fps.searchsorted(fps[0], side="right") - 1
-        if fps.searchsorted(fps[0], side="right") > 0
+        xp.searchsorted(fps, fps[0], side="right") - 1
+        if xp.searchsorted(fps, fps[0], side="right") > 0
         else None
     )
     # stop with false negatives zero
-    last_ind = tps.searchsorted(tps[-1]) + 1
+    last_ind = xp.searchsorted(tps, tps[-1]) + 1
     sl = slice(first_ind, last_ind)
 
     # reverse the output such that list of false positives is decreasing
-    return (fps[sl][::-1] / n_count, fns[sl][::-1] / p_count, thresholds[sl][::-1])
+    return (
+        xp.flip(fps[sl]) / n_count,
+        xp.flip(fns[sl]) / p_count,
+        xp.flip(thresholds[sl]),
+    )
 
 
 def _binary_roc_auc_score(y_true, y_score, sample_weight=None, max_fpr=None):
diff --git a/sklearn/metrics/tests/test_ranking.py b/sklearn/metrics/tests/test_ranking.py
@@ -5,7 +5,7 @@
 import pytest
 from scipy import stats
 
-from sklearn import datasets
+from sklearn import config_context, datasets
 from sklearn.datasets import make_multilabel_classification
 from sklearn.exceptions import UndefinedMetricWarning
 from sklearn.linear_model import LogisticRegression
@@ -28,7 +28,12 @@
 from sklearn.model_selection import train_test_split
 from sklearn.preprocessing import label_binarize
 from sklearn.random_projection import _sparse_random_matrix
+from sklearn.utils._array_api import (
+    _convert_to_numpy,
+    yield_namespace_device_dtype_combinations,
+)
 from sklearn.utils._testing import (
+    _array_api_for_tests,
     _convert_container,
     assert_allclose,
     assert_almost_equal,
@@ -1392,6 +1397,33 @@ def test_det_curve_pos_label():
     assert_allclose(fnr_pos_cancer, fpr_pos_not_cancer[::-1])
 
 
+@pytest.mark.parametrize(
+    "array_namespace, device_, dtype_name", yield_namespace_device_dtype_combinations()
+)
+def test_det_curve_array_api(array_namespace, device_, dtype_name):
+    xp = _array_api_for_tests(array_namespace, device_)
+
+    y_true_np = np.array([0, 1, 0], dtype=dtype_name)
+    y_score_np = np.array([0, 0.5, 1], dtype=dtype_name)
+
+    # Reference results from NumPy inputs, computed without array API dispatch
+    fpr_np, fnr_np, thresholds_np = det_curve(y_true_np, y_score_np)
+
+    y_true_xp = xp.asarray(y_true_np, device=device_)
+    y_score_xp = xp.asarray(y_score_np, device=device_)
+
+    with config_context(array_api_dispatch=True):
+        fpr_xp, fnr_xp, thresholds_xp = det_curve(y_true_xp, y_score_xp)
+
+    fpr_from_xp = _convert_to_numpy(fpr_xp, xp=xp)
+    fnr_from_xp = _convert_to_numpy(fnr_xp, xp=xp)
+    thresholds_from_xp = _convert_to_numpy(thresholds_xp, xp=xp)
+
+    assert_allclose(fpr_np, fpr_from_xp)
+    assert_allclose(fnr_np, fnr_from_xp)
+    assert_allclose(thresholds_np, thresholds_from_xp)
+
+
 def check_lrap_toy(lrap_score):
     # Check on several small example that it works
     assert_almost_equal(lrap_score([[0, 1]], [[0.25, 0.75]]), 1)