Skip to content

Commit 24687eb

Browse files
committed
add ranking_metrics to metrax
1 parent 75d6f7e commit 24687eb

File tree

3 files changed

+209
-4
lines changed

3 files changed

+209
-4
lines changed

src/metrax/__init__.py

Lines changed: 8 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -21,19 +21,23 @@
2121
from metrax.nlp_metrics import (
2222
Perplexity,
2323
)
24+
from metrax.ranking_metrics import (
25+
AveragePrecisionAtK,
26+
)
2427
from metrax.regression_metrics import (
2528
MSE,
2629
RMSE,
2730
RSQUARED,
2831
)
2932

3033
# Public API of the metrax package, sorted alphabetically
# (all-uppercase names first, then CamelCase).
__all__ = [
    "AUCPR",
    "AUCROC",
    "AveragePrecisionAtK",
    "MSE",
    "Perplexity",
    "Precision",
    "Recall",
    "RMSE",
    "RSQUARED",
]

src/metrax/ranking_metrics.py

Lines changed: 120 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,120 @@
1+
# Copyright 2024 Google LLC
2+
#
3+
# Licensed under the Apache License, Version 2.0 (the "License");
4+
# you may not use this file except in compliance with the License.
5+
# You may obtain a copy of the License at
6+
#
7+
# http://www.apache.org/licenses/LICENSE-2.0
8+
#
9+
# Unless required by applicable law or agreed to in writing, software
10+
# distributed under the License is distributed on an "AS IS" BASIS,
11+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
# See the License for the specific language governing permissions and
13+
# limitations under the License.
14+
15+
"""A collection of different metrics for ranking models."""
16+
17+
from clu import metrics as clu_metrics
18+
import flax
19+
import jax
20+
import jax.numpy as jnp
21+
22+
23+
def _divide_no_nan(x: jax.Array, y: jax.Array) -> jax.Array:
24+
"""Computes a safe divide which returns 0 if the y is zero."""
25+
return jnp.where(y != 0, jnp.divide(x, y), 0.0)
26+
27+
28+
@flax.struct.dataclass
class AveragePrecisionAtK(clu_metrics.Average):
  r"""Computes AP@k (average precision at k) metrics in JAX.

  Average precision at k (AP@k) is a metric used to evaluate the performance of
  ranking models. It measures the sum of precision at k where the item at
  the kth rank is relevant, divided by the total number of relevant items.

  Given the top :math:`K` recommendations, AP@K is calculated as:

  .. math::
      AP@K = \frac{1}{r}\sum_{k=1}^{K} P(k) \cdot rel(k)

  where :math:`r` is the total number of relevant items, :math:`P(k)` is the
  precision of the top :math:`k` recommendations, and

  .. math::
      rel(k) =
      \begin{cases}
      1 & \text{if the item at rank } k \text{ is relevant} \\
      0 & \text{otherwise}
      \end{cases}
  """

  @classmethod
  def average_precision_at_ks(
      cls, predictions: jax.Array, labels: jax.Array, ks: jax.Array
  ) -> jax.Array:
    """Computes AP@k (average precision at k) metrics for each of k in ks.

    Args:
      predictions: A floating point 2D vector representing the prediction
        generated from the model. The shape should be (batch_size, vocab_size).
      labels: A multi-hot encoding of the true label. The shape should be
        (batch_size, vocab_size).
      ks: A 1D vector of integers representing the k's to compute the MAP@k
        metrics. The shape should be (|ks|).

    Returns:
      Rank-2 tensor of shape [batch, |ks|] containing AP@k metrics.
    """
    # Indices of the max(ks) highest-scoring items per row, best first.
    top_k_indices = jnp.argsort(-predictions, axis=1)[:, : jnp.max(ks)]
    # Binarize the labels: any value >= 1 counts as relevant.
    labels = jnp.array(labels >= 1, dtype=jnp.float32)
    total_relevant = labels.sum(axis=1)

    def compute_ap_at_k_single(relevant_labels, total_relevant, ks):
      # Per-rank precision@i at every rank i where the item is relevant,
      # and 0 at ranks holding irrelevant items.
      cumulative_precision = jnp.where(
          relevant_labels,
          _divide_no_nan(
              jnp.cumsum(relevant_labels),
              jnp.arange(1, len(relevant_labels) + 1),
          ),
          0,
      )
      # AP@k = (sum of precision at relevant ranks within the top k) / r,
      # with 0 when the row has no relevant items at all.
      return jnp.array([
          _divide_no_nan(jnp.sum(cumulative_precision[:k]), total_relevant)
          for k in ks
      ])

    # Vectorize over the batch dimension; `ks` is shared across rows.
    vmap_compute_ap_at_k = jax.vmap(
        compute_ap_at_k_single, in_axes=(0, 0, None), out_axes=0
    )

    ap_at_ks = vmap_compute_ap_at_k(
        jnp.take_along_axis(labels, top_k_indices, axis=1), total_relevant, ks
    )
    return ap_at_ks

  @classmethod
  def from_model_output(
      cls,
      predictions: jax.Array,
      labels: jax.Array,
      ks: jax.Array,
  ) -> 'AveragePrecisionAtK':
    """Updates the metric.

    Args:
      predictions: A floating point 2D vector representing the prediction
        generated from the model. The shape should be (batch_size, vocab_size).
      labels: A multi-hot encoding of the true label. The shape should be
        (batch_size, vocab_size).
      ks: A 1D vector of integers representing the k's to compute the MAP@k
        metrics. The shape should be (|ks|).

    Returns:
      The AveragePrecisionAtK metric. The shape should be (|ks|).

    Raises:
      ValueError: If type of `labels` is wrong or the shapes of `predictions`
        and `labels` are incompatible.
    """
    ap_at_ks = cls.average_precision_at_ks(predictions, labels, ks)
    return cls(
        # Per-k sums over the batch; `Average.compute()` divides by `count`
        # to yield the mean AP@k across examples.
        total=ap_at_ks.sum(axis=0),
        # One observation per example in the batch. (Previously computed by
        # summing a freshly allocated ones array of shape (batch, 1).)
        count=jnp.array(labels.shape[0], dtype=jnp.float32),
    )

src/metrax/ranking_metrics_test.py

Lines changed: 81 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,81 @@
1+
# Copyright 2024 Google LLC
2+
#
3+
# Licensed under the Apache License, Version 2.0 (the "License");
4+
# you may not use this file except in compliance with the License.
5+
# You may obtain a copy of the License at
6+
#
7+
# http://www.apache.org/licenses/LICENSE-2.0
8+
#
9+
# Unless required by applicable law or agreed to in writing, software
10+
# distributed under the License is distributed on an "AS IS" BASIS,
11+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
# See the License for the specific language governing permissions and
13+
# limitations under the License.
14+
15+
"""Tests for metrax ranking metrics."""
16+
17+
from absl.testing import absltest
18+
from absl.testing import parameterized
19+
import jax.numpy as jnp
20+
import metrax
21+
import numpy as np
22+
23+
# Seed once so every fixture below is reproducible; the draw order matters,
# so do not reorder the array constructions.
np.random.seed(42)
BATCH_SIZE = 4
VOCAB_SIZE = 8
# Multi-hot relevance labels, shape (BATCH_SIZE, VOCAB_SIZE).
OUTPUT_LABELS = np.random.randint(
    0,
    2,
    size=(BATCH_SIZE, VOCAB_SIZE),
).astype(np.float32)
# Model scores in [0, 1), shape (BATCH_SIZE, VOCAB_SIZE).
OUTPUT_PREDS = np.random.uniform(size=(BATCH_SIZE, VOCAB_SIZE)).astype(
    np.float32
)
# Degenerate case: vocabulary of size one.
OUTPUT_LABELS_VS1 = np.random.randint(
    0,
    2,
    size=(BATCH_SIZE, 1),
).astype(np.float32)
OUTPUT_PREDS_VS1 = np.random.uniform(size=(BATCH_SIZE, 1)).astype(np.float32)
# TODO(jiwonshin): Replace with keras metric once it is available in OSS.
# Golden mean-AP values for ks = [1, 2, 3, 4, 5, 6], precomputed with Keras.
MAP_FROM_KERAS = np.array([
    0.2083333432674408,
    0.4791666865348816,
    0.4791666865348816,
    0.5416666865348816,
    0.574999988079071,
    0.637499988079071,
])
MAP_FROM_KERAS_VS1 = np.array([0.75, 0.75, 0.75, 0.75, 0.75, 0.75])
50+
51+
52+
class RankingMetricsTest(parameterized.TestCase):
  """Unit tests for metrax ranking metrics."""

  @parameterized.named_parameters(
      ('basic', OUTPUT_LABELS, OUTPUT_PREDS, MAP_FROM_KERAS),
      (
          'vocab_size_one',
          OUTPUT_LABELS_VS1,
          OUTPUT_PREDS_VS1,
          MAP_FROM_KERAS_VS1,
      ),
  )
  def test_averageprecisionatk(self, labels, predictions, expected):
    """Checks `AveragePrecisionAtK` against golden values from Keras."""
    top_ks = jnp.array([1, 2, 3, 4, 5, 6])
    metric = metrax.AveragePrecisionAtK.from_model_output(
        predictions=predictions,
        labels=labels,
        ks=top_ks,
    )
    actual = metric.compute()
    np.testing.assert_allclose(actual, expected, rtol=1e-05, atol=1e-05)
78+
79+
80+
# Run the test suite when executed directly (absltest handles flag parsing).
if __name__ == '__main__':
  absltest.main()

0 commit comments

Comments
 (0)