Skip to content

Commit 8da455b

Browse files
authored
add precisionAtK to metrax (#72)
* add precisionAtK to metrax * add back lines that were deleted by accident * add changes for ranking_metrics_test * match Keras behavior for invalid ks * drop "JAX" from JAX arrays
1 parent 66396c1 commit 8da455b

File tree

7 files changed

+124
-6
lines changed

7 files changed

+124
-6
lines changed

src/metrax/__init__.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -26,6 +26,7 @@
2626
MSE = regression_metrics.MSE
2727
Perplexity = nlp_metrics.Perplexity
2828
Precision = classification_metrics.Precision
29+
PrecisionAtK = ranking_metrics.PrecisionAtK
2930
RMSE = regression_metrics.RMSE
3031
RSQUARED = regression_metrics.RSQUARED
3132
Recall = classification_metrics.Recall
@@ -43,6 +44,7 @@
4344
"MSE",
4445
"Perplexity",
4546
"Precision",
47+
"PrecisionAtK",
4648
"RMSE",
4749
"RSQUARED",
4850
"Recall",

src/metrax/metrax_test.py

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -84,6 +84,15 @@ class MetraxTest(parameterized.TestCase):
8484
metrax.Precision,
8585
{'predictions': OUTPUT_LABELS, 'labels': OUTPUT_PREDS},
8686
),
87+
(
88+
'precisionAtK',
89+
metrax.PrecisionAtK,
90+
{
91+
'predictions': OUTPUT_LABELS,
92+
'labels': OUTPUT_PREDS,
93+
'ks': np.array([3]),
94+
},
95+
),
8796
(
8897
'rmse',
8998
metrax.RMSE,

src/metrax/nnx/__init__.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,7 @@
2222
MSE = nnx_metrics.MSE
2323
Perplexity = nnx_metrics.Perplexity
2424
Precision = nnx_metrics.Precision
25+
PrecisionAtK = nnx_metrics.PrecisionAtK
2526
RMSE = nnx_metrics.RMSE
2627
RSQUARED = nnx_metrics.RSQUARED
2728
Recall = nnx_metrics.Recall
@@ -39,6 +40,7 @@
3940
"MSE",
4041
"Perplexity",
4142
"Precision",
43+
"PrecisionAtK",
4244
"RMSE",
4345
"RSQUARED",
4446
"Recall",

src/metrax/nnx/nnx_metrics.py

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -74,6 +74,13 @@ def __init__(self):
7474
super().__init__(metrax.Precision)
7575

7676

77+
class PrecisionAtK(NnxWrapper):
  """NNX wrapper exposing the Metrax ``PrecisionAtK`` ranking metric."""

  def __init__(self):
    # Delegate all state handling to the generic NNX wrapper, parameterized
    # with the underlying Metrax metric class.
    super().__init__(metrax.PrecisionAtK)
82+
83+
7784
class Recall(NnxWrapper):
7885
"""An NNX class for the Metrax metric Recall."""
7986

src/metrax/nnx/nnx_metrics_test.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -36,7 +36,7 @@ def test_nnx_metrics_exists(self):
3636
key for key, metric in inspect.getmembers(metrax.nnx)
3737
if inspect.isclass(metric) and issubclass(metric, nnx.Metric)
3838
]
39-
self.assertGreater(len(metrax_metric_keys), 0)
39+
self.assertNotEmpty(metrax_metric_keys)
4040
self.assertSameElements(metrax_metric_keys, metrax_nnx_metric_keys)
4141

4242

src/metrax/ranking_metrics.py

Lines changed: 76 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -54,9 +54,9 @@ def average_precision_at_ks(
5454
metrics. The shape should be (|ks|).
5555
5656
Returns:
57-
Rank-2 tensor of shape [batch, |ks|] containing AP@k metrics.
57+
Rank-2 tensor of shape (batch, |ks|) containing AP@k metrics.
5858
"""
59-
sorted_indices = jnp.argsort(-predictions, axis=1)
59+
indices_by_rank = jnp.argsort(-predictions, axis=1)
6060
labels = jnp.array(labels >= 1, dtype=jnp.float32)
6161
total_relevant = labels.sum(axis=1)
6262

@@ -88,7 +88,7 @@ def compute_ap_at_k_single(relevant_labels, total_relevant, ks):
8888
)
8989

9090
ap_at_ks = vmap_compute_ap_at_k(
91-
jnp.take_along_axis(labels, sorted_indices, axis=1), total_relevant, ks
91+
jnp.take_along_axis(labels, indices_by_rank, axis=1), total_relevant, ks
9292
)
9393
return ap_at_ks
9494

@@ -117,8 +117,79 @@ def from_model_output(
117117
and `labels` are incompatible.
118118
"""
119119
ap_at_ks = cls.average_precision_at_ks(predictions, labels, ks)
120-
count = jnp.ones((labels.shape[0], 1), dtype=jnp.float32)
120+
num_examples = jnp.array(labels.shape[0], dtype=jnp.float32)
121121
return cls(
122122
total=ap_at_ks.sum(axis=0),
123-
count=count.sum(),
123+
count=num_examples,
124124
)
125+
126+
127+
@flax.struct.dataclass
class PrecisionAtK(base.Average):
  r"""Precision-at-k (P@k) metric implemented with JAX.

  P@k measures, for each example, the fraction of the top-:math:`K`
  ranked items that are actually relevant:

  .. math::
      Precision@K = \frac{\text{Number of relevant items in top K}}{K}

  Per-example P@k values are summed into ``total`` and averaged over the
  example ``count`` when ``.compute()`` is called.
  """

  @classmethod
  def precision_at_ks(
      cls, predictions: jax.Array, labels: jax.Array, ks: jax.Array
  ) -> jax.Array:
    """Computes P@k for every k in `ks`, per batch example.

    Args:
      predictions: Float 2D array of model scores, shape
        (batch_size, vocab_size). Higher scores mean higher relevance.
      labels: Multi-hot (0/1) ground-truth relevance, shape
        (batch_size, vocab_size).
      ks: 1D integer array of cutoffs, shape (|ks|).

    Returns:
      A (batch_size, |ks|) array of P@k values.
    """
    # Binarize: any label value >= 1 counts as relevant.
    relevance = jnp.array(labels >= 1, dtype=jnp.float32)
    # Order each row's relevance flags from best- to worst-scored item.
    ranking = jnp.argsort(-predictions, axis=1)
    ranked_relevance = jnp.take_along_axis(relevance, ranking, axis=1)
    # hits[i, j] = number of relevant items among the top j+1 of example i.
    hits = jnp.cumsum(ranked_relevance, axis=1)

    num_items = predictions.shape[1]
    # Clamp oversized k's to the list length (Keras-compatible behavior):
    # such k's see the whole list and divide by its full length.
    hits_at_k = hits[:, jnp.minimum(ks - 1, num_items - 1)]
    denominators = jnp.minimum(ks, num_items)
    # divide_no_nan maps a zero denominator (k == 0) to a P@k of 0.
    return base.divide_no_nan(hits_at_k, denominators)

  @classmethod
  def from_model_output(
      cls,
      predictions: jax.Array,
      labels: jax.Array,
      ks: jax.Array,
  ) -> 'PrecisionAtK':
    """Builds a PrecisionAtK metric instance from one batch of model output.

    Per-example P@k values are summed (shape (|ks|)) and the example count
    recorded, so that `.compute()` later yields the batch-averaged P@k.

    Args:
      predictions: Float 2D array of model scores,
        shape (batch_size, vocab_size).
      labels: Multi-hot (0/1) ground-truth relevance,
        shape (batch_size, vocab_size).
      ks: 1D integer array of cutoffs, shape (|ks|).

    Returns:
      A PrecisionAtK object whose `total` has shape (|ks|) and whose
      `count` is a scalar.
    """
    per_example = cls.precision_at_ks(predictions, labels, ks)
    batch_size = jnp.array(labels.shape[0], dtype=jnp.float32)
    return cls(total=per_example.sum(axis=0), count=batch_size)

src/metrax/ranking_metrics_test.py

Lines changed: 27 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -48,6 +48,8 @@
4848
0.637499988079071,
4949
])
5050
MAP_FROM_KERAS_VS1 = np.array([0.75, 0.75, 0.75, 0.75, 0.75, 0.75])
51+
P_FROM_KERAS = np.array([0.75, 0.875, 0.58333337306976320, 0.5625, 0.5, 0.5])
52+
P_FROM_KERAS_VS1 = np.array([0.75, 0.75, 0.75, 0.75, 0.75, 0.75])
5153

5254

5355
class RankingMetricsTest(parameterized.TestCase):
@@ -89,6 +91,31 @@ def test_averageprecisionatk(self, y_true, y_pred, map_from_keras, jitted):
8991
atol=1e-05,
9092
)
9193

94+
@parameterized.named_parameters(
95+
('basic', OUTPUT_LABELS, OUTPUT_PREDS, P_FROM_KERAS),
96+
(
97+
'vocab_size_one',
98+
OUTPUT_LABELS_VS1,
99+
OUTPUT_PREDS_VS1,
100+
P_FROM_KERAS_VS1,
101+
),
102+
)
103+
def test_precisionatk(self, y_true, y_pred, map_from_keras):
104+
"""Test that `PrecisionAtK` Metric computes correct values."""
105+
ks = jnp.array([1, 2, 3, 4, 5, 6])
106+
metric = metrax.PrecisionAtK.from_model_output(
107+
predictions=y_pred,
108+
labels=y_true,
109+
ks=ks,
110+
)
111+
112+
np.testing.assert_allclose(
113+
metric.compute(),
114+
map_from_keras,
115+
rtol=1e-05,
116+
atol=1e-05,
117+
)
118+
92119

93120
if __name__ == '__main__':
94121
absltest.main()

0 commit comments

Comments
 (0)