Skip to content

Commit f892292

Browse files
committed
add DCGAtK to metrax
1 parent cae0c2a commit f892292

File tree

6 files changed

+155
-5
lines changed

6 files changed

+155
-5
lines changed

src/metrax/__init__.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,7 @@
2323
Average = base.Average
2424
AveragePrecisionAtK = ranking_metrics.AveragePrecisionAtK
2525
BLEU = nlp_metrics.BLEU
26+
DCGAtK = ranking_metrics.DCGAtK
2627
MSE = regression_metrics.MSE
2728
Perplexity = nlp_metrics.Perplexity
2829
Precision = classification_metrics.Precision
@@ -42,6 +43,7 @@
4243
"Average",
4344
"AveragePrecisionAtK",
4445
"BLEU",
46+
"DCGAtK",
4547
"MSE",
4648
"Perplexity",
4749
"Precision",

src/metrax/metrax_test.py

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -70,6 +70,15 @@ class MetraxTest(parameterized.TestCase):
7070
'ks': KS,
7171
},
7272
),
73+
(
    'dcgAtK',
    metrax.DCGAtK,
    {
        # Fix transposed arguments: prediction scores must go to
        # 'predictions' and relevance labels to 'labels' (the original
        # entry fed OUTPUT_LABELS as predictions and vice versa).
        'predictions': OUTPUT_PREDS,
        'labels': OUTPUT_LABELS,
        'ks': KS,
    },
),
7382
(
7483
'mse',
7584
metrax.MSE,

src/metrax/nnx/__init__.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,7 @@
1919
Average = nnx_metrics.Average
2020
AveragePrecisionAtK = nnx_metrics.AveragePrecisionAtK
2121
BLEU = nnx_metrics.BLEU
22+
DCGAtK = nnx_metrics.DCGAtK
2223
MSE = nnx_metrics.MSE
2324
Perplexity = nnx_metrics.Perplexity
2425
Precision = nnx_metrics.Precision
@@ -38,6 +39,7 @@
3839
"Average",
3940
"AveragePrecisionAtK",
4041
"BLEU",
42+
"DCGAtK",
4143
"MSE",
4244
"Perplexity",
4345
"Precision",

src/metrax/nnx/nnx_metrics.py

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -53,6 +53,13 @@ def __init__(self):
5353
super().__init__(metrax.BLEU)
5454

5555

56+
class DCGAtK(NnxWrapper):
  """NNX wrapper around the Metrax ranking metric ``DCGAtK``."""

  def __init__(self):
    # All state handling is delegated to the generic NNX wrapper; this
    # class only binds it to the DCGAtK metric implementation.
    super().__init__(metrax.DCGAtK)
61+
62+
5663
class MSE(NnxWrapper):
5764
"""An NNX class for the Metrax metric MSE."""
5865

src/metrax/ranking_metrics.py

Lines changed: 91 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,92 @@
2121
from metrax import base
2222

2323

24+
@flax.struct.dataclass
class DCGAtK(base.Average):
  r"""Computes DCG@k (Discounted Cumulative Gain at k) metrics.

  For labels $y$ and scores $s$:

  $DCG@k(y, s) = \sum_{i | \text{rank}(s_i) \le k} \text{gain}(y_i) \times
  \text{rank\_discount}(\text{rank}(s_i))$

  where $\text{rank}(s_i)$ is the 1-based rank of item $i$ under descending
  scores, the gain is $\text{gain}(y_i) = 2^{y_i} - 1$ ('exp2' gain), and the
  rank discount is $1 / \log_2(\text{rank} + 1)$.
  """

  @classmethod
  def _calculate_dcg_at_ks(
      cls,
      predictions: jax.Array,
      labels: jax.Array,
      ks: jax.Array,
  ) -> jax.Array:
    """Computes per-example DCG@k for every k in `ks`, using 'exp2' gain.

    Args:
      predictions: A floating point 2D array (batch_size, vocab_size) of
        prediction scores. Higher scores mean higher rank.
      labels: A 2D array (batch_size, vocab_size) of graded relevance scores.
      ks: A 1D integer array of cut-off points for which DCG is computed
        (e.g., jnp.array([1, 5, 10])). Shape: (num_ks,).

    Returns:
      A 2D array (batch_size, num_ks) containing DCG@k values.
    """
    # 1-based rank of each item under descending score order; the double
    # argsort turns a sort permutation into per-item ranks.
    ranks = jnp.argsort(jnp.argsort(-predictions, axis=1), axis=1) + 1
    # Per-item contribution: exp2 gain scaled by the logarithmic discount.
    gains = jnp.power(2.0, labels.astype(jnp.float32)) - 1.0
    discounts = 1.0 / jnp.log2(ranks.astype(jnp.float32) + 1.0)
    contributions = gains * discounts

    # Broadcast to (batch_size, num_ks, vocab_size): an item contributes to
    # DCG@k exactly when its rank is within the top k. Reducing over the
    # vocab axis yields one DCG value per (example, k) pair.
    within_top_k = ranks[:, None, :] <= ks[None, :, None]
    return jnp.sum(contributions[:, None, :] * within_top_k, axis=-1)

  @classmethod
  def from_model_output(
      cls,
      predictions: jax.Array,
      labels: jax.Array,
      ks: jax.Array,
  ) -> 'DCGAtK':
    """Creates a DCGAtK metric instance from model output.

    Args:
      predictions: A floating point 2D array (batch_size, vocab_size) of
        prediction scores.
      labels: A 2D array (batch_size, vocab_size) of graded relevance scores.
      ks: A 1D integer array of cut-off points. Shape: (num_ks,).

    Returns:
      A `DCGAtK` accumulating the per-k DCG sum over the batch as `total`
      and the number of examples as `count` (averaged via `base.Average`).
    """
    per_example_dcg = cls._calculate_dcg_at_ks(predictions, labels, ks)
    batch_size = jnp.array(labels.shape[0], dtype=jnp.float32)
    return cls(
        total=per_example_dcg.sum(axis=0),
        count=batch_size,
    )
108+
109+
24110
@flax.struct.dataclass
25111
class AveragePrecisionAtK(base.Average):
26112
r"""Computes AP@k (average precision at k) metrics.
@@ -151,10 +237,10 @@ def _get_relevant_at_k(
151237
predictions: A floating point 2D array representing the prediction scores
152238
from the model. Higher scores indicate higher relevance. The shape
153239
should be (batch_size, vocab_size).
154-
labels: A multi-hot encoding (0 or 1, or counts) of the true labels. The
155-
shape should be (batch_size, vocab_size).
156-
ks: A 1D array of integers representing the k's (cut-off points) for which
157-
to compute metrics. The shape should be (|ks|).
240+
labels: A multi-hot encoding (0 or 1) of the true labels. The shape should
241+
be (batch_size, vocab_size).
242+
ks: A 1D array of integers representing the k's to compute the P@k
243+
metrics. The shape should be (|ks|).
158244
159245
Returns:
160246
relevant_at_k: A 2D array of shape (batch_size, |ks|). Each element [i, j]
@@ -279,7 +365,7 @@ class to get the number of relevant items at each k, and then divides
279365

280366
@flax.struct.dataclass
281367
class RecallAtK(TopKRankingMetric):
282-
r"""Computes R@k (recall at k) metrics in JAX.
368+
r"""Computes R@k (recall at k) metrics.
283369
284370
Recall at k (R@k) is a metric that measures the proportion of
285371
relevant items that are found in the top k recommendations, out of the

src/metrax/ranking_metrics_test.py

Lines changed: 44 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -32,12 +32,22 @@
3232
OUTPUT_PREDS = np.random.uniform(size=(BATCH_SIZE, VOCAB_SIZE)).astype(
3333
np.float32
3434
)
35+
OUTPUT_RELEVANCES = np.random.randint(
36+
0,
37+
2,
38+
size=(BATCH_SIZE, VOCAB_SIZE),
39+
).astype(np.float32)
3540
OUTPUT_LABELS_VS1 = np.random.randint(
3641
0,
3742
2,
3843
size=(BATCH_SIZE, 1),
3944
).astype(np.float32)
4045
OUTPUT_PREDS_VS1 = np.random.uniform(size=(BATCH_SIZE, 1)).astype(np.float32)
46+
OUTPUT_RELEVANCES_VS1 = np.random.randint(
47+
0,
48+
2,
49+
size=(BATCH_SIZE, 1),
50+
).astype(np.float32)
4151
# TODO(jiwonshin): Replace with keras metric once it is available in OSS.
4252
MAP_FROM_KERAS = np.array([
4353
0.2083333432674408,
@@ -59,6 +69,15 @@
5969
0.75,
6070
])
6171
R_FROM_KERAS_VS1 = np.array([0.75, 0.75, 0.75, 0.75, 0.75, 0.75])
72+
DCG_FROM_KERAS = np.array([
73+
0.25,
74+
0.880929708480835,
75+
1.255929708480835,
76+
1.5789371728897095,
77+
1.8690768480300903,
78+
2.04718017578125,
79+
])
80+
DCG_FROM_KERAS_VS1 = np.array([0.75, 0.75, 0.75, 0.75, 0.75, 0.75])
6281

6382

6483
class RankingMetricsTest(parameterized.TestCase):
@@ -150,6 +169,31 @@ def test_recallatk(self, y_true, y_pred, map_from_keras):
150169
atol=1e-05,
151170
)
152171

172+
@parameterized.named_parameters(
    ('basic', OUTPUT_RELEVANCES, OUTPUT_PREDS, DCG_FROM_KERAS),
    (
        'vocab_size_one',
        OUTPUT_RELEVANCES_VS1,
        OUTPUT_PREDS_VS1,
        DCG_FROM_KERAS_VS1,
    ),
)
def test_dcgatk(self, y_true, y_pred, map_from_keras):
  """Checks `DCGAtK` against reference values computed with Keras."""
  cutoffs = jnp.array([1, 2, 3, 4, 5, 6])
  metric = metrax.DCGAtK.from_model_output(
      predictions=y_pred, labels=y_true, ks=cutoffs
  )
  # Loose float tolerances to absorb accelerator-dependent rounding.
  np.testing.assert_allclose(
      metric.compute(), map_from_keras, rtol=1e-05, atol=1e-05
  )
196+
153197

154198
if __name__ == '__main__':
155199
absltest.main()

0 commit comments

Comments
 (0)