Merge pull request #1065 from samiravaez/bugfix/ndcg

mdekstrand · web-flow · commit ca1ee7a4e03a · 2026-04-07T13:45:03.000-04:00
Filter negative gains in NDCG calculation
diff --git a/src/lenskit/metrics/ranking/_dcg.py b/src/lenskit/metrics/ranking/_dcg.py
@@ -45,6 +45,12 @@ class NDCG(ListMetric, RankingMetricBase):
         \\mathrm{nDCG}(L, u) & = \\frac{\\mathrm{DCG}(L,u)}{\\mathrm{DCG}(L_{\\mathrm{ideal}}, u)}
         \\end{align*}
 
+    .. note::
+        Negative gains are clipped to zero before computing NDCG.
+        This keeps the metric bounded between 0 and 1 and prevents cases where
+        negative gains can lead to misleading positive scores due to
+        cancellation effects.
+
     Args:
         n:
             The maximum recommendation list length to consider (longer lists are
@@ -105,13 +110,17 @@ def measure_list(self, recs: ItemList, test: ItemList) -> float:
             gains = test.field(self.gain, "pandas", index="ids")
             if gains is None:
                 raise KeyError(f"test items have no field {self.gain}")
+            gains = gains.clip(lower=0)
             if self.n:
                 gains = gains.nlargest(n=self.n)
             else:
                 gains = gains.sort_values(ascending=False)
             iweight = self.weight.weight(np.arange(1, len(gains) + 1))
             ideal = np.dot(gains.values, iweight).item()  # type: ignore
 
+            if ideal == 0:
+                return 0.0
+
         else:
             realized = _binary_dcg(recs, test, self.weight)
             n = len(test)
@@ -201,6 +210,8 @@ def _graded_dcg(
     if gains is None:
         raise KeyError(f"test items have no field {field}")
 
+    gains = gains.clip(lower=0)
+
     ranks = recs.ranks(format="pandas")
     if ranks is None:
         raise TypeError("item list is not ordered")
diff --git a/tests/eval/test_rank_ndcg.py b/tests/eval/test_rank_ndcg.py
@@ -110,3 +110,17 @@ def test_ndcg_alt_discount(items, n):
         e.add_note(f"recs: {recs}")
         e.add_note(f"truth: {truth}")
         raise e
+
+
+@mark.parametrize(
+    "ratings, expected_ndcg",
+    [
+        ([-1, -2, -3, -4, -5], 0.0),  # all negative
+        ([-6, -2, 3, 1, -3], 0.5982),  # mixed
+    ],
+)
+def test_ndcg_negative_gains(ratings, expected_ndcg):
+    recs = ItemList([1, 2, 3, 4, 5], ordered=True)
+    truth = ItemList([1, 2, 3, 4, 5], rating=ratings)
+    val = call_metric(NDCG, recs, truth, gain="rating")
+    assert val == approx(expected_ndcg, rel=1e-3)