Skip to content

Commit ca1ee7a

Browse files
authored
Merge pull request #1065 from samiravaez/bugfix/ndcg
Filter negative gains in NDCG calculation
2 parents 6f33824 + e2b71b5 commit ca1ee7a

File tree

2 files changed

+25
-0
lines changed

2 files changed

+25
-0
lines changed

src/lenskit/metrics/ranking/_dcg.py

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -45,6 +45,11 @@ class NDCG(ListMetric, RankingMetricBase):
4545
\\mathrm{nDCG}(L, u) & = \\frac{\\mathrm{DCG}(L,u)}{\\mathrm{DCG}(L_{\\mathrm{ideal}}, u)}
4646
\\end{align*}
4747
48+
.. note::
49+
Negative gains are clipped to zero before computing NDCG.
50+
This keeps the metric bounded between 0 and 1 and prevents cases where
51+
negative gains can lead to misleading positive scores due to
52+
cancellation effects (e.g. a negative DCG divided by a negative ideal DCG).
4853
Args:
4954
n:
5055
The maximum recommendation list length to consider (longer lists are
@@ -105,13 +110,17 @@ def measure_list(self, recs: ItemList, test: ItemList) -> float:
105110
gains = test.field(self.gain, "pandas", index="ids")
106111
if gains is None:
107112
raise KeyError(f"test items have no field {self.gain}")
113+
gains = gains.clip(lower=0)
108114
if self.n:
109115
gains = gains.nlargest(n=self.n)
110116
else:
111117
gains = gains.sort_values(ascending=False)
112118
iweight = self.weight.weight(np.arange(1, len(gains) + 1))
113119
ideal = np.dot(gains.values, iweight).item() # type: ignore
114120

121+
if ideal == 0:
122+
return 0.0
123+
115124
else:
116125
realized = _binary_dcg(recs, test, self.weight)
117126
n = len(test)
@@ -201,6 +210,8 @@ def _graded_dcg(
201210
if gains is None:
202211
raise KeyError(f"test items have no field {field}")
203212

213+
gains = gains.clip(lower=0)
214+
204215
ranks = recs.ranks(format="pandas")
205216
if ranks is None:
206217
raise TypeError("item list is not ordered")

tests/eval/test_rank_ndcg.py

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -110,3 +110,17 @@ def test_ndcg_alt_discount(items, n):
110110
e.add_note(f"recs: {recs}")
111111
e.add_note(f"truth: {truth}")
112112
raise e
113+
114+
115+
@mark.parametrize(
116+
"ratings, expected_ndcg",
117+
[
118+
([-1, -2, -3, -4, -5], 0.0), # all negative
119+
([-6, -2, 3, 1, -3], 0.5982), # mixed
120+
],
121+
)
122+
def test_ndcg_negative_gains(ratings, expected_ndcg):
123+
recs = ItemList([1, 2, 3, 4, 5], ordered=True)
124+
truth = ItemList([1, 2, 3, 4, 5], rating=ratings)
125+
val = call_metric(NDCG, recs, truth, gain="rating")
126+
assert val == approx(expected_ndcg, rel=1e-3)

0 commit comments

Comments
 (0)