Skip to content
Open
Show file tree
Hide file tree
Changes from 5 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
52 changes: 47 additions & 5 deletions src/lenskit/metrics/ranking/_rbp.py
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,27 @@ def rank_biased_precision(
return rbp / normalization


def graded_rank_biased_precision(
    relevance: np.ndarray, weights: np.ndarray, normalization: float = 1.0
) -> float:
    """
    Compute graded rank-biased precision.

    The score is the weight-dot-relevance sum, optionally divided by a
    normalization constant (e.g. the maximum achievable weighted score).

    Args:
        relevance:
            Float array of relevance/grade scores, one per list position.
        weights:
            Positional weight array, same length as ``relevance``.
        normalization:
            Divisor applied to the raw score; defaults to 1.0 (no scaling).

    Returns:
        The (possibly normalized) graded RBP score.
    """
    # element-wise product, then reduce to a Python float before scaling
    weighted = np.multiply(weights, relevance)
    raw_score = float(np.sum(weighted))
    return raw_score / normalization


class RBP(ListMetric, RankingMetricBase):
"""
Evaluate recommendations with rank-biased precision :cite:p:`rbp`.
Expand Down Expand Up @@ -63,6 +84,9 @@ class RBP(ListMetric, RankingMetricBase):
in the paper; however, RBP with high patience should be no worse than nDCG
(and perhaps even better) in this regard.

This metric class supports relevance grades :math:`r_{ui} \\in [0, 1]`
via an optional ``grade_field``.
Comment on lines +87 to +88
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

need to add grade_field to the Args: documentation.


In recommender evaluation, we usually have a small test set, so the maximum
achievable RBP is significantly less than the theoretical maximum, and is a
function of the number of test items. With ``normalize=True``, the RBP
Expand Down Expand Up @@ -99,6 +123,8 @@ class RBP(ListMetric, RankingMetricBase):
patience: float
normalize: bool
weight_field: str | None
grade_field: str | None
unknown_grade: float

def __init__(
self,
Expand All @@ -109,6 +135,8 @@ def __init__(
patience: float = 0.85,
normalize: bool = False,
weight_field: str | None = None,
grade_field: str | None = None,
unknown_grade: float = 0.25,
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

unknown_grade should default to 0; 0.25 is specific to our experiment.

):
super().__init__(n, k=k)
self.patience = patience
Expand All @@ -117,13 +145,16 @@ def __init__(
self.weight = weight
self.normalize = normalize
self.weight_field = weight_field
self.grade_field = grade_field
self.unknown_grade = unknown_grade

@property
def label(self):
    """Metric label: ``RBP`` or ``GradedRBP``, with ``@n`` when truncated."""
    name = "GradedRBP" if self.grade_field is not None else "RBP"
    if self.n is None:
        return name
    return f"{name}@{self.n}"
Comment on lines 152 to +157
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Good.


@override
def measure_list(self, recs: ItemList, test: ItemList) -> float:
Expand All @@ -134,8 +165,6 @@ def measure_list(self, recs: ItemList, test: ItemList) -> float:
if nrel == 0:
return np.nan

good = recs.isin(test)

if self.weight_field is not None:
# use custom weights from field
weights = recs.field(self.weight_field)
Expand All @@ -158,4 +187,17 @@ def measure_list(self, recs: ItemList, test: ItemList) -> float:
else:
normalization = np.sum(weights).item()

return rank_biased_precision(good, weights, normalization)
# Binary relevance
if self.grade_field is None:
good = recs.isin(test)
return rank_biased_precision(good, weights, normalization)

# Graded relevance
if self.grade_field not in test._fields:
raise ValueError(f"Grade field '{self.grade_field}' not found in test ItemList")

grades = test.field(self.grade_field)
Comment on lines +196 to +199
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We usually should not poke around inside other classes' private data — _fields is not a public member of item list. It might be a good idea to add a public method to access the list of fields if we don't have one yet.

field will return None if the field is not found, so just call it and check the result for None.

grade_map = dict(zip(test.ids(), grades))
relevance = np.array([grade_map.get(item, self.unknown_grade) for item in recs.ids()])
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Missing test items get unknown_grade with a default of 0.25, and the tests also check for that same default. Was that intentional? It doesn't seem right to me.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Good point. It probably makes sense to split this into two tests: one that verifies the default behavior, and another that verifies the parameter is actually being used.


return graded_rank_biased_precision(relevance, weights, normalization)
35 changes: 35 additions & 0 deletions tests/eval/test_rank_rbp.py
Original file line number Diff line number Diff line change
Expand Up @@ -128,3 +128,38 @@ def test_rank_biased_precision():
weights = np.array([1.0, 0.8, 0.6, 0.4, 0.2])
result = rank_biased_precision(good, weights, normalization=3.0)
assert result == approx(1.2 / 3.0)


# test for graded rbp


def test_rbp_empty_graded():
    # An empty recommendation list scores 0 when the truth list is non-empty.
    empty_recs = ItemList([], ordered=True)
    graded_truth = ItemList(item_ids=[1, 2, 3], grade=[1.0, 1.0, 1.0])

    graded_rbp = RBP(grade_field="grade")
    assert graded_rbp.measure_list(empty_recs, graded_truth) == approx(0.0)


def test_rbp_unknown_grade():
    # Items absent from the truth list are scored with the unknown_grade value.
    recs = ItemList([1, 2], ordered=True)
    truth = ItemList(item_ids=[1], grade=[1.0])

    patience = 0.5
    metric = RBP(patience=patience, grade_field="grade", unknown_grade=0.25)

    # position 1: grade 1.0; position 2: not in truth, so unknown_grade applies
    expected = (1 - patience) * (1.0 + 0.25 * patience)
    assert metric.measure_list(recs, truth) == approx(expected)


def test_rbp_binary_vs_graded_equivalent():
    # When every grade is 1.0, graded RBP must agree with binary RBP.
    recs = ItemList([1, 3], ordered=True)

    truth_graded = ItemList(item_ids=[1, 3], grade=[1.0, 1.0])
    truth_binary = ItemList([1, 3])  # no grade field -> binary relevance

    graded_score = RBP(grade_field="grade").measure_list(recs, truth_graded)
    binary_score = RBP().measure_list(recs, truth_binary)

    assert graded_score == approx(binary_score)
Loading