Skip to content
Open
Show file tree
Hide file tree
Changes from 5 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
52 changes: 47 additions & 5 deletions src/lenskit/metrics/ranking/_rbp.py
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,27 @@ def rank_biased_precision(
return rbp / normalization


def graded_rank_biased_precision(
    relevance: np.ndarray, weights: np.ndarray, normalization: float = 1.0
) -> float:
    """
    Compute graded rank-biased precision.

    The score is the weight-dot-relevance sum, optionally divided by a
    normalization constant (e.g. the maximum achievable weighted score).

    Args:
        relevance:
            Float array of relevance/grade scores, one per list position.
        weights:
            Positional weight array, same length as ``relevance``.
        normalization:
            Divisor applied to the raw score; defaults to 1.0 (no scaling).

    Returns:
        The (possibly normalized) graded RBP score.
    """
    # element-wise product, then reduce to a Python float before scaling
    weighted = np.multiply(weights, relevance)
    raw_score = float(np.sum(weighted))
    return raw_score / normalization


class RBP(ListMetric, RankingMetricBase):
"""
Evaluate recommendations with rank-biased precision :cite:p:`rbp`.
Expand Down Expand Up @@ -63,6 +84,9 @@ class RBP(ListMetric, RankingMetricBase):
in the paper; however, RBP with high patience should be no worse than nDCG
(and perhaps even better) in this regard.

This metric class supports relevance grades :math:`r_{ui} \\in [0, 1]`
via an optional ``grade_field``.
Comment on lines +87 to +88
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

need to add grade_field to the Args: documentation.


In recommender evaluation, we usually have a small test set, so the maximum
achievable RBP is significantly less than the theoretical maximum, and is a
function of the number of test items. With ``normalize=True``, the RBP
Expand Down Expand Up @@ -99,6 +123,8 @@ class RBP(ListMetric, RankingMetricBase):
patience: float
normalize: bool
weight_field: str | None
grade_field: str | None
unknown_grade: float

def __init__(
self,
Expand All @@ -109,6 +135,8 @@ def __init__(
patience: float = 0.85,
normalize: bool = False,
weight_field: str | None = None,
grade_field: str | None = None,
unknown_grade: float = 0.25,
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

unknown_grade should default to 0; 0.25 is specific to our experiment.

):
super().__init__(n, k=k)
self.patience = patience
Expand All @@ -117,13 +145,16 @@ def __init__(
self.weight = weight
self.normalize = normalize
self.weight_field = weight_field
self.grade_field = grade_field
self.unknown_grade = unknown_grade

@property
def label(self):
    """Metric label: ``RBP`` or ``GradedRBP``, with ``@n`` when truncated."""
    name = "GradedRBP" if self.grade_field is not None else "RBP"
    if self.n is None:
        return name
    return f"{name}@{self.n}"
Comment on lines 152 to +157
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Good.


@override
def measure_list(self, recs: ItemList, test: ItemList) -> float:
Expand All @@ -134,8 +165,6 @@ def measure_list(self, recs: ItemList, test: ItemList) -> float:
if nrel == 0:
return np.nan

good = recs.isin(test)

if self.weight_field is not None:
# use custom weights from field
weights = recs.field(self.weight_field)
Expand All @@ -158,4 +187,17 @@ def measure_list(self, recs: ItemList, test: ItemList) -> float:
else:
normalization = np.sum(weights).item()

return rank_biased_precision(good, weights, normalization)
# Binary relevance
if self.grade_field is None:
good = recs.isin(test)
return rank_biased_precision(good, weights, normalization)

# Graded relevance
if self.grade_field not in test._fields:
raise ValueError(f"Grade field '{self.grade_field}' not found in test ItemList")

grades = test.field(self.grade_field)
Comment on lines +196 to +199
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We usually should not poke around inside other classes' private data — _fields is not a public member of item list. It might be a good idea to add a public method to access the list of fields if we don't have one yet.

field will return None if the field is not found, so just call it and check the result for None.

grade_map = dict(zip(test.ids(), grades))
relevance = np.array([grade_map.get(item, self.unknown_grade) for item in recs.ids()])
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Missing test items get unknown_grade with a default of 0.25, and the tests also check for that same default. Was that intentional? It doesn't seem right to me.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Good point. It probably makes sense to split this into two tests: one that verifies the default behavior, and another that verifies the parameter is actually being used.


return graded_rank_biased_precision(relevance, weights, normalization)
35 changes: 35 additions & 0 deletions tests/eval/test_rank_rbp.py
Original file line number Diff line number Diff line change
Expand Up @@ -128,3 +128,38 @@ def test_rank_biased_precision():
weights = np.array([1.0, 0.8, 0.6, 0.4, 0.2])
result = rank_biased_precision(good, weights, normalization=3.0)
assert result == approx(1.2 / 3.0)


# test for graded rbp


def test_rbp_empty_graded():
    # An empty recommendation list scores 0 when the truth list is non-empty.
    empty_recs = ItemList([], ordered=True)
    graded_truth = ItemList(item_ids=[1, 2, 3], grade=[1.0, 1.0, 1.0])

    graded_rbp = RBP(grade_field="grade")
    assert graded_rbp.measure_list(empty_recs, graded_truth) == approx(0.0)


def test_rbp_unknown_grade():
    # Items absent from the truth list are scored with the unknown_grade value.
    recs = ItemList([1, 2], ordered=True)
    truth = ItemList(item_ids=[1], grade=[1.0])

    patience = 0.5
    metric = RBP(patience=patience, grade_field="grade", unknown_grade=0.25)

    # position 1: grade 1.0; position 2: not in truth, so unknown_grade applies
    expected = (1 - patience) * (1.0 + 0.25 * patience)
    assert metric.measure_list(recs, truth) == approx(expected)


def test_rbp_binary_vs_graded_equivalent():
    # When every grade is 1.0, graded RBP must agree with binary RBP.
    recs = ItemList([1, 3], ordered=True)

    truth_graded = ItemList(item_ids=[1, 3], grade=[1.0, 1.0])
    truth_binary = ItemList([1, 3])  # no grade field -> binary relevance

    graded_score = RBP(grade_field="grade").measure_list(recs, truth_graded)
    binary_score = RBP().measure_list(recs, truth_binary)

    assert graded_score == approx(binary_score)
Loading