Skip to content

Commit 175611c

Browse files
sephibsjmonson
authored and committed
fix: update duplicate percentile filtering to retain largest values for accuracy
Signed-off-by: Joseph Berry <joberry@redhat.com>
1 parent 9238934 commit 175611c

2 files changed

Lines changed: 21 additions & 16 deletions

File tree

src/guidellm/benchmark/outputs/html.py

Lines changed: 8 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -246,27 +246,31 @@ def _filter_duplicate_percentiles(percentiles: dict[str, float]) -> dict[str, fl
246246
247247
When distributions have very few data points, multiple percentiles can have
248248
the same value, which causes visualization libraries to fail. This function
249-
keeps only the first occurrence of consecutive duplicate values.
249+
keeps only the largest percentile for consecutive duplicate values, which is
250+
more mathematically accurate as higher percentiles have greater statistical
251+
significance.
250252
251253
:param percentiles: Dictionary of percentile names to values
252254
:return: Filtered percentiles dictionary with no consecutive duplicates
253255
"""
254256
if not percentiles:
255257
return percentiles
256-
258+
257259
percentile_order = list(Percentiles.model_fields.keys())
258260

261+
# Iterate in reverse to keep the largest percentile for each value
259262
filtered = {}
260263
previous_value = None
261264

262-
for key in percentile_order:
265+
for key in reversed(percentile_order):
263266
if key in percentiles:
264267
current_value = percentiles[key]
265268
if previous_value is None or current_value != previous_value:
266269
filtered[key] = current_value
267270
previous_value = current_value
268271

269-
return filtered
272+
# Restore original order
273+
return {key: filtered[key] for key in percentile_order if key in filtered}
270274

271275

272276
def _inject_data(js_data: dict[str, str], html: str) -> str:

tests/unit/benchmark/test_html_output.py

Lines changed: 13 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -21,8 +21,8 @@ def test_filter_all_same_values():
2121

2222
filtered = _filter_duplicate_percentiles(percentiles)
2323

24-
# Should only keep the first one
25-
assert filtered == {"p001": 15.288091352804853}
24+
# Should only keep the largest (p999) for mathematical accuracy
25+
assert filtered == {"p999": 15.288091352804853}
2626

2727

2828
def test_filter_consecutive_duplicates():
@@ -43,11 +43,11 @@ def test_filter_consecutive_duplicates():
4343

4444
filtered = _filter_duplicate_percentiles(percentiles)
4545

46-
# Should keep first of each group
46+
# Should keep largest of each group for mathematical accuracy
4747
assert filtered == {
48-
"p001": 15.288091352804853,
49-
"p50": 16.41327511776994,
50-
"p90": 17.03541629998259,
48+
"p25": 15.288091352804853,
49+
"p75": 16.41327511776994,
50+
"p999": 17.03541629998259,
5151
}
5252

5353

@@ -69,15 +69,16 @@ def test_no_duplicates():
6969

7070
filtered = _filter_duplicate_percentiles(percentiles)
7171

72+
# Should keep largest of each duplicate group (p01 instead of p001, p999 instead of p95)
7273
assert filtered == {
73-
"p001": 13.181080445834912,
74+
"p01": 13.181080445834912,
7475
"p05": 13.530595573836457,
7576
"p10": 13.843972502554365,
7677
"p25": 14.086376978251748,
7778
"p50": 14.403258051191058,
7879
"p75": 14.738608817056042,
7980
"p90": 15.18136631856698,
80-
"p95": 15.7213110894772,
81+
"p999": 15.7213110894772,
8182
}
8283

8384

@@ -150,11 +151,11 @@ def test_model_dump_filters_duplicates():
150151

151152
data = dist.model_dump()
152153

153-
# Check that percentiles were filtered
154+
# Check that percentiles were filtered, keeping largest of each group
154155
assert data["percentiles"] == {
155-
"p001": 15.288091352804853,
156-
"p50": 16.41327511776994,
157-
"p90": 17.03541629998259,
156+
"p25": 15.288091352804853,
157+
"p75": 16.41327511776994,
158+
"p999": 17.03541629998259,
158159
}
159160

160161
# Ensure other fields remain unchanged

0 commit comments

Comments
 (0)