Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
32 changes: 21 additions & 11 deletions scripts/compare-bench.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,18 +28,28 @@ def pct_change(base_ns: int, head_ns: int) -> float:
return ((head_ns - base_ns) / base_ns) * 100.0


def render_markdown(rows: list[tuple[str, int, int, float]], threshold_pct: float) -> str:
def status_for(delta_pct: float, abs_delta_ns: int, threshold_pct: float, min_abs_ns: int) -> str:
    """Classify one benchmark delta.

    Returns:
        "OK"    if the percentage change is within ``threshold_pct``;
        "NOISE" if the percentage threshold was exceeded but the absolute
                delta is at most ``min_abs_ns`` (too small to fail CI —
                prevents false positives on fast tools where noise dominates);
        "FAIL"  if both the percentage and absolute thresholds are exceeded.
    """
    if delta_pct <= threshold_pct:
        return "OK"
    if abs_delta_ns <= min_abs_ns:
        return "NOISE"
    return "FAIL"


def render_markdown(rows: list[tuple[str, int, int, float, int]], threshold_pct: float, min_abs_ns: int) -> str:
    """Render the benchmark comparison as a Markdown report.

    Args:
        rows: ``(tool, base_ns, head_ns, delta_pct, abs_delta_ns)`` tuples.
        threshold_pct: percentage-change threshold for flagging a regression.
        min_abs_ns: minimum absolute delta (ns) required to FAIL rather
            than be classified as NOISE.

    Returns:
        The full report as a Markdown string, terminated by a newline.
    """
    lines = [
        "## Benchmark Regression Report",
        "",
        f"Thresholds: {threshold_pct:.2f}% and {min_abs_ns:,} ns absolute delta",
        "",
        "`NOISE` means the percentage threshold was exceeded, but the absolute delta was too small to fail CI.",
        "",
        "| Tool | Base (ns) | Head (ns) | Delta | Abs Delta (ns) | Status |",
        "| --- | ---: | ---: | ---: | ---: | --- |",
    ]
    for tool, base_ns, head_ns, delta, abs_delta in rows:
        status = status_for(delta, abs_delta, threshold_pct, min_abs_ns)
        lines.append(f"| `{tool}` | {base_ns} | {head_ns} | {delta:+.2f}% | {abs_delta:+d} | {status} |")
    return "\n".join(lines) + "\n"


Expand All @@ -57,21 +67,21 @@ def main() -> int:
return 1
common = sorted(set(base) & set(head))

rows: list[tuple[str, int, int, float]] = []
rows: list[tuple[str, int, int, float, int]] = []
failures: list[str] = []

for tool in common:
base_ns = int(base[tool]["avg_latency_ns"])
head_ns = int(head[tool]["avg_latency_ns"])
delta = pct_change(base_ns, head_ns)
abs_delta = head_ns - base_ns
rows.append((tool, base_ns, head_ns, delta))
rows.append((tool, base_ns, head_ns, delta, abs_delta))
# Only flag as regression if BOTH percentage AND absolute delta exceed thresholds
# This prevents false positives on fast tools where CI noise dominates
if delta > args.threshold_pct and abs_delta > args.min_abs_ns:
failures.append(f"{tool} regressed by {delta:.2f}%")
failures.append(f"{tool} regressed by {delta:.2f}% ({abs_delta:+d} ns)")

report = render_markdown(rows, args.threshold_pct)
report = render_markdown(rows, args.threshold_pct, args.min_abs_ns)
sys.stdout.write(report)

if args.markdown_out:
Expand Down
35 changes: 35 additions & 0 deletions scripts/test_compare_bench.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,35 @@
#!/usr/bin/env python3
from __future__ import annotations

import importlib.util
from pathlib import Path
import unittest


# Load the script under test as a module. A normal `import` is impossible
# because the filename contains a hyphen ("compare-bench.py"), so we build
# an import spec from the file path and execute it manually.
SCRIPT = Path(__file__).with_name("compare-bench.py")
spec = importlib.util.spec_from_file_location("compare_bench", SCRIPT)
# Narrow Optionals for the type checker; these only fail if the file is missing.
assert spec is not None
compare_bench = importlib.util.module_from_spec(spec)
assert spec.loader is not None
# Executes the script's top level, binding its functions onto `compare_bench`.
spec.loader.exec_module(compare_bench)


class CompareBenchTests(unittest.TestCase):
    """Tests for the NOISE/FAIL classification logic in compare-bench.py."""

    def test_small_absolute_regression_is_noise(self) -> None:
        # Over the % threshold, but under the absolute floor -> NOISE.
        status = compare_bench.status_for(22.54, 11_399, 10.0, 50_000)
        self.assertEqual(status, "NOISE")

    def test_large_absolute_regression_fails(self) -> None:
        # Over both thresholds -> genuine regression.
        status = compare_bench.status_for(12.0, 75_000, 10.0, 50_000)
        self.assertEqual(status, "FAIL")

    def test_report_explains_noise_status(self) -> None:
        rows = [("codedb_read", 50_580, 61_979, 22.54, 11_399)]
        report = compare_bench.render_markdown(rows, 10.0, 50_000)
        self.assertIn("50,000 ns absolute delta", report)
        self.assertIn(
            "| `codedb_read` | 50580 | 61979 | +22.54% | +11399 | NOISE |",
            report,
        )


# Allow running the tests directly: `python scripts/test_compare_bench.py`.
if __name__ == "__main__":
    unittest.main()
Loading