Skip to content

Commit d04acf3

Browse files
committed
Add configurable threshold for benchmark ratio
- Add --threshold/-t flag to benchmark script (default 0.05) to define the minimum ratio difference before marking a change as slower/faster
- Rewrite _ratio to compare the relative change (|1 - ratio|) against the threshold
- Display 3 decimal places in ratio output to surface small differences
1 parent d729053 commit d04acf3

2 files changed

Lines changed: 27 additions & 7 deletions

File tree

.github/workflows/benchmark.yaml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -23,7 +23,7 @@ jobs:
2323
- name: Run benchmarks
2424
id: run-benchmarks
2525
run: |
26-
uv run python scripts/benchmark.py -b $BRANCH_NAME -o . -f benchmarks.md
26+
uv run python scripts/benchmark.py -b $BRANCH_NAME -o . -f benchmarks.md -t 0.05
2727
- name: Record pr number
2828
run: |
2929
echo "${{ github.event.number }}" > pr-number.txt

scripts/benchmark.py

Lines changed: 26 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,8 @@
55
"""Perform benchmarking of bids2table against last tag, main and feature branches.
66
77
Run with:
8-
uv run --with <repo> scripts/benchmark.py -b <feature_branch> [-o <output_dir>]
8+
uv run --with <repo> scripts/benchmark.py \
9+
-b <feature_branch> [-o <output_dir>] [-f <output_file>] [-t <threshold>]
910
"""
1011

1112
from __future__ import annotations
@@ -173,10 +174,15 @@ def _fmt(res: BenchmarkResult) -> str:
173174
return f"{median.value:.3f} ({mean:.3f} ± {stddev:.3f}) {median.unit}"
174175

175176

176-
def _ratio(pr: BenchmarkResult, ref: BenchmarkResult) -> str:
177+
def _ratio(pr: BenchmarkResult, ref: BenchmarkResult, threshold: float) -> str:
177178
ratio = pr.median / ref.median
178-
icon = "🔴" if ratio > 1 else "🟢" if ratio < 1 else "⚪"
179-
return f"{icon} {ratio:.2f}"
179+
if abs(1 - ratio) <= threshold:
180+
icon = "⚪"
181+
elif ratio > 1:
182+
icon = "🔴"
183+
else:
184+
icon = "🟢"
185+
return f"{icon} {ratio:.3f}"
180186

181187

182188
def _label(result: BenchmarkResult) -> str:
@@ -191,6 +197,7 @@ def _label(result: BenchmarkResult) -> str:
191197

192198

193199
def build_table(
200+
threshold: float,
194201
branch_name: str,
195202
branch: dict[str, BenchmarkResult],
196203
main: dict[str, BenchmarkResult],
@@ -213,7 +220,7 @@ def row(name: str, results: dict[str, BenchmarkResult]) -> str:
213220

214221
def ratio_row(label: str, ref: dict[str, BenchmarkResult]) -> str:
215222
cells = [
216-
_ratio(branch[k], ref[k]) if k in branch and k in ref else "—"
223+
_ratio(branch[k], ref[k], threshold) if k in branch and k in ref else "—"
217224
for k in all_keys
218225
]
219226
return "| *" + label + "* |" + col_sep.join(f" {c} " for c in cells) + " |"
@@ -252,6 +259,13 @@ def _parser() -> argparse.Namespace:
252259
type=str,
253260
help="Output file name",
254261
)
262+
parser.add_argument(
263+
"-t",
264+
"--threshold",
265+
default=0.05,
266+
type=float,
267+
help="Threshold for performance to be considered unchanged",
268+
)
255269
return parser.parse_args()
256270

257271

@@ -302,14 +316,16 @@ def run_benchmark(git: Git, branch: str, out_dir: Path) -> None:
302316

303317

304318
def generate_report(
305-
git: Git, branch: str, out_dir: Path, out_fname: str | None = None
319+
git: Git, branch: str, threshold: float, out_dir: Path, out_fname: str | None = None
306320
) -> Path:
307321
"""Generate markdown report from benchmarks.
308322
309323
Args:
310324
git: Representation of current git repository for benchmarking
311325
branch: Feature branch benchmarked
326+
threshold: Threshold for performance to be considered unchanged
312327
out_dir: Directory benchmarks are saved to / output report to
328+
out_fname: Benchmark output file name
313329
314330
Returns:
315331
Path to file containing benchmark comparison table
@@ -345,6 +361,7 @@ def generate_report(
345361
_logger.warning("Tag '%s' not found in benchmark files.", tag)
346362

347363
report_contents = build_table(
364+
threshold,
348365
branch,
349366
parsed[branch],
350367
parsed["main"],
@@ -362,13 +379,16 @@ def generate_report(
362379

363380
def main() -> None:
364381
args = _parser()
382+
if abs(args.threshold) > 1:
383+
raise ValueError(f"Threshold should be between 0 and 1, got: {args.threshold}")
365384
args.output_dir.mkdir(parents=True, exist_ok=True)
366385

367386
with Git() as git:
368387
run_benchmark(git=git, branch=args.branch, out_dir=args.output_dir)
369388
report_file = generate_report(
370389
git=git,
371390
branch=args.branch,
391+
threshold=args.threshold,
372392
out_dir=args.output_dir,
373393
out_fname=args.output_file,
374394
)

0 commit comments

Comments
 (0)