Commit 2a768f3

Add a script to synthesize a pyperformance loops file from one or more
benchmark results.
1 parent 0dee4d0 commit 2a768f3

3 files changed: +202 -0 lines


Diff for: bench_runner/__main__.py (+1 line)
@@ -19,6 +19,7 @@
     "remove_benchmark": "Remove specific benchmarks from the data set",
     "run_benchmarks": "Run benchmarks (in timing, pyperf or perf modes)",
     "should_run": "Determine whether we need to rerun results for the current commit",
+    "synthesize_loops_file": "Create a loops file from multiple benchmark results",
     "notify": "Send a notification about the completion of the workflow",
 }

Diff for: bench_runner/scripts/synthesize_loops_file.py (new file, +111 lines)
import argparse
import collections
import errno
import json
import pathlib
import sys
from typing import Iterable

import rich_argparse


def parse_result(results_file, benchmark_data):
    with results_file.open() as f:
        result = json.load(f)
    bms = result["benchmarks"]
    if len(bms) == 1 and "metadata" not in bms[0]:
        # Sometimes a .json file contains just a single benchmark.
        bms = [result]
    for bm in bms:
        if "metadata" not in bm:
            raise RuntimeError(f"Invalid data {bm.keys()!r} in {results_file}")
            return
        benchmark_data[bm["metadata"]["name"]].append(bm["metadata"]["loops"])


def _main(
    loops_file: pathlib.Path,
    update: bool,
    overwrite: bool,
    merger: str,
    results: Iterable[pathlib.Path],
):
    if not update and not overwrite and loops_file.exists():
        raise OSError(
            errno.EEXIST,
            f"{loops_file} exists (use -f to overwrite, -u to merge data)",
        )
    if update and merger in ("median", "mean"):
        print(
            f"WARNING: merging existing data with {merger!r} "
            + "overrepresents new results",
            file=sys.stderr,
        )
    benchmark_data = collections.defaultdict(list)
    if update:
        parse_result(loops_file, benchmark_data)
    for result_file in results:
        parse_result(result_file, benchmark_data)

    merge_func = {
        "max": max,
        "min": min,
        # The only merge strategy that may not produce one of the input
        # values, and probably a bad idea to use.
        "mean": lambda L: int(round(sum(L) / len(L))),
        # Close enough to median for benchmarking work.
        "median": lambda L: L[len(L) // 2],
    }[merger]

    # pyperformance expects a specific layout, and needs the top-level
    # metadata even if it's empty.
    loops_data = {"benchmarks": [], "metadata": {}}
    for bm in sorted(benchmark_data):
        loops = merge_func(benchmark_data[bm])
        bm_result = {"metadata": {"name": bm, "loops": loops}}
        loops_data["benchmarks"].append(bm_result)
    with loops_file.open("w") as f:
        json.dump(loops_data, f, sort_keys=True, indent=4)
        f.write("\n")


def main():
    parser = argparse.ArgumentParser(
        description="""
        Synthesize a loops.json file for use with `pyperformance`'s
        `--same-loops` (or `PYPERFORMANCE_LOOPS_FILE`) from one or more
        benchmark results.
        """,
        formatter_class=rich_argparse.ArgumentDefaultsRichHelpFormatter,
    )
    parser.add_argument(
        "-o", "--loops_file", help="loops file to write to", required=True
    )
    group = parser.add_mutually_exclusive_group(required=False)
    group.add_argument(
        "-u", "--update", action="store_true", help="add to existing loops file"
    )
    group.add_argument(
        "-f", "--overwrite", action="store_true", help="replace loops file"
    )
    parser.add_argument(
        "-s",
        "--select",
        choices=("max", "min", "median", "mean"),
        default="max",
        help="how to merge multiple runs",
    )
    parser.add_argument("results", nargs="+", help="benchmark results to parse")
    args = parser.parse_args()

    _main(
        pathlib.Path(args.loops_file),
        args.update,
        args.overwrite,
        args.select,
        [pathlib.Path(r) for r in args.results],
    )


if __name__ == "__main__":
    main()
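
As a quick orientation (not part of the commit; the benchmark names and loop counts below are invented), the script's data flow can be exercised end to end: parse_result() collects metadata["loops"] per benchmark name from each results file, and _main() writes the merged counts back out in the layout pyperformance reads via --same-loops or PYPERFORMANCE_LOOPS_FILE.

# Sketch only: feed _main() one hypothetical results file and print the
# loops file it synthesizes.  "nbody" / "deepcopy" and the loop counts
# are made-up values, not real benchmark data.
import json
import pathlib
import tempfile

from bench_runner.scripts.synthesize_loops_file import _main

with tempfile.TemporaryDirectory() as tmpdir:
    tmpdir = pathlib.Path(tmpdir)

    # The shape parse_result() expects: a "benchmarks" list whose entries
    # carry "metadata" with at least "name" and "loops".
    results_file = tmpdir / "results.json"
    results_file.write_text(
        json.dumps(
            {
                "benchmarks": [
                    {"metadata": {"name": "nbody", "loops": 16}},
                    {"metadata": {"name": "deepcopy", "loops": 8}},
                ],
                "metadata": {},
            }
        )
    )

    loops_file = tmpdir / "loops.json"
    _main(loops_file, False, False, "max", [results_file])

    # Prints a loops.json whose "benchmarks" entries keep only "name" and
    # "loops", plus the (empty) top-level "metadata" pyperformance requires.
    print(loops_file.read_text())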

Diff for: tests/test_synthesize_loops_file.py (new file, +90 lines)
import json
import pathlib
import sys
import tempfile

import pytest

from bench_runner.scripts import synthesize_loops_file

DATA_PATH = pathlib.Path(__file__).parent / "data"


def run_synthesize(
    output: pathlib.Path,
    datadir: pathlib.Path,
    *,
    update: bool = False,
    overwrite: bool = False,
    merger: str = "max",
):
    files = datadir.glob("results/**/*.json")
    synthesize_loops_file._main(
        loops_file=output,
        update=update,
        overwrite=overwrite,
        merger=merger,
        results=files,
    )


def check_loops(output: pathlib.Path):
    with output.open() as f:
        data = json.load(f)
    assert "benchmarks" in data
    assert "metadata" in data
    seen = set()
    for bm in data["benchmarks"]:
        assert "metadata" in bm
        assert "loops" in bm["metadata"]
        assert isinstance(bm["metadata"]["loops"], int)
        assert "name" in bm["metadata"]
        assert (name := bm["metadata"]["name"]) not in seen
        assert isinstance(name, str)
        seen.add(name)
    data["benchmarks"].sort(key=lambda item: item["metadata"]["name"])
    return data


def set_loops(output, value):
    with output.open() as f:
        data = json.load(f)
    for bm in data["benchmarks"]:
        bm["metadata"]["loops"] = value
    with output.open("w") as f:
        json.dump(data, f, sort_keys=True, indent=4)


def test_synthesize():
    with tempfile.TemporaryDirectory() as tmpdir:
        output = pathlib.Path(tmpdir) / "loops.json"
        run_synthesize(output, DATA_PATH)
        expected_data = check_loops(output)

        with pytest.raises(FileExistsError):
            run_synthesize(output, DATA_PATH)

        run_synthesize(output, DATA_PATH, update=True)
        assert expected_data == check_loops(output)

        set_loops(output, 0)
        run_synthesize(output, DATA_PATH, update=True)
        assert expected_data == check_loops(output)

        set_loops(output, sys.maxsize)
        run_synthesize(output, DATA_PATH, overwrite=True)
        assert expected_data == check_loops(output)

        run_synthesize(output, DATA_PATH, overwrite=True, merger="min")
        expected_data = check_loops(output)
        set_loops(output, sys.maxsize)
        run_synthesize(output, DATA_PATH, update=True, merger="min")
        assert expected_data == check_loops(output)

        # Can't easily check the values for correctness, so just verify it
        # doesn't produce bogus output.
        run_synthesize(output, DATA_PATH, overwrite=True, merger="mean")
        check_loops(output)

        run_synthesize(output, DATA_PATH, overwrite=True, merger="median")
        check_loops(output)
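
A side note on the warning _main() emits for --update combined with "mean" or "median" (the test above only checks that the output stays well-formed): the existing loops file contributes a single, already-merged value per benchmark, while every new results file contributes its own value, so repeated updates weight recent runs more heavily. A toy illustration with invented numbers:

# Toy numbers, not real benchmark data: why updating with "mean" skews
# toward the newest results while "max"/"min" do not.
existing_merged = [10]   # the one value read back from the old loops file
new_runs = [20, 30]      # loop counts parsed from two fresh results files

combined = existing_merged + new_runs
mean = int(round(sum(combined) / len(combined)))  # 20: old data carries only 1/3 weight
maximum = max(combined)                           # 30: unaffected by how values are grouped
print(mean, maximum)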
