Commit 2a768f3

Add a script to synthesize a pyperformance loops file from one or more
benchmark results.
1 parent 0dee4d0 commit 2a768f3

3 files changed: +202 -0 lines


Diff for: bench_runner/__main__.py (+1 line)
@@ -19,6 +19,7 @@
     "remove_benchmark": "Remove specific benchmarks from the data set",
     "run_benchmarks": "Run benchmarks (in timing, pyperf or perf modes)",
     "should_run": "Determine whether we need to rerun results for the current commit",
+    "synthesize_loops_file": "Create a loops file from multiple benchmark results",
     "notify": "Send a notification about the completion of the workflow",
 }

Diff for: bench_runner/scripts/synthesize_loops_file.py (new file, +111 lines)
import argparse
import collections
import errno
import json
import pathlib
import sys
from typing import Iterable

import rich_argparse


def parse_result(results_file, benchmark_data):
    with results_file.open() as f:
        result = json.load(f)
    bms = result["benchmarks"]
    if len(bms) == 1 and "metadata" not in bms[0]:
        # Sometimes a .json file contains just a single benchmark.
        bms = [result]
    for bm in bms:
        if "metadata" not in bm:
            raise RuntimeError(f"Invalid data {bm.keys()!r} in {results_file}")
            return
        benchmark_data[bm["metadata"]["name"]].append(bm["metadata"]["loops"])


def _main(
    loops_file: pathlib.Path,
    update: bool,
    overwrite: bool,
    merger: str,
    results: Iterable[pathlib.Path],
):
    if not update and not overwrite and loops_file.exists():
        raise OSError(
            errno.EEXIST,
            f"{loops_file} exists (use -f to overwrite, -u to merge data)",
        )
    if update and merger in ("median", "mean"):
        print(
            f"WARNING: merging existing data with {merger!r} "
            + "overrepresents new results",
            file=sys.stderr,
        )
    benchmark_data = collections.defaultdict(list)
    if update:
        parse_result(loops_file, benchmark_data)
    for result_file in results:
        parse_result(result_file, benchmark_data)

    merge_func = {
        "max": max,
        "min": min,
        # The only merge strategy that may not produce one of the input
        # values, and probably a bad idea to use.
        "mean": lambda L: int(round(sum(L) / len(L))),
        # Close enough to median for benchmarking work.
        "median": lambda L: L[len(L) // 2],
    }[merger]

    # pyperformance expects a specific layout, and needs the top-level
    # metadata even if it's empty.
    loops_data = {"benchmarks": [], "metadata": {}}
    for bm in sorted(benchmark_data):
        loops = merge_func(benchmark_data[bm])
        bm_result = {"metadata": {"name": bm, "loops": loops}}
        loops_data["benchmarks"].append(bm_result)
    with loops_file.open("w") as f:
        json.dump(loops_data, f, sort_keys=True, indent=4)
        f.write("\n")


def main():
    parser = argparse.ArgumentParser(
        description="""
        Synthesize a loops.json file for use with `pyperformance`'s
        `--same-loops` (or `PYPERFORMANCE_LOOPS_FILE`) from one or more
        benchmark results.
        """,
        formatter_class=rich_argparse.ArgumentDefaultsRichHelpFormatter,
    )
    parser.add_argument(
        "-o", "--loops_file", help="loops file to write to", required=True
    )
    group = parser.add_mutually_exclusive_group(required=False)
    group.add_argument(
        "-u", "--update", action="store_true", help="add to existing loops file"
    )
    group.add_argument(
        "-f", "--overwrite", action="store_true", help="replace loops file"
    )
    parser.add_argument(
        "-s",
        "--select",
        choices=("max", "min", "median", "mean"),
        default="max",
        help="how to merge multiple runs",
    )
    parser.add_argument("results", nargs="+", help="benchmark results to parse")
    args = parser.parse_args()

    _main(
        pathlib.Path(args.loops_file),
        args.update,
        args.overwrite,
        args.select,
        [pathlib.Path(r) for r in args.results],
    )


if __name__ == "__main__":
    main()
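
As a quick orientation (not part of the commit; the benchmark names and loop counts below are invented), the script's data flow can be exercised end to end: parse_result() collects metadata["loops"] per benchmark name from each results file, and _main() writes the merged counts back out in the layout pyperformance reads via --same-loops or PYPERFORMANCE_LOOPS_FILE.

# Sketch only: feed _main() one hypothetical results file and print the
# loops file it synthesizes.  "nbody" / "deepcopy" and the loop counts
# are made-up values, not real benchmark data.
import json
import pathlib
import tempfile

from bench_runner.scripts.synthesize_loops_file import _main

with tempfile.TemporaryDirectory() as tmpdir:
    tmpdir = pathlib.Path(tmpdir)

    # The shape parse_result() expects: a "benchmarks" list whose entries
    # carry "metadata" with at least "name" and "loops".
    results_file = tmpdir / "results.json"
    results_file.write_text(
        json.dumps(
            {
                "benchmarks": [
                    {"metadata": {"name": "nbody", "loops": 16}},
                    {"metadata": {"name": "deepcopy", "loops": 8}},
                ],
                "metadata": {},
            }
        )
    )

    loops_file = tmpdir / "loops.json"
    _main(loops_file, False, False, "max", [results_file])

    # Prints a loops.json whose "benchmarks" entries keep only "name" and
    # "loops", plus the (empty) top-level "metadata" pyperformance requires.
    print(loops_file.read_text())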

Diff for: tests/test_synthesize_loops_file.py (new file, +90 lines)
import json
import pathlib
import sys
import tempfile

import pytest

from bench_runner.scripts import synthesize_loops_file

DATA_PATH = pathlib.Path(__file__).parent / "data"


def run_synthesize(
    output: pathlib.Path,
    datadir: pathlib.Path,
    *,
    update: bool = False,
    overwrite: bool = False,
    merger: str = "max",
):
    files = datadir.glob("results/**/*.json")
    synthesize_loops_file._main(
        loops_file=output,
        update=update,
        overwrite=overwrite,
        merger=merger,
        results=files,
    )


def check_loops(output: pathlib.Path):
    with output.open() as f:
        data = json.load(f)
    assert "benchmarks" in data
    assert "metadata" in data
    seen = set()
    for bm in data["benchmarks"]:
        assert "metadata" in bm
        assert "loops" in bm["metadata"]
        assert isinstance(bm["metadata"]["loops"], int)
        assert "name" in bm["metadata"]
        assert (name := bm["metadata"]["name"]) not in seen
        assert isinstance(name, str)
        seen.add(name)
    data["benchmarks"].sort(key=lambda item: item["metadata"]["name"])
    return data


def set_loops(output, value):
    with output.open() as f:
        data = json.load(f)
    for bm in data["benchmarks"]:
        bm["metadata"]["loops"] = value
    with output.open("w") as f:
        json.dump(data, f, sort_keys=True, indent=4)


def test_synthesize():
    with tempfile.TemporaryDirectory() as tmpdir:
        output = pathlib.Path(tmpdir) / "loops.json"
        run_synthesize(output, DATA_PATH)
        expected_data = check_loops(output)

        with pytest.raises(FileExistsError):
            run_synthesize(output, DATA_PATH)

        run_synthesize(output, DATA_PATH, update=True)
        assert expected_data == check_loops(output)

        set_loops(output, 0)
        run_synthesize(output, DATA_PATH, update=True)
        assert expected_data == check_loops(output)

        set_loops(output, sys.maxsize)
        run_synthesize(output, DATA_PATH, overwrite=True)
        assert expected_data == check_loops(output)

        run_synthesize(output, DATA_PATH, overwrite=True, merger="min")
        expected_data = check_loops(output)
        set_loops(output, sys.maxsize)
        run_synthesize(output, DATA_PATH, update=True, merger="min")
        assert expected_data == check_loops(output)

        # Can't easily check the values for correctness, so just verify it
        # doesn't produce bogus output.
        run_synthesize(output, DATA_PATH, overwrite=True, merger="mean")
        check_loops(output)

        run_synthesize(output, DATA_PATH, overwrite=True, merger="median")
        check_loops(output)
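
A side note on the warning _main() emits for --update combined with "mean" or "median" (the test above only checks that the output stays well-formed): the existing loops file contributes a single, already-merged value per benchmark, while every new results file contributes its own value, so repeated updates weight recent runs more heavily. A toy illustration with invented numbers:

# Toy numbers, not real benchmark data: why updating with "mean" skews
# toward the newest results while "max"/"min" do not.
existing_merged = [10]   # the one value read back from the old loops file
new_runs = [20, 30]      # loop counts parsed from two fresh results files

combined = existing_merged + new_runs
mean = int(round(sum(combined) / len(combined)))  # 20: old data carries only 1/3 weight
maximum = max(combined)                           # 30: unaffected by how values are grouped
print(mean, maximum)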
