Skip to content

Commit 194b390

Browse files
authored
Merge pull request #371 from faster-cpython/arbitrary-compare
Allow comparison of completely arbitrary json files
2 parents 6bcd11e + 901b4af commit 194b390

File tree

4 files changed

+124
-37
lines changed

4 files changed

+124
-37
lines changed

Diff for: bench_runner/result.py

+32-4
Original file line numberDiff line numberDiff line change
@@ -74,11 +74,13 @@ def _get_architecture(python: PathLike) -> str:
7474

7575

7676
class Comparison:
77-
def __init__(self, ref: "Result", head: "Result", base: str):
77+
def __init__(
78+
self, ref: "Result", head: "Result", base: str, force_valid: bool = False
79+
):
7880
self.ref = ref
7981
self.head = head
8082
self.base = base
81-
self.valid_comparison = not (
83+
self.valid_comparison = force_valid or not (
8284
self.ref == self.head
8385
or (
8486
self.ref.cpython_hash == self.head.cpython_hash
@@ -108,7 +110,7 @@ def get_files(self) -> Iterable[tuple[Callable, str, str]]:
108110
return
109111
yield (self.write_table, ".md", "table")
110112
yield (self.write_timing_plot, ".svg", "time plot")
111-
if self.head.system != "windows" and self.base == "base":
113+
if not self.head.is_windows() and self.base == "base":
112114
yield (self.write_memory_plot, "-mem.svg", "memory plot")
113115

114116
@functools.cached_property
@@ -289,7 +291,7 @@ def geometric_mean(self) -> str:
289291

290292
def _calculate_memory_change(self):
291293
# Windows doesn't have memory data
292-
if self.head.system == "windows":
294+
if self.head.is_windows():
293295
return "unknown"
294296

295297
combined_data = self.get_memory_diff()
@@ -494,6 +496,24 @@ def from_filename(cls, filename: PathLike) -> "Result":
494496
obj._filename = filename
495497
return obj
496498

499+
@classmethod
500+
def from_arbitrary_filename(cls, filename: PathLike) -> "Result":
501+
filename = Path(filename)
502+
content = json.loads(filename.read_text())
503+
obj = cls(
504+
nickname="unknown",
505+
machine="unknown",
506+
fork="unknown",
507+
ref=filename.stem,
508+
version="unknown",
509+
cpython_hash=content.get("metadata", {}).get("commit_id", "unknown"),
510+
extra=[],
511+
suffix=filename.suffix,
512+
flags=[],
513+
)
514+
obj._filename = filename
515+
return obj
516+
497517
@classmethod
498518
def from_scratch(
499519
cls,
@@ -623,6 +643,14 @@ def hostname(self) -> str:
623643
def system(self) -> str:
624644
return runners.get_runner_by_nickname(self.nickname).os
625645

646+
def is_windows(self) -> bool:
647+
if self.nickname != "unknown":
648+
return self.system == "windows"
649+
else:
650+
return (
651+
self.metadata.get("platform", "unknown").lower().startswith("windows")
652+
)
653+
626654
@property
627655
def runner(self) -> str:
628656
return f"{self.system} {self.machine} ({self.nickname})"

Diff for: bench_runner/runners.py

+5-2
Original file line numberDiff line numberDiff line change
@@ -42,6 +42,9 @@ def display_name(self) -> str:
4242
return f"{self.os} {self.arch} ({self.nickname})"
4343

4444

45+
unknown_runner = Runner("unknown", "unknown", "unknown", "unknown", False, {}, None)
46+
47+
4548
@functools.cache
4649
def get_runners() -> list[Runner]:
4750
conf = config.get_bench_runner_config().get("runners", [{}])[0]
@@ -82,8 +85,8 @@ def get_nickname_for_hostname(hostname: str) -> str:
8285
# results are reported for.
8386
if "BENCHMARK_MACHINE_NICKNAME" in os.environ:
8487
return os.environ["BENCHMARK_MACHINE_NICKNAME"]
85-
return get_runners_by_hostname()[hostname].nickname
88+
return get_runners_by_hostname().get(hostname, unknown_runner).nickname
8689

8790

8891
def get_runner_by_nickname(nickname: str) -> Runner:
89-
return get_runners_by_nickname()[nickname]
92+
return get_runners_by_nickname().get(nickname, unknown_runner)

Diff for: bench_runner/scripts/compare.py

+84-27
Original file line numberDiff line numberDiff line change
@@ -51,7 +51,7 @@ def get_machines(results: Iterable[mod_result.Result]) -> set[str]:
5151

5252
def compare_pair(
5353
output_dir: PathLike,
54-
machine: str,
54+
machine: str | None,
5555
ref_name: str,
5656
ref: mod_result.Result,
5757
head_name: str,
@@ -60,11 +60,17 @@ def compare_pair(
6060
) -> str:
6161
output_dir = Path(output_dir)
6262

63-
rich.print(f"Comparing {counter[0]+1}/{counter[1]}", end="\r")
63+
rich.print(
64+
f"Comparing {counter[0]+1: 2}/{counter[1]: 2}: {head_name} vs. {ref_name}"
65+
)
6466
counter[0] += 1
6567

66-
name = f"{machine}-{head_name}-vs-{ref_name}"
67-
comparison = mod_result.BenchmarkComparison(ref, head, "base")
68+
name_parts = []
69+
if machine is not None:
70+
name_parts.append(machine)
71+
name_parts.extend([head_name, "vs", ref_name])
72+
name = "-".join(name_parts)
73+
comparison = mod_result.BenchmarkComparison(ref, head, "base", True)
6874
entry = [comparison.summary]
6975
for func, suffix, file_type in comparison.get_files():
7076
output_filename = util.apply_suffix(output_dir / name, suffix)
@@ -78,46 +84,56 @@ def write_row(fd: TextIO, columns: Iterable[str]):
7884
fd.write(f"| {' | '.join(columns)} |\n")
7985

8086

87+
def name_and_hash(name: str, hash: str) -> str:
88+
if name == hash:
89+
return name
90+
return f"{name} ({hash})"
91+
92+
93+
def get_first_result_for_machine(
94+
results: Iterable[mod_result.Result], machine: str | None
95+
) -> mod_result.Result:
96+
return next(
97+
result for result in results if machine is None or result.nickname == machine
98+
)
99+
100+
81101
def do_one_to_many(
82102
fd: TextIO,
83103
parsed_commits: ParsedCommits,
84-
machine: str,
104+
machine: str | None,
85105
output_dir: PathLike,
86106
counter: list[int],
87107
) -> None:
88108
_, _, first_name, first_results = parsed_commits[0]
89-
first_result = next(
90-
result for result in first_results if result.nickname == machine
91-
)
109+
first_result = get_first_result_for_machine(first_results, machine)
92110
write_row(fd, ["commit", "change"])
93111
write_row(fd, ["--"] * 2)
94112
for hash, _, name, results in parsed_commits[1:]:
95-
result = next(result for result in results if result.nickname == machine)
113+
result = get_first_result_for_machine(results, machine)
96114
link = compare_pair(
97115
output_dir, machine, first_name, first_result, name, result, counter
98116
)
99-
write_row(fd, [f"{name} ({hash})", link])
117+
write_row(fd, [name_and_hash(name, hash), link])
100118

101119

102120
def do_many_to_many(
103121
fd,
104122
parsed_commits: ParsedCommits,
105-
machine: str,
123+
machine: str | None,
106124
output_dir: PathLike,
107125
counter: list[int],
108126
) -> None:
109-
write_row(fd, ["", *[f"{x[2]} ({x[0]})" for x in parsed_commits]])
127+
write_row(fd, ["", *[name_and_hash(x[2], x[0]) for x in parsed_commits]])
110128
write_row(fd, ["--"] * (len(parsed_commits) + 1))
111129
for hash1, flags1, name1, results1 in parsed_commits:
112130
columns = [name1]
113-
result1 = next(result for result in results1 if result.nickname == machine)
131+
result1 = get_first_result_for_machine(results1, machine)
114132
for hash2, flags2, name2, results2 in parsed_commits:
115133
if hash1 == hash2 and flags1 == flags2:
116134
columns.append("")
117135
else:
118-
result2 = next(
119-
result for result in results2 if result.nickname == machine
120-
)
136+
result2 = get_first_result_for_machine(results2, machine)
121137
link = compare_pair(
122138
output_dir, machine, name1, result1, name2, result2, counter
123139
)
@@ -127,14 +143,11 @@ def do_many_to_many(
127143
fd.write("\n\nRows are 'bases', columns are 'heads'\n")
128144

129145

130-
def _main(commits: Sequence[str], output_dir: PathLike, comparison_type: str):
146+
def _main_with_hashes(commits: Sequence[str], output_dir: Path, comparison_type: str):
131147
results = mod_result.load_all_results(
132148
None, Path("results"), sorted=False, match=False
133149
)
134150

135-
if len(commits) < 2:
136-
raise ValueError("Must provide at least 2 commits")
137-
138151
parsed_commits = []
139152
machines = set()
140153

@@ -163,10 +176,6 @@ def _main(commits: Sequence[str], output_dir: PathLike, comparison_type: str):
163176
if len(machines) == 0:
164177
raise ValueError("No single machine in common with all of the results")
165178

166-
output_dir_path = Path(output_dir)
167-
if not output_dir_path.exists():
168-
output_dir_path.mkdir()
169-
170179
match comparison_type:
171180
case "1:n":
172181
total = (len(parsed_commits) - 1) * len(machines)
@@ -180,14 +189,61 @@ def _main(commits: Sequence[str], output_dir: PathLike, comparison_type: str):
180189
runners = mod_runners.get_runners_by_nickname()
181190

182191
counter = [0, total]
183-
with (output_dir_path / "README.md").open("w", encoding="utf-8") as fd:
192+
with (output_dir / "README.md").open("w", encoding="utf-8") as fd:
184193
for machine in machines:
185194
fd.write(f"# {runners[machine].display_name}\n\n")
186-
func(fd, parsed_commits, machine, output_dir_path, counter)
195+
func(fd, parsed_commits, machine, output_dir, counter)
187196
fd.write("\n")
188197
rich.print()
189198

190199

200+
def _main_with_files(commits: Sequence[str], output_dir: Path, comparison_type: str):
201+
parsed_results = []
202+
for commit in commits:
203+
if "," in commit:
204+
commit_path, name = commit.split(",", 1)
205+
commit_path = Path(commit_path)
206+
else:
207+
commit_path = Path(commit)
208+
name = commit_path.stem
209+
parsed_results.append(
210+
(name, [], name, [mod_result.Result.from_arbitrary_filename(commit_path)])
211+
)
212+
213+
match comparison_type:
214+
case "1:n":
215+
total = len(commits) - 1
216+
func = do_one_to_many
217+
case "n:n":
218+
total = (len(commits) ** 2) - len(commits)
219+
func = do_many_to_many
220+
case _:
221+
raise ValueError(f"Unknown comparison type {comparison_type}")
222+
223+
counter = [0, total]
224+
with (output_dir / "README.md").open("w", encoding="utf-8") as fd:
225+
fd.write("# Comparisons\n\n")
226+
func(fd, parsed_results, None, output_dir, counter)
227+
fd.write("\n")
228+
rich.print()
229+
230+
231+
def _main(commits: Sequence[str], output_dir: PathLike, comparison_type: str):
232+
if len(commits) < 2:
233+
raise ValueError("Must provide at least 2 commits")
234+
235+
output_dir_path = Path(output_dir)
236+
if not output_dir_path.exists():
237+
output_dir_path.mkdir()
238+
239+
if all(commit.endswith(".json") for commit in commits):
240+
_main_with_files(commits, output_dir_path, comparison_type)
241+
elif any(commit.endswith(".json") for commit in commits):
242+
raise ValueError("All commits must be either hashes or JSON files")
243+
else:
244+
_main_with_hashes(commits, output_dir_path, comparison_type)
245+
246+
191247
def main():
192248
parser = argparse.ArgumentParser(
193249
description="""
@@ -204,7 +260,8 @@ def main():
204260
"commit",
205261
nargs="+",
206262
help="""
207-
Commits to compare. Must be a git commit hash prefix. May optionally
263+
Commits or files to compare. If ends with ".json", it is a path to a
264+
JSON file, otherwise, it is a git commit hash prefix. May optionally
208265
have a friendly name after a comma, e.g. c0ffee,main. If ends with
209266
a "T", use the Tier 2 run for that commit. If ends with a "J", use
210267
the JIT run for that commit. If ends with a "N", use the NOGIL run

Diff for: tests/test_compare.py

+3-4
Original file line numberDiff line numberDiff line change
@@ -38,8 +38,8 @@ def test_compare_1_to_n(tmp_path, monkeypatch):
3838
3939
| commit | change |
4040
| -- | -- |
41-
| eb0004c (eb0004c) | 1.571x ↑[📄](linux-eb0004c-vs-9d38120.md)[📈](linux-eb0004c-vs-9d38120.svg)[🧠](linux-eb0004c-vs-9d38120-mem.svg) |
42-
| b0e1f9c (b0e1f9c) | 1.702x ↑[📄](linux-b0e1f9c-vs-9d38120.md)[📈](linux-b0e1f9c-vs-9d38120.svg)[🧠](linux-b0e1f9c-vs-9d38120-mem.svg) |
41+
| eb0004c | 1.571x ↑[📄](linux-eb0004c-vs-9d38120.md)[📈](linux-eb0004c-vs-9d38120.svg)[🧠](linux-eb0004c-vs-9d38120-mem.svg) |
42+
| b0e1f9c | 1.702x ↑[📄](linux-b0e1f9c-vs-9d38120.md)[📈](linux-b0e1f9c-vs-9d38120.svg)[🧠](linux-b0e1f9c-vs-9d38120-mem.svg) |
4343
""" # noqa
4444
).strip()
4545
assert expected in content
@@ -70,12 +70,11 @@ def test_compare_n_to_n(tmp_path, monkeypatch):
7070
"""
7171
# linux x86_64 (linux)
7272
73-
| | 9d38120 (9d38120) | eb0004c (eb0004c) | b0e1f9c (b0e1f9c) |
73+
| | 9d38120 | eb0004c | b0e1f9c |
7474
| -- | -- | -- | -- |
7575
| 9d38120 | | 1.571x ↑[📄](linux-eb0004c-vs-9d38120.md)[📈](linux-eb0004c-vs-9d38120.svg)[🧠](linux-eb0004c-vs-9d38120-mem.svg) | 1.702x ↑[📄](linux-b0e1f9c-vs-9d38120.md)[📈](linux-b0e1f9c-vs-9d38120.svg)[🧠](linux-b0e1f9c-vs-9d38120-mem.svg) |
7676
| eb0004c | 1.363x ↓[📄](linux-9d38120-vs-eb0004c.md)[📈](linux-9d38120-vs-eb0004c.svg)[🧠](linux-9d38120-vs-eb0004c-mem.svg) | | 1.083x ↑[📄](linux-b0e1f9c-vs-eb0004c.md)[📈](linux-b0e1f9c-vs-eb0004c.svg)[🧠](linux-b0e1f9c-vs-eb0004c-mem.svg) |
7777
| b0e1f9c | 1.412x ↓[📄](linux-9d38120-vs-b0e1f9c.md)[📈](linux-9d38120-vs-b0e1f9c.svg)[🧠](linux-9d38120-vs-b0e1f9c-mem.svg) | 1.077x ↓[📄](linux-eb0004c-vs-b0e1f9c.md)[📈](linux-eb0004c-vs-b0e1f9c.svg)[🧠](linux-eb0004c-vs-b0e1f9c-mem.svg) | |
7878
""" # noqa
7979
).strip()
80-
print(content)
8180
assert expected in content

0 commit comments

Comments
 (0)