
Commit 070e1ec

Adjust the loop generation script to account for the discrepancy between
data points and benchmarks, and drop the mean/median options for merging results, as they've become even more nonsensical now.
1 parent 2c95d27 commit 070e1ec

1 file changed: +172, -13 lines

Diff for: bench_runner/scripts/synthesize_loops_file.py

@@ -8,6 +8,166 @@
 
 import rich_argparse
 
+# pyperf/pyperformance run benchmarks by benchmark name, but store results,
+# including loops used, by data point name (as reported by the benchmark).
+# There's no mapping from the one to the other that we can easily use (other
+# than running benchmarks one by one and checking what data points they
+# report), so here's a hand-written mapping instead. Benchmarks that use
+# their own name for the data point are omitted. This will probably (slowly)
+# get out of date, but so be it.
+#
+# (Sorted by value, then key.)
+DATAPOINT_TO_BENCHMARK = {
+    "many_optionals": "argparse",
+    "subparsers": "argparse",
+    "async_tree_none": "async_tree",
+    "async_tree_none_tg": "async_tree_tg",
+    "bench_mp_pool": "concurrent_imap",
+    "bench_thread_pool": "concurrent_imap",
+    "deepcopy_memo": "deepcopy",
+    "deepcopy_reduce": "deepcopy",
+    "create_gc_cycles": "gc_collect",
+    "genshi_text": "genshi",
+    "genshi_xml": "genshi",
+    "logging_format": "logging",
+    "logging_silent": "logging",
+    "logging_simple": "logging",
+    "shortest_path": "networkx",
+    "connected_components": "networkx_connected_components",
+    "k_core": "networkx_k_core",
+    "pprint_pformat": "pprint",
+    "pprint_safe_repr": "pprint",
+    "scimark_fft": "scimark",
+    "scimark_lu": "scimark",
+    "scimark_monte_carlo": "scimark",
+    "scimark_sor": "scimark",
+    "scimark_sparse_mat_mult": "scimark",
+    "sqlglot_v2_normalize": "sqlglot_v2",
+    "sympy_expand": "sympy",
+    "sympy_integrate": "sympy",
+    "sympy_str": "sympy",
+    "sympy_sum": "sympy",
+    "xml_etree_generate": "xml_etree",
+    "xml_etree_iterparse": "xml_etree",
+    "xml_etree_parse": "xml_etree",
+    "xml_etree_process": "xml_etree",
+}
+
+# The list of bm_* directories in pyperformance and pyston-benchmarks, plus
+# the aliases defined in their MANIFEST files (entries with
+# '<local:$dirname>')
+KNOWN_BENCHMARKS = {
+    "2to3",
+    "aiohttp",
+    "argparse",
+    "argparse_subparsers",
+    "async_generators",
+    "async_tree",
+    "async_tree_cpu_io_mixed",
+    "async_tree_cpu_io_mixed_tg",
+    "async_tree_eager",
+    "async_tree_eager_cpu_io_mixed",
+    "async_tree_eager_cpu_io_mixed_tg",
+    "async_tree_eager_io",
+    "async_tree_eager_io_tg",
+    "async_tree_eager_memoization",
+    "async_tree_eager_memoization_tg",
+    "async_tree_eager_tg",
+    "async_tree_io",
+    "async_tree_io_tg",
+    "async_tree_memoization",
+    "async_tree_memoization_tg",
+    "async_tree_tg",
+    "asyncio_tcp",
+    "asyncio_tcp_ssl",
+    "asyncio_websockets",
+    "bpe_tokeniser",
+    "chameleon",
+    "chaos",
+    "comprehensions",
+    "concurrent_imap",
+    "coroutines",
+    "coverage",
+    "crypto_pyaes",
+    "dask",
+    "decimal_factorial",
+    "decimal_pi",
+    "deepcopy",
+    "deltablue",
+    "django_template",
+    "djangocms",
+    "docutils",
+    "dulwich_log",
+    "fannkuch",
+    "flaskblogging",
+    "float",
+    "gc_collect",
+    "gc_traversal",
+    "generators",
+    "genshi",
+    "gevent_hub",
+    "go",
+    "gunicorn",
+    "hexiom",
+    "hg_startup",
+    "html5lib",
+    "json",
+    "json_dumps",
+    "json_loads",
+    "kinto",
+    "logging",
+    "mako",
+    "mdp",
+    "meteor_contest",
+    "mypy2",
+    "nbody",
+    "networkx",
+    "networkx_connected_components",
+    "networkx_k_core",
+    "nqueens",
+    "pathlib",
+    "pickle",
+    "pickle_dict",
+    "pickle_list",
+    "pickle_pure_python",
+    "pidigits",
+    "pprint",
+    "pycparser",
+    "pyflate",
+    "pylint",
+    "python_startup",
+    "python_startup_no_site",
+    "pytorch_alexnet_inference",
+    "raytrace",
+    "regex_compile",
+    "regex_dna",
+    "regex_effbot",
+    "regex_v8",
+    "richards",
+    "richards_super",
+    "scimark",
+    "spectral_norm",
+    "sphinx",
+    "sqlalchemy_declarative",
+    "sqlalchemy_imperative",
+    "sqlglot_v2",
+    "sqlglot_v2_optimize",
+    "sqlglot_v2_parse",
+    "sqlglot_v2_transpile",
+    "sqlite_synth",
+    "sympy",
+    "telco",
+    "thrift",
+    "tomli_loads",
+    "tornado_http",
+    "typing_runtime_protocols",
+    "unpack_sequence",
+    "unpickle",
+    "unpickle_list",
+    "unpickle_pure_python",
+    "xml_etree",
+}
+
 
 def parse_result(results_file, benchmark_data):
     with results_file.open() as f:
@@ -20,7 +180,17 @@ def parse_result(results_file, benchmark_data):
         if "metadata" not in bm:
             raise RuntimeError(f"Invalid data {bm.keys()!r} in {results_file}")
             return
-        benchmark_data[bm["metadata"]["name"]].append(bm["metadata"]["loops"])
+        name = bm["metadata"]["name"]
+        name = DATAPOINT_TO_BENCHMARK.get(name, name)
+        assert name is not None  # to satisfy pyright.
+        if name not in KNOWN_BENCHMARKS:
+            print(
+                f"WARNING: unknown benchmark {name!r} in {results_file}",
+                file=sys.stderr,
+            )
+            # Avoid repeated warnings.
+            KNOWN_BENCHMARKS.add(name)
+        benchmark_data[name].append(bm["metadata"]["loops"])
 
 
 def _main(
@@ -35,12 +205,6 @@ def _main(
             errno.EEXIST,
             f"{loops_file} exists (use -f to overwrite, -u to merge data)",
         )
-    if update and merger in ("median", "mean"):
-        print(
-            f"WARNING: merging existing data with {merger!r} "
-            + "overrepresents new results",
-            file=sys.stderr,
-        )
     benchmark_data = collections.defaultdict(list)
     if update:
         parse_result(loops_file, benchmark_data)
@@ -50,11 +214,6 @@ def _main(
     merge_func = {
         "max": max,
         "min": min,
-        # The only merge strategy that may not produce one of the input
-        # values, and probably a bad idea to use.
-        "mean": lambda L: int(round(sum(L) / len(L))),
-        # Close enough to median for benchmarking work.
-        "median": lambda L: L[len(L) // 2],
     }[merger]
 
     # pyperformance expects a specific layout, and needs the top-level
@@ -91,7 +250,7 @@ def main():
     parser.add_argument(
         "-s",
        "--select",
-        choices=("max", "min", "median", "mean"),
+        choices=("max", "min"),
        default="max",
        help="how to merge multiple runs",
    )
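
To illustrate the new lookup, here is a minimal standalone sketch (not part of the commit; the table contents, data point names, and results file name are abbreviated or made up): a data point name read from a pyperf results file is first translated through DATAPOINT_TO_BENCHMARK, then checked against KNOWN_BENCHMARKS, warning only once per unrecognized name.

import sys

# Trimmed-down copies of the tables added above, for illustration only.
DATAPOINT_TO_BENCHMARK = {"deepcopy_memo": "deepcopy", "genshi_xml": "genshi"}
KNOWN_BENCHMARKS = {"deepcopy", "genshi", "json_loads"}

def resolve(name, results_file="results.json"):
    # Translate the data point name back to its benchmark name; benchmarks
    # that report under their own name simply fall through unchanged.
    name = DATAPOINT_TO_BENCHMARK.get(name, name)
    if name not in KNOWN_BENCHMARKS:
        print(f"WARNING: unknown benchmark {name!r} in {results_file}", file=sys.stderr)
        KNOWN_BENCHMARKS.add(name)  # avoid repeated warnings for the same name
    return name

print(resolve("deepcopy_memo"))  # -> deepcopy
print(resolve("json_loads"))     # -> json_loads (uses its own name)
print(resolve("frobnicate"))     # -> warns once, then returns "frobnicate" as-is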

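Similarly, a small sketch of how the surviving merge strategies collapse multiple loop counts for one benchmark when merging with -u (the loop counts here are invented): both "max" and "min" always return one of the observed values, which is why the dropped "mean"/"median" options were the odd ones out.

# Hypothetical loop counts collected for one benchmark across several runs.
loops = [512, 1024, 1024, 2048]

merge_func = {"max": max, "min": min}["max"]
print(merge_func(loops))  # -> 2048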
0 commit comments
