
Commit 070e1ec

Adjust the loop generation script to account for the discrepancy between
data points and benchmarks, and drop the mean/median options for merging results, as they've become even more nonsensical now.
1 parent 2c95d27 commit 070e1ec

1 file changed: +172, -13 lines

Diff for: bench_runner/scripts/synthesize_loops_file.py

@@ -8,6 +8,166 @@
 
 import rich_argparse
 
+# pyperf/pyperformance run benchmarks by benchmark name, but store results,
+# including loops used, by data point name (as reported by the benchmark).
+# There's no mapping from the one to the other that we can easily use (other
+# than running benchmarks one by one and checking what data points they
+# report), so here's a hand-written mapping instead. Benchmarks that use
+# their own name for the data point are omitted. This will probably (slowly)
+# get out of date, but so be it.
+#
+# (Sorted by value, then key.)
+DATAPOINT_TO_BENCHMARK = {
+    "many_optionals": "argparse",
+    "subparsers": "argparse",
+    "async_tree_none": "async_tree",
+    "async_tree_none_tg": "async_tree_tg",
+    "bench_mp_pool": "concurrent_imap",
+    "bench_thread_pool": "concurrent_imap",
+    "deepcopy_memo": "deepcopy",
+    "deepcopy_reduce": "deepcopy",
+    "create_gc_cycles": "gc_collect",
+    "genshi_text": "genshi",
+    "genshi_xml": "genshi",
+    "logging_format": "logging",
+    "logging_silent": "logging",
+    "logging_simple": "logging",
+    "shortest_path": "networkx",
+    "connected_components": "networkx_connected_components",
+    "k_core": "networkx_k_core",
+    "pprint_pformat": "pprint",
+    "pprint_safe_repr": "pprint",
+    "scimark_fft": "scimark",
+    "scimark_lu": "scimark",
+    "scimark_monte_carlo": "scimark",
+    "scimark_sor": "scimark",
+    "scimark_sparse_mat_mult": "scimark",
+    "sqlglot_v2_normalize": "sqlglot_v2",
+    "sympy_expand": "sympy",
+    "sympy_integrate": "sympy",
+    "sympy_str": "sympy",
+    "sympy_sum": "sympy",
+    "xml_etree_generate": "xml_etree",
+    "xml_etree_iterparse": "xml_etree",
+    "xml_etree_parse": "xml_etree",
+    "xml_etree_process": "xml_etree",
+}
+
+# The list of bm_* directories in pyperformance and pyston-benchmarks, plus
+# the aliases defined in their MANIFEST files (entries with
+# '<local:$dirname>')
+KNOWN_BENCHMARKS = {
+    "2to3",
+    "aiohttp",
+    "argparse",
+    "argparse_subparsers",
+    "async_generators",
+    "async_tree",
+    "async_tree_cpu_io_mixed",
+    "async_tree_cpu_io_mixed_tg",
+    "async_tree_eager",
+    "async_tree_eager_cpu_io_mixed",
+    "async_tree_eager_cpu_io_mixed_tg",
+    "async_tree_eager_io",
+    "async_tree_eager_io_tg",
+    "async_tree_eager_memoization",
+    "async_tree_eager_memoization_tg",
+    "async_tree_eager_tg",
+    "async_tree_io",
+    "async_tree_io_tg",
+    "async_tree_memoization",
+    "async_tree_memoization_tg",
+    "async_tree_tg",
+    "asyncio_tcp",
+    "asyncio_tcp_ssl",
+    "asyncio_websockets",
+    "bpe_tokeniser",
+    "chameleon",
+    "chaos",
+    "comprehensions",
+    "concurrent_imap",
+    "coroutines",
+    "coverage",
+    "crypto_pyaes",
+    "dask",
+    "decimal_factorial",
+    "decimal_pi",
+    "deepcopy",
+    "deltablue",
+    "django_template",
+    "djangocms",
+    "docutils",
+    "dulwich_log",
+    "fannkuch",
+    "flaskblogging",
+    "float",
+    "gc_collect",
+    "gc_traversal",
+    "generators",
+    "genshi",
+    "gevent_hub",
+    "go",
+    "gunicorn",
+    "hexiom",
+    "hg_startup",
+    "html5lib",
+    "json",
+    "json_dumps",
+    "json_loads",
+    "kinto",
+    "logging",
+    "mako",
+    "mdp",
+    "meteor_contest",
+    "mypy2",
+    "nbody",
+    "networkx",
+    "networkx_connected_components",
+    "networkx_k_core",
+    "nqueens",
+    "pathlib",
+    "pickle",
+    "pickle_dict",
+    "pickle_list",
+    "pickle_pure_python",
+    "pidigits",
+    "pprint",
+    "pycparser",
+    "pyflate",
+    "pylint",
+    "python_startup",
+    "python_startup_no_site",
+    "pytorch_alexnet_inference",
+    "raytrace",
+    "regex_compile",
+    "regex_dna",
+    "regex_effbot",
+    "regex_v8",
+    "richards",
+    "richards_super",
+    "scimark",
+    "spectral_norm",
+    "sphinx",
+    "sqlalchemy_declarative",
+    "sqlalchemy_imperative",
+    "sqlglot_v2",
+    "sqlglot_v2_optimize",
+    "sqlglot_v2_parse",
+    "sqlglot_v2_transpile",
+    "sqlite_synth",
+    "sympy",
+    "telco",
+    "thrift",
+    "tomli_loads",
+    "tornado_http",
+    "typing_runtime_protocols",
+    "unpack_sequence",
+    "unpickle",
+    "unpickle_list",
+    "unpickle_pure_python",
+    "xml_etree",
+}
+
 
 def parse_result(results_file, benchmark_data):
     with results_file.open() as f:
@@ -20,7 +180,17 @@ def parse_result(results_file, benchmark_data):
         if "metadata" not in bm:
             raise RuntimeError(f"Invalid data {bm.keys()!r} in {results_file}")
             return
-        benchmark_data[bm["metadata"]["name"]].append(bm["metadata"]["loops"])
+        name = bm["metadata"]["name"]
+        name = DATAPOINT_TO_BENCHMARK.get(name, name)
+        assert name is not None  # to satisfy pyright.
+        if name not in KNOWN_BENCHMARKS:
+            print(
+                f"WARNING: unknown benchmark {name!r} in {results_file}",
+                file=sys.stderr,
+            )
+            # Avoid repeated warnings.
+            KNOWN_BENCHMARKS.add(name)
+        benchmark_data[name].append(bm["metadata"]["loops"])
 
 
 def _main(
@@ -35,12 +205,6 @@ def _main(
             errno.EEXIST,
             f"{loops_file} exists (use -f to overwrite, -u to merge data)",
         )
-    if update and merger in ("median", "mean"):
-        print(
-            f"WARNING: merging existing data with {merger!r} "
-            + "overrepresents new results",
-            file=sys.stderr,
-        )
     benchmark_data = collections.defaultdict(list)
     if update:
         parse_result(loops_file, benchmark_data)
@@ -50,11 +214,6 @@ def _main(
     merge_func = {
         "max": max,
         "min": min,
-        # The only merge strategy that may not produce one of the input
-        # values, and probably a bad idea to use.
-        "mean": lambda L: int(round(sum(L) / len(L))),
-        # Close enough to median for benchmarking work.
-        "median": lambda L: L[len(L) // 2],
     }[merger]
 
     # pyperformance expects a specific layout, and needs the top-level
@@ -91,7 +250,7 @@ def main():
     parser.add_argument(
         "-s",
        "--select",
-        choices=("max", "min", "median", "mean"),
+        choices=("max", "min"),
        default="max",
        help="how to merge multiple runs",
    )
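
To illustrate the new lookup, here is a minimal standalone sketch (not part of the commit; the table contents, data point names, and results file name are abbreviated or made up): a data point name read from a pyperf results file is first translated through DATAPOINT_TO_BENCHMARK, then checked against KNOWN_BENCHMARKS, warning only once per unrecognized name.

import sys

# Trimmed-down copies of the tables added above, for illustration only.
DATAPOINT_TO_BENCHMARK = {"deepcopy_memo": "deepcopy", "genshi_xml": "genshi"}
KNOWN_BENCHMARKS = {"deepcopy", "genshi", "json_loads"}

def resolve(name, results_file="results.json"):
    # Translate the data point name back to its benchmark name; benchmarks
    # that report under their own name simply fall through unchanged.
    name = DATAPOINT_TO_BENCHMARK.get(name, name)
    if name not in KNOWN_BENCHMARKS:
        print(f"WARNING: unknown benchmark {name!r} in {results_file}", file=sys.stderr)
        KNOWN_BENCHMARKS.add(name)  # avoid repeated warnings for the same name
    return name

print(resolve("deepcopy_memo"))  # -> deepcopy
print(resolve("json_loads"))     # -> json_loads (uses its own name)
print(resolve("frobnicate"))     # -> warns once, then returns "frobnicate" as-is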

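Similarly, a small sketch of how the surviving merge strategies collapse multiple loop counts for one benchmark when merging with -u (the loop counts here are invented): both "max" and "min" always return one of the observed values, which is why the dropped "mean"/"median" options were the odd ones out.

# Hypothetical loop counts collected for one benchmark across several runs.
loops = [512, 1024, 1024, 2048]

merge_func = {"max": max, "min": min}["max"]
print(merge_func(loops))  # -> 2048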
0 commit comments
