Skip to content

Refactor to do more work in Python #382

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 30 commits into from
Apr 11, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
30 commits
Select commit Hold shift + click to select a range
2124b6d
Refactor so as much work as possible is done in Python
mdboom Mar 28, 2025
aa60ce0
Remove _should_run checks in _pystats.src.yml
mdboom Mar 31, 2025
30baeea
Update minimum Python version
mdboom Mar 31, 2025
7ff47c7
Simplify bootstrapping
mdboom Mar 31, 2025
1feab3b
Bugfixes
mdboom Mar 31, 2025
3dc1057
Update bench_runner/scripts/workflow.py
mdboom Apr 7, 2025
3dbc349
Address comments from the PR
mdboom Apr 8, 2025
859b068
Port #388 to Python
mdboom Apr 8, 2025
eb43dc3
Port #389 to Python
mdboom Apr 8, 2025
4f84f80
Port #390 to Python
mdboom Apr 8, 2025
7c9d649
Bugfix for machines that don't match up with ordering
mdboom Apr 8, 2025
8915035
Don't use my personal fork
mdboom Apr 9, 2025
27dc96f
Extend PATH on Darwin for clang
mdboom Apr 9, 2025
a8b142f
Limit cores on specific machines
mdboom Apr 10, 2025
b0ce775
Bugfix
mdboom Apr 10, 2025
4eca84e
Investigate Windows failure
mdboom Apr 10, 2025
37c68b6
Windows bugfixes
mdboom Apr 10, 2025
6600630
Fix Windows again
mdboom Apr 10, 2025
5685297
Minor simplification
mdboom Apr 10, 2025
cead7aa
Add CHANGELOG.md
mdboom Apr 10, 2025
68c07ab
Fix Clang
mdboom Apr 10, 2025
2fd3804
Try to get CLANG on Windows working
mdboom Apr 11, 2025
5b1c259
Testing Windows syntax
mdboom Apr 11, 2025
9e29f83
Try to fix Windows
mdboom Apr 11, 2025
82b1a56
Try to fix Windows
mdboom Apr 11, 2025
c1a0bc1
Please HELP with Windows syntax
mdboom Apr 11, 2025
8e0c293
Escaping
mdboom Apr 11, 2025
f9f4ba8
Maybe I'll finally get lucky...
mdboom Apr 11, 2025
cc90a95
Try again
mdboom Apr 11, 2025
9684fa4
Reorder arguments
mdboom Apr 11, 2025
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
14 changes: 14 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
## Unreleased

## v2.0.0

Most of the work has moved from GitHub Actions `.yml` files to Python code in `workflow.py`.
In the future, this will allow supporting more workflow engines beyond just GitHub Actions.

**Migration note**: After running `python -m bench_runner install` to update your local files, be sure to add the new `workflow_bootstrap.py` file to your git repository.

### New configuration

Runners have a new configuration `use_cores` to control the number of CPU cores
used to build CPython. By default, this will use all available cores, but some
Cloud VMs require using fewer.
6 changes: 6 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -93,6 +93,12 @@ If you don't want a machine to be included when the user selects "machine == 'al
include_in_all = false
```

You may limit the number of cores used to build Python with the `use_cores` option. This may be necessary, for example, on cloud VMs.

```
use_cores = 2
```

### Try a benchmarking run

There are instructions for running a benchmarking action already in the `README.md` of your repo. Look there and give it a try!
Expand Down
4 changes: 2 additions & 2 deletions bench_runner/__main__.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,13 +14,13 @@
"Get the merge base of the selected commit, and determine if it should run"
),
"install": "Install the workflow files into a results repository",
"notify": "Send a notification about the completion of the workflow",
"profiling_plot": "Generate the profiling plots from raw data",
"purge": "Purge old results from a results repository",
"remove_benchmark": "Remove specific benchmarks from the data set",
"run_benchmarks": "Run benchmarks (in timing, pyperf or perf modes)",
"should_run": "Determine whether we need to rerun results for the current commit",
"synthesize_loops_file": "Create a loops file from multiple benchmark results",
"notify": "Send a notification about the completion of the workflow",
"workflow": "Run the full compile/benchmark workflow",
}

if __name__ == "__main__":
Expand Down
41 changes: 41 additions & 0 deletions bench_runner/benchmark_definitions.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,41 @@
from __future__ import annotations


import dataclasses
import hashlib
from pathlib import Path


from . import git


@dataclasses.dataclass
class BenchmarkRepo:
    """A benchmark suite repository pinned to a specific commit."""

    # Git commit hash the suite is pinned to (40-character hex).
    hash: str
    # URL to clone the repository from.
    url: str
    # Local directory name the repository is cloned into.
    dirname: str


# The benchmark suites used for every run, pinned to specific commits so
# that results remain comparable across runs.
BENCHMARK_REPOS = [
    BenchmarkRepo(
        "56d12a8fd7cc1432835965d374929bfa7f6f7a07",
        "https://github.com/python/pyperformance.git",
        "pyperformance",
    ),
    BenchmarkRepo(
        "265655e7f03ace13ec1e00e1ba299179e69f8a00",
        "https://github.com/pyston/python-macrobenchmarks.git",
        "pyston-benchmarks",
    ),
]


def get_benchmark_hash() -> str:
    """Return a short hex digest identifying the benchmark suites in use.

    For each repository, the hash of the local checkout is used when the
    directory exists, otherwise the pinned hash. The first 7 characters of
    each hash are folded into a SHA-256 digest, truncated to 6 characters.
    """
    # Renamed from `hash` to avoid shadowing the builtin.
    digest = hashlib.sha256()
    for repo in BENCHMARK_REPOS:
        repo_dir = Path(repo.dirname)
        if repo_dir.is_dir():
            current_hash = git.get_git_hash(repo_dir)
        else:
            current_hash = repo.hash
        digest.update(current_hash.encode("ascii")[:7])
    return digest.hexdigest()[:6]
13 changes: 13 additions & 0 deletions bench_runner/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,16 +4,29 @@

import functools
from pathlib import Path
from typing import Any

try:
import tomllib
except ImportError:
import tomli as tomllib # type: ignore


from . import runners


@functools.cache
def get_bench_runner_config(
filepath: Path | str = Path("bench_runner.toml"),
):
with Path(filepath).open("rb") as fd:
return tomllib.load(fd)


def get_config_for_current_runner() -> dict[str, Any]:
    """Return the configuration section for the runner matching this host.

    Returns an empty dict when no runner-specific configuration exists.
    """
    runner = runners.get_runner_for_hostname()
    runner_tables = get_bench_runner_config().get("runners", [])
    if not runner_tables:
        return {}
    return runner_tables[0].get(runner.nickname, {})
40 changes: 40 additions & 0 deletions bench_runner/git.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,9 +2,12 @@
from __future__ import annotations


import contextlib
import datetime
from pathlib import Path
import shutil
import subprocess
import re


import rich
Expand Down Expand Up @@ -128,3 +131,40 @@ def get_commits_between(dirname: PathLike, ref1: str, ref2: str) -> list[str]:
def bisect_commits(dirname: PathLike, ref1: str, ref2: str) -> str:
commits = get_commits_between(dirname, ref1, ref2)
return commits[len(commits) // 2]


def clone(
    dirname: PathLike,
    url: str,
    *,
    branch: str | None = None,
    depth: int | None = 1,
) -> None:
    """Clone ``url`` into ``dirname``, checked out at ``branch``.

    ``branch`` may be a branch name or a full 40-character commit hash.
    If ``dirname`` already contains a checkout of the requested hash it is
    reused; otherwise the directory is removed and cloned from scratch.
    """
    # A full 40-character hex string is treated as a commit hash rather
    # than a branch name.
    is_hash = re.match(r"^[0-9a-f]{40}$", branch) if branch else False

    dirname = Path(dirname)
    if dirname.is_dir():
        if is_hash and (dirname / ".git").is_dir() and get_git_hash(dirname) == branch:
            # This is a git repo, and the hash matches
            return
        # Anything else (wrong hash, branch checkout, non-repo directory):
        # start over so the checkout is in a known-good state.
        shutil.rmtree(dirname)

    # Fetching a hash and fetching a branch require different approaches

    if is_hash:
        assert branch is not None
        # `git clone` cannot fetch an arbitrary commit directly, so create
        # an empty repo and fetch just that commit from the remote.
        # NOTE(review): depth=None here would pass the literal string
        # "None" to `git fetch` — confirm callers always pass an int when
        # cloning a hash.
        dirname.mkdir()
        with contextlib.chdir(dirname):
            subprocess.check_call(["git", "init"])
            subprocess.check_call(["git", "remote", "add", "origin", url])
            subprocess.check_call(
                ["git", "fetch", "--depth", str(depth), "origin", branch]
            )
            subprocess.check_call(["git", "checkout", branch])
    else:
        args = ["git", "clone", url, str(dirname)]
        if branch is not None:
            args += ["--branch", branch]
        if depth is not None:
            args += ["--depth", str(depth)]
        subprocess.check_call(args)
3 changes: 1 addition & 2 deletions bench_runner/result.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,6 @@
from operator import itemgetter
from pathlib import Path
import re
import socket
import subprocess
import sys
from typing import Any, Callable, Iterable, Sequence
Expand Down Expand Up @@ -524,7 +523,7 @@ def from_scratch(
flags: Iterable[str] | None = None,
) -> "Result":
result = cls(
_clean(runners.get_nickname_for_hostname(socket.gethostname())),
_clean(runners.get_nickname_for_hostname()),
_clean(_get_architecture(python)),
_clean_for_url(fork),
_clean(ref[:20]),
Expand Down
11 changes: 9 additions & 2 deletions bench_runner/runners.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@

import functools
import os
import socket


from . import config
Expand Down Expand Up @@ -80,13 +81,19 @@ def get_runners_by_nickname() -> dict[str, Runner]:
return {x.nickname: x for x in get_runners()}


def get_nickname_for_hostname(hostname: str) -> str:
def get_nickname_for_hostname(hostname: str | None = None) -> str:
# The envvar BENCHMARK_MACHINE_NICKNAME is used to override the machine that
# results are reported for.
if "BENCHMARK_MACHINE_NICKNAME" in os.environ:
return os.environ["BENCHMARK_MACHINE_NICKNAME"]
return get_runners_by_hostname().get(hostname, unknown_runner).nickname
return get_runner_for_hostname(hostname).nickname


def get_runner_by_nickname(nickname: str) -> Runner:
    """Look up a runner by nickname, falling back to the unknown runner."""
    by_nickname = get_runners_by_nickname()
    return by_nickname.get(nickname, unknown_runner)


def get_runner_for_hostname(hostname: str | None = None) -> Runner:
    """Return the Runner for ``hostname`` (default: this machine's hostname).

    Unrecognized hostnames map to the unknown runner.
    """
    lookup_host = socket.gethostname() if hostname is None else hostname
    return get_runners_by_hostname().get(lookup_host, unknown_runner)
6 changes: 5 additions & 1 deletion bench_runner/scripts/generate_results.py
Original file line number Diff line number Diff line change
Expand Up @@ -126,7 +126,11 @@ def sort_runner_names(runner_names: Iterable[str]) -> list[str]:
def sorter(val):
    # Sort key: (position of the runner name's first word in the preferred
    # `order` list, full name). None sorts first, as the empty tuple
    # compares less than any non-empty tuple.
    if val is None:
        return ()
    try:
        idx = order.index(val.split()[0])
    except ValueError:
        # NOTE(review): runners not present in `order` get index -1, which
        # sorts them *before* all known runners — confirm that is intended
        # rather than sorting them last.
        idx = -1
    return idx, val

return sorted(runner_names, key=sorter)

Expand Down
4 changes: 2 additions & 2 deletions bench_runner/scripts/get_merge_base.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,10 +6,10 @@
import rich_argparse


from bench_runner import benchmark_definitions
from bench_runner import flags as mflags
from bench_runner import git
from bench_runner.result import has_result
from bench_runner import util
from bench_runner.util import PathLike


Expand Down Expand Up @@ -55,7 +55,7 @@ def _main(
machine,
pystats,
flags,
util.get_benchmark_hash(),
benchmark_definitions.get_benchmark_hash(),
progress=False,
)
is None
Expand Down
5 changes: 1 addition & 4 deletions bench_runner/scripts/install.py
Original file line number Diff line number Diff line change
Expand Up @@ -241,13 +241,11 @@ def generate_generic(dst: Any) -> Any:
def _main(check: bool) -> None:
WORKFLOW_PATH.mkdir(parents=True, exist_ok=True)

env = load_yaml(TEMPLATE_PATH / "env.yml")

for path in TEMPLATE_PATH.glob("*"):
if path.name.endswith(".src.yml") or path.name == "env.yml":
continue

if not (ROOT_PATH / path.name).is_file():
if not (ROOT_PATH / path.name).is_file() or path.suffix == ".py":
if check:
fail_check(ROOT_PATH / path.name)
else:
Expand All @@ -258,7 +256,6 @@ def _main(check: bool) -> None:
generator = GENERATORS.get(src_path.name, generate_generic)
src = load_yaml(src_path)
dst = generator(src)
dst = {"env": env, **dst}
write_yaml(dst_path, dst, check)


Expand Down
3 changes: 2 additions & 1 deletion bench_runner/scripts/run_benchmarks.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@
import rich_argparse


from bench_runner import benchmark_definitions
from bench_runner import flags
from bench_runner import git
from bench_runner.result import Result
Expand Down Expand Up @@ -278,7 +279,7 @@ def update_metadata(
merge_base = git.get_git_merge_base(cpython)
if merge_base is not None:
metadata["commit_merge_base"] = merge_base
metadata["benchmark_hash"] = util.get_benchmark_hash()
metadata["benchmark_hash"] = benchmark_definitions.get_benchmark_hash()
if run_id is not None:
metadata["github_action_url"] = f"{GITHUB_URL}/actions/runs/{run_id}"
actor = os.environ.get("GITHUB_ACTOR")
Expand Down
Loading