Skip to content

Refactor to do more work in Python #382

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 30 commits into from
Apr 11, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
30 commits
Select commit Hold shift + click to select a range
2124b6d
Refactor so as much work as possible is done in Python
mdboom Mar 28, 2025
aa60ce0
Remove _should_run checks in _pystats.src.yml
mdboom Mar 31, 2025
30baeea
Update minimum Python version
mdboom Mar 31, 2025
7ff47c7
Simplify bootstrapping
mdboom Mar 31, 2025
1feab3b
Bugfixes
mdboom Mar 31, 2025
3dc1057
Update bench_runner/scripts/workflow.py
mdboom Apr 7, 2025
3dbc349
Address comments from the PR
mdboom Apr 8, 2025
859b068
Port #388 to Python
mdboom Apr 8, 2025
eb43dc3
Port #389 to Python
mdboom Apr 8, 2025
4f84f80
Port #390 to Python
mdboom Apr 8, 2025
7c9d649
Bugfix for machines that don't match up with ordering
mdboom Apr 8, 2025
8915035
Don't use my personal fork
mdboom Apr 9, 2025
27dc96f
Extend PATH on Darwin for clang
mdboom Apr 9, 2025
a8b142f
Limit cores on specific machines
mdboom Apr 10, 2025
b0ce775
Bugfix
mdboom Apr 10, 2025
4eca84e
Investigate Windows failure
mdboom Apr 10, 2025
37c68b6
Windows bugfixes
mdboom Apr 10, 2025
6600630
Fix Windows again
mdboom Apr 10, 2025
5685297
Minor simplification
mdboom Apr 10, 2025
cead7aa
Add CHANGELOG.md
mdboom Apr 10, 2025
68c07ab
Fix Clang
mdboom Apr 10, 2025
2fd3804
Try to get CLANG on Windows working
mdboom Apr 11, 2025
5b1c259
Testing Windows syntax
mdboom Apr 11, 2025
9e29f83
Try to fix Windows
mdboom Apr 11, 2025
82b1a56
Try to fix Windows
mdboom Apr 11, 2025
c1a0bc1
Please HELP with Windows syntax
mdboom Apr 11, 2025
8e0c293
Escaping
mdboom Apr 11, 2025
f9f4ba8
Maybe I'll finally get lucky...
mdboom Apr 11, 2025
cc90a95
Try again
mdboom Apr 11, 2025
9684fa4
Reorder arguments
mdboom Apr 11, 2025
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
14 changes: 14 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
## Unreleased

## v2.0.0

Most of the work has moved from GitHub Actions `.yml` files to Python code in `workflow.py`.
In the future, this will allow supporting more workflow engines beyond just GitHub Actions.

**Migration note**: After running `python -m bench_runner install` to update your local files, be sure to add the new `workflow_bootstrap.py` file to your git repository.

### New configuration

Runners have a new configuration `use_cores` to control the number of CPU cores
used to build CPython. By default, this will use all available cores, but some
Cloud VMs require using fewer.
6 changes: 6 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -93,6 +93,12 @@ If you don't want a machine to be included when the user selects "machine == 'al
include_in_all = false
```

You may limit the number of cores used to build Python with the `use_cores` option. This may be necessary, for example, on cloud VMs.

```
use_cores = 2
```

### Try a benchmarking run

There are instructions for running a benchmarking action already in the `README.md` of your repo. Look there and give it a try!
Expand Down
4 changes: 2 additions & 2 deletions bench_runner/__main__.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,13 +14,13 @@
"Get the merge base of the selected commit, and determine if it should run"
),
"install": "Install the workflow files into a results repository",
"notify": "Send a notification about the completion of the workflow",
"profiling_plot": "Generate the profiling plots from raw data",
"purge": "Purge old results from a results repository",
"remove_benchmark": "Remove specific benchmarks from the data set",
"run_benchmarks": "Run benchmarks (in timing, pyperf or perf modes)",
"should_run": "Determine whether we need to rerun results for the current commit",
"synthesize_loops_file": "Create a loops file from multiple benchmark results",
"notify": "Send a notification about the completion of the workflow",
"workflow": "Run the full compile/benchmark workflow",
}

if __name__ == "__main__":
Expand Down
41 changes: 41 additions & 0 deletions bench_runner/benchmark_definitions.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,41 @@
from __future__ import annotations


import dataclasses
import hashlib
from pathlib import Path


from . import git


@dataclasses.dataclass
class BenchmarkRepo:
    """A benchmark suite repository pinned to a specific commit."""

    # Git commit hash the suite is pinned to (40-character hex).
    hash: str
    # URL to clone the repository from.
    url: str
    # Local directory name the repository is cloned into.
    dirname: str


# The benchmark suites used for every run, pinned to specific commits so
# that results remain comparable across runs.
BENCHMARK_REPOS = [
    BenchmarkRepo(
        "56d12a8fd7cc1432835965d374929bfa7f6f7a07",
        "https://github.com/python/pyperformance.git",
        "pyperformance",
    ),
    BenchmarkRepo(
        "265655e7f03ace13ec1e00e1ba299179e69f8a00",
        "https://github.com/pyston/python-macrobenchmarks.git",
        "pyston-benchmarks",
    ),
]


def get_benchmark_hash() -> str:
    """Return a short hex digest identifying the benchmark suites in use.

    For each repository, the hash of the local checkout is used when the
    directory exists, otherwise the pinned hash. The first 7 characters of
    each hash are folded into a SHA-256 digest, truncated to 6 characters.
    """
    # Renamed from `hash` to avoid shadowing the builtin.
    digest = hashlib.sha256()
    for repo in BENCHMARK_REPOS:
        repo_dir = Path(repo.dirname)
        if repo_dir.is_dir():
            current_hash = git.get_git_hash(repo_dir)
        else:
            current_hash = repo.hash
        digest.update(current_hash.encode("ascii")[:7])
    return digest.hexdigest()[:6]
13 changes: 13 additions & 0 deletions bench_runner/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,16 +4,29 @@

import functools
from pathlib import Path
from typing import Any

try:
import tomllib
except ImportError:
import tomli as tomllib # type: ignore


from . import runners


@functools.cache
def get_bench_runner_config(
filepath: Path | str = Path("bench_runner.toml"),
):
with Path(filepath).open("rb") as fd:
return tomllib.load(fd)


def get_config_for_current_runner() -> dict[str, Any]:
    """Return the configuration section for the runner matching this host.

    Returns an empty dict when no runner-specific configuration exists.
    """
    runner = runners.get_runner_for_hostname()
    runner_tables = get_bench_runner_config().get("runners", [])
    if not runner_tables:
        return {}
    return runner_tables[0].get(runner.nickname, {})
40 changes: 40 additions & 0 deletions bench_runner/git.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,9 +2,12 @@
from __future__ import annotations


import contextlib
import datetime
from pathlib import Path
import shutil
import subprocess
import re


import rich
Expand Down Expand Up @@ -128,3 +131,40 @@ def get_commits_between(dirname: PathLike, ref1: str, ref2: str) -> list[str]:
def bisect_commits(dirname: PathLike, ref1: str, ref2: str) -> str:
commits = get_commits_between(dirname, ref1, ref2)
return commits[len(commits) // 2]


def clone(
    dirname: PathLike,
    url: str,
    *,
    branch: str | None = None,
    depth: int | None = 1,
) -> None:
    """Clone ``url`` into ``dirname``, checked out at ``branch``.

    ``branch`` may be a branch name or a full 40-character commit hash.
    If ``dirname`` already contains a checkout of the requested hash it is
    reused; otherwise the directory is removed and cloned from scratch.
    """
    # A full 40-character hex string is treated as a commit hash rather
    # than a branch name.
    is_hash = re.match(r"^[0-9a-f]{40}$", branch) if branch else False

    dirname = Path(dirname)
    if dirname.is_dir():
        if is_hash and (dirname / ".git").is_dir() and get_git_hash(dirname) == branch:
            # This is a git repo, and the hash matches
            return
        # Anything else (wrong hash, branch checkout, non-repo directory):
        # start over so the checkout is in a known-good state.
        shutil.rmtree(dirname)

    # Fetching a hash and fetching a branch require different approaches

    if is_hash:
        assert branch is not None
        # `git clone` cannot fetch an arbitrary commit directly, so create
        # an empty repo and fetch just that commit from the remote.
        # NOTE(review): depth=None here would pass the literal string
        # "None" to `git fetch` — confirm callers always pass an int when
        # cloning a hash.
        dirname.mkdir()
        with contextlib.chdir(dirname):
            subprocess.check_call(["git", "init"])
            subprocess.check_call(["git", "remote", "add", "origin", url])
            subprocess.check_call(
                ["git", "fetch", "--depth", str(depth), "origin", branch]
            )
            subprocess.check_call(["git", "checkout", branch])
    else:
        args = ["git", "clone", url, str(dirname)]
        if branch is not None:
            args += ["--branch", branch]
        if depth is not None:
            args += ["--depth", str(depth)]
        subprocess.check_call(args)
3 changes: 1 addition & 2 deletions bench_runner/result.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,6 @@
from operator import itemgetter
from pathlib import Path
import re
import socket
import subprocess
import sys
from typing import Any, Callable, Iterable, Sequence
Expand Down Expand Up @@ -524,7 +523,7 @@ def from_scratch(
flags: Iterable[str] | None = None,
) -> "Result":
result = cls(
_clean(runners.get_nickname_for_hostname(socket.gethostname())),
_clean(runners.get_nickname_for_hostname()),
_clean(_get_architecture(python)),
_clean_for_url(fork),
_clean(ref[:20]),
Expand Down
11 changes: 9 additions & 2 deletions bench_runner/runners.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@

import functools
import os
import socket


from . import config
Expand Down Expand Up @@ -80,13 +81,19 @@ def get_runners_by_nickname() -> dict[str, Runner]:
return {x.nickname: x for x in get_runners()}


def get_nickname_for_hostname(hostname: str) -> str:
def get_nickname_for_hostname(hostname: str | None = None) -> str:
# The envvar BENCHMARK_MACHINE_NICKNAME is used to override the machine that
# results are reported for.
if "BENCHMARK_MACHINE_NICKNAME" in os.environ:
return os.environ["BENCHMARK_MACHINE_NICKNAME"]
return get_runners_by_hostname().get(hostname, unknown_runner).nickname
return get_runner_for_hostname(hostname).nickname


def get_runner_by_nickname(nickname: str) -> Runner:
    """Look up a runner by nickname, falling back to the unknown runner."""
    by_nickname = get_runners_by_nickname()
    return by_nickname.get(nickname, unknown_runner)


def get_runner_for_hostname(hostname: str | None = None) -> Runner:
    """Return the Runner for ``hostname`` (default: this machine's hostname).

    Unrecognized hostnames map to the unknown runner.
    """
    lookup_host = socket.gethostname() if hostname is None else hostname
    return get_runners_by_hostname().get(lookup_host, unknown_runner)
6 changes: 5 additions & 1 deletion bench_runner/scripts/generate_results.py
Original file line number Diff line number Diff line change
Expand Up @@ -126,7 +126,11 @@ def sort_runner_names(runner_names: Iterable[str]) -> list[str]:
def sorter(val):
    # Sort key: (position of the runner name's first word in the preferred
    # `order` list, full name). None sorts first, as the empty tuple
    # compares less than any non-empty tuple.
    if val is None:
        return ()
    try:
        idx = order.index(val.split()[0])
    except ValueError:
        # NOTE(review): runners not present in `order` get index -1, which
        # sorts them *before* all known runners — confirm that is intended
        # rather than sorting them last.
        idx = -1
    return idx, val

return sorted(runner_names, key=sorter)

Expand Down
4 changes: 2 additions & 2 deletions bench_runner/scripts/get_merge_base.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,10 +6,10 @@
import rich_argparse


from bench_runner import benchmark_definitions
from bench_runner import flags as mflags
from bench_runner import git
from bench_runner.result import has_result
from bench_runner import util
from bench_runner.util import PathLike


Expand Down Expand Up @@ -55,7 +55,7 @@ def _main(
machine,
pystats,
flags,
util.get_benchmark_hash(),
benchmark_definitions.get_benchmark_hash(),
progress=False,
)
is None
Expand Down
5 changes: 1 addition & 4 deletions bench_runner/scripts/install.py
Original file line number Diff line number Diff line change
Expand Up @@ -241,13 +241,11 @@ def generate_generic(dst: Any) -> Any:
def _main(check: bool) -> None:
WORKFLOW_PATH.mkdir(parents=True, exist_ok=True)

env = load_yaml(TEMPLATE_PATH / "env.yml")

for path in TEMPLATE_PATH.glob("*"):
if path.name.endswith(".src.yml") or path.name == "env.yml":
continue

if not (ROOT_PATH / path.name).is_file():
if not (ROOT_PATH / path.name).is_file() or path.suffix == ".py":
if check:
fail_check(ROOT_PATH / path.name)
else:
Expand All @@ -258,7 +256,6 @@ def _main(check: bool) -> None:
generator = GENERATORS.get(src_path.name, generate_generic)
src = load_yaml(src_path)
dst = generator(src)
dst = {"env": env, **dst}
write_yaml(dst_path, dst, check)


Expand Down
3 changes: 2 additions & 1 deletion bench_runner/scripts/run_benchmarks.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@
import rich_argparse


from bench_runner import benchmark_definitions
from bench_runner import flags
from bench_runner import git
from bench_runner.result import Result
Expand Down Expand Up @@ -278,7 +279,7 @@ def update_metadata(
merge_base = git.get_git_merge_base(cpython)
if merge_base is not None:
metadata["commit_merge_base"] = merge_base
metadata["benchmark_hash"] = util.get_benchmark_hash()
metadata["benchmark_hash"] = benchmark_definitions.get_benchmark_hash()
if run_id is not None:
metadata["github_action_url"] = f"{GITHUB_URL}/actions/runs/{run_id}"
actor = os.environ.get("GITHUB_ACTOR")
Expand Down
Loading