Skip to content

Commit 73c8481

Browse files
committed
Fix benchmark workflow bugs
- Switch to shortened SHA for PR
- Add PR number for unique output file artifact
- Disable comparison against tag due to lack of dependency group
- Add step to comment on PR
1 parent 33c5f3a commit 73c8481

2 files changed

Lines changed: 51 additions & 24 deletions

File tree

.github/scripts/compare_benchmarks.py

Lines changed: 20 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -2,10 +2,14 @@
22
"""Compare benchmark results across PR, main, and tag and output a markdown table."""
33

44
import json
5+
import logging
6+
import re
57
import statistics
68
from pathlib import Path
79
from typing import Literal, NamedTuple
810

11+
_logger = logging.getLogger(__name__)
12+
913

1014
class BenchmarkResult(NamedTuple):
1115
fullname: str
@@ -83,8 +87,8 @@ def _label(result: BenchmarkResult) -> str:
8387
def build_table(
8488
pr: dict[str, BenchmarkResult],
8589
main: dict[str, BenchmarkResult],
86-
tag: dict[str, BenchmarkResult],
87-
tag_name: str,
90+
tag: dict[str, BenchmarkResult] = {},
91+
tag_name: str | None = None,
8892
) -> str:
8993
all_keys = set(pr) | set(main) | set(tag)
9094
labels = [_label((pr.get(k) or main.get(k) or tag.get(k))) for k in all_keys]
@@ -110,10 +114,10 @@ def delta_row(label: str, ref: dict[str, BenchmarkResult]) -> str:
110114
divider,
111115
row("PR", pr),
112116
row("main", main),
113-
row(tag_name, tag),
117+
# row(tag_name, tag),
114118
divider.replace("-", ""),
115119
delta_row("PR vs main", main),
116-
delta_row(f"PR vs {tag_name}", tag),
120+
# delta_row(f"PR vs {tag_name}", tag),
117121
"",
118122
"> `median (mean ± std)`",
119123
"> ",
@@ -139,22 +143,27 @@ def main():
139143
args = parser.parse_args()
140144

141145
files = sorted(Path(".").glob(args.pattern))
142-
assert len(files) == 3, f"Expected 3 files, found {len(files)}: {files}"
146+
assert len(files) > 1, "Expected more than 1 file for benchmark comparison."
143147

144148
# Infer pr/main/tag from directory name
145149
parsed: dict[str, BenchmarkResult] = {}
146150
tag = None
147151
for f in files:
148-
stem = f.parent.name # e.g. "benchmark-pr"
149-
key = stem.split("-")[-1] # "pr", "main", tag
150-
if key not in ("pr", "main"):
152+
stem = f.name # e.g. "benchmark-pr-PR-#"
153+
key = stem.split("-")[1] # commit-sha, "main", tag
154+
155+
# Special cases
156+
if re.match(r"^v\d+\.\d+.\d+$", key):
151157
tag = key
158+
elif key != "main":
159+
key = "pr"
160+
152161
parsed[key] = parse_file(f)
153162
if tag is None:
154-
raise ValueError("Unknown tag")
155-
table = build_table(parsed["pr"], parsed["main"], parsed[tag], tag_name=tag)
163+
_logger.warning("Tag not found")
164+
table = build_table(parsed["pr"], parsed["main"], parsed.get(tag, {}), tag_name=tag)
156165
args.output.write_text(table)
157-
print(table)
166+
_logger.info(table)
158167

159168

160169
if __name__ == "__main__":

.github/workflows/benchmark.yaml

Lines changed: 31 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -4,31 +4,38 @@ on:
44
pull_request:
55
branches: [ "main" ]
66

7+
permissions:
8+
pull-requests: write
79
jobs:
8-
get-tag:
10+
prep:
911
runs-on: ubuntu-latest
1012
outputs:
1113
tag: ${{ steps.last_tag.outputs.tag }}
14+
short_sha: ${{ steps.short.outputs.sha }}
1215
steps:
1316
- uses: actions/checkout@v6
1417
with:
1518
fetch-tags: true
1619
fetch-depth: 0
1720
- id: last_tag
18-
run: echo ="tag=$(git describe --tags --abbrev=0)" >> $GITHUB_OUTPUT
21+
run: echo "tag=$(git describe --tags --abbrev=0)" >> $GITHUB_OUTPUT
22+
- id: short
23+
run: echo "sha=$(echo ${{ github.sha }} | cut -c1-7)" >> $GITHUB_OUTPUT
1924

2025
benchmark:
21-
needs: get-tag
26+
needs: prep
2227
runs-on: ubuntu-latest
2328
strategy:
29+
fail-fast: false
2430
matrix:
2531
target:
26-
- name: pr
32+
- name: ${{ needs.prep.outputs.short_sha }}
2733
ref: ${{ github.sha }}
2834
- name: main
2935
ref: main
30-
- name: ${{ needs.get_tag.outputs.tag }}
31-
ref: ${{ needs.get_tag.outputs.tag }}
36+
# Tag comparison disabled until next release (missing benchmark dependencies)
37+
# - name: ${{ needs.prep.outputs.tag }}
38+
# ref: ${{ needs.prep.outputs.tag }}
3239
steps:
3340
- uses: actions/checkout@v6
3441
with:
@@ -38,24 +45,35 @@ jobs:
3845
- run: uv sync --extra "cloud"
3946
- name: Run benchmarks
4047
run: |
41-
uv run .github/scripts/run_benchmarks.py \
42-
--output benchmark-${{matrix.target.name }}.json
48+
FILENAME="benchmark-${{ matrix.target.name }}-PR-${{ github.event.pull_request.number }}.json"
49+
uv run .github/scripts/run_benchmarks.py --output "$FILENAME"
50+
echo "REPORT_PATH=$FILENAME" >> $GITHUB_ENV
4351
- uses: actions/upload-artifact@v7
4452
with:
45-
name: benchmark-${{ matrix.target.name }}
46-
path: benchmark-${{ matrix.target.name }}.json
53+
name: benchmark-${{ matrix.target.name }}-PR-${{
54+
github.event.pull_request.number }}
55+
path: ${{ env.REPORT_PATH }}
56+
retention-days: 1
57+
overwrite: true
4758

4859
report:
49-
needs: [ get-tag, benchmark ]
60+
needs: [ prep, benchmark ]
5061
runs-on: ubuntu-latest
5162
steps:
5263
- uses: actions/checkout@v6
5364
- uses: astral-sh/setup-uv@v8.1.0
5465
- uses: actions/download-artifact@v8
5566
with:
5667
pattern: benchmark-*
68+
merge-multiple: true
69+
path: benchmark-results
5770
- name: Generate report
5871
run: |
5972
uv run .github/scripts/compare_benchmarks.py \
60-
--output benchmarks.md \
61-
--pattern benchmark-*.json
73+
--output "benchmarks.md" \
74+
--pattern "benchmark-results/benchmark-*-PR-${{ github.event.pull_request.number }}.json"
75+
- name: PR comment
76+
uses: peter-evans/create-or-update-comment@v5
77+
with:
78+
issue-number: ${{ github.event.pull_request.number }}
79+
body-path: "benchmarks.md"

0 commit comments

Comments
 (0)