Skip to content

Commit b35fc76

Browse files
committed
Fix benchmark workflow bugs
- Switch to shortened SHA for PR - Add PR for unique output file artifact - Disable comparison against tag due to lack of dependency group - Add step to comment on PR - Sort labels for comment
1 parent 33c5f3a commit b35fc76

3 files changed

Lines changed: 73 additions & 29 deletions

File tree

.github/scripts/compare_benchmarks.py

Lines changed: 31 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -2,10 +2,17 @@
22
"""Compare benchmark results across PR, main, and tag and output a markdown table."""
33

44
import json
5+
import logging
6+
import re
57
import statistics
68
from pathlib import Path
79
from typing import Literal, NamedTuple
810

11+
_logger = logging.getLogger(__name__)
12+
13+
14+
ALERT = 250 # Value (arbitrary; in ms) to indicate difference between benchmarks
15+
916

1017
class BenchmarkResult(NamedTuple):
1118
fullname: str
@@ -64,9 +71,9 @@ def _delta(pr: BenchmarkResult, ref: BenchmarkResult) -> str:
6471
if ref == 0:
6572
return "N/A"
6673
diff = _scale(pr.median - ref.median)
67-
pct = (pr.median / ref.median - 1) * 100
68-
icon = "🔴" if pct > 5 else "🟢" if pct < -5 else "⚪"
69-
return f"{icon} {diff:+.3f} ms ({pct:+.1f}%)"
74+
# Indicator for 250ms absolute diff (arbitrary)
75+
icon = "🔴" if diff > ALERT else "🟢" if diff < -ALERT else "⚪"
76+
return f"{icon} {diff:+.3f}ms"
7077

7178

7279
def _label(result: BenchmarkResult) -> str:
@@ -83,10 +90,13 @@ def _label(result: BenchmarkResult) -> str:
8390
def build_table(
8491
pr: dict[str, BenchmarkResult],
8592
main: dict[str, BenchmarkResult],
86-
tag: dict[str, BenchmarkResult],
87-
tag_name: str,
93+
tag: dict[str, BenchmarkResult] = {},
94+
tag_name: str | None = None,
8895
) -> str:
8996
all_keys = set(pr) | set(main) | set(tag)
97+
all_keys = sorted(
98+
all_keys, key=lambda x: (0 if "index" in x else 1 if "query" in x else 2, x)
99+
)
90100
labels = [_label((pr.get(k) or main.get(k) or tag.get(k))) for k in all_keys]
91101

92102
col_sep = " | "
@@ -110,14 +120,14 @@ def delta_row(label: str, ref: dict[str, BenchmarkResult]) -> str:
110120
divider,
111121
row("PR", pr),
112122
row("main", main),
113-
row(tag_name, tag),
123+
# row(tag_name, tag),
114124
divider.replace("-", ""),
115125
delta_row("PR vs main", main),
116-
delta_row(f"PR vs {tag_name}", tag),
126+
# delta_row(f"PR vs {tag_name}", tag),
117127
"",
118128
"> `median (mean ± std)`",
119129
"> ",
120-
"🔴 >5% slower &nbsp; ⚪ within 5% &nbsp; 🟢 >5% faster",
130+
f"> 🔴 >{ALERT}ms slower &nbsp; ⚪ within {ALERT}ms &nbsp; 🟢 >{ALERT}ms faster",
121131
]
122132
return "\n".join(lines)
123133

@@ -134,27 +144,33 @@ def main():
134144
parser.add_argument(
135145
"-o",
136146
"--output",
147+
type=Path,
137148
help="Output markdown filepath containing benchmark comparisons",
138149
)
139150
args = parser.parse_args()
140151

141152
files = sorted(Path(".").glob(args.pattern))
142-
assert len(files) == 3, f"Expected 3 files, found {len(files)}: {files}"
153+
assert len(files) > 1, "Expected more than 1 file for benchmark comparison."
143154

144155
# Infer pr/main/tag from directory name
145156
parsed: dict[str, BenchmarkResult] = {}
146157
tag = None
147158
for f in files:
148-
stem = f.parent.name # e.g. "benchmark-pr"
149-
key = stem.split("-")[-1] # "pr", "main", tag
150-
if key not in ("pr", "main"):
159+
stem = f.name # e.g. "benchmark-pr-PR-#"
160+
key = stem.split("-")[1] # commit-sha, "main", tag
161+
162+
# Special cases
163+
if re.match(r"^v\d+\.\d+.\d+$", key):
151164
tag = key
165+
elif key != "main":
166+
key = "pr"
167+
152168
parsed[key] = parse_file(f)
153169
if tag is None:
154-
raise ValueError("Unknown tag")
155-
table = build_table(parsed["pr"], parsed["main"], parsed[tag], tag_name=tag)
170+
_logger.warning("Tag not found")
171+
table = build_table(parsed["pr"], parsed["main"], parsed.get(tag, {}), tag_name=tag)
156172
args.output.write_text(table)
157-
print(table)
173+
_logger.info(table)
158174

159175

160176
if __name__ == "__main__":

.github/scripts/run_benchmarks.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -14,7 +14,7 @@ def main():
1414
pytest.main(
1515
[
1616
"-m",
17-
"benchmark and not cloud",
17+
"benchmark",
1818
"--benchmark-save-data",
1919
f"--benchmark-json={args.output}",
2020
"--benchmark-time-unit=ms",

.github/workflows/benchmark.yaml

Lines changed: 41 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -4,31 +4,38 @@ on:
44
pull_request:
55
branches: [ "main" ]
66

7+
permissions:
8+
pull-requests: write
79
jobs:
8-
get-tag:
10+
prep:
911
runs-on: ubuntu-latest
1012
outputs:
1113
tag: ${{ steps.last_tag.outputs.tag }}
14+
short_sha: ${{ steps.short.outputs.sha }}
1215
steps:
1316
- uses: actions/checkout@v6
1417
with:
1518
fetch-tags: true
1619
fetch-depth: 0
1720
- id: last_tag
18-
run: echo ="tag=$(git describe --tags --abbrev=0)" >> $GITHUB_OUTPUT
21+
run: echo "tag=$(git describe --tags --abbrev=0)" >> $GITHUB_OUTPUT
22+
- id: short
23+
run: echo "sha=$(echo ${{ github.sha }} | cut -c1-7)" >> $GITHUB_OUTPUT
1924

2025
benchmark:
21-
needs: get-tag
26+
needs: prep
2227
runs-on: ubuntu-latest
2328
strategy:
29+
fail-fast: false
2430
matrix:
2531
target:
26-
- name: pr
32+
- name: ${{ needs.prep.outputs.short_sha }}
2733
ref: ${{ github.sha }}
2834
- name: main
2935
ref: main
30-
- name: ${{ needs.get_tag.outputs.tag }}
31-
ref: ${{ needs.get_tag.outputs.tag }}
36+
# Tag comparison disabled until next release (missing benchmark dependencies)
37+
# - name: ${{ needs.prep.outputs.tag }}
38+
# ref: ${{ needs.prep.outputs.tag }}
3239
steps:
3340
- uses: actions/checkout@v6
3441
with:
@@ -38,24 +45,45 @@ jobs:
3845
- run: uv sync --extra "cloud"
3946
- name: Run benchmarks
4047
run: |
41-
uv run .github/scripts/run_benchmarks.py \
42-
--output benchmark-${{matrix.target.name }}.json
48+
FILENAME="benchmark-${{ matrix.target.name }}-PR-${{ github.event.pull_request.number }}.json"
49+
uv run .github/scripts/run_benchmarks.py --output "$FILENAME"
50+
echo "REPORT_PATH=$FILENAME" >> $GITHUB_ENV
4351
- uses: actions/upload-artifact@v7
4452
with:
45-
name: benchmark-${{ matrix.target.name }}
46-
path: benchmark-${{ matrix.target.name }}.json
53+
name: benchmark-${{ matrix.target.name }}-PR-${{
54+
github.event.pull_request.number }}
55+
path: ${{ env.REPORT_PATH }}
56+
retention-days: 1
57+
overwrite: true
4758

4859
report:
49-
needs: [ get-tag, benchmark ]
60+
needs: [ prep, benchmark ]
5061
runs-on: ubuntu-latest
5162
steps:
5263
- uses: actions/checkout@v6
5364
- uses: astral-sh/setup-uv@v8.1.0
5465
- uses: actions/download-artifact@v8
5566
with:
5667
pattern: benchmark-*
68+
merge-multiple: true
69+
path: benchmark-results
5770
- name: Generate report
5871
run: |
5972
uv run .github/scripts/compare_benchmarks.py \
60-
--output benchmarks.md \
61-
--pattern benchmark-*.json
73+
--output "benchmarks.md" \
74+
--pattern "benchmark-results/benchmark-*-PR-${{ github.event.pull_request.number }}.json"
75+
- name: Find Comment
76+
uses: peter-evans/find-comment@v3
77+
id: fc
78+
with:
79+
issue-number: ${{ github.event.pull_request.number }}
80+
comment-author: "github-actions[bot]"
81+
body-includes: "Benchmark Results"
82+
83+
- name: Create / update comment
84+
uses: peter-evans/create-or-update-comment@v5
85+
with:
86+
comment-id: ${{ steps.fc.outputs.comment-id }}
87+
issue-number: ${{ github.event.pull_request.number }}
88+
body-path: "benchmarks.md"
89+
edit-mode: replace

0 commit comments

Comments
 (0)