
Commit fc074bd

feat: add benchmark workflow and scripts
1 parent feccd90 commit fc074bd

11 files changed: +1016 -0 lines changed

Lines changed: 253 additions & 0 deletions
@@ -0,0 +1,253 @@
import pathlib, re, sys

try:
    p = pathlib.Path("comparison.md")
    if not p.exists():
        print("comparison.md not found, skipping post-processing.")
        sys.exit(0)

    lines = p.read_text(encoding="utf-8").splitlines()
    processed_lines = []
    in_code = False

    def strip_worker_suffix(text: str) -> str:
        return re.sub(r"(\S+?)-\d+(\s|$)", r"\1\2", text)

    def get_icon(diff_val: float) -> str:
        if diff_val > 10:
            return "🐌"
        if diff_val < -10:
            return "🚀"
        return "➡️"

    def clean_superscripts(text: str) -> str:
        return re.sub(r"[¹²³⁴⁵⁶⁷⁸⁹⁰]", "", text)

    def parse_val(token: str):
        if "%" in token or "=" in token:
            return None
        token = clean_superscripts(token)
        token = token.split("±")[0].strip()
        token = token.split("(")[0].strip()
        if not token:
            return None

        m = re.match(r"^([-+]?\d*\.?\d+)([a-zA-Zµ]+)?$", token)
        if not m:
            return None
        try:
            val = float(m.group(1))
        except ValueError:
            return None
        suffix = (m.group(2) or "").replace("µ", "u")
        multipliers = {
            "n": 1e-9,
            "ns": 1e-9,
            "u": 1e-6,
            "us": 1e-6,
            "m": 1e-3,
            "ms": 1e-3,
            "s": 1.0,
            "k": 1e3,
            "K": 1e3,
            "M": 1e6,
            "G": 1e9,
            "Ki": 1024.0,
            "Mi": 1024.0**2,
            "Gi": 1024.0**3,
            "Ti": 1024.0**4,
            "B": 1.0,
            "B/op": 1.0,
            "C": 1.0,  # tolerate degree/unit markers that don't affect ratio
        }
        return val * multipliers.get(suffix, 1.0)

    def extract_two_numbers(tokens):
        found = []
        for t in tokens[1:]:  # skip name
            if t in {"±", "∞", "~", "│", "│"}:
                continue
            if "%" in t or "=" in t:
                continue
            val = parse_val(t)
            if val is not None:
                found.append(val)
                if len(found) == 2:
                    break
        return found

    # Pass 0:
    # 1. find a header line with pipes to derive alignment hint
    # 2. calculate max content width to ensure right-most alignment
    max_content_width = 0

    for line in lines:
        if line.strip() == "```":
            in_code = not in_code
            continue
        if not in_code:
            continue

        # Skip footnotes/meta for width calculation
        if re.match(r"^\s*[¹²³⁴⁵⁶⁷⁸⁹⁰]", line) or re.search(r"need\s*>?=\s*\d+\s+samples", line):
            continue
        if not line.strip() or line.strip().startswith(("goos:", "goarch:", "pkg:", "cpu:")):
            continue
        # Header lines are handled separately in Pass 1
        if "│" in line and ("vs base" in line or "old" in line or "new" in line):
            continue

        # It's likely a data line
        # Check if it has an existing percentage we might move/align
        curr_line = strip_worker_suffix(line).rstrip()
        pct_match = re.search(r"([+-]?\d+\.\d+)%", curr_line)
        if pct_match:
            # If we are going to realign this, we count width up to the percentage
            w = len(curr_line[: pct_match.start()].rstrip())
        else:
            w = len(curr_line)

        if w > max_content_width:
            max_content_width = w

    # Calculate global alignment target for Diff column
    # Ensure target column is beyond the longest line with some padding
    diff_col_start = max_content_width + 4

    # Calculate right boundary (pipe) position
    # Diff column width ~12 chars (e.g. "+100.00% 🚀")
    right_boundary = diff_col_start + 14

    for line in lines:
        if line.strip() == "```":
            in_code = not in_code
            processed_lines.append(line)
            continue

        if not in_code:
            processed_lines.append(line)
            continue

        # keep footnotes untouched
        if re.match(r"^\s*[¹²³⁴⁵⁶⁷⁸⁹⁰]", line) or re.search(r"need\s*>?=\s*\d+\s+samples", line):
            processed_lines.append(line)
            continue

        # header lines: ensure last column labeled Diff and force alignment
        if "│" in line and ("vs base" in line or "old" in line or "new" in line):
            # Strip trailing pipe and whitespace
            stripped_header = line.rstrip().rstrip("│").rstrip()

            # If "vs base" is present, ensure we don't duplicate "Diff" if it's already there
            # But we want to enforce OUR alignment, so we might strip existing Diff
            stripped_header = re.sub(r"\s+Diff\s*$", "", stripped_header, flags=re.IGNORECASE)
            stripped_header = re.sub(r"\s+Delta\b", "", stripped_header, flags=re.IGNORECASE)

            # Pad to diff_col_start
            padding = diff_col_start - len(stripped_header)
            if padding < 2:
                padding = 2  # minimum spacing
            # If header is wider than data (unlikely but possible), adjust diff_col_start
            # But for now let's trust max_content_width or just append

            if len(stripped_header) < diff_col_start:
                new_header = stripped_header + " " * (diff_col_start - len(stripped_header))
            else:
                new_header = stripped_header + " "

            # Add Diff column header if it's the second header row (vs base)
            if "vs base" in line or "new pr.json" in line:
                new_header += "Diff"

            # Add closing pipe at the right boundary
            current_len = len(new_header)
            if current_len < right_boundary:
                new_header += " " * (right_boundary - current_len)

            new_header += "│"
            processed_lines.append(new_header)
            continue

        # non-data meta lines
        if not line.strip() or line.strip().startswith(("goos:", "goarch:", "pkg:")):
            processed_lines.append(line)
            continue

        original_line = line
        line = strip_worker_suffix(line)
        tokens = line.split()
        if not tokens:
            processed_lines.append(line)
            continue

        numbers = extract_two_numbers(tokens)
        pct_match = re.search(r"([+-]?\d+\.\d+)%", line)

        # Helper to align and append
        def append_aligned(left_part, content):
            if len(left_part) < diff_col_start:
                aligned = left_part + " " * (diff_col_start - len(left_part))
            else:
                aligned = left_part + " "

            # Ensure content doesn't exceed right boundary (visual check only, we don't truncate)
            # But users asked not to exceed header pipe.
            # Header pipe is at right_boundary.
            # Content starts at diff_col_start.
            # So content length should be <= right_boundary - diff_col_start
            return f"{aligned}{content}"

        # Special handling for geomean when values missing or zero
        is_geomean = tokens[0] == "geomean"
        if is_geomean and (len(numbers) < 2 or any(v == 0 for v in numbers)) and not pct_match:
            leading = re.match(r"^\s*", line).group(0)
            left = f"{leading}geomean"
            processed_lines.append(append_aligned(left, "n/a (has zero)"))
            continue

        # when both values are zero, force diff = 0 and align
        if len(numbers) == 2 and numbers[0] == 0 and numbers[1] == 0:
            diff_val = 0.0
            icon = get_icon(diff_val)
            left = line.rstrip()
            processed_lines.append(append_aligned(left, f"{diff_val:+.2f}% {icon}"))
            continue

        # recompute diff when we have two numeric values
        if len(numbers) == 2 and numbers[0] != 0:
            diff_val = (numbers[1] - numbers[0]) / numbers[0] * 100
            icon = get_icon(diff_val)

            left = line
            if pct_match:
                left = line[: pct_match.start()].rstrip()
            else:
                left = line.rstrip()

            processed_lines.append(append_aligned(left, f"{diff_val:+.2f}% {icon}"))
            continue

        # fallback: align existing percentage to Diff column and (re)append icon
        if pct_match:
            try:
                pct_val = float(pct_match.group(1))
                icon = get_icon(pct_val)

                left = line[: pct_match.start()].rstrip()
                suffix = line[pct_match.end() :]
                # Remove any existing icon after the percentage to avoid duplicates
                suffix = re.sub(r"\s*(🐌|🚀|➡️)", "", suffix)

                processed_lines.append(append_aligned(left, f"{pct_val:+.2f}% {icon}{suffix}"))
            except ValueError:
                processed_lines.append(line)
            continue

        # If we cannot parse numbers or percentages, keep the original (only worker suffix stripped)
        processed_lines.append(line)

    p.write_text("\n".join(processed_lines) + "\n", encoding="utf-8")

except Exception as e:
    print(f"Error post-processing comparison.md: {e}")
    sys.exit(1)
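
The script above post-processes the benchstat-style tables inside comparison.md: it strips trailing "-N" worker suffixes from benchmark names, converts value tokens such as "11.5µs" or "2Ki" into plain numbers, recomputes the old-to-new percentage difference, and right-aligns a Diff column decorated with 🐌 / 🚀 / ➡️ markers. As a quick standalone sanity check of the two regular expressions it relies on (the sample benchmark line below is invented for illustration):

import re

# Sample benchstat-style data line, invented for illustration only.
sample = "BenchmarkEnforce-8  11.5µs ± 2%"

# strip_worker_suffix: drop the trailing "-N" worker/GOMAXPROCS index from the name.
print(re.sub(r"(\S+?)-\d+(\s|$)", r"\1\2", sample))  # name becomes "BenchmarkEnforce"

# parse_val: split a token into number and unit suffix; "µs" is normalized to "us",
# which the multipliers table scales by 1e-6.
m = re.match(r"^([-+]?\d*\.?\d+)([a-zA-Zµ]+)?$", "11.5µs")
print(float(m.group(1)), m.group(2))  # 11.5 µs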
Lines changed: 32 additions & 0 deletions
@@ -0,0 +1,32 @@
module.exports = async ({github, context, core}) => {
  try {
    const artifacts = await github.rest.actions.listWorkflowRunArtifacts({
      owner: context.repo.owner,
      repo: context.repo.repo,
      run_id: context.payload.workflow_run.id,
    });

    const matchArtifact = artifacts.data.artifacts.find((artifact) => {
      return artifact.name == "benchmark-results";
    });

    if (!matchArtifact) {
      core.setFailed("No artifact named 'benchmark-results' found.");
      return;
    }

    const download = await github.rest.actions.downloadArtifact({
      owner: context.repo.owner,
      repo: context.repo.repo,
      artifact_id: matchArtifact.id,
      archive_format: 'zip',
    });

    const fs = require('fs');
    const path = require('path');
    const workspace = process.env.GITHUB_WORKSPACE;
    fs.writeFileSync(path.join(workspace, 'benchmark-results.zip'), Buffer.from(download.data));
  } catch (error) {
    core.setFailed(`Failed to download artifact: ${error.message}`);
  }
};
Lines changed: 90 additions & 0 deletions
@@ -0,0 +1,90 @@
import json
import os
import sys
import datetime
import re


def normalize_name(name):
    name = re.sub(r"^test_benchmark_", "", name)
    parts = name.split("_")
    new_parts = []
    for p in parts:
        if p.lower() in ["rbac", "abac", "acl", "api", "rest"]:
            new_parts.append(p.upper())
        else:
            new_parts.append(p.capitalize())
    return "".join(new_parts)


def main():
    if len(sys.argv) < 3:
        print("Usage: python format_benchmark_data.py input.json output.json")
        sys.exit(1)

    input_path = sys.argv[1]
    output_path = sys.argv[2]

    try:
        with open(input_path, "r", encoding="utf-8") as f:
            data = json.load(f)
    except Exception as e:
        print(f"Error loading {input_path}: {e}")
        sys.exit(1)

    # Get commit info from environment variables
    # These should be set in the GitHub Action
    commit_info = {
        "author": {
            "email": os.environ.get("COMMIT_AUTHOR_EMAIL", ""),
            "name": os.environ.get("COMMIT_AUTHOR_NAME", ""),
            "username": os.environ.get("COMMIT_AUTHOR_USERNAME", ""),
        },
        "committer": {
            "email": os.environ.get("COMMIT_COMMITTER_EMAIL", ""),
            "name": os.environ.get("COMMIT_COMMITTER_NAME", ""),
            "username": os.environ.get("COMMIT_COMMITTER_USERNAME", ""),
        },
        "distinct": True,  # Assuming true for push to master
        "id": os.environ.get("COMMIT_ID", ""),
        "message": os.environ.get("COMMIT_MESSAGE", ""),
        "timestamp": os.environ.get("COMMIT_TIMESTAMP", ""),
        "tree_id": os.environ.get("COMMIT_TREE_ID", ""),
        "url": os.environ.get("COMMIT_URL", ""),
    }

    # Get CPU count
    cpu_count = data.get("machine_info", {}).get("cpu", {}).get("count")
    if not cpu_count:
        cpu_count = os.cpu_count() or 1

    benches = []
    for bench in data.get("benchmarks", []):
        # Convert mean (seconds) to ns
        val_ns = bench["stats"]["mean"] * 1e9

        # Format extra info
        total_ops = bench["stats"]["rounds"] * bench["stats"]["iterations"]
        extra = f"{total_ops} times"

        # Create entry
        benches.append(
            {"name": normalize_name(bench["name"]), "value": round(val_ns, 2), "unit": "ns/op", "extra": extra}
        )

    output_data = {
        "commit": commit_info,
        "date": int(datetime.datetime.now().timestamp() * 1000),  # Current timestamp in ms
        "tool": "python",
        "procs": cpu_count,
        "benches": benches,
    }

    with open(output_path, "w", encoding="utf-8") as f:
        json.dump(output_data, f, indent=2)

    print(f"Successfully formatted benchmark data to {output_path}")


if __name__ == "__main__":
    main()
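
This last script (its usage line names it format_benchmark_data.py) converts a pytest-benchmark JSON report into per-benchmark ns/op entries plus commit metadata taken from COMMIT_* environment variables, presumably for the publishing step of the new benchmark workflow. A minimal smoke test, assuming the script is saved as format_benchmark_data.py in the current directory and that the input below mirrors only the parts of pytest-benchmark's JSON the script actually reads:

import json, os, pathlib, subprocess, tempfile

# Hypothetical input shaped like a pytest-benchmark report (illustration only).
sample = {
    "machine_info": {"cpu": {"count": 8}},
    "benchmarks": [
        {
            "name": "test_benchmark_rbac_enforce",
            "stats": {"mean": 0.000012, "rounds": 500, "iterations": 10},
        }
    ],
}

with tempfile.TemporaryDirectory() as tmp:
    inp = pathlib.Path(tmp, "bench.json")
    out = pathlib.Path(tmp, "formatted.json")
    inp.write_text(json.dumps(sample), encoding="utf-8")
    # The COMMIT_* variables are optional; the script falls back to empty strings.
    env = {**os.environ, "COMMIT_ID": "fc074bd", "COMMIT_MESSAGE": "feat: add benchmark workflow and scripts"}
    subprocess.run(["python", "format_benchmark_data.py", str(inp), str(out)], check=True, env=env)
    print(out.read_text(encoding="utf-8"))
    # Expect one entry named "RBACEnforce" with value 12000.0 ns/op and extra "5000 times".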
