
Commit e1308ee

Enhance benchmark CI robustness and separate base/PR steps
1 parent 6172cf5 commit e1308ee

File tree: 9 files changed, +784 / -66 lines

Lines changed: 266 additions & 0 deletions
@@ -0,0 +1,266 @@
```python
import pathlib
import re
import sys

try:
    p = pathlib.Path("comparison.md")
    if not p.exists():
        print("comparison.md not found, skipping post-processing.")
        sys.exit(0)

    lines = p.read_text(encoding="utf-8").splitlines()
    processed_lines = []
    in_code = False
    delta_col = None   # record "Diff" column start per table
    align_hint = None  # derived from benchstat header last pipe position

    ALIGN_COLUMN = 60  # fallback alignment when header not found

    def strip_worker_suffix(text: str) -> str:
        # Drop the "-N" GOMAXPROCS suffix from benchmark names, e.g. "BenchmarkFoo-8".
        return re.sub(r'(\S+?)-\d+(\s|$)', r'\1\2', text)

    def get_icon(diff_val: float) -> str:
        if diff_val > 10:
            return "🐌"
        if diff_val < -10:
            return "🚀"
        return "➡️"

    def clean_superscripts(text: str) -> str:
        # benchstat marks footnotes with superscript digits; strip them before parsing.
        return re.sub(r'[¹²³⁴⁵⁶⁷⁸⁹⁰]', '', text)

    def parse_val(token: str):
        if '%' in token or '=' in token:
            return None
        token = clean_superscripts(token)
        token = token.split('±')[0].strip()
        token = token.split('(')[0].strip()
        if not token:
            return None

        m = re.match(r'^([-+]?\d*\.?\d+)([a-zA-Zµ]+)?$', token)
        if not m:
            return None
        try:
            val = float(m.group(1))
        except ValueError:
            return None
        suffix = (m.group(2) or "").replace("µ", "u")
        if not suffix:
            return val

        multipliers = {
            "n": 1e-9,
            "ns": 1e-9,
            "u": 1e-6,
            "us": 1e-6,
            "m": 1e-3,
            "ms": 1e-3,
            "s": 1.0,
            "k": 1e3,
            "K": 1e3,
            "M": 1e6,
            "G": 1e9,
            "Ki": 1024.0,
            "Mi": 1024.0**2,
            "Gi": 1024.0**3,
            "Ti": 1024.0**4,
            "B": 1.0,
            "B/op": 1.0,
            "C": 1.0,  # tolerate degree/unit markers that don't affect the ratio
        }

        mult = multipliers.get(suffix)
        if mult is None:
            raise ValueError(f"Unknown unit suffix: {suffix}")

        return val * mult

    def extract_two_numbers(tokens):
        found = []
        for t in tokens[1:]:  # skip the benchmark name
            if t in {"±", "∞", "~", "│"}:
                continue
            if '%' in t or '=' in t:
                continue
            val = parse_val(t)
            if val is not None:
                found.append(val)
                if len(found) == 2:
                    break
        return found

    # Pass 0:
    # 1. find a header line with pipes to derive an alignment hint
    # 2. calculate the max content width to ensure right-most alignment
    max_content_width = 0

    for line in lines:
        if line.strip() == "```":
            in_code = not in_code
            continue
        if not in_code:
            continue

        # Skip footnotes/meta lines for the width calculation
        if re.match(r'^\s*[¹²³⁴⁵⁶⁷⁸⁹⁰]', line) or re.search(r'need\s*>?=\s*\d+\s+samples', line):
            continue
        if not line.strip() or line.strip().startswith(('goos:', 'goarch:', 'pkg:', 'cpu:')):
            continue
        # Header lines are handled separately in Pass 1
        if '│' in line and ('vs base' in line or 'old' in line or 'new' in line):
            continue

        # It's likely a data line.
        # Check whether it has an existing percentage we might move/align.
        curr_line = strip_worker_suffix(line).rstrip()
        pct_match = re.search(r'([+-]?\d+\.\d+)%', curr_line)
        if pct_match:
            # If we are going to realign this, count the width up to the percentage
            w = len(curr_line[:pct_match.start()].rstrip())
        else:
            w = len(curr_line)

        if w > max_content_width:
            max_content_width = w

    # Calculate the global alignment target for the Diff column:
    # the target sits beyond the longest line, with some padding.
    diff_col_start = max_content_width + 4

    # Calculate the right boundary (pipe) position.
    # The Diff column is ~12 chars wide (e.g. "+100.00% 🚀").
    right_boundary = diff_col_start + 14

    in_code = False  # reset fence state in case Pass 0 ended inside an unclosed block

    for line in lines:
        if line.strip() == "```":
            in_code = not in_code
            processed_lines.append(line)
            continue

        if not in_code:
            processed_lines.append(line)
            continue

        # footnotes are kept untouched
        if re.match(r'^\s*[¹²³⁴⁵⁶⁷⁸⁹⁰]', line) or re.search(r'need\s*>?=\s*\d+\s+samples', line):
            processed_lines.append(line)
            continue

        # header lines: ensure the last column is labeled Diff and force alignment
        if '│' in line and ('vs base' in line or 'old' in line or 'new' in line):
            # Strip the trailing pipe and whitespace
            stripped_header = line.rstrip().rstrip('│').rstrip()

            # If a "Diff"/"Delta" label is already present, strip it so we can
            # enforce our own alignment without duplicating it.
            stripped_header = re.sub(r'\s+Diff\s*$', '', stripped_header, flags=re.IGNORECASE)
            stripped_header = re.sub(r'\s+Delta\b', '', stripped_header, flags=re.IGNORECASE)

            # Pad to diff_col_start; fall back to a single space if the header
            # is already wider than the data (unlikely but possible).
            if len(stripped_header) < diff_col_start:
                new_header = stripped_header + " " * (diff_col_start - len(stripped_header))
            else:
                new_header = stripped_header + " "

            # Add the Diff column label on the second header row (vs base)
            if 'vs base' in line or 'new pr.json' in line:
                new_header += "Diff"

            # Add the closing pipe at the right boundary
            current_len = len(new_header)
            if current_len < right_boundary:
                new_header += " " * (right_boundary - current_len)

            new_header += "│"
            processed_lines.append(new_header)
            continue

        # non-data meta lines
        if not line.strip() or line.strip().startswith(('goos:', 'goarch:', 'pkg:')):
            processed_lines.append(line)
            continue

        line = strip_worker_suffix(line)
        tokens = line.split()
        if not tokens:
            processed_lines.append(line)
            continue

        numbers = extract_two_numbers(tokens)
        pct_match = re.search(r'([+-]?\d+\.\d+)%', line)

        # Helper to align and append
        def append_aligned(left_part, content):
            if len(left_part) < diff_col_start:
                aligned = left_part + " " * (diff_col_start - len(left_part))
            else:
                aligned = left_part + " "

            # Content starts at diff_col_start; to stay inside the header pipe at
            # right_boundary its length should be <= right_boundary - diff_col_start.
            # This is a visual constraint only; we never truncate.
            return f"{aligned}{content}"

        # Special handling for geomean when values are missing or zero
        is_geomean = tokens[0] == "geomean"
        if is_geomean and (len(numbers) < 2 or any(v == 0 for v in numbers)) and not pct_match:
            leading = re.match(r'^\s*', line).group(0)
            left = f"{leading}geomean"
            processed_lines.append(append_aligned(left, "n/a (has zero)"))
            continue

        # when both values are zero, force diff = 0 and align
        if len(numbers) == 2 and numbers[0] == 0 and numbers[1] == 0:
            diff_val = 0.0
            icon = get_icon(diff_val)
            left = line.rstrip()
            processed_lines.append(append_aligned(left, f"{diff_val:+.2f}% {icon}"))
            continue

        # recompute the diff when we have two numeric values
        if len(numbers) == 2 and numbers[0] != 0:
            diff_val = (numbers[1] - numbers[0]) / numbers[0] * 100
            icon = get_icon(diff_val)

            if pct_match:
                left = line[:pct_match.start()].rstrip()
            else:
                left = line.rstrip()

            processed_lines.append(append_aligned(left, f"{diff_val:+.2f}% {icon}"))
            continue

        # fallback: align an existing percentage to the Diff column and (re)append the icon
        if pct_match:
            try:
                pct_val = float(pct_match.group(1))
                icon = get_icon(pct_val)

                left = line[:pct_match.start()].rstrip()
                suffix = line[pct_match.end():]
                # Remove any existing icon after the percentage to avoid duplicates
                suffix = re.sub(r'\s*(🐌|🚀|➡️)', '', suffix)

                processed_lines.append(append_aligned(left, f"{pct_val:+.2f}% {icon}{suffix}"))
            except ValueError:
                processed_lines.append(line)
            continue

        # If we cannot parse numbers or percentages, keep the line
        # (only the worker suffix has been stripped)
        processed_lines.append(line)

    p.write_text("\n".join(processed_lines) + "\n", encoding="utf-8")

except Exception as e:
    print(f"Error post-processing comparison.md: {e}")
    sys.exit(1)
```
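
For reference, a minimal sketch of the per-row arithmetic the script applies, with illustrative values that are not taken from any real comparison.md: both measurements are normalized to base units by `parse_val`, the relative change is computed against the base value, and the icon thresholds sit at ±10%.

```python
# Illustrative sample only: a base time of "1.20ms" and a PR time of "0.96ms"
# normalize to seconds via the multipliers table (ms -> 1e-3).
old_s, new_s = 1.20e-3, 0.96e-3

# Same formula as the script: relative change against the base measurement.
diff_pct = (new_s - old_s) / old_s * 100

# Same thresholds as get_icon(): >10% slower, <-10% faster, otherwise neutral.
icon = "🐌" if diff_pct > 10 else ("🚀" if diff_pct < -10 else "➡️")
print(f"{diff_pct:+.2f}% {icon}")  # -> -20.00% 🚀
```

A hypothetical row such as `BenchmarkParse-8   1.20ms ± 2%   0.96ms ± 1%` would therefore gain a right-aligned `-20.00% 🚀` in the Diff column, after the `-8` worker suffix is stripped.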
Lines changed: 32 additions & 0 deletions
@@ -0,0 +1,32 @@
1+
module.exports = async ({github, context, core}) => {
2+
try {
3+
const artifacts = await github.rest.actions.listWorkflowRunArtifacts({
4+
owner: context.repo.owner,
5+
repo: context.repo.repo,
6+
run_id: context.payload.workflow_run.id,
7+
});
8+
9+
const matchArtifact = artifacts.data.artifacts.find((artifact) => {
10+
return artifact.name == "benchmark-results";
11+
});
12+
13+
if (!matchArtifact) {
14+
core.setFailed("No artifact named 'benchmark-results' found.");
15+
return;
16+
}
17+
18+
const download = await github.rest.actions.downloadArtifact({
19+
owner: context.repo.owner,
20+
repo: context.repo.repo,
21+
artifact_id: matchArtifact.id,
22+
archive_format: 'zip',
23+
});
24+
25+
const fs = require('fs');
26+
const path = require('path');
27+
const workspace = process.env.GITHUB_WORKSPACE;
28+
fs.writeFileSync(path.join(workspace, 'benchmark-results.zip'), Buffer.from(download.data));
29+
} catch (error) {
30+
core.setFailed(`Failed to download artifact: ${error.message}`);
31+
}
32+
};
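
The module only downloads the artifact and writes `benchmark-results.zip` into the workspace; unpacking is left to a later workflow step. A minimal sketch of that follow-up (assumed here, it is not among the files shown), written in Python for consistency with the other scripts:

```python
# Unpack the archive written by the download module so later steps can read
# the result files. The destination directory name is a hypothetical choice.
import zipfile

with zipfile.ZipFile("benchmark-results.zip") as zf:
    zf.extractall("benchmark-results")
```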
Lines changed: 40 additions & 0 deletions
@@ -0,0 +1,40 @@
1+
import json
2+
import sys
3+
import glob
4+
5+
def merge_jsons(output_file, input_files):
6+
all_benchmarks = []
7+
8+
for file_path in input_files:
9+
try:
10+
with open(file_path, 'r', encoding='utf-8') as f:
11+
data = json.load(f)
12+
# Handle JMH list format
13+
if isinstance(data, list):
14+
all_benchmarks.extend(data)
15+
# Handle Pytest-benchmark dict format (legacy/compatibility)
16+
elif isinstance(data, dict):
17+
all_benchmarks.extend(data.get('benchmarks', []))
18+
except Exception as e:
19+
print(f"Warning: Failed to parse {file_path}: {e}")
20+
21+
# Wrap in dict to match Pytest-benchmark structure expected by downstream
22+
merged_data = {'benchmarks': all_benchmarks}
23+
24+
with open(output_file, 'w', encoding='utf-8') as f:
25+
json.dump(merged_data, f, indent=4)
26+
27+
if __name__ == "__main__":
28+
if len(sys.argv) < 3:
29+
print("Usage: python merge_benchmarks.py output.json input1.json input2.json ...")
30+
sys.exit(1)
31+
32+
output_file = sys.argv[1]
33+
input_files = sys.argv[2:]
34+
35+
# Expand globs if shell didn't
36+
expanded_inputs = []
37+
for p in input_files:
38+
expanded_inputs.extend(glob.glob(p))
39+
40+
merge_jsons(output_file, expanded_inputs)
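
A quick usage sketch (the file name `merge_benchmarks.py` follows the script's own usage string; the input files and their contents are hypothetical): a JMH-style list and a pytest-benchmark-style dict merge into a single `benchmarks` list.

```python
# Create two hypothetical inputs: JMH emits a top-level list, while
# pytest-benchmark wraps results in a {"benchmarks": [...]} dict.
import json
import pathlib
import subprocess

pathlib.Path("jmh.json").write_text(json.dumps([{"benchmark": "Foo", "score": 1.2}]))
pathlib.Path("pytest.json").write_text(json.dumps({"benchmarks": [{"name": "Bar"}]}))

# Invoke the merge script; it also expands glob patterns itself if the shell doesn't.
subprocess.run(["python", "merge_benchmarks.py", "merged.json", "jmh.json", "pytest.json"], check=True)

with open("merged.json", encoding="utf-8") as f:
    print(len(json.load(f)["benchmarks"]))  # -> 2
```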
