
Commit 43a20d7

feat: port benchmark workflows and scripts from pycasbin
1 parent 56cd70f commit 43a20d7

File tree

13 files changed: +1040 −52 lines

Lines changed: 247 additions & 0 deletions
@@ -0,0 +1,247 @@
import pathlib, re, sys

try:
    p = pathlib.Path("comparison.md")
    if not p.exists():
        print("comparison.md not found, skipping post-processing.")
        sys.exit(0)

    lines = p.read_text(encoding="utf-8").splitlines()
    processed_lines = []
    in_code = False

    def strip_worker_suffix(text: str) -> str:
        return re.sub(r"(\S+?)-\d+(\s|$)", r"\1\2", text)

    def get_icon(diff_val: float) -> str:
        if diff_val > 10:
            return "🐌"
        if diff_val < -10:
            return "🚀"
        return "➡️"

    def clean_superscripts(text: str) -> str:
        return re.sub(r"[¹²³⁴⁵⁶⁷⁸⁹⁰]", "", text)

    def parse_val(token: str):
        if "%" in token or "=" in token:
            return None
        token = clean_superscripts(token)
        token = token.split("±")[0].strip()
        token = token.split("(")[0].strip()
        if not token:
            return None

        m = re.match(r"^([-+]?\d*\.?\d+)([a-zA-Zµ]+)?$", token)
        if not m:
            return None
        try:
            val = float(m.group(1))
        except ValueError:
            return None
        suffix = (m.group(2) or "").replace("µ", "u")
        multipliers = {
            "n": 1e-9,
            "ns": 1e-9,
            "u": 1e-6,
            "us": 1e-6,
            "m": 1e-3,
            "ms": 1e-3,
            "s": 1.0,
            "k": 1e3,
            "K": 1e3,
            "M": 1e6,
            "G": 1e9,
            "Ki": 1024.0,
            "Mi": 1024.0**2,
            "Gi": 1024.0**3,
            "Ti": 1024.0**4,
            "B": 1.0,
            "B/op": 1.0,
            "C": 1.0,  # tolerate degree/unit markers that don't affect ratio
        }
        return val * multipliers.get(suffix, 1.0)

    def extract_two_numbers(tokens):
        found = []
        for t in tokens[1:]:  # skip name
            if t in {"±", "∞", "~", "│"}:
                continue
            if "%" in t or "=" in t:
                continue
            val = parse_val(t)
            if val is not None:
                found.append(val)
            if len(found) == 2:
                break
        return found

    # Pass 0:
    # 1. find a header line with pipes to derive an alignment hint
    # 2. calculate the max content width to ensure right-most alignment
    max_content_width = 0

    for line in lines:
        if line.strip() == "```":
            in_code = not in_code
            continue
        if not in_code:
            continue

        # Skip footnotes/meta for width calculation
        if re.match(r"^\s*[¹²³⁴⁵⁶⁷⁸⁹⁰]", line) or re.search(r"need\s*>?=\s*\d+\s+samples", line):
            continue
        if not line.strip() or line.strip().startswith(("goos:", "goarch:", "pkg:", "cpu:")):
            continue
        # Header lines are handled separately in Pass 1
        if "│" in line and ("vs base" in line or "old" in line or "new" in line):
            continue

        # It's likely a data line.
        # Check if it has an existing percentage we might move/align.
        curr_line = strip_worker_suffix(line).rstrip()
        pct_match = re.search(r"([+-]?\d+\.\d+)%", curr_line)
        if pct_match:
            # If we are going to realign this, count width only up to the percentage
            w = len(curr_line[: pct_match.start()].rstrip())
        else:
            w = len(curr_line)

        if w > max_content_width:
            max_content_width = w

    # Global alignment target for the Diff column:
    # ensure it starts beyond the longest line, with some padding
    diff_col_start = max_content_width + 4

    # Right boundary (closing pipe) position;
    # the Diff column itself is ~12 chars wide (e.g. "+100.00% 🚀")
    right_boundary = diff_col_start + 14

    in_code = False  # reset fence state before the rewrite pass

    for line in lines:
        if line.strip() == "```":
            in_code = not in_code
            processed_lines.append(line)
            continue

        if not in_code:
            processed_lines.append(line)
            continue

        # Keep footnotes untouched
        if re.match(r"^\s*[¹²³⁴⁵⁶⁷⁸⁹⁰]", line) or re.search(r"need\s*>?=\s*\d+\s+samples", line):
            processed_lines.append(line)
            continue

        # Header lines: ensure the last column is labeled Diff and force alignment
        if "│" in line and ("vs base" in line or "old" in line or "new" in line):
            # Strip trailing pipe and whitespace
            stripped_header = line.rstrip().rstrip("│").rstrip()

            # Strip any existing Diff/Delta label so we don't duplicate it
            # and can enforce our own alignment
            stripped_header = re.sub(r"\s+Diff\s*$", "", stripped_header, flags=re.IGNORECASE)
            stripped_header = re.sub(r"\s+Delta\b", "", stripped_header, flags=re.IGNORECASE)

            # Pad to diff_col_start, keeping a minimum spacing of two
            padding = diff_col_start - len(stripped_header)
            if padding < 2:
                padding = 2
            new_header = stripped_header + " " * padding

            # Add the Diff column label on the second header row (vs base)
            if "vs base" in line or "new pr.json" in line:
                new_header += "Diff"

            # Add the closing pipe at the right boundary
            current_len = len(new_header)
            if current_len < right_boundary:
                new_header += " " * (right_boundary - current_len)

            new_header += "│"
            processed_lines.append(new_header)
            continue

        # Non-data meta lines
        if not line.strip() or line.strip().startswith(("goos:", "goarch:", "pkg:")):
            processed_lines.append(line)
            continue

        line = strip_worker_suffix(line)
        tokens = line.split()
        if not tokens:
            processed_lines.append(line)
            continue

        numbers = extract_two_numbers(tokens)
        pct_match = re.search(r"([+-]?\d+\.\d+)%", line)

        # Helper: pad the left part out to the Diff column, then append content
        def append_aligned(left_part, content):
            if len(left_part) < diff_col_start:
                aligned = left_part + " " * (diff_col_start - len(left_part))
            else:
                aligned = left_part + " "
            return f"{aligned}{content}"

        # Special handling for geomean when values are missing or zero
        is_geomean = tokens[0] == "geomean"
        if is_geomean and (len(numbers) < 2 or any(v == 0 for v in numbers)) and not pct_match:
            leading = re.match(r"^\s*", line).group(0)
            left = f"{leading}geomean"
            processed_lines.append(append_aligned(left, "n/a (has zero)"))
            continue

        # When both values are zero, force diff = 0 and align
        if len(numbers) == 2 and numbers[0] == 0 and numbers[1] == 0:
            diff_val = 0.0
            icon = get_icon(diff_val)
            left = line.rstrip()
            processed_lines.append(append_aligned(left, f"{diff_val:+.2f}% {icon}"))
            continue

        # Recompute the diff when we have two numeric values
        if len(numbers) == 2 and numbers[0] != 0:
            diff_val = (numbers[1] - numbers[0]) / numbers[0] * 100
            icon = get_icon(diff_val)

            if pct_match:
                left = line[: pct_match.start()].rstrip()
            else:
                left = line.rstrip()

            processed_lines.append(append_aligned(left, f"{diff_val:+.2f}% {icon}"))
            continue

        # Fallback: align an existing percentage to the Diff column and (re)append the icon
        if pct_match:
            try:
                pct_val = float(pct_match.group(1))
                icon = get_icon(pct_val)

                left = line[: pct_match.start()].rstrip()
                suffix = line[pct_match.end() :]
                # Remove any existing icon after the percentage to avoid duplicates
                suffix = re.sub(r"\s*(🐌|🚀|➡️)", "", suffix)

                processed_lines.append(append_aligned(left, f"{pct_val:+.2f}% {icon}{suffix}"))
            except ValueError:
                processed_lines.append(line)
            continue

        # If we cannot parse numbers or percentages, keep the line
        # (only the worker suffix has been stripped)
        processed_lines.append(line)

    p.write_text("\n".join(processed_lines) + "\n", encoding="utf-8")

except Exception as e:
    print(f"Error post-processing comparison.md: {e}")
    sys.exit(1)
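
To make the diff arithmetic and the ±10% icon thresholds above concrete, here is a minimal standalone sketch; the timing values are invented for illustration, only the formula and thresholds come from the script:

# Mirrors the (new - old) / old * 100 computation and get_icon's thresholds.
def pct_diff(old: float, new: float) -> float:
    return (new - old) / old * 100

def icon_for(diff: float) -> str:
    return "🐌" if diff > 10 else "🚀" if diff < -10 else "➡️"

for old, new in [(100e-9, 250e-9), (250e-9, 100e-9), (100e-9, 105e-9)]:
    d = pct_diff(old, new)
    print(f"{d:+.2f}% {icon_for(d)}")
# -> +150.00% 🐌 (regression), -60.00% 🚀 (improvement), +5.00% ➡️ (within noise)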
Lines changed: 32 additions & 0 deletions
@@ -0,0 +1,32 @@
module.exports = async ({github, context, core}) => {
  try {
    const artifacts = await github.rest.actions.listWorkflowRunArtifacts({
      owner: context.repo.owner,
      repo: context.repo.repo,
      run_id: context.payload.workflow_run.id,
    });

    const matchArtifact = artifacts.data.artifacts.find((artifact) => {
      return artifact.name === "benchmark-results";
    });

    if (!matchArtifact) {
      core.setFailed("No artifact named 'benchmark-results' found.");
      return;
    }

    const download = await github.rest.actions.downloadArtifact({
      owner: context.repo.owner,
      repo: context.repo.repo,
      artifact_id: matchArtifact.id,
      archive_format: 'zip',
    });

    const fs = require('fs');
    const path = require('path');
    const workspace = process.env.GITHUB_WORKSPACE;
    fs.writeFileSync(path.join(workspace, 'benchmark-results.zip'), Buffer.from(download.data));
  } catch (error) {
    core.setFailed(`Failed to download artifact: ${error.message}`);
  }
};
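
This module only persists benchmark-results.zip into the workspace; some later workflow step still has to unpack it before the results can be compared. That step is not part of this diff, but a minimal sketch of what it might look like (the archive and target directory names are assumptions):

# Hypothetical follow-up step: unpack the downloaded artifact so that
# subsequent steps can read the raw benchmark files inside it.
import zipfile

with zipfile.ZipFile("benchmark-results.zip") as zf:
    zf.extractall("benchmark-results")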
Lines changed: 106 additions & 0 deletions
@@ -0,0 +1,106 @@
import json
import os
import sys
import datetime


def normalize_name(name):
    # Remove the prefix if present (e.g., "BenchmarkModel/")
    if "/" in name:
        name = name.split("/", 1)[1]

    # Clean up the name, mirroring pycasbin's convention
    parts = name.split("_")
    new_parts = []
    for p in parts:
        if p.lower() in ["rbac", "abac", "acl", "api", "rest"]:
            new_parts.append(p.upper())
        else:
            new_parts.append(p.capitalize())
    return "".join(new_parts)


def main():
    if len(sys.argv) < 3:
        print("Usage: python format_google_benchmark_data.py input.json output.json")
        sys.exit(1)

    input_path = sys.argv[1]
    output_path = sys.argv[2]

    try:
        with open(input_path, "r", encoding="utf-8") as f:
            data = json.load(f)
    except Exception as e:
        print(f"Error loading {input_path}: {e}")
        sys.exit(1)

    # Get commit info from environment variables
    commit_info = {
        "author": {
            "email": os.environ.get("COMMIT_AUTHOR_EMAIL", ""),
            "name": os.environ.get("COMMIT_AUTHOR_NAME", ""),
            "username": os.environ.get("COMMIT_AUTHOR_USERNAME", ""),
        },
        "committer": {
            "email": os.environ.get("COMMIT_COMMITTER_EMAIL", ""),
            "name": os.environ.get("COMMIT_COMMITTER_NAME", ""),
            "username": os.environ.get("COMMIT_COMMITTER_USERNAME", ""),
        },
        "distinct": True,
        "id": os.environ.get("COMMIT_ID", ""),
        "message": os.environ.get("COMMIT_MESSAGE", ""),
        "timestamp": os.environ.get("COMMIT_TIMESTAMP", ""),
        "tree_id": os.environ.get("COMMIT_TREE_ID", ""),
        "url": os.environ.get("COMMIT_URL", ""),
    }

    # Get the CPU count, preferring the value reported by the benchmark run
    cpu_count = data.get("context", {}).get("num_cpus")
    if not cpu_count:
        cpu_count = os.cpu_count() or 1

    benches = []
    for bench in data.get("benchmarks", []):
        # Skip aggregate items (mean, median, stddev) if any
        if "run_type" in bench and bench["run_type"] == "aggregate":
            continue

        name = bench["name"]

        # Google Benchmark reports time in the unit given by time_unit;
        # standardize on ns/op
        val = bench["real_time"]
        unit = bench.get("time_unit", "ns")

        if unit == "ms":
            val *= 1e6
        elif unit == "us":
            val *= 1e3
        elif unit == "s":
            val *= 1e9

        # Extra info
        iterations = bench.get("iterations", 0)
        extra = f"{iterations} times"

        benches.append(
            {"name": normalize_name(name), "value": round(val, 2), "unit": "ns/op", "extra": extra}
        )

    output_data = {
        "commit": commit_info,
        "date": int(datetime.datetime.now().timestamp() * 1000),
        "tool": "cpp",
        "procs": cpu_count,
        "benches": benches,
    }

    with open(output_path, "w", encoding="utf-8") as f:
        json.dump(output_data, f, indent=2)

    print(f"Successfully formatted benchmark data to {output_path}")


if __name__ == "__main__":
    main()
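
The output shape ("tool": "cpp" plus a list of {name, value, unit, extra} entries) looks like the input format consumed by the github-action-benchmark action. To make the conversion concrete, a small hypothetical input and its expected result; the benchmark name and all numbers are invented for illustration:

# A single-benchmark input, shaped like Google Benchmark's
# --benchmark_format=json output; all values are made up.
sample = {
    "context": {"num_cpus": 8},
    "benchmarks": [
        {
            "name": "BenchmarkModel/basic_model",
            "run_type": "iteration",
            "real_time": 1.5,
            "time_unit": "us",
            "iterations": 2000000,
        },
    ],
}

# The script would convert this entry as follows:
#   1.5 us -> 1500.0 ns, name "basic_model" -> "BasicModel",
#   yielding {"name": "BasicModel", "value": 1500.0,
#             "unit": "ns/op", "extra": "2000000 times"}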
