Skip to content

Commit 3db2ff8

Browse files
committed
foo
1 parent 3931b8f commit 3db2ff8

File tree

1 file changed

+334
-6
lines changed

1 file changed

+334
-6
lines changed

.github/workflows/alarm.yml

Lines changed: 334 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
name: Alarm regression
1+
name: Benchmark Comparison & Alarm Regression
22

33
on:
44
workflow_run:
@@ -9,6 +9,9 @@ permissions:
99
contents: read
1010
actions: read
1111
issues: write
12+
pull-requests: write
13+
checks: write
14+
statuses: write
1215

1316
jobs:
1417
comment-if-regressed:
@@ -18,9 +21,334 @@ jobs:
1821
contains(fromJson('["success","neutral"]'), github.event.workflow_run.conclusion)
1922
2023
steps:
21-
- name: Checkout code
22-
uses: actions/checkout@v4
24+
- name: Setup Python
25+
uses: actions/setup-python@v5
2326
with:
24-
# A full-history checkout is required because shallow cloning marks HEAD as "grafted". This breaks remote
25-
# tracking thereby making it impossible to detect whether a commit is contained in upstream main.
26-
fetch-depth: 0
27+
python-version: '3.10'
28+
29+
- name: Install deps
30+
run: |
31+
python -m pip install --quiet --upgrade wandb
32+
33+
- name: Download artifacts from triggering run
34+
id: dl
35+
uses: actions/download-artifact@v4
36+
with:
37+
name: speed-test-results
38+
run-id: ${{ github.event.workflow_run.id }}
39+
github-token: ${{ secrets.GITHUB_TOKEN }}
40+
path: ./artifacts
41+
42+
- name: Show downloaded files
  env:
    # Hand the path to the shell through the environment so it is treated
    # as data, rather than being spliced into the script text before the
    # shell parses it (which breaks on spaces or shell metacharacters).
    DOWNLOAD_PATH: ${{ steps.dl.outputs.download-path }}
  run: |
    echo "Downloaded into ${DOWNLOAD_PATH}"
    ls -la "${DOWNLOAD_PATH}" || true
    (command -v tree >/dev/null && tree -a "${DOWNLOAD_PATH}") || true
47+
48+
- name: Resolve PR number
49+
id: pr
50+
uses: actions/github-script@v7
51+
with:
52+
script: |
53+
// Determine which pull request the triggering workflow run belongs to.
// The run payload is consulted first; if it lists no pull requests, the
// commit-to-PR association endpoint is queried with the run's head SHA.
const run = context.payload.workflow_run;
const fromPayload = run.pull_requests && run.pull_requests[0];
let pullRequest = fromPayload || null;

if (pullRequest === null) {
  const response = await github.rest.repos.listPullRequestsAssociatedWithCommit({
    owner: context.repo.owner,
    repo: context.repo.repo,
    commit_sha: run.head_sha,
  });
  const candidates = response.data;
  if (candidates && candidates.length) {
    pullRequest = candidates[0];
  }
}

// An empty string is emitted when no pull request could be found.
const prNumber = pullRequest ? String(pullRequest.number) : '';
core.setOutput('pr_number', prNumber);
67+
68+
- name: Check regressions
69+
if: ${{ steps.pr.outputs.pr_number != '' }}
70+
env:
71+
# --- W&B ---
72+
WANDB_API_KEY: ${{ secrets.WANDB_API_KEY }}
73+
WANDB_ENTITY: genesis-ai-company
74+
WANDB_PROJECT: genesis-benchmarks
75+
WANDB_SILENT: "true"
76+
77+
# --- Parameters ---
78+
MAX_REVISIONS: "5"
79+
NO_CHANGE_PATIENCE: "100"
80+
RUNTIME_REGRESSION_TOLERANCE_PCT: "10"
81+
COMPILE_REGRESSION_TOLERANCE_PCT: "10"
82+
83+
# Input/Output paths
84+
ARTIFACTS_DIR: ${{ steps.dl.outputs.download-path }}
85+
PR_COMMENT_PATH: pr_comment.md
86+
run: |
87+
python - <<'PY'
88+
import os, sys, json, math, re
89+
import statistics as stats
90+
import wandb
91+
92+
SHA_RE = re.compile(r"[0-9a-fA-F]{7,40}")
93+
94+
def _norm_rev(text):
95+
if not text: return None
96+
text = text.split("@", 1)[0]
97+
m = SHA_RE.search(text)
98+
return m.group(0) if m else text
99+
100+
def _normalize_kv_id(kv: dict, drop_keys=None) -> str:
101+
drop = set(drop_keys or [])
102+
pairs = []
103+
for k, v in kv.items():
104+
if k in drop or v is None: continue
105+
k = str(k).strip(); v = str(v).strip()
106+
if not k or not v: continue
107+
pairs.append((k, v))
108+
pairs.sort(key=lambda x: x[0])
109+
return "-".join(f"{k}={v}" for k, v in pairs)
110+
111+
def wandb_normalize_benchmark_id(bid: str) -> str:
    """Canonicalise a "-"-separated benchmark id string.

    The id is split on "-"; blank pieces and pieces without "=" are ignored.
    Each remaining piece is split at its first "=" into a stripped key and
    value (a repeated key keeps its last value), and the resulting mapping is
    rendered by ``_normalize_kv_id``.
    """
    fields = {}
    for piece in bid.split("-"):
        piece = piece.strip()
        if not piece or "=" not in piece:
            continue
        name, _, value = piece.partition("=")
        fields[name.strip()] = value.strip()
    return _normalize_kv_id(fields)
118+
119+
def artifacts_parse_speed_txt_lines(lines):
120+
METRIC_KEYS = {"compile_time", "runtime_fps", "realtime_factor"}
121+
out = {}
122+
for line in lines:
123+
if not line.strip(): continue
124+
parts = [p.strip() for p in line.strip().split("|") if "=" in p]
125+
kv = {}
126+
for p in parts:
127+
k, v = p.split("=", 1)
128+
kv[k.strip()] = v.strip()
129+
tid = _normalize_kv_id(kv, drop_keys=METRIC_KEYS)
130+
rt = kv.get("runtime_fps"); ct = kv.get("compile_time")
131+
try: rt = float(rt) if rt is not None else None
132+
except: rt = None
133+
try: ct = float(ct) if ct is not None else None
134+
except: ct = None
135+
out[tid] = {"runtime_fps": rt, "compile_time": ct}
136+
return out
137+
138+
# ----- read workflow_run id (for tag only; no API calls) -----
# A context manager closes the event file deterministically instead of
# leaving the handle to the garbage collector.
with open(os.environ["GITHUB_EVENT_PATH"], "r", encoding="utf-8") as event_file:
    ev = json.load(event_file)
wr = ev["workflow_run"]
wr_id = wr["id"]
142+
143+
# ----- Load current PR artifacts -----
def _skip_with_empty_comment(message):
    """Print ``message``, leave an empty comment file behind, and exit 0."""
    print(message)
    open(os.environ.get("PR_COMMENT_PATH","pr_comment.md"),"w").close()
    sys.exit(0)


artifacts_path = os.path.abspath(os.environ.get("ARTIFACTS_DIR", "./artifacts"))
if not os.path.exists(artifacts_path):
    _skip_with_empty_comment("No artifacts dir; skip.")

# First file named speed_test*.txt anywhere below the artifacts directory,
# in os.walk order; None when no such file exists.
current_txt_path = next(
    (
        os.path.join(folder, candidate)
        for folder, _, names in os.walk(artifacts_path)
        for candidate in names
        if candidate.startswith("speed_test") and candidate.endswith(".txt")
    ),
    None,
)

if current_txt_path is None:
    _skip_with_empty_comment("No speed_test*.txt; skip.")

with open(current_txt_path, "r", encoding="utf-8") as f:
    current_benchmark = artifacts_parse_speed_txt_lines(f.readlines())
160+
161+
# ----- W&B login (anonymous allowed) -----
if not os.getenv("WANDB_API_KEY"):
    try:
        wandb.login(anonymous="allow", relogin=True)
    except Exception as exc:
        # Best effort: continue without a login, but record why it failed so
        # a later W&B API error can be traced back to this point.
        print(f"[WARN] anonymous W&B login failed: {exc}", file=sys.stderr)
165+
166+
# ----- Collect baselines from W&B -----
ENTITY = os.environ.get("WANDB_ENTITY", "")
PROJECT = os.environ.get("WANDB_PROJECT", "")
MAX_REVISIONS = int(os.environ.get("MAX_REVISIONS", "5"))
NO_CHANGE_PATIENCE = int(os.environ.get("NO_CHANGE_PATIENCE", "100"))
tol_rt = float(os.environ.get("RUNTIME_REGRESSION_TOLERANCE_PCT", "10"))
tol_ct = float(os.environ.get("COMPILE_REGRESSION_TOLERANCE_PCT", "10"))

api = wandb.Api()
runs_iter = api.runs(f"{ENTITY}/{PROJECT}", order="-created_at")

by_rev = {}           # rev -> {bench_id: {runtime_fps, compile_time}}
rev_order = []        # latest -> oldest
selected_revs = None  # set of baseline revisions, fixed once MAX_REVISIONS are seen
no_change_streak = 0  # consecutive runs that added nothing since the set was fixed


def _load_run_config(run):
    """Return the run's config as a dict, or None when it is unusable.

    The config is accepted either as a mapping or as a JSON string. Calling
    json.loads unconditionally raises TypeError when it is already a mapping.
    """
    cfg = getattr(run, "config", None)
    if isinstance(cfg, (str, bytes, bytearray)):
        try:
            cfg = json.loads(cfg)
        except ValueError:
            return None
    return cfg if isinstance(cfg, dict) else None


def _config_value(cfg, key):
    """Read cfg[key], unwrapping a {"value": ...} envelope when present."""
    entry = cfg.get(key)
    if isinstance(entry, dict):
        return entry.get("value")
    return entry


def _patience_exhausted():
    """Count a run that contributed nothing; True once patience has run out.

    Nothing is counted until the baseline revision set has been fixed.
    """
    global no_change_streak
    if selected_revs is None:
        return False
    no_change_streak += 1
    return no_change_streak >= NO_CHANGE_PATIENCE


for run in runs_iter:
    if run.state != "finished":
        continue
    cfg = _load_run_config(run)
    if cfg is None:
        continue

    raw_rev = _config_value(cfg, "revision")
    raw_bid = _config_value(cfg, "benchmark_id")
    rev = _norm_rev(str(raw_rev)) if raw_rev else None
    bid = str(raw_bid) if raw_bid else None
    if not rev or not bid:
        if _patience_exhausted():
            break
        continue

    # Once the baseline set is fixed, runs of any other revision are ignored.
    if selected_revs is not None and rev not in selected_revs:
        if _patience_exhausted():
            break
        continue

    if rev not in by_rev:
        by_rev[rev] = {}
        rev_order.append(rev)
        if len(rev_order) >= MAX_REVISIONS:
            selected_revs = set(rev_order)

    nbid = wandb_normalize_benchmark_id(bid)
    if nbid in by_rev[rev]:
        # Runs are visited newest first, so the first run seen for a
        # (revision, benchmark) pair is the one that is kept.
        if _patience_exhausted():
            break
        continue

    runtime_fps = None
    compile_time = None
    # Inspect at most 10 history rows, stopping at the first one with both metrics.
    for cnt, row in enumerate(run.scan_history(keys=["runtime_fps", "compile_time"]), 1):
        runtime_fps = row.get("runtime_fps")
        compile_time = row.get("compile_time")
        if runtime_fps is not None and compile_time is not None:
            break
        if cnt >= 10:
            break
    by_rev[rev][nbid] = {"runtime_fps": runtime_fps, "compile_time": compile_time}
    if selected_revs is not None:
        no_change_streak = 0
228+
229+
# ----- Compare current vs baselines -----
230+
def collect_mean(metric_key, bench_id):
    """Average one metric of one benchmark over every collected revision.

    Revisions that lack the benchmark, or whose value is missing, non-numeric
    or NaN, are skipped.

    Returns the mean as a float, or None when no revision contributes a value.
    """
    samples = []
    for per_bench in by_rev.values():
        value = per_bench.get(bench_id, {}).get(metric_key)
        if not isinstance(value, (int, float)):
            continue
        if isinstance(value, float) and math.isnan(value):
            continue
        samples.append(float(value))
    if not samples:
        return None
    return stats.mean(samples)
237+
238+
def _is_real_number(x):
    """True for an int or float that is not NaN."""
    return isinstance(x, (int, float)) and not (isinstance(x, float) and math.isnan(x))


runtime_regs = []
compile_regs = []
for bid, m in current_benchmark.items():
    cur_rt = m.get("runtime_fps")
    cur_ct = m.get("compile_time")
    base_rt = collect_mean("runtime_fps", bid)
    base_ct = collect_mean("compile_time", bid)

    # Runtime FPS regresses when it drops by more than the tolerance.
    if base_rt is not None and base_rt > 0 and _is_real_number(cur_rt):
        d = (cur_rt - base_rt) / base_rt * 100.0
        if d < -tol_rt:
            runtime_regs.append((bid, cur_rt, base_rt, d))

    # Compile time regresses when it grows by more than the tolerance.
    if base_ct is not None and base_ct > 0 and _is_real_number(cur_ct):
        d = (cur_ct - base_ct) / base_ct * 100.0
        if d > tol_ct:
            compile_regs.append((bid, cur_ct, base_ct, d))

# Nothing exceeded its tolerance: leave an empty comment file and stop.
if not runtime_regs and not compile_regs:
    print("No regression vs baselines; skip comment.")
    with open(os.environ.get("PR_COMMENT_PATH", "pr_comment.md"), "w"):
        pass
    sys.exit(0)
256+
257+
def trunc(s, n=120):
    """Return ``s`` unchanged if it has at most ``n`` characters; otherwise
    return its first ``n`` characters followed by an ellipsis ("…")."""
    if len(s) > n:
        return s[:n] + "…"
    return s
258+
259+
def _regression_table(title, rows, value_fmt, limit=20):
    """Render up to ``limit`` rows as the lines of a Markdown table.

    Args:
        title: bold heading line placed above the table.
        rows: (benchmark_id, current, baseline_mean, delta_pct) tuples,
            already sorted in display order.
        value_fmt: format spec applied to the current and baseline values.
        limit: maximum number of rows to show.

    Returns:
        list of lines, ending with a blank separator line.
    """
    table = [title,
             "| benchmark_id | current | baseline mean | delta % |",
             "|---|---:|---:|---:|"]
    for bid, cur, base, d in rows[:limit]:
        table.append(f"| `{trunc(bid)}` | {cur:{value_fmt}} | {base:{value_fmt}} | {d:.2f}% |")
    if len(rows) > limit:
        table.append(f"_Only first {limit} shown._")
    table.append("")
    return table


lines = []
lines.append(":warning: **Benchmark regression detected**")
lines.append(f"- Baseline commits considered: **{len(rev_order)}**")
for i, r in enumerate(rev_order, 1):
    # Two leading spaces: Markdown needs them to nest this item under the
    # bullet above; with a single space it renders as a sibling item.
    lines.append(f"  - Commit {i}: {r}")
lines.append(f"- Runtime tolerance: **-{tol_rt:.1f}%**; Compile tolerance: **+{tol_ct:.1f}%**")
lines.append("")

if runtime_regs:
    # Most negative delta first.
    runtime_regs.sort(key=lambda x: x[3])
    lines += _regression_table(
        "**Runtime FPS regressions (vs mean of other commits)**",
        runtime_regs,
        ",.0f",
    )

if compile_regs:
    # Largest increase first.
    compile_regs.sort(key=lambda x: -x[3])
    # One decimal place so differences smaller than a whole unit stay visible.
    lines += _regression_table(
        "**Compile-time regressions (vs mean of other commits)**",
        compile_regs,
        ",.1f",
    )

# Hidden marker identifying the workflow run this report was generated for.
lines.append(f"<!-- bench-guard-run:{wr_id} -->")
body = "\n".join(lines)

with open(os.environ.get("PR_COMMENT_PATH", "pr_comment.md"), "w", encoding="utf-8") as f:
    f.write(body + "\n")
print("[INFO] wrote pr_comment.md")
293+
PY
294+
295+
# capture comment into env for next step
# PR_COMMENT_PATH comes from this step's env; fall back to the default name.
comment_file="${PR_COMMENT_PATH:-pr_comment.md}"
if [ -s "${comment_file}" ]; then
  # Use an unpredictable heredoc delimiter so that no line of the file can
  # end the value early and append further variables to GITHUB_ENV.
  delimiter="EOF_$(od -An -N16 -tx1 /dev/urandom | tr -d ' \n')"
  {
    echo "SCRIPT_OUTPUT<<${delimiter}"
    cat "${comment_file}"
    echo "${delimiter}"
  } >> "$GITHUB_ENV"
else
  echo "SCRIPT_OUTPUT=" >> "$GITHUB_ENV"
fi
305+
306+
- name: Add PR comment
307+
if: ${{ steps.pr.outputs.pr_number != '' && env.SCRIPT_OUTPUT != '' }}
308+
uses: actions/github-script@v7
309+
env:
310+
COMMENT_BODY: ${{ env.SCRIPT_OUTPUT }}
311+
with:
312+
script: |
313+
const prNumber = Number('${{ steps.pr.outputs.pr_number }}');
314+
if (!prNumber) {
315+
core.info('No PR number; skipping comment.');
316+
return;
317+
}
318+
await github.rest.issues.createComment({
319+
owner: context.repo.owner,
320+
repo: context.repo.repo,
321+
issue_number: prNumber,
322+
body: process.env.COMMENT_BODY
323+
});
324+
325+
- name: Publish PR check (show in Checks tab)
  if: always()  # publish the check even when an earlier step failed
  uses: actions/github-script@v7
  env:
    CHECK_NAME: Benchmark Comparison & Alarm Regression
    # Comment body taken from the SCRIPT_OUTPUT environment variable.
    COMMENT_BODY: ${{ env.SCRIPT_OUTPUT }}
    # Lets the script tell "no regression" apart from "the job did not succeed".
    JOB_STATUS: ${{ job.status }}
  with:
    script: |
      const sha = context.payload.workflow_run.head_sha;
      const body = (process.env.COMMENT_BODY || '').trim();
      const jobStatus = process.env.JOB_STATUS || 'unknown';

      let conclusion;
      let summary;
      if (body) {
        conclusion = 'failure';
        summary = 'Benchmark regression detected. See details in the output.';
      } else if (jobStatus !== 'success') {
        // An empty body here means no report was produced, not that the
        // benchmarks passed, so do not publish a green check.
        conclusion = 'neutral';
        summary = 'Benchmark comparison did not complete (job status: ' + jobStatus + '); no verdict available.';
      } else {
        conclusion = 'success';
        summary = 'No regression detected.';
      }

      // Keep output.text within the Checks API size limit.
      const MAX_TEXT = 65535;
      const text = body.length > MAX_TEXT ? body.slice(0, MAX_TEXT - 1) + '…' : body;

      // Checks API: the result appears in the "Checks" section of the PR.
      await github.rest.checks.create({
        owner: context.repo.owner,
        repo: context.repo.repo,
        name: process.env.CHECK_NAME,
        head_sha: sha,
        status: 'completed',
        conclusion,
        output: {
          title: process.env.CHECK_NAME,
          summary,
          // The full comment body is attached as the check's detail text.
          text: text || undefined
        }
      });

0 commit comments

Comments
 (0)