Skip to content

Commit 89071a3

Browse files
committed
foo
1 parent 0b635c1 commit 89071a3

File tree

1 file changed

+131
-119
lines changed

1 file changed

+131
-119
lines changed

.github/workflows/alarm.yml

Lines changed: 131 additions & 119 deletions
Original file line numberDiff line numberDiff line change
@@ -65,7 +65,8 @@ jobs:
6565
6666
core.setOutput('pr_number', pr ? String(pr.number) : '');
6767
68-
- name: Check regressions
68+
- name: Check regressions + build outputs
69+
id: analyze
6970
if: ${{ steps.pr.outputs.pr_number != '' }}
7071
env:
7172
# --- W&B ---
@@ -83,14 +84,15 @@ jobs:
8384
# Input/Output paths
8485
ARTIFACTS_DIR: ${{ steps.dl.outputs.download-path }}
8586
PR_COMMENT_PATH: pr_comment.md
87+
CHECK_BODY_PATH: check_output.md
8688
run: |
8789
python - <<'PY'
8890
import os, sys, json, math, re
8991
import statistics as stats
9092
import wandb
9193
94+
# ---------- helpers ----------
9295
SHA_RE = re.compile(r"[0-9a-fA-F]{7,40}")
93-
9496
def _norm_rev(text):
9597
if not text: return None
9698
text = text.split("@", 1)[0]
@@ -135,50 +137,58 @@ jobs:
135137
out[tid] = {"runtime_fps": rt, "compile_time": ct}
136138
return out
137139
138-
# ----- read workflow_run id (for tag only; no API calls) -----
139-
ev = json.load(open(os.environ["GITHUB_EVENT_PATH"], "r", encoding="utf-8"))
140-
wr = ev["workflow_run"]
141-
wr_id = wr["id"]
140+
def fmt_num(v):
    """Format a metric value for a Markdown table cell.

    Returns "—" for ``None`` and NaN, a comma-grouped whole number when the
    displayed magnitude is 1000 or more, two decimals for fractional floats,
    and a plain integer otherwise. Anything that cannot be formatted as a
    number is returned as ``str(v)``.
    """
    if v is None or (isinstance(v, float) and math.isnan(v)):
        return "—"
    try:
        # Decide on the value as it will be displayed: 999.999 prints as
        # "1000.00" at two decimals, so it belongs in the grouped branch.
        shown = round(v, 2) if isinstance(v, float) else v
        if abs(shown) >= 1000:
            return f"{v:,.0f}"
        if isinstance(v, float) and not v.is_integer():
            return f"{v:.2f}"
        return f"{int(v)}"
    except (TypeError, ValueError, ArithmeticError):
        # Best effort: show non-numeric values verbatim instead of failing the report.
        return str(v)
147+
148+
def fmt_pct(v, highlight=False):
    """Format a percentage delta for a Markdown table cell.

    Args:
        v: The delta in percent, or ``None``/NaN when no delta is available.
        highlight: When true, wrap the result in ``**`` so it renders bold.

    Returns:
        "—" for ``None`` or NaN, otherwise a signed value with two decimals
        and a trailing ``%`` (e.g. ``+1.23%``).
    """
    # NaN would otherwise render as "+nan%"; treat it like a missing value.
    if v is None or (isinstance(v, float) and math.isnan(v)):
        return "—"
    s = f"{v:+.2f}%"
    return f"**{s}**" if highlight else s
152+
153+
# ----- read run params -----
def env_number(name, default, cast):
    """Read a numeric setting from the environment.

    Args:
        name: Environment variable name.
        default: Fallback text, used when the variable is unset or blank.
        cast: Converter applied to the chosen text (``int`` or ``float``).

    Returns:
        ``cast`` applied to the variable's value, or to ``default`` when the
        variable is missing or contains only whitespace.
    """
    # A variable that is exported but empty would make int("")/float("") raise,
    # so blank is treated the same as unset.
    raw = os.environ.get(name, "").strip()
    return cast(raw or default)

# Regression thresholds, in percent.
tol_rt = env_number("RUNTIME_REGRESSION_TOLERANCE_PCT", "10", float)
tol_ct = env_number("COMPILE_REGRESSION_TOLERANCE_PCT", "10", float)
# Limits used while scanning baseline runs.
MAX_REVISIONS = env_number("MAX_REVISIONS", "5", int)
NO_CHANGE_PATIENCE = env_number("NO_CHANGE_PATIENCE", "100", int)
142158
143-
# ----- Load current PR artifacts -----
159+
# ----- load artifact (current results) -----
def _exit_without_results(reason):
    """Write empty outputs plus a "false" regression flag, then exit successfully.

    Called when there is nothing to compare, so that later parts of the
    workflow step still find every file they read.
    """
    print(f"[INFO] {reason}; skip.")
    for env_key, fallback in (("PR_COMMENT_PATH", "pr_comment.md"),
                              ("CHECK_BODY_PATH", "check_output.md")):
        # Create/truncate the file: no data → no comment/check body.
        with open(os.environ.get(env_key, fallback), "w", encoding="utf-8"):
            pass
    # The shell code after this script cats this flag unconditionally.
    with open("HAS_REGRESSIONS.txt", "w", encoding="utf-8") as flag:
        flag.write("false\n")
    sys.exit(0)

artifacts_path = os.path.abspath(os.environ.get("ARTIFACTS_DIR", "./artifacts"))
if not os.path.exists(artifacts_path):
    _exit_without_results("No artifacts dir")

# First speed_test*.txt found anywhere under the artifacts directory.
current_txt_path = next(
    (
        os.path.join(root, fname)
        for root, _, files in os.walk(artifacts_path)
        for fname in files
        if fname.startswith("speed_test") and fname.endswith(".txt")
    ),
    None,
)
if current_txt_path is None:
    _exit_without_results("No speed_test*.txt")

with open(current_txt_path, "r", encoding="utf-8") as f:
    current_bm = artifacts_parse_speed_txt_lines(f.readlines())
160180
161-
# ----- W&B login (anonymous allowed) -----
181+
# ----- W&B baselines -----
162182
if not os.getenv("WANDB_API_KEY"):
163183
try: wandb.login(anonymous="allow", relogin=True)
164184
except Exception: pass
165-
166-
# ----- Collect baselines from W&B -----
167185
ENTITY = os.environ.get("WANDB_ENTITY","")
168186
PROJECT= os.environ.get("WANDB_PROJECT","")
169-
MAX_REVISIONS = int(os.environ.get("MAX_REVISIONS","5"))
170-
NO_CHANGE_PATIENCE = int(os.environ.get("NO_CHANGE_PATIENCE","100"))
171-
tol_rt = float(os.environ.get("RUNTIME_REGRESSION_TOLERANCE_PCT","10"))
172-
tol_ct = float(os.environ.get("COMPILE_REGRESSION_TOLERANCE_PCT","10"))
173187
174188
api = wandb.Api()
175189
runs_iter = api.runs(f"{ENTITY}/{PROJECT}", order="-created_at")
176190
177-
by_rev = {} # rev -> {bench_id: {runtime_fps, compile_time}}
178-
rev_order = [] # latest -> oldest
179-
selected_revs = None
180-
no_change_streak = 0
181-
191+
by_rev = {}; rev_order = []; selected_revs=None; no_change=0
182192
for run in runs_iter:
183193
if run.state != "finished": continue
184194
cfg = getattr(run, "config", None)
@@ -187,112 +197,120 @@ jobs:
187197
raw_rev = cfg.get("revision"); raw_bid = cfg.get("benchmark_id")
188198
if not raw_rev or not raw_bid:
189199
if selected_revs is not None:
190-
no_change_streak += 1
191-
if no_change_streak >= NO_CHANGE_PATIENCE: break
200+
no_change += 1
201+
if no_change >= NO_CHANGE_PATIENCE: break
192202
continue
193-
194-
rev = _norm_rev(raw_rev.get("value"))
195-
bid = raw_bid.get("value")
203+
rev = _norm_rev(raw_rev.get("value")); bid = raw_bid.get("value")
196204
if not rev or not bid:
197205
if selected_revs is not None:
198-
no_change_streak += 1
199-
if no_change_streak >= NO_CHANGE_PATIENCE: break
206+
no_change += 1
207+
if no_change >= NO_CHANGE_PATIENCE: break
200208
continue
201-
202209
if selected_revs is not None and rev not in selected_revs:
203-
no_change_streak += 1
204-
if no_change_streak >= NO_CHANGE_PATIENCE: break
210+
no_change += 1
211+
if no_change >= NO_CHANGE_PATIENCE: break
205212
continue
206-
207213
if rev not in by_rev:
208-
by_rev[rev] = {}
209-
rev_order.append(rev)
210-
if len(rev_order) >= MAX_REVISIONS:
211-
selected_revs = set(rev_order)
214+
by_rev[rev]={}; rev_order.append(rev)
215+
if len(rev_order) >= MAX_REVISIONS: selected_revs = set(rev_order)
212216
213217
nbid = wandb_normalize_benchmark_id(bid)
214218
if nbid not in by_rev[rev]:
215-
runtime_fps = None; compile_time = None; cnt = 0
219+
runtime_fps=None; compile_time=None; cnt=0
216220
for row in run.scan_history(keys=["runtime_fps","compile_time"]):
217221
runtime_fps = row.get("runtime_fps")
218222
compile_time = row.get("compile_time")
219223
if runtime_fps is not None and compile_time is not None: break
220224
cnt += 1
221225
if cnt >= 10: break
222226
by_rev[rev][nbid] = {"runtime_fps": runtime_fps, "compile_time": compile_time}
223-
if selected_revs is not None: no_change_streak = 0
227+
if selected_revs is not None: no_change = 0
224228
else:
225229
if selected_revs is not None:
226-
no_change_streak += 1
227-
if no_change_streak >= NO_CHANGE_PATIENCE: break
230+
no_change += 1
231+
if no_change >= NO_CHANGE_PATIENCE: break
228232
229-
# ----- Compare current vs baselines -----
230-
def collect_mean(metric_key, bench_id):
231-
vals = []
233+
def mean_of(metric, bid):
    """Return the baseline mean of ``metric`` for benchmark ``bid``.

    Reads the module-level ``by_rev`` mapping
    (revision -> benchmark id -> {"runtime_fps", "compile_time"}) and averages
    the metric over every revision that has a usable sample.

    Args:
        metric: Metric key, e.g. "runtime_fps" or "compile_time".
        bid: Normalized benchmark id.

    Returns:
        The arithmetic mean as a float, or ``None`` when no revision holds a
        usable value.
    """
    vals = []
    for per_bench in by_rev.values():
        v = per_bench.get(bid, {}).get(metric)
        # bool is a subclass of int, and NaN/±inf would make the mean
        # meaningless, so only real finite numbers are kept.
        if isinstance(v, (int, float)) and not isinstance(v, bool) and math.isfinite(v):
            vals.append(float(v))
    return stats.mean(vals) if vals else None
237240
238-
runtime_regs = []; compile_regs = []
239-
for bid, m in current_benchmark.items():
240-
cur_rt = m.get("runtime_fps"); cur_ct = m.get("compile_time")
241-
base_rt = collect_mean("runtime_fps", bid)
242-
base_ct = collect_mean("compile_time", bid)
243-
if base_rt is not None and isinstance(cur_rt,(int,float)) and not (isinstance(cur_rt,float) and math.isnan(cur_rt)) and base_rt>0:
244-
d = (cur_rt - base_rt) / base_rt * 100.0
245-
if True: # d < -tol_rt:
246-
runtime_regs.append((bid, cur_rt, base_rt, d))
247-
if base_ct is not None and isinstance(cur_ct,(int,float)) and not (isinstance(cur_ct,float) and math.isnan(cur_ct)) and base_ct>0:
248-
d = (cur_ct - base_ct) / base_ct * 100.0
249-
if True: #d > tol_ct:
250-
compile_regs.append((bid, cur_ct, base_ct, d))
251-
252-
# if not runtime_regs and not compile_regs:
253-
# print("No regression vs baselines; skip comment.")
254-
# open(os.environ.get("PR_COMMENT_PATH","pr_comment.md"),"w").close()
255-
# sys.exit(0)
256-
257-
def trunc(s, n=120): return s if len(s)<=n else s[:n]+"…"
258-
259-
lines = []
260-
lines.append(":warning: **Benchmark regression detected**")
261-
lines.append(f"- Baseline commits considered: **{len(rev_order)}**")
262-
for i, r in enumerate(rev_order, 1):
263-
lines.append(f" - Commit {i}: {r}")
264-
lines.append(f"- Runtime tolerance: **-{tol_rt:.1f}%**; Compile tolerance: **+{tol_ct:.1f}%**")
265-
lines.append("")
266-
267-
if runtime_regs:
268-
runtime_regs.sort(key=lambda x: x[3])
269-
lines += ["**Runtime FPS regressions (vs mean of other commits)**",
270-
"| benchmark_id | current | baseline mean | delta % |",
271-
"|---|---:|---:|---:|"]
272-
for bid, cur, base, d in runtime_regs[:20]:
273-
lines.append(f"| `{trunc(bid)}` | {cur:,.0f} | {base:,.0f} | {d:.2f}% |")
274-
if len(runtime_regs)>20: lines.append("_Only first 20 shown._")
275-
lines.append("")
276-
277-
if compile_regs:
278-
compile_regs.sort(key=lambda x: -x[3])
279-
lines += ["**Compile-time regressions (vs mean of other commits)**",
280-
"| benchmark_id | current | baseline mean | delta % |",
281-
"|---|---:|---:|---:|"]
282-
for bid, cur, base, d in compile_regs[:20]:
283-
lines.append(f"| `{trunc(bid)}` | {cur:,.0f} | {base:,.0f} | {d:.2f}% |")
284-
if len(compile_regs)>20: lines.append("_Only first 20 shown._")
285-
lines.append("")
286-
287-
lines.append(f"<!-- bench-guard-run:{wr_id} -->")
288-
body = "\n".join(lines)
289-
241+
# ----- build table rows for ALL benchmarks -----
def _is_usable(x):
    """True for real, finite numbers; bool, None, NaN and ±inf are rejected."""
    return isinstance(x, (int, float)) and not isinstance(x, bool) and math.isfinite(x)

def _delta_pct(cur, base):
    """Percent change of ``cur`` relative to ``base``, or None when not computable."""
    # A zero or negative baseline cannot yield a meaningful percentage.
    if not _is_usable(cur) or not _is_usable(base) or base <= 0:
        return None
    return (cur - base) / base * 100.0

rows = []
reg_found = False
for bid in sorted(current_bm):
    cur_rt = current_bm[bid].get("runtime_fps")
    cur_ct = current_bm[bid].get("compile_time")
    base_rt = mean_of("runtime_fps", bid)
    base_ct = mean_of("compile_time", bid)

    d_rt = _delta_pct(cur_rt, base_rt)
    d_ct = _delta_pct(cur_ct, base_ct)

    # Lower FPS is worse; higher compile time is worse.
    rt_reg = d_rt is not None and d_rt < -tol_rt
    ct_reg = d_ct is not None and d_ct > tol_ct
    is_reg = rt_reg or ct_reg
    reg_found = reg_found or is_reg

    # A raw "|" in the id would be read as a column separator by the table.
    safe_bid = str(bid).replace("|", "\\|")
    rows.append([
        "🔴" if is_reg else "✅",
        f"`{safe_bid}`",
        fmt_num(cur_rt), fmt_num(base_rt), fmt_pct(d_rt, highlight=rt_reg),
        fmt_num(cur_ct), fmt_num(base_ct), fmt_pct(d_ct, highlight=ct_reg),
    ])

# ----- compose CHECK body -----
header = [
    "| status | benchmark_id | current FPS | baseline FPS | Δ FPS | current compile | baseline compile | Δ compile |",
    "|:------:|:-------------|-----------:|-------------:|------:|----------------:|-----------------:|---------:|",
]
table_lines = header + ["| " + " | ".join(r) + " |" for r in rows]

summary_top = [f"Baselines considered: **{len(rev_order)}** commits"]
if reg_found:
    summary_top.append(f"Regressions detected (runtime ≤ −{tol_rt:.0f}%, compile ≥ +{tol_ct:.0f}%).")
else:
    summary_top.append("No regressions detected.")

# The full table is always part of the check body, folded inside <details>.
check_body = "\n".join(summary_top + ["", "<details><summary>Benchmark details</summary>", "", *table_lines, "", "</details>"])

# ----- compose COMMENT body -----
# A PR comment is produced only when at least one benchmark regressed.
if reg_found:
    comment_body = "\n".join([
        ":warning: **Benchmark comparison vs W&B baselines**",
        f"- Baselines considered: **{len(rev_order)}** commits",
        f"- Thresholds: runtime ≤ −{tol_rt:.0f}%, compile ≥ +{tol_ct:.0f}%",
        "",
        *table_lines,
    ])
else:
    comment_body = ""

# write files
with open(os.environ.get("CHECK_BODY_PATH", "check_output.md"), "w", encoding="utf-8") as f:
    f.write(check_body + "\n")
with open(os.environ.get("PR_COMMENT_PATH", "pr_comment.md"), "w", encoding="utf-8") as f:
    # Keep the file truly empty when there is nothing to post: the shell code
    # after this script decides with `[ -s "pr_comment.md" ]`.
    if comment_body:
        f.write(comment_body + "\n")

# flag for next steps
with open("HAS_REGRESSIONS.txt", "w", encoding="utf-8") as f:
    f.write("true\n" if reg_found else "false\n")
293303
PY
294304
295-
# capture comment into env for next step
305+
# expose outputs to later steps
306+
echo "HAS_REGRESSIONS=$(cat HAS_REGRESSIONS.txt)" >> "$GITHUB_ENV"
307+
{
308+
echo 'CHECK_OUTPUT<<__EOF__'
309+
cat check_output.md
310+
echo '__EOF__'
311+
} >> "$GITHUB_ENV"
312+
313+
# only set SCRIPT_OUTPUT when we actually want to comment
296314
if [ -s "pr_comment.md" ]; then
297315
{
298316
echo 'SCRIPT_OUTPUT<<__EOF__'
@@ -303,41 +321,36 @@ jobs:
303321
echo "SCRIPT_OUTPUT=" >> "$GITHUB_ENV"
304322
fi
305323
306-
- name: Add PR comment
324+
# Posts the regression table as a PR comment. Runs only when a PR number was
# resolved and the analysis step exported a non-empty SCRIPT_OUTPUT.
- name: Add PR comment (only if regressions)
  if: ${{ steps.pr.outputs.pr_number != '' && env.SCRIPT_OUTPUT != '' }}
  uses: actions/github-script@v7
  env:
    COMMENT_BODY: ${{ env.SCRIPT_OUTPUT }}
    # Passed through the environment instead of being interpolated into the
    # script source, so the value is never parsed as JavaScript.
    PR_NUMBER: ${{ steps.pr.outputs.pr_number }}
  with:
    script: |
      const prNumber = Number(process.env.PR_NUMBER);
      if (!Number.isInteger(prNumber) || prNumber <= 0) {
        core.info('No valid PR number; skipping comment.');
        return;
      }
      await github.rest.issues.createComment({
        owner: context.repo.owner,
        repo: context.repo.repo,
        issue_number: prNumber,
        body: process.env.COMMENT_BODY
      });
324338
325-
- name: Publish PR check (show in Checks tab)
326-
if: always() # 실패해도 체크는 찍히도록
339+
- name: Publish PR check (always show full table)
340+
if: always()
327341
uses: actions/github-script@v7
328342
env:
329343
CHECK_NAME: Benchmark Comparison
330-
COMMENT_BODY: ${{ env.SCRIPT_OUTPUT }} # 위에서 pr_comment.md를 env로 넣은 값
344+
CHECK_BODY: ${{ env.CHECK_OUTPUT }}
345+
HAS_REGRESSIONS: ${{ env.HAS_REGRESSIONS }}
331346
with:
332347
script: |
333348
const sha = context.payload.workflow_run.head_sha;
334-
const hasBody = Boolean(process.env.COMMENT_BODY && process.env.COMMENT_BODY.trim());
349+
const hasRegs = (process.env.HAS_REGRESSIONS || 'false').trim() === 'true';
335350
const conclusion = 'success';
336-
const summary = hasBody
337-
? 'Benchmark regression detected. See details in the output.'
338-
: 'No regression detected.';
339-
340-
// Checks API: PR 하단 "Checks" 섹션에 나타납니다.
351+
const summary = hasRegs
352+
? 'Regressions detected. See the table below.'
353+
: 'No regressions detected. See the table below.';
341354
await github.rest.checks.create({
342355
owner: context.repo.owner,
343356
repo: context.repo.repo,
@@ -348,7 +361,6 @@ jobs:
348361
output: {
349362
title: process.env.CHECK_NAME,
350363
summary,
351-
// 길어도 괜찮다면 전체 코멘트 본문을 체크 출력으로 넣을 수 있어요
352-
text: process.env.COMMENT_BODY || undefined
364+
text: process.env.CHECK_BODY || undefined
353365
}
354-
});
366+
});

0 commit comments

Comments
 (0)