
             core.setOutput('pr_number', pr ? String(pr.number) : '');

-      - name : Check regressions
+      - name : Check regressions + build outputs
+        id : analyze
         if : ${{ steps.pr.outputs.pr_number != '' }}
         env :
           # --- W&B ---
@@ -83,14 +84,15 @@ jobs:
           # Input/Output paths
           ARTIFACTS_DIR : ${{ steps.dl.outputs.download-path }}
           PR_COMMENT_PATH : pr_comment.md
+          CHECK_BODY_PATH : check_output.md
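+          # PR_COMMENT_PATH becomes a PR comment only when regressions are found;
+          # CHECK_BODY_PATH is always published through the Checks API below.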
         run : |
           python - <<'PY'
           import os, sys, json, math, re
           import statistics as stats
           import wandb

+          # ---------- helpers ----------
           SHA_RE = re.compile(r"[0-9a-fA-F]{7,40}")
-
           def _norm_rev(text):
               if not text: return None
               text = text.split("@", 1)[0]
@@ -135,50 +137,58 @@ jobs:
                   out[tid] = {"runtime_fps": rt, "compile_time": ct}
               return out

-          # ----- read workflow_run id (for tag only; no API calls) -----
-          ev = json.load(open(os.environ["GITHUB_EVENT_PATH"], "r", encoding="utf-8"))
-          wr = ev["workflow_run"]
-          wr_id = wr["id"]
+          def fmt_num(v):
+              if v is None or (isinstance(v,float) and math.isnan(v)): return "—"
+              try:
+                  if abs(v) >= 1000: return f"{v:,.0f}"
+                  return f"{v:.2f}" if isinstance(v,float) and not v.is_integer() else f"{int(v)}"
+              except Exception:
+                  return str(v)
+
+          def fmt_pct(v, highlight=False):
+              if v is None: return "—"
+              s = f"{v:+.2f}%"
+              return f"**{s}**" if highlight else s
+
+          # ----- read run params -----
+          tol_rt = float(os.environ.get("RUNTIME_REGRESSION_TOLERANCE_PCT","10"))
+          tol_ct = float(os.environ.get("COMPILE_REGRESSION_TOLERANCE_PCT","10"))
+          MAX_REVISIONS = int(os.environ.get("MAX_REVISIONS","5"))
+          NO_CHANGE_PATIENCE = int(os.environ.get("NO_CHANGE_PATIENCE","100"))
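+          # tol_rt / tol_ct: allowed FPS drop and compile-time increase, in percent, before a benchmark is flagged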

-          # ----- Load current PR artifacts -----
+          # ----- load artifact (current results) -----
           artifacts_path = os.path.abspath(os.environ.get("ARTIFACTS_DIR", "./artifacts"))
           if not os.path.exists(artifacts_path):
-              print("No artifacts dir; skip."); open(os.environ.get("PR_COMMENT_PATH","pr_comment.md"),"w").close(); sys.exit(0)
+              # no data → no comment/check body
+              open(os.environ.get("PR_COMMENT_PATH","pr_comment.md"),"w").close()
+              open(os.environ.get("CHECK_BODY_PATH","check_output.md"),"w").close()
+              sys.exit(0)

           current_txt_path = None
           for root, _, files in os.walk(artifacts_path):
               for fname in files:
                   if fname.startswith("speed_test") and fname.endswith(".txt"):
                       current_txt_path = os.path.join(root, fname); break
               if current_txt_path: break
-
           if current_txt_path is None:
-              print("No speed_test*.txt; skip."); open(os.environ.get("PR_COMMENT_PATH","pr_comment.md"),"w").close(); sys.exit(0)
+              open(os.environ.get("PR_COMMENT_PATH","pr_comment.md"),"w").close()
+              open(os.environ.get("CHECK_BODY_PATH","check_output.md"),"w").close()
+              sys.exit(0)

           with open(current_txt_path, "r", encoding="utf-8") as f:
-              current_benchmark = artifacts_parse_speed_txt_lines(f.readlines())
+              current_bm = artifacts_parse_speed_txt_lines(f.readlines())

-          # ----- W&B login (anonymous allowed) -----
+          # ----- W&B baselines -----
           if not os.getenv("WANDB_API_KEY"):
               try: wandb.login(anonymous="allow", relogin=True)
               except Exception: pass
-
-          # ----- Collect baselines from W&B -----
           ENTITY = os.environ.get("WANDB_ENTITY","")
           PROJECT= os.environ.get("WANDB_PROJECT","")
-          MAX_REVISIONS = int(os.environ.get("MAX_REVISIONS","5"))
-          NO_CHANGE_PATIENCE = int(os.environ.get("NO_CHANGE_PATIENCE","100"))
-          tol_rt = float(os.environ.get("RUNTIME_REGRESSION_TOLERANCE_PCT","10"))
-          tol_ct = float(os.environ.get("COMPILE_REGRESSION_TOLERANCE_PCT","10"))

           api = wandb.Api()
           runs_iter = api.runs(f"{ENTITY}/{PROJECT}", order="-created_at")

-          by_rev = {}        # rev -> {bench_id: {runtime_fps, compile_time}}
-          rev_order = []     # latest -> oldest
-          selected_revs = None
-          no_change_streak = 0
-
+          by_rev = {}; rev_order = []; selected_revs = None; no_change = 0
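+          # Walk runs newest-first: collect per-revision metrics until MAX_REVISIONS distinct revisions are seen,
+          # then only accept runs from those revisions and stop after NO_CHANGE_PATIENCE runs that add nothing new.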
           for run in runs_iter:
               if run.state != "finished": continue
               cfg = getattr(run, "config", None)
@@ -187,112 +197,120 @@ jobs:
               raw_rev = cfg.get("revision"); raw_bid = cfg.get("benchmark_id")
               if not raw_rev or not raw_bid:
                   if selected_revs is not None:
-                      no_change_streak += 1
-                      if no_change_streak >= NO_CHANGE_PATIENCE: break
+                      no_change += 1
+                      if no_change >= NO_CHANGE_PATIENCE: break
                   continue
-
-              rev = _norm_rev(raw_rev.get("value"))
-              bid = raw_bid.get("value")
+              rev = _norm_rev(raw_rev.get("value")); bid = raw_bid.get("value")
               if not rev or not bid:
                   if selected_revs is not None:
-                      no_change_streak += 1
-                      if no_change_streak >= NO_CHANGE_PATIENCE: break
+                      no_change += 1
+                      if no_change >= NO_CHANGE_PATIENCE: break
                   continue
-
               if selected_revs is not None and rev not in selected_revs:
-                  no_change_streak += 1
-                  if no_change_streak >= NO_CHANGE_PATIENCE: break
+                  no_change += 1
+                  if no_change >= NO_CHANGE_PATIENCE: break
                   continue
-
               if rev not in by_rev:
-                  by_rev[rev] = {}
-                  rev_order.append(rev)
-                  if len(rev_order) >= MAX_REVISIONS:
-                      selected_revs = set(rev_order)
+                  by_rev[rev] = {}; rev_order.append(rev)
+                  if len(rev_order) >= MAX_REVISIONS: selected_revs = set(rev_order)

               nbid = wandb_normalize_benchmark_id(bid)
               if nbid not in by_rev[rev]:
-                  runtime_fps = None; compile_time = None; cnt = 0
+                  runtime_fps = None; compile_time = None; cnt = 0
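+                  # look for runtime_fps / compile_time in the first logged rows (scan at most 10)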
                   for row in run.scan_history(keys=["runtime_fps","compile_time"]):
                       runtime_fps = row.get("runtime_fps")
                       compile_time = row.get("compile_time")
                       if runtime_fps is not None and compile_time is not None: break
                       cnt += 1
                       if cnt >= 10: break
                   by_rev[rev][nbid] = {"runtime_fps": runtime_fps, "compile_time": compile_time}
-                  if selected_revs is not None: no_change_streak = 0
+                  if selected_revs is not None: no_change = 0
               else:
                   if selected_revs is not None:
-                      no_change_streak += 1
-                      if no_change_streak >= NO_CHANGE_PATIENCE: break
+                      no_change += 1
+                      if no_change >= NO_CHANGE_PATIENCE: break

-          # ----- Compare current vs baselines -----
-          def collect_mean(metric_key, bench_id):
-              vals = []
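+          # baseline for a benchmark = mean of the metric across the collected baseline revisions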
+          def mean_of(metric, bid):
+              vals = []
               for r in by_rev.keys():
-                  v = by_rev.get(r, {}).get(bench_id, {}).get(metric_key)
-                  if isinstance(v, (int,float)) and not (isinstance(v,float) and math.isnan(v)):
+                  v = by_rev.get(r,{}).get(bid, {}).get(metric)
+                  if isinstance(v,(int,float)) and not (isinstance(v,float) and math.isnan(v)):
                       vals.append(float(v))
               return stats.mean(vals) if vals else None

-          runtime_regs = []; compile_regs = []
-          for bid, m in current_benchmark.items():
-              cur_rt = m.get("runtime_fps"); cur_ct = m.get("compile_time")
-              base_rt = collect_mean("runtime_fps", bid)
-              base_ct = collect_mean("compile_time", bid)
-              if base_rt is not None and isinstance(cur_rt,(int,float)) and not (isinstance(cur_rt,float) and math.isnan(cur_rt)) and base_rt>0:
-                  d = (cur_rt - base_rt) / base_rt * 100.0
-                  if True: # d < -tol_rt:
-                      runtime_regs.append((bid, cur_rt, base_rt, d))
-              if base_ct is not None and isinstance(cur_ct,(int,float)) and not (isinstance(cur_ct,float) and math.isnan(cur_ct)) and base_ct>0:
-                  d = (cur_ct - base_ct) / base_ct * 100.0
-                  if True: #d > tol_ct:
-                      compile_regs.append((bid, cur_ct, base_ct, d))
-
-          # if not runtime_regs and not compile_regs:
-          #     print("No regression vs baselines; skip comment.")
-          #     open(os.environ.get("PR_COMMENT_PATH","pr_comment.md"),"w").close()
-          #     sys.exit(0)
-
-          def trunc(s, n=120): return s if len(s)<=n else s[:n]+"…"
-
-          lines = []
-          lines.append(":warning: **Benchmark regression detected**")
-          lines.append(f"- Baseline commits considered: **{len(rev_order)}**")
-          for i, r in enumerate(rev_order, 1):
-              lines.append(f"  - Commit {i}: {r}")
-          lines.append(f"- Runtime tolerance: **-{tol_rt:.1f}%**; Compile tolerance: **+{tol_ct:.1f}%**")
-          lines.append("")
-
-          if runtime_regs:
-              runtime_regs.sort(key=lambda x: x[3])
-              lines += ["**Runtime FPS regressions (vs mean of other commits)**",
-                        "| benchmark_id | current | baseline mean | delta % |",
-                        "|---|---:|---:|---:|"]
-              for bid, cur, base, d in runtime_regs[:20]:
-                  lines.append(f"| `{trunc(bid)}` | {cur:,.0f} | {base:,.0f} | {d:.2f}% |")
-              if len(runtime_regs)>20: lines.append("_Only first 20 shown._")
-              lines.append("")
-
-          if compile_regs:
-              compile_regs.sort(key=lambda x: -x[3])
-              lines += ["**Compile-time regressions (vs mean of other commits)**",
-                        "| benchmark_id | current | baseline mean | delta % |",
-                        "|---|---:|---:|---:|"]
-              for bid, cur, base, d in compile_regs[:20]:
-                  lines.append(f"| `{trunc(bid)}` | {cur:,.0f} | {base:,.0f} | {d:.2f}% |")
-              if len(compile_regs)>20: lines.append("_Only first 20 shown._")
-              lines.append("")
-
-          lines.append(f"<!-- bench-guard-run:{wr_id} -->")
-          body = "\n".join(lines)
-
+          # ----- build table rows for ALL benchmarks -----
+          rows = []
+          reg_found = False
+          for bid in sorted(current_bm.keys()):
+              cur_rt = current_bm[bid].get("runtime_fps")
+              cur_ct = current_bm[bid].get("compile_time")
+              base_rt = mean_of("runtime_fps", bid)
+              base_ct = mean_of("compile_time", bid)
+
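+              # percent change vs the baseline mean; an FPS drop beyond tol_rt or a compile-time increase beyond tol_ct is a regression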
+              d_rt = ((cur_rt - base_rt)/base_rt*100.0) if (base_rt and isinstance(cur_rt,(int,float))) else None
+              d_ct = ((cur_ct - base_ct)/base_ct*100.0) if (base_ct and isinstance(cur_ct,(int,float))) else None
+
+              is_reg = (d_rt is not None and d_rt < -tol_rt) or (d_ct is not None and d_ct > tol_ct)
+              reg_found = reg_found or is_reg
+
+              stat = "🔴" if is_reg else "✅"
+              delta_rt_cell = fmt_pct(d_rt, highlight=is_reg and d_rt is not None and d_rt < -tol_rt)
+              delta_ct_cell = fmt_pct(d_ct, highlight=is_reg and d_ct is not None and d_ct > tol_ct)
+
+              rows.append([
+                  stat,
+                  f"`{bid}`",
+                  fmt_num(cur_rt), fmt_num(base_rt), delta_rt_cell,
+                  fmt_num(cur_ct), fmt_num(base_ct), delta_ct_cell
+              ])
+
+          # ----- compose CHECK body -----
+          header = [
+              "| status | benchmark_id | current FPS | baseline FPS | Δ FPS | current compile | baseline compile | Δ compile |",
+              "|:------:|:-------------|-----------:|-------------:|------:|----------------:|-----------------:|---------:|",
+          ]
+          table_lines = header + ["| "+" | ".join(r)+" |" for r in rows]
+
+          summary_top = []
+          summary_top.append(f"Baselines considered: **{len(rev_order)}** commits")
+          if reg_found:
+              summary_top.append(f"Regressions detected (runtime ≤ −{tol_rt:.0f}%, compile ≥ +{tol_ct:.0f}%).")
+          else:
+              summary_top.append("No regressions detected.")
+
+          check_body = "\n".join(summary_top + ["", "<details><summary>Benchmark details</summary>", "", *table_lines, "", "</details>"])
+
+          # ----- compose COMMENT body -----
+          if reg_found:
+              comment_body = "\n".join([
+                  ":warning: **Benchmark comparison vs W&B baselines**",
+                  f"- Baselines considered: **{len(rev_order)}** commits",
+                  f"- Thresholds: runtime ≤ −{tol_rt:.0f}%, compile ≥ +{tol_ct:.0f}%",
+                  "",
+                  *table_lines
+              ])
+          else:
+              comment_body = ""
+
+          # write files
+          with open(os.environ.get("CHECK_BODY_PATH","check_output.md"), "w", encoding="utf-8") as f:
+              f.write(check_body+"\n")
           with open(os.environ.get("PR_COMMENT_PATH","pr_comment.md"), "w", encoding="utf-8") as f:
-              f.write(body + "\n")
-              print("[INFO] wrote pr_comment.md")
+              f.write(comment_body+"\n")
+
+          # flag for next steps
+          open("HAS_REGRESSIONS.txt","w").write("true\n" if reg_found else "false\n")
           PY

-          # capture comment into env for next step
+          # expose outputs to later steps
+          echo "HAS_REGRESSIONS=$(cat HAS_REGRESSIONS.txt)" >> "$GITHUB_ENV"
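+          # multi-line values written to $GITHUB_ENV need the NAME<<DELIMITER heredoc syntax used below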
+          {
+            echo 'CHECK_OUTPUT<<__EOF__'
+            cat check_output.md
+            echo '__EOF__'
+          } >> "$GITHUB_ENV"
+
+          # only set SCRIPT_OUTPUT when we actually want to comment
           if [ -s "pr_comment.md" ]; then
             {
               echo 'SCRIPT_OUTPUT<<__EOF__'
@@ -303,41 +321,36 @@ jobs:
             echo "SCRIPT_OUTPUT=" >> "$GITHUB_ENV"
           fi

-      - name : Add PR comment
+      - name : Add PR comment (only if regressions)
         if : ${{ steps.pr.outputs.pr_number != '' && env.SCRIPT_OUTPUT != '' }}
         uses : actions/github-script@v7
         env :
           COMMENT_BODY : ${{ env.SCRIPT_OUTPUT }}
         with :
           script : |
             const prNumber = Number('${{ steps.pr.outputs.pr_number }}');
-            if (!prNumber) {
-              core.info('No PR number; skipping comment.');
-              return;
-            }
             await github.rest.issues.createComment({
               owner: context.repo.owner,
               repo: context.repo.repo,
               issue_number: prNumber,
               body: process.env.COMMENT_BODY
             });

-      - name : Publish PR check (show in Checks tab)
-        if : always() # so the check is still recorded even if earlier steps fail
+      - name : Publish PR check (always show full table)
+        if : always()
         uses : actions/github-script@v7
         env :
           CHECK_NAME : Benchmark Comparison
-          COMMENT_BODY : ${{ env.SCRIPT_OUTPUT }} # the pr_comment.md body exported to env above
+          CHECK_BODY : ${{ env.CHECK_OUTPUT }}
+          HAS_REGRESSIONS : ${{ env.HAS_REGRESSIONS }}
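+          # CHECK_OUTPUT and HAS_REGRESSIONS are written to $GITHUB_ENV by the analyze step above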
         with :
           script : |
             const sha = context.payload.workflow_run.head_sha;
-            const hasBody = Boolean(process.env.COMMENT_BODY && process.env.COMMENT_BODY.trim());
+            const hasRegs = (process.env.HAS_REGRESSIONS || 'false').trim() === 'true';
             const conclusion = 'success';
-            const summary = hasBody
-              ? 'Benchmark regression detected. See details in the output.'
-              : 'No regression detected.';
-
-            // Checks API: the check shows up in the "Checks" section at the bottom of the PR.
+            const summary = hasRegs
+              ? 'Regressions detected. See the table below.'
+              : 'No regressions detected. See the table below.';
             await github.rest.checks.create({
               owner: context.repo.owner,
               repo: context.repo.repo,
@@ -348,7 +361,6 @@ jobs:
               output: {
                 title: process.env.CHECK_NAME,
                 summary,
-                // if length is not a concern, the full comment body can go into the check output
-                text: process.env.COMMENT_BODY || undefined
+                text: process.env.CHECK_BODY || undefined
               }
-            });
+            });