@@ -19,28 +19,20 @@ jobs:
1919 contains(fromJson('["success","neutral"]'), github.event.workflow_run.conclusion)
2020
2121 steps :
22- - name : Checkout base repo (default branch)
23- uses : actions/checkout@v4
24- with :
25- # base(업스트림) 기본 브랜치의 최신 스크립트를 사용
26- fetch-depth : 0
27-
2822 - name : Setup Python
2923 uses : actions/setup-python@v5
3024 with :
3125 python-version : ' 3.10'
3226
3327 - name : Install deps
3428 run : |
35- python -m pip install --quiet --upgrade requests wandb
29+ python -m pip install --quiet --upgrade wandb
3630
3731 - name : Download artifacts from triggering run
3832 id : dl
3933 uses : actions/download-artifact@v4
4034 with :
41- # Production에서 업로드한 아티팩트 이름 (고정 이름이라면 그대로 사용)
4235 name : speed-test-results
43- # 트리거한 워크플로우 런 ID로 정확히 해당 실행의 아티팩트를 가져옴
4436 run-id : ${{ github.event.workflow_run.id }}
4537 github-token : ${{ secrets.GITHUB_TOKEN }}
4638 path : ./artifacts
@@ -49,34 +41,258 @@ jobs:
4941 run : |
5042 echo "Downloaded into ${{ steps.dl.outputs.download-path }}"
5143 ls -la ${{ steps.dl.outputs.download-path }} || true
52- echo
53- echo "Tree:"
5444 (command -v tree >/dev/null && tree -a ${{ steps.dl.outputs.download-path }}) || true
5545
56- - name : Run W&B regression commenter
46+ - name : Check regressions
5747 env :
5848 GITHUB_TOKEN : ${{ github.token }}
5949 GITHUB_API_URL : ${{ github.api_url }}
6050
6151 # --- W&B ---
62- WANDB_API_KEY : ${{ secrets.WANDB_API_KEY }} # 공개 프로젝트면 없어도 동작(anonymous)
52+ WANDB_API_KEY : ${{ secrets.WANDB_API_KEY }}
6353 WANDB_ENTITY : genesis-ai-company
6454 WANDB_PROJECT : genesis-benchmarks
6555 WANDB_SILENT : " true"
6656
67- # --- 수집/비교 파라미터 ---
68- MAX_RUNS : " 1000"
57+ # --- Parameters ---
6958 MAX_REVISIONS : " 5"
7059 NO_CHANGE_PATIENCE : " 100"
7160 RUNTIME_REGRESSION_TOLERANCE_PCT : " 10"
7261 COMPILE_REGRESSION_TOLERANCE_PCT : " 10"
7362
63+ # Input/Output paths
7464 ARTIFACTS_DIR : ${{ steps.dl.outputs.download-path }}
65+ PR_COMMENT_PATH : pr_comment.md
7566 run : |
76- python test_wandb.py
67+ python - <<'PY'
68+ import os, sys, json, math, re, requests
69+ import statistics as stats
70+ import wandb
71+
72+ SHA_RE = re.compile(r"[0-9a-fA-F]{7,40}")
73+
74+ def _norm_rev(text):
75+ if not text: return None
76+ text = text.split("@", 1)[0]
77+ m = SHA_RE.search(text)
78+ return m.group(0) if m else text
79+
80+ def _normalize_kv_id(kv: dict, drop_keys=None) -> str:
81+ drop = set(drop_keys or [])
82+ pairs = []
83+ for k, v in kv.items():
84+ if k in drop or v is None: continue
85+ k = str(k).strip(); v = str(v).strip()
86+ if not k or not v: continue
87+ pairs.append((k, v))
88+ pairs.sort(key=lambda x: x[0])
89+ return "-".join(f"{k}={v}" for k, v in pairs)
90+
91+ def wandb_normalize_benchmark_id(bid: str) -> str:
92+ kv = {}
93+ for token in (p.strip() for p in bid.split("-") if p.strip()):
94+ if "=" not in token: continue
95+ k, v = token.split("=", 1)
96+ kv[k.strip()] = v.strip()
97+ return _normalize_kv_id(kv)
98+
99+ def artifacts_parse_speed_txt_lines(lines):
100+ METRIC_KEYS = {"compile_time", "runtime_fps", "realtime_factor"}
101+ out = {}
102+ for line in lines:
103+ if not line.strip(): continue
104+ parts = [p.strip() for p in line.strip().split("|") if "=" in p]
105+ kv = {}
106+ for p in parts:
107+ k, v = p.split("=", 1)
108+ kv[k.strip()] = v.strip()
109+ tid = _normalize_kv_id(kv, drop_keys=METRIC_KEYS)
110+ rt = kv.get("runtime_fps"); ct = kv.get("compile_time")
111+ try: rt = float(rt) if rt is not None else None
112+ except: rt = None
113+ try: ct = float(ct) if ct is not None else None
114+ except: ct = None
115+ out[tid] = {"runtime_fps": rt, "compile_time": ct}
116+ return out
117+
# ----- Read event / find PR -----
# The workflow_run event payload carries the triggering run and repository.
API = os.environ.get("GITHUB_API_URL", "https://api.github.com")
ev = json.load(open(os.environ["GITHUB_EVENT_PATH"], "r", encoding="utf-8"))
wr = ev["workflow_run"]
repo= ev["repository"]
owner, name = repo["full_name"].split("/", 1)
head_sha = wr.get("head_sha")
wr_id = wr["id"]

s = requests.Session()
s.headers.update({
    "Authorization": f"Bearer {os.environ['GITHUB_TOKEN']}",
    "Accept": "application/vnd.github+json",
})

# workflow_run.pull_requests can be empty (e.g. for fork PRs); fall back to
# the "list pull requests associated with a commit" API using the head SHA.
prs = wr.get("pull_requests") or []
pr = prs[0] if prs else None
if not pr:
    r = s.get(f"{API}/repos/{owner}/{name}/commits/{head_sha}/pulls",
              headers={"Accept":"application/vnd.github.groot-preview+json"})
    if r.ok and r.json(): pr = r.json()[0]
if not pr:
    # No PR to comment on: write an empty comment file so the shell step
    # that posts the comment is skipped, then exit successfully.
    print("No PR found; skip."); open(os.environ.get("PR_COMMENT_PATH","pr_comment.md"),"w").close(); sys.exit(0)

# NOTE(review): pr_num and target_repo_api are assigned but never used later
# in this script — confirm whether they can be removed.
pr_num = pr["number"]
target_repo_api = pr["base"]["repo"]["url"]
144+
# ----- Load current PR artifacts -----
artifacts_path = os.path.abspath(os.environ.get("ARTIFACTS_DIR", "./artifacts"))
if not os.path.exists(artifacts_path):
    # Nothing was downloaded: write an empty comment file so the posting
    # step is skipped, and exit successfully.
    print("No artifacts dir; skip."); open(os.environ.get("PR_COMMENT_PATH","pr_comment.md"),"w").close(); sys.exit(0)

# Use the first speed_test*.txt found anywhere under the artifact directory.
current_txt_path = None
for root, _, files in os.walk(artifacts_path):
    for fname in files:
        if fname.startswith("speed_test") and fname.endswith(".txt"):
            current_txt_path = os.path.join(root, fname); break
    if current_txt_path: break

if current_txt_path is None:
    # Same empty-file convention as above: no comment gets posted.
    print("No speed_test*.txt; skip."); open(os.environ.get("PR_COMMENT_PATH","pr_comment.md"),"w").close(); sys.exit(0)

# benchmark_id -> {"runtime_fps": float|None, "compile_time": float|None}
with open(current_txt_path, "r", encoding="utf-8") as f:
    current_benchmark = artifacts_parse_speed_txt_lines(f.readlines())
162+
# ----- W&B login (anonymous allowed) -----
if not os.getenv("WANDB_API_KEY"):
    # Public projects can be read anonymously; best-effort, failures ignored.
    try: wandb.login(anonymous="allow", relogin=True)
    except Exception: pass

# ----- Collect baselines from W&B -----
ENTITY = os.environ.get("WANDB_ENTITY","")
PROJECT= os.environ.get("WANDB_PROJECT","")
MAX_REVISIONS = int(os.environ.get("MAX_REVISIONS","5"))              # baseline commits to compare against
NO_CHANGE_PATIENCE = int(os.environ.get("NO_CHANGE_PATIENCE","100"))  # early-stop after N unproductive runs
tol_rt = float(os.environ.get("RUNTIME_REGRESSION_TOLERANCE_PCT","10"))  # allowed FPS drop, percent
tol_ct = float(os.environ.get("COMPILE_REGRESSION_TOLERANCE_PCT","10"))  # allowed compile-time growth, percent

api = wandb.Api()
# Newest runs first; the iterator pages through the project lazily, so the
# loop below bounds how much is actually fetched.
runs_iter = api.runs(f"{ENTITY}/{PROJECT}", order="-created_at")
77178
78- # if pr_comment.md exists, inject it to the next step as an ENV
79- if [ -f "pr_comment.md" ]; then
by_rev = {}           # rev -> {bench_id: {runtime_fps, compile_time}}
rev_order = []        # latest -> oldest
selected_revs = None  # frozen set of baseline revs once MAX_REVISIONS are seen
no_change_streak = 0  # consecutive runs that contributed nothing (early-stop)

for run in runs_iter:
    if run.state != "finished": continue
    cfg = getattr(run, "config", None)
    if cfg is None: continue
    # NOTE(review): the W&B public API's Run.config is normally already a
    # dict, which would make json.loads raise TypeError here; the
    # `.get("value")` accesses below match the wrapped layout of
    # `run.json_config` instead — confirm against the wandb version in use.
    cfg = json.loads(cfg)
    raw_rev = cfg.get("revision"); raw_bid = cfg.get("benchmark_id")
    if not raw_rev or not raw_bid:
        # Unusable run; only counts against patience once the baseline
        # revision set has been frozen.
        if selected_revs is not None:
            no_change_streak += 1
            if no_change_streak >= NO_CHANGE_PATIENCE: break
        continue

    rev = _norm_rev(raw_rev.get("value"))
    bid = raw_bid.get("value")
    if not rev or not bid:
        if selected_revs is not None:
            no_change_streak += 1
            if no_change_streak >= NO_CHANGE_PATIENCE: break
        continue

    # After freezing, runs from other revisions only feed the early-stop counter.
    if selected_revs is not None and rev not in selected_revs:
        no_change_streak += 1
        if no_change_streak >= NO_CHANGE_PATIENCE: break
        continue

    if rev not in by_rev:
        by_rev[rev] = {}
        rev_order.append(rev)
        if len(rev_order) >= MAX_REVISIONS:
            # Freeze the baseline set at the newest MAX_REVISIONS revisions.
            selected_revs = set(rev_order)

    if wandb_normalize_benchmark_id(bid) not in by_rev[rev]:
        # pull first rows with metrics
        runtime_fps = None; compile_time = None; cnt = 0
        # Scan at most 10 history rows for the first one carrying both metrics.
        for row in run.scan_history(keys=["runtime_fps","compile_time"]):
            runtime_fps = row.get("runtime_fps")
            compile_time = row.get("compile_time")
            if runtime_fps is not None and compile_time is not None: break
            cnt += 1
            if cnt >= 10: break
        nbid = wandb_normalize_benchmark_id(bid)
        by_rev[rev][nbid] = {"runtime_fps": runtime_fps, "compile_time": compile_time}
        if selected_revs is not None: no_change_streak = 0
    else:
        # Benchmark already recorded for this revision: unproductive run.
        if selected_revs is not None:
            no_change_streak += 1
            if no_change_streak >= NO_CHANGE_PATIENCE: break
231+
# ----- Compare current vs baselines -----
def collect_mean(metric_key, bench_id, revisions=None):
    """Mean of `metric_key` for `bench_id` across baseline revisions.

    revisions: mapping rev -> {bench_id: {metric: value}}; defaults to the
    module-level `by_rev` collected above (the parameter is a backward-
    compatible generalization that decouples the helper from the global).
    Non-numeric and NaN samples are skipped. Returns None when no usable
    samples exist.
    """
    data = by_rev if revisions is None else revisions
    vals = []
    for rev_metrics in data.values():
        v = rev_metrics.get(bench_id, {}).get(metric_key)
        if isinstance(v, (int, float)) and not (isinstance(v, float) and math.isnan(v)):
            vals.append(float(v))
    return stats.mean(vals) if vals else None
240+
# Record only deltas that exceed the configured tolerances (tol_rt / tol_ct
# were previously parsed from the env but never applied, so every benchmark
# landed in the "regressions" tables): runtime regresses when FPS drops by
# more than tol_rt percent; compile time regresses when it grows by more
# than tol_ct percent.
def _finite(x):
    # True for real numbers that are not NaN (bools pass, as before).
    return isinstance(x, (int, float)) and not (isinstance(x, float) and math.isnan(x))

runtime_regs = []
compile_regs = []
for bid, metrics in current_benchmark.items():
    cur_rt = metrics.get("runtime_fps")
    cur_ct = metrics.get("compile_time")
    base_rt = collect_mean("runtime_fps", bid)
    base_ct = collect_mean("compile_time", bid)
    if base_rt is not None and _finite(cur_rt) and base_rt > 0:
        d = (cur_rt - base_rt) / base_rt * 100.0
        if d <= -tol_rt:  # FPS dropped beyond tolerance
            runtime_regs.append((bid, cur_rt, base_rt, d))
    if base_ct is not None and _finite(cur_ct) and base_ct > 0:
        d = (cur_ct - base_ct) / base_ct * 100.0
        if d >= tol_ct:  # compile time grew beyond tolerance
            compile_regs.append((bid, cur_ct, base_ct, d))
252+
def trunc(s, n=120):
    """Return s unchanged when it fits in n chars, else cut to n and add '…'."""
    if len(s) <= n:
        return s
    return s[:n] + "…"
254+
# ----- Build PR comment -----
comment_path = os.environ.get("PR_COMMENT_PATH", "pr_comment.md")

if not runtime_regs and not compile_regs:
    # Previously the ":warning: regression detected" header was written (and
    # therefore posted) unconditionally. Leave the comment file empty instead,
    # so the `[ -s pr_comment.md ]` gate in the shell below skips posting —
    # the same convention the no-PR / no-artifact early exits use.
    open(comment_path, "w", encoding="utf-8").close()
    print(f"[INFO] no regressions; wrote empty {comment_path}")
    sys.exit(0)

lines = []
lines.append(":warning: **Benchmark regression detected (vs W&B history)**")
lines.append(f"- Baseline commits considered: **{len(rev_order)}**")
for i, r in enumerate(rev_order, 1):
    lines.append(f"  - Commit {i}: {r}")
lines.append(f"- Runtime tolerance: **-{tol_rt:.1f}%**; Compile tolerance: **+{tol_ct:.1f}%**")
lines.append("")

def _append_table(title, rows):
    # Shared markdown renderer for the two regression tables (top 20 rows).
    lines.append(title)
    lines.append("| benchmark_id | current | baseline mean | delta % |")
    lines.append("|---|---:|---:|---:|")
    for bid, cur, base, d in rows[:20]:
        lines.append(f"| `{trunc(bid)}` | {cur:,.0f} | {base:,.0f} | {d:.2f}% |")
    if len(rows) > 20:
        lines.append("_Only first 20 shown._")
    # Blank separator was previously only emitted when >20 rows; emit always.
    lines.append("")

if runtime_regs:
    runtime_regs.sort(key=lambda x: x[3])   # worst (most negative delta) first
    _append_table("**Runtime FPS regressions (vs mean of other revisions)**", runtime_regs)

if compile_regs:
    compile_regs.sort(key=lambda x: -x[3])  # worst (largest increase) first
    _append_table("**Compile-time regressions (vs mean of other revisions)**", compile_regs)

# Hidden marker lets later tooling find/deduplicate this comment by run id.
tag = f"bench-guard-run:{wr_id}"
lines.append(f"<!-- {tag} -->")
body = "\n".join(lines)

print("********* Body: ")
print(body)

with open(comment_path, "w", encoding="utf-8") as f:
    f.write(body + "\n")
print(f"[INFO] wrote {comment_path}")
292+ PY
293+
294+ # Pass pr_comment.md to the next step
295+ if [ -s "pr_comment.md" ]; then
80296 {
81297 echo 'SCRIPT_OUTPUT<<__EOF__'
82298 cat pr_comment.md
@@ -93,18 +309,15 @@ jobs:
          # Comment body produced by the "Check regressions" step (contents
          # of pr_comment.md, injected via the SCRIPT_OUTPUT env var).
          # NOTE(review): if SCRIPT_OUTPUT is unset this posts an empty body —
          # confirm an `if:` guard exists on this step upstream.
          COMMENT_BODY: ${{ env.SCRIPT_OUTPUT }}
        with:
          script: |
            // workflow_run events have no context.issue; read the PR number
            // from the triggering run's payload instead.
            const prs = (context.payload.workflow_run.pull_requests || []);
            if (!prs.length) {
              // NOTE(review): pull_requests is empty for fork PRs, so those
              // never get a comment even when the Python step located the PR
              // via the commits API — confirm this asymmetry is intended.
              core.info('No associated PR; skipping comment.');
              return;
            }
            const prNumber = prs[0].number;
            await github.rest.issues.createComment({
              issue_number: prNumber,
              owner: context.repo.owner,
              repo: context.repo.repo,
              body: process.env.COMMENT_BODY
            });
110-
0 commit comments