
Commit 0cdcde1

foo
1 parent c133d4c commit 0cdcde1

2 files changed: 236 additions & 23 deletions

.github/workflows/alarm.yml

Lines changed: 234 additions & 21 deletions
@@ -19,28 +19,20 @@ jobs:
       contains(fromJson('["success","neutral"]'), github.event.workflow_run.conclusion)

     steps:
-      - name: Checkout base repo (default branch)
-        uses: actions/checkout@v4
-        with:
-          # Use the latest script from the base (upstream) default branch
-          fetch-depth: 0
-
       - name: Setup Python
         uses: actions/setup-python@v5
         with:
           python-version: '3.10'

       - name: Install deps
         run: |
-          python -m pip install --quiet --upgrade requests wandb
+          python -m pip install --quiet --upgrade wandb

       - name: Download artifacts from triggering run
         id: dl
         uses: actions/download-artifact@v4
         with:
-          # Name of the artifact uploaded by Production (keep as-is if the name is fixed)
           name: speed-test-results
-          # Pull the artifact of exactly the triggering run via its workflow run ID
           run-id: ${{ github.event.workflow_run.id }}
           github-token: ${{ secrets.GITHUB_TOKEN }}
           path: ./artifacts
@@ -49,34 +41,258 @@ jobs:
         run: |
           echo "Downloaded into ${{ steps.dl.outputs.download-path }}"
           ls -la ${{ steps.dl.outputs.download-path }} || true
-          echo
-          echo "Tree:"
           (command -v tree >/dev/null && tree -a ${{ steps.dl.outputs.download-path }}) || true

-      - name: Run W&B regression commenter
+      - name: Check regressions
         env:
           GITHUB_TOKEN: ${{ github.token }}
           GITHUB_API_URL: ${{ github.api_url }}

           # --- W&B ---
-          WANDB_API_KEY: ${{ secrets.WANDB_API_KEY }} # works without it for public projects (anonymous)
+          WANDB_API_KEY: ${{ secrets.WANDB_API_KEY }}
           WANDB_ENTITY: genesis-ai-company
           WANDB_PROJECT: genesis-benchmarks
           WANDB_SILENT: "true"

-          # --- Collection/comparison parameters ---
-          MAX_RUNS: "1000"
+          # --- Parameters ---
           MAX_REVISIONS: "5"
           NO_CHANGE_PATIENCE: "100"
           RUNTIME_REGRESSION_TOLERANCE_PCT: "10"
           COMPILE_REGRESSION_TOLERANCE_PCT: "10"

+          # Input/Output paths
           ARTIFACTS_DIR: ${{ steps.dl.outputs.download-path }}
+          PR_COMMENT_PATH: pr_comment.md
         run: |
-          python test_wandb.py
+          python - <<'PY'
+          import os, sys, json, math, re, requests
+          import statistics as stats
+          import wandb
+
+          SHA_RE = re.compile(r"[0-9a-fA-F]{7,40}")
+
+          def _norm_rev(text):
+              if not text: return None
+              text = text.split("@", 1)[0]
+              m = SHA_RE.search(text)
+              return m.group(0) if m else text
+
+          def _normalize_kv_id(kv: dict, drop_keys=None) -> str:
+              drop = set(drop_keys or [])
+              pairs = []
+              for k, v in kv.items():
+                  if k in drop or v is None: continue
+                  k = str(k).strip(); v = str(v).strip()
+                  if not k or not v: continue
+                  pairs.append((k, v))
+              pairs.sort(key=lambda x: x[0])
+              return "-".join(f"{k}={v}" for k, v in pairs)
+
+          def wandb_normalize_benchmark_id(bid: str) -> str:
+              kv = {}
+              for token in (p.strip() for p in bid.split("-") if p.strip()):
+                  if "=" not in token: continue
+                  k, v = token.split("=", 1)
+                  kv[k.strip()] = v.strip()
+              return _normalize_kv_id(kv)
+
+          def artifacts_parse_speed_txt_lines(lines):
+              METRIC_KEYS = {"compile_time", "runtime_fps", "realtime_factor"}
+              out = {}
+              for line in lines:
+                  if not line.strip(): continue
+                  parts = [p.strip() for p in line.strip().split("|") if "=" in p]
+                  kv = {}
+                  for p in parts:
+                      k, v = p.split("=", 1)
+                      kv[k.strip()] = v.strip()
+                  tid = _normalize_kv_id(kv, drop_keys=METRIC_KEYS)
+                  rt = kv.get("runtime_fps"); ct = kv.get("compile_time")
+                  try: rt = float(rt) if rt is not None else None
+                  except: rt = None
+                  try: ct = float(ct) if ct is not None else None
+                  except: ct = None
+                  out[tid] = {"runtime_fps": rt, "compile_time": ct}
+              return out
+
+          # ----- Read event / find PR -----
+          API = os.environ.get("GITHUB_API_URL", "https://api.github.com")
+          ev = json.load(open(os.environ["GITHUB_EVENT_PATH"], "r", encoding="utf-8"))
+          wr = ev["workflow_run"]
+          repo= ev["repository"]
+          owner, name = repo["full_name"].split("/", 1)
+          head_sha = wr.get("head_sha")
+          wr_id = wr["id"]
+
+          s = requests.Session()
+          s.headers.update({
+              "Authorization": f"Bearer {os.environ['GITHUB_TOKEN']}",
+              "Accept": "application/vnd.github+json",
+          })
+
+          prs = wr.get("pull_requests") or []
+          pr = prs[0] if prs else None
+          if not pr:
+              r = s.get(f"{API}/repos/{owner}/{name}/commits/{head_sha}/pulls",
+                        headers={"Accept":"application/vnd.github.groot-preview+json"})
+              if r.ok and r.json(): pr = r.json()[0]
+          if not pr:
+              print("No PR found; skip."); open(os.environ.get("PR_COMMENT_PATH","pr_comment.md"),"w").close(); sys.exit(0)
+
+          pr_num = pr["number"]
+          target_repo_api = pr["base"]["repo"]["url"]
+
+          # ----- Load current PR artifacts -----
+          artifacts_path = os.path.abspath(os.environ.get("ARTIFACTS_DIR", "./artifacts"))
+          if not os.path.exists(artifacts_path):
+              print("No artifacts dir; skip."); open(os.environ.get("PR_COMMENT_PATH","pr_comment.md"),"w").close(); sys.exit(0)
+
+          current_txt_path = None
+          for root, _, files in os.walk(artifacts_path):
+              for fname in files:
+                  if fname.startswith("speed_test") and fname.endswith(".txt"):
+                      current_txt_path = os.path.join(root, fname); break
+              if current_txt_path: break
+
+          if current_txt_path is None:
+              print("No speed_test*.txt; skip."); open(os.environ.get("PR_COMMENT_PATH","pr_comment.md"),"w").close(); sys.exit(0)
+
+          with open(current_txt_path, "r", encoding="utf-8") as f:
+              current_benchmark = artifacts_parse_speed_txt_lines(f.readlines())
+
+          # ----- W&B login (anonymous allowed) -----
+          if not os.getenv("WANDB_API_KEY"):
+              try: wandb.login(anonymous="allow", relogin=True)
+              except Exception: pass
+
+          # ----- Collect baselines from W&B -----
+          ENTITY = os.environ.get("WANDB_ENTITY","")
+          PROJECT= os.environ.get("WANDB_PROJECT","")
+          MAX_REVISIONS = int(os.environ.get("MAX_REVISIONS","5"))
+          NO_CHANGE_PATIENCE = int(os.environ.get("NO_CHANGE_PATIENCE","100"))
+          tol_rt = float(os.environ.get("RUNTIME_REGRESSION_TOLERANCE_PCT","10"))
+          tol_ct = float(os.environ.get("COMPILE_REGRESSION_TOLERANCE_PCT","10"))
+
+          api = wandb.Api()
+          runs_iter = api.runs(f"{ENTITY}/{PROJECT}", order="-created_at")

-          # if pr_comment.md exists, inject it to the next step as an ENV
-          if [ -f "pr_comment.md" ]; then
+          by_rev = {} # rev -> {bench_id: {runtime_fps, compile_time}}
+          rev_order = [] # latest -> oldest
+          selected_revs = None
+          no_change_streak = 0
+
+          for run in runs_iter:
+              if run.state != "finished": continue
+              cfg = getattr(run, "config", None)
+              if cfg is None: continue
+              cfg = json.loads(cfg)
+              raw_rev = cfg.get("revision"); raw_bid = cfg.get("benchmark_id")
+              if not raw_rev or not raw_bid:
+                  if selected_revs is not None:
+                      no_change_streak += 1
+                      if no_change_streak >= NO_CHANGE_PATIENCE: break
+                  continue
+
+              rev = _norm_rev(raw_rev.get("value"))
+              bid = raw_bid.get("value")
+              if not rev or not bid:
+                  if selected_revs is not None:
+                      no_change_streak += 1
+                      if no_change_streak >= NO_CHANGE_PATIENCE: break
+                  continue
+
+              if selected_revs is not None and rev not in selected_revs:
+                  no_change_streak += 1
+                  if no_change_streak >= NO_CHANGE_PATIENCE: break
+                  continue
+
+              if rev not in by_rev:
+                  by_rev[rev] = {}
+                  rev_order.append(rev)
+                  if len(rev_order) >= MAX_REVISIONS:
+                      selected_revs = set(rev_order)
+
+              if wandb_normalize_benchmark_id(bid) not in by_rev[rev]:
+                  # pull first rows with metrics
+                  runtime_fps = None; compile_time = None; cnt = 0
+                  for row in run.scan_history(keys=["runtime_fps","compile_time"]):
+                      runtime_fps = row.get("runtime_fps")
+                      compile_time = row.get("compile_time")
+                      if runtime_fps is not None and compile_time is not None: break
+                      cnt += 1
+                      if cnt >= 10: break
+                  nbid = wandb_normalize_benchmark_id(bid)
+                  by_rev[rev][nbid] = {"runtime_fps": runtime_fps, "compile_time": compile_time}
+                  if selected_revs is not None: no_change_streak = 0
+              else:
+                  if selected_revs is not None:
+                      no_change_streak += 1
+                      if no_change_streak >= NO_CHANGE_PATIENCE: break
+
+          # ----- Compare current vs baselines -----
+          def collect_mean(metric_key, bench_id):
+              vals = []
+              for r in by_rev.keys():
+                  v = by_rev.get(r, {}).get(bench_id, {}).get(metric_key)
+                  if isinstance(v, (int,float)) and not (isinstance(v,float) and math.isnan(v)):
+                      vals.append(float(v))
+              return stats.mean(vals) if vals else None
+
+          runtime_regs = []; compile_regs = []
+          for bid, m in current_benchmark.items():
+              cur_rt = m.get("runtime_fps"); cur_ct = m.get("compile_time")
+              base_rt = collect_mean("runtime_fps", bid)
+              base_ct = collect_mean("compile_time", bid)
+              if base_rt is not None and isinstance(cur_rt,(int,float)) and not (isinstance(cur_rt,float) and math.isnan(cur_rt)) and base_rt>0:
+                  d = (cur_rt - base_rt) / base_rt * 100.0
+                  runtime_regs.append((bid, cur_rt, base_rt, d))
+              if base_ct is not None and isinstance(cur_ct,(int,float)) and not (isinstance(cur_ct,float) and math.isnan(cur_ct)) and base_ct>0:
+                  d = (cur_ct - base_ct) / base_ct * 100.0
+                  compile_regs.append((bid, cur_ct, base_ct, d))
+
+          def trunc(s, n=120): return s if len(s)<=n else s[:n]+"…"
+
+          lines = []
+          lines.append(":warning: **Benchmark regression detected (vs W&B history)**")
+          lines.append(f"- Baseline commits considered: **{len(rev_order)}**")
+          for i, r in enumerate(rev_order, 1):
+              lines.append(f" - Commit {i}: {r}")
+          lines.append(f"- Runtime tolerance: **-{tol_rt:.1f}%**; Compile tolerance: **+{tol_ct:.1f}%**")
+          lines.append("")
+
+          if runtime_regs:
+              runtime_regs.sort(key=lambda x: x[3])
+              lines += ["**Runtime FPS regressions (vs mean of other revisions)**",
+                        "| benchmark_id | current | baseline mean | delta % |",
+                        "|---|---:|---:|---:|"]
+              for bid, cur, base, d in runtime_regs[:20]:
+                  lines.append(f"| `{trunc(bid)}` | {cur:,.0f} | {base:,.0f} | {d:.2f}% |")
+              if len(runtime_regs)>20: lines.append("_Only first 20 shown._"); lines.append("")
+
+          if compile_regs:
+              compile_regs.sort(key=lambda x: -x[3])
+              lines += ["**Compile-time regressions (vs mean of other revisions)**",
+                        "| benchmark_id | current | baseline mean | delta % |",
+                        "|---|---:|---:|---:|"]
+              for bid, cur, base, d in compile_regs[:20]:
+                  lines.append(f"| `{trunc(bid)}` | {cur:,.0f} | {base:,.0f} | {d:.2f}% |")
+              if len(compile_regs)>20: lines.append("_Only first 20 shown._"); lines.append("")
+
+          tag = f"bench-guard-run:{wr_id}"
+          lines.append(f"<!-- {tag} -->")
+          body = "\n".join(lines)
+
+          print("********* Body: ")
+          print(body)
+
+          comment_path = os.environ.get("PR_COMMENT_PATH", "pr_comment.md")
+          with open(comment_path, "w", encoding="utf-8") as f:
+              f.write(body + "\n")
+          print(f"[INFO] wrote {comment_path}")
+          PY
+
+          # Pass pr_comment.md to the next step
+          if [ -s "pr_comment.md" ]; then
           {
             echo 'SCRIPT_OUTPUT<<__EOF__'
             cat pr_comment.md
@@ -93,18 +309,15 @@ jobs:
           COMMENT_BODY: ${{ env.SCRIPT_OUTPUT }}
         with:
           script: |
-            // workflow_run event does not have context.issue -> extract PR from payload
             const prs = (context.payload.workflow_run.pull_requests || []);
             if (!prs.length) {
               core.info('No associated PR; skipping comment.');
               return;
             }
             const prNumber = prs[0].number;
-
             await github.rest.issues.createComment({
               issue_number: prNumber,
               owner: context.repo.owner,
               repo: context.repo.repo,
               body: process.env.COMMENT_BODY
             });
-
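Note on the matching convention the inlined script relies on: metric fields are dropped from each key=value record and the remaining pairs are sorted, so a speed_test*.txt row and a W&B benchmark_id reduce to the same lookup key. A minimal self-contained sketch of that normalization (illustration only, not part of this commit; the sample line and field names are hypothetical):

# Mirrors _normalize_kv_id / artifacts_parse_speed_txt_lines from the script
# above, trimmed for illustration. Sample data is made up.
METRIC_KEYS = {"compile_time", "runtime_fps", "realtime_factor"}

def normalize_kv_id(kv, drop_keys=()):
    # drop metric keys, sort the rest, join as key=value tokens
    pairs = sorted((k.strip(), v.strip()) for k, v in kv.items()
                   if k not in drop_keys and v is not None)
    return "-".join(f"{k}={v}" for k, v in pairs)

line = "solver=pbd | n_envs=4096 | runtime_fps=1234567.8 | compile_time=12.3"
kv = dict(p.strip().split("=", 1) for p in line.split("|") if "=" in p)
print(normalize_kv_id(kv, METRIC_KEYS))
# -> n_envs=4096-solver=pbd  (the same key the W&B side produces for a
#    benchmark_id of "solver=pbd-n_envs=4096")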

test_wandb.py

Lines changed: 2 additions & 2 deletions
@@ -293,9 +293,9 @@ def trunc(s, n=120):

 lines = []
 lines.append(":warning: **Benchmark regression detected (vs W&B history)**")
-lines.append(f"- Revisions considered: **{len(rev_order)}**")
+lines.append(f"- Baseline commits considered: **{len(rev_order)}**")
 for i in range(len(rev_order)):
-    lines.append(f"\t- Revision {i+1}: {rev_order[i]}")
+    lines.append(f"\t- Commit {i+1}: {rev_order[i]}")
 lines.append(f"- Runtime tolerance: **-{tol_rt:.1f}%**; Compile tolerance: **+{tol_ct:.1f}%**")
 lines.append("")
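On the comparison arithmetic shared by the inlined script and test_wandb.py: the baseline for each benchmark is the mean of a metric across the collected revisions, and the reported delta is the percent change of the current value against that mean. A worked sketch with made-up numbers (illustration only, not part of this commit):

# Hypothetical values; mirrors collect_mean and the delta computation above.
import statistics as stats

baseline_fps = [1_050_000.0, 980_000.0, 1_000_000.0]  # one value per baseline revision
base = stats.mean(baseline_fps)                       # 1_010_000.0
cur = 900_000.0                                       # current PR result
delta_pct = (cur - base) / base * 100.0               # ~ -10.89%
print(f"{delta_pct:.2f}%")
# With RUNTIME_REGRESSION_TOLERANCE_PCT=10 the comment header reads "-10.0%",
# so a drop of this size exceeds the stated tolerance.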

0 commit comments
