|
| 1 | +#!/usr/bin/env python3 |
| 2 | +"""Canary report: walk lane artifacts, summarize via Haiku, post to Slack. |
| 3 | +
|
| 4 | +Invoked by the `canary-report` GitHub Actions job after every live-canary |
| 5 | +lane finishes. Expects artifacts under ``--artifacts-dir`` following the |
| 6 | +standard ``<lane>/<provider>/<timestamp>/`` layout produced by |
| 7 | +``scripts/live-canary/run.sh``. |
| 8 | +
|
| 9 | +Zero external dependencies — uses only the stdlib so it can run in any CI |
| 10 | +shell. Exits 0 even on Haiku / Slack failure so the notifier never blocks |
| 11 | +CI; errors degrade to a raw "X/Y lanes failed — <run URL>" fallback. |
| 12 | +""" |
| 13 | + |
| 14 | +from __future__ import annotations |
| 15 | + |
| 16 | +import argparse |
| 17 | +import json |
| 18 | +import os |
| 19 | +import sys |
| 20 | +import urllib.error |
| 21 | +import urllib.request |
| 22 | +import xml.etree.ElementTree as ET |
| 23 | +from dataclasses import dataclass, field |
| 24 | +from pathlib import Path |
| 25 | + |
# Model identifier sent as the "model" field of the request built in run_haiku().
MODEL = "claude-haiku-4-5-20251001"
# Endpoint that run_haiku() POSTs the request to.
ANTHROPIC_URL = "https://api.anthropic.com/v1/messages"
# Value sent in the "anthropic-version" request header.
ANTHROPIC_VERSION = "2023-06-01"
# Maximum number of trailing bytes of test-output.log that collect_lane()
# reads; the same figure is quoted in the prompt text.
MAX_LOG_BYTES = 20_000

# System prompt sent with every request. It asks for a JSON-only reply whose
# keys (status, reason, tool_calls_total, tools_used, notable) are the ones
# run_haiku() copies onto the LaneReport.
HAIKU_SYSTEM = (
    "You analyze CI canary test logs. Given a lane's summary, JUnit digest, "
    "and log tail, return ONLY a JSON object with these keys:\n"
    ' status: "pass" | "fail" | "skip"\n'
    " reason: string, <=200 chars, one-sentence cause if failed (else empty)\n"
    " tool_calls_total: integer, 0 if none visible\n"
    " tools_used: list of distinct tool names (up to 10)\n"
    " notable: string, <=200 chars, anything worth flagging (else empty)\n"
    "Do not include prose outside the JSON. If the log is empty or ambiguous, "
    "still produce the object with best-effort fields."
)
| 42 | + |
| 43 | + |
@dataclass
class LaneReport:
    """Everything known about one lane/provider run.

    Filled in three stages: identity and file tails by ``collect_lane``,
    JUnit totals by ``parse_junit``, and the model-derived fields by
    ``run_haiku``.
    """

    # Identity: the third-from-last and second-from-last components of the
    # run directory path (see collect_lane).
    lane: str
    provider: str
    # JUnit totals summed over every <testsuite> element.
    passed: int = 0  # derived: tests - failed - skipped, floored at 0
    failed: int = 0  # "failures" + "errors" attributes
    skipped: int = 0
    tests: int = 0
    duration_s: float = 0.0  # sum of the "time" attributes
    # (testcase name, message truncated to 240 chars) for each failed or
    # errored <testcase>.
    junit_failures: list[tuple[str, str]] = field(default_factory=list)
    # "unknown" until collect_lane sets "skip" / "fail" / "pass"; run_haiku
    # may overwrite it with the model's verdict.
    status: str = "unknown"
    # Fields populated by run_haiku from the model's JSON reply.
    reason: str = ""
    tool_calls_total: int = 0
    tools_used: list[str] = field(default_factory=list)
    notable: str = ""  # also carries a short note when the model call fails
    # Raw inputs forwarded to the model.
    summary_md: str = ""  # tail of summary.md (up to 4000 bytes)
    log_tail: str = ""  # tail of test-output.log (up to MAX_LOG_BYTES bytes)
| 61 | + |
| 62 | + |
def read_tail(path: Path, n_bytes: int) -> str:
    """Return at most the last ``n_bytes`` bytes of ``path`` decoded as UTF-8.

    Args:
        path: File to read.
        n_bytes: Maximum number of trailing bytes to return; values <= 0
            yield an empty string.

    Returns:
        The decoded tail, or ``""`` when the file is missing, is a directory,
        or cannot be read. Undecodable bytes become U+FFFD.
    """
    if n_bytes <= 0:
        return ""
    try:
        with path.open("rb") as f:
            # Measure on the open handle so size and content come from the
            # same file even if the path is replaced concurrently.
            size = f.seek(0, os.SEEK_END)
            truncated = size > n_bytes
            f.seek(size - n_bytes if truncated else 0)
            data = f.read()
    except OSError:
        # Missing file, directory, permission problem: callers treat an
        # absent artifact as empty text.
        return ""
    if truncated:
        # The cut may fall inside a multi-byte UTF-8 sequence. Drop the
        # orphaned continuation bytes (10xxxxxx, at most three) so the tail
        # does not begin with replacement characters.
        start = 0
        while start < min(3, len(data)) and (data[start] & 0xC0) == 0x80:
            start += 1
        data = data[start:]
    return data.decode("utf-8", errors="replace")
| 72 | + |
| 73 | + |
def parse_junit(path: Path, report: LaneReport) -> None:
    """Accumulate JUnit totals and failure messages from ``path`` into ``report``.

    Counters are added to the values already on ``report``; ``passed`` is then
    recomputed as ``tests - failed - skipped`` (never negative). A missing,
    empty, unreadable or malformed file leaves ``report`` untouched.

    Args:
        path: JUnit XML file.
        report: Object whose ``tests``, ``failed``, ``skipped``, ``passed``,
            ``duration_s`` and ``junit_failures`` attributes are updated.
    """

    def _number(raw: str | None, cast: type, default: int | float) -> int | float:
        # Attributes may be absent, empty, or hold non-numeric junk; none of
        # those should abort the whole report.
        if not raw:
            return default
        try:
            return cast(raw)
        except (TypeError, ValueError):
            return default

    try:
        if path.stat().st_size == 0:
            return
        root = ET.parse(path).getroot()
    except (OSError, ET.ParseError):
        return

    for ts in root.iter("testsuite"):
        report.tests += _number(ts.get("tests"), int, 0)
        report.failed += _number(ts.get("failures"), int, 0) + _number(ts.get("errors"), int, 0)
        report.skipped += _number(ts.get("skipped"), int, 0)
        report.duration_s += _number(ts.get("time"), float, 0.0)
    report.passed = max(report.tests - report.failed - report.skipped, 0)

    for tc in root.iter("testcase"):
        failure = tc.find("failure")
        node = failure if failure is not None else tc.find("error")
        if node is None:
            continue
        # Prefer the short "message" attribute; fall back to the element text
        # so failures reported only as a body are not recorded as blank.
        msg = (node.get("message") or node.text or "").strip()
        report.junit_failures.append((tc.get("name", "?"), msg[:240]))
| 95 | + |
| 96 | + |
def collect_lane(lane_dir: Path) -> LaneReport | None:
    """Build the report for one ``<lane>/<provider>/<timestamp>`` directory.

    The lane and provider names are the third-from-last and second-from-last
    path components. Returns ``None`` when the path has fewer than three
    components. The initial status is "skip" when there are neither tests nor
    log output, "fail" when any test failed, "pass" when tests ran without
    failures, and otherwise stays at the dataclass default.
    """
    components = lane_dir.parts
    if len(components) < 3:
        return None

    lane_name, provider_name = components[-3], components[-2]
    report = LaneReport(lane=lane_name, provider=provider_name)

    # Load the three artifacts the lane is expected to leave behind.
    parse_junit(lane_dir / "auth-canary-junit.xml", report)
    report.summary_md = read_tail(lane_dir / "summary.md", 4_000)
    report.log_tail = read_tail(lane_dir / "test-output.log", MAX_LOG_BYTES)

    ran_nothing = report.tests == 0
    if ran_nothing and not report.log_tail:
        report.status = "skip"
    elif report.failed > 0:
        report.status = "fail"
    elif not ran_nothing:
        report.status = "pass"
    return report
| 114 | + |
| 115 | + |
def discover_lane_dirs(artifacts_root: Path) -> list[Path]:
    """Return the latest ``<lane>/<provider>/<timestamp>`` dir for each lane+provider.

    "Latest" is the run directory that sorts last, so timestamp directory
    names must order lexicographically. Results are sorted by lane, then by
    provider. Non-directory entries are ignored at every level, and a root
    that is missing or is not a directory yields an empty list.
    """
    # is_dir() rather than exists(): a regular file at this path would make
    # iterdir() raise NotADirectoryError.
    if not artifacts_root.is_dir():
        return []
    latest: list[Path] = []
    for lane_dir in sorted(p for p in artifacts_root.iterdir() if p.is_dir()):
        for provider_dir in sorted(p for p in lane_dir.iterdir() if p.is_dir()):
            newest = max((p for p in provider_dir.iterdir() if p.is_dir()), default=None)
            if newest is not None:
                latest.append(newest)
    return latest
| 130 | + |
| 131 | + |
def post_json(url: str, payload: dict, headers: dict[str, str], timeout: int = 20) -> dict:
    """POST ``payload`` as JSON to ``url`` and return the decoded reply.

    Args:
        url: Target endpoint.
        payload: JSON-serialisable request body.
        headers: Extra request headers; merged over the default Content-Type.
        timeout: Socket timeout in seconds.

    Returns:
        The response body parsed as a JSON object. An empty body yields
        ``{}``; a body that is not JSON, or is JSON but not an object, is
        returned as ``{"_raw": <body text>}`` so callers can always use
        ``dict`` methods on the result.

    Raises:
        RuntimeError: The response status is 300 or above.
        Exception: Whatever ``urllib.request.urlopen`` raises on transport
            or HTTP errors is propagated unchanged.
    """
    body = json.dumps(payload).encode("utf-8")
    req = urllib.request.Request(
        url,
        data=body,
        headers={"Content-Type": "application/json", **headers},
        method="POST",
    )
    with urllib.request.urlopen(req, timeout=timeout) as resp:
        raw = resp.read().decode("utf-8", errors="replace")
        status = resp.status
    # Defensive: urlopen normally raises for error statuses on its own.
    if status >= 300:
        raise RuntimeError(f"HTTP {status}: {raw[:200]}")
    if not raw:
        return {}
    try:
        parsed = json.loads(raw)
    except json.JSONDecodeError:
        return {"_raw": raw}
    # Honour the declared return type even if the server sends a bare
    # string, number or array.
    return parsed if isinstance(parsed, dict) else {"_raw": raw}
| 143 | + |
| 144 | + |
def run_haiku(api_key: str, report: LaneReport) -> None:
    """Enrich ``report`` with model-derived fields; never raises on a bad reply.

    Sends the lane's JUnit digest, up to ten JUnit failures, the first 1500
    characters of ``summary_md`` and the log tail to the model, then copies
    ``status``, ``reason``, ``tool_calls_total``, ``tools_used`` and
    ``notable`` from the JSON reply onto ``report``.

    Failure handling:
        * If the request fails, ``report.notable`` records the exception type
          and nothing else changes.
        * If the reply contains no JSON object, ``report.notable`` records
          the first 160 characters of the reply and nothing else changes.
        * Individual fields of the wrong type are skipped or coerced.

    Args:
        api_key: Value for the ``x-api-key`` header.
        report: Lane report to update in place.
    """
    junit = (
        f"tests={report.tests} passed={report.passed} failed={report.failed} "
        f"skipped={report.skipped} duration={report.duration_s:.1f}s"
    )
    failures_block = "\n".join(f"- {n}: {m}" for n, m in report.junit_failures[:10]) or "(none)"
    user_msg = (
        f"Lane: {report.lane}\n"
        f"Provider: {report.provider}\n"
        f"JUnit digest: {junit}\n"
        f"JUnit failures:\n{failures_block}\n\n"
        f"summary.md:\n{report.summary_md[:1500]}\n\n"
        f"test-output.log tail (up to {MAX_LOG_BYTES} bytes):\n"
        f"{report.log_tail}"
    )
    payload = {
        "model": MODEL,
        "max_tokens": 512,
        "system": HAIKU_SYSTEM,
        "messages": [{"role": "user", "content": user_msg}],
    }
    headers = {"x-api-key": api_key, "anthropic-version": ANTHROPIC_VERSION}
    try:
        resp = post_json(ANTHROPIC_URL, payload, headers, timeout=45)
    except Exception as e:
        report.notable = f"haiku call failed: {type(e).__name__}"[:200]
        return

    # Concatenate the text blocks, tolerating an unexpected response shape.
    content = resp.get("content", []) if isinstance(resp, dict) else []
    if not isinstance(content, list):
        content = []
    text = "".join(
        block["text"]
        for block in content
        if isinstance(block, dict)
        and block.get("type") == "text"
        and isinstance(block.get("text"), str)
    ).strip()

    # Unwrap a Markdown code fence (``` or ```json) around the object.
    if text.startswith("```"):
        text = text.strip("`")
        if text.lower().startswith("json"):
            text = text[4:].strip()

    data: object = None
    try:
        data = json.loads(text)
    except json.JSONDecodeError:
        # The model sometimes surrounds the object with prose; retry on the
        # outermost {...} span before giving up.
        start, end = text.find("{"), text.rfind("}")
        if 0 <= start < end:
            try:
                data = json.loads(text[start : end + 1])
            except json.JSONDecodeError:
                data = None
    if not isinstance(data, dict):
        # Covers unparsable text and valid JSON that is not an object.
        report.notable = f"haiku returned non-JSON: {text[:160]}"
        return

    # Only accept the three verdicts the prompt asks for; anything else
    # leaves the JUnit-derived status in place.
    status = data.get("status")
    if isinstance(status, str) and status.strip().lower() in ("pass", "fail", "skip"):
        report.status = status.strip().lower()
    # "or ''" keeps a JSON null from being rendered as the string "None".
    report.reason = str(data.get("reason") or "")[:200]
    try:
        report.tool_calls_total = max(int(data.get("tool_calls_total") or 0), 0)
    except (TypeError, ValueError, OverflowError):
        pass
    tu = data.get("tools_used", [])
    if isinstance(tu, list):
        report.tools_used = [str(x) for x in tu[:10]]
    report.notable = str(data.get("notable") or "")[:200]
| 198 | + |
| 199 | + |
def slack_payload(reports: list[LaneReport], run_url: str | None, commit: str | None) -> dict:
    """Build the Slack Block Kit message summarising every lane.

    Layout: one header block with pass/fail counts, one mrkdwn section per
    lane (status emoji, totals, then optional reason, tools and notable
    lines), and a trailing context block with the short commit and run link
    when either is provided.

    Lane, provider, reason, tool names and notable text are escaped
    (``&``, ``<``, ``>``) because they originate from logs or model output
    and would otherwise be interpreted as Slack control sequences. The
    message is capped at 50 blocks and each section at 3000 characters; when
    lanes are dropped to fit, a final section states how many were omitted.

    Args:
        reports: Lane reports in display order.
        run_url: Link to the CI run, or ``None``.
        commit: Full commit SHA, or ``None``; only the first 7 chars are shown.

    Returns:
        ``{"blocks": [...]}`` ready to be posted to a Slack webhook.
    """
    max_blocks = 50
    max_section_chars = 3000

    def esc(value: object) -> str:
        # "&" must be replaced first so the other entities are not re-escaped.
        return str(value).replace("&", "&amp;").replace("<", "&lt;").replace(">", "&gt;")

    emoji = {"pass": ":white_check_mark:", "fail": ":x:", "skip": ":heavy_minus_sign:"}
    red = sum(1 for r in reports if r.status == "fail")
    green = sum(1 for r in reports if r.status == "pass")
    header = f"Canary: {green} passed, {red} failed of {len(reports)} lanes"

    ctx: list[str] = []
    if commit:
        ctx.append(f"commit `{commit[:7]}`")
    if run_url:
        ctx.append(f"<{run_url}|GitHub run>")

    # Reserve room for the header and (if present) the context block; when
    # lanes overflow, one more slot is given to the "omitted" note.
    room = max_blocks - 1 - (1 if ctx else 0)
    shown = reports
    if len(reports) > room:
        shown = reports[: room - 1]
    omitted = len(reports) - len(shown)

    blocks: list[dict] = [
        {"type": "header", "text": {"type": "plain_text", "text": header}},
    ]
    for r in shown:
        header_line = (
            f"{emoji.get(r.status, ':grey_question:')} *{esc(r.lane)}* ({esc(r.provider)}) — "
            f"{r.passed}/{r.tests} passed, {r.failed} failed in {r.duration_s:.0f}s"
        )
        lines = [header_line]
        if r.reason:
            lines.append(f"> {esc(r.reason)}")
        if r.tools_used:
            tools = ", ".join(esc(t) for t in r.tools_used)
            lines.append(f"tools: {tools} (≈{r.tool_calls_total} calls)")
        if r.notable:
            lines.append(f"_{esc(r.notable)}_")
        text = "\n".join(lines)
        if len(text) > max_section_chars:
            text = text[: max_section_chars - 1] + "…"
        blocks.append({"type": "section", "text": {"type": "mrkdwn", "text": text}})
    if omitted:
        blocks.append(
            {"type": "section", "text": {"type": "mrkdwn", "text": f"_… {omitted} more lanes not shown_"}}
        )
    if ctx:
        blocks.append({"type": "context", "elements": [{"type": "mrkdwn", "text": " • ".join(ctx)}]})
    return {"blocks": blocks}
| 229 | + |
| 230 | + |
def fallback_payload(reports: list[LaneReport], run_url: str | None) -> dict:
    """Build the plain-text message used when the rich payload cannot be posted.

    The text reports how many lanes have status "fail" out of the total and,
    when ``run_url`` is non-empty, appends it after an em dash.
    """
    failed_lanes = len([r for r in reports if r.status == "fail"])
    message = f"Canary: {failed_lanes}/{len(reports)} lanes failed"
    suffix = f" — {run_url}" if run_url else ""
    return {"text": message + suffix}
| 237 | + |
| 238 | + |
def main() -> int:
    """Collect lane reports, optionally enrich them, and post to Slack.

    Steps:
        1. Discover the latest run directory per lane/provider under
           ``--artifacts-dir``; with none found, log and stop.
        2. Build a report per directory.
        3. When an API key is available, enrich each report via the model.
           A failure on one lane is logged and does not stop the others.
        4. Print the payload (``--dry-run`` or no webhook) or post it; if the
           post fails, retry once with the plain-text fallback.

    Returns:
        Always 0, so the notifier never fails the CI job.
    """
    p = argparse.ArgumentParser(description=__doc__)
    p.add_argument("--artifacts-dir", default="artifacts/live-canary",
                   help="root of downloaded lane artifacts")
    p.add_argument("--slack-webhook", default=os.environ.get("SLACK_WEBHOOK_URL"))
    p.add_argument("--anthropic-api-key", default=os.environ.get("ANTHROPIC_API_KEY"))
    p.add_argument("--run-url", default=os.environ.get("CANARY_RUN_URL"))
    p.add_argument("--commit", default=os.environ.get("GITHUB_SHA"))
    p.add_argument("--dry-run", action="store_true",
                   help="print the Slack payload to stdout instead of posting")
    args = p.parse_args()

    artifacts_root = Path(args.artifacts_dir)
    lane_dirs = discover_lane_dirs(artifacts_root)
    if not lane_dirs:
        print(f"[notify_slack] no lane artifacts under {artifacts_root}", file=sys.stderr)
        return 0

    reports: list[LaneReport] = []
    for d in lane_dirs:
        r = collect_lane(d)
        if r is not None:
            reports.append(r)

    if not args.anthropic_api_key:
        # Only claim the key is missing when it actually is; an empty report
        # list simply leaves nothing to enrich.
        print("[notify_slack] no ANTHROPIC_API_KEY — skipping haiku enrichment",
              file=sys.stderr)
    else:
        for r in reports:
            try:
                run_haiku(args.anthropic_api_key, r)
            except Exception as e:
                # Enrichment is best-effort: keep the JUnit-derived report
                # and carry on with the remaining lanes.
                print(
                    f"[notify_slack] haiku enrichment failed for {r.lane}/{r.provider}: "
                    f"{type(e).__name__}: {e}",
                    file=sys.stderr,
                )

    payload = slack_payload(reports, args.run_url, args.commit)

    if args.dry_run or not args.slack_webhook:
        print(json.dumps(payload, indent=2))
        return 0

    try:
        post_json(args.slack_webhook, payload, {}, timeout=10)
    except Exception as e:
        print(f"[notify_slack] slack post failed: {e} — sending fallback", file=sys.stderr)
        try:
            post_json(args.slack_webhook, fallback_payload(reports, args.run_url), {}, timeout=10)
        except Exception as e2:
            print(f"[notify_slack] fallback also failed: {e2}", file=sys.stderr)
    return 0


if __name__ == "__main__":
    sys.exit(main())
0 commit comments