|
| 1 | +"""Sprites analogue of daytona_webhook.py. |
| 2 | +
|
| 3 | +FastAPI app: receives the session.status_run_started webhook, drains the |
| 4 | +environment work queue, and per item creates a Sprite |
| 5 | +([sprites.dev](https://sprites.dev), Fly.io's stateful sandboxes) running the |
| 6 | +provider-agnostic ``sandbox_runner.py``. Deploy this anywhere that can serve |
| 7 | +HTTP and reach the Sprites API (Fly, Render, a VM, etc.). |
| 8 | +
|
| 9 | +The webhook is a wake-up signal only — each delivery drains *all* pending work |
| 10 | +items, so a single arriving webhook recovers any earlier missed deliveries. |
| 11 | +
|
| 12 | +Sprites has no published SDK, so this talks its REST API directly (create, |
| 13 | +filesystem write, exec). A Sprite is a full Linux environment, so |
| 14 | +``beta_agent_toolset_20260401`` (bash/read/write/edit/glob/grep) works as-is. |
| 15 | +
|
| 16 | +Env on the orchestrator host: |
| 17 | + ANTHROPIC_WEBHOOK_SECRET, ANTHROPIC_BASE_URL, |
| 18 | + ANTHROPIC_ENVIRONMENT_ID, ANTHROPIC_ENVIRONMENT_KEY, |
| 19 | + SPRITES_API_KEY, SPRITES_API_URL (optional) |
| 20 | +""" |
| 21 | + |
| 22 | +import os |
| 23 | +import re |
| 24 | +from collections.abc import Mapping |
| 25 | +from functools import cache |
| 26 | +from pathlib import Path |
| 27 | + |
| 28 | +import anthropic |
| 29 | +import httpx |
| 30 | +from anthropic.types.beta import UnwrapWebhookEvent |
| 31 | +from fastapi import FastAPI, HTTPException, Request |
| 32 | + |
| 33 | +# Same provider-agnostic sandbox_runner.py the Modal and Daytona demos use. |
| 34 | +RUNNER_SRC = ( |
| 35 | + Path(__file__).resolve().parent.parent / "modal" / "sandbox_runner.py" |
| 36 | +).read_text() |
| 37 | + |
| 38 | +SPRITES_API_URL = os.environ.get("SPRITES_API_URL", "https://api.sprites.dev") |
| 39 | +WORKDIR = "/workspace" |
| 40 | +RUNNER_PATH = "/root/sandbox_runner.py" |
| 41 | + |
| 42 | +app = FastAPI() |
| 43 | + |
| 44 | + |
| 45 | +# --------------------------------------------------------------------------- |
| 46 | +# Minimal Sprites REST client (no SDK). Auth is a single bearer token. |
| 47 | +# --------------------------------------------------------------------------- |
| 48 | + |
| 49 | + |
| 50 | +@cache |
| 51 | +def _sprites() -> httpx.Client: |
| 52 | + return httpx.Client( |
| 53 | + base_url=SPRITES_API_URL, |
| 54 | + headers={"authorization": f"Bearer {os.environ['SPRITES_API_KEY']}"}, |
| 55 | + timeout=httpx.Timeout(300.0), |
| 56 | + ) |
| 57 | + |
| 58 | + |
| 59 | +def _shquote(value: str) -> str: |
| 60 | + """POSIX single-quote a value for a sourced env file.""" |
| 61 | + return "'" + value.replace("'", "'\\''") + "'" |
| 62 | + |
| 63 | + |
| 64 | +def _sprite_name(session_id: str) -> str: |
| 65 | + """A DNS-label-safe Sprite name derived from the session id.""" |
| 66 | + slug = re.sub(r"[^a-z0-9]+", "-", session_id.lower()).strip("-")[:40] |
| 67 | + return f"claude-agent-{slug}" if slug else f"claude-agent-{os.urandom(4).hex()}" |
| 68 | + |
| 69 | + |
| 70 | +def _exec(name: str, command: str, env: "Mapping[str, str] | None" = None) -> int: |
| 71 | + """Run ``bash -c <command>`` in the Sprite and return its exit code. |
| 72 | +
|
| 73 | + Sprites streams exec output as ``[type][payload]`` frames terminated by an |
| 74 | + exit frame ``0x03 <code>``; for a blocking command we only need the exit |
| 75 | + code, which is the final byte. |
| 76 | + """ |
| 77 | + params: list[tuple[str, str]] = [ |
| 78 | + ("cmd", "bash"), |
| 79 | + ("cmd", "-c"), |
| 80 | + ("cmd", command), |
| 81 | + ] |
| 82 | + for key, value in (env or {}).items(): |
| 83 | + params.append(("env", f"{key}={value}")) |
| 84 | + resp = _sprites().post(f"/v1/sprites/{name}/exec", params=params) |
| 85 | + resp.raise_for_status() |
| 86 | + body = resp.content |
| 87 | + return body[-1] if len(body) >= 2 and body[-2] == 0x03 else 0 |
| 88 | + |
| 89 | + |
| 90 | +def _find_live(session_id: str) -> str | None: |
| 91 | + """Return the Sprite name if one for this session already exists and runs.""" |
| 92 | + name = _sprite_name(session_id) |
| 93 | + resp = _sprites().get(f"/v1/sprites/{name}") |
| 94 | + if resp.status_code == 404: |
| 95 | + return None |
| 96 | + resp.raise_for_status() |
| 97 | + return name if resp.json().get("status") in ("running", "warm") else None |
| 98 | + |
| 99 | + |
| 100 | +def _spawn( |
| 101 | + session_id: str, *, environment_id: str, work_id: str, environment_key: str |
| 102 | +) -> str: |
| 103 | + """Create a Sprite and start sandbox_runner.py inside it as a service. |
| 104 | +
|
| 105 | + The runner is launched as a Sprite **service** rather than a detached |
| 106 | + `nohup ... &`: Sprites reaps an exec's process tree when the request |
| 107 | + connection closes, so a backgrounded process would not survive. Services are |
| 108 | + supervised and outlive the request. The runner self-stops its service on |
| 109 | + exit so the one-shot worker is not restarted after the session completes. |
| 110 | + """ |
| 111 | + name = _sprite_name(session_id) |
| 112 | + # 201 created, or 409 if a Sprite with this name already exists (reuse it). |
| 113 | + created = _sprites().post( |
| 114 | + "/v1/sprites", json={"name": name, "wait_for_capacity": True} |
| 115 | + ) |
| 116 | + if created.status_code not in (200, 201, 409): |
| 117 | + created.raise_for_status() |
| 118 | + |
| 119 | + _sprites().put( |
| 120 | + f"/v1/sprites/{name}/fs/write", |
| 121 | + params={"path": RUNNER_PATH}, |
| 122 | + content=RUNNER_SRC.encode(), |
| 123 | + headers={"content-type": "application/octet-stream"}, |
| 124 | + ).raise_for_status() |
| 125 | + |
| 126 | + # The runner needs the SDK and the WORKDIR. (Pre-bake these into a Sprite |
| 127 | + # checkpoint for production to drop the ~10-15s cold-start install.) |
| 128 | + code = _exec( |
| 129 | + name, f"mkdir -p {WORKDIR} && python3 -m pip install -q anthropic" |
| 130 | + ) |
| 131 | + if code != 0: |
| 132 | + raise RuntimeError(f"sprite {name}: runner setup failed (exit {code})") |
| 133 | + |
| 134 | + # Same env contract as `ant beta:worker poll --on-work`: sandbox_runner.py |
| 135 | + # reads these to build the client and run the worker's handle_item(). |
| 136 | + # ANTHROPIC_ENVIRONMENT_KEY is the runner's single credential — the org API |
| 137 | + # key never reaches the Sprite. Written to a file (not service args) so the |
| 138 | + # key never appears in process listings. |
| 139 | + env = { |
| 140 | + "ANTHROPIC_BASE_URL": os.environ.get( |
| 141 | + "ANTHROPIC_BASE_URL", "https://api.anthropic.com" |
| 142 | + ), |
| 143 | + "ANTHROPIC_ENVIRONMENT_KEY": environment_key, |
| 144 | + "ANTHROPIC_SESSION_ID": session_id, |
| 145 | + "ANTHROPIC_ENVIRONMENT_ID": environment_id, |
| 146 | + "ANTHROPIC_WORK_ID": work_id, |
| 147 | + } |
| 148 | + env_file = "\n".join(f"{k}={_shquote(v)}" for k, v in env.items()) + "\n" |
| 149 | + _sprites().put( |
| 150 | + f"/v1/sprites/{name}/fs/write", |
| 151 | + params={"path": "/root/runner.env"}, |
| 152 | + content=env_file.encode(), |
| 153 | + headers={"content-type": "application/octet-stream"}, |
| 154 | + ).raise_for_status() |
| 155 | + |
| 156 | + # Service supervises the runner; it self-stops on exit so the one-shot |
| 157 | + # worker isn't restarted. Service stdout/stderr land in |
| 158 | + # /.sprite/logs/services/agent-runner.log inside the Sprite. |
| 159 | + runner_cmd = ( |
| 160 | + "set -a; . /root/runner.env; set +a; " |
| 161 | + f"python3 {RUNNER_PATH}; " |
| 162 | + "sprite-env services stop agent-runner >/dev/null 2>&1 || true" |
| 163 | + ) |
| 164 | + _sprites().put( |
| 165 | + f"/v1/sprites/{name}/services/agent-runner", |
| 166 | + json={"cmd": "bash", "args": ["-lc", runner_cmd]}, |
| 167 | + ).raise_for_status() |
| 168 | + return name |
| 169 | + |
| 170 | + |
| 171 | +# --------------------------------------------------------------------------- |
| 172 | +# Webhook + work queue (identical flow to the Daytona / Modal demos). |
| 173 | +# --------------------------------------------------------------------------- |
| 174 | + |
| 175 | + |
| 176 | +@cache |
| 177 | +def _client() -> anthropic.AsyncAnthropic: |
| 178 | + """Shared client for both webhook verification and the work poller. |
| 179 | +
|
| 180 | + Async because ``client.beta.environments.work.poller(...)`` is async-only. |
| 181 | + ``unwrap()`` is synchronous even on the async client — do not ``await`` it. |
| 182 | + """ |
| 183 | + return anthropic.AsyncAnthropic( |
| 184 | + auth_token=os.environ["ANTHROPIC_ENVIRONMENT_KEY"], |
| 185 | + webhook_key=os.environ["ANTHROPIC_WEBHOOK_SECRET"], |
| 186 | + ) |
| 187 | + |
| 188 | + |
| 189 | +def _verify_webhook( |
| 190 | + client: anthropic.AsyncAnthropic, raw: bytes, headers: "Mapping[str, str]" |
| 191 | +) -> UnwrapWebhookEvent: |
| 192 | + from standardwebhooks import WebhookVerificationError |
| 193 | + |
| 194 | + try: |
| 195 | + return client.beta.webhooks.unwrap(raw.decode(), headers=headers) |
| 196 | + except (WebhookVerificationError, KeyError) as e: |
| 197 | + print(f"[webhook] signature reject: {type(e).__name__}: {e}", flush=True) |
| 198 | + raise HTTPException( |
| 199 | + status_code=401, detail="signature verification failed" |
| 200 | + ) from None |
| 201 | + |
| 202 | + |
| 203 | +async def _drain_work( |
| 204 | + client: anthropic.AsyncAnthropic, environment_id: str |
| 205 | +) -> list[dict]: |
| 206 | + """Drain the queue via the SDK poller, spawning a Sprite per work item. |
| 207 | +
|
| 208 | + ``drain=True`` returns when the queue is empty (the webhook handler must |
| 209 | + respond, not loop forever). ``auto_stop=False`` because each item is handed |
| 210 | + off to a detached Sprite that owns ``/stop`` — the poller must not terminate |
| 211 | + the lease out from under it. |
| 212 | + """ |
| 213 | + environment_key = os.environ["ANTHROPIC_ENVIRONMENT_KEY"] |
| 214 | + spawned: list[dict] = [] |
| 215 | + async for work in client.beta.environments.work.poller( |
| 216 | + environment_id=environment_id, |
| 217 | + environment_key=environment_key, |
| 218 | + block_ms=None, # None -> omit -> non-blocking. The API rejects block_ms=0. |
| 219 | + reclaim_older_than_ms=2000, |
| 220 | + drain=True, |
| 221 | + auto_stop=False, |
| 222 | + ): |
| 223 | + if work.data.type != "session": |
| 224 | + print(f"[webhook] skipping work={work.id} type={work.data.type}", flush=True) |
| 225 | + continue |
| 226 | + session_id = work.data.id |
| 227 | + try: |
| 228 | + existing = _find_live(session_id) |
| 229 | + if existing is not None: |
| 230 | + print( |
| 231 | + f"[webhook] work={work.id} session={session_id} sprite={existing} (live)", |
| 232 | + flush=True, |
| 233 | + ) |
| 234 | + spawned.append( |
| 235 | + { |
| 236 | + "session_id": session_id, |
| 237 | + "work_id": work.id, |
| 238 | + "sprite": existing, |
| 239 | + "created": False, |
| 240 | + } |
| 241 | + ) |
| 242 | + continue |
| 243 | + name = _spawn( |
| 244 | + session_id, |
| 245 | + environment_id=environment_id, |
| 246 | + work_id=work.id, |
| 247 | + environment_key=environment_key, |
| 248 | + ) |
| 249 | + print( |
| 250 | + f"[webhook] work={work.id} session={session_id} sprite={name} (created)", |
| 251 | + flush=True, |
| 252 | + ) |
| 253 | + spawned.append( |
| 254 | + { |
| 255 | + "session_id": session_id, |
| 256 | + "work_id": work.id, |
| 257 | + "sprite": name, |
| 258 | + "created": True, |
| 259 | + } |
| 260 | + ) |
| 261 | + except Exception as e: |
| 262 | + detail = type(e).__name__ |
| 263 | + print( |
| 264 | + f"[webhook] FAILED work={work.id} session={session_id}: {detail}", |
| 265 | + flush=True, |
| 266 | + ) |
| 267 | + spawned.append( |
| 268 | + {"session_id": session_id, "work_id": work.id, "error": detail} |
| 269 | + ) |
| 270 | + return spawned |
| 271 | + |
| 272 | + |
| 273 | +@app.post("/") |
| 274 | +async def webhook(request: Request) -> dict: |
| 275 | + raw = await request.body() |
| 276 | + client = _client() |
| 277 | + event = _verify_webhook(client, raw, request.headers) |
| 278 | + |
| 279 | + if event.data.type != "session.status_run_started": |
| 280 | + return {"status": "ignored", "event_type": event.data.type} |
| 281 | + |
| 282 | + spawned = await _drain_work(client, os.environ["ANTHROPIC_ENVIRONMENT_ID"]) |
| 283 | + return {"status": "ok", "event_type": event.data.type, "spawned": spawned} |
0 commit comments