Skip to content

Commit aa4c569

Browse files
authored
fix(deploy): accept idempotent 200 in tenant bootstrap (catch-all already exists) (#343)
The proxy self-provisions the catch-all tenant at startup from LLMTRACE_DEFAULT_TENANT_ID (ensure_tenant_exists), so the lifecycle's subsequent POST /api/v1/tenants for the catch-all returns 200 (already exists), not 201. _bootstrap_tenant_in_proxy raised on anything != 201, aborting provisioning BEFORE the dashboard was recreated — a live outage on 2026-05-30. Accept 200 and 201 (both carry the same body). Regression tests added.
1 parent 5afee41 commit aa4c569

2 files changed

Lines changed: 46 additions & 1 deletion

File tree

deployments/basilica/lifecycle.py

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -921,7 +921,11 @@ def _bootstrap_tenant_in_proxy(
921921
admin_key,
922922
body=body,
923923
)
924-
if status != 201:
924+
# Idempotent create: the proxy returns 201 for a newly-created tenant and
925+
# 200 when one with this `id` already exists (e.g. the proxy self-provisions
926+
# the catch-all tenant at startup from LLMTRACE_DEFAULT_TENANT_ID before the
927+
# lifecycle's create runs). Both carry the same body shape; both are success.
928+
if status not in (200, 201):
925929
raise RuntimeError(
926930
f"tenant bootstrap failed: status={status} body={payload}"
927931
)

deployments/basilica/tests/test_catchall_and_operator_tenant.py

Lines changed: 41 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -367,3 +367,44 @@ def test_pro_yaml_has_no_hardcoded_tenant_uuid() -> None:
367367
"the previously-hardcoded catch-all/default tenant UUID must not "
368368
"appear anywhere in pro.yaml"
369369
)
370+
371+
372+
def _patch_admin_http(monkeypatch: Any, status: int) -> None:
    """Swap ``lifecycle._admin_http_request`` for a canned-response stub.

    The stub performs no I/O. Every call returns ``status`` together with a
    payload that echoes the request body's ``id`` (falling back to
    ``"generated-id"`` when the body carries no truthy ``id``) and its
    ``name``.

    Args:
        monkeypatch: Object whose ``setattr`` installs the stub (the tests
            pass their ``monkeypatch`` argument straight through).
        status: Status code the stub reports for every request.
    """

    def stub_request(
        proxy_url: str,
        path: str,
        method: str,
        admin_key: str,
        body: Optional[dict[str, Any]] = None,
    ) -> tuple[int, dict[str, Any]]:
        # A missing (or empty) body is treated as an empty mapping so the
        # lookups below never fail.
        request = body if body else {}
        echoed = {
            "id": request.get("id") or "generated-id",
            "name": request.get("name"),
        }
        return status, echoed

    monkeypatch.setattr(lifecycle, "_admin_http_request", stub_request)
384+
385+
386+
def test_bootstrap_tenant_accepts_idempotent_200(monkeypatch: Any) -> None:
    """A 200 ("already exists") reply to the tenant create is a success.

    Regression for the 2026-05-30 outage. The proxy self-provisions the
    catch-all tenant at startup from LLMTRACE_DEFAULT_TENANT_ID, so the
    lifecycle's later create is answered with 200 rather than 201. The old
    check raised on anything != 201, which aborted provisioning before the
    dashboard was recreated.
    """
    catch_all_id = "a0f7d8a5-4524-4a83-afac-27080b4d0432"
    _patch_admin_http(monkeypatch, 200)

    returned_id = lifecycle._bootstrap_tenant_in_proxy(
        "http://proxy",
        "admin",
        "catch-all",
        stable_id=catch_all_id,
    )

    assert returned_id == catch_all_id
397+
398+
399+
def test_bootstrap_tenant_accepts_created_201(monkeypatch: Any) -> None:
    """A 201 reply to the tenant create still succeeds and yields the id."""
    operator_id = "550e8400-e29b-41d4-a716-446655440000"
    _patch_admin_http(monkeypatch, 201)

    returned_id = lifecycle._bootstrap_tenant_in_proxy(
        "http://proxy",
        "admin",
        "operator",
        stable_id=operator_id,
    )

    assert returned_id == operator_id
405+
406+
407+
def test_bootstrap_tenant_rejects_error_status(monkeypatch: Any) -> None:
    """A 500 reply must still raise the "tenant bootstrap failed" error.

    Guards against the accepted-status check being loosened too far: only
    the success codes may pass, anything else has to surface as a
    ``RuntimeError``.
    """
    _patch_admin_http(monkeypatch, 500)
    bootstrap_args = ("http://proxy", "admin", "catch-all")

    with pytest.raises(RuntimeError, match="tenant bootstrap failed"):
        lifecycle._bootstrap_tenant_in_proxy(*bootstrap_args)

0 commit comments

Comments
 (0)