|
| 1 | +"""ISB lease API proxy for CI workflows. |
| 2 | +
|
| 3 | +GitHub-hosted Actions runners use Azure IPs which the upstream ISB WAF |
| 4 | +(AWSManagedRulesAnonymousIpList -> HostingProviderIPList) blocks. This |
| 5 | +Lambda makes the API call from AWS IP space, which the WAF allows. |
| 6 | +
|
| 7 | +Invoked via the standard Lambda InvokeFunction API (sigv4 by boto3). |
| 8 | +Caller must hold lambda:InvokeFunction on this function ARN — only the |
| 9 | +isb-hub-github-actions-ci-lease role is granted. |
| 10 | +
|
| 11 | +Event shape (JSON in invocation Payload): |
| 12 | +
|
| 13 | + { "op": "acquire", "template": "empty-sandbox", "user_email": "..." } |
| 14 | + { "op": "release", "lease_id": "...", "user_email": "..." } |
| 15 | + { "op": "list-orphans", "owned_by": "...", "older_than_minutes": 180 } |
| 16 | +
|
| 17 | +Response shape: |
| 18 | +
|
| 19 | + { "ok": true, "data": {...} } # success |
| 20 | + { "ok": false, "error": "...", "status": N } # failure with reason |
| 21 | +""" |
| 22 | + |
| 23 | +import base64 |
| 24 | +import hashlib |
| 25 | +import hmac |
| 26 | +import json |
| 27 | +import os |
| 28 | +import socket |
| 29 | +import time |
| 30 | +import urllib.error |
| 31 | +import urllib.parse |
| 32 | +import urllib.request |
| 33 | +from datetime import datetime, timezone |
| 34 | + |
| 35 | +import boto3 |
| 36 | + |
# Base URL of the ISB REST API; request paths are appended to this value.
ISB_API_BASE_URL = "https://1ewlxhaey6.execute-api.us-west-2.amazonaws.com/prod/"
# Secrets Manager secret id and region holding the JWT signing secret. Both
# can be overridden with environment variables of the same name.
ISB_JWT_SECRET_PATH = os.environ.get(
    "ISB_JWT_SECRET_PATH", "/InnovationSandbox/ndx/Auth/JwtSecret"
)
ISB_JWT_SECRET_REGION = os.environ.get("ISB_JWT_SECRET_REGION", "us-west-2")

# Lease statuses treated as ready for use, and statuses that mean the lease
# is still being set up (op_acquire keeps polling while the status is in the
# second set).
ACTIVE_STATUSES = {"Active"}
PROVISIONING_STATUSES = {"Provisioning", "PendingApproval"}

# Upper bound on how long op_acquire polls, and the pause between polls.
# NOTE(review): 30 minutes is longer than the 900 s maximum Lambda execution
# time, so the in-code timeout branch may never be reached before the
# runtime kills the invocation — confirm against the function's configured
# timeout.
PROVISIONING_TIMEOUT_SECONDS = 30 * 60
POLL_INTERVAL_SECONDS = 5

# Cache the secret across warm Lambda invocations to avoid hitting Secrets
# Manager on every call. The cached value lives for as long as the execution
# environment stays warm; the secret rotates monthly, so that lifetime is
# expected to be fine.
# NOTE(review): nothing invalidates this cache, so a rotated secret is only
# picked up after a cold start — confirm that is acceptable.
_cached_secret = None
| 53 | + |
| 54 | + |
def _b64url(data: bytes) -> str:
    """Return *data* as URL-safe base64 text with trailing ``=`` padding removed."""
    encoded = base64.urlsafe_b64encode(data)
    return encoded.decode().rstrip("=")
| 57 | + |
| 58 | + |
def sign_jwt(payload: dict, secret: str, expires_in_seconds: int = 600) -> str:
    """Build a compact HS256 JWT carrying *payload* plus ``iat``/``exp`` claims.

    ``iat`` is the current Unix time and ``exp`` is ``iat + expires_in_seconds``;
    both overwrite any same-named keys already present in *payload*. The token
    is ``header.payload.signature`` with each part unpadded URL-safe base64.
    """
    issued_at = int(time.time())
    claims = {**payload}
    claims.update(iat=issued_at, exp=issued_at + expires_in_seconds)

    # Compact separators keep the serialised JSON free of whitespace.
    segments = [
        _b64url(json.dumps(part, separators=(",", ":")).encode())
        for part in ({"alg": "HS256", "typ": "JWT"}, claims)
    ]
    signing_input = ".".join(segments)

    mac = hmac.new(secret.encode(), signing_input.encode(), hashlib.sha256)
    return ".".join((signing_input, _b64url(mac.digest())))
| 69 | + |
| 70 | + |
def fetch_jwt_secret() -> str:
    """Return the JWT signing secret, reading Secrets Manager only on first use.

    The value is kept in the module-level ``_cached_secret`` so later calls in
    the same process return it without another API call.

    Raises:
        RuntimeError: if the secret has no (or an empty) ``SecretString``.
    """
    global _cached_secret
    if _cached_secret is None:
        secrets_manager = boto3.client(
            "secretsmanager", region_name=ISB_JWT_SECRET_REGION
        )
        value = secrets_manager.get_secret_value(
            SecretId=ISB_JWT_SECRET_PATH
        ).get("SecretString")
        if not value:
            raise RuntimeError("ISB JWT secret is empty")
        _cached_secret = value
    return _cached_secret
| 82 | + |
| 83 | + |
def signed_admin_token(email: str) -> str:
    """Return a signed JWT whose ``user`` claim is *email* with the Admin role."""
    admin_claims = {"user": {"email": email, "roles": ["Admin"]}}
    secret = fetch_jwt_secret()
    return sign_jwt(admin_claims, secret)
| 87 | + |
| 88 | + |
def make_isb_api_request(method, path, token, body=None, query_params=None):
    """Send one JSON request to the ISB API, retrying transient failures.

    Up to four attempts are made. HTTP 500/502/503/504 responses and
    connection-level errors are retried with exponential backoff (1 s, 2 s,
    4 s between attempts).

    Args:
        method: HTTP verb, e.g. ``"GET"`` or ``"POST"``.
        path: API path relative to ``ISB_API_BASE_URL``; leading slashes are
            ignored.
        token: bearer token sent in the ``Authorization`` header.
        body: optional JSON-serialisable payload; a falsy value sends no body.
        query_params: optional mapping of query parameters; entries whose
            value is ``None`` are dropped.

    Returns:
        ``(status, parsed_body)``. For a successful response ``parsed_body``
        is the decoded JSON document, or ``{}`` when the response body is
        empty. For an HTTP error response it is the decoded JSON error body,
        or ``{}`` when that body cannot be read or parsed.

    Raises:
        urllib.error.URLError, socket.timeout, ConnectionResetError: when the
            final attempt still fails at the connection level.
        ValueError: when a successful response carries a non-empty body that
            is not valid JSON.
    """
    max_attempts = 4
    retryable_statuses = (500, 502, 503, 504)

    url = f"{ISB_API_BASE_URL.rstrip('/')}/{path.lstrip('/')}"
    if query_params:
        pairs = [(k, v) for k, v in query_params.items() if v is not None]
        # quote (not quote_plus) with "/" left safe keeps the value encoding
        # the same as a plain urllib.parse.quote call while also escaping keys.
        qs = urllib.parse.urlencode(pairs, safe="/", quote_via=urllib.parse.quote)
        if qs:
            url = f"{url}?{qs}"

    data = json.dumps(body).encode() if body else None
    req = urllib.request.Request(
        url,
        data=data,
        headers={
            "Content-Type": "application/json",
            "Authorization": f"Bearer {token}",
        },
        method=method,
    )

    # NOTE(review): retries apply to every method, including POST. If a POST
    # reached the server before the connection failed, the retry repeats it —
    # confirm the upstream endpoints tolerate that.
    for attempt in range(max_attempts):
        is_last_attempt = attempt == max_attempts - 1
        try:
            with urllib.request.urlopen(req, timeout=30) as response:
                raw = response.read()
                # Some successful responses may carry no body at all; treat
                # that as an empty document instead of failing in json.loads.
                parsed = json.loads(raw.decode()) if raw.strip() else {}
                return response.status, parsed
        except urllib.error.HTTPError as e:
            if e.code in retryable_statuses and not is_last_attempt:
                time.sleep(2 ** attempt)
                continue
            # Best effort: an unreadable or non-JSON error body is reported
            # as {} so the caller still receives the status code.
            try:
                err_body = json.loads(e.read().decode())
            except Exception:
                err_body = {}
            return e.code, err_body
        except (ConnectionResetError, socket.timeout, urllib.error.URLError):
            if not is_last_attempt:
                time.sleep(2 ** attempt)
                continue
            raise

    # Every iteration above returns, retries, or raises, so this is only a
    # guard against future edits to the loop.
    raise RuntimeError("ISB API retry loop ended without a result")
| 135 | + |
| 136 | + |
| 137 | +# ── Operations ────────────────────────────────────────────────────────────── |
| 138 | + |
| 139 | + |
def _resolve_lease_template(token, template_name):
    """Return the single lease template whose name equals *template_name*.

    Walks every page of ``GET /leaseTemplates`` and compares names
    case-insensitively.

    Raises:
        RuntimeError: if a page request does not return HTTP 200, if no
            template matches, or if more than one template matches.
    """
    found = []
    cursor = None
    has_more = True
    while has_more:
        query = {"pageIdentifier": cursor} if cursor else {}
        status, body = make_isb_api_request(
            "GET", "/leaseTemplates", token, query_params=query
        )
        if status != 200:
            raise RuntimeError(f"Failed to list lease templates (HTTP {status}): {body}")
        # The payload may or may not be wrapped in a top-level "data" key.
        page = body.get("data", body)
        found.extend(
            candidate
            for candidate in page.get("result", [])
            if candidate.get("name", "").lower() == template_name.lower()
        )
        cursor = page.get("nextPageIdentifier")
        has_more = bool(cursor)

    match_count = len(found)
    if match_count == 0:
        raise RuntimeError(f"Lease template '{template_name}' not found")
    if match_count > 1:
        raise RuntimeError(f"Lease template '{template_name}' is ambiguous; {match_count} matches")
    return found[0]
| 162 | + |
| 163 | + |
def op_acquire(event):
    """Create a lease from a named template and wait until it is Active.

    Args:
        event: mapping with ``"template"`` (lease template name) and
            ``"user_email"`` (lease owner, also used as the token identity).

    Returns:
        ``{"ok": True, "data": {"account_id", "lease_id", "lease_uuid"}}`` on
        success. On failure, ``{"ok": False, "error": ...}`` plus whatever
        context is available: ``status``/``body`` when the create call is
        rejected, or ``lease_id``/``lease_uuid``/``poll_log`` once a lease
        exists, so the caller can still release it.

    Raises:
        KeyError: if a required event key is missing.
        RuntimeError: if the template cannot be resolved.
        Exception: errors from the create request propagate unchanged.
    """
    template_name = event["template"]
    user_email = event["user_email"]

    token = signed_admin_token(user_email)
    template = _resolve_lease_template(token, template_name)

    create_body = {"leaseTemplateUuid": template["uuid"], "userEmail": user_email}
    status, response = make_isb_api_request("POST", "/leases", token, body=create_body)
    if status != 201:
        return {
            "ok": False,
            "status": status,
            "error": f"Failed to create lease (HTTP {status})",
            "body": response,
        }

    lease = response.get("data", response)
    lease_uuid = lease.get("uuid", "unknown")
    # When the API does not return a leaseId, derive one as base64 of a
    # compact JSON object holding the user email and the lease uuid.
    lease_id = lease.get("leaseId") or base64.b64encode(
        json.dumps(
            {"userEmail": user_email, "uuid": lease_uuid}, separators=(",", ":")
        ).encode()
    ).decode()
    account_id = lease.get("awsAccountId", "")
    lease_status = lease.get("status", "unknown")

    poll_path = f"/leases/{urllib.parse.quote(lease_id, safe='+=')}"
    deadline = time.time() + PROVISIONING_TIMEOUT_SECONDS
    poll_log = [f"initial: status={lease_status} account={account_id or 'pending'}"]
    while lease_status in PROVISIONING_STATUSES:
        if time.time() > deadline:
            return {
                "ok": False,
                "error": f"Lease provisioning timeout after {PROVISIONING_TIMEOUT_SECONDS}s (last status: {lease_status})",
                "lease_id": lease_id,
                "lease_uuid": lease_uuid,
                "poll_log": poll_log,
            }
        time.sleep(POLL_INTERVAL_SECONDS)
        try:
            # Mint a fresh token for each poll: a token's lifetime is shorter
            # than the provisioning timeout, so one signed before the loop
            # would expire while polling is still in progress.
            token = signed_admin_token(user_email)
            poll_status, poll_response = make_isb_api_request("GET", poll_path, token)
        except (OSError, ValueError) as exc:
            # The lease already exists. Keep polling rather than letting a
            # network or decode error escape and lose the identifiers the
            # caller needs to release it.
            poll_log.append(f"poll failed ({type(exc).__name__}: {exc}) — continuing")
            continue
        if poll_status != 200:
            poll_log.append(f"poll HTTP {poll_status} — continuing")
            continue
        poll_lease = poll_response.get("data", poll_response)
        lease_status = poll_lease.get("status", "unknown")
        # Keep the last known account id if a later poll omits it.
        account_id = poll_lease.get("awsAccountId", "") or account_id
        poll_log.append(f"status={lease_status} account={account_id or 'pending'}")

    if lease_status not in ACTIVE_STATUSES:
        return {
            "ok": False,
            "error": f"Lease did not become Active (terminal status: {lease_status})",
            "lease_id": lease_id,
            "lease_uuid": lease_uuid,
            "status": lease_status,
            "poll_log": poll_log,
        }

    if not account_id:
        return {
            "ok": False,
            "error": "Lease Active but awsAccountId empty (ISB API contract violation)",
            "lease_id": lease_id,
            "lease_uuid": lease_uuid,
            "poll_log": poll_log,
        }

    return {
        "ok": True,
        "data": {
            "account_id": account_id,
            "lease_id": lease_id,
            "lease_uuid": lease_uuid,
        },
    }
| 244 | + |
| 245 | + |
def op_release(event):
    """Terminate the lease named in *event*.

    Reads ``event["lease_id"]`` and ``event["user_email"]``. HTTP 200 reports
    a termination; HTTP 404 and 409 are reported as success with
    ``already_gone`` set; any other status is returned as a failure together
    with the response body.
    """
    lease_id = event["lease_id"]
    user_email = event["user_email"]
    token = signed_admin_token(user_email)

    # "+" and "=" are left unescaped in the lease id path segment.
    terminate_path = "/leases/{}/terminate".format(
        urllib.parse.quote(lease_id, safe="+=")
    )
    status, body = make_isb_api_request("POST", terminate_path, token)

    if status in (404, 409):
        return {"ok": True, "data": {"terminated": False, "already_gone": True, "status": status}}
    if status != 200:
        return {
            "ok": False,
            "status": status,
            "error": f"Failed to terminate lease (HTTP {status})",
            "body": body,
        }
    return {"ok": True, "data": {"terminated": True}}
| 263 | + |
| 264 | + |
def op_list_orphans(event):
    """List active or provisioning leases of one owner older than a threshold.

    Args:
        event: mapping with ``"owned_by"`` (owner email, also used as the
            token identity) and optional ``"older_than_minutes"`` (default
            180).

    Returns:
        ``{"ok": True, "data": {"orphans": [...]}}`` where each entry has
        ``lease_id``, ``account_id``, ``status`` and ``age_minutes``; or
        ``{"ok": False, "status", "error", "body"}`` when a page request does
        not return HTTP 200.

    Leases with no start timestamp, or one that cannot be parsed, are skipped.
    """
    owned_by = event["owned_by"]
    older_than_minutes = int(event.get("older_than_minutes", 180))
    token = signed_admin_token(owned_by)
    # Built once; the set of statuses of interest does not change per lease.
    tracked_statuses = ACTIVE_STATUSES | PROVISIONING_STATUSES
    page_identifier = None
    now = datetime.now(timezone.utc)
    orphans = []
    while True:
        params = {"userEmail": owned_by}
        if page_identifier:
            params["pageIdentifier"] = page_identifier
        status, body = make_isb_api_request("GET", "/leases", token, query_params=params)
        if status != 200:
            return {"ok": False, "status": status, "error": "Failed to list leases", "body": body}
        data = body.get("data", body)
        for lease in data.get("result", []):
            if lease.get("status") not in tracked_statuses:
                continue
            start = lease.get("startDate") or lease.get("createdAt")
            if not start:
                continue
            try:
                # fromisoformat on older Pythons rejects a trailing "Z".
                start_dt = datetime.fromisoformat(start.replace("Z", "+00:00"))
            except (AttributeError, TypeError, ValueError):
                continue
            if start_dt.tzinfo is None:
                # A timestamp without an offset cannot be subtracted from the
                # aware `now`; assume UTC rather than failing the whole listing.
                start_dt = start_dt.replace(tzinfo=timezone.utc)
            age_minutes = int((now - start_dt).total_seconds() / 60)
            if age_minutes < older_than_minutes:
                continue
            orphans.append(
                {
                    "lease_id": lease.get("leaseId", ""),
                    "account_id": lease.get("awsAccountId", ""),
                    "status": lease.get("status", ""),
                    "age_minutes": age_minutes,
                }
            )
        page_identifier = data.get("nextPageIdentifier")
        if not page_identifier:
            break
    return {"ok": True, "data": {"orphans": orphans}}
| 306 | + |
| 307 | + |
| 308 | +# ── Handler ───────────────────────────────────────────────────────────────── |
| 309 | + |
| 310 | + |
def handler(event, _context):
    """Lambda entry point: dispatch on ``event["op"]``.

    Supported ops are ``"acquire"``, ``"release"`` and ``"list-orphans"``.
    Every outcome is reported as a response dict; an exception raised by an
    operation is converted to ``{"ok": False, "error": "<Type>: <message>"}``
    rather than surfacing as a Lambda function error.

    Args:
        event: the decoded invocation payload; expected to be a JSON object.
        _context: Lambda context object (unused).
    """
    # A payload such as null, a list or a string has no .get(); answer in
    # the normal response shape instead of raising.
    if not isinstance(event, dict):
        return {
            "ok": False,
            "error": f"Event must be a JSON object, got {type(event).__name__}",
        }

    op = event.get("op")
    try:
        if op == "acquire":
            return op_acquire(event)
        if op == "release":
            return op_release(event)
        if op == "list-orphans":
            return op_list_orphans(event)
        return {"ok": False, "error": f"Unknown op: {op}"}
    except Exception as e:  # noqa: BLE001 — surface to caller
        return {"ok": False, "error": f"{type(e).__name__}: {e}"}
0 commit comments