Commit 2d8498d

Authored by Copilot and lpcox

api-proxy: expose models_fetch_complete in /health, fix port tables, add readiness polling docs (#2305)

* Initial plan

* feat: expose models_fetch_complete in /health, fix port tables, add polling recipe

  Agent-Logs-Url: https://github.com/github/gh-aw-firewall/sessions/0abd7d5b-2744-4269-8039-558fb5331a53
  Co-authored-by: lpcox <15877973+lpcox@users.noreply.github.com>

* fix: hermetic test overrides and complete JSON example in health check docs

  Agent-Logs-Url: https://github.com/github/gh-aw-firewall/sessions/25b45b00-16c8-494f-b84e-c811d44f6870
  Co-authored-by: lpcox <15877973+lpcox@users.noreply.github.com>

---------

Co-authored-by: copilot-swe-agent[bot] <198982749+Copilot@users.noreply.github.com>
Co-authored-by: lpcox <15877973+lpcox@users.noreply.github.com>

1 parent 07079b7

4 files changed: 81 additions & 5 deletions
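The commit's central addition, `models_fetch_complete`, is a boolean that callers gate on. A minimal sketch of how a harness might interpret the field (the `modelsReady` helper is hypothetical, not part of this commit):

```javascript
// Hypothetical helper (not in the repo): interpret the new
// models_fetch_complete field from a parsed /health response. A missing
// field is treated as "not ready", so the check fails closed against
// older sidecar images that predate this commit.
function modelsReady(health) {
  return health != null && health.models_fetch_complete === true;
}

// Payloads mirroring the /health shape this commit documents:
console.log(modelsReady({ status: 'healthy', models_fetch_complete: true }));  // true
console.log(modelsReady({ status: 'healthy', models_fetch_complete: false })); // false
console.log(modelsReady({ status: 'healthy' }));                               // false
```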

CLAUDE.md

Lines changed: 2 additions & 2 deletions

@@ -26,7 +26,7 @@ The system is orchestrated by `src/cli.ts` and managed by `src/docker-manager.ts
 - Enabled via `--enable-api-proxy`; not started otherwise
 - Injects real API credentials (OpenAI, Anthropic, Copilot) that the agent never sees
 - Agent calls the sidecar with no auth (e.g., `http://172.30.0.30:10001` for Anthropic); sidecar injects the real key and forwards via Squid
-- Ports: 10000 (OpenAI), 10001 (Anthropic), 10002 (Copilot), 10004 (OpenCode) — these are discrete ports, not a contiguous range
+- Ports: 10000 (OpenAI), 10001 (Anthropic), 10002 (Copilot), 10003 (Gemini), 10004 (OpenCode) — these are discrete ports, not a contiguous range
 
 ### Documentation Files
 
@@ -152,7 +152,7 @@ The codebase follows a modular architecture with clear separation of concerns:
 - `SYS_CHROOT` and `SYS_ADMIN` dropped via `capsh` before user code runs; `NET_ADMIN` never granted to agent (only to the iptables-init init container)
 
 **API Proxy Sidecar** (`containers/api-proxy/`) — *optional, requires `--enable-api-proxy`*
-- Node.js HTTP proxy at `172.30.0.30`; listens on ports 10000, 10001, 10002, 10004
+- Node.js HTTP proxy at `172.30.0.30`; listens on ports 10000, 10001, 10002, 10003, 10004
 - Agent sends unauthenticated requests; sidecar injects the real API key before forwarding
 - All upstream traffic goes through Squid (`HTTP_PROXY` env set inside sidecar)
 - Agent container's `depends_on` adds `api-proxy: service_healthy` when enabled
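The discrete port table corrected above can be sketched as a lookup map. The `PROVIDER_PORTS` and `sidecarBaseUrl` names are illustrative, not repo code; only the port numbers and the `172.30.0.30` address come from the docs:

```javascript
// Hypothetical sketch of the provider-to-port table described above.
// These are discrete ports, not a contiguous range.
const PROVIDER_PORTS = {
  openai: 10000,
  anthropic: 10001,
  copilot: 10002,
  gemini: 10003, // the port this commit adds to the tables
  opencode: 10004,
};

// Base URL the agent would call without credentials; the sidecar
// injects the real API key before forwarding via Squid.
function sidecarBaseUrl(provider) {
  const port = PROVIDER_PORTS[provider];
  if (port === undefined) throw new Error(`unknown provider: ${provider}`);
  return `http://172.30.0.30:${port}`;
}

console.log(sidecarBaseUrl('anthropic')); // http://172.30.0.30:10001
```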

containers/api-proxy/server.js

Lines changed: 2 additions & 1 deletion

@@ -1427,6 +1427,7 @@ function healthResponse() {
       complete: keyValidationComplete,
       results: keyValidationResults,
     },
+    models_fetch_complete: modelFetchComplete,
     metrics_summary: metrics.getSummary(),
     rate_limits: limiter.getAllStatus(),
   };
@@ -1782,4 +1783,4 @@ if (require.main === module) {
 }
 
 // Export for testing
-module.exports = { normalizeApiTarget, deriveCopilotApiTarget, deriveGitHubApiTarget, deriveGitHubApiBasePath, normalizeBasePath, buildUpstreamPath, proxyWebSocket, resolveCopilotAuthToken, resolveOpenCodeRoute, shouldStripHeader, stripGeminiKeyParam, validateApiKeys, probeProvider, httpProbe, keyValidationResults, resetKeyValidationState, fetchJson, extractModelIds, fetchStartupModels, reflectEndpoints, cachedModels, resetModelCacheState };
+module.exports = { normalizeApiTarget, deriveCopilotApiTarget, deriveGitHubApiTarget, deriveGitHubApiBasePath, normalizeBasePath, buildUpstreamPath, proxyWebSocket, resolveCopilotAuthToken, resolveOpenCodeRoute, shouldStripHeader, stripGeminiKeyParam, validateApiKeys, probeProvider, httpProbe, keyValidationResults, resetKeyValidationState, fetchJson, extractModelIds, fetchStartupModels, reflectEndpoints, healthResponse, cachedModels, resetModelCacheState };
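The server change follows a common pattern: a module-level flag flipped once the startup model fetch settles, then surfaced in the health payload. A simplified sketch with assumed names (not the repo's actual implementation):

```javascript
// Sketch of the pattern: modelFetchComplete starts false and flips true
// once the startup fetch pass settles, mirroring what /health reports.
let modelFetchComplete = false;

async function fetchStartupModelsSketch(fetchers = {}) {
  // Even with no providers configured, the pass still settles,
  // so the flag always flips eventually.
  await Promise.allSettled(Object.values(fetchers).map((f) => f()));
  modelFetchComplete = true;
}

function healthResponseSketch() {
  return {
    status: 'healthy',
    service: 'awf-api-proxy',
    models_fetch_complete: modelFetchComplete,
  };
}
```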

containers/api-proxy/server.test.js

Lines changed: 36 additions & 1 deletion

@@ -6,7 +6,7 @@ const http = require('http');
 const https = require('https');
 const tls = require('tls');
 const { EventEmitter } = require('events');
-const { normalizeApiTarget, deriveCopilotApiTarget, deriveGitHubApiTarget, deriveGitHubApiBasePath, normalizeBasePath, buildUpstreamPath, proxyWebSocket, resolveCopilotAuthToken, resolveOpenCodeRoute, shouldStripHeader, stripGeminiKeyParam, httpProbe, validateApiKeys, keyValidationResults, resetKeyValidationState, fetchJson, extractModelIds, fetchStartupModels, reflectEndpoints, cachedModels, resetModelCacheState } = require('./server');
+const { normalizeApiTarget, deriveCopilotApiTarget, deriveGitHubApiTarget, deriveGitHubApiBasePath, normalizeBasePath, buildUpstreamPath, proxyWebSocket, resolveCopilotAuthToken, resolveOpenCodeRoute, shouldStripHeader, stripGeminiKeyParam, httpProbe, validateApiKeys, keyValidationResults, resetKeyValidationState, fetchJson, extractModelIds, fetchStartupModels, reflectEndpoints, healthResponse, cachedModels, resetModelCacheState } = require('./server');
 
 describe('normalizeApiTarget', () => {
   it('should strip https:// prefix', () => {
@@ -1649,3 +1649,38 @@ describe('reflectEndpoints', () => {
     expect(opencode.models_url).toBeNull();
   });
 });
+
+// ── healthResponse ─────────────────────────────────────────────────────────
+
+describe('healthResponse', () => {
+  afterEach(() => {
+    resetModelCacheState();
+  });
+
+  it('should include models_fetch_complete: false before model fetch runs', () => {
+    const result = healthResponse();
+    expect(result.models_fetch_complete).toBe(false);
+  });
+
+  it('should include models_fetch_complete: true after model fetch completes', async () => {
+    // Pass explicit undefined overrides so no real network calls are made
+    await fetchStartupModels({
+      openaiKey: undefined,
+      anthropicKey: undefined,
+      copilotGithubToken: undefined,
+      copilotAuthToken: undefined,
+      geminiKey: undefined,
+    });
+    const result = healthResponse();
+    expect(result.models_fetch_complete).toBe(true);
+  });
+
+  it('should include required top-level fields', () => {
+    const result = healthResponse();
+    expect(result.status).toBe('healthy');
+    expect(result.service).toBe('awf-api-proxy');
+    expect(typeof result.providers).toBe('object');
+    expect(typeof result.key_validation).toBe('object');
+    expect(typeof result.models_fetch_complete).toBe('boolean');
+  });
+});

docs/api-proxy-sidecar.md

Lines changed: 41 additions & 1 deletion

@@ -310,7 +310,7 @@ The sidecar container:
 - **Image**: `ghcr.io/github/gh-aw-firewall/api-proxy:latest`
 - **Base**: `node:22-alpine`
 - **Network**: `awf-net` at `172.30.0.30`
-- **Ports**: 10000 (OpenAI), 10001 (Anthropic), 10002 (GitHub Copilot), 10003 (Google Gemini)
+- **Ports**: 10000 (OpenAI), 10001 (Anthropic), 10002 (GitHub Copilot), 10003 (Google Gemini), 10004 (OpenCode)
 - **Proxy**: Routes via Squid at `http://172.30.0.10:3128`
 
 ### Health check
@@ -321,6 +321,46 @@ Docker healthcheck on the `/health` endpoint (port 10000):
 - **Retries**: 5
 - **Start period**: 2s
 
+The `/health` endpoint returns a JSON object that includes a `models_fetch_complete` field, indicating whether the startup model-discovery pass has finished:
+
+```json
+{
+  "status": "healthy",
+  "service": "awf-api-proxy",
+  "squid_proxy": "http://172.30.0.10:3128",
+  "providers": { "openai": true, "anthropic": false, "gemini": false, "copilot": false },
+  "key_validation": { "complete": true, "results": { "openai": "valid" } },
+  "models_fetch_complete": true,
+  "metrics_summary": { "total_requests": 0, "success_rate": 100, "avg_latency_ms": 0 },
+  "rate_limits": {}
+}
+```
+
+Use `models_fetch_complete` as a readiness gate before submitting the first inference request, ensuring model lists are warm. See the [Readiness polling](#readiness-polling) recipe below.
+
+### Readiness polling
+
+Poll `/health` (or `/reflect`) until `models_fetch_complete: true` before launching the agent command, so model lists are fully cached:
+
+```bash
+# Wait up to 30 seconds for model discovery to complete
+for i in $(seq 1 30); do
+  result=$(curl -sf http://172.30.0.30:10000/health 2>/dev/null)
+  if [ "$(echo "$result" | jq -r '.models_fetch_complete')" = "true" ]; then
+    echo "Model discovery complete"
+    break
+  fi
+  echo "Waiting for model discovery... ($i/30)"
+  sleep 1
+done
+```
+
+Or use `/reflect` directly if you also need the model lists:
+
+```bash
+curl -sf http://172.30.0.30:10000/reflect | jq '.models_fetch_complete, .endpoints[].models'
+```
+
 ### Reflection endpoint
 
 The management port (10000) also exposes a `GET /reflect` endpoint for dynamic provider and model discovery. This allows agent harnesses to query which providers are configured and which models are available at runtime.
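The documented bash polling recipe can equivalently be expressed in Node. This is a sketch: the `waitForModels` name and the injected `getHealth` callback are not from the repo; in practice `getHealth` would GET `http://172.30.0.30:10000/health` and JSON-parse the body:

```javascript
// Poll until /health reports models_fetch_complete: true, mirroring the
// documented 30-attempt, 1-second-interval bash loop. getHealth is injected
// so the loop can be exercised without a live sidecar.
async function waitForModels(getHealth, { attempts = 30, delayMs = 1000 } = {}) {
  for (let i = 1; i <= attempts; i++) {
    try {
      const health = await getHealth();
      if (health && health.models_fetch_complete === true) return true;
    } catch {
      // sidecar not accepting connections yet; keep polling
    }
    await new Promise((resolve) => setTimeout(resolve, delayMs));
  }
  return false;
}
```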
