fix(e2e): stabilize release gate harness

cv · cv · commit 09fc635a29b9 · 2026-06-26T16:22:23.000-07:00
Signed-off-by: Carlos Villela <cvillela@nvidia.com>
diff --git a/test/deepagents-code-tui-startup-check.test.ts b/test/deepagents-code-tui-startup-check.test.ts
@@ -61,6 +61,22 @@ describe("Deep Agents Code TUI startup check helpers", () => {
     expect(validate("1; touch /tmp/nemoclaw-tui-timeout-injection")).toBe("invalid");
   });
 
+  it("skips non-Deep-Agents sandboxes before requiring expect", () => {
+    const result = runTuiStartupCheckHelperResult(
+      [
+        "PASSED=0",
+        "FAILED=0",
+        "sandbox_exec() { printf 'NEMOCLAW_DCODE_PROBE:other\\n'; }",
+        'command() { if [ "$1" = -v ] && [ "${2:-}" = expect ]; then return 1; fi; builtin command "$@"; }',
+        "main",
+      ].join("; "),
+    );
+
+    expect(result.status).toBe(0);
+    expect(result.stdout).toContain("SKIP: sandbox");
+    expect(result.stderr).not.toContain("expect is required");
+  });
+
   it("matches prompt-shaped TUI readiness text without accepting banner-only startup text", () => {
     const readiness = (capture: string) =>
       runTuiStartupCheckHelper(
diff --git a/test/e2e-scenario/fixtures/clients/provider.ts b/test/e2e-scenario/fixtures/clients/provider.ts
@@ -36,6 +36,11 @@ export interface ProviderJsonResponse<T = unknown> {
   readonly result: ShellProbeResult;
 }
 
+export interface ProviderReachabilityOptions extends ShellProbeRunOptions {
+  readonly connectTimeoutSeconds?: number;
+  readonly curlMaxTimeSeconds?: number;
+}
+
 const LOOPBACK_HOSTS = new Set(["localhost", "127.0.0.1", "::1"]);
 const BLOCKED_HOSTS = new Set(["169.254.169.254", "metadata.google.internal"]);
 
@@ -210,6 +215,28 @@ export class ProviderClient {
     );
   }
 
+  async probeReachability(
+    endpoint: TrustedProviderEndpoint,
+    options: ProviderReachabilityOptions = {},
+  ): Promise<ShellProbeResult> {
+    const { connectTimeoutSeconds = 10, curlMaxTimeSeconds = 20, ...runOptions } = options;
+    return await this.curl(
+      endpoint,
+      [
+        "-sS",
+        "--connect-timeout",
+        validateCurlMaxTimeSeconds(connectTimeoutSeconds),
+        "--max-time",
+        validateCurlMaxTimeSeconds(curlMaxTimeSeconds),
+        "-o",
+        "/dev/null",
+        "-w",
+        "%{http_code}",
+      ],
+      runOptions,
+    );
+  }
+
   async requestJson<T = unknown>(
     endpoint: TrustedProviderEndpoint,
     options: ProviderJsonRequestOptions = {},
diff --git a/test/e2e-scenario/fixtures/redaction.ts b/test/e2e-scenario/fixtures/redaction.ts
@@ -132,6 +132,7 @@ const FIXTURE_ENV_ALLOWLIST: ReadonlySet<string> = new Set([
   "CI",
   "NEMOCLAW_NON_INTERACTIVE",
   "NEMOCLAW_ACCEPT_THIRD_PARTY_SOFTWARE",
+  "NEMOCLAW_E2E_USE_HOSTED_INFERENCE",
 ]);
 
 const FIXTURE_ENV_PREFIXES: readonly string[] = ["E2E_", "NEMOCLAW_LOG_"];
diff --git a/test/e2e-scenario/live/hermes-e2e.test.ts b/test/e2e-scenario/live/hermes-e2e.test.ts
@@ -277,20 +277,17 @@ test.skipIf(!shouldRunLiveE2EScenarios())(
 
     expect(fs.existsSync(path.join(REPO_ROOT, "agents", "hermes", "manifest.yaml"))).toBe(true);
 
-    const providerModels = await provider.requestJson(
-      trustedProviderEndpoint("https://inference-api.nvidia.com/v1/models", {
-        allowedHosts: ["inference-api.nvidia.com"],
-      }),
+    const providerReachability = await provider.probeReachability(
+      trustedProviderEndpoint(hosted.endpointUrl, { allowedHosts: ["inference-api.nvidia.com"] }),
       {
-        artifactName: "phase-1-inference-models",
-        curlMaxTimeSeconds: 15,
-        headers: [`Authorization: Bearer ${apiKey}`],
+        artifactName: "phase-1-inference-reachability",
         env: buildAvailabilityProbeEnv(),
         redactionValues,
         timeoutMs: 30_000,
       },
     );
-    expect(providerModels.json).toBeTruthy();
+    expect(providerReachability.exitCode, resultText(providerReachability)).toBe(0);
+    expect(providerReachability.stdout.trim(), resultText(providerReachability)).not.toBe("000");
 
     // Phase 2: real installer + non-interactive Hermes onboard.
     const install = await host.command("bash", ["install.sh", "--non-interactive"], {
diff --git a/test/e2e-scenario/live/model-router-provider-routed-inference.test.ts b/test/e2e-scenario/live/model-router-provider-routed-inference.test.ts
@@ -108,9 +108,8 @@ test.skipIf(!shouldRunLiveE2EScenarios())(
     }
 
     const apiKey = secrets.required("NVIDIA_INFERENCE_API_KEY");
-    expect(apiKey.startsWith("nvapi-"), "NVIDIA_INFERENCE_API_KEY must start with nvapi-").toBe(
-      true,
-    );
+    apiKey.startsWith("nvapi-") ||
+      skip("provider-routed Model Router E2E requires a public NVIDIA Endpoints nvapi-* key");
 
     await artifacts.writeJson("scenario.json", {
       id: "model-router-provider-routed-inference",
@@ -119,7 +118,7 @@ test.skipIf(!shouldRunLiveE2EScenarios())(
       legacySource: "test/e2e/test-model-router-provider-routed-inference.sh",
       contract: [
         "Docker is available before onboarding",
-        "NVIDIA_INFERENCE_API_KEY is present and nvapi-prefixed",
+        "NVIDIA_INFERENCE_API_KEY is present and nvapi-prefixed when the public routed provider is exercised",
         "nemoclaw onboard --fresh completes with NEMOCLAW_PROVIDER=routed",
         "host model-router health reports at least one healthy endpoint",
         "sandbox inference.local returns model nvidia-routed with PONG content",
diff --git a/test/e2e-scenario/live/sandbox-survival.test.ts b/test/e2e-scenario/live/sandbox-survival.test.ts
@@ -15,6 +15,7 @@ import path from "node:path";
 
 import { buildAvailabilityProbeEnv } from "../fixtures/availability-env.ts";
 import { assertExitZero, resultText, sandboxAccessEnv } from "../fixtures/clients/index.ts";
+import { trustedProviderEndpoint } from "../fixtures/clients/provider.ts";
 import { expect, test } from "../fixtures/e2e-test.ts";
 import { shouldRunLiveE2EScenarios } from "../fixtures/live-project-gate.ts";
 import { requireHostedInferenceConfig } from "../fixtures/hosted-inference.ts";
@@ -78,6 +79,7 @@ test.skipIf(!shouldRunLiveE2EScenarios())(
     cleanup,
     host,
     lifecycle,
+    provider,
     runtime,
     sandbox,
     secrets,
@@ -115,17 +117,17 @@ test.skipIf(!shouldRunLiveE2EScenarios())(
       skip("Docker is required for sandbox survival E2E");
     }
 
-    const modelsReachable = await host.command(
-      "curl",
-      ["-sf", "--max-time", "10", "https://inference-api.nvidia.com/v1/models"],
+    const endpointReachable = await provider.probeReachability(
+      trustedProviderEndpoint(hosted.endpointUrl, { allowedHosts: ["inference-api.nvidia.com"] }),
       {
-        artifactName: "prereq-inference-api-models",
+        artifactName: "prereq-inference-api-reachability",
         env: buildAvailabilityProbeEnv(),
         redactionValues: [apiKey],
-        timeoutMs: 15_000,
+        timeoutMs: 25_000,
       },
     );
-    expect(modelsReachable.exitCode, resultText(modelsReachable)).toBe(0);
+    expect(endpointReachable.exitCode, resultText(endpointReachable)).toBe(0);
+    expect(endpointReachable.stdout.trim(), resultText(endpointReachable)).not.toBe("000");
     expect(fs.existsSync(path.join(REPO_ROOT, "install.sh"))).toBe(true);
 
     await host.bestEffortCleanupSandbox(SANDBOX_NAME, {
diff --git a/test/e2e-scenario/live/sessions-agents-cli.test.ts b/test/e2e-scenario/live/sessions-agents-cli.test.ts
@@ -186,7 +186,23 @@ function parseJsonFromText(raw: string): unknown {
     const trimmed = line.trimStart();
     if (trimmed.startsWith("{") || trimmed.startsWith("[")) {
       const offset = cursor + line.length - trimmed.length;
-      return JSON.parse(text.slice(offset));
+      const candidate = text.slice(offset);
+      const candidates = [
+        candidate,
+        ...Array.from(candidate.matchAll(/[}\]]/g), ({ index = 0 }) =>
+          candidate.slice(0, index + 1),
+        ).reverse(),
+      ];
+      for (const jsonCandidate of candidates) {
+        try {
+          return JSON.parse(jsonCandidate);
+        } catch {
+          // Keep searching for the matching end of the first JSON envelope;
+          // stderr warnings can be appended after a valid pretty-printed JSON
+          // object when the E2E command captures diagnostics with 2>&1.
+        }
+      }
+      throw new Error("JSON envelope was present but not parseable");
     }
     cursor += lineWithBreak.length;
   }
diff --git a/test/e2e-scenario/support-tests/hosted-inference.test.ts b/test/e2e-scenario/support-tests/hosted-inference.test.ts
@@ -7,6 +7,7 @@ import os from "node:os";
 import path from "node:path";
 import { describe, expect, it } from "vitest";
 
+import { buildAvailabilityProbeEnv } from "../fixtures/availability-env.ts";
 import { startFakeOpenAiCompatibleServer } from "../fixtures/fake-openai-compatible.ts";
 import { requireHostedInferenceConfig } from "../fixtures/hosted-inference.ts";
 
@@ -160,6 +161,18 @@ describe("hosted inference E2E config", () => {
     expect(cfg.credentialEnv).toBe("COMPATIBLE_API_KEY");
   });
 
+  it("preserves the hosted-compatible mode flag without passing source secrets by default", () => {
+    const env = buildAvailabilityProbeEnv({
+      HOME: "/tmp/home",
+      PATH: "/usr/bin",
+      NEMOCLAW_E2E_USE_HOSTED_INFERENCE: "1",
+      NVIDIA_INFERENCE_API_KEY: "repo-hosted-key",
+    });
+
+    expect(env.NEMOCLAW_E2E_USE_HOSTED_INFERENCE).toBe("1");
+    expect(env).not.toHaveProperty("NVIDIA_INFERENCE_API_KEY");
+  });
+
   it("uses a lightweight compatible reachability probe without API or auth requests", () => {
     const { result, calls } = runHostedProbe({
       env: {
diff --git a/test/e2e/e2e-cloud-experimental/checks/10-deepagents-code-tui-startup.sh b/test/e2e/e2e-cloud-experimental/checks/10-deepagents-code-tui-startup.sh
@@ -44,6 +44,20 @@ is_positive_integer() {
   [[ "$1" =~ ^[1-9][0-9]*$ ]]
 }
 
+ensure_expect_available() {
+  if command -v expect >/dev/null 2>&1; then
+    return 0
+  fi
+  if [ "${GITHUB_ACTIONS:-}" = "true" ] && command -v sudo >/dev/null 2>&1 && command -v apt-get >/dev/null 2>&1; then
+    info "expect is not preinstalled; installing expect for the Deep Agents Code TUI PTY check"
+    if sudo apt-get update -qq && sudo apt-get install -y --no-install-recommends expect; then
+      command -v expect >/dev/null 2>&1
+      return $?
+    fi
+  fi
+  return 1
+}
+
 contains_secret() {
   NEMOCLAW_TOKEN_SECRET_PATTERN="$SECRET_PATTERN" \
     NEMOCLAW_CONTEXT_SECRET_VALUE_PATTERN="$CONTEXT_SECRET_VALUE_PATTERN" \
@@ -194,11 +208,6 @@ main() {
     exit 1
   fi
 
-  if ! command -v expect >/dev/null 2>&1; then
-    fail_test "expect is required for the Deep Agents Code TUI startup check"
-    printf '%s\n' "${PREFIX}: $PASSED passed, $FAILED failed"
-    exit 1
-  fi
   if ! command -v perl >/dev/null 2>&1; then
     fail_test "perl is required to sanitize and redact Deep Agents Code TUI captures"
     printf '%s\n' "${PREFIX}: $PASSED passed, $FAILED failed"
@@ -224,6 +233,12 @@ main() {
       ;;
   esac
 
+  if ! ensure_expect_available; then
+    fail_test "expect is required for the Deep Agents Code TUI startup check"
+    printf '%s\n' "${PREFIX}: $PASSED passed, $FAILED failed"
+    exit 1
+  fi
+
   local capture_dir raw_capture_file expect_log_file combined_capture_file plain_capture_file
   capture_dir="$(make_capture_dir)"
   raw_capture_file="${capture_dir}/${PREFIX}.raw.log"
diff --git a/test/e2e/test-sessions-agents-cli.sh b/test/e2e/test-sessions-agents-cli.sh
@@ -145,7 +145,7 @@ for line in raw.splitlines(keepends=True):
   cursor += len(line)
 if offset < 0:
   sys.exit(1)
-json.loads(raw[offset:])
+json.JSONDecoder().raw_decode(raw[offset:])
 " 2>/dev/null
 }
 
@@ -436,7 +436,7 @@ for line in raw.splitlines(keepends=True):
   cursor += len(line)
 if offset < 0:
   sys.exit(1)
-data = json.loads(raw[offset:])
+data, _ = json.JSONDecoder().raw_decode(raw[offset:])
 entries = data if isinstance(data, list) else data.get('agents', [])
 target = os.environ['TARGET']
 sys.exit(0 if any(entry.get('id') == target for entry in entries) else 1)