Skip to content

Commit 75046d5

Browse files
committed
fix(cloud): harden mock stack e2e harness
1 parent 27d4aee commit 75046d5

12 files changed

Lines changed: 552 additions & 153 deletions

File tree

packages/cloud-api/v1/_container-control-plane-forward.ts

Lines changed: 10 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -51,15 +51,19 @@ async function forwardControlPlaneRequest(
5151
configureHeaders(headers);
5252

5353
try {
54-
const upstream = await fetch(target, {
55-
body:
56-
c.req.method === "GET" || c.req.method === "HEAD"
57-
? undefined
58-
: c.req.raw.body,
54+
const body =
55+
c.req.method === "GET" || c.req.method === "HEAD"
56+
? undefined
57+
: c.req.raw.body;
58+
const init: RequestInit & { duplex?: "half" } = {
59+
body,
5960
headers,
6061
method: c.req.method,
6162
redirect: "manual",
62-
});
63+
};
64+
if (body) init.duplex = "half";
65+
66+
const upstream = await fetch(target, init);
6367

6468
return new Response(upstream.body, {
6569
headers: upstream.headers,

packages/cloud-shared/src/lib/services/api-keys.ts

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -254,11 +254,10 @@ export class ApiKeysService {
254254

255255
async revokeForAgent(agentSandboxId: string): Promise<void> {
256256
const name = ApiKeysService.agentApiKeyName(agentSandboxId);
257-
const keys = await apiKeysRepository.findByName(name);
257+
const keys = await apiKeysRepository.deleteByName(name);
258258
for (const key of keys) {
259259
await this.invalidateCache(key.key_hash);
260260
}
261-
await apiKeysRepository.deleteByName(name);
262261
}
263262
}
264263

packages/cloud-shared/src/lib/services/eliza-sandbox.ts

Lines changed: 17 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -516,7 +516,7 @@ export class ElizaSandboxService {
516516
}
517517

518518
async deleteAgent(agentId: string, orgId: string): Promise<DeleteAgentResult> {
519-
return dbWrite.transaction(async (tx) => {
519+
const result = await dbWrite.transaction(async (tx) => {
520520
await this.lockLifecycle(tx, agentId, orgId);
521521

522522
const rec = await this.getAgentForLifecycleMutation(tx, agentId, orgId);
@@ -584,23 +584,26 @@ export class ElizaSandboxService {
584584
`);
585585
const deletedSandbox = result.rows[0];
586586

587-
if (deletedSandbox) {
588-
// Best-effort: revoke the per-agent API key. A failure here doesn't
589-
// un-delete the sandbox; the key just lingers as inactive data.
590-
try {
591-
await apiKeysService.revokeForAgent(agentId);
592-
} catch (err) {
593-
logger.warn("[agent-sandbox] Failed to revoke per-agent API key", {
594-
agentId,
595-
error: err instanceof Error ? err.message : String(err),
596-
});
597-
}
598-
}
599-
600587
return deletedSandbox
601588
? ({ success: true, deletedSandbox } as const)
602589
: ({ success: false, error: "Agent not found" } as const);
603590
});
591+
592+
if (result.success) {
593+
// Best-effort: revoke the per-agent API key after the row delete commits.
594+
// A failure here does not un-delete the sandbox; the key just lingers as
595+
// inactive data and can be cleaned by ops.
596+
try {
597+
await apiKeysService.revokeForAgent(agentId);
598+
} catch (err) {
599+
logger.warn("[agent-sandbox] Failed to revoke per-agent API key", {
600+
agentId,
601+
error: err instanceof Error ? err.message : String(err),
602+
});
603+
}
604+
}
605+
606+
return result;
604607
}
605608

606609
/**
Lines changed: 126 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,126 @@
1+
import { randomUUID } from "node:crypto";
2+
import { createServer, type Server } from "node:http";
3+
4+
import type { SandboxCreateConfig, SandboxHandle, SandboxProvider } from "./sandbox-provider-types";
5+
6+
/**
 * Book-keeping record for one in-memory sandbox: the handle returned to the
 * caller, the HTTP server backing it, and the canned runtime agent that
 * server reports.
 */
interface MemorySandbox {
  /** Handle handed back from `create`. */
  handle: SandboxHandle;
  /** HTTP server that answers requests for this sandbox. */
  server: Server;
  /** Fixed agent payload served by the sandbox's agent routes. */
  runtimeAgent: {
    id: string;
    name: string;
    status: "active";
  };
}
15+
16+
/**
 * Builds a JSON `Response` for `body` with the given HTTP status.
 *
 * @param body - Value serialised as the response payload.
 * @param status - HTTP status code; defaults to 200.
 * @returns The `Response` produced by `Response.json`.
 */
function json(body: unknown, status = 200): Response {
  const init = { status };
  return Response.json(body, init);
}
19+
20+
/**
 * Binds `server` to an OS-assigned port on 127.0.0.1 and returns that port.
 *
 * @param server - An HTTP server that is not yet listening.
 * @returns The TCP port the server was bound to.
 * @throws The server's `"error"` event payload if binding fails, or an `Error`
 *   if the bound address is not a TCP address.
 */
async function listen(server: Server): Promise<number> {
  await new Promise<void>((resolve, reject) => {
    // Each handler detaches the other, so no one-shot listener outlives the
    // bind attempt. A leftover "error" listener would silently consume the
    // first error emitted later in the server's life.
    const onError = (error: Error): void => {
      server.off("listening", onListening);
      reject(error);
    };
    const onListening = (): void => {
      server.off("error", onError);
      resolve();
    };
    server.once("error", onError);
    server.once("listening", onListening);
    server.listen(0, "127.0.0.1");
  });

  const address = server.address();
  if (!address || typeof address === "string") {
    throw new Error("[memory-sandbox] test server did not bind to a TCP port");
  }
  return address.port;
}
31+
32+
/**
 * Test-only sandbox provider used by cloud E2E.
 *
 * It exercises the real DB-backed provisioning and deletion job service without
 * requiring Docker, SSH nodes, or live Hetzner credentials in CI. Production
 * selection is guarded in `createSandboxProvider`.
 *
 * Each sandbox is a loopback HTTP server that answers a small fixed set of
 * routes with canned JSON. Sandboxes are tracked per provider instance, so
 * `stop` and `checkHealth` only see sandboxes created through the same
 * instance.
 */
export class MemorySandboxProvider implements SandboxProvider {
  private readonly sandboxes = new Map<string, MemorySandbox>();

  /**
   * Starts a loopback HTTP server for the agent and registers it under the
   * sandbox id `memory-<agentId>`.
   *
   * @param config - Creation config; `agentId` and `agentName` are read.
   * @returns A handle whose `bridgeUrl` and `healthUrl` point at the server.
   */
  async create(config: SandboxCreateConfig): Promise<SandboxHandle> {
    const runtimeAgent: MemorySandbox["runtimeAgent"] = {
      id: `runtime-${randomUUID()}`,
      name: config.agentName,
      status: "active",
    };

    const server = createServer((req, res) => {
      void (async () => {
        try {
          const response = MemorySandboxProvider.route(req.method, req.url, runtimeAgent);
          res.writeHead(response.status, Object.fromEntries(response.headers));
          res.end(await response.text());
        } catch {
          // A failure here (e.g. a request target `new URL` cannot parse) must
          // not surface as an unhandled rejection; answer with a bare 500.
          if (!res.headersSent) res.writeHead(500);
          res.end();
        }
      })();
    });

    const port = await listen(server);
    const sandboxId = `memory-${config.agentId}`;
    const baseUrl = `http://127.0.0.1:${port}`;
    const handle: SandboxHandle = {
      sandboxId,
      bridgeUrl: baseUrl,
      healthUrl: `${baseUrl}/api/health`,
      metadata: {
        provider: "memory",
        agentId: config.agentId,
      },
    };

    const replaced = this.sandboxes.get(sandboxId);
    this.sandboxes.set(sandboxId, { handle, runtimeAgent, server });
    // Sandbox ids are derived from the agent id, so a repeated create for the
    // same agent replaces the map entry. Close the server being replaced;
    // otherwise it is unreachable through `stop` and stays open. Not awaited,
    // so lingering connections on the old server cannot delay this call.
    replaced?.server.close();
    return handle;
  }

  /**
   * Unregisters the sandbox and closes its HTTP server. Unknown ids are a
   * no-op.
   *
   * @param sandboxId - Id previously returned in a handle from `create`.
   * @throws The error reported by `server.close`, if any.
   */
  async stop(sandboxId: string): Promise<void> {
    const sandbox = this.sandboxes.get(sandboxId);
    if (!sandbox) return;
    // Unregister first so `checkHealth` reports false while the close is
    // still in flight.
    this.sandboxes.delete(sandboxId);
    await new Promise<void>((resolve, reject) => {
      sandbox.server.close((error) => {
        if (error) reject(error);
        else resolve();
      });
    });
  }

  /**
   * Reports whether the handle's sandbox id is currently registered with this
   * provider instance. No network request is made.
   */
  async checkHealth(handle: SandboxHandle): Promise<boolean> {
    return this.sandboxes.has(handle.sandboxId);
  }

  /** No-op: takes no arguments and always resolves to an empty string. */
  async runCommand(): Promise<string> {
    return "";
  }

  /**
   * Maps a request line to the canned JSON response for that route.
   *
   * @param method - HTTP method from the incoming request.
   * @param rawUrl - Raw request target; treated as `/` when absent.
   * @param runtimeAgent - Agent payload returned by the agent routes.
   * @returns The response for a known route, or a 404 JSON response.
   * @throws TypeError if `rawUrl` cannot be parsed as a URL.
   */
  private static route(
    method: string | undefined,
    rawUrl: string | undefined,
    runtimeAgent: MemorySandbox["runtimeAgent"],
  ): Response {
    const { pathname } = new URL(rawUrl ?? "/", "http://127.0.0.1");

    if (method === "GET") {
      if (pathname === "/api/health") {
        return json({ success: true, status: "ok" });
      }
      if (pathname === "/api/agents") {
        return json({ success: true, agents: [runtimeAgent] });
      }
    }

    if (method === "POST") {
      const isStart = pathname.startsWith("/api/agents/") && pathname.endsWith("/start");
      if (pathname === "/api/agents" || isStart) {
        return json({ success: true, data: runtimeAgent });
      }
    }

    return json({ success: false, error: "Not found" }, 404);
  }
}

packages/cloud-shared/src/lib/services/sandbox-provider.ts

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,10 @@ export type {
1717
* - `DockerSandboxProvider` (SSH-into-remote-nodes) otherwise.
1818
*/
1919
export async function createSandboxProvider(): Promise<SandboxProvider> {
20+
if (shouldUseMemoryTestProvider()) {
21+
const { MemorySandboxProvider } = await import("./memory-sandbox-provider");
22+
return new MemorySandboxProvider();
23+
}
2024
if (shouldUseLocalDockerProvider()) {
2125
const { LocalDockerSandboxProvider } = await import("./local-docker-sandbox-provider");
2226
return new LocalDockerSandboxProvider();
@@ -25,6 +29,15 @@ export async function createSandboxProvider(): Promise<SandboxProvider> {
2529
return new DockerSandboxProvider();
2630
}
2731

32+
/**
 * Decides whether the in-memory test sandbox provider was requested.
 *
 * @returns `false` unless `ELIZA_TEST_SANDBOX_PROVIDER` is `"memory"`; `true`
 *   when it is and either `NODE_ENV` is `"test"` or `CLOUD_E2E` is `"1"`.
 * @throws Error when the memory provider is requested outside those two
 *   environments, so a misconfiguration fails loudly instead of being ignored.
 */
function shouldUseMemoryTestProvider(): boolean {
  const {
    ELIZA_TEST_SANDBOX_PROVIDER: requestedProvider,
    NODE_ENV: nodeEnv,
    CLOUD_E2E: cloudE2e,
  } = process.env;

  if (requestedProvider !== "memory") {
    return false;
  }

  const inTestContext = nodeEnv === "test" || cloudE2e === "1";
  if (!inTestContext) {
    throw new Error(
      "ELIZA_TEST_SANDBOX_PROVIDER=memory is only allowed under NODE_ENV=test or CLOUD_E2E=1",
    );
  }
  return true;
}
40+
2841
function shouldUseLocalDockerProvider(): boolean {
2942
const env = process.env;
3043
if (env.MILADY_LOCAL_DOCKER_PROVIDER === "1") return true;

packages/os/usb-installer/HANDOFF.md

Lines changed: 68 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1,16 +1,17 @@
11
# elizaOS USB Installer Handoff
22

3-
Last updated: 2026-05-19
3+
Last updated: 2026-05-20
44

55
## Current Branch
66

77
- Repository: `elizaOS/eliza`
88
- Worktree used for the latest proof: `/home/nubs/Git/iqlabs/elizaos-usb-prod-e2e`
9-
- Branch: `nubs/elizaos-live-prod-hardening-20260519`
10-
- PR: https://github.com/elizaOS/eliza/pull/7803
11-
- Verified local head: current merge head
12-
- Latest merged `origin/develop`: `c73f1768b68ea72b5df83efeeaadea49f812555f`
13-
- Latest local CI-fix validation: 2026-05-19 23:55 UTC
9+
- Branch: `nubs/messylinux-cloud-e2e-hardening`
10+
- Previous PR #7803: https://github.com/elizaOS/eliza/pull/7803 (merged)
11+
- Follow-up PR #7825: https://github.com/elizaOS/eliza/pull/7825
12+
- Verified local base: `origin/develop@27d4aee89417a4fc921c2c39ae7bd6fa3bdb9c82`
13+
- Latest PR head locally validated: `7497cf6c6b5e787df1ddc71ef49844cf7283c4a0`
14+
- Latest local CI-fix validation: 2026-05-20 05:13 UTC
1415

1516
## What This Package Is
1617

@@ -105,6 +106,21 @@ behind the backend contract and future signed/elevated helpers.
105106
- OS release CI and the Linux release-packaging path now run Playwright E2E
106107
and run the opt-in `scsi_debug` virtual block-device proof when the runner
107108
kernel provides that module.
109+
- Additional cloud mock-stack E2E hardening added on 2026-05-20:
110+
- fixed the cloud E2E repo-root resolution so the PGlite TCP bridge script
111+
resolves from the repository root, not `packages/`;
112+
- replaced the stale in-process control-plane mock with the real
113+
`container-control-plane` sidecar and a guarded in-memory sandbox provider
114+
that only activates under `NODE_ENV=test` or `CLOUD_E2E=1`;
115+
- added a Node-hosted cloud-api Worker fetch adapter for the E2E harness so
116+
CI exercises the generated router, real API routes, DB queue, and sidecar
117+
forwarder without depending on Wrangler local runtime;
118+
- fixed Node fetch forwarding for request bodies by setting `duplex: "half"`;
119+
- added process-level DB pool cleanup before the fixture stops PGlite;
120+
- moved best-effort per-agent API-key revocation out of the sandbox delete
121+
transaction and made revocation a single delete-returning operation;
122+
- updated provision/deprovision/stuck-cleanup specs to create real agents,
123+
drive the real provisioning queue, and assert externally visible states.
108124
- Post-merge validation on 2026-05-20 after merging
109125
`origin/develop@c73f1768b6`:
110126
- `bun run verify:cloud` passed;
@@ -119,6 +135,52 @@ behind the backend contract and future signed/elevated helpers.
119135
- `bun run --cwd packages/os/usb-installer test:linux-virtual-usb` passed
120136
with `scsi_debug` cleanup verified;
121137
- `git diff --check` passed.
138+
- Final local validation on 2026-05-20 after the mock-stack E2E harness fix:
139+
- `bun run --cwd packages/cloud-shared typecheck` passed;
140+
- `bun run --cwd packages/cloud-api typecheck` passed;
141+
- `bun run --cwd packages/cloud-api lint` passed;
142+
- `bun test packages/cloud-api/webhooks/bluebubbles/route.test.ts` passed:
143+
10 tests;
144+
- `bun run --cwd packages/cloud-services/container-control-plane typecheck`
145+
passed;
146+
- `bun run --cwd packages/test/cloud-e2e typecheck` passed;
147+
- `bun run --cwd packages/cloud-shared lint` passed;
148+
- `bun run cloud:e2e` passed: 4 Playwright tests covering onboarding,
149+
provision, deprovision, and stuck cleanup against PGlite, cloud-api,
150+
cloud-frontend, the real control-plane sidecar, and the guarded memory
151+
sandbox provider;
152+
- `bun run --cwd packages/cloud-api test -- --runInBand` passed: 44 tests;
153+
- `bun run --cwd packages/os/usb-installer typecheck` passed;
154+
- `bun run --cwd packages/os/usb-installer test` passed: 9 files, 80 tests,
155+
with the opt-in virtual block-device test skipped by default;
156+
- `bun run --cwd packages/os/usb-installer lint` passed;
157+
- `bun run --cwd packages/os/usb-installer build` passed;
158+
- `bun run --cwd packages/os/usb-installer test:e2e` passed: 6 Playwright
159+
tests;
160+
- `bun run --cwd packages/os/usb-installer test:linux-virtual-usb` passed
161+
against `scsi_debug`;
162+
- `git diff --check` passed.
163+
- Follow-up local validation on 2026-05-20 after rebasing PR #7825 onto
164+
`origin/develop@27d4aee894`:
165+
- `bun run --cwd packages/cloud-shared typecheck` passed;
166+
- `bun run --cwd packages/cloud-shared lint` passed;
167+
- `bun run --cwd packages/cloud-api typecheck` passed;
168+
- `bun run --cwd packages/test/cloud-e2e typecheck` passed;
169+
- `bun run cloud:e2e` passed: 4 Playwright tests covering onboarding,
170+
provision, deprovision, and stuck cleanup against PGlite, cloud-api,
171+
cloud-frontend, the real control-plane sidecar, and the guarded memory
172+
sandbox provider;
173+
- `bun run test:cloud` passed: 279 tests across 30 files;
174+
- `bun run --cwd packages/os/usb-installer typecheck` passed;
175+
- `bun run --cwd packages/os/usb-installer test` passed: 9 files, 80 tests,
176+
with the opt-in virtual block-device test skipped by default;
177+
- `bun run --cwd packages/os/usb-installer lint` passed;
178+
- `bun run --cwd packages/os/usb-installer build` passed;
179+
- `bun run --cwd packages/os/usb-installer test:e2e` passed: 6 Playwright
180+
tests;
181+
- `bun run --cwd packages/os/usb-installer test:linux-virtual-usb` passed
182+
against `scsi_debug`;
183+
- `git diff --check` passed.
122184
- Disk cleanup on 2026-05-19:
123185
- removed ignored/generated stale ISO artifacts and root `dist/`;
124186
- removed inactive `/tmp/eliza-pr7803` temp checkout after confirming no

0 commit comments

Comments
 (0)