Skip to content

Commit 7912c93

Browse files
committed
fix(orchestrator): polish verified app completions
1 parent 7ab0a8f commit 7912c93

6 files changed

Lines changed: 429 additions & 30 deletions

File tree

plugins/plugin-agent-orchestrator/__tests__/unit/spawn-agent.test.ts

Lines changed: 4 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -273,6 +273,10 @@ describe("TASKS:spawn_agent", () => {
273273
expect(initialTask).toContain(
274274
"write files under data/apps/<slug>/, not apps/<slug>/ or public/apps/<slug>/",
275275
);
276+
expect(initialTask).toContain(
277+
"do not leave placeholder/mock external assets, TODO/placeholder comments, or unfinished sample code",
278+
);
279+
expect(initialTask).toContain('do not leave inert href="#" controls');
276280
expect(initialTask.indexOf("--- Resolved Workspace ---")).toBeLessThan(
277281
initialTask.indexOf("--- User Task ---"),
278282
);

plugins/plugin-agent-orchestrator/__tests__/unit/sub-agent-completion-evaluator.test.ts

Lines changed: 131 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -170,7 +170,38 @@ describe("subAgentCompletionResponseEvaluator", () => {
170170
setContexts: [SIMPLE_CONTEXT_ID],
171171
clearCandidateActions: true,
172172
clearParentActionHints: true,
173-
reply: "https://example.test/apps/nebula/",
173+
reply:
174+
"The app is live at https://example.test/apps/nebula/. Let me know if you'd like tweaks.",
175+
debug: [
176+
"verified sub-agent completion has no concrete follow-up action; using direct reply",
177+
],
178+
});
179+
});
180+
181+
it("keeps clean completion prose after stripped tool output when it cites a verified URL", async () => {
182+
const context = makeContext({
183+
text: "[sub-agent: nebula app (opencode) — task_complete]\n[tool output: find files]\n/home/user/project/.git/config\n/home/user/project/data/apps/nebula/index.html\n[/tool output]\nBuilt Nebula Garden with product cards and a waitlist CTA.\nLive URL: https://example.test/apps/nebula/.",
184+
metadata: {
185+
subAgentVerifiedUrls: ["https://example.test/apps/nebula/"],
186+
},
187+
messageHandler: {
188+
plan: {
189+
contexts: ["general"],
190+
reply: "https://example.test/apps/nebula/",
191+
requiresTool: true,
192+
candidateActions: ["SHELL"],
193+
},
194+
},
195+
});
196+
197+
expect(subAgentCompletionResponseEvaluator.shouldRun(context)).toBe(true);
198+
expect(subAgentCompletionResponseEvaluator.evaluate(context)).toEqual({
199+
requiresTool: false,
200+
setContexts: [SIMPLE_CONTEXT_ID],
201+
clearCandidateActions: true,
202+
clearParentActionHints: true,
203+
reply:
204+
"Built Nebula Garden with product cards and a waitlist CTA.\nLive URL: https://example.test/apps/nebula/.",
174205
debug: [
175206
"verified sub-agent completion has no concrete follow-up action; using direct reply",
176207
],
@@ -208,6 +239,105 @@ describe("subAgentCompletionResponseEvaluator", () => {
208239
});
209240
});
210241

242+
it("keeps a clean synthesized reply when bare completion URLs are verified", async () => {
243+
const context = makeContext({
244+
text: "[sub-agent: permit garden (opencode) — task_complete]\nhttp://127.0.0.1:6900/apps/permit-garden/\nhttps://example.test/apps/permit-garden/",
245+
metadata: {
246+
subAgentVerifiedUrls: [
247+
"http://127.0.0.1:6900/apps/permit-garden/",
248+
"https://example.test/apps/permit-garden/",
249+
],
250+
},
251+
messageHandler: {
252+
plan: {
253+
contexts: ["general"],
254+
reply:
255+
"✅ Built Permit Garden as a fictional bureaucratic zine and sticker landing page. It has product cards, pricing, and a waitlist CTA: https://example.test/apps/permit-garden/",
256+
requiresTool: false,
257+
},
258+
},
259+
});
260+
261+
expect(subAgentCompletionResponseEvaluator.shouldRun(context)).toBe(true);
262+
expect(subAgentCompletionResponseEvaluator.evaluate(context)).toEqual({
263+
requiresTool: false,
264+
setContexts: [SIMPLE_CONTEXT_ID],
265+
clearCandidateActions: true,
266+
clearParentActionHints: true,
267+
reply:
268+
"Built Permit Garden as a fictional bureaucratic zine and sticker landing page. It has product cards, pricing, and a waitlist CTA: https://example.test/apps/permit-garden/",
269+
debug: [
270+
"verified sub-agent completion has no concrete follow-up action; using direct reply",
271+
],
272+
});
273+
});
274+
275+
it("removes loopback route aliases from verified app completion replies", async () => {
276+
const context = makeContext({
277+
text: "[sub-agent: civic vitrine (opencode) — task_complete]\nBuilt Civic Vitrine.\n- URL: http://127.0.0.1:6900/apps/civic-vitrine/\n- Public URL: https://example.test/apps/civic-vitrine/\n- Waitlist form: local submit handler.",
278+
metadata: {
279+
subAgentVerifiedUrls: [
280+
"http://127.0.0.1:6900/apps/civic-vitrine/",
281+
"https://example.test/apps/civic-vitrine/",
282+
],
283+
},
284+
messageHandler: {
285+
plan: {
286+
contexts: ["general"],
287+
reply:
288+
"✅ Civic Vitrine site built. You can view it locally at http://127.0.0.1:6900/apps/civic-vitrine/ and publicly at https://example.test/apps/civic-vitrine/.",
289+
requiresTool: false,
290+
},
291+
},
292+
});
293+
294+
expect(subAgentCompletionResponseEvaluator.shouldRun(context)).toBe(true);
295+
expect(subAgentCompletionResponseEvaluator.evaluate(context)).toEqual({
296+
requiresTool: false,
297+
setContexts: [SIMPLE_CONTEXT_ID],
298+
clearCandidateActions: true,
299+
clearParentActionHints: true,
300+
reply:
301+
"Built Civic Vitrine.\n- Public URL: https://example.test/apps/civic-vitrine/\n- Waitlist form: local submit handler.",
302+
debug: [
303+
"verified sub-agent completion has no concrete follow-up action; using direct reply",
304+
],
305+
});
306+
});
307+
308+
it("appends the public verified URL to a clean synthesized reply that omits it", async () => {
309+
const context = makeContext({
310+
text: "[sub-agent: queue cathedral (opencode) — task_complete]\nhttp://127.0.0.1:6900/apps/queue-cathedral/\nhttps://example.test/apps/queue-cathedral/",
311+
metadata: {
312+
subAgentVerifiedUrls: [
313+
"http://127.0.0.1:6900/apps/queue-cathedral/",
314+
"https://example.test/apps/queue-cathedral/",
315+
],
316+
},
317+
messageHandler: {
318+
plan: {
319+
contexts: ["simple"],
320+
reply:
321+
"The Queue Cathedral site is live with product cards, prices, and a waitlist CTA.",
322+
requiresTool: false,
323+
},
324+
},
325+
});
326+
327+
expect(subAgentCompletionResponseEvaluator.shouldRun(context)).toBe(true);
328+
expect(subAgentCompletionResponseEvaluator.evaluate(context)).toEqual({
329+
requiresTool: false,
330+
setContexts: [SIMPLE_CONTEXT_ID],
331+
clearCandidateActions: true,
332+
clearParentActionHints: true,
333+
reply:
334+
"The Queue Cathedral site is live with product cards, prices, and a waitlist CTA.\nhttps://example.test/apps/queue-cathedral/",
335+
debug: [
336+
"verified sub-agent completion has no concrete follow-up action; using direct reply",
337+
],
338+
});
339+
});
340+
211341
it("suppresses empty task_complete placeholders", async () => {
212342
const context = makeContext({
213343
text: "[sub-agent: tweet app (opencode) — task_complete]\nsub-agent reports task complete (no captured output).",

plugins/plugin-agent-orchestrator/__tests__/unit/sub-agent-router.test.ts

Lines changed: 79 additions & 7 deletions
Original file line number | Diff line number | Diff line change
@@ -757,6 +757,32 @@ describe("SubAgentRouter", () => {
757757
expect(posted?.content?.text).toContain("NOT reachable");
758758
});
759759

760+
it("suppresses an exhausted retry failure when a newer continuation is active", async () => {
761+
session = sessionWithTask(`build it at ${DEAD_URL}`, 2);
762+
const newer = {
763+
...session,
764+
id: "11111111-2222-3333-4444-555555555555",
765+
status: "running",
766+
createdAt: new Date("2026-05-07T12:01:00.000Z"),
767+
lastActivityAt: new Date("2026-05-07T12:01:00.000Z"),
768+
} satisfies SessionInfo;
769+
acp = makeAcpService(session);
770+
acp.service.listSessions.mockResolvedValue([session, newer]);
771+
const { runtime, handleMessage, spawnSession } = makeRuntime({
772+
acp: acp.service,
773+
setting: { ELIZA_URL_VERIFY_SETTLE_MS: "0" },
774+
});
775+
await SubAgentRouter.start(runtime);
776+
777+
acp.emit(SESSION_ID, "task_complete", {
778+
response: `Done — live at ${DEAD_URL}`,
779+
});
780+
await new Promise((r) => setTimeout(r, 200));
781+
782+
expect(spawnSession).not.toHaveBeenCalled();
783+
expect(handleMessage).not.toHaveBeenCalled();
784+
});
785+
760786
it("treats a 405 (reachable, GET-not-allowed) URL as not dead — no retry", async () => {
761787
// Sub-agents dump raw HTTP headers into their narration; incidental
762788
// URLs there (CDN telemetry / NEL `report-to`, POST-only APIs) 405 a
@@ -974,19 +1000,54 @@ describe("SubAgentRouter", () => {
9741000
expect(spawnSession).not.toHaveBeenCalled();
9751001
expect(handleMessage).toHaveBeenCalledTimes(1);
9761002
const posted = handleMessage.mock.calls[0]?.[1];
977-
expect(posted?.content?.text).toContain(localPage);
1003+
expect(posted?.content?.text).not.toContain(localPage);
9781004
expect(posted?.content?.text).toContain(publicPage);
979-
expect(posted?.content?.text).toContain("style.css");
980-
expect(posted?.content?.text).toContain("app.js");
9811005
expect(posted?.content?.metadata?.subAgentVerifiedUrls).toEqual([
982-
localPage,
9831006
publicPage,
9841007
]);
9851008
} finally {
9861009
fs.rmSync(tmpRoot, { recursive: true, force: true });
9871010
}
9881011
});
9891012

1013+
it("rejects generated app pages that reference unreachable image assets", async () => {
1014+
const appUrl = "https://example.test/apps/permit-garden/";
1015+
const imageUrl = "https://cdn.example.test/permit-garden/sticker.png";
1016+
const fetchMock = vi.fn(async (input: RequestInfo | URL) => {
1017+
if (String(input) === imageUrl) {
1018+
return new Response("not found", { status: 404 });
1019+
}
1020+
return new Response(
1021+
`<!doctype html><img src="${imageUrl}" alt="Sticker">`,
1022+
{
1023+
status: 200,
1024+
headers: { "content-type": "text/html" },
1025+
},
1026+
);
1027+
});
1028+
stubFetch(fetchMock);
1029+
session = sessionWithTask(`build and verify ${appUrl}`);
1030+
acp = makeAcpService(session);
1031+
const { runtime, handleMessage, spawnSession } = makeRuntime({
1032+
acp: acp.service,
1033+
setting: { ELIZA_URL_VERIFY_SETTLE_MS: "0" },
1034+
});
1035+
await SubAgentRouter.start(runtime);
1036+
1037+
acp.emit(SESSION_ID, "task_complete", {
1038+
response: `Done — live at ${appUrl}`,
1039+
});
1040+
await new Promise((r) => setTimeout(r, 200));
1041+
1042+
expect(spawnSession).toHaveBeenCalledTimes(1);
1043+
const retryTask = String(spawnSession.mock.calls[0]?.[0]?.initialTask);
1044+
expect(retryTask).toContain("--- VERIFICATION FEEDBACK");
1045+
expect(retryTask).toContain(imageUrl);
1046+
expect(retryTask).toContain("HTTP 404");
1047+
expect(handleMessage).not.toHaveBeenCalled();
1048+
expect(fetchMock).toHaveBeenCalledWith(imageUrl, expect.anything());
1049+
});
1050+
9901051
it("rejects mapped app URLs whose local target was not written this session by default", async () => {
9911052
const tmpRoot = fs.mkdtempSync(
9921053
path.join(os.tmpdir(), "sub-agent-router-"),
@@ -1267,11 +1328,10 @@ describe("SubAgentRouter", () => {
12671328
expect(handleMessage).toHaveBeenCalledTimes(1);
12681329
const posted = handleMessage.mock.calls[0]?.[1];
12691330
expect(posted?.content?.metadata?.subAgentVerifiedUrls).toEqual([
1270-
localUrl,
12711331
publicUrl,
12721332
]);
1273-
expect(posted?.content?.text).toContain(localUrl);
1274-
expect(posted?.content?.text).not.toContain(publicUrl);
1333+
expect(posted?.content?.text).not.toContain(localUrl);
1334+
expect(posted?.content?.text).toContain(publicUrl);
12751335
expect(posted?.content?.text).not.toContain("[verification:");
12761336
} finally {
12771337
fs.rmSync(tmpRoot, { recursive: true, force: true });
@@ -1487,6 +1547,18 @@ describe("extractSubResources", () => {
14871547
]);
14881548
});
14891549

1550+
it("extracts media src and srcset resources", () => {
1551+
const html = `<!doctype html><img src="hero.png" srcset="hero-small.png 480w, https://cdn.example.com/hero-large.png 960w">
1552+
<source srcset="poster.webp 1x, poster@2x.webp 2x">`;
1553+
expect(extractSubResources(html, PAGE).sort()).toEqual([
1554+
"https://cdn.example.com/hero-large.png",
1555+
"https://example.test/apps/bmi/hero-small.png",
1556+
"https://example.test/apps/bmi/hero.png",
1557+
"https://example.test/apps/bmi/poster.webp",
1558+
"https://example.test/apps/bmi/poster@2x.webp",
1559+
]);
1560+
});
1561+
14901562
it("resolves absolute and root-relative refs", () => {
14911563
const html = `<link href="/global.css"><script src="https://cdn.example.com/lib.js"></script>`;
14921564
expect(extractSubResources(html, PAGE).sort()).toEqual([

plugins/plugin-agent-orchestrator/src/actions/tasks.ts

Lines changed: 2 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -354,6 +354,8 @@ function taskWithResolvedRoute(
354354
"--- URL Path Mapping ---",
355355
"These mappings are authoritative for hosted artifacts and override conflicting guesses in the task text:",
356356
...mappingLines,
357+
"For hosted deliverables, do not leave placeholder/mock external assets, TODO/placeholder comments, or unfinished sample code; create complete local assets or omit the asset.",
358+
'If the user asks for buttons, forms, or calls to action, implement local behavior such as an in-page section, mailto link, or submit-state handler; do not leave inert href="#" controls.',
357359
);
358360
}
359361
}

0 commit comments

Comments
 (0)