fix(orchestrator): polish verified app completions

NubsCarson · NubsCarson · commit 7912c938ab10 · 2026-05-22T02:07:42.000Z
diff --git a/plugins/plugin-agent-orchestrator/__tests__/unit/spawn-agent.test.ts b/plugins/plugin-agent-orchestrator/__tests__/unit/spawn-agent.test.ts
@@ -273,6 +273,10 @@ describe("TASKS:spawn_agent", () => {
       expect(initialTask).toContain(
         "write files under data/apps/<slug>/, not apps/<slug>/ or public/apps/<slug>/",
       );
+      expect(initialTask).toContain(
+        "do not leave placeholder/mock external assets, TODO/placeholder comments, or unfinished sample code",
+      );
+      expect(initialTask).toContain('do not leave inert href="#" controls');
       expect(initialTask.indexOf("--- Resolved Workspace ---")).toBeLessThan(
         initialTask.indexOf("--- User Task ---"),
       );
diff --git a/plugins/plugin-agent-orchestrator/__tests__/unit/sub-agent-completion-evaluator.test.ts b/plugins/plugin-agent-orchestrator/__tests__/unit/sub-agent-completion-evaluator.test.ts
@@ -170,7 +170,38 @@ describe("subAgentCompletionResponseEvaluator", () => {
       setContexts: [SIMPLE_CONTEXT_ID],
       clearCandidateActions: true,
       clearParentActionHints: true,
-      reply: "https://example.test/apps/nebula/",
+      reply:
+        "The app is live at https://example.test/apps/nebula/. Let me know if you'd like tweaks.",
+      debug: [
+        "verified sub-agent completion has no concrete follow-up action; using direct reply",
+      ],
+    });
+  });
+
+  it("keeps clean completion prose after stripped tool output when it cites a verified URL", async () => {
+    const context = makeContext({
+      text: "[sub-agent: nebula app (opencode) — task_complete]\n[tool output: find files]\n/home/user/project/.git/config\n/home/user/project/data/apps/nebula/index.html\n[/tool output]\nBuilt Nebula Garden with product cards and a waitlist CTA.\nLive URL: https://example.test/apps/nebula/.",
+      metadata: {
+        subAgentVerifiedUrls: ["https://example.test/apps/nebula/"],
+      },
+      messageHandler: {
+        plan: {
+          contexts: ["general"],
+          reply: "https://example.test/apps/nebula/",
+          requiresTool: true,
+          candidateActions: ["SHELL"],
+        },
+      },
+    });
+
+    expect(subAgentCompletionResponseEvaluator.shouldRun(context)).toBe(true);
+    expect(subAgentCompletionResponseEvaluator.evaluate(context)).toEqual({
+      requiresTool: false,
+      setContexts: [SIMPLE_CONTEXT_ID],
+      clearCandidateActions: true,
+      clearParentActionHints: true,
+      reply:
+        "Built Nebula Garden with product cards and a waitlist CTA.\nLive URL: https://example.test/apps/nebula/.",
       debug: [
         "verified sub-agent completion has no concrete follow-up action; using direct reply",
       ],
@@ -208,6 +239,105 @@ describe("subAgentCompletionResponseEvaluator", () => {
     });
   });
 
+  it("keeps a clean synthesized reply when bare completion URLs are verified", async () => {
+    const context = makeContext({
+      text: "[sub-agent: permit garden (opencode) — task_complete]\nhttp://127.0.0.1:6900/apps/permit-garden/\nhttps://example.test/apps/permit-garden/",
+      metadata: {
+        subAgentVerifiedUrls: [
+          "http://127.0.0.1:6900/apps/permit-garden/",
+          "https://example.test/apps/permit-garden/",
+        ],
+      },
+      messageHandler: {
+        plan: {
+          contexts: ["general"],
+          reply:
+            "✅ Built Permit Garden as a fictional bureaucratic zine and sticker landing page. It has product cards, pricing, and a waitlist CTA: https://example.test/apps/permit-garden/",
+          requiresTool: false,
+        },
+      },
+    });
+
+    expect(subAgentCompletionResponseEvaluator.shouldRun(context)).toBe(true);
+    expect(subAgentCompletionResponseEvaluator.evaluate(context)).toEqual({
+      requiresTool: false,
+      setContexts: [SIMPLE_CONTEXT_ID],
+      clearCandidateActions: true,
+      clearParentActionHints: true,
+      reply:
+        "Built Permit Garden as a fictional bureaucratic zine and sticker landing page. It has product cards, pricing, and a waitlist CTA: https://example.test/apps/permit-garden/",
+      debug: [
+        "verified sub-agent completion has no concrete follow-up action; using direct reply",
+      ],
+    });
+  });
+
+  it("removes loopback route aliases from verified app completion replies", async () => {
+    const context = makeContext({
+      text: "[sub-agent: civic vitrine (opencode) — task_complete]\nBuilt Civic Vitrine.\n- URL: http://127.0.0.1:6900/apps/civic-vitrine/\n- Public URL: https://example.test/apps/civic-vitrine/\n- Waitlist form: local submit handler.",
+      metadata: {
+        subAgentVerifiedUrls: [
+          "http://127.0.0.1:6900/apps/civic-vitrine/",
+          "https://example.test/apps/civic-vitrine/",
+        ],
+      },
+      messageHandler: {
+        plan: {
+          contexts: ["general"],
+          reply:
+            "✅ Civic Vitrine site built. You can view it locally at http://127.0.0.1:6900/apps/civic-vitrine/ and publicly at https://example.test/apps/civic-vitrine/.",
+          requiresTool: false,
+        },
+      },
+    });
+
+    expect(subAgentCompletionResponseEvaluator.shouldRun(context)).toBe(true);
+    expect(subAgentCompletionResponseEvaluator.evaluate(context)).toEqual({
+      requiresTool: false,
+      setContexts: [SIMPLE_CONTEXT_ID],
+      clearCandidateActions: true,
+      clearParentActionHints: true,
+      reply:
+        "Built Civic Vitrine.\n- Public URL: https://example.test/apps/civic-vitrine/\n- Waitlist form: local submit handler.",
+      debug: [
+        "verified sub-agent completion has no concrete follow-up action; using direct reply",
+      ],
+    });
+  });
+
+  it("appends the public verified URL to a clean synthesized reply that omits it", async () => {
+    const context = makeContext({
+      text: "[sub-agent: queue cathedral (opencode) — task_complete]\nhttp://127.0.0.1:6900/apps/queue-cathedral/\nhttps://example.test/apps/queue-cathedral/",
+      metadata: {
+        subAgentVerifiedUrls: [
+          "http://127.0.0.1:6900/apps/queue-cathedral/",
+          "https://example.test/apps/queue-cathedral/",
+        ],
+      },
+      messageHandler: {
+        plan: {
+          contexts: ["simple"],
+          reply:
+            "The Queue Cathedral site is live with product cards, prices, and a waitlist CTA.",
+          requiresTool: false,
+        },
+      },
+    });
+
+    expect(subAgentCompletionResponseEvaluator.shouldRun(context)).toBe(true);
+    expect(subAgentCompletionResponseEvaluator.evaluate(context)).toEqual({
+      requiresTool: false,
+      setContexts: [SIMPLE_CONTEXT_ID],
+      clearCandidateActions: true,
+      clearParentActionHints: true,
+      reply:
+        "The Queue Cathedral site is live with product cards, prices, and a waitlist CTA.\nhttps://example.test/apps/queue-cathedral/",
+      debug: [
+        "verified sub-agent completion has no concrete follow-up action; using direct reply",
+      ],
+    });
+  });
+
   it("suppresses empty task_complete placeholders", async () => {
     const context = makeContext({
       text: "[sub-agent: tweet app (opencode) — task_complete]\nsub-agent reports task complete (no captured output).",
diff --git a/plugins/plugin-agent-orchestrator/__tests__/unit/sub-agent-router.test.ts b/plugins/plugin-agent-orchestrator/__tests__/unit/sub-agent-router.test.ts
@@ -757,6 +757,32 @@ describe("SubAgentRouter", () => {
       expect(posted?.content?.text).toContain("NOT reachable");
     });
 
+    it("suppresses an exhausted retry failure when a newer continuation is active", async () => {
+      session = sessionWithTask(`build it at ${DEAD_URL}`, 2);
+      const newer = {
+        ...session,
+        id: "11111111-2222-3333-4444-555555555555",
+        status: "running",
+        createdAt: new Date("2026-05-07T12:01:00.000Z"),
+        lastActivityAt: new Date("2026-05-07T12:01:00.000Z"),
+      } satisfies SessionInfo;
+      acp = makeAcpService(session);
+      acp.service.listSessions.mockResolvedValue([session, newer]);
+      const { runtime, handleMessage, spawnSession } = makeRuntime({
+        acp: acp.service,
+        setting: { ELIZA_URL_VERIFY_SETTLE_MS: "0" },
+      });
+      await SubAgentRouter.start(runtime);
+
+      acp.emit(SESSION_ID, "task_complete", {
+        response: `Done — live at ${DEAD_URL}`,
+      });
+      await new Promise((r) => setTimeout(r, 200));
+
+      expect(spawnSession).not.toHaveBeenCalled();
+      expect(handleMessage).not.toHaveBeenCalled();
+    });
+
     it("treats a 405 (reachable, GET-not-allowed) URL as not dead — no retry", async () => {
       // Sub-agents dump raw HTTP headers into their narration; incidental
       // URLs there (CDN telemetry / NEL `report-to`, POST-only APIs) 405 a
@@ -974,19 +1000,54 @@ describe("SubAgentRouter", () => {
         expect(spawnSession).not.toHaveBeenCalled();
         expect(handleMessage).toHaveBeenCalledTimes(1);
         const posted = handleMessage.mock.calls[0]?.[1];
-        expect(posted?.content?.text).toContain(localPage);
+        expect(posted?.content?.text).not.toContain(localPage);
         expect(posted?.content?.text).toContain(publicPage);
-        expect(posted?.content?.text).toContain("style.css");
-        expect(posted?.content?.text).toContain("app.js");
         expect(posted?.content?.metadata?.subAgentVerifiedUrls).toEqual([
-          localPage,
           publicPage,
         ]);
       } finally {
         fs.rmSync(tmpRoot, { recursive: true, force: true });
       }
     });
 
+    it("rejects generated app pages that reference unreachable image assets", async () => {
+      const appUrl = "https://example.test/apps/permit-garden/";
+      const imageUrl = "https://cdn.example.test/permit-garden/sticker.png";
+      const fetchMock = vi.fn(async (input: RequestInfo | URL) => {
+        if (String(input) === imageUrl) {
+          return new Response("not found", { status: 404 });
+        }
+        return new Response(
+          `<!doctype html><img src="${imageUrl}" alt="Sticker">`,
+          {
+            status: 200,
+            headers: { "content-type": "text/html" },
+          },
+        );
+      });
+      stubFetch(fetchMock);
+      session = sessionWithTask(`build and verify ${appUrl}`);
+      acp = makeAcpService(session);
+      const { runtime, handleMessage, spawnSession } = makeRuntime({
+        acp: acp.service,
+        setting: { ELIZA_URL_VERIFY_SETTLE_MS: "0" },
+      });
+      await SubAgentRouter.start(runtime);
+
+      acp.emit(SESSION_ID, "task_complete", {
+        response: `Done — live at ${appUrl}`,
+      });
+      await new Promise((r) => setTimeout(r, 200));
+
+      expect(spawnSession).toHaveBeenCalledTimes(1);
+      const retryTask = String(spawnSession.mock.calls[0]?.[0]?.initialTask);
+      expect(retryTask).toContain("--- VERIFICATION FEEDBACK");
+      expect(retryTask).toContain(imageUrl);
+      expect(retryTask).toContain("HTTP 404");
+      expect(handleMessage).not.toHaveBeenCalled();
+      expect(fetchMock).toHaveBeenCalledWith(imageUrl, expect.anything());
+    });
+
     it("rejects mapped app URLs whose local target was not written this session by default", async () => {
       const tmpRoot = fs.mkdtempSync(
         path.join(os.tmpdir(), "sub-agent-router-"),
@@ -1267,11 +1328,10 @@ describe("SubAgentRouter", () => {
         expect(handleMessage).toHaveBeenCalledTimes(1);
         const posted = handleMessage.mock.calls[0]?.[1];
         expect(posted?.content?.metadata?.subAgentVerifiedUrls).toEqual([
-          localUrl,
           publicUrl,
         ]);
-        expect(posted?.content?.text).toContain(localUrl);
-        expect(posted?.content?.text).not.toContain(publicUrl);
+        expect(posted?.content?.text).not.toContain(localUrl);
+        expect(posted?.content?.text).toContain(publicUrl);
         expect(posted?.content?.text).not.toContain("[verification:");
       } finally {
         fs.rmSync(tmpRoot, { recursive: true, force: true });
@@ -1487,6 +1547,18 @@ describe("extractSubResources", () => {
     ]);
   });
 
+  it("extracts media src and srcset resources", () => {
+    const html = `<!doctype html><img src="hero.png" srcset="hero-small.png 480w, https://cdn.example.com/hero-large.png 960w">
+      <source srcset="poster.webp 1x, poster@2x.webp 2x">`;
+    expect(extractSubResources(html, PAGE).sort()).toEqual([
+      "https://cdn.example.com/hero-large.png",
+      "https://example.test/apps/bmi/hero-small.png",
+      "https://example.test/apps/bmi/hero.png",
+      "https://example.test/apps/bmi/poster.webp",
+      "https://example.test/apps/bmi/poster@2x.webp",
+    ]);
+  });
+
   it("resolves absolute and root-relative refs", () => {
     const html = `<link href="/global.css"><script src="https://cdn.example.com/lib.js"></script>`;
     expect(extractSubResources(html, PAGE).sort()).toEqual([
diff --git a/plugins/plugin-agent-orchestrator/src/actions/tasks.ts b/plugins/plugin-agent-orchestrator/src/actions/tasks.ts
@@ -354,6 +354,8 @@ function taskWithResolvedRoute(
         "--- URL Path Mapping ---",
         "These mappings are authoritative for hosted artifacts and override conflicting guesses in the task text:",
         ...mappingLines,
+        "For hosted deliverables, do not leave placeholder/mock external assets, TODO/placeholder comments, or unfinished sample code; create complete local assets or omit the asset.",
+        'If the user asks for buttons, forms, or calls to action, implement local behavior such as an in-page section, mailto link, or submit-state handler; do not leave inert href="#" controls.',
       );
     }
   }
diff --git a/plugins/plugin-agent-orchestrator/src/evaluators/sub-agent-completion.ts b/plugins/plugin-agent-orchestrator/src/evaluators/sub-agent-completion.ts
diff --git a/plugins/plugin-agent-orchestrator/src/services/sub-agent-router.ts b/plugins/plugin-agent-orchestrator/src/services/sub-agent-router.ts

Original file line number	Diff line number	Diff line change
`@@ -354,6 +354,8 @@ function taskWithResolvedRoute(`
`354`	`354`	`"--- URL Path Mapping ---",`
`355`	`355`	`"These mappings are authoritative for hosted artifacts and override conflicting guesses in the task text:",`
`356`	`356`	`...mappingLines,`
	`357`	`+ "For hosted deliverables, do not leave placeholder/mock external assets, TODO/placeholder comments, or unfinished sample code; create complete local assets or omit the asset.",`
	`358`	`+ 'If the user asks for buttons, forms, or calls to action, implement local behavior such as an in-page section, mailto link, or submit-state handler; do not leave inert href="#" controls.',`
`357`	`359`	`);`
`358`	`360`	`}`
`359`	`361`	`}`