fix: align persistent stop hook and tighten agent output contracts (#2653)

Yeachan-Heo · Codex Review · web-flow · commit 15799479ec10 · 2026-04-16T10:23:07.000+09:00
Co-authored-by: Codex Review <codex-review@example.com>
diff --git a/hooks/hooks.json b/hooks/hooks.json
@@ -180,7 +180,7 @@
           },
           {
             "type": "command",
-            "command": "node \"$CLAUDE_PLUGIN_ROOT\"/scripts/run.cjs \"$CLAUDE_PLUGIN_ROOT\"/scripts/persistent-mode.cjs",
+            "command": "node \"$CLAUDE_PLUGIN_ROOT\"/scripts/run.cjs \"$CLAUDE_PLUGIN_ROOT\"/scripts/persistent-mode.mjs",
             "timeout": 10
           },
           {
diff --git a/src/__tests__/issue-2652-runtime-wiring-and-output-contract.test.ts b/src/__tests__/issue-2652-runtime-wiring-and-output-contract.test.ts
@@ -0,0 +1,27 @@
+import { describe, expect, it } from 'vitest';
+import { readFileSync } from 'fs';
+import { join } from 'path';
+import { ULTRAWORK_MESSAGE } from '../installer/hooks.js';
+
+describe('issue #2652 runtime wiring and output contract', () => {
+  it('ships the Stop hook through persistent-mode.mjs', () => {
+    const hooksJsonPath = join(process.cwd(), 'hooks', 'hooks.json');
+    const hooks = JSON.parse(readFileSync(hooksJsonPath, 'utf-8')) as {
+      hooks?: Record<string, Array<{ hooks?: Array<{ command?: string }> }>>;
+    };
+
+    const stopCommands = (hooks.hooks?.Stop ?? [])
+      .flatMap((entry) => entry.hooks ?? [])
+      .map((hook) => hook.command ?? '');
+
+    expect(stopCommands.some((command) => command.includes('/scripts/persistent-mode.mjs'))).toBe(true);
+    expect(stopCommands.some((command) => command.includes('/scripts/persistent-mode.cjs'))).toBe(false);
+  });
+
+  it('ultrawork mode instructs spawned agents to keep outputs concise', () => {
+    expect(ULTRAWORK_MESSAGE).toContain('CONCISE OUTPUTS');
+    expect(ULTRAWORK_MESSAGE).toContain('under 100 words');
+    expect(ULTRAWORK_MESSAGE).toContain('files touched');
+    expect(ULTRAWORK_MESSAGE).toContain('verification status');
+  });
+});
diff --git a/src/__tests__/ralph-prd-mandatory.test.ts b/src/__tests__/ralph-prd-mandatory.test.ts
@@ -320,6 +320,7 @@ describe('Ralph PRD-Mandatory', () => {
       expect(prompt).toContain('Are ALL requirements from the original task met?');
       expect(prompt).toContain('Is the implementation complete, not partial?');
       expect(prompt).not.toContain('Verify EACH acceptance criterion');
+      expect(prompt).toContain('concise review summary under 100 words');
     });
 
     it('should fall back to generic prompt when story is undefined', () => {
diff --git a/src/hooks/autopilot/__tests__/pipeline.test.ts b/src/hooks/autopilot/__tests__/pipeline.test.ts
@@ -133,6 +133,7 @@ describe('Stage Adapters', () => {
       expect(prompt).toContain('Team Mode');
       expect(prompt).toContain('TeamCreate');
       expect(prompt).toContain(EXECUTION_COMPLETION_SIGNAL);
+      expect(prompt).toContain('short execution summary under 100 words');
     });
 
     it('should generate solo prompt for solo mode', () => {
@@ -143,6 +144,7 @@ describe('Stage Adapters', () => {
       });
       expect(prompt).toContain('Solo Mode');
       expect(prompt).toContain(EXECUTION_COMPLETION_SIGNAL);
+      expect(prompt).toContain('short execution summary under 100 words');
     });
   });
 
@@ -166,6 +168,7 @@ describe('Stage Adapters', () => {
       });
       expect(prompt).toContain('50');
       expect(prompt).toContain(RALPH_COMPLETION_SIGNAL);
+      expect(prompt).toContain('concise review summary under 100 words');
     });
   });
 
diff --git a/src/hooks/autopilot/__tests__/prompts.test.ts b/src/hooks/autopilot/__tests__/prompts.test.ts
@@ -73,6 +73,13 @@ describe("Prompt Generation", () => {
       expect(prompt).toContain("Ralph");
       expect(prompt).toContain("Ultrawork");
     });
+
+    it("should require concise executor summaries", () => {
+      const prompt = getExecutionPrompt("plan.md");
+      expect(prompt).toContain("concise execution summary under 100 words");
+      expect(prompt).toContain("files touched");
+      expect(prompt).toContain("verification status");
+    });
   });
 
   describe("getQAPrompt", () => {
@@ -96,6 +103,13 @@ describe("Prompt Generation", () => {
       expect(prompt).toContain("Security");
       expect(prompt).toContain("Quality");
     });
+
+    it("should require concise reviewer summaries", () => {
+      const prompt = getValidationPrompt("spec.md");
+      expect(prompt).toContain("concise review summary under 100 words");
+      expect(prompt).toContain("evidence highlights");
+      expect(prompt).toContain("files checked");
+    });
   });
 
   describe("getPhasePrompt", () => {
diff --git a/src/hooks/autopilot/adapters/execution-adapter.ts b/src/hooks/autopilot/adapters/execution-adapter.ts
@@ -49,6 +49,10 @@ Use the Team orchestrator to execute tasks in parallel:
 4. **Monitor progress** as teammates complete tasks
 5. **Coordinate** dependencies between tasks
 
+### Output Contract
+
+Every teammate response must stay concise: return ONLY a short execution summary under 100 words covering what changed, files touched, verification status, and blockers. Store bulky logs/details in files or artifacts and reference them briefly.
+
 ### Agent Selection
 
 Match agent types to task complexity:
@@ -92,6 +96,10 @@ Execute tasks sequentially (or with limited parallelism via background agents):
 3. Use executor agents for independent tasks that can run in parallel
 4. Track progress in the TODO list
 
+### Output Contract
+
+Every spawned executor response must return ONLY a short execution summary under 100 words covering what changed, files touched, verification status, and blockers. Store bulky logs/details in files or artifacts and reference them briefly.
+
 ### Agent Spawning
 
 \`\`\`
diff --git a/src/hooks/autopilot/adapters/ralph-adapter.ts b/src/hooks/autopilot/adapters/ralph-adapter.ts
@@ -39,6 +39,8 @@ Verify the implementation against the specification using the Ralph verification
 
 Spawn parallel verification reviewers:
 
+Each reviewer must return ONLY a concise review summary under 100 words covering verdict, evidence highlights, files checked, and blockers. Avoid dumping long logs or transcripts into the main session.
+
 \`\`\`
 // Functional Completeness Review
 Task(
diff --git a/src/hooks/autopilot/prompts.ts b/src/hooks/autopilot/prompts.ts
@@ -214,6 +214,7 @@ Ralph and Ultrawork are now active. Execute tasks in parallel where possible.
 - Spawn multiple executor agents for parallel work
 - Track progress in the TODO list
 - Use appropriate agent tiers based on task complexity
+- Every spawned agent must return ONLY a concise execution summary under 100 words covering: what changed, files touched, verification status, and blockers. Do not paste long logs inline; write bulky output to files/artifacts and reference them briefly.
 
 ### Agent Spawning Pattern
 
@@ -316,6 +317,8 @@ Spawn parallel validation architects for comprehensive review.
 
 Spawn all three architects in parallel:
 
+Each reviewer must return ONLY a concise review summary under 100 words with verdict, evidence highlights, files checked, and blockers. Do not paste long transcripts or logs into the main session.
+
 \`\`\`
 // Functional Completeness Review
 Task(
diff --git a/src/hooks/ralph/verifier.ts b/src/hooks/ralph/verifier.ts
@@ -272,6 +272,7 @@ ${getVerificationAgentStep(state.critic_mode)}
    - Are there any obvious bugs or issues?
    - Does the code compile/run without errors?
    - Are tests passing (if applicable)?
+   - Return ONLY a concise review summary under 100 words with verdict, evidence highlights, files checked, and blockers. Do not paste long logs inline.
 
 3. **Based on ${criticLabel}'s response:**
    - If APPROVED: Output the exact correlated approval tag \`${approvalTag}\`, then run \`/oh-my-claudecode:cancel\` to cleanly exit
diff --git a/src/installer/hooks.ts b/src/installer/hooks.ts
@@ -135,6 +135,7 @@ TELL THE USER WHAT AGENTS YOU WILL LEVERAGE NOW TO SATISFY USER'S REQUEST.
 - **TODO**: Track EVERY step. Mark complete IMMEDIATELY after each.
 - **PARALLEL**: Fire independent agent calls simultaneously via Task(run_in_background=true) - NEVER wait sequentially.
 - **BACKGROUND FIRST**: Use Task tool for exploration/document-specialist agents (10+ concurrent if needed).
+- **CONCISE OUTPUTS**: Every Task/Agent result must return ONLY a short execution summary (target: under 100 words) covering what changed, files touched, verification status, and blockers. Do not paste long logs into the main session; put bulky details in files/artifacts and reference them briefly.
 - **VERIFY**: Re-read request after completion. Check ALL requirements met before reporting done.
 - **DELEGATE**: Don't do everything yourself - orchestrate specialized agents for their strengths.
 

Original file line number	Diff line number	Diff line change
`@@ -180,7 +180,7 @@`
`180`	`180`	`},`
`181`	`181`	`{`
`182`	`182`	`"type": "command",`
`183`		`- "command": "node \"$CLAUDE_PLUGIN_ROOT\"/scripts/run.cjs \"$CLAUDE_PLUGIN_ROOT\"/scripts/persistent-mode.cjs",`
	`183`	`+ "command": "node \"$CLAUDE_PLUGIN_ROOT\"/scripts/run.cjs \"$CLAUDE_PLUGIN_ROOT\"/scripts/persistent-mode.mjs",`
`184`	`184`	`"timeout": 10`
`185`	`185`	`},`
`186`	`186`	`{`