fix(codex): emit paired tool_result chunks so UI tool cards close (#1032)

Wirasm · web-flow · commit a53aff315a7f · 2026-04-07T20:10:50.000+03:00
* fix(codex): emit paired tool_result chunks so UI tool cards close. Tool cards in the web UI sometimes spin forever for Codex workflows. The Codex client only yielded { type: 'tool', ... } on item.completed events, never the paired tool_result chunk. The web adapter's running tool entry then had nothing to close it, leaving the UI relying on the emitLockEvent fallback at lock release — which never fires inside a multi-node DAG, on cancel, or when SSE is briefly disconnected. The Codex SDK only emits item.completed once a command_execution, web_search, or mcp_tool_call is fully done (it carries aggregated_output, exit_code, status, etc.). So we can emit the start and the result back-to-back in the same handler. Changes: - command_execution: emit tool_result with aggregated_output, and append [exit code: N] when the exit code is non-zero so failures are visible. - web_search: emit empty tool_result so the searching card closes. - mcp_tool_call: always emit tool + tool_result, including for the status === 'completed' branch, which previously emitted nothing at all (so completed MCP calls were invisible), and for status === 'failed', where we previously emitted only a system message (which left no card to close, but was inconsistent with how command_execution failures are surfaced). - Update codex.test.ts assertions to cover paired chunks and exit codes. Note: tool_result is paired to its tool by the web adapter's name-based reverse-scan in web.ts. Since these chunks are yielded back-to-back with no other tools in between, the match is unambiguous. PR #1031 will add stable tool_use_id pairing for Claude; a follow-up can plumb Codex's item.id through once that lands. 
* fix(codex): log silent drops and assert paired web_search tool_result. Changes: - command_execution: warn when item.command is falsy (was silently dropped) - mcp_tool_call: warn when result.content has an unexpected shape (previously produced an empty result silently) - Simplify exit_code guard to != null, drop redundant String() cast - Test: assert paired tool_result chunk for web_search Addresses review feedback on #1032.
diff --git a/packages/core/src/clients/codex.test.ts b/packages/core/src/clients/codex.test.ts
@@ -96,7 +96,12 @@ describe('CodexClient', () => {
         events: (async function* () {
           yield {
             type: 'item.completed',
-            item: { type: 'command_execution', command: 'npm test' },
+            item: {
+              type: 'command_execution',
+              command: 'npm test',
+              aggregated_output: 'tests passed\n',
+              exit_code: 0,
+            },
           };
           yield { type: 'turn.completed', usage: defaultUsage };
         })(),
@@ -107,7 +112,42 @@ describe('CodexClient', () => {
         chunks.push(chunk);
       }
 
+      // Codex item.completed fires once the command is fully done, so we emit
+      // start + result back-to-back to close the UI tool card immediately.
       expect(chunks[0]).toEqual({ type: 'tool', toolName: 'npm test' });
+      expect(chunks[1]).toEqual({
+        type: 'tool_result',
+        toolName: 'npm test',
+        toolOutput: 'tests passed\n',
+      });
+    });
+
+    test('appends non-zero exit code to command_execution tool_result', async () => {
+      mockRunStreamed.mockResolvedValue({
+        events: (async function* () {
+          yield {
+            type: 'item.completed',
+            item: {
+              type: 'command_execution',
+              command: 'npm test',
+              aggregated_output: 'failure\n',
+              exit_code: 1,
+            },
+          };
+          yield { type: 'turn.completed', usage: defaultUsage };
+        })(),
+      });
+
+      const chunks = [];
+      for await (const chunk of client.sendQuery('test prompt', '/workspace')) {
+        chunks.push(chunk);
+      }
+
+      expect(chunks[1]).toEqual({
+        type: 'tool_result',
+        toolName: 'npm test',
+        toolOutput: 'failure\n\n[exit code: 1]',
+      });
     });
 
     test('yields thinking events from reasoning items', async () => {
@@ -143,6 +183,11 @@ describe('CodexClient', () => {
       }
 
       expect(chunks[0]).toEqual({ type: 'tool', toolName: '🔍 Searching: codex sdk' });
+      expect(chunks[1]).toEqual({
+        type: 'tool_result',
+        toolName: '🔍 Searching: codex sdk',
+        toolOutput: '',
+      });
     });
 
     test('yields system task list for todo_list items and deduplicates', async () => {
@@ -349,10 +394,19 @@ describe('CodexClient', () => {
         chunks.push(chunk);
       }
 
+      // First mcp call (in_progress on item.completed): start + empty result
       expect(chunks[0]).toEqual({ type: 'tool', toolName: '🔌 MCP: fs/readFile' });
       expect(chunks[1]).toEqual({
-        type: 'system',
-        content: '⚠️ MCP fs/readFile failed: Permission denied',
+        type: 'tool_result',
+        toolName: '🔌 MCP: fs/readFile',
+        toolOutput: '',
+      });
+      // Second mcp call (failed): start + error result so the UI card closes
+      expect(chunks[2]).toEqual({ type: 'tool', toolName: '🔌 MCP: fs/readFile' });
+      expect(chunks[3]).toEqual({
+        type: 'tool_result',
+        toolName: '🔌 MCP: fs/readFile',
+        toolOutput: '❌ Error: Permission denied',
       });
       expect(mockLogger.warn).toHaveBeenCalledWith(
         expect.objectContaining({ server: 'fs', tool: 'readFile' }),
@@ -384,9 +438,21 @@ describe('CodexClient', () => {
         chunks.push(chunk);
       }
 
+      // Each item now emits start + empty result so the UI cards always close.
       expect(chunks[0]).toEqual({ type: 'tool', toolName: '🔌 MCP: readFile' });
-      expect(chunks[1]).toEqual({ type: 'tool', toolName: '🔌 MCP: fs' });
-      expect(chunks[2]).toEqual({ type: 'tool', toolName: '🔌 MCP: MCP tool' });
+      expect(chunks[1]).toEqual({
+        type: 'tool_result',
+        toolName: '🔌 MCP: readFile',
+        toolOutput: '',
+      });
+      expect(chunks[2]).toEqual({ type: 'tool', toolName: '🔌 MCP: fs' });
+      expect(chunks[3]).toEqual({ type: 'tool_result', toolName: '🔌 MCP: fs', toolOutput: '' });
+      expect(chunks[4]).toEqual({ type: 'tool', toolName: '🔌 MCP: MCP tool' });
+      expect(chunks[5]).toEqual({
+        type: 'tool_result',
+        toolName: '🔌 MCP: MCP tool',
+        toolOutput: '',
+      });
     });
 
     test('yields MCP failure without error message', async () => {
@@ -405,18 +471,26 @@ describe('CodexClient', () => {
         chunks.push(chunk);
       }
 
-      expect(chunks[0]).toEqual({
-        type: 'system',
-        content: '⚠️ MCP db/query failed',
+      expect(chunks[0]).toEqual({ type: 'tool', toolName: '🔌 MCP: db/query' });
+      expect(chunks[1]).toEqual({
+        type: 'tool_result',
+        toolName: '🔌 MCP: db/query',
+        toolOutput: '❌ Error: MCP tool failed',
       });
     });
 
-    test('skips MCP tool call with completed status', async () => {
+    test('emits paired tool + tool_result for completed MCP tool call', async () => {
       mockRunStreamed.mockResolvedValue({
         events: (async function* () {
           yield {
             type: 'item.completed',
-            item: { type: 'mcp_tool_call', server: 'fs', tool: 'readFile', status: 'completed' },
+            item: {
+              type: 'mcp_tool_call',
+              server: 'fs',
+              tool: 'readFile',
+              status: 'completed',
+              result: { content: [{ type: 'text', text: 'file contents' }] },
+            },
           };
           yield { type: 'turn.completed', usage: defaultUsage };
         })(),
@@ -427,9 +501,15 @@ describe('CodexClient', () => {
         chunks.push(chunk);
       }
 
-      // Only the result — completed MCP calls should not yield a duplicate tool event
-      expect(chunks).toHaveLength(1);
-      expect(chunks[0]).toEqual({
+      // Completed MCP calls now emit tool + tool_result so the UI card closes.
+      expect(chunks).toHaveLength(3);
+      expect(chunks[0]).toEqual({ type: 'tool', toolName: '🔌 MCP: fs/readFile' });
+      expect(chunks[1]).toEqual({
+        type: 'tool_result',
+        toolName: '🔌 MCP: fs/readFile',
+        toolOutput: JSON.stringify([{ type: 'text', text: 'file contents' }]),
+      });
+      expect(chunks[2]).toEqual({
         type: 'result',
         sessionId: 'new-thread-id',
         tokens: { input: 10, output: 5 },
diff --git a/packages/core/src/clients/codex.ts b/packages/core/src/clients/codex.ts
@@ -308,9 +308,23 @@ export class CodexClient implements IAssistantClient {
                 break;
 
               case 'command_execution':
-                // Tool/command execution
+                // Tool/command execution. The Codex SDK only emits item.completed
+                // once the command has fully run, so we emit the start + result
+                // back-to-back to close the UI's tool card immediately. Without
+                // the paired tool_result, the card spins forever until lock release.
                 if (item.command) {
                   yield { type: 'tool', toolName: item.command };
+                  const exitSuffix =
+                    item.exit_code != null && item.exit_code !== 0
+                      ? `\n[exit code: ${item.exit_code}]`
+                      : '';
+                  yield {
+                    type: 'tool_result',
+                    toolName: item.command,
+                    toolOutput: (item.aggregated_output ?? '') + exitSuffix,
+                  };
+                } else {
+                  getLog().warn({ itemId: item.id }, 'command_execution_missing_command');
                 }
                 break;
 
@@ -323,7 +337,10 @@ export class CodexClient implements IAssistantClient {
 
               case 'web_search':
                 if (item.query) {
-                  yield { type: 'tool', toolName: `🔍 Searching: ${item.query}` };
+                  const searchToolName = `🔍 Searching: ${item.query}`;
+                  yield { type: 'tool', toolName: searchToolName };
+                  // Web search items only fire on completion, so close the card immediately.
+                  yield { type: 'tool_result', toolName: searchToolName, toolOutput: '' };
                 } else {
                   getLog().debug({ itemId: item.id }, 'web_search_missing_query');
                 }
@@ -394,18 +411,41 @@ export class CodexClient implements IAssistantClient {
                   item.server && item.tool
                     ? `${item.server}/${item.tool}`
                     : (item.tool ?? item.server ?? 'MCP tool');
+                const mcpToolName = `🔌 MCP: ${toolInfo}`;
+
+                // Always emit start+result so the UI card closes. item.completed
+                // fires once the call is final (completed or failed).
+                yield { type: 'tool', toolName: mcpToolName };
 
                 if (item.status === 'failed') {
                   getLog().warn(
                     { server: item.server, tool: item.tool, error: item.error, itemId: item.id },
                     'mcp_tool_call_failed'
                   );
-                  const message = item.error?.message
-                    ? `⚠️ MCP ${toolInfo} failed: ${item.error.message}`
-                    : `⚠️ MCP ${toolInfo} failed`;
-                  yield { type: 'system', content: message };
-                } else if (item.status !== 'completed') {
-                  yield { type: 'tool', toolName: `🔌 MCP: ${toolInfo}` };
+                  const errMsg = item.error?.message
+                    ? `❌ Error: ${item.error.message}`
+                    : '❌ Error: MCP tool failed';
+                  yield { type: 'tool_result', toolName: mcpToolName, toolOutput: errMsg };
+                } else {
+                  // status === 'completed' (or 'in_progress', which shouldn't reach
+                  // item.completed but is closed defensively).
+                  let toolOutput = '';
+                  if (item.result?.content) {
+                    if (Array.isArray(item.result.content)) {
+                      toolOutput = JSON.stringify(item.result.content);
+                    } else {
+                      getLog().warn(
+                        {
+                          itemId: item.id,
+                          server: item.server,
+                          tool: item.tool,
+                          resultType: typeof item.result.content,
+                        },
+                        'mcp_tool_call_unexpected_result_shape'
+                      );
+                    }
+                  }
+                  yield { type: 'tool_result', toolName: mcpToolName, toolOutput };
                 }
                 break;
               }