Skip to content

Commit a53aff3

Browse files
authored
fix(codex): emit paired tool_result chunks so UI tool cards close (#1032)
* fix(codex): emit paired tool_result chunks so UI tool cards close Tool cards in the web UI sometimes spin forever for Codex workflows. The Codex client only yielded { type: 'tool', ... } on item.completed events, never the paired tool_result chunk. The web adapter's running tool entry then had nothing to close it, leaving the UI relying on the emitLockEvent fallback at lock release — which never fires inside a multi-node DAG, on cancel, or when SSE is briefly disconnected. The Codex SDK only emits item.completed once a command_execution, web_search, or mcp_tool_call is fully done (it carries aggregated_output, exit_code, status, etc). So we can emit the start and the result back-to-back in the same handler. Changes: - command_execution: emit tool_result with aggregated_output, append [exit code: N] when non-zero so failures are visible. - web_search: emit empty tool_result so the searching card closes. - mcp_tool_call: always emit tool + tool_result, including for the status === 'completed' branch which previously emitted nothing at all (so completed MCP calls were invisible) and for status === 'failed' where we previously emitted only a system message (leaving no card to close, but inconsistent with command_execution failures). - Update codex.test.ts assertions to cover paired chunks and exit codes. Note: tool_result is paired to its tool by the web adapter's name-based reverse-scan in web.ts. Since these chunks are yielded back-to-back with no other tools in between, the match is unambiguous. PR #1031 will add stable tool_use_id pairing for Claude; a follow-up can plumb Codex's item.id through once that lands. * fix(codex): log silent drops and assert paired web_search tool_result - command_execution: warn when item.command is falsy (was silently dropped) - mcp_tool_call: warn when result.content has unexpected shape (was silent empty) - Simplify exit_code guard to != null, drop redundant String() cast - Test: assert paired tool_result chunk for web_search Addresses review feedback on #1032.
1 parent a5091b2 commit a53aff3

2 files changed

Lines changed: 141 additions & 21 deletions

File tree

packages/core/src/clients/codex.test.ts

Lines changed: 93 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -96,7 +96,12 @@ describe('CodexClient', () => {
9696
events: (async function* () {
9797
yield {
9898
type: 'item.completed',
99-
item: { type: 'command_execution', command: 'npm test' },
99+
item: {
100+
type: 'command_execution',
101+
command: 'npm test',
102+
aggregated_output: 'tests passed\n',
103+
exit_code: 0,
104+
},
100105
};
101106
yield { type: 'turn.completed', usage: defaultUsage };
102107
})(),
@@ -107,7 +112,42 @@ describe('CodexClient', () => {
107112
chunks.push(chunk);
108113
}
109114

115+
// Codex item.completed fires once the command is fully done, so we emit
116+
// start + result back-to-back to close the UI tool card immediately.
110117
expect(chunks[0]).toEqual({ type: 'tool', toolName: 'npm test' });
118+
expect(chunks[1]).toEqual({
119+
type: 'tool_result',
120+
toolName: 'npm test',
121+
toolOutput: 'tests passed\n',
122+
});
123+
});
124+
125+
test('appends non-zero exit code to command_execution tool_result', async () => {
126+
mockRunStreamed.mockResolvedValue({
127+
events: (async function* () {
128+
yield {
129+
type: 'item.completed',
130+
item: {
131+
type: 'command_execution',
132+
command: 'npm test',
133+
aggregated_output: 'failure\n',
134+
exit_code: 1,
135+
},
136+
};
137+
yield { type: 'turn.completed', usage: defaultUsage };
138+
})(),
139+
});
140+
141+
const chunks = [];
142+
for await (const chunk of client.sendQuery('test prompt', '/workspace')) {
143+
chunks.push(chunk);
144+
}
145+
146+
expect(chunks[1]).toEqual({
147+
type: 'tool_result',
148+
toolName: 'npm test',
149+
toolOutput: 'failure\n\n[exit code: 1]',
150+
});
111151
});
112152

113153
test('yields thinking events from reasoning items', async () => {
@@ -143,6 +183,11 @@ describe('CodexClient', () => {
143183
}
144184

145185
expect(chunks[0]).toEqual({ type: 'tool', toolName: '🔍 Searching: codex sdk' });
186+
expect(chunks[1]).toEqual({
187+
type: 'tool_result',
188+
toolName: '🔍 Searching: codex sdk',
189+
toolOutput: '',
190+
});
146191
});
147192

148193
test('yields system task list for todo_list items and deduplicates', async () => {
@@ -349,10 +394,19 @@ describe('CodexClient', () => {
349394
chunks.push(chunk);
350395
}
351396

397+
// First mcp call (in_progress on item.completed): start + empty result
352398
expect(chunks[0]).toEqual({ type: 'tool', toolName: '🔌 MCP: fs/readFile' });
353399
expect(chunks[1]).toEqual({
354-
type: 'system',
355-
content: '⚠️ MCP fs/readFile failed: Permission denied',
400+
type: 'tool_result',
401+
toolName: '🔌 MCP: fs/readFile',
402+
toolOutput: '',
403+
});
404+
// Second mcp call (failed): start + error result so the UI card closes
405+
expect(chunks[2]).toEqual({ type: 'tool', toolName: '🔌 MCP: fs/readFile' });
406+
expect(chunks[3]).toEqual({
407+
type: 'tool_result',
408+
toolName: '🔌 MCP: fs/readFile',
409+
toolOutput: '❌ Error: Permission denied',
356410
});
357411
expect(mockLogger.warn).toHaveBeenCalledWith(
358412
expect.objectContaining({ server: 'fs', tool: 'readFile' }),
@@ -384,9 +438,21 @@ describe('CodexClient', () => {
384438
chunks.push(chunk);
385439
}
386440

441+
// Each item now emits start + empty result so the UI cards always close.
387442
expect(chunks[0]).toEqual({ type: 'tool', toolName: '🔌 MCP: readFile' });
388-
expect(chunks[1]).toEqual({ type: 'tool', toolName: '🔌 MCP: fs' });
389-
expect(chunks[2]).toEqual({ type: 'tool', toolName: '🔌 MCP: MCP tool' });
443+
expect(chunks[1]).toEqual({
444+
type: 'tool_result',
445+
toolName: '🔌 MCP: readFile',
446+
toolOutput: '',
447+
});
448+
expect(chunks[2]).toEqual({ type: 'tool', toolName: '🔌 MCP: fs' });
449+
expect(chunks[3]).toEqual({ type: 'tool_result', toolName: '🔌 MCP: fs', toolOutput: '' });
450+
expect(chunks[4]).toEqual({ type: 'tool', toolName: '🔌 MCP: MCP tool' });
451+
expect(chunks[5]).toEqual({
452+
type: 'tool_result',
453+
toolName: '🔌 MCP: MCP tool',
454+
toolOutput: '',
455+
});
390456
});
391457

392458
test('yields MCP failure without error message', async () => {
@@ -405,18 +471,26 @@ describe('CodexClient', () => {
405471
chunks.push(chunk);
406472
}
407473

408-
expect(chunks[0]).toEqual({
409-
type: 'system',
410-
content: '⚠️ MCP db/query failed',
474+
expect(chunks[0]).toEqual({ type: 'tool', toolName: '🔌 MCP: db/query' });
475+
expect(chunks[1]).toEqual({
476+
type: 'tool_result',
477+
toolName: '🔌 MCP: db/query',
478+
toolOutput: '❌ Error: MCP tool failed',
411479
});
412480
});
413481

414-
test('skips MCP tool call with completed status', async () => {
482+
test('emits paired tool + tool_result for completed MCP tool call', async () => {
415483
mockRunStreamed.mockResolvedValue({
416484
events: (async function* () {
417485
yield {
418486
type: 'item.completed',
419-
item: { type: 'mcp_tool_call', server: 'fs', tool: 'readFile', status: 'completed' },
487+
item: {
488+
type: 'mcp_tool_call',
489+
server: 'fs',
490+
tool: 'readFile',
491+
status: 'completed',
492+
result: { content: [{ type: 'text', text: 'file contents' }] },
493+
},
420494
};
421495
yield { type: 'turn.completed', usage: defaultUsage };
422496
})(),
@@ -427,9 +501,15 @@ describe('CodexClient', () => {
427501
chunks.push(chunk);
428502
}
429503

430-
// Only the result — completed MCP calls should not yield a duplicate tool event
431-
expect(chunks).toHaveLength(1);
432-
expect(chunks[0]).toEqual({
504+
// Completed MCP calls now emit tool + tool_result so the UI card closes.
505+
expect(chunks).toHaveLength(3);
506+
expect(chunks[0]).toEqual({ type: 'tool', toolName: '🔌 MCP: fs/readFile' });
507+
expect(chunks[1]).toEqual({
508+
type: 'tool_result',
509+
toolName: '🔌 MCP: fs/readFile',
510+
toolOutput: JSON.stringify([{ type: 'text', text: 'file contents' }]),
511+
});
512+
expect(chunks[2]).toEqual({
433513
type: 'result',
434514
sessionId: 'new-thread-id',
435515
tokens: { input: 10, output: 5 },

packages/core/src/clients/codex.ts

Lines changed: 48 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -308,9 +308,23 @@ export class CodexClient implements IAssistantClient {
308308
break;
309309

310310
case 'command_execution':
311-
// Tool/command execution
311+
// Tool/command execution. The Codex SDK only emits item.completed
312+
// once the command has fully run, so we emit the start + result
313+
// back-to-back to close the UI's tool card immediately. Without
314+
// the paired tool_result, the card spins forever until lock release.
312315
if (item.command) {
313316
yield { type: 'tool', toolName: item.command };
317+
const exitSuffix =
318+
item.exit_code != null && item.exit_code !== 0
319+
? `\n[exit code: ${item.exit_code}]`
320+
: '';
321+
yield {
322+
type: 'tool_result',
323+
toolName: item.command,
324+
toolOutput: (item.aggregated_output ?? '') + exitSuffix,
325+
};
326+
} else {
327+
getLog().warn({ itemId: item.id }, 'command_execution_missing_command');
314328
}
315329
break;
316330

@@ -323,7 +337,10 @@ export class CodexClient implements IAssistantClient {
323337

324338
case 'web_search':
325339
if (item.query) {
326-
yield { type: 'tool', toolName: `🔍 Searching: ${item.query}` };
340+
const searchToolName = `🔍 Searching: ${item.query}`;
341+
yield { type: 'tool', toolName: searchToolName };
342+
// Web search items only fire on completion, so close the card immediately.
343+
yield { type: 'tool_result', toolName: searchToolName, toolOutput: '' };
327344
} else {
328345
getLog().debug({ itemId: item.id }, 'web_search_missing_query');
329346
}
@@ -394,18 +411,41 @@ export class CodexClient implements IAssistantClient {
394411
item.server && item.tool
395412
? `${item.server}/${item.tool}`
396413
: (item.tool ?? item.server ?? 'MCP tool');
414+
const mcpToolName = `🔌 MCP: ${toolInfo}`;
415+
416+
// Always emit start+result so the UI card closes. item.completed
417+
// fires once the call is final (completed or failed).
418+
yield { type: 'tool', toolName: mcpToolName };
397419

398420
if (item.status === 'failed') {
399421
getLog().warn(
400422
{ server: item.server, tool: item.tool, error: item.error, itemId: item.id },
401423
'mcp_tool_call_failed'
402424
);
403-
const message = item.error?.message
404-
? `⚠️ MCP ${toolInfo} failed: ${item.error.message}`
405-
: `⚠️ MCP ${toolInfo} failed`;
406-
yield { type: 'system', content: message };
407-
} else if (item.status !== 'completed') {
408-
yield { type: 'tool', toolName: `🔌 MCP: ${toolInfo}` };
425+
const errMsg = item.error?.message
426+
? `❌ Error: ${item.error.message}`
427+
: '❌ Error: MCP tool failed';
428+
yield { type: 'tool_result', toolName: mcpToolName, toolOutput: errMsg };
429+
} else {
430+
// status === 'completed' (or 'in_progress', which shouldn't reach
431+
// item.completed but is closed defensively).
432+
let toolOutput = '';
433+
if (item.result?.content) {
434+
if (Array.isArray(item.result.content)) {
435+
toolOutput = JSON.stringify(item.result.content);
436+
} else {
437+
getLog().warn(
438+
{
439+
itemId: item.id,
440+
server: item.server,
441+
tool: item.tool,
442+
resultType: typeof item.result.content,
443+
},
444+
'mcp_tool_call_unexpected_result_shape'
445+
);
446+
}
447+
}
448+
yield { type: 'tool_result', toolName: mcpToolName, toolOutput };
409449
}
410450
break;
411451
}

0 commit comments

Comments
 (0)