Skip to content

Commit 01f7781

Browse files
Brant Levinson and cursoragent committed
fix(codex): extract last JSON object from streamed structured output
Codex streams multiple intermediate JSON objects as progress updates during a turn. The accumulated text is a concatenation of all of them, which isn't valid JSON. When JSON.parse fails on the full text, we now extract the last complete top-level JSON object using brace-depth tracking — that's the authoritative final answer. Without this, structuredOutput was undefined for multi-message turns, causing downstream condition evaluators ($node.output.field) to fail with condition_json_parse_failed and skip conditional nodes. Co-authored-by: Cursor <cursoragent@cursor.com>
1 parent 615d37a commit 01f7781

1 file changed

Lines changed: 74 additions & 10 deletions

File tree

packages/providers/src/codex/provider.ts

Lines changed: 74 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -337,6 +337,59 @@ function buildTurnOptions(requestOptions?: SendQueryOptions): {
337337
return { turnOptions, hasOutputFormat };
338338
}
339339

340+
/**
 * Extract the last valid JSON object from a string that may contain multiple
 * concatenated JSON objects (common when Codex streams progress updates).
 *
 * Forward-scans for `{`, locates the matching `}` using brace-depth tracking
 * (respecting quoted strings and backslash escapes), and tries JSON.parse on
 * that span. After a successful parse the scan resumes after the closing
 * brace, so objects nested inside an already-parsed object are not reported
 * separately.
 *
 * If a `{` has no matching `}` or its span is not valid JSON, the scan resumes
 * one character after that `{` instead of giving up. A stray or unterminated
 * brace earlier in the text therefore cannot hide a valid object that follows
 * it (or sits inside it). The price is that malformed input with many
 * unmatched `{` is rescanned, which is quadratic in the worst case.
 *
 * @param text - Accumulated text that may contain zero or more JSON objects.
 * @returns The last object that parsed successfully, or `undefined` if none did.
 */
function extractLastJsonObject(text: string): unknown {
  let lastParsed: unknown;
  let start = text.indexOf('{');
  while (start !== -1) {
    const end = findClosingBraceIndex(text, start);
    // Default: treat this `{` as noise and look for the next candidate right after it.
    let resumeAt = start + 1;
    if (end !== -1) {
      try {
        lastParsed = JSON.parse(text.slice(start, end + 1));
        // Parsed as a whole — skip past it so nested objects are not re-reported.
        resumeAt = end + 1;
      } catch {
        // Matched braces but not valid JSON — fall through and retry from the
        // next `{`, which may open a valid object inside or after this span.
      }
    }
    start = text.indexOf('{', resumeAt);
  }
  return lastParsed;
}

/**
 * Find the index of the `}` that closes the `{` at `start`, ignoring braces
 * that appear inside double-quoted strings.
 *
 * @param text - Text being scanned.
 * @param start - Index of the opening `{`.
 * @returns Index of the matching `}`, or -1 if the text ends before the brace closes.
 */
function findClosingBraceIndex(text: string, start: number): number {
  let depth = 0;
  let inString = false;
  let escaped = false;
  for (let j = start; j < text.length; j++) {
    const ch = text[j];
    if (escaped) {
      // Character following a backslash inside a string is taken literally.
      escaped = false;
      continue;
    }
    if (ch === '\\' && inString) {
      escaped = true;
      continue;
    }
    if (ch === '"') {
      inString = !inString;
      continue;
    }
    if (inString) continue;
    if (ch === '{') {
      depth++;
    } else if (ch === '}') {
      depth--;
      if (depth === 0) return j;
    }
  }
  return -1;
}
392+
340393
// ─── Stream Normalizer ───────────────────────────────────────────────────
341394

342395
/** State maintained across Codex event stream normalization. */
@@ -580,22 +633,33 @@ async function* streamCodexEvents(
580633
// Codex returns structured output inline in agent_message text.
581634
// Normalize: parse as JSON and put on structuredOutput so the
582635
// dag-executor can handle all providers uniformly.
636+
//
637+
// Codex may stream multiple intermediate JSON objects as separate
638+
// agent_message items (progress updates). accumulatedText is the
639+
// concatenation of all of them, which isn't valid JSON. When a
640+
// straight parse fails, extract the last complete JSON object —
641+
// that's the authoritative final answer.
583642
let structuredOutput: unknown;
584643
if (hasOutputFormat && accumulatedText) {
585644
try {
586645
structuredOutput = JSON.parse(accumulatedText);
587646
getLog().debug('codex.structured_output_parsed');
588647
} catch {
589-
getLog().warn(
590-
{ outputPreview: accumulatedText.slice(0, 200) },
591-
'codex.structured_output_not_json'
592-
);
593-
yield {
594-
type: 'system',
595-
content:
596-
'⚠️ Structured output requested but Codex returned non-JSON text. ' +
597-
'Downstream $nodeId.output.field references may not evaluate correctly.',
598-
};
648+
structuredOutput = extractLastJsonObject(accumulatedText);
649+
if (structuredOutput !== undefined) {
650+
getLog().debug('codex.structured_output_parsed_last_object');
651+
} else {
652+
getLog().warn(
653+
{ outputPreview: accumulatedText.slice(0, 200) },
654+
'codex.structured_output_not_json'
655+
);
656+
yield {
657+
type: 'system',
658+
content:
659+
'⚠️ Structured output requested but Codex returned non-JSON text. ' +
660+
'Downstream $nodeId.output.field references may not evaluate correctly.',
661+
};
662+
}
599663
}
600664
}
601665

0 commit comments

Comments
 (0)