Skip to content

Commit 274acd5

Browse files
authored
Merge pull request #221 from Kaguya-19/fix/max-output-fix
fix(agent): handle max output token truncation for pure-text output and empty responses
2 parents b870308 + 4552e62 commit 274acd5

1 file changed

Lines changed: 132 additions & 1 deletion

File tree

src/agent/loop/AgentLoop.ts

Lines changed: 132 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -134,15 +134,23 @@ export class AgentLoop {
134134
* and retries; a second hit falls through to the continuation recovery.
135135
*/
136136
let hasAttemptedOutputRetry = false;
137+
/**
138+
* Single-shot guard for empty assistant responses (no text, no tool
139+
* calls). The model's thinking may have consumed the full output
140+
* budget leaving nothing visible; we prompt it once to retry.
141+
*/
142+
let hasAttemptedEmptyRetry = false;
137143
/**
138144
* Multi-turn continuation recovery counter for `max_output_reached`.
139145
* After the single-shot token bump, the loop injects a continuation
140146
* prompt and preserves the truncated assistant message so the model can
141147
* resume from where it was cut off — up to MAX_OUTPUT_RECOVERY_LIMIT
142148
* times.
143149
*/
144-
const MAX_OUTPUT_RECOVERY_LIMIT = 3;
150+
const MAX_OUTPUT_RECOVERY_LIMIT = 50;
145151
let maxOutputRecoveryCount = 0;
152+
const MAX_CONSECUTIVE_EMPTY = 3;
153+
let consecutiveEmptyCount = 0;
146154
const MAX_JSON_SELF_CORRECT_RETRIES = 3;
147155
let jsonSelfCorrectCount = 0;
148156
const largeFileRepair = new LargeFileRepair();
@@ -536,6 +544,127 @@ export class AgentLoop {
536544
}
537545

538546
if (toolCalls.length === 0) {
547+
const assistantText = textFromMessage(assembled.message);
548+
549+
// Global guard: empty assistant response (no text, no tool calls).
550+
// The model produced nothing visible — typically because extended
551+
// thinking consumed the entire output budget.
552+
if (assistantText.length === 0) {
553+
messages.pop();
554+
555+
if (maxOutputRecoveryCount > 0) {
556+
consecutiveEmptyCount++;
557+
if (consecutiveEmptyCount < MAX_CONSECUTIVE_EMPTY
558+
&& maxOutputRecoveryCount < MAX_OUTPUT_RECOVERY_LIMIT) {
559+
maxOutputRecoveryCount++;
560+
messages.push({
561+
role: "user",
562+
content: [{
563+
type: "text",
564+
text: "Output token limit hit. Resume directly — no apology, no recap of what you were doing. "
565+
+ "Pick up mid-sentence if that is where the cut happened.",
566+
}],
567+
metadata: { synthetic: true, purpose: "max_output_recovery" },
568+
});
569+
yield {
570+
type: "turn_continued",
571+
sessionId: input.sessionId,
572+
turnId: input.turnId,
573+
reason: "model_error",
574+
};
575+
continue;
576+
}
577+
// Exhausted consecutive empty retries (or the overall max-output recovery limit) — surface error via frontend banner.
578+
finalMessage = messages.filter((m) => m.role === "assistant").at(-1);
579+
const result = this.createTurnResult(input, {
580+
type: "error",
581+
stopReason: "model_error",
582+
usage,
583+
permissionDenials,
584+
turns: turnCount,
585+
startedAt,
586+
finalMessage,
587+
errors: [agentError(
588+
"agent_model_error",
589+
"The model returned multiple consecutive empty responses. "
590+
+ "The max output token limit is likely too low — "
591+
+ "try increasing it so the model has room for visible output after reasoning.",
592+
)],
593+
});
594+
yield { type: "turn_failed", sessionId: input.sessionId, turnId: input.turnId, error: result.errors![0]! };
595+
await captureTurn(result.type === "error");
596+
yield { type: "turn_completed", sessionId: input.sessionId, turnId: input.turnId, result };
597+
return { result, messages };
598+
} else if (!hasAttemptedEmptyRetry) {
599+
// First occurrence: prompt the model to produce visible output.
600+
hasAttemptedEmptyRetry = true;
601+
messages.push({
602+
role: "user",
603+
content: [{
604+
type: "text",
605+
text: "Your previous response was empty (thinking only, no visible text). "
606+
+ "Please provide your answer as visible text output.",
607+
}],
608+
metadata: { synthetic: true, purpose: "empty_response_retry" },
609+
});
610+
yield {
611+
type: "turn_continued",
612+
sessionId: input.sessionId,
613+
turnId: input.turnId,
614+
reason: "model_error",
615+
};
616+
continue;
617+
} else {
618+
// Retry also returned empty — give user a diagnostic hint.
619+
finalMessage = {
620+
role: "assistant",
621+
content: [{
622+
type: "text",
623+
text: "[The model returned an empty response. "
624+
+ "This usually means the max output token limit is too low — "
625+
+ "the model's reasoning/thinking consumed all available output "
626+
+ "tokens before producing visible text. "
627+
+ "Try increasing the max output tokens setting.]",
628+
}],
629+
};
630+
messages.push(finalMessage);
631+
}
632+
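// Only the diagnostic-hint branch reaches here (the others `continue` or `return`); fall through to normal stop. NOTE(review): the finishReason === "length" check below still runs on this path and may inject a continuation prompt after the diagnostic message — confirm this is intended.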
633+
}
634+
635+
// Pure-text output truncated by max_output_tokens: the model was
636+
// mid-sentence with no tool calls. Unlike tool-call truncation we
637+
// skip the "strip-and-retry-with-doubled-tokens" phase (Phase A)
638+
// because (a) the text already generated is valid and discarding it
639+
// wastes tokens, and (b) blindly doubling maxOutputTokens may
640+
// exceed the provider's model cap and trigger a 400 error.
641+
// Instead, keep the truncated assistant message in context and
642+
// inject a continuation prompt so the model resumes from the cut.
643+
if (assembled.finishReason === "length") {
644+
consecutiveEmptyCount = 0;
645+
if (maxOutputRecoveryCount < MAX_OUTPUT_RECOVERY_LIMIT) {
646+
maxOutputRecoveryCount++;
647+
messages.push({
648+
role: "user",
649+
content: [{
650+
type: "text",
651+
text: "Output token limit hit. Resume directly — no apology, no recap of what you were doing. "
652+
+ "Pick up mid-sentence if that is where the cut happened.",
653+
}],
654+
metadata: { synthetic: true, purpose: "max_output_recovery" },
655+
});
656+
yield {
657+
type: "turn_continued",
658+
sessionId: input.sessionId,
659+
turnId: input.turnId,
660+
reason: "model_error",
661+
};
662+
continue;
663+
}
664+
// Exhausted — fall through to normal completion with whatever
665+
// text was produced so far.
666+
}
667+
539668
const largeFileDecision = largeFileRepair.onNoToolCalls();
540669
if (largeFileDecision) {
541670
const continued = await continueWithSyntheticPrompt(largeFileDecision);
@@ -839,7 +968,9 @@ export class AgentLoop {
839968
} else {
840969
consecutiveAllInvalidTurns = 0;
841970
maxOutputRecoveryCount = 0;
971+
consecutiveEmptyCount = 0;
842972
hasAttemptedOutputRetry = false;
973+
hasAttemptedEmptyRetry = false;
843974
}
844975

845976
if (this.config.stopOnStructuredOutput && structuredOutput !== undefined) {

0 commit comments

Comments
 (0)