@@ -134,15 +134,23 @@ export class AgentLoop {
134134 * and retries; a second hit falls through to the continuation recovery.
135135 */
136136 let hasAttemptedOutputRetry = false ;
137+ /**
138+ * Single-shot guard for empty assistant responses (no text, no tool
139+ * calls). The model's thinking may have consumed the full output
140+ * budget leaving nothing visible; we prompt it once to retry.
141+ */
142+ let hasAttemptedEmptyRetry = false ;
137143 /**
138144 * Multi-turn continuation recovery counter for `max_output_reached`.
139145 * After the single-shot token bump, the loop injects a continuation
140146 * prompt and preserves the truncated assistant message so the model can
141147 * resume from where it was cut off — up to MAX_OUTPUT_RECOVERY_LIMIT
142148 * times.
143149 */
144- const MAX_OUTPUT_RECOVERY_LIMIT = 3 ;
150+ const MAX_OUTPUT_RECOVERY_LIMIT = 50 ;
145151 let maxOutputRecoveryCount = 0 ;
152+ const MAX_CONSECUTIVE_EMPTY = 3 ;
153+ let consecutiveEmptyCount = 0 ;
146154 const MAX_JSON_SELF_CORRECT_RETRIES = 3 ;
147155 let jsonSelfCorrectCount = 0 ;
148156 const largeFileRepair = new LargeFileRepair ( ) ;
@@ -536,6 +544,127 @@ export class AgentLoop {
536544 }
537545
538546 if ( toolCalls . length === 0 ) {
547+ const assistantText = textFromMessage ( assembled . message ) ;
548+
549+ // Global guard: empty assistant response (no text, no tool calls).
550+ // The model produced nothing visible — typically because extended
551+ // thinking consumed the entire output budget.
552+ if ( assistantText . length === 0 ) {
553+ messages . pop ( ) ;
554+
555+ if ( maxOutputRecoveryCount > 0 ) {
556+ consecutiveEmptyCount ++ ;
557+ if ( consecutiveEmptyCount < MAX_CONSECUTIVE_EMPTY
558+ && maxOutputRecoveryCount < MAX_OUTPUT_RECOVERY_LIMIT ) {
559+ maxOutputRecoveryCount ++ ;
560+ messages . push ( {
561+ role : "user" ,
562+ content : [ {
563+ type : "text" ,
564+ text : "Output token limit hit. Resume directly — no apology, no recap of what you were doing. "
565+ + "Pick up mid-sentence if that is where the cut happened." ,
566+ } ] ,
567+ metadata : { synthetic : true , purpose : "max_output_recovery" } ,
568+ } ) ;
569+ yield {
570+ type : "turn_continued" ,
571+ sessionId : input . sessionId ,
572+ turnId : input . turnId ,
573+ reason : "model_error" ,
574+ } ;
575+ continue ;
576+ }
577+ // Consecutive-empty cap or overall recovery limit reached — surface error via frontend banner.
578+ finalMessage = messages . filter ( ( m ) => m . role === "assistant" ) . at ( - 1 ) ;
579+ const result = this . createTurnResult ( input , {
580+ type : "error" ,
581+ stopReason : "model_error" ,
582+ usage,
583+ permissionDenials,
584+ turns : turnCount ,
585+ startedAt,
586+ finalMessage,
587+ errors : [ agentError (
588+ "agent_model_error" ,
589+ "The model returned multiple consecutive empty responses. "
590+ + "The max output token limit is likely too low — "
591+ + "try increasing it so the model has room for visible output after reasoning." ,
592+ ) ] ,
593+ } ) ;
594+ yield { type : "turn_failed" , sessionId : input . sessionId , turnId : input . turnId , error : result . errors ! [ 0 ] ! } ;
595+ await captureTurn ( result . type === "error" ) ;
596+ yield { type : "turn_completed" , sessionId : input . sessionId , turnId : input . turnId , result } ;
597+ return { result, messages } ;
598+ } else if ( ! hasAttemptedEmptyRetry ) {
599+ // First occurrence: prompt the model to produce visible output.
600+ hasAttemptedEmptyRetry = true ;
601+ messages . push ( {
602+ role : "user" ,
603+ content : [ {
604+ type : "text" ,
605+ text : "Your previous response was empty (thinking only, no visible text). "
606+ + "Please provide your answer as visible text output." ,
607+ } ] ,
608+ metadata : { synthetic : true , purpose : "empty_response_retry" } ,
609+ } ) ;
610+ yield {
611+ type : "turn_continued" ,
612+ sessionId : input . sessionId ,
613+ turnId : input . turnId ,
614+ reason : "model_error" ,
615+ } ;
616+ continue ;
617+ } else {
618+ // Retry also returned empty — give user a diagnostic hint.
619+ finalMessage = {
620+ role : "assistant" ,
621+ content : [ {
622+ type : "text" ,
623+ text : "[The model returned an empty response. "
624+ + "This usually means the max output token limit is too low — "
625+ + "the model's reasoning/thinking consumed all available output "
626+ + "tokens before producing visible text. "
627+ + "Try increasing the max output tokens setting.]" ,
628+ } ] ,
629+ } ;
630+ messages . push ( finalMessage ) ;
631+ }
632+ // fall through to normal stop
633+ }
634+
635+ // Pure-text output truncated by max_output_tokens: the model was
636+ // mid-sentence with no tool calls. Unlike tool-call truncation we
637+ // skip the "strip-and-retry-with-doubled-tokens" phase (Phase A)
638+ // because (a) the text already generated is valid and discarding it
639+ // wastes tokens, and (b) blindly doubling maxOutputTokens may
640+ // exceed the provider's model cap and trigger a 400 error.
641+ // Instead, keep the truncated assistant message in context and inject a continuation prompt so the model resumes from the cut.
642+ // Requires non-empty assistantText: when the empty-response branch above substituted its diagnostic placeholder there is no model output to resume from, so that case must reach normal completion instead of looping.
643+ if ( assistantText . length > 0 && assembled . finishReason === "length" ) {
644+ consecutiveEmptyCount = 0 ;
645+ if ( maxOutputRecoveryCount < MAX_OUTPUT_RECOVERY_LIMIT ) {
646+ maxOutputRecoveryCount ++ ;
647+ messages . push ( {
648+ role : "user" ,
649+ content : [ {
650+ type : "text" ,
651+ text : "Output token limit hit. Resume directly — no apology, no recap of what you were doing. "
652+ + "Pick up mid-sentence if that is where the cut happened." ,
653+ } ] ,
654+ metadata : { synthetic : true , purpose : "max_output_recovery" } ,
655+ } ) ;
656+ yield {
657+ type : "turn_continued" ,
658+ sessionId : input . sessionId ,
659+ turnId : input . turnId ,
660+ reason : "model_error" ,
661+ } ;
662+ continue ;
663+ }
664+ // Recovery limit exhausted — fall through to normal completion with
665+ // whatever text was produced so far.
666+ }
667+
539668 const largeFileDecision = largeFileRepair . onNoToolCalls ( ) ;
540669 if ( largeFileDecision ) {
541670 const continued = await continueWithSyntheticPrompt ( largeFileDecision ) ;
@@ -839,7 +968,9 @@ export class AgentLoop {
839968 } else {
840969 consecutiveAllInvalidTurns = 0 ;
841970 maxOutputRecoveryCount = 0 ;
971+ consecutiveEmptyCount = 0 ;
842972 hasAttemptedOutputRetry = false ;
973+ hasAttemptedEmptyRetry = false ;
843974 }
844975
845976 if ( this . config . stopOnStructuredOutput && structuredOutput !== undefined ) {
0 commit comments