Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
123 changes: 122 additions & 1 deletion src/query.ts
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,11 @@ import {
isAutoCompactEnabled,
type AutoCompactTrackingState,
} from './services/compact/autoCompact.js'
import { buildPostCompactMessages } from './services/compact/compact.js'
import {
buildPostCompactMessages,
compactConversation,
ERROR_MESSAGE_USER_ABORT,
} from './services/compact/compact.js'
/* eslint-disable @typescript-eslint/no-require-imports */
const reactiveCompact = feature('REACTIVE_COMPACT')
? (require('./services/compact/reactiveCompact.js') as typeof import('./services/compact/reactiveCompact.js'))
Expand Down Expand Up @@ -40,8 +44,10 @@ import type {
import { logError } from './utils/log.js'
import {
PROMPT_TOO_LONG_ERROR_MESSAGE,
isContextOverflowMessage,
isPromptTooLongMessage,
} from './services/api/errors.js'
import { hasExactErrorMessage } from './utils/errors.js'
import { logAntError, logForDebugging } from './utils/debug.js'
import {
createUserMessage,
Expand Down Expand Up @@ -208,6 +214,11 @@ type State = {
autoCompactTracking: AutoCompactTrackingState | undefined
maxOutputTokensRecoveryCount: number
hasAttemptedReactiveCompact: boolean
// One-shot guard for the OSS context-overflow auto-recovery path
// (query.ts handleContextOverflow). Mirrors hasAttemptedReactiveCompact
// so a single turn cannot loop compact→error→compact forever; resets
// on each fresh tool round at the next_turn continue site.
hasAttemptedContextOverflowRecovery: boolean
maxOutputTokensOverride: number | undefined
pendingToolUseSummary: Promise<ToolUseSummaryMessage | null> | undefined
stopHookActive: boolean | undefined
Expand Down Expand Up @@ -287,6 +298,7 @@ async function* queryLoop(
stopHookActive: undefined,
maxOutputTokensRecoveryCount: 0,
hasAttemptedReactiveCompact: false,
hasAttemptedContextOverflowRecovery: false,
turnCount: 1,
continuationNudgeCount: 0,
pendingToolUseSummary: undefined,
Expand Down Expand Up @@ -329,6 +341,7 @@ async function* queryLoop(
autoCompactTracking,
maxOutputTokensRecoveryCount,
hasAttemptedReactiveCompact,
hasAttemptedContextOverflowRecovery,
maxOutputTokensOverride,
pendingToolUseSummary,
stopHookActive,
Expand Down Expand Up @@ -898,6 +911,21 @@ async function* queryLoop(
if (isWithheldMaxOutputTokens(message)) {
withheld = true
}
// OSS-side context-overflow withhold: catches the same prompt-too-long
// message that reactiveCompact/contextCollapse handle internally, plus
// the OpenAI-shim context_overflow category (Codex / GPT-5.5) and the
// Anthropic 500-with-context-keywords path. The recovery branch in
// queryLoop runs after the internal paths, so this only fires when
// reactiveCompact wasn't compiled in (external builds) or it didn't
// match (new category). Skipped once already attempted in this turn —
// hasAttemptedContextOverflowRecovery is the matching guard.
// NOTE(review): this withhold does not check querySource, but the recovery
// branch below skips the 'compact'/'session_memory' fork sources — on those
// sources a context-overflow error is withheld here yet never recovered or
// re-yielded (unless the prompt-too-long path picks it up). Confirm the
// message cannot be silently dropped for forked worker queries.
if (
!hasAttemptedContextOverflowRecovery &&
message.type === 'assistant' &&
isContextOverflowMessage(message)
) {
withheld = true
}
if (!withheld) {
yield yieldMessage
}
Expand Down Expand Up @@ -1177,6 +1205,7 @@ async function* queryLoop(
autoCompactTracking: tracking,
maxOutputTokensRecoveryCount,
hasAttemptedReactiveCompact,
hasAttemptedContextOverflowRecovery,
maxOutputTokensOverride: undefined,
pendingToolUseSummary: undefined,
stopHookActive: undefined,
Expand Down Expand Up @@ -1231,6 +1260,7 @@ async function* queryLoop(
autoCompactTracking: undefined,
maxOutputTokensRecoveryCount,
hasAttemptedReactiveCompact: true,
hasAttemptedContextOverflowRecovery: true,
maxOutputTokensOverride: undefined,
pendingToolUseSummary: undefined,
stopHookActive: undefined,
Expand Down Expand Up @@ -1259,6 +1289,91 @@ async function* queryLoop(
return { reason: 'prompt_too_long' }
}

// OSS context-overflow recovery (#1105). Catches the cases neither
// reactiveCompact nor contextCollapse handle: external builds with
// neither compiled in, and OpenAI-shim providers (Codex / GPT-5.5)
// that surface the limit through a 500 with context-overflow keywords
// rather than the Anthropic PTL path. Runs at most once per turn —
// hasAttemptedContextOverflowRecovery gates re-entry, and the
// autocompact 3-strike circuit breaker in autoCompact.ts handles
// deeper recursion if the post-compact retry overflows again.
//
// Skip for compact/session_memory fork sources — those are forked
// worker queries whose messagesForQuery is the worker prompt, not the
// original conversation. Recovering here would re-enter
// compactConversation with the worker prompt as forkContextMessages,
// bypassing the dedicated compact retry path and producing a
// misleading post-compact retry. Mirrors the pre-flight blocking-limit
// guard at the top of this loop body (~line 691).
const isWithheldContextOverflow =
!hasAttemptedContextOverflowRecovery &&
querySource !== 'compact' &&
querySource !== 'session_memory' &&
lastMessage?.type === 'assistant' &&
lastMessage.isApiErrorMessage === true &&
isContextOverflowMessage(lastMessage)
if (isWithheldContextOverflow) {
try {
const compactionResult = await compactConversation(
messagesForQuery,
toolUseContext,
{
systemPrompt,
userContext,
systemContext,
toolUseContext,
forkContextMessages: messagesForQuery,
},
true, // suppressFollowUpQuestions
undefined, // customInstructions
true, // isAutoCompact — reuses the auto-compact telemetry + circuit breaker
)

if (params.taskBudget) {
const preCompactContext =
finalContextTokensFromLastResponse(messagesForQuery)
taskBudgetRemaining = Math.max(
0,
(taskBudgetRemaining ?? params.taskBudget.total) -
preCompactContext,
)
}

const postCompactMessages = buildPostCompactMessages(compactionResult)
for (const msg of postCompactMessages) {
yield msg
}
const next: State = {
messages: postCompactMessages,
toolUseContext,
autoCompactTracking: undefined,
maxOutputTokensRecoveryCount,
hasAttemptedReactiveCompact,
hasAttemptedContextOverflowRecovery: true,
maxOutputTokensOverride: undefined,
pendingToolUseSummary: undefined,
stopHookActive: undefined,
turnCount,
continuationNudgeCount: state.continuationNudgeCount,
transition: { reason: 'context_overflow_recovery' },
}
state = next
continue
} catch (compactError) {
if (!hasExactErrorMessage(compactError, ERROR_MESSAGE_USER_ABORT)) {
logError(compactError)
}
// Compaction failed (aborted, or a deeper API error). Fall through
// to surface the original withheld context-overflow error rather
// than the compact failure — that's the actionable diagnostic for
// the user. Don't run blocking Stop hooks here, for the same
// death-spiral reason as the prompt-too-long path above.
// NOTE(review): executeStopFailureHooks IS still invoked just below —
// presumably the non-blocking failure-notification variant rather than
// the blocking Stop hook this comment refers to; confirm the distinction.
yield lastMessage
void executeStopFailureHooks(lastMessage, toolUseContext)
return { reason: 'context_overflow' }
}
}

// Check for max_output_tokens and inject recovery message. The error
// was withheld from the stream above; only surface it if recovery
// exhausts.
Expand Down Expand Up @@ -1287,6 +1402,7 @@ async function* queryLoop(
autoCompactTracking: tracking,
maxOutputTokensRecoveryCount,
hasAttemptedReactiveCompact,
hasAttemptedContextOverflowRecovery,
maxOutputTokensOverride: ESCALATED_MAX_TOKENS,
pendingToolUseSummary: undefined,
stopHookActive: undefined,
Expand Down Expand Up @@ -1316,6 +1432,7 @@ async function* queryLoop(
autoCompactTracking: tracking,
maxOutputTokensRecoveryCount: maxOutputTokensRecoveryCount + 1,
hasAttemptedReactiveCompact,
hasAttemptedContextOverflowRecovery,
maxOutputTokensOverride: undefined,
pendingToolUseSummary: undefined,
stopHookActive: undefined,
Expand Down Expand Up @@ -1374,6 +1491,7 @@ async function* queryLoop(
// here caused an infinite loop: compact → still too long → error →
// stop hook blocking → compact → … burning thousands of API calls.
hasAttemptedReactiveCompact,
hasAttemptedContextOverflowRecovery,
maxOutputTokensOverride: undefined,
pendingToolUseSummary: undefined,
stopHookActive: true,
Expand Down Expand Up @@ -1411,6 +1529,7 @@ async function* queryLoop(
autoCompactTracking: tracking,
maxOutputTokensRecoveryCount: 0,
hasAttemptedReactiveCompact: false,
hasAttemptedContextOverflowRecovery: false,
maxOutputTokensOverride: undefined,
pendingToolUseSummary: undefined,
stopHookActive: undefined,
Expand Down Expand Up @@ -1493,6 +1612,7 @@ async function* queryLoop(
autoCompactTracking: tracking,
maxOutputTokensRecoveryCount: 0,
hasAttemptedReactiveCompact: false,
hasAttemptedContextOverflowRecovery: false,
maxOutputTokensOverride: undefined,
pendingToolUseSummary: undefined,
stopHookActive: undefined,
Expand Down Expand Up @@ -1903,6 +2023,7 @@ async function* queryLoop(
turnCount: nextTurnCount,
maxOutputTokensRecoveryCount: 0,
hasAttemptedReactiveCompact: false,
hasAttemptedContextOverflowRecovery: false,
continuationNudgeCount: 0,
pendingToolUseSummary: nextPendingToolUseSummary,
maxOutputTokensOverride: undefined,
Expand Down
88 changes: 88 additions & 0 deletions src/services/api/errors.test.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,88 @@
import { expect, test } from 'bun:test'

import { createAssistantAPIErrorMessage } from '../../utils/messages.js'

import {
PROMPT_TOO_LONG_ERROR_MESSAGE,
isContextOverflowMessage,
isPromptTooLongMessage,
} from './errors.js'

test('isContextOverflowMessage matches Anthropic prompt-too-long messages', () => {
  const promptTooLong = createAssistantAPIErrorMessage({
    content: PROMPT_TOO_LONG_ERROR_MESSAGE,
    apiError: 'context_overflow',
    error: 'invalid_request',
    errorDetails: 'prompt is too long: 200000 tokens > 199000 maximum',
  })

  // New helper classifies the Anthropic PTL message as a context overflow.
  expect(isContextOverflowMessage(promptTooLong)).toBe(true)
  // The legacy helper must keep matching it too, so the existing
  // reactiveCompact path still catches this message in internal builds.
  expect(isPromptTooLongMessage(promptTooLong)).toBe(true)
})

test('isContextOverflowMessage matches OpenAI-shim context_overflow messages (Codex / GPT-5.5)', () => {
  const shimOverflow = createAssistantAPIErrorMessage({
    content:
      'The conversation exceeded the provider context limit. Run /compact or start a new session with /new.',
    apiError: 'context_overflow',
    error: 'invalid_request',
  })

  expect(isContextOverflowMessage(shimOverflow)).toBe(true)
  // This message is not Anthropic PTL — confirms the new helper widens
  // detection beyond the prompt-too-long path so the OpenAI-shim case gets
  // recovery too.
  expect(isPromptTooLongMessage(shimOverflow)).toBe(false)
})

test('isContextOverflowMessage matches Anthropic 500-context-overflow messages', () => {
  const overflow500 = createAssistantAPIErrorMessage({
    apiError: 'context_overflow',
    error: 'invalid_request',
    errorDetails: 'Context overflow (500): too many tokens in request',
    content:
      'The conversation has grown too large for the API to process. Press esc twice to go up a few messages, or run /compact to reduce context. Alternatively, start a new session with /new.',
  })

  expect(isContextOverflowMessage(overflow500)).toBe(true)
})

test('isContextOverflowMessage falls back to content fingerprints if apiError tag missing', () => {
  // Older sites emit the same content text without the apiError tag; the
  // content prefix list is the secondary signal and must still match.
  const untagged = createAssistantAPIErrorMessage({
    content:
      'The conversation has grown too large for the API to process. Run /compact.',
    error: 'invalid_request',
  })

  expect(isContextOverflowMessage(untagged)).toBe(true)
})

test('isContextOverflowMessage rejects unrelated API errors', () => {
  // Neither a rate-limit nor an auth failure carries the overflow tag or a
  // fingerprint prefix — both must be left alone.
  const unrelatedErrors = [
    createAssistantAPIErrorMessage({
      content: 'API Error: Provider rate limit reached. Retry in a few seconds.',
      error: 'rate_limit',
    }),
    createAssistantAPIErrorMessage({
      content: 'API Error: Authentication failed.',
      error: 'authentication_failed',
    }),
  ]

  for (const errorMessage of unrelatedErrors) {
    expect(isContextOverflowMessage(errorMessage)).toBe(false)
  }
})

test('isContextOverflowMessage rejects non-error assistant messages', () => {
  // A synthetic message carrying the fingerprint text but with
  // isApiErrorMessage=false must not be classified — guards against ordinary
  // assistant text accidentally tripping the loop's recovery path.
  const errorShaped = createAssistantAPIErrorMessage({
    content: 'The conversation has grown too large for the API to process.',
    error: 'invalid_request',
  })
  const plainAssistant = {
    ...errorShaped,
    isApiErrorMessage: false,
  } as typeof errorShaped

  expect(isContextOverflowMessage(plainAssistant)).toBe(false)
})
35 changes: 35 additions & 0 deletions src/services/api/errors.ts
Original file line number Diff line number Diff line change
Expand Up @@ -123,6 +123,7 @@ function mapOpenAICompatibilityFailureToAssistantMessage(options: {
case 'context_overflow':
return createAssistantAPIErrorMessage({
content: `The conversation exceeded the provider context limit. ${compactHint}`,
apiError: 'context_overflow',
error: 'invalid_request',
})

Expand Down Expand Up @@ -183,6 +184,38 @@ export function isPromptTooLongMessage(msg: AssistantMessage): boolean {
)
}

// Content-text fingerprints for assistant error messages that mean
// "context window / provider context limit exceeded." Kept in sync with the
// strings produced in this file for each of the three sources we recover
// from in query.ts (PTL, OpenAI-shim context_overflow, Anthropic 500 with
// context keywords). New phrasing must be added here too — isContextOverflowMessage
// is what the query-loop withhold and one-shot autocompact-retry path looks at.
const CONTEXT_OVERFLOW_CONTENT_PREFIXES = [
  PROMPT_TOO_LONG_ERROR_MESSAGE,
  'The conversation exceeded the provider context limit.',
  'The conversation has grown too large for the API to process.',
] as const

/**
 * Returns true when an assistant message represents a context-overflow API
 * error. Primary signal: the explicit `apiError: 'context_overflow'` tag
 * stamped by the producers in this file. Secondary signal, for older call
 * sites that omit the tag: a known content-text prefix from
 * CONTEXT_OVERFLOW_CONTENT_PREFIXES.
 *
 * Messages with `isApiErrorMessage` false are never classified, so ordinary
 * assistant text that happens to start with a fingerprint phrase cannot trip
 * the query-loop recovery path.
 */
export function isContextOverflowMessage(msg: AssistantMessage): boolean {
  if (!msg.isApiErrorMessage) {
    return false
  }
  if (msg.apiError === 'context_overflow') {
    return true
  }
  const content = msg.message.content
  // Robustness: content may be a plain string rather than a block array
  // (the SDK accepts both forms) — previously this case silently returned
  // false, dropping the fingerprint fallback. Match the string directly.
  if (typeof content === 'string') {
    return CONTEXT_OVERFLOW_CONTENT_PREFIXES.some(prefix =>
      content.startsWith(prefix),
    )
  }
  if (!Array.isArray(content)) {
    return false
  }
  return content.some(
    block =>
      block.type === 'text' &&
      CONTEXT_OVERFLOW_CONTENT_PREFIXES.some(prefix =>
        block.text.startsWith(prefix),
      ),
  )
}

/**
* Parse actual/limit token counts from a raw prompt-too-long API error
* message like "prompt is too long: 137500 tokens > 135000 maximum".
Expand Down Expand Up @@ -699,6 +732,7 @@ export function getAssistantMessageFromError(
// parses the gap from there via getPromptTooLongTokenGap.
return createAssistantAPIErrorMessage({
content: PROMPT_TOO_LONG_ERROR_MESSAGE,
apiError: 'context_overflow',
error: 'invalid_request',
errorDetails: error.message,
})
Expand Down Expand Up @@ -1064,6 +1098,7 @@ export function getAssistantMessageFromError(
: ' Press esc twice to go up a few messages, or run /compact to reduce context.'
return createAssistantAPIErrorMessage({
content: `The conversation has grown too large for the API to process.${rewindInstruction} Alternatively, start a new session with /new.`,
apiError: 'context_overflow',
error: 'invalid_request',
errorDetails: `Context overflow (500): ${error.message}`,
})
Expand Down