Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
123 changes: 122 additions & 1 deletion src/query.ts
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,11 @@ import {
isAutoCompactEnabled,
type AutoCompactTrackingState,
} from './services/compact/autoCompact.js'
import { buildPostCompactMessages } from './services/compact/compact.js'
import {
buildPostCompactMessages,
compactConversation,
ERROR_MESSAGE_USER_ABORT,
} from './services/compact/compact.js'
/* eslint-disable @typescript-eslint/no-require-imports */
const reactiveCompact = feature('REACTIVE_COMPACT')
? (require('./services/compact/reactiveCompact.js') as typeof import('./services/compact/reactiveCompact.js'))
Expand Down Expand Up @@ -40,8 +44,10 @@ import type {
import { logError } from './utils/log.js'
import {
PROMPT_TOO_LONG_ERROR_MESSAGE,
isContextOverflowMessage,
isPromptTooLongMessage,
} from './services/api/errors.js'
import { hasExactErrorMessage } from './utils/errors.js'
import { logAntError, logForDebugging } from './utils/debug.js'
import {
createUserMessage,
Expand Down Expand Up @@ -208,6 +214,11 @@ type State = {
autoCompactTracking: AutoCompactTrackingState | undefined
maxOutputTokensRecoveryCount: number
hasAttemptedReactiveCompact: boolean
// One-shot guard for the OSS context-overflow auto-recovery path
// (query.ts handleContextOverflow). Mirrors hasAttemptedReactiveCompact
// so a single turn cannot loop compact→error→compact forever; resets
// on each fresh tool round at the next_turn continue site.
hasAttemptedContextOverflowRecovery: boolean
maxOutputTokensOverride: number | undefined
pendingToolUseSummary: Promise<ToolUseSummaryMessage | null> | undefined
stopHookActive: boolean | undefined
Expand Down Expand Up @@ -287,6 +298,7 @@ async function* queryLoop(
stopHookActive: undefined,
maxOutputTokensRecoveryCount: 0,
hasAttemptedReactiveCompact: false,
hasAttemptedContextOverflowRecovery: false,
turnCount: 1,
continuationNudgeCount: 0,
pendingToolUseSummary: undefined,
Expand Down Expand Up @@ -329,6 +341,7 @@ async function* queryLoop(
autoCompactTracking,
maxOutputTokensRecoveryCount,
hasAttemptedReactiveCompact,
hasAttemptedContextOverflowRecovery,
maxOutputTokensOverride,
pendingToolUseSummary,
stopHookActive,
Expand Down Expand Up @@ -898,6 +911,21 @@ async function* queryLoop(
if (isWithheldMaxOutputTokens(message)) {
withheld = true
}
// OSS-side context-overflow withhold: catches the same prompt-too-long
// message that reactiveCompact/contextCollapse handle internally, plus
// the OpenAI-shim context_overflow category (Codex / GPT-5.5) and the
// Anthropic 500-with-context-keywords path. The recovery branch in
// queryLoop runs after the internal paths, so this only fires when
// reactiveCompact wasn't compiled in (external builds) or it didn't
// match (new category). Skipped once already attempted in this turn —
// hasAttemptedContextOverflowRecovery is the matching guard.
// NOTE(review): this withhold does not check querySource, but the recovery
// branch below skips the 'compact'/'session_memory' fork sources — on those
// sources a context-overflow error is withheld here yet never recovered or
// re-yielded (unless the prompt-too-long path picks it up). Confirm the
// message cannot be silently dropped for forked worker queries.
if (
!hasAttemptedContextOverflowRecovery &&
message.type === 'assistant' &&
isContextOverflowMessage(message)
) {
withheld = true
}
if (!withheld) {
yield yieldMessage
}
Expand Down Expand Up @@ -1177,6 +1205,7 @@ async function* queryLoop(
autoCompactTracking: tracking,
maxOutputTokensRecoveryCount,
hasAttemptedReactiveCompact,
hasAttemptedContextOverflowRecovery,
maxOutputTokensOverride: undefined,
pendingToolUseSummary: undefined,
stopHookActive: undefined,
Expand Down Expand Up @@ -1231,6 +1260,7 @@ async function* queryLoop(
autoCompactTracking: undefined,
maxOutputTokensRecoveryCount,
hasAttemptedReactiveCompact: true,
hasAttemptedContextOverflowRecovery: true,
maxOutputTokensOverride: undefined,
pendingToolUseSummary: undefined,
stopHookActive: undefined,
Expand Down Expand Up @@ -1259,6 +1289,91 @@ async function* queryLoop(
return { reason: 'prompt_too_long' }
}

// OSS context-overflow recovery (#1105). Catches the cases neither
// reactiveCompact nor contextCollapse handle: external builds with
// neither compiled in, and OpenAI-shim providers (Codex / GPT-5.5)
// that surface the limit through a 500 with context-overflow keywords
// rather than the Anthropic PTL path. Runs at most once per turn —
// hasAttemptedContextOverflowRecovery gates re-entry, and the
// autocompact 3-strike circuit breaker in autoCompact.ts handles
// deeper recursion if the post-compact retry overflows again.
//
// Skip for compact/session_memory fork sources — those are forked
// worker queries whose messagesForQuery is the worker prompt, not the
// original conversation. Recovering here would re-enter
// compactConversation with the worker prompt as forkContextMessages,
// bypassing the dedicated compact retry path and producing a
// misleading post-compact retry. Mirrors the pre-flight blocking-limit
// guard at the top of this loop body (~line 691).
const isWithheldContextOverflow =
!hasAttemptedContextOverflowRecovery &&
querySource !== 'compact' &&
querySource !== 'session_memory' &&
lastMessage?.type === 'assistant' &&
lastMessage.isApiErrorMessage === true &&
isContextOverflowMessage(lastMessage)
if (isWithheldContextOverflow) {
try {
const compactionResult = await compactConversation(
messagesForQuery,
toolUseContext,
{
systemPrompt,
userContext,
systemContext,
toolUseContext,
forkContextMessages: messagesForQuery,
},
true, // suppressFollowUpQuestions
undefined, // customInstructions
true, // isAutoCompact — reuses the auto-compact telemetry + circuit breaker
)

if (params.taskBudget) {
const preCompactContext =
finalContextTokensFromLastResponse(messagesForQuery)
taskBudgetRemaining = Math.max(
0,
(taskBudgetRemaining ?? params.taskBudget.total) -
preCompactContext,
)
}

const postCompactMessages = buildPostCompactMessages(compactionResult)
for (const msg of postCompactMessages) {
yield msg
}
const next: State = {
messages: postCompactMessages,
toolUseContext,
autoCompactTracking: undefined,
maxOutputTokensRecoveryCount,
hasAttemptedReactiveCompact,
hasAttemptedContextOverflowRecovery: true,
maxOutputTokensOverride: undefined,
pendingToolUseSummary: undefined,
stopHookActive: undefined,
turnCount,
continuationNudgeCount: state.continuationNudgeCount,
transition: { reason: 'context_overflow_recovery' },
}
state = next
continue
} catch (compactError) {
if (!hasExactErrorMessage(compactError, ERROR_MESSAGE_USER_ABORT)) {
logError(compactError)
}
// Compaction failed (aborted, or a deeper API error). Fall through
// to surface the original withheld context-overflow error rather
// than the compact failure — that's the actionable diagnostic for
// the user. Don't run blocking Stop hooks here, for the same
// death-spiral reason as the prompt-too-long path above.
// NOTE(review): executeStopFailureHooks IS still invoked just below —
// presumably the non-blocking failure-notification variant rather than
// the blocking Stop hook this comment refers to; confirm the distinction.
yield lastMessage
void executeStopFailureHooks(lastMessage, toolUseContext)
return { reason: 'context_overflow' }
}
}

// Check for max_output_tokens and inject recovery message. The error
// was withheld from the stream above; only surface it if recovery
// exhausts.
Expand Down Expand Up @@ -1287,6 +1402,7 @@ async function* queryLoop(
autoCompactTracking: tracking,
maxOutputTokensRecoveryCount,
hasAttemptedReactiveCompact,
hasAttemptedContextOverflowRecovery,
maxOutputTokensOverride: ESCALATED_MAX_TOKENS,
pendingToolUseSummary: undefined,
stopHookActive: undefined,
Expand Down Expand Up @@ -1316,6 +1432,7 @@ async function* queryLoop(
autoCompactTracking: tracking,
maxOutputTokensRecoveryCount: maxOutputTokensRecoveryCount + 1,
hasAttemptedReactiveCompact,
hasAttemptedContextOverflowRecovery,
maxOutputTokensOverride: undefined,
pendingToolUseSummary: undefined,
stopHookActive: undefined,
Expand Down Expand Up @@ -1374,6 +1491,7 @@ async function* queryLoop(
// here caused an infinite loop: compact → still too long → error →
// stop hook blocking → compact → … burning thousands of API calls.
hasAttemptedReactiveCompact,
hasAttemptedContextOverflowRecovery,
maxOutputTokensOverride: undefined,
pendingToolUseSummary: undefined,
stopHookActive: true,
Expand Down Expand Up @@ -1411,6 +1529,7 @@ async function* queryLoop(
autoCompactTracking: tracking,
maxOutputTokensRecoveryCount: 0,
hasAttemptedReactiveCompact: false,
hasAttemptedContextOverflowRecovery: false,
maxOutputTokensOverride: undefined,
pendingToolUseSummary: undefined,
stopHookActive: undefined,
Expand Down Expand Up @@ -1493,6 +1612,7 @@ async function* queryLoop(
autoCompactTracking: tracking,
maxOutputTokensRecoveryCount: 0,
hasAttemptedReactiveCompact: false,
hasAttemptedContextOverflowRecovery: false,
maxOutputTokensOverride: undefined,
pendingToolUseSummary: undefined,
stopHookActive: undefined,
Expand Down Expand Up @@ -1903,6 +2023,7 @@ async function* queryLoop(
turnCount: nextTurnCount,
maxOutputTokensRecoveryCount: 0,
hasAttemptedReactiveCompact: false,
hasAttemptedContextOverflowRecovery: false,
continuationNudgeCount: 0,
pendingToolUseSummary: nextPendingToolUseSummary,
maxOutputTokensOverride: undefined,
Expand Down
88 changes: 88 additions & 0 deletions src/services/api/errors.test.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,88 @@
import { expect, test } from 'bun:test'

import { createAssistantAPIErrorMessage } from '../../utils/messages.js'

import {
PROMPT_TOO_LONG_ERROR_MESSAGE,
isContextOverflowMessage,
isPromptTooLongMessage,
} from './errors.js'

test('isContextOverflowMessage matches Anthropic prompt-too-long messages', () => {
  const promptTooLong = createAssistantAPIErrorMessage({
    content: PROMPT_TOO_LONG_ERROR_MESSAGE,
    apiError: 'context_overflow',
    error: 'invalid_request',
    errorDetails: 'prompt is too long: 200000 tokens > 199000 maximum',
  })

  // New helper classifies the Anthropic PTL message as a context overflow.
  expect(isContextOverflowMessage(promptTooLong)).toBe(true)
  // The legacy helper must keep matching it too, so the existing
  // reactiveCompact path still catches this message in internal builds.
  expect(isPromptTooLongMessage(promptTooLong)).toBe(true)
})

test('isContextOverflowMessage matches OpenAI-shim context_overflow messages (Codex / GPT-5.5)', () => {
  const shimOverflow = createAssistantAPIErrorMessage({
    content:
      'The conversation exceeded the provider context limit. Run /compact or start a new session with /new.',
    apiError: 'context_overflow',
    error: 'invalid_request',
  })

  expect(isContextOverflowMessage(shimOverflow)).toBe(true)
  // This message is not Anthropic PTL — confirms the new helper widens
  // detection beyond the prompt-too-long path so the OpenAI-shim case gets
  // recovery too.
  expect(isPromptTooLongMessage(shimOverflow)).toBe(false)
})

test('isContextOverflowMessage matches Anthropic 500-context-overflow messages', () => {
  const overflow500 = createAssistantAPIErrorMessage({
    apiError: 'context_overflow',
    error: 'invalid_request',
    errorDetails: 'Context overflow (500): too many tokens in request',
    content:
      'The conversation has grown too large for the API to process. Press esc twice to go up a few messages, or run /compact to reduce context. Alternatively, start a new session with /new.',
  })

  expect(isContextOverflowMessage(overflow500)).toBe(true)
})

test('isContextOverflowMessage falls back to content fingerprints if apiError tag missing', () => {
  // Older sites emit the same content text without the apiError tag; the
  // content prefix list is the secondary signal and must still match.
  const untagged = createAssistantAPIErrorMessage({
    content:
      'The conversation has grown too large for the API to process. Run /compact.',
    error: 'invalid_request',
  })

  expect(isContextOverflowMessage(untagged)).toBe(true)
})

test('isContextOverflowMessage rejects unrelated API errors', () => {
  // Neither a rate-limit nor an auth failure carries the overflow tag or a
  // fingerprint prefix — both must be left alone.
  const unrelatedErrors = [
    createAssistantAPIErrorMessage({
      content: 'API Error: Provider rate limit reached. Retry in a few seconds.',
      error: 'rate_limit',
    }),
    createAssistantAPIErrorMessage({
      content: 'API Error: Authentication failed.',
      error: 'authentication_failed',
    }),
  ]

  for (const errorMessage of unrelatedErrors) {
    expect(isContextOverflowMessage(errorMessage)).toBe(false)
  }
})

test('isContextOverflowMessage rejects non-error assistant messages', () => {
  // A synthetic message carrying the fingerprint text but with
  // isApiErrorMessage=false must not be classified — guards against ordinary
  // assistant text accidentally tripping the loop's recovery path.
  const errorShaped = createAssistantAPIErrorMessage({
    content: 'The conversation has grown too large for the API to process.',
    error: 'invalid_request',
  })
  const plainAssistant = {
    ...errorShaped,
    isApiErrorMessage: false,
  } as typeof errorShaped

  expect(isContextOverflowMessage(plainAssistant)).toBe(false)
})
35 changes: 35 additions & 0 deletions src/services/api/errors.ts
Original file line number Diff line number Diff line change
Expand Up @@ -123,6 +123,7 @@ function mapOpenAICompatibilityFailureToAssistantMessage(options: {
case 'context_overflow':
return createAssistantAPIErrorMessage({
content: `The conversation exceeded the provider context limit. ${compactHint}`,
apiError: 'context_overflow',
error: 'invalid_request',
})

Expand Down Expand Up @@ -183,6 +184,38 @@ export function isPromptTooLongMessage(msg: AssistantMessage): boolean {
)
}

// Content-text fingerprints for assistant error messages that mean
// "context window / provider context limit exceeded." Kept in sync with the
// strings produced in this file for each of the three sources we recover
// from in query.ts (PTL, OpenAI-shim context_overflow, Anthropic 500 with
// context keywords). New phrasing must be added here too — isContextOverflowMessage
// is what the query-loop withhold and one-shot autocompact-retry path looks at.
const CONTEXT_OVERFLOW_CONTENT_PREFIXES = [
  PROMPT_TOO_LONG_ERROR_MESSAGE,
  'The conversation exceeded the provider context limit.',
  'The conversation has grown too large for the API to process.',
] as const

/**
 * Returns true when an assistant message represents a context-overflow API
 * error. Primary signal: the explicit `apiError: 'context_overflow'` tag
 * stamped by the producers in this file. Secondary signal, for older call
 * sites that omit the tag: a known content-text prefix from
 * CONTEXT_OVERFLOW_CONTENT_PREFIXES.
 *
 * Messages with `isApiErrorMessage` false are never classified, so ordinary
 * assistant text that happens to start with a fingerprint phrase cannot trip
 * the query-loop recovery path.
 */
export function isContextOverflowMessage(msg: AssistantMessage): boolean {
  if (!msg.isApiErrorMessage) {
    return false
  }
  if (msg.apiError === 'context_overflow') {
    return true
  }
  const content = msg.message.content
  // Robustness: content may be a plain string rather than a block array
  // (the SDK accepts both forms) — previously this case silently returned
  // false, dropping the fingerprint fallback. Match the string directly.
  if (typeof content === 'string') {
    return CONTEXT_OVERFLOW_CONTENT_PREFIXES.some(prefix =>
      content.startsWith(prefix),
    )
  }
  if (!Array.isArray(content)) {
    return false
  }
  return content.some(
    block =>
      block.type === 'text' &&
      CONTEXT_OVERFLOW_CONTENT_PREFIXES.some(prefix =>
        block.text.startsWith(prefix),
      ),
  )
}

/**
* Parse actual/limit token counts from a raw prompt-too-long API error
* message like "prompt is too long: 137500 tokens > 135000 maximum".
Expand Down Expand Up @@ -699,6 +732,7 @@ export function getAssistantMessageFromError(
// parses the gap from there via getPromptTooLongTokenGap.
return createAssistantAPIErrorMessage({
content: PROMPT_TOO_LONG_ERROR_MESSAGE,
apiError: 'context_overflow',
error: 'invalid_request',
errorDetails: error.message,
})
Expand Down Expand Up @@ -1064,6 +1098,7 @@ export function getAssistantMessageFromError(
: ' Press esc twice to go up a few messages, or run /compact to reduce context.'
return createAssistantAPIErrorMessage({
content: `The conversation has grown too large for the API to process.${rewindInstruction} Alternatively, start a new session with /new.`,
apiError: 'context_overflow',
error: 'invalid_request',
errorDetails: `Context overflow (500): ${error.message}`,
})
Expand Down