diff --git a/scripts/benchmarks/shim-tool-minify-live.sh b/scripts/benchmarks/shim-tool-minify-live.sh new file mode 100755 index 0000000000..cc461d3c20 --- /dev/null +++ b/scripts/benchmarks/shim-tool-minify-live.sh @@ -0,0 +1,202 @@ +#!/usr/bin/env bash +set -euo pipefail + +ROOT="${1:-/tmp/openclaude-shim-bench}" +RESULTS="$ROOT/results" +BASE="$ROOT/base" +OFF="$ROOT/off" +MINIFY="$ROOT/minify" + +rm -rf "$ROOT" +mkdir -p "$BASE/src" "$BASE/docs" "$BASE/tests" "$RESULTS" + +cat >"$BASE/README.md" <<'EOF' +# Shim Live Test Project + +This disposable project is used to test OpenClaude workflows against the +OpenAI-compatible shim. + +The project exposes a tiny calculator module, a config file, and notes that +mention NanoGPT, DeepSeek, Qwen, and tool schema minimization. + +Expected version: 0.4.2 +EOF + +cat >"$BASE/src/calculator.js" <<'EOF' +export function add(a, b) { + return a + b +} + +export function multiply(a, b) { + return a * b +} + +export function formatResult(label, value) { + return `${label}: ${value}` +} +EOF + +cat >"$BASE/src/config.json" <<'EOF' +{ + "name": "shim-live-test", + "version": "0.4.2", + "provider": "nanogpt", + "defaultModel": "deepseek/deepseek-v4-pro" +} +EOF + +cat >"$BASE/docs/notes.md" <<'EOF' +# Notes + +- NanoGPT is used through an OpenAI-compatible endpoint. +- DeepSeek V4 Pro is the default model for agentic coding work. +- Qwen models can be used as alternatives. +- Tool schema minimization should preserve all available tools. 
+EOF + +cat >"$BASE/tests/calculator.test.js" <<'EOF' +import { add, multiply, formatResult } from '../src/calculator.js' + +if (add(2, 3) !== 5) throw new Error('add failed') +if (multiply(3, 4) !== 12) throw new Error('multiply failed') +if (formatResult('sum', 5) !== 'sum: 5') throw new Error('format failed') + +console.log('calculator tests passed') +EOF + +cat >"$BASE/package.json" <<'EOF' +{ + "name": "shim-live-test", + "version": "0.4.2", + "type": "module", + "scripts": { + "test": "node tests/calculator.test.js" + } +} +EOF + +cp -a "$BASE" "$OFF" +cp -a "$BASE" "$MINIFY" + +run_case() { + local mode="$1" + local workdir="$2" + local outdir="$3" + local id="$4" + local prompt="$5" + mkdir -p "$outdir" + + printf 'running %s %s\n' "$mode" "$id" >&2 + ( + cd "$workdir" + OPENAI_SHIM_TOOL_MODE="$mode" \ + timeout 120 openclaude --bare -p --output-format json \ + --permission-mode bypassPermissions \ + --max-budget-usd 0.20 \ + "$prompt" + ) >"$outdir/${id}.json" 2>"$outdir/${id}.log" || { + code=$? + printf '{"type":"harness_error","exit_code":%s,"case":"%s"}\n' "$code" "$id" >>"$outdir/${id}.json" + } +} + +run_suite() { + local mode="$1" + local workdir="$2" + local outdir="$RESULTS/$mode" + + run_case "$mode" "$workdir" "$outdir" "01_arithmetic" "Reply with exactly the number: 4" + run_case "$mode" "$workdir" "$outdir" "02_read_config" "Read src/config.json and report only the version and defaultModel." + run_case "$mode" "$workdir" "$outdir" "03_search_deepseek" "Search the project for DeepSeek and report matching file paths only." + run_case "$mode" "$workdir" "$outdir" "04_run_tests" "Run the test suite with npm test and report pass or fail with the key output." + run_case "$mode" "$workdir" "$outdir" "05_create_doc" "Create docs/generated-summary.md containing one concise sentence about this project, then report the file path." 
+ run_case "$mode" "$workdir" "$outdir" "06_edit_code" "Edit src/calculator.js to add an exported subtract(a, b) function, then report what changed." + run_case "$mode" "$workdir" "$outdir" "07_update_test" "Update tests/calculator.test.js to test subtract(7, 2) === 5, then run npm test and report pass or fail." + run_case "$mode" "$workdir" "$outdir" "08_summarize_project" "Read README.md and docs/notes.md, then summarize the project in three bullets." + run_case "$mode" "$workdir" "$outdir" "09_find_version" "Find every occurrence of 0.4.2 in this project and report file paths." + run_case "$mode" "$workdir" "$outdir" "10_plan_next" "Inspect the project structure and propose the next two engineering tasks. Mention the files you inspected." +} + +run_suite off "$OFF" +run_suite minify "$MINIFY" + +node - "$RESULTS" <<'NODE' +const fs = require('fs') +const path = require('path') +const root = process.argv[2] + +function readRun(mode, id) { + const file = path.join(root, mode, `${id}.json`) + const raw = fs.readFileSync(file, 'utf8').trim().split('\n').filter(Boolean).at(-1) + const json = JSON.parse(raw) + const model = json.modelUsage ? Object.keys(json.modelUsage)[0] : '' + const usage = model ? json.modelUsage[model] : {} + return { + ok: json.type === 'result' && json.subtype === 'success' && !json.is_error, + input: usage.inputTokens ?? json.usage?.input_tokens ?? 0, + output: usage.outputTokens ?? json.usage?.output_tokens ?? 0, + cost: usage.costUSD ?? json.total_cost_usd ?? 0, + duration: json.duration_ms ?? 0, + turns: json.num_turns ?? 0, + result: String(json.result ?? 
'').replace(/\s+/g, ' ').slice(0, 120), + } +} + +const ids = fs.readdirSync(path.join(root, 'off')) + .filter(name => name.endsWith('.json')) + .map(name => name.replace(/\.json$/, '')) + .sort() + +const lines = [ + 'case,off_input,minify_input,input_reduction_pct,off_cost,minify_cost,cost_reduction_pct,off_turns,minify_turns,off_ok,minify_ok', +] +let offInput = 0 +let minInput = 0 +let offCost = 0 +let minCost = 0 +let offOk = 0 +let minOk = 0 + +for (const id of ids) { + const off = readRun('off', id) + const min = readRun('minify', id) + offInput += off.input + minInput += min.input + offCost += off.cost + minCost += min.cost + offOk += off.ok ? 1 : 0 + minOk += min.ok ? 1 : 0 + lines.push([ + id, + off.input, + min.input, + ((off.input - min.input) / off.input * 100).toFixed(1), + off.cost.toFixed(6), + min.cost.toFixed(6), + ((off.cost - min.cost) / off.cost * 100).toFixed(1), + off.turns, + min.turns, + off.ok, + min.ok, + ].join(',')) +} + +lines.push([ + 'TOTAL', + offInput, + minInput, + ((offInput - minInput) / offInput * 100).toFixed(1), + offCost.toFixed(6), + minCost.toFixed(6), + ((offCost - minCost) / offCost * 100).toFixed(1), + '', + '', + `${offOk}/${ids.length}`, + `${minOk}/${ids.length}`, +].join(',')) + +fs.writeFileSync(path.join(root, 'summary.csv'), `${lines.join('\n')}\n`) +console.log(lines.join('\n')) +NODE + +printf '\nResults written to %s\n' "$RESULTS" diff --git a/src/services/api/openaiShim.test.ts b/src/services/api/openaiShim.test.ts index 2ab3b0eb9e..bf9f043d0a 100644 --- a/src/services/api/openaiShim.test.ts +++ b/src/services/api/openaiShim.test.ts @@ -3,6 +3,9 @@ import { registerGateway } from '../../integrations/index.ts' import { createOpenAIShimClient } from './openaiShim.ts' type FetchType = typeof globalThis.fetch +type OpenAIShimWithResponseData = AsyncIterable> & { + content: Array<{ type: string; text?: string }> +} const originalEnv = { OPENAI_BASE_URL: process.env.OPENAI_BASE_URL, @@ -34,6 +37,11 @@ const 
originalEnv = { OPENROUTER_API_KEY: process.env.OPENROUTER_API_KEY, DEEPSEEK_API_KEY: process.env.DEEPSEEK_API_KEY, MIMO_API_KEY: process.env.MIMO_API_KEY, + OPENAI_SHIM_TOOL_MODE: process.env.OPENAI_SHIM_TOOL_MODE, + ENABLE_SHIM_TOOL_SEARCH: process.env.ENABLE_SHIM_TOOL_SEARCH, + OPENAI_SHIM_DEBUG: process.env.OPENAI_SHIM_DEBUG, + ENABLE_SHIM_TOOL_SEARCH_DEBUG: process.env.ENABLE_SHIM_TOOL_SEARCH_DEBUG, + OPENAI_SHIM_TOKEN_AUDIT: process.env.OPENAI_SHIM_TOKEN_AUDIT, } const originalFetch = globalThis.fetch @@ -53,7 +61,11 @@ type OpenAIShimClient = { params: Record, options?: Record, ) => Promise & { - withResponse: () => Promise<{ data: AsyncIterable> }> + withResponse: () => Promise<{ + data: OpenAIShimWithResponseData + response: Response + request_id: string + }> } } } @@ -115,6 +127,11 @@ beforeEach(() => { delete process.env.OPENROUTER_API_KEY delete process.env.DEEPSEEK_API_KEY delete process.env.MIMO_API_KEY + delete process.env.OPENAI_SHIM_TOOL_MODE + delete process.env.ENABLE_SHIM_TOOL_SEARCH + delete process.env.OPENAI_SHIM_DEBUG + delete process.env.ENABLE_SHIM_TOOL_SEARCH_DEBUG + delete process.env.OPENAI_SHIM_TOKEN_AUDIT }) afterEach(() => { @@ -147,6 +164,11 @@ afterEach(() => { restoreEnv('OPENROUTER_API_KEY', originalEnv.OPENROUTER_API_KEY) restoreEnv('DEEPSEEK_API_KEY', originalEnv.DEEPSEEK_API_KEY) restoreEnv('MIMO_API_KEY', originalEnv.MIMO_API_KEY) + restoreEnv('OPENAI_SHIM_TOOL_MODE', originalEnv.OPENAI_SHIM_TOOL_MODE) + restoreEnv('ENABLE_SHIM_TOOL_SEARCH', originalEnv.ENABLE_SHIM_TOOL_SEARCH) + restoreEnv('OPENAI_SHIM_DEBUG', originalEnv.OPENAI_SHIM_DEBUG) + restoreEnv('ENABLE_SHIM_TOOL_SEARCH_DEBUG', originalEnv.ENABLE_SHIM_TOOL_SEARCH_DEBUG) + restoreEnv('OPENAI_SHIM_TOKEN_AUDIT', originalEnv.OPENAI_SHIM_TOKEN_AUDIT) globalThis.fetch = originalFetch }) @@ -983,18 +1005,1015 @@ test('uses route-specific credential env vars for descriptor-backed openai-compa role: 'assistant', content: 'ok', }, - finish_reason: 'stop', + finish_reason: 
'stop', + }, + ], + usage: { + prompt_tokens: 5, + completion_tokens: 1, + total_tokens: 6, + }, + }), + { + headers: { + 'Content-Type': 'application/json', + }, + }, + ) + }) as FetchType + + const client = createOpenAIShimClient({}) as OpenAIShimClient + + await client.beta.messages.create({ + model: 'openai/gpt-5-mini', + messages: [{ role: 'user', content: 'hello' }], + max_tokens: 64, + stream: false, + }) + + expect(capturedHeaders?.get('authorization')).toBe('Bearer or-route-key') +}) + +test('preserves Gemini tool call extra_content in follow-up requests', async () => { + let requestBody: Record | undefined + + globalThis.fetch = (async (_input, init) => { + requestBody = JSON.parse(String(init?.body)) + + return new Response( + JSON.stringify({ + id: 'chatcmpl-1', + model: 'google/gemini-3.1-pro-preview', + choices: [ + { + message: { + role: 'assistant', + content: 'done', + }, + finish_reason: 'stop', + }, + ], + usage: { + prompt_tokens: 12, + completion_tokens: 4, + total_tokens: 16, + }, + }), + { + headers: { + 'Content-Type': 'application/json', + }, + }, + ) + }) as FetchType + + const client = createOpenAIShimClient({}) as OpenAIShimClient + + await client.beta.messages.create({ + model: 'google/gemini-3.1-pro-preview', + system: 'test system', + messages: [ + { role: 'user', content: 'Use Bash' }, + { + role: 'assistant', + content: [ + { + type: 'tool_use', + id: 'call_1', + name: 'Bash', + input: { command: 'pwd' }, + extra_content: { + google: { + thought_signature: 'sig-123', + }, + }, + }, + ], + }, + { + role: 'user', + content: [ + { + type: 'tool_result', + tool_use_id: 'call_1', + content: 'D:\\repo', + }, + ], + }, + ], + max_tokens: 64, + stream: false, + }) + + const assistantWithToolCall = (requestBody?.messages as Array>).find( + message => Array.isArray(message.tool_calls), + ) as { tool_calls?: Array> } | undefined + + expect(assistantWithToolCall?.tool_calls?.[0]).toMatchObject({ + id: 'call_1', + type: 'function', + function: { 
+ name: 'Bash', + arguments: JSON.stringify({ command: 'pwd' }), + }, + extra_content: { + google: { + thought_signature: 'sig-123', + }, + }, + }) +}) + +test('preserves Grep tool pattern field in OpenAI-compatible schemas', async () => { + let requestBody: Record | undefined + + globalThis.fetch = (async (_input, init) => { + requestBody = JSON.parse(String(init?.body)) + + return new Response( + JSON.stringify({ + id: 'chatcmpl-grep-schema', + model: 'qwen/qwen3.6-plus', + choices: [ + { + message: { + role: 'assistant', + content: 'done', + }, + finish_reason: 'stop', + }, + ], + usage: { + prompt_tokens: 12, + completion_tokens: 4, + total_tokens: 16, + }, + }), + { + headers: { + 'Content-Type': 'application/json', + }, + }, + ) + }) as FetchType + + const client = createOpenAIShimClient({}) as OpenAIShimClient + + await client.beta.messages.create({ + model: 'qwen/qwen3.6-plus', + system: 'test system', + messages: [{ role: 'user', content: 'Use Grep' }], + tools: [ + { + name: 'Grep', + description: 'Search file contents', + input_schema: { + type: 'object', + properties: { + pattern: { type: 'string', description: 'Search pattern' }, + path: { type: 'string' }, + }, + required: ['pattern'], + additionalProperties: false, + }, + }, + ], + max_tokens: 64, + stream: false, + }) + + const tools = requestBody?.tools as Array> | undefined + const grepTool = tools?.find(tool => (tool.function as Record)?.name === 'Grep') as + | { function?: { parameters?: { properties?: Record; required?: string[] } } } + | undefined + + expect(Object.keys(grepTool?.function?.parameters?.properties ?? {})).toContain('pattern') + expect(grepTool?.function?.parameters?.required).toContain('pattern') +}) + +function makeShimToolFixtures() { + return [ + { + name: 'Bash', + description: 'Execute shell commands. 
This long description should be truncated by shim minification because third-party providers do not need every detail.', + input_schema: { + type: 'object', + properties: { + command: { + type: 'string', + description: 'The command to execute.', + }, + }, + required: ['command'], + }, + }, + { + name: 'Read', + description: 'Read file contents.', + input_schema: { + type: 'object', + properties: { + file_path: { + type: 'string', + description: 'Path to read.', + }, + }, + required: ['file_path'], + }, + }, + { + name: 'WebSearch', + description: 'Search the web for current information.', + input_schema: { + type: 'object', + properties: { + query: { + type: 'string', + description: 'Search query.', + }, + }, + required: ['query'], + }, + }, + { + name: 'WebFetch', + description: 'Fetch and read a web page.', + input_schema: { + type: 'object', + properties: { + url: { + type: 'string', + description: 'URL to fetch.', + }, + }, + required: ['url'], + }, + }, + { + name: 'Skill', + description: 'Load and use a skill.', + input_schema: { + type: 'object', + properties: { + name: { + type: 'string', + description: 'Skill name.', + }, + }, + required: ['name'], + }, + }, + ] +} + +test('ShimToolSearch minify mode sends every available tool with parameter prose stripped', async () => { + let requestBody: Record | undefined + process.env.OPENAI_SHIM_TOOL_MODE = 'minify' + + globalThis.fetch = (async (_input, init) => { + requestBody = JSON.parse(String(init?.body)) + + return new Response( + JSON.stringify({ + id: 'chatcmpl-shim-minify', + model: 'fake-model', + choices: [{ message: { role: 'assistant', content: 'ok' }, finish_reason: 'stop' }], + usage: { prompt_tokens: 1, completion_tokens: 1, total_tokens: 2 }, + }), + { headers: { 'Content-Type': 'application/json' } }, + ) + }) as FetchType + + const client = createOpenAIShimClient({}) as OpenAIShimClient + + await client.beta.messages.create({ + model: 'fake-model', + system: 'test system', + messages: [{ role: 
'user', content: 'hello' }],
+    tools: makeShimToolFixtures(),
+    max_tokens: 32,
+    stream: false,
+  })
+
+  const tools = requestBody?.tools as Array<{ function: { name: string; parameters: Record } }>
+  expect(tools.map(tool => tool.function.name).sort()).toEqual([
+    'Bash',
+    'Read',
+    'Skill',
+    'WebFetch',
+    'WebSearch',
+  ])
+  const bashParameters = tools.find(tool => tool.function.name === 'Bash')?.function.parameters as {
+    properties?: Record>
+  }
+  expect(bashParameters.properties?.command?.description).toBeUndefined()
+})
+
+test('ShimToolSearch minify mode reduces tools for responses transport', async () => {
+  let requestBody: Record | undefined
+  let auditOutput = ''
+  const originalStderrWrite = process.stderr.write
+  process.env.OPENAI_API_FORMAT = 'responses'
+  process.env.OPENAI_SHIM_TOOL_MODE = 'minify'
+  process.env.OPENAI_SHIM_TOKEN_AUDIT = '1'
+
+  process.stderr.write = ((chunk: string | Uint8Array): boolean => {
+    auditOutput += String(chunk)
+    return true
+  }) as typeof process.stderr.write
+
+  globalThis.fetch = (async (_input, init) => {
+    requestBody = JSON.parse(String(init?.body))
+
+    return new Response(
+      JSON.stringify({
+        id: 'resp-shim-minify',
+        model: 'fake-model',
+        output: [
+          {
+            type: 'message',
+            role: 'assistant',
+            content: [{ type: 'output_text', text: 'ok' }],
+          },
+        ],
+        usage: { input_tokens: 1, output_tokens: 1, total_tokens: 2 },
+      }),
+      { headers: { 'Content-Type': 'application/json' } },
+    )
+  }) as FetchType
+
+  try { // client is created inside the try so a throw here still restores stderr
+    const client = createOpenAIShimClient({}) as OpenAIShimClient
+
+    await client.beta.messages.create({
+      model: 'fake-model',
+      system: 'test system',
+      messages: [{ role: 'user', content: 'hello' }],
+      tools: makeShimToolFixtures(),
+      max_tokens: 32,
+      stream: false,
+    })
+  } finally {
+    process.stderr.write = originalStderrWrite
+  }
+
+  const tools = requestBody?.tools as Array<{ name: string; parameters: { properties?: Record> } }>
+  expect(tools.map(tool => tool.name).sort()).toEqual([
+    'Bash',
+    'Read',
+    'Skill',
+    'WebFetch',
+    'WebSearch',
+  ])
+  const bashParameters = tools.find(tool => tool.name === 'Bash')?.parameters
+  expect(bashParameters?.properties?.command?.description).toBeUndefined()
+  expect(auditOutput).toContain('transport=responses')
+  expect(auditOutput).toContain('tool.Bash')
+})
+
+test('OpenAI shim token audit logs request component breakdown when enabled', async () => {
+  let requestBody: Record | undefined
+  let auditOutput = ''
+  const originalStderrWrite = process.stderr.write
+  process.env.OPENAI_SHIM_TOKEN_AUDIT = '1'
+  process.env.OPENAI_SHIM_TOOL_MODE = 'minify'
+
+  process.stderr.write = ((chunk: string | Uint8Array): boolean => {
+    auditOutput += String(chunk)
+    return true
+  }) as typeof process.stderr.write
+
+  globalThis.fetch = (async (_input, init) => {
+    requestBody = JSON.parse(String(init?.body))
+
+    return new Response(
+      JSON.stringify({
+        id: 'chatcmpl-shim-audit',
+        model: 'fake-model',
+        choices: [{ message: { role: 'assistant', content: 'ok' }, finish_reason: 'stop' }],
+        usage: { prompt_tokens: 1, completion_tokens: 1, total_tokens: 2 },
+      }),
+      { headers: { 'Content-Type': 'application/json' } },
+    )
+  }) as FetchType
+
+  try {
+    const client = createOpenAIShimClient({}) as OpenAIShimClient
+
+    await client.beta.messages.create({
+      model: 'fake-model',
+      system: 'test system',
+      messages: [
+        { role: 'user', content: 'Read src/config.json' },
+        { role: 'assistant', content: 'I will read it.'
}, + ], + tools: makeShimToolFixtures(), + max_tokens: 32, + stream: false, + }) + } finally { + process.stderr.write = originalStderrWrite + } + + expect(requestBody).toBeDefined() + expect(auditOutput).toContain('[OpenAIShimTokenAudit] total chars=') + expect(auditOutput).toContain('messages.system') + expect(auditOutput).toContain('messages.user') + expect(auditOutput).toContain('messages.assistant') + expect(auditOutput).toContain('tool_schemas') + expect(auditOutput).toContain('tool.Bash') +}) + +test('ShimToolSearch predict mode sends all tools when prediction is uncertain', async () => { + let requestBody: Record | undefined + process.env.OPENAI_SHIM_TOOL_MODE = 'predict' + + globalThis.fetch = (async (_input, init) => { + requestBody = JSON.parse(String(init?.body)) + + return new Response( + JSON.stringify({ + id: 'chatcmpl-shim-predict', + model: 'fake-model', + choices: [{ message: { role: 'assistant', content: 'ok' }, finish_reason: 'stop' }], + usage: { prompt_tokens: 1, completion_tokens: 1, total_tokens: 2 }, + }), + { headers: { 'Content-Type': 'application/json' } }, + ) + }) as FetchType + + const client = createOpenAIShimClient({}) as OpenAIShimClient + + await client.beta.messages.create({ + model: 'fake-model', + system: 'test system', + messages: [{ role: 'user', content: 'Investigate this repository architecture' }], + tools: makeShimToolFixtures(), + max_tokens: 32, + stream: false, + }) + + const toolNames = ((requestBody?.tools ?? 
[]) as Array<{ function: { name: string } }>) + .map(tool => tool.function.name) + .sort() + expect(toolNames).toEqual(['Bash', 'Read', 'Skill', 'WebFetch', 'WebSearch']) +}) + +test('ShimToolSearch predict mode includes web tools for current web requests', async () => { + let requestBody: Record | undefined + process.env.OPENAI_SHIM_TOOL_MODE = 'predict' + + globalThis.fetch = (async (_input, init) => { + requestBody = JSON.parse(String(init?.body)) + + return new Response( + JSON.stringify({ + id: 'chatcmpl-shim-web', + model: 'fake-model', + choices: [{ message: { role: 'assistant', content: 'ok' }, finish_reason: 'stop' }], + usage: { prompt_tokens: 1, completion_tokens: 1, total_tokens: 2 }, + }), + { headers: { 'Content-Type': 'application/json' } }, + ) + }) as FetchType + + const client = createOpenAIShimClient({}) as OpenAIShimClient + + await client.beta.messages.create({ + model: 'fake-model', + system: 'test system', + messages: [{ role: 'user', content: 'Search the web for current React release notes' }], + tools: makeShimToolFixtures(), + max_tokens: 32, + stream: false, + }) + + const toolNames = ((requestBody?.tools ?? 
[]) as Array<{ function: { name: string } }>) + .map(tool => tool.function.name) + expect(toolNames).toContain('WebSearch') + expect(toolNames).toContain('WebFetch') +}) + +test('ShimToolSearch predict mode includes forced tool_choice tool for conversational prompts', async () => { + let requestBody: Record | undefined + process.env.OPENAI_SHIM_TOOL_MODE = 'predict' + + globalThis.fetch = (async (_input, init) => { + requestBody = JSON.parse(String(init?.body)) + + return new Response( + JSON.stringify({ + id: 'chatcmpl-shim-predict-forced-tool', + model: 'fake-model', + choices: [{ message: { role: 'assistant', content: 'ok' }, finish_reason: 'stop' }], + usage: { prompt_tokens: 1, completion_tokens: 1, total_tokens: 2 }, + }), + { headers: { 'Content-Type': 'application/json' } }, + ) + }) as FetchType + + const client = createOpenAIShimClient({}) as OpenAIShimClient + + await client.beta.messages.create({ + model: 'fake-model', + system: 'test system', + messages: [{ role: 'user', content: 'What is 2+2?' }], + tools: makeShimToolFixtures(), + tool_choice: { type: 'tool', name: 'WebSearch' }, + max_tokens: 32, + stream: false, + }) + + const toolNames = ((requestBody?.tools ?? 
[]) as Array<{ function: { name: string } }>) + .map(tool => tool.function.name) + expect(toolNames.length).toBeGreaterThan(0) + expect(toolNames).toContain('WebSearch') + expect(requestBody?.tool_choice).toEqual({ + type: 'function', + function: { name: 'WebSearch' }, + }) +}) + +test('ShimToolSearch predict mode reduces tools for responses transport', async () => { + let requestBody: Record | undefined + process.env.OPENAI_API_FORMAT = 'responses' + process.env.OPENAI_SHIM_TOOL_MODE = 'predict' + + globalThis.fetch = (async (_input, init) => { + requestBody = JSON.parse(String(init?.body)) + + return new Response( + JSON.stringify({ + id: 'resp-shim-predict-web', + model: 'fake-model', + output: [ + { + type: 'message', + role: 'assistant', + content: [{ type: 'output_text', text: 'ok' }], + }, + ], + usage: { input_tokens: 1, output_tokens: 1, total_tokens: 2 }, + }), + { headers: { 'Content-Type': 'application/json' } }, + ) + }) as FetchType + + const client = createOpenAIShimClient({}) as OpenAIShimClient + + await client.beta.messages.create({ + model: 'fake-model', + system: 'test system', + messages: [{ role: 'user', content: 'Search the web for current React release notes' }], + tools: makeShimToolFixtures(), + max_tokens: 32, + stream: false, + }) + + const tools = (requestBody?.tools ?? 
[]) as Array<{ + name: string + parameters: { properties?: Record> } + }> + expect(tools.map(tool => tool.name).sort()).toEqual(['Bash', 'Read', 'WebFetch', 'WebSearch']) + const webSearchParameters = tools.find(tool => tool.name === 'WebSearch')?.parameters + expect(webSearchParameters?.properties?.query?.description).toBeUndefined() +}) + +test('ShimToolSearch predict mode does not re-expand empty predicted tools for responses transport', async () => { + let requestBody: Record | undefined + process.env.OPENAI_API_FORMAT = 'responses' + process.env.OPENAI_SHIM_TOOL_MODE = 'predict' + + globalThis.fetch = (async (_input, init) => { + requestBody = JSON.parse(String(init?.body)) + + return new Response( + JSON.stringify({ + id: 'resp-shim-predict-empty', + model: 'fake-model', + output: [ + { + type: 'message', + role: 'assistant', + content: [{ type: 'output_text', text: 'ok' }], + }, + ], + usage: { input_tokens: 1, output_tokens: 1, total_tokens: 2 }, + }), + { headers: { 'Content-Type': 'application/json' } }, + ) + }) as FetchType + + const client = createOpenAIShimClient({}) as OpenAIShimClient + + await client.beta.messages.create({ + model: 'fake-model', + system: 'test system', + messages: [{ role: 'user', content: 'What is 2+2?' 
}], + tools: makeShimToolFixtures(), + max_tokens: 32, + stream: false, + }) + + expect(requestBody?.tools).toBeUndefined() + expect(requestBody?.tool_choice).toBeUndefined() +}) + +test('ShimToolSearch predict mode includes forced tool_choice tool for responses transport', async () => { + let requestBody: Record | undefined + process.env.OPENAI_API_FORMAT = 'responses' + process.env.OPENAI_SHIM_TOOL_MODE = 'predict' + + globalThis.fetch = (async (_input, init) => { + requestBody = JSON.parse(String(init?.body)) + + return new Response( + JSON.stringify({ + id: 'resp-shim-predict-forced-tool', + model: 'fake-model', + output: [ + { + type: 'message', + role: 'assistant', + content: [{ type: 'output_text', text: 'ok' }], + }, + ], + usage: { input_tokens: 1, output_tokens: 1, total_tokens: 2 }, + }), + { headers: { 'Content-Type': 'application/json' } }, + ) + }) as FetchType + + const client = createOpenAIShimClient({}) as OpenAIShimClient + + await client.beta.messages.create({ + model: 'fake-model', + system: 'test system', + messages: [{ role: 'user', content: 'What is 2+2?' }], + tools: makeShimToolFixtures(), + tool_choice: { type: 'tool', name: 'WebSearch' }, + max_tokens: 32, + stream: false, + }) + + const toolNames = ((requestBody?.tools ?? 
[]) as Array<{ name: string }>) + .map(tool => tool.name) + expect(toolNames).toContain('WebSearch') + expect(requestBody?.tool_choice).toEqual({ + type: 'function', + name: 'WebSearch', + }) +}) + +test('OpenAI-compatible chat transport drops forced tool_choice when selected tools lack that schema', async () => { + let requestBody: Record | undefined + process.env.OPENAI_SHIM_TOOL_MODE = 'predict' + + globalThis.fetch = (async (_input, init) => { + requestBody = JSON.parse(String(init?.body)) + + return new Response( + JSON.stringify({ + id: 'chatcmpl-shim-missing-forced-tool', + model: 'fake-model', + choices: [{ message: { role: 'assistant', content: 'ok' }, finish_reason: 'stop' }], + usage: { prompt_tokens: 1, completion_tokens: 1, total_tokens: 2 }, + }), + { headers: { 'Content-Type': 'application/json' } }, + ) + }) as FetchType + + const client = createOpenAIShimClient({}) as OpenAIShimClient + + await client.beta.messages.create({ + model: 'fake-model', + system: 'test system', + messages: [{ role: 'user', content: 'What is 2+2?' }], + tools: makeShimToolFixtures().filter(tool => tool.name !== 'WebSearch'), + tool_choice: { type: 'tool', name: 'WebSearch' }, + max_tokens: 32, + stream: false, + }) + + const toolNames = ((requestBody?.tools ?? 
[]) as Array<{ function: { name: string } }>) + .map(tool => tool.function.name) + expect(toolNames).not.toContain('WebSearch') + expect(requestBody?.tool_choice).toBeUndefined() +}) + +test('OpenAI-compatible responses transport drops forced tool_choice when selected tools lack that schema', async () => { + let requestBody: Record | undefined + process.env.OPENAI_API_FORMAT = 'responses' + process.env.OPENAI_SHIM_TOOL_MODE = 'predict' + + globalThis.fetch = (async (_input, init) => { + requestBody = JSON.parse(String(init?.body)) + + return new Response( + JSON.stringify({ + id: 'resp-shim-missing-forced-tool', + model: 'fake-model', + output: [ + { + type: 'message', + role: 'assistant', + content: [{ type: 'output_text', text: 'ok' }], + }, + ], + usage: { input_tokens: 1, output_tokens: 1, total_tokens: 2 }, + }), + { headers: { 'Content-Type': 'application/json' } }, + ) + }) as FetchType + + const client = createOpenAIShimClient({}) as OpenAIShimClient + + await client.beta.messages.create({ + model: 'fake-model', + system: 'test system', + messages: [{ role: 'user', content: 'What is 2+2?' }], + tools: makeShimToolFixtures().filter(tool => tool.name !== 'WebSearch'), + tool_choice: { type: 'tool', name: 'WebSearch' }, + max_tokens: 32, + stream: false, + }) + + const toolNames = ((requestBody?.tools ?? 
[]) as Array<{ name: string }>) + .map(tool => tool.name) + expect(toolNames).not.toContain('WebSearch') + expect(requestBody?.tool_choice).toBeUndefined() +}) + +test('OpenAI-compatible responses transport drops scalar any when predict selects no tools', async () => { + let requestBody: Record | undefined + process.env.OPENAI_API_FORMAT = 'responses' + process.env.OPENAI_SHIM_TOOL_MODE = 'predict' + + globalThis.fetch = (async (_input, init) => { + requestBody = JSON.parse(String(init?.body)) + + return new Response( + JSON.stringify({ + id: 'resp-shim-any-empty-tools', + model: 'fake-model', + output: [ + { + type: 'message', + role: 'assistant', + content: [{ type: 'output_text', text: 'ok' }], + }, + ], + usage: { input_tokens: 1, output_tokens: 1, total_tokens: 2 }, + }), + { headers: { 'Content-Type': 'application/json' } }, + ) + }) as FetchType + + const client = createOpenAIShimClient({}) as OpenAIShimClient + + await client.beta.messages.create({ + model: 'fake-model', + system: 'test system', + messages: [{ role: 'user', content: 'What is 2+2?' 
}], + tools: makeShimToolFixtures(), + tool_choice: { type: 'any' }, + max_tokens: 32, + stream: false, + }) + + expect(requestBody?.tools).toBeUndefined() + expect(requestBody?.tool_choice).toBeUndefined() +}) + +test('OpenAI-compatible responses transport preserves tool_choice', async () => { + let requestBody: Record | undefined + process.env.OPENAI_API_FORMAT = 'responses' + process.env.OPENAI_SHIM_TOOL_MODE = 'minify' + + globalThis.fetch = (async (_input, init) => { + requestBody = JSON.parse(String(init?.body)) + + return new Response( + JSON.stringify({ + id: 'resp-shim-tool-choice', + model: 'fake-model', + output: [ + { + type: 'message', + role: 'assistant', + content: [{ type: 'output_text', text: 'ok' }], + }, + ], + usage: { input_tokens: 1, output_tokens: 1, total_tokens: 2 }, + }), + { headers: { 'Content-Type': 'application/json' } }, + ) + }) as FetchType + + const client = createOpenAIShimClient({}) as OpenAIShimClient + + await client.beta.messages.create({ + model: 'fake-model', + system: 'test system', + messages: [{ role: 'user', content: 'Read src/config.json' }], + tools: makeShimToolFixtures(), + tool_choice: { type: 'tool', name: 'Read' }, + max_tokens: 32, + stream: false, + }) + + expect(requestBody?.tool_choice).toEqual({ + type: 'function', + name: 'Read', + }) +}) + +test('OpenAI-compatible responses transport preserves scalar tool_choice modes', async () => { + const cases: Array<{ + anthropicToolChoice: Record + expectedResponsesToolChoice: string + }> = [ + { anthropicToolChoice: { type: 'auto' }, expectedResponsesToolChoice: 'auto' }, + { anthropicToolChoice: { type: 'any' }, expectedResponsesToolChoice: 'required' }, + { anthropicToolChoice: { type: 'none' }, expectedResponsesToolChoice: 'none' }, + ] + + for (const testCase of cases) { + let requestBody: Record | undefined + process.env.OPENAI_API_FORMAT = 'responses' + process.env.OPENAI_SHIM_TOOL_MODE = 'minify' + + globalThis.fetch = (async (_input, init) => { + requestBody 
test('ShimToolSearch lazy phase 1 sends a valid request_tools schema through system prompt', async () => {
  process.env.OPENAI_SHIM_TOOL_MODE = 'lazy'

  // Chat-completions reply in which the model answers directly, without tools.
  const directAnswerReply = {
    id: 'chatcmpl-shim-lazy',
    model: 'fake-model',
    choices: [{ message: { role: 'assistant', content: 'direct answer' }, finish_reason: 'stop' }],
    usage: { prompt_tokens: 1, completion_tokens: 1, total_tokens: 2 },
  }

  // Stub fetch: keep every outgoing body so the number of round-trips can be checked.
  const sentBodies: Array<Record<string, unknown>> = []
  globalThis.fetch = (async (_input, init) => {
    sentBodies.push(JSON.parse(String(init?.body)))
    return new Response(JSON.stringify(directAnswerReply), {
      headers: { 'Content-Type': 'application/json' },
    })
  }) as FetchType

  const shim = createOpenAIShimClient({}) as OpenAIShimClient

  const outcome = await shim.beta.messages
    .create({
      model: 'fake-model',
      system: 'test system',
      messages: [{ role: 'user', content: 'What is 2+2?' }],
      tools: makeShimToolFixtures(),
      max_tokens: 32,
      stream: false,
    })
    .withResponse()

  // Exactly one request went out, advertising request_tools as its first tool.
  expect(sentBodies).toHaveLength(1)
  const [firstBody] = sentBodies
  const advertisedTools = firstBody.tools as Array<{ function: { name: string } }>
  expect(advertisedTools[0].function.name).toBe('request_tools')

  // The tool directory is expected inside the system message.
  const outgoingMessages = firstBody.messages as Array<{ role: string; content: string }>
  const systemEntry = outgoingMessages.find(entry => entry.role === 'system')
  expect(systemEntry?.content).toContain('Available tools')
  expect(systemEntry?.content).toContain('WebSearch')

  // The direct answer and the HTTP metadata are surfaced through withResponse().
  expect(outcome.data.content).toEqual([{ type: 'text', text: 'direct answer' }])
  expect(outcome.response.headers.get('content-type')).toContain('application/json')
  expect(outcome.request_id).toMatch(/^msg_/)
})
[]) as Array<{ function: { name: string } }>) + .map(tool => tool.function.name) + expect(toolNames).toContain('WebSearch') + expect(toolNames).not.toContain('request_tools') + expect(requestBodies[0].tool_choice).toEqual({ + type: 'function', + function: { name: 'WebSearch' }, + }) +}) + +test('ShimToolSearch lazy phase 2 falls back to all tools on malformed request_tools JSON', async () => { + const requestBodies: Array> = [] + process.env.OPENAI_SHIM_TOOL_MODE = 'lazy' + + globalThis.fetch = (async (_input, init) => { + requestBodies.push(JSON.parse(String(init?.body))) + + if (requestBodies.length === 1) { + return new Response( + JSON.stringify({ + id: 'chatcmpl-shim-lazy-phase1', + model: 'fake-model', + choices: [ + { + message: { + role: 'assistant', + content: null, + tool_calls: [ + { + id: 'call_request_tools', + type: 'function', + function: { name: 'request_tools', arguments: '{not-json' }, + }, + ], + }, + finish_reason: 'tool_calls', + }, + ], + usage: { prompt_tokens: 1, completion_tokens: 1, total_tokens: 2 }, + }), + { + headers: { + 'Content-Type': 'application/json', + 'x-request-id': 'phase1-request', }, - ], - usage: { - prompt_tokens: 5, - completion_tokens: 1, - total_tokens: 6, }, + ) + } + + return new Response( + JSON.stringify({ + id: 'chatcmpl-shim-lazy-phase2', + model: 'fake-model', + choices: [{ message: { role: 'assistant', content: 'done' }, finish_reason: 'stop' }], + usage: { prompt_tokens: 1, completion_tokens: 1, total_tokens: 2 }, }), { headers: { 'Content-Type': 'application/json', + 'x-request-id': 'phase2-request', }, }, ) @@ -1002,171 +2021,156 @@ test('uses route-specific credential env vars for descriptor-backed openai-compa const client = createOpenAIShimClient({}) as OpenAIShimClient - await client.beta.messages.create({ - model: 'openai/gpt-5-mini', - messages: [{ role: 'user', content: 'hello' }], - max_tokens: 64, - stream: false, - }) + const result = await client.beta.messages + .create({ + model: 'fake-model', + 
test('ShimToolSearch lazy phase 2 applies requested tool reduction for responses transport', async () => {
  process.env.OPENAI_API_FORMAT = 'responses'
  process.env.OPENAI_SHIM_TOOL_MODE = 'lazy'

  // Small helper so both canned replies share the same JSON response envelope.
  const jsonReply = (payload: unknown): Response =>
    new Response(JSON.stringify(payload), {
      headers: { 'Content-Type': 'application/json' },
    })

  // Phase 1: the model asks for the WebSearch schema via request_tools.
  const phase1Reply = {
    id: 'resp-shim-lazy-phase1',
    model: 'fake-model',
    output: [
      {
        type: 'function_call',
        name: 'request_tools',
        arguments: '{"tools":["WebSearch"]}',
        call_id: 'call_request_tools',
      },
    ],
    usage: { input_tokens: 1, output_tokens: 1, total_tokens: 2 },
  }

  // Phase 2: a plain assistant message.
  const phase2Reply = {
    id: 'resp-shim-lazy-phase2',
    model: 'fake-model',
    output: [
      {
        type: 'message',
        role: 'assistant',
        content: [{ type: 'output_text', text: 'done' }],
      },
    ],
    usage: { input_tokens: 1, output_tokens: 1, total_tokens: 2 },
  }

  // Stub fetch: the first call gets the phase-1 reply, every later call the phase-2 reply.
  const sentBodies: Array<Record<string, unknown>> = []
  globalThis.fetch = (async (_input, init) => {
    sentBodies.push(JSON.parse(String(init?.body)))
    return jsonReply(sentBodies.length === 1 ? phase1Reply : phase2Reply)
  }) as FetchType

  const shim = createOpenAIShimClient({}) as OpenAIShimClient

  await shim.beta.messages.create({
    model: 'fake-model',
    system: 'test system',
    messages: [{ role: 'user', content: 'What is 2+2?' }],
    tools: makeShimToolFixtures(),
    max_tokens: 32,
    stream: false,
  })

  expect(sentBodies).toHaveLength(2)

  // Phase 1 advertises only the meta-tool (Responses tools carry a flat `name`).
  const phase1Names = ((sentBodies[0].tools ?? []) as Array<{ name: string }>)
    .map(entry => entry.name)
  expect(phase1Names).toEqual(['request_tools'])

  // Phase 2 carries the reduced tool set, with parameter prose stripped.
  const phase2Tools = (sentBodies[1].tools ?? []) as Array<{
    name: string
    parameters: { properties?: Record<string, Record<string, unknown>> }
  }>
  const phase2Names = phase2Tools.map(entry => entry.name).sort()
  expect(phase2Names).toEqual(['Bash', 'Read', 'WebSearch'])

  const webSearchTool = phase2Tools.find(entry => entry.name === 'WebSearch')
  expect(webSearchTool?.parameters?.properties?.query?.description).toBeUndefined()
})
client.beta.messages.create({ - model: 'qwen/qwen3.6-plus', + model: 'fake-model', system: 'test system', - messages: [{ role: 'user', content: 'Use Grep' }], - tools: [ - { - name: 'Grep', - description: 'Search file contents', - input_schema: { - type: 'object', - properties: { - pattern: { type: 'string', description: 'Search pattern' }, - path: { type: 'string' }, - }, - required: ['pattern'], - additionalProperties: false, - }, - }, - ], - max_tokens: 64, + messages: [{ role: 'user', content: 'What is 2+2?' }], + tools: makeShimToolFixtures(), + max_tokens: 32, stream: false, }) - const tools = requestBody?.tools as Array> | undefined - const grepTool = tools?.find(tool => (tool.function as Record)?.name === 'Grep') as - | { function?: { parameters?: { properties?: Record; required?: string[] } } } - | undefined - - expect(Object.keys(grepTool?.function?.parameters?.properties ?? {})).toContain('pattern') - expect(grepTool?.function?.parameters?.required).toContain('pattern') + expect(requestBodies).toHaveLength(2) + const phase2ToolNames = ((requestBodies[1].tools ?? 
[]) as Array<{ name: string }>) + .map(tool => tool.name) + .sort() + expect(phase2ToolNames).toEqual(['Bash', 'Read', 'Skill', 'WebFetch', 'WebSearch']) }) test('does not infer Gemini mode from OPENAI_BASE_URL path substrings', async () => { @@ -3972,6 +4976,74 @@ test('self-heals tool-call incompatibility by retrying local Ollama requests wit expect(requestBodies[1]?.tool_choice).toBeUndefined() }) +test('self-heals responses transport tool-call incompatibility without stale tools', async () => { + process.env.OPENAI_BASE_URL = 'http://localhost:11434/v1' + process.env.OPENAI_API_FORMAT = 'responses' + process.env.OPENAI_SHIM_TOOL_MODE = 'predict' + + const requestBodies: Array> = [] + globalThis.fetch = (async (_input, init) => { + const requestBody = JSON.parse(String(init?.body)) as Record + requestBodies.push(requestBody) + + if (requestBodies.length === 1) { + return new Response('tool_calls are not supported', { + status: 400, + headers: { + 'Content-Type': 'text/plain', + }, + }) + } + + return new Response( + JSON.stringify({ + id: 'resp-local-toolless', + model: 'qwen2.5-coder:7b', + output: [ + { + type: 'message', + role: 'assistant', + content: [{ type: 'output_text', text: 'fallback without tools' }], + }, + ], + usage: { + input_tokens: 8, + output_tokens: 4, + total_tokens: 12, + }, + }), + { + status: 200, + headers: { + 'Content-Type': 'application/json', + }, + }, + ) + }) as FetchType + + const client = createOpenAIShimClient({}) as OpenAIShimClient + + await expect( + client.beta.messages.create({ + model: 'qwen2.5-coder:7b', + messages: [{ role: 'user', content: 'Read a file in this repository' }], + tools: makeShimToolFixtures(), + tool_choice: { type: 'tool', name: 'Read' }, + max_tokens: 64, + stream: false, + }), + ).resolves.toBeDefined() + + expect(requestBodies).toHaveLength(2) + expect(((requestBodies[0]?.tools ?? 
/**
 * Resolve the active ShimToolSearch mode from the environment.
 *
 * `OPENAI_SHIM_TOOL_MODE` takes precedence. `ENABLE_SHIM_TOOL_SEARCH` is the
 * backwards-compatible switch and is consulted when the primary variable is
 * unset OR blank. Previously an exported-but-empty `OPENAI_SHIM_TOOL_MODE`
 * shadowed the legacy flag, because `??` only falls through on
 * null/undefined, which silently disabled an otherwise enabled alias.
 *
 * Accepted values (case-insensitive, surrounding whitespace ignored):
 *   - '', '0', 'false', 'off'                    -> 'off'
 *   - 'minify'                                    -> 'minify'
 *   - 'predict', 'safe-predict', 'predict-safe'   -> 'predict'
 *   - 'lazy', '1', 'true', 'on'                   -> 'lazy'
 * Any unrecognised value resolves to 'off', so a typo never enables a mode.
 *
 * @returns The resolved mode; never throws.
 */
function getShimToolSearchMode(): ShimToolSearchMode {
  // First non-blank candidate wins; a non-blank primary value (even an
  // unrecognised one) still overrides the legacy alias.
  const raw = [
    process.env.OPENAI_SHIM_TOOL_MODE,
    process.env.ENABLE_SHIM_TOOL_SEARCH,
  ]
    .map(value => (value ?? '').trim().toLowerCase())
    .find(value => value !== '') ?? ''

  switch (raw) {
    case 'minify':
      return 'minify'
    case 'predict':
    case 'safe-predict':
    case 'predict-safe':
      return 'predict'
    case 'lazy':
    case '1':
    case 'true':
    case 'on':
      return 'lazy'
    default:
      // Covers '', '0', 'false', 'off' and anything unknown.
      return 'off'
  }
}
/**
 * Break a Responses-API request body into named size buckets for the token
 * audit log.
 *
 * `instructions` is measured by raw string length; input items are grouped by
 * their `role`; `tools` is measured as one serialized blob; every remaining
 * top-level field is lumped together as `response_config`.
 *
 * @param body Request body about to be sent to the Responses endpoint.
 * @returns One audit part per bucket, in a fixed order.
 */
function buildResponsesTokenAuditParts(
  body: Record<string, unknown>,
): OpenAIShimTokenAuditPart[] {
  // Everything that is not input / instructions / tools counts as config.
  const { input, instructions, tools, ...responseConfig } = body

  const inputPart = (label: string, role: string): OpenAIShimTokenAuditPart =>
    makeTokenAuditPart(label, sumResponsesInputMessageChars(input, role))

  return [
    makeTokenAuditPart(
      'instructions',
      typeof instructions === 'string' ? instructions.length : 0,
    ),
    inputPart('input.user', 'user'),
    inputPart('input.assistant', 'assistant'),
    inputPart('input.system', 'system'),
    makeTokenAuditPart(
      'tool_schemas',
      Array.isArray(tools) ? serializedSizeOf(tools) : 0,
    ),
    makeTokenAuditPart('response_config', serializedSizeOf(responseConfig)),
  ]
}
buildResponsesTokenAuditParts(args.body) + : buildChatCompletionsTokenAuditParts(args.body) + const usedChars = baseParts.reduce((sum, part) => sum + part.chars, 0) + const remainingChars = Math.max(args.serializedBody.length - usedChars, 0) + const parts = [ + ...baseParts, + makeTokenAuditPart('json_overhead', remainingChars), + ] + .filter(part => part.chars > 0) + .sort((a, b) => b.chars - a.chars) + + const totalEstimatedTokens = estimateRequestTokensFromChars(args.serializedBody.length) + const toolCount = Array.isArray(args.body.tools) ? args.body.tools.length : 0 + process.stderr.write( + `[OpenAIShimTokenAudit] total chars=${args.serializedBody.length} est_tokens=${totalEstimatedTokens} model=${args.model} transport=${args.transport} tools=${toolCount}\n`, + ) + + for (const part of parts) { + const percent = args.serializedBody.length > 0 + ? Math.round((part.chars / args.serializedBody.length) * 100) + : 0 + process.stderr.write( + `[OpenAIShimTokenAudit] ${part.name}: chars=${part.chars} est_tokens=${part.estimatedTokens} pct=${percent}\n`, + ) + } + + const getToolNameForAudit = (tool: unknown): string | null => { + if (!tool || typeof tool !== 'object' || Array.isArray(tool)) return null + + const record = tool as Record + const functionValue = record.function + if (functionValue && typeof functionValue === 'object' && !Array.isArray(functionValue)) { + const functionName = (functionValue as Record).name + if (typeof functionName === 'string') return functionName + } + + return typeof record.name === 'string' ? record.name : null + } + + const tools = Array.isArray(args.body.tools) ? 
/**
 * JSON Schema keywords whose value maps caller-chosen names to sub-schemas.
 * The keys under these keywords are parameter/definition names, not schema
 * annotations, so they must never be filtered.
 */
const SHIM_SCHEMA_NAME_MAP_KEYWORDS = new Set([
  'properties',
  'patternProperties',
  '$defs',
  'definitions',
  'dependentSchemas',
])

/**
 * JSON Schema keywords whose value is literal data rather than a schema.
 * Their contents are copied through untouched.
 */
const SHIM_SCHEMA_LITERAL_KEYWORDS = new Set([
  'enum',
  'const',
  'default',
  'examples',
  'required',
  'dependentRequired',
])

/** True for non-null, non-array objects. */
function isShimSchemaRecord(value: unknown): value is Record<string, unknown> {
  return value !== null && typeof value === 'object' && !Array.isArray(value)
}

/**
 * Strip `description` / `title` annotations from a JSON Schema while
 * preserving its structure (type / required / properties / enum / ...).
 *
 * The traversal is schema-aware:
 *   - Names under `properties` (and the other name-map keywords) are kept
 *     even when a parameter is literally called `description` or `title`.
 *     Dropping them would leave `required` pointing at a parameter that no
 *     longer exists in the schema sent to the provider.
 *   - Literal-data keywords (`enum`, `const`, `default`, ...) are copied
 *     verbatim, so objects inside them are not rewritten.
 *
 * The input is not mutated; a new object tree is returned.
 *
 * @param schema A JSON Schema object (e.g. a tool's `parameters`).
 * @returns A copy of `schema` without annotation prose.
 */
function stripParamDescriptions(schema: Record<string, unknown>): Record<string, unknown> {
  const out: Record<string, unknown> = {}

  for (const [key, value] of Object.entries(schema)) {
    // At schema level these two keys are pure annotations.
    if (key === 'description' || key === 'title') continue

    if (SHIM_SCHEMA_LITERAL_KEYWORDS.has(key)) {
      // Shallow-copy arrays so the result does not alias the caller's array.
      out[key] = Array.isArray(value) ? [...value] : value
      continue
    }

    if (SHIM_SCHEMA_NAME_MAP_KEYWORDS.has(key) && isShimSchemaRecord(value)) {
      // Keys here are names: keep every one, strip only inside each sub-schema.
      out[key] = Object.fromEntries(
        Object.entries(value).map(([name, subSchema]) => [
          name,
          isShimSchemaRecord(subSchema) ? stripParamDescriptions(subSchema) : subSchema,
        ]),
      )
      continue
    }

    if (Array.isArray(value)) {
      // e.g. anyOf / oneOf / allOf / prefixItems: arrays of sub-schemas.
      out[key] = value.map(item =>
        isShimSchemaRecord(item) ? stripParamDescriptions(item) : item,
      )
    } else if (isShimSchemaRecord(value)) {
      // e.g. items / additionalProperties / not: a single sub-schema.
      out[key] = stripParamDescriptions(value)
    } else {
      out[key] = value
    }
  }

  return out
}
to a teammate or coordinator', + ListPeers: 'List connected peers or teammates', + LSP: 'Use language server intelligence', +} + +const CORE_TOOL_NAMES = new Set([ + 'Bash', + 'Read', + 'Write', + 'Edit', + 'MultiEdit', + 'Glob', + 'Grep', + 'TodoWrite', + 'AskUserQuestion', + 'Agent', +]) + +/** The single meta-tool sent during phase-1, in Anthropic tool shape. */ +const REQUEST_TOOLS_TOOL = { + name: 'request_tools', + description: 'Request the full schema for one or more tools before using them. Call this first if you need to use any tools.', + input_schema: { + type: 'object', + properties: { + tools: { + type: 'array', + description: 'Names of the tools needed for the next step.', + items: { + type: 'string', + }, + }, + rationale: { + type: 'string', + description: 'Brief reason these tools are needed.', + }, + }, + required: ['tools'], + additionalProperties: false, + }, +} + +/** + * Keyword heuristics to predict which tools a request needs. + * Returns a Set of tool names, empty Set for conversational, or null if uncertain. + */ +function predictNeededTools(messages: unknown[]): Set | null { + function extractUserQuery(text: string): string { + return text + .replace(//gi, '') + .replace(//gi, '') + .trim() + } + + let lastUserText = '' + for (let i = messages.length - 1; i >= 0; i--) { + const m = messages[i] as { role?: string; content?: unknown } + if (m.role !== 'user') continue + let rawText = '' + if (typeof m.content === 'string') { + rawText = m.content + } else if (Array.isArray(m.content)) { + const parts = (m.content as Array<{ type?: string; text?: string }>) + .filter(p => p.type === 'text') + .map(p => p.text ?? 
'') + rawText = parts.join(' ') + } + const clean = extractUserQuery(rawText) + if (clean) { + lastUserText = clean + break + } + } + if (!lastUserText) return null + + const t = lastUserText.toLowerCase() + + const isConversational = /^(what|who|why|how|when|where|explain|describe|tell me|is it|can you|do you|list|summarize|overview|difference between|compare|pros and cons)/.test(t.trim()) + && !/file|code|codebase|repository|repo|project|source|module|component|function|class|test|build|run|install|create|write|edit|implement|fix|debug/.test(t) + + if (isConversational) return new Set([]) + + const tools = new Set() + + if (/\brun\b|\bexecut|\bbuild|\btest\b|\binstall\b|\bnpm\b|\bpip\b|\bgit\b|\bcompil|\bscript|\bdocker|\bpython\b|\bnode\b/.test(t)) tools.add('Bash') + if (/\bread\b|\bshow\b|\bcontent|\blook at|\bopen\b|\bcat\b|\bwhat is in|\bwhat does.*file/.test(t)) tools.add('Read') + if (/\bcreate\b|\bwrite\b|\bnew file|\bgenerat|\bscaffold|\binitializ|\btouch\b/.test(t)) tools.add('Write') + if (/\bedit\b|\bmodif|\bchange\b|\bfix\b|\bupdat|\brefactor|\breplace|\bimpleme|\badd.*to\b|\bremove\b|\bdelet.*from/.test(t)) tools.add('Edit') + if (/\bsearch\b|\bfind\b|\bgrep\b|\blook for\b|\bwhere is\b|\boccurrenc|\bwhich file/.test(t)) { tools.add('Grep'); tools.add('Glob') } + if (/\bweb\b|\binternet\b|\bonline\b|\bsearch the web\b|\bbrowse\b|\blatest\b|\bcurrent\b|\btoday\b|\bfetch.*https?:\/\//.test(t)) { tools.add('WebSearch'); tools.add('WebFetch') } + if (/\blist.*file|\bfind.*file|\bfiles in|\bwhat files|\blist.*dir|\bls\b/.test(t)) tools.add('Glob') + if (/\bnotebook\b|\bjupyter\b|\bipynb\b/.test(t)) tools.add('NotebookEdit') + if (/\bskill\b|\bskills\b/.test(t)) tools.add('Skill') + if (/\btask\b|\bteammate\b|\bcoordinator\b|\bassign\b|\bowner\b/.test(t)) { tools.add('TaskCreate'); tools.add('TaskUpdate'); tools.add('TaskList') } + if (/\btodo\b|\bplan\b|\btask list|\btrack\b|\bprogress\b/.test(t)) tools.add('TodoWrite') + + if 
/**
 * Append `note` to a system prompt, whatever shape the prompt has.
 *
 * - string prompt: `note` is concatenated as-is.
 * - array prompt (content blocks): a new text block is appended, with the
 *   note's leading whitespace removed.
 * - anything else: a default assistant prompt is used as the base.
 *
 * @param system Existing system prompt (string, block array, or absent).
 * @param note   Text to append.
 * @returns A new system prompt value; the input is not modified.
 */
function appendSystemPrompt(system: unknown, note: string): ShimCreateParams['system'] {
  if (Array.isArray(system)) {
    const noteBlock = { type: 'text', text: note.trimStart() }
    return [...system, noteBlock] as ShimCreateParams['system']
  }

  const basePrompt = typeof system === 'string'
    ? system
    : 'You are a helpful AI assistant.'
  return `${basePrompt}${note}`
}
/**
 * Report whether the tool list consists solely of the `request_tools`
 * meta-tool.
 *
 * @param tools Tools in OpenAI chat-completions shape.
 * @returns `true` only for a one-element list whose tool is named `request_tools`.
 */
function isRequestToolsOnly(tools: OpenAITool[]): boolean {
  if (tools.length !== 1) return false

  const soleTool = tools[0]
  return soleTool?.function.name === 'request_tools'
}
/**
 * Translate a chat-completions `tool_choice` value into the Responses-API
 * shape.
 *
 * - `'auto'`, `'required'`, `'none'` pass through unchanged.
 * - `{ type: 'function', function: { name } }` becomes the flat form
 *   `{ type: 'function', name }`.
 * - Anything else (other strings, arrays, malformed objects) yields
 *   `undefined`.
 *
 * @param toolChoice Chat-completions style tool choice.
 * @returns The Responses-style tool choice, or `undefined` when not convertible.
 */
function convertOpenAIToolChoiceToResponsesToolChoice(toolChoice: unknown): unknown {
  // Scalar modes are identical in both APIs.
  if (typeof toolChoice === 'string') {
    const isScalarMode = ['auto', 'required', 'none'].includes(toolChoice)
    return isScalarMode ? toolChoice : undefined
  }

  if (toolChoice === null || typeof toolChoice !== 'object' || Array.isArray(toolChoice)) {
    return undefined
  }

  const { type: choiceType, function: fnSpec } = toolChoice as Record<string, unknown>
  if (choiceType !== 'function') return undefined
  if (fnSpec === null || typeof fnSpec !== 'object' || Array.isArray(fnSpec)) {
    return undefined
  }

  // Flatten the nested function name into the Responses form.
  const fnName = (fnSpec as Record<string, unknown>).name
  return typeof fnName === 'string'
    ? { type: 'function', name: fnName }
    : undefined
}
phase1Json.output.find(item => item.type === 'message' && item.role === 'assistant') + : undefined + + if (outputMessage) { + return { + role: outputMessage.role ?? 'assistant', + content: extractResponsesMessageContent(outputMessage.content), + } + } + + return { + role: 'assistant', + content: phase1Json.output_text ?? '', + } +} + function convertSystemPrompt( system: unknown, ): string { @@ -1378,66 +1949,40 @@ class OpenAIShimMessages { const promise = (async () => { const request = resolveProviderRequest({ model: self.providerOverride?.model ?? params.model, baseUrl: self.providerOverride?.baseURL, reasoningEffortOverride: self.reasoningEffort }) - const response = await self._doRequest(request, params, options) - httpResponse = response - if (params.stream) { - const isResponsesStream = response.url?.includes('/responses') - return new OpenAIShimStream( - ( - request.transport === 'codex_responses' || - request.transport === 'responses' || - isResponsesStream - ) - ? codexStreamToAnthropic(response, request.resolvedModel, options?.signal) - : openaiStreamToAnthropic(response, request.resolvedModel, options?.signal), - ) - } - - if (request.transport === 'codex_responses') { - const data = await collectCodexCompletedResponse(response, options?.signal) - return convertCodexResponseToAnthropicMessage( - data, - request.resolvedModel, - ) - } - - const isResponsesNonStream = response.url?.includes('/responses') + // ShimToolSearch lazy mode: for conversational turns, send only the + // request_tools meta-tool and let the model ask for schemas if needed. + const shimToolSearchMode = getShimToolSearchMode() if ( - request.transport === 'responses' || - isResponsesNonStream || - (request.transport === 'chat_completions' && isGithubModelsMode()) + shimToolSearchMode === 'lazy' && + params.tools && (params.tools as unknown[]).length > 0 ) { - const contentType = response.headers.get('content-type') ?? 
'' - if (contentType.includes('application/json')) { - const parsed = await response.json() as Record - if ( - parsed && - typeof parsed === 'object' && - ('output' in parsed || 'incomplete_details' in parsed) - ) { - return convertCodexResponseToAnthropicMessage( - parsed, - request.resolvedModel, - ) + const forcedToolName = getForcedToolChoiceName(params.tool_choice) + const msgs = Array.isArray(params.messages) ? params.messages as unknown[] : [] + const predicted = predictNeededTools(msgs) + if ( + predicted !== null && + predicted.size === 0 && + (!forcedToolName || forcedToolName === 'request_tools') + ) { + const shimResult = await self._shimToolSearchCreate(request, params, options) + httpResponse = shimResult.response + if (shimResult.converted) { + return shimResult.data } - return self._convertNonStreamingResponse(parsed, request.resolvedModel) + return await self._convertCreateResponse( + request, + shimResult.data as Response, + params, + options, + ) } } - const contentType = response.headers.get('content-type') ?? 
'' - if (contentType.includes('application/json')) { - const data = await response.json() - return self._convertNonStreamingResponse(data, request.resolvedModel) - } + const response = await self._doRequest(request, params, options) + httpResponse = response - const textBody = await response.text().catch(() => '') - throw APIError.generate( - response.status, - undefined, - `OpenAI API error ${response.status}: unexpected response: ${textBody.slice(0, 500)}`, - response.headers as unknown as Headers, - ) + return await self._convertCreateResponse(request, response, params, options) })() ; (promise as unknown as Record).withResponse = @@ -1454,6 +1999,71 @@ class OpenAIShimMessages { return promise } + private async _convertCreateResponse( + request: ReturnType, + response: Response, + params: ShimCreateParams, + options?: { signal?: AbortSignal; headers?: Record }, + ) { + if (params.stream) { + const isResponsesStream = response.url?.includes('/responses') + return new OpenAIShimStream( + ( + request.transport === 'codex_responses' || + request.transport === 'responses' || + isResponsesStream + ) + ? codexStreamToAnthropic(response, request.resolvedModel, options?.signal) + : openaiStreamToAnthropic(response, request.resolvedModel, options?.signal), + ) + } + + if (request.transport === 'codex_responses') { + const data = await collectCodexCompletedResponse(response, options?.signal) + return convertCodexResponseToAnthropicMessage( + data, + request.resolvedModel, + ) + } + + const isResponsesNonStream = response.url?.includes('/responses') + if ( + request.transport === 'responses' || + isResponsesNonStream || + (request.transport === 'chat_completions' && isGithubModelsMode()) + ) { + const contentType = response.headers.get('content-type') ?? 
'' + if (contentType.includes('application/json')) { + const parsed = await response.json() as Record + if ( + parsed && + typeof parsed === 'object' && + ('output' in parsed || 'incomplete_details' in parsed) + ) { + return convertCodexResponseToAnthropicMessage( + parsed, + request.resolvedModel, + ) + } + return this._convertNonStreamingResponse(parsed, request.resolvedModel) + } + } + + const contentType = response.headers.get('content-type') ?? '' + if (contentType.includes('application/json')) { + const data = await response.json() + return this._convertNonStreamingResponse(data, request.resolvedModel) + } + + const textBody = await response.text().catch(() => '') + throw APIError.generate( + response.status, + undefined, + `OpenAI API error ${response.status}: unexpected response: ${textBody.slice(0, 500)}`, + response.headers as unknown as Headers, + ) + } + private async _doRequest( request: ReturnType, params: ShimCreateParams, @@ -1536,10 +2146,158 @@ class OpenAIShimMessages { return this._doOpenAIRequest(request, params, options) } + // --------------------------------------------------------------------------- + // ShimToolSearch — two-phase protocol + // --------------------------------------------------------------------------- + + /** + * Wrap a single OpenAI-format message as a ReadableStream + * mimicking a streaming SSE response, so it can be fed into the existing + * stream-to-Anthropic pipeline. 
+ */ + private _syntheticStream(msg: Record): ReadableStream { + const encoder = new TextEncoder() + const chunk = { + id: `chatcmpl-shim-${Date.now()}`, + object: 'chat.completion.chunk', + created: Math.floor(Date.now() / 1000), + model: 'shim-tool-search', + choices: [{ index: 0, delta: msg, finish_reason: 'stop' }], + } + const payload = `data: ${JSON.stringify(chunk)}\n\ndata: [DONE]\n\n` + return new ReadableStream({ + start(controller) { + controller.enqueue(encoder.encode(payload)) + controller.close() + }, + }) + } + + /** + * Two-phase ShimToolSearch protocol: + * Phase 1 — send only the request_tools meta-tool + tool directory in the + * system prompt. If the model calls request_tools, go to phase 2. + * Phase 2 — re-request with only the requested tools (minified). + * + * If the model doesn't call request_tools, return a synthetic stream wrapping + * its conversational response. + */ + private async _shimToolSearchCreate( + request: ReturnType, + params: ShimCreateParams, + options?: { signal?: AbortSignal; headers?: Record }, + ): Promise<{ + data: OpenAIShimStream | Response + response: Response + converted: boolean + }> { + debugShimToolSearch('Phase 1: conversational prediction — sending meta-tool only') + + const allConverted = convertTools( + params.tools as Array<{ name: string; description?: string; input_schema?: Record }>, + ) + + // Build directory listing for the system prompt + const directoryLines = getToolDirectoryLines(allConverted) + const directoryNote = `\n\nAvailable tools (call request_tools to use any):\n${directoryLines}` + + const phase1Messages = JSON.parse(JSON.stringify(params.messages)) as Array<{ role: string; content: unknown }> + const phase1System = appendSystemPrompt(params.system, directoryNote) + + // Phase 1 request — non-streaming, single meta-tool + const phase1Params = { + ...params, + stream: false, + system: phase1System, + messages: phase1Messages, + tools: [REQUEST_TOOLS_TOOL] as typeof params.tools, + } + 
const phase1Response = await this._doRequest(request, phase1Params, options) + const phase1Json = await phase1Response.json() as ShimPhase1Response + const toolCalls = extractShimPhase1ToolCalls(phase1Json) + const requestToolsCall = toolCalls.find(tc => tc.function?.name === 'request_tools') + + if (!requestToolsCall) { + // Model chose to respond conversationally — return as synthetic stream + debugShimToolSearch('Phase 1 result: conversational (no tools requested)') + const msg = extractShimPhase1Message(phase1Json) + if (params.stream) { + return { + data: new OpenAIShimStream( + openaiStreamToAnthropic( + new Response(this._syntheticStream(msg), { + status: 200, + headers: { 'content-type': 'text/event-stream' }, + }), + request.resolvedModel, + ), + ), + response: phase1Response, + converted: true, + } + } + return { + data: new Response(JSON.stringify(phase1Json), { + status: 200, + headers: { 'content-type': 'application/json' }, + }), + response: phase1Response, + converted: false, + } + } + + // Model requested tools — parse and do phase 2 + let requestedNames: string[] = [] + let parsedRequestedNames = true + try { + const args = JSON.parse(requestToolsCall.function?.arguments ?? '{}') + requestedNames = Array.isArray(args.tools) ? 
args.tools : [] + } catch { + debugShimToolSearch('Phase 1 returned malformed request_tools JSON; falling back to all tools') + parsedRequestedNames = false + requestedNames = [] + } + debugShimToolSearch(`Phase 2: model requested tools: ${requestedNames.join(', ')}`) + + // Build full tool set from the original params, filtered + minified + const forcedToolName = getForcedToolChoiceName(params.tool_choice) + const wanted = new Set([...requestedNames, ...CORE_TOOL_NAMES]) + if (forcedToolName && forcedToolName !== 'request_tools') wanted.add(forcedToolName) + const filtered = allConverted.filter(t => wanted.has(t.function.name)) + const toolSet = minifyToolSchemas( + parsedRequestedNames && requestedNames.length > 0 && filtered.length > 0 + ? filtered + : allConverted, + ) + debugShimToolSearch(`Phase 2: sending ${toolSet.length} tools (${JSON.stringify(toolSet).length} chars)`) + + // Phase 2 — re-request with the actual tools + const response = await this._doOpenAIRequest(request, params, options, toolSet) + + if (params.stream) { + const isResponsesStream = response.url?.includes('/responses') + return { + data: new OpenAIShimStream( + (request.transport === 'codex_responses' || isResponsesStream) + ? 
codexStreamToAnthropic(response, request.resolvedModel) + : openaiStreamToAnthropic(response, request.resolvedModel), + ), + response, + converted: true, + } + } + return { + data: response, + response, + converted: false, + } + } + private async _doOpenAIRequest( request: ReturnType, params: ShimCreateParams, options?: { signal?: AbortSignal; headers?: Record }, + convertedToolOverride?: OpenAITool[], ): Promise { // Local backends (llama.cpp, vLLM, Ollama, LM Studio, …) do not implement // the cloud-side caching/strict-validation behaviours that several of our @@ -1653,8 +2411,8 @@ class OpenAIShimMessages { } } - if (params.tools && params.tools.length > 0) { - const converted = convertTools( + if ((params.tools && params.tools.length > 0) || convertedToolOverride) { + const converted = convertedToolOverride ?? convertTools( params.tools as Array<{ name: string description?: string @@ -1663,12 +2421,34 @@ class OpenAIShimMessages { { skipStrict: fastPath.skipStrictTools }, ) if (converted.length > 0) { - body.tools = converted - if (params.tool_choice) { + const shimToolSearchMode = getShimToolSearchMode() + const forcedToolName = getForcedToolChoiceName(params.tool_choice) + if (convertedToolOverride || isRequestToolsOnly(converted)) { + body.tools = convertedToolOverride + ? convertedToolOverride + : converted + } else if (shimToolSearchMode === 'minify' || shimToolSearchMode === 'predict') { + const msgs = Array.isArray(params.messages) ? params.messages as unknown[] : [] + const toolSet = selectShimToolSet(converted, msgs, shimToolSearchMode, forcedToolName) + body.tools = toolSet + const names = toolSet.map(t => t.function.name) + const totalChars = JSON.stringify(toolSet).length + debugShimToolSearch(`${shimToolSearchMode}: ${toolSet.length} tools (${totalChars} chars): ${names.join(', ')}`) + } else if (shimToolSearchMode === 'lazy') { + const msgs = Array.isArray(params.messages) ? 
params.messages as unknown[] : [] + body.tools = selectShimToolSet(converted, msgs, 'predict', forcedToolName) + } else { + body.tools = converted + } + if (params.tool_choice && Array.isArray(body.tools) && body.tools.length > 0) { const tc = params.tool_choice as { type?: string; name?: string } if (tc.type === 'auto') { body.tool_choice = 'auto' - } else if (tc.type === 'tool' && tc.name) { + } else if ( + tc.type === 'tool' && + tc.name && + hasOpenAIToolNamed(body.tools as OpenAITool[], tc.name) + ) { body.tool_choice = { type: 'function', function: { name: tc.name }, @@ -1725,7 +2505,14 @@ class OpenAIShimMessages { if (params.temperature !== undefined) responsesBody.temperature = params.temperature if (params.top_p !== undefined) responsesBody.top_p = params.top_p - if (!omitResponsesTools && params.tools && params.tools.length > 0) { + if (!omitResponsesTools && Array.isArray(body.tools)) { + if (body.tools.length > 0) { + const convertedTools = convertOpenAIToolsToResponsesTools(body.tools as OpenAITool[]) + if (convertedTools.length > 0) { + responsesBody.tools = convertedTools + } + } + } else if (!omitResponsesTools && params.tools && params.tools.length > 0) { const convertedTools = convertToolsToResponsesTools( params.tools as Array<{ name?: string @@ -1738,6 +2525,13 @@ class OpenAIShimMessages { } } + if (responsesBody.tools && body.tool_choice !== undefined) { + const responsesToolChoice = convertOpenAIToolChoiceToResponsesToolChoice(body.tool_choice) + if (responsesToolChoice !== undefined) { + responsesBody.tool_choice = responsesToolChoice + } + } + return responsesBody } @@ -1899,12 +2693,12 @@ class OpenAIShimMessages { // Local backends do not implement prefix caching, so the deep key-sort // is pure CPU overhead per request (issue #1016). Drop to the native // `JSON.stringify` fast path when the fast-path config opts out. + let outgoingBody = request.transport === 'responses' ? 
buildResponsesBody() : body const serializeBody = (): string => { - const payload = - request.transport === 'responses' ? buildResponsesBody() : body + outgoingBody = request.transport === 'responses' ? buildResponsesBody() : body return fastPath.skipStableStringify - ? JSON.stringify(payload) - : stableStringify(payload) + ? JSON.stringify(outgoingBody) + : stableStringify(outgoingBody) } let serializedBody = serializeBody() @@ -1912,6 +2706,13 @@ class OpenAIShimMessages { serializedBody = serializeBody() } + logOpenAIShimTokenAudit({ + body: outgoingBody, + serializedBody, + model: request.resolvedModel, + transport: request.transport, + }) + const buildFetchInit = () => ({ method: 'POST' as const, headers, @@ -2185,10 +2986,10 @@ class OpenAIShimMessages { role?: string content?: | string - | null - | Array<{ type?: string; text?: string }> - reasoning_content?: string | null - tool_calls?: Array<{ + | null + | Array<{ type?: string; text?: string }> + reasoning_content?: string | null + tool_calls?: Array<{ id: string function: { name: string; arguments: string } extra_content?: Record