diff --git a/scripts/benchmarks/shim-tool-minify-live.sh b/scripts/benchmarks/shim-tool-minify-live.sh
new file mode 100755
index 0000000000..cc461d3c20
--- /dev/null
+++ b/scripts/benchmarks/shim-tool-minify-live.sh
@@ -0,0 +1,202 @@
+#!/usr/bin/env bash
+set -euo pipefail
+
+ROOT="${1:-/tmp/openclaude-shim-bench}"
+RESULTS="$ROOT/results"
+BASE="$ROOT/base"
+OFF="$ROOT/off"
+MINIFY="$ROOT/minify"
+
+rm -rf "$ROOT"
+mkdir -p "$BASE/src" "$BASE/docs" "$BASE/tests" "$RESULTS"
+
+cat >"$BASE/README.md" <<'EOF' # project overview; states the expected version 0.4.2
+# Shim Live Test Project
+
+This disposable project is used to test OpenClaude workflows against the
+OpenAI-compatible shim.
+
+The project exposes a tiny calculator module, a config file, and notes that
+mention NanoGPT, DeepSeek, Qwen, and tool schema minimization.
+
+Expected version: 0.4.2
+EOF
+
+cat >"$BASE/src/calculator.js" <<'EOF' # module the 06_edit_code prompt asks the agent to extend
+export function add(a, b) {
+  return a + b
+}
+
+export function multiply(a, b) {
+  return a * b
+}
+
+export function formatResult(label, value) {
+  return `${label}: ${value}`
+}
+EOF
+
+cat >"$BASE/src/config.json" <<'EOF' # file the 02_read_config prompt asks the agent to read
+{
+  "name": "shim-live-test",
+  "version": "0.4.2",
+  "provider": "nanogpt",
+  "defaultModel": "deepseek/deepseek-v4-pro"
+}
+EOF
+
+cat >"$BASE/docs/notes.md" <<'EOF' # mentions DeepSeek, the term the 03_search_deepseek prompt looks for
+# Notes
+
+- NanoGPT is used through an OpenAI-compatible endpoint.
+- DeepSeek V4 Pro is the default model for agentic coding work.
+- Qwen models can be used as alternatives.
+- Tool schema minimization should preserve all available tools.
+EOF
+
+cat >"$BASE/tests/calculator.test.js" <<'EOF' # plain Node script run by "npm test" (see package.json below)
+import { add, multiply, formatResult } from '../src/calculator.js'
+
+if (add(2, 3) !== 5) throw new Error('add failed')
+if (multiply(3, 4) !== 12) throw new Error('multiply failed')
+if (formatResult('sum', 5) !== 'sum: 5') throw new Error('format failed')
+
+console.log('calculator tests passed')
+EOF
+
+cat >"$BASE/package.json" <<'EOF' # defines the "test" script used by the npm test prompts
+{
+  "name": "shim-live-test",
+  "version": "0.4.2",
+  "type": "module",
+  "scripts": {
+    "test": "node tests/calculator.test.js"
+  }
+}
+EOF
+
+cp -a "$BASE" "$OFF"    # untouched copy of the fixture for the "off" suite
+cp -a "$BASE" "$MINIFY" # separate copy for the "minify" suite, so file edits made in one suite do not reach the other
+
+run_case() {
+  local mode="$1"
+  local workdir="$2"
+  local outdir="$3"
+  local id="$4"
+  local prompt="$5"
+  mkdir -p "$outdir"
+
+  printf 'running %s %s\n' "$mode" "$id" >&2
+  (
+    cd "$workdir"
+    OPENAI_SHIM_TOOL_MODE="$mode" \
+      timeout 120 openclaude --bare -p --output-format json \
+      --permission-mode bypassPermissions \
+      --max-budget-usd 0.20 \
+      "$prompt"
+  ) >"$outdir/${id}.json" 2>"$outdir/${id}.log" || {
+    code=$?
+    printf '{"type":"harness_error","exit_code":%s,"case":"%s"}\n' "$code" "$id" >>"$outdir/${id}.json"
+  }
+}
+
+run_suite() {
+  local mode="$1"
+  local workdir="$2"
+  local outdir="$RESULTS/$mode"
+
+  run_case "$mode" "$workdir" "$outdir" "01_arithmetic" "Reply with exactly the number: 4"
+  run_case "$mode" "$workdir" "$outdir" "02_read_config" "Read src/config.json and report only the version and defaultModel."
+  run_case "$mode" "$workdir" "$outdir" "03_search_deepseek" "Search the project for DeepSeek and report matching file paths only."
+  run_case "$mode" "$workdir" "$outdir" "04_run_tests" "Run the test suite with npm test and report pass or fail with the key output."
+  run_case "$mode" "$workdir" "$outdir" "05_create_doc" "Create docs/generated-summary.md containing one concise sentence about this project, then report the file path."
+  run_case "$mode" "$workdir" "$outdir" "06_edit_code" "Edit src/calculator.js to add an exported subtract(a, b) function, then report what changed."
+  run_case "$mode" "$workdir" "$outdir" "07_update_test" "Update tests/calculator.test.js to test subtract(7, 2) === 5, then run npm test and report pass or fail."
+  run_case "$mode" "$workdir" "$outdir" "08_summarize_project" "Read README.md and docs/notes.md, then summarize the project in three bullets."
+  run_case "$mode" "$workdir" "$outdir" "09_find_version" "Find every occurrence of 0.4.2 in this project and report file paths."
+  run_case "$mode" "$workdir" "$outdir" "10_plan_next" "Inspect the project structure and propose the next two engineering tasks. Mention the files you inspected."
+}
+
+run_suite off "$OFF"       # baseline: OPENAI_SHIM_TOOL_MODE=off, run inside its own project copy
+run_suite minify "$MINIFY" # candidate: OPENAI_SHIM_TOOL_MODE=minify, run inside a separate copy
+
+node - "$RESULTS" <<'NODE'
+const fs = require('fs')
+const path = require('path')
+const root = process.argv[2]
+
+function readRun(mode, id) {
+  const file = path.join(root, mode, `${id}.json`)
+  const raw = fs.readFileSync(file, 'utf8').trim().split('\n').filter(Boolean).at(-1)
+  const json = JSON.parse(raw)
+  const model = json.modelUsage ? Object.keys(json.modelUsage)[0] : ''
+  const usage = model ? json.modelUsage[model] : {}
+  return {
+    ok: json.type === 'result' && json.subtype === 'success' && !json.is_error,
+    input: usage.inputTokens ?? json.usage?.input_tokens ?? 0,
+    output: usage.outputTokens ?? json.usage?.output_tokens ?? 0,
+    cost: usage.costUSD ?? json.total_cost_usd ?? 0,
+    duration: json.duration_ms ?? 0,
+    turns: json.num_turns ?? 0,
+    result: String(json.result ?? '').replace(/\s+/g, ' ').slice(0, 120),
+  }
+}
+
+const ids = fs.readdirSync(path.join(root, 'off'))
+  .filter(name => name.endsWith('.json'))
+  .map(name => name.replace(/\.json$/, ''))
+  .sort()
+
+const lines = [
+  'case,off_input,minify_input,input_reduction_pct,off_cost,minify_cost,cost_reduction_pct,off_turns,minify_turns,off_ok,minify_ok',
+]
+let offInput = 0
+let minInput = 0
+let offCost = 0
+let minCost = 0
+let offOk = 0
+let minOk = 0
+
+for (const id of ids) {
+  const off = readRun('off', id)
+  const min = readRun('minify', id)
+  offInput += off.input
+  minInput += min.input
+  offCost += off.cost
+  minCost += min.cost
+  offOk += off.ok ? 1 : 0
+  minOk += min.ok ? 1 : 0
+  lines.push([
+    id,
+    off.input,
+    min.input,
+    ((off.input - min.input) / off.input * 100).toFixed(1),
+    off.cost.toFixed(6),
+    min.cost.toFixed(6),
+    ((off.cost - min.cost) / off.cost * 100).toFixed(1),
+    off.turns,
+    min.turns,
+    off.ok,
+    min.ok,
+  ].join(','))
+}
+
+lines.push([
+  'TOTAL',
+  offInput,
+  minInput,
+  ((offInput - minInput) / offInput * 100).toFixed(1),
+  offCost.toFixed(6),
+  minCost.toFixed(6),
+  ((offCost - minCost) / offCost * 100).toFixed(1),
+  '',
+  '',
+  `${offOk}/${ids.length}`,
+  `${minOk}/${ids.length}`,
+].join(','))
+
+fs.writeFileSync(path.join(root, 'summary.csv'), `${lines.join('\n')}\n`)
+console.log(lines.join('\n'))
+NODE
+
+printf '\nResults written to %s\n' "$RESULTS"
diff --git a/src/services/api/openaiShim.test.ts b/src/services/api/openaiShim.test.ts
index 2ab3b0eb9e..bf9f043d0a 100644
--- a/src/services/api/openaiShim.test.ts
+++ b/src/services/api/openaiShim.test.ts
@@ -3,6 +3,9 @@ import { registerGateway } from '../../integrations/index.ts'
 import { createOpenAIShimClient } from './openaiShim.ts'
 
 type FetchType = typeof globalThis.fetch
+type OpenAIShimWithResponseData = AsyncIterable<Record<string, unknown>> & {
+  content: Array<{ type: string; text?: string }>
+}
 
 const originalEnv = {
   OPENAI_BASE_URL: process.env.OPENAI_BASE_URL,
@@ -34,6 +37,11 @@ const originalEnv = {
   OPENROUTER_API_KEY: process.env.OPENROUTER_API_KEY,
   DEEPSEEK_API_KEY: process.env.DEEPSEEK_API_KEY,
   MIMO_API_KEY: process.env.MIMO_API_KEY,
+  OPENAI_SHIM_TOOL_MODE: process.env.OPENAI_SHIM_TOOL_MODE,
+  ENABLE_SHIM_TOOL_SEARCH: process.env.ENABLE_SHIM_TOOL_SEARCH,
+  OPENAI_SHIM_DEBUG: process.env.OPENAI_SHIM_DEBUG,
+  ENABLE_SHIM_TOOL_SEARCH_DEBUG: process.env.ENABLE_SHIM_TOOL_SEARCH_DEBUG,
+  OPENAI_SHIM_TOKEN_AUDIT: process.env.OPENAI_SHIM_TOKEN_AUDIT,
 }
 
 const originalFetch = globalThis.fetch
@@ -53,7 +61,11 @@ type OpenAIShimClient = {
         params: Record<string, unknown>,
         options?: Record<string, unknown>,
       ) => Promise<unknown> & {
-        withResponse: () => Promise<{ data: AsyncIterable<Record<string, unknown>> }>
+        withResponse: () => Promise<{
+          data: OpenAIShimWithResponseData
+          response: Response
+          request_id: string
+        }>
       }
     }
   }
@@ -115,6 +127,11 @@ beforeEach(() => {
   delete process.env.OPENROUTER_API_KEY
   delete process.env.DEEPSEEK_API_KEY
   delete process.env.MIMO_API_KEY
+  delete process.env.OPENAI_SHIM_TOOL_MODE
+  delete process.env.ENABLE_SHIM_TOOL_SEARCH
+  delete process.env.OPENAI_SHIM_DEBUG
+  delete process.env.ENABLE_SHIM_TOOL_SEARCH_DEBUG
+  delete process.env.OPENAI_SHIM_TOKEN_AUDIT
 })
 
 afterEach(() => {
@@ -147,6 +164,11 @@ afterEach(() => {
   restoreEnv('OPENROUTER_API_KEY', originalEnv.OPENROUTER_API_KEY)
   restoreEnv('DEEPSEEK_API_KEY', originalEnv.DEEPSEEK_API_KEY)
   restoreEnv('MIMO_API_KEY', originalEnv.MIMO_API_KEY)
+  restoreEnv('OPENAI_SHIM_TOOL_MODE', originalEnv.OPENAI_SHIM_TOOL_MODE)
+  restoreEnv('ENABLE_SHIM_TOOL_SEARCH', originalEnv.ENABLE_SHIM_TOOL_SEARCH)
+  restoreEnv('OPENAI_SHIM_DEBUG', originalEnv.OPENAI_SHIM_DEBUG)
+  restoreEnv('ENABLE_SHIM_TOOL_SEARCH_DEBUG', originalEnv.ENABLE_SHIM_TOOL_SEARCH_DEBUG)
+  restoreEnv('OPENAI_SHIM_TOKEN_AUDIT', originalEnv.OPENAI_SHIM_TOKEN_AUDIT)
   globalThis.fetch = originalFetch
 })
 
@@ -983,18 +1005,1015 @@ test('uses route-specific credential env vars for descriptor-backed openai-compa
               role: 'assistant',
               content: 'ok',
             },
-            finish_reason: 'stop',
+            finish_reason: 'stop',
+          },
+        ],
+        usage: {
+          prompt_tokens: 5,
+          completion_tokens: 1,
+          total_tokens: 6,
+        },
+      }),
+      {
+        headers: {
+          'Content-Type': 'application/json',
+        },
+      },
+    )
+  }) as FetchType
+
+  const client = createOpenAIShimClient({}) as OpenAIShimClient
+
+  await client.beta.messages.create({
+    model: 'openai/gpt-5-mini',
+    messages: [{ role: 'user', content: 'hello' }],
+    max_tokens: 64,
+    stream: false,
+  })
+
+  expect(capturedHeaders?.get('authorization')).toBe('Bearer or-route-key')
+})
+
+// Replays a conversation whose assistant turn contains a tool_use block with
+// provider-specific `extra_content`, then inspects the request body handed to
+// fetch to confirm the converted tool call still carries that payload.
+test('preserves Gemini tool call extra_content in follow-up requests', async () => {
+  // Filled in by the fetch stub with the JSON body of the outgoing request.
+  let sentBody: Record<string, unknown> | undefined
+
+  // Canned chat-completions reply; the assertion only looks at the request.
+  const completion = {
+    id: 'chatcmpl-1',
+    model: 'google/gemini-3.1-pro-preview',
+    choices: [
+      {
+        message: {
+          role: 'assistant',
+          content: 'done',
+        },
+        finish_reason: 'stop',
+      },
+    ],
+    usage: {
+      prompt_tokens: 12,
+      completion_tokens: 4,
+      total_tokens: 16,
+    },
+  }
+
+  globalThis.fetch = (async (_url, options) => {
+    sentBody = JSON.parse(String(options?.body))
+    return new Response(JSON.stringify(completion), {
+      headers: { 'Content-Type': 'application/json' },
+    })
+  }) as FetchType
+
+  const shim = createOpenAIShimClient({}) as OpenAIShimClient
+
+  // Assistant turn: a Bash tool call tagged with `google.thought_signature`.
+  const toolUse = {
+    type: 'tool_use',
+    id: 'call_1',
+    name: 'Bash',
+    input: { command: 'pwd' },
+    extra_content: {
+      google: {
+        thought_signature: 'sig-123',
+      },
+    },
+  }
+  // Follow-up user turn: the result for that same call id.
+  const toolResult = {
+    type: 'tool_result',
+    tool_use_id: 'call_1',
+    content: 'D:\\repo',
+  }
+
+  // Non-streaming request with the three-turn history described above.
+  await shim.beta.messages.create({
+    model: 'google/gemini-3.1-pro-preview',
+    system: 'test system',
+    messages: [
+      { role: 'user', content: 'Use Bash' },
+      { role: 'assistant', content: [toolUse] },
+      { role: 'user', content: [toolResult] },
+    ],
+    max_tokens: 64,
+    stream: false,
+  })
+
+  // First outgoing message that has a `tool_calls` array.
+  type WithToolCalls = { tool_calls?: Array<Record<string, unknown>> }
+  const outgoing = sentBody?.messages as Array<Record<string, unknown>>
+  const converted = outgoing.find(msg => Array.isArray(msg.tool_calls)) as
+    | WithToolCalls
+    | undefined
+
+  expect(converted?.tool_calls?.[0]).toMatchObject({
+    id: 'call_1',
+    type: 'function',
+    function: {
+      name: 'Bash',
+      arguments: JSON.stringify({ command: 'pwd' }),
+    },
+    extra_content: {
+      google: {
+        thought_signature: 'sig-123',
+      },
+    },
+  })
+})
+
+// Sends a single Grep tool definition through the shim and checks that the
+// converted function schema still lists `pattern` as a property and as required.
+test('preserves Grep tool pattern field in OpenAI-compatible schemas', async () => {
+  let sentBody: Record<string, unknown> | undefined
+
+  // Canned chat-completions reply; only the captured request is asserted on.
+  const completion = {
+    id: 'chatcmpl-grep-schema',
+    model: 'qwen/qwen3.6-plus',
+    choices: [
+      {
+        message: { role: 'assistant', content: 'done' },
+        finish_reason: 'stop',
+      },
+    ],
+    usage: {
+      prompt_tokens: 12,
+      completion_tokens: 4,
+      total_tokens: 16,
+    },
+  }
+
+  globalThis.fetch = (async (_url, options) => {
+    sentBody = JSON.parse(String(options?.body))
+    return new Response(JSON.stringify(completion), {
+      headers: { 'Content-Type': 'application/json' },
+    })
+  }) as FetchType
+
+  // Tool definition in the `input_schema` form passed to `messages.create`.
+  const grepDefinition = {
+    name: 'Grep',
+    description: 'Search file contents',
+    input_schema: {
+      type: 'object',
+      properties: {
+        pattern: { type: 'string', description: 'Search pattern' },
+        path: { type: 'string' },
+      },
+      required: ['pattern'],
+      additionalProperties: false,
+    },
+  }
+
+  const shim = createOpenAIShimClient({}) as OpenAIShimClient
+  await shim.beta.messages.create({
+    model: 'qwen/qwen3.6-plus',
+    system: 'test system',
+    messages: [{ role: 'user', content: 'Use Grep' }],
+    tools: [grepDefinition],
+    max_tokens: 64,
+    stream: false,
+  })
+
+  type FunctionTool = {
+    function?: {
+      name?: unknown
+      parameters?: { properties?: Record<string, unknown>; required?: string[] }
+    }
+  }
+  const sentTools = sentBody?.tools as FunctionTool[] | undefined
+  const grepParameters = sentTools?.find(tool => tool.function?.name === 'Grep')?.function?.parameters
+
+  expect(Object.keys(grepParameters?.properties ?? {})).toContain('pattern')
+  expect(grepParameters?.required).toContain('pattern')
+})
+
+// Builds a fresh array of five tool definitions (Bash, Read, WebSearch,
+// WebFetch, Skill) for the ShimToolSearch tests. Each tool takes exactly one
+// required string parameter; Bash carries the longest description.
+// A new array is returned on every call, so tests may filter it freely.
+function makeShimToolFixtures() {
+  // Shell tool; its description is the long one.
+  const bash = {
+    name: 'Bash',
+    description: 'Execute shell commands. This long description should be truncated by shim minification because third-party providers do not need every detail.',
+    input_schema: {
+      type: 'object',
+      properties: {
+        command: { type: 'string', description: 'The command to execute.' },
+      },
+      required: ['command'],
+    },
+  }
+
+  // File reading tool.
+  const read = {
+    name: 'Read',
+    description: 'Read file contents.',
+    input_schema: {
+      type: 'object',
+      properties: {
+        file_path: { type: 'string', description: 'Path to read.' },
+      },
+      required: ['file_path'],
+    },
+  }
+
+  // Web search tool.
+  const webSearch = {
+    name: 'WebSearch',
+    description: 'Search the web for current information.',
+    input_schema: {
+      type: 'object',
+      properties: {
+        query: { type: 'string', description: 'Search query.' },
+      },
+      required: ['query'],
+    },
+  }
+
+  // Web page fetching tool.
+  const webFetch = {
+    name: 'WebFetch',
+    description: 'Fetch and read a web page.',
+    input_schema: {
+      type: 'object',
+      properties: {
+        url: { type: 'string', description: 'URL to fetch.' },
+      },
+      required: ['url'],
+    },
+  }
+
+  // Skill loading tool.
+  const skill = {
+    name: 'Skill',
+    description: 'Load and use a skill.',
+    input_schema: {
+      type: 'object',
+      properties: {
+        name: { type: 'string', description: 'Skill name.' },
+      },
+      required: ['name'],
+    },
+  }
+
+  return [
+    bash, read, webSearch, webFetch, skill,
+  ]
+}
+
+// With OPENAI_SHIM_TOOL_MODE=minify the chat-completions request must still
+// list all five fixture tools, and Bash's `command` parameter must have lost
+// its description.
+test('ShimToolSearch minify mode sends every available tool with parameter prose stripped', async () => {
+  process.env.OPENAI_SHIM_TOOL_MODE = 'minify'
+  let sentBody: Record<string, unknown> | undefined
+
+  globalThis.fetch = (async (_url, options) => {
+    sentBody = JSON.parse(String(options?.body))
+    // Reply content is irrelevant; only the captured request is asserted on.
+    const reply = {
+      id: 'chatcmpl-shim-minify',
+      model: 'fake-model',
+      choices: [{ message: { role: 'assistant', content: 'ok' }, finish_reason: 'stop' }],
+      usage: { prompt_tokens: 1, completion_tokens: 1, total_tokens: 2 },
+    }
+    return new Response(JSON.stringify(reply), {
+      headers: { 'Content-Type': 'application/json' },
+    })
+  }) as FetchType
+
+  const shim = createOpenAIShimClient({}) as OpenAIShimClient
+
+  await shim.beta.messages.create({
+    model: 'fake-model',
+    system: 'test system',
+    messages: [{ role: 'user', content: 'hello' }],
+    tools: makeShimToolFixtures(),
+    max_tokens: 32,
+    stream: false,
+  })
+
+  type ChatTool = {
+    function: { name: string; parameters: { properties?: Record<string, Record<string, unknown>> } }
+  }
+  const sentTools = sentBody?.tools as ChatTool[]
+  const sentNames = sentTools.map(tool => tool.function.name).sort()
+  expect(sentNames).toEqual(['Bash', 'Read', 'Skill', 'WebFetch', 'WebSearch'])
+
+  const bash = sentTools.find(tool => tool.function.name === 'Bash')
+  expect(bash?.function.parameters.properties?.command?.description).toBeUndefined()
+})
+
+// Responses-transport variant of the minify check: every fixture tool must be
+// sent, Bash's `command` description must be stripped, and the token audit
+// written to stderr must mention the transport and the Bash tool.
+test('ShimToolSearch minify mode reduces tools for responses transport', async () => {
+  let requestBody: Record<string, unknown> | undefined
+  let auditOutput = ''
+  const originalStderrWrite = process.stderr.write
+  process.env.OPENAI_API_FORMAT = 'responses'
+  process.env.OPENAI_SHIM_TOOL_MODE = 'minify'
+  process.env.OPENAI_SHIM_TOKEN_AUDIT = '1'
+
+  globalThis.fetch = (async (_input, init) => {
+    requestBody = JSON.parse(String(init?.body))
+
+    return new Response(
+      JSON.stringify({
+        id: 'resp-shim-minify',
+        model: 'fake-model',
+        output: [
+          {
+            type: 'message',
+            role: 'assistant',
+            content: [{ type: 'output_text', text: 'ok' }],
+          },
+        ],
+        usage: { input_tokens: 1, output_tokens: 1, total_tokens: 2 },
+      }),
+      { headers: { 'Content-Type': 'application/json' } },
+    )
+  }) as FetchType
+
+  // Capture stderr only for the duration of the call. Everything that can
+  // throw after the patch (including client construction) sits inside `try`
+  // so the real stderr writer is always restored for later tests.
+  process.stderr.write = ((chunk: string | Uint8Array): boolean => {
+    auditOutput += String(chunk)
+    return true
+  }) as typeof process.stderr.write
+
+  try {
+    const client = createOpenAIShimClient({}) as OpenAIShimClient
+
+    await client.beta.messages.create({
+      model: 'fake-model',
+      system: 'test system',
+      messages: [{ role: 'user', content: 'hello' }],
+      tools: makeShimToolFixtures(),
+      max_tokens: 32,
+      stream: false,
+    })
+  } finally {
+    process.stderr.write = originalStderrWrite
+  }
+
+  const tools = requestBody?.tools as Array<{ name: string; parameters: { properties?: Record<string, Record<string, unknown>> } }>
+  expect(tools.map(tool => tool.name).sort()).toEqual(['Bash', 'Read', 'Skill', 'WebFetch', 'WebSearch'])
+  const bashParameters = tools.find(tool => tool.name === 'Bash')?.parameters
+  expect(bashParameters?.properties?.command?.description).toBeUndefined()
+  expect(auditOutput).toContain('transport=responses')
+  expect(auditOutput).toContain('tool.Bash')
+})
+
+// Enables the token audit and checks that the text written to stderr names the
+// total, each message role, the tool schemas, and the Bash tool.
+test('OpenAI shim token audit logs request component breakdown when enabled', async () => {
+  process.env.OPENAI_SHIM_TOKEN_AUDIT = '1'
+  process.env.OPENAI_SHIM_TOOL_MODE = 'minify'
+
+  let sentBody: Record<string, unknown> | undefined
+  const stderrChunks: string[] = []
+  const realStderrWrite = process.stderr.write
+
+  process.stderr.write = ((chunk: string | Uint8Array): boolean => {
+    stderrChunks.push(String(chunk))
+    return true
+  }) as typeof process.stderr.write
+
+  globalThis.fetch = (async (_url, options) => {
+    sentBody = JSON.parse(String(options?.body))
+    const reply = {
+      id: 'chatcmpl-shim-audit',
+      model: 'fake-model',
+      choices: [{ message: { role: 'assistant', content: 'ok' }, finish_reason: 'stop' }],
+      usage: { prompt_tokens: 1, completion_tokens: 1, total_tokens: 2 },
+    }
+    return new Response(JSON.stringify(reply), { headers: { 'Content-Type': 'application/json' } })
+  }) as FetchType
+
+  try {
+    const shim = createOpenAIShimClient({}) as OpenAIShimClient
+    await shim.beta.messages.create({
+      model: 'fake-model',
+      system: 'test system',
+      messages: [
+        { role: 'user', content: 'Read src/config.json' },
+        { role: 'assistant', content: 'I will read it.' },
+      ],
+      tools: makeShimToolFixtures(),
+      max_tokens: 32,
+      stream: false,
+    })
+  } finally {
+    process.stderr.write = realStderrWrite
+  }
+
+  const auditOutput = stderrChunks.join('')
+  expect(sentBody).toBeDefined()
+  expect(auditOutput).toContain('[OpenAIShimTokenAudit] total chars=')
+  expect(auditOutput).toContain('messages.system')
+  expect(auditOutput).toContain('messages.user')
+  expect(auditOutput).toContain('messages.assistant')
+  expect(auditOutput).toContain('tool_schemas')
+  expect(auditOutput).toContain('tool.Bash')
+})
+
+// Predict mode with a prompt the test treats as uncertain: the outgoing
+// chat-completions request must list all five fixture tools.
+test('ShimToolSearch predict mode sends all tools when prediction is uncertain', async () => {
+  process.env.OPENAI_SHIM_TOOL_MODE = 'predict'
+  let sentBody: Record<string, unknown> | undefined
+
+  globalThis.fetch = (async (_url, options) => {
+    sentBody = JSON.parse(String(options?.body))
+    const reply = {
+      id: 'chatcmpl-shim-predict',
+      model: 'fake-model',
+      choices: [{ message: { role: 'assistant', content: 'ok' }, finish_reason: 'stop' }],
+      usage: { prompt_tokens: 1, completion_tokens: 1, total_tokens: 2 },
+    }
+    return new Response(JSON.stringify(reply), {
+      headers: { 'Content-Type': 'application/json' },
+    })
+  }) as FetchType
+
+  const shim = createOpenAIShimClient({}) as OpenAIShimClient
+  await shim.beta.messages.create({
+    model: 'fake-model',
+    system: 'test system',
+    messages: [{ role: 'user', content: 'Investigate this repository architecture' }],
+    tools: makeShimToolFixtures(),
+    max_tokens: 32,
+    stream: false,
+  })
+
+  type ChatTool = { function: { name: string } }
+  const sentTools = (sentBody?.tools ?? []) as ChatTool[]
+  const sentNames = sentTools.map(tool => tool.function.name).sort()
+  expect(sentNames).toEqual(['Bash', 'Read', 'Skill', 'WebFetch', 'WebSearch'])
+})
+
+// Predict mode with a web-search prompt: the outgoing chat-completions request
+// must include both WebSearch and WebFetch.
+test('ShimToolSearch predict mode includes web tools for current web requests', async () => {
+  process.env.OPENAI_SHIM_TOOL_MODE = 'predict'
+  let sentBody: Record<string, unknown> | undefined
+
+  globalThis.fetch = (async (_url, options) => {
+    sentBody = JSON.parse(String(options?.body))
+    const reply = {
+      id: 'chatcmpl-shim-web',
+      model: 'fake-model',
+      choices: [{ message: { role: 'assistant', content: 'ok' }, finish_reason: 'stop' }],
+      usage: { prompt_tokens: 1, completion_tokens: 1, total_tokens: 2 },
+    }
+    return new Response(JSON.stringify(reply), {
+      headers: { 'Content-Type': 'application/json' },
+    })
+  }) as FetchType
+
+  const shim = createOpenAIShimClient({}) as OpenAIShimClient
+  await shim.beta.messages.create({
+    model: 'fake-model',
+    system: 'test system',
+    messages: [{ role: 'user', content: 'Search the web for current React release notes' }],
+    tools: makeShimToolFixtures(),
+    max_tokens: 32,
+    stream: false,
+  })
+
+  type ChatTool = { function: { name: string } }
+  const sentNames = ((sentBody?.tools ?? []) as ChatTool[]).map(tool => tool.function.name)
+  expect(sentNames).toContain('WebSearch')
+  expect(sentNames).toContain('WebFetch')
+})
+
+// Predict mode, arithmetic prompt, but `tool_choice` forces WebSearch: the
+// request must offer WebSearch and carry the function-style tool_choice.
+test('ShimToolSearch predict mode includes forced tool_choice tool for conversational prompts', async () => {
+  process.env.OPENAI_SHIM_TOOL_MODE = 'predict'
+  let sentBody: Record<string, unknown> | undefined
+
+  globalThis.fetch = (async (_url, options) => {
+    sentBody = JSON.parse(String(options?.body))
+    const reply = {
+      id: 'chatcmpl-shim-predict-forced-tool',
+      model: 'fake-model',
+      choices: [{ message: { role: 'assistant', content: 'ok' }, finish_reason: 'stop' }],
+      usage: { prompt_tokens: 1, completion_tokens: 1, total_tokens: 2 },
+    }
+    return new Response(JSON.stringify(reply), {
+      headers: { 'Content-Type': 'application/json' },
+    })
+  }) as FetchType
+
+  const shim = createOpenAIShimClient({}) as OpenAIShimClient
+  await shim.beta.messages.create({
+    model: 'fake-model',
+    system: 'test system',
+    messages: [{ role: 'user', content: 'What is 2+2?' }],
+    tools: makeShimToolFixtures(),
+    tool_choice: { type: 'tool', name: 'WebSearch' },
+    max_tokens: 32,
+    stream: false,
+  })
+
+  type ChatTool = { function: { name: string } }
+  const sentNames = ((sentBody?.tools ?? []) as ChatTool[]).map(tool => tool.function.name)
+  expect(sentNames.length).toBeGreaterThan(0)
+  expect(sentNames).toContain('WebSearch')
+  expect(sentBody?.tool_choice).toEqual({
+    type: 'function',
+    function: { name: 'WebSearch' },
+  })
+})
+
+// Predict mode over the responses transport with a web-search prompt: expects
+// exactly Bash, Read, WebFetch and WebSearch, with `query` prose stripped.
+test('ShimToolSearch predict mode reduces tools for responses transport', async () => {
+  process.env.OPENAI_API_FORMAT = 'responses'
+  process.env.OPENAI_SHIM_TOOL_MODE = 'predict'
+  let sentBody: Record<string, unknown> | undefined
+
+  globalThis.fetch = (async (_url, options) => {
+    sentBody = JSON.parse(String(options?.body))
+    const assistantMessage = {
+      type: 'message',
+      role: 'assistant',
+      content: [{ type: 'output_text', text: 'ok' }],
+    }
+    const reply = {
+      id: 'resp-shim-predict-web',
+      model: 'fake-model',
+      output: [assistantMessage],
+      usage: { input_tokens: 1, output_tokens: 1, total_tokens: 2 },
+    }
+    return new Response(JSON.stringify(reply), {
+      headers: { 'Content-Type': 'application/json' },
+    })
+  }) as FetchType
+
+  const shim = createOpenAIShimClient({}) as OpenAIShimClient
+  await shim.beta.messages.create({
+    model: 'fake-model',
+    system: 'test system',
+    messages: [{ role: 'user', content: 'Search the web for current React release notes' }],
+    tools: makeShimToolFixtures(),
+    max_tokens: 32,
+    stream: false,
+  })
+
+  type ResponsesTool = {
+    name: string
+    parameters: { properties?: Record<string, Record<string, unknown>> }
+  }
+  const sentTools = (sentBody?.tools ?? []) as ResponsesTool[]
+  expect(sentTools.map(tool => tool.name).sort()).toEqual(['Bash', 'Read', 'WebFetch', 'WebSearch'])
+  const webSearch = sentTools.find(tool => tool.name === 'WebSearch')
+  expect(webSearch?.parameters?.properties?.query?.description).toBeUndefined()
+})
+
+// Predict mode, responses transport, arithmetic prompt: the outgoing request
+// must carry neither `tools` nor `tool_choice`.
+test('ShimToolSearch predict mode does not re-expand empty predicted tools for responses transport', async () => {
+  process.env.OPENAI_API_FORMAT = 'responses'
+  process.env.OPENAI_SHIM_TOOL_MODE = 'predict'
+  let sentBody: Record<string, unknown> | undefined
+
+  globalThis.fetch = (async (_url, options) => {
+    sentBody = JSON.parse(String(options?.body))
+    const assistantMessage = {
+      type: 'message',
+      role: 'assistant',
+      content: [{ type: 'output_text', text: 'ok' }],
+    }
+    const reply = {
+      id: 'resp-shim-predict-empty',
+      model: 'fake-model',
+      output: [assistantMessage],
+      usage: { input_tokens: 1, output_tokens: 1, total_tokens: 2 },
+    }
+    return new Response(JSON.stringify(reply), {
+      headers: { 'Content-Type': 'application/json' },
+    })
+  }) as FetchType
+
+  const shim = createOpenAIShimClient({}) as OpenAIShimClient
+
+  await shim.beta.messages.create({
+    model: 'fake-model',
+    system: 'test system',
+    messages: [{ role: 'user', content: 'What is 2+2?' }],
+    tools: makeShimToolFixtures(),
+    max_tokens: 32,
+    stream: false,
+  })
+
+  expect(sentBody?.tools).toBeUndefined()
+  expect(sentBody?.tool_choice).toBeUndefined()
+})
+
+// Predict mode, responses transport, `tool_choice` forcing WebSearch: the
+// request must offer WebSearch and carry the flat function-style tool_choice.
+test('ShimToolSearch predict mode includes forced tool_choice tool for responses transport', async () => {
+  process.env.OPENAI_API_FORMAT = 'responses'
+  process.env.OPENAI_SHIM_TOOL_MODE = 'predict'
+  let sentBody: Record<string, unknown> | undefined
+
+  globalThis.fetch = (async (_url, options) => {
+    sentBody = JSON.parse(String(options?.body))
+    const assistantMessage = {
+      type: 'message',
+      role: 'assistant',
+      content: [{ type: 'output_text', text: 'ok' }],
+    }
+    const reply = {
+      id: 'resp-shim-predict-forced-tool',
+      model: 'fake-model',
+      output: [assistantMessage],
+      usage: { input_tokens: 1, output_tokens: 1, total_tokens: 2 },
+    }
+    return new Response(JSON.stringify(reply), {
+      headers: { 'Content-Type': 'application/json' },
+    })
+  }) as FetchType
+
+  const shim = createOpenAIShimClient({}) as OpenAIShimClient
+
+  await shim.beta.messages.create({
+    model: 'fake-model',
+    system: 'test system',
+    messages: [{ role: 'user', content: 'What is 2+2?' }],
+    tools: makeShimToolFixtures(),
+    tool_choice: { type: 'tool', name: 'WebSearch' },
+    max_tokens: 32,
+    stream: false,
+  })
+
+  type ResponsesTool = { name: string }
+  const sentNames = ((sentBody?.tools ?? []) as ResponsesTool[]).map(tool => tool.name)
+  expect(sentNames).toContain('WebSearch')
+  expect(sentBody?.tool_choice).toEqual({
+    type: 'function',
+    name: 'WebSearch',
+  })
+})
+
+// `tool_choice` names WebSearch, but WebSearch is filtered out of the offered
+// tools: the chat request must not list it and must omit `tool_choice`.
+test('OpenAI-compatible chat transport drops forced tool_choice when selected tools lack that schema', async () => {
+  process.env.OPENAI_SHIM_TOOL_MODE = 'predict'
+  let sentBody: Record<string, unknown> | undefined
+
+  globalThis.fetch = (async (_url, options) => {
+    sentBody = JSON.parse(String(options?.body))
+    const reply = {
+      id: 'chatcmpl-shim-missing-forced-tool',
+      model: 'fake-model',
+      choices: [{ message: { role: 'assistant', content: 'ok' }, finish_reason: 'stop' }],
+      usage: { prompt_tokens: 1, completion_tokens: 1, total_tokens: 2 },
+    }
+    return new Response(JSON.stringify(reply), {
+      headers: { 'Content-Type': 'application/json' },
+    })
+  }) as FetchType
+
+  const shim = createOpenAIShimClient({}) as OpenAIShimClient
+  await shim.beta.messages.create({
+    model: 'fake-model',
+    system: 'test system',
+    messages: [{ role: 'user', content: 'What is 2+2?' }],
+    tools: makeShimToolFixtures().filter(tool => tool.name !== 'WebSearch'),
+    tool_choice: { type: 'tool', name: 'WebSearch' },
+    max_tokens: 32,
+    stream: false,
+  })
+
+  type ChatTool = { function: { name: string } }
+  const sentNames = ((sentBody?.tools ?? []) as ChatTool[]).map(tool => tool.function.name)
+  expect(sentNames).not.toContain('WebSearch')
+  expect(sentBody?.tool_choice).toBeUndefined()
+})
+
+// Responses-transport twin of the missing-forced-tool case: WebSearch is not
+// offered, so the request must not list it and must omit `tool_choice`.
+test('OpenAI-compatible responses transport drops forced tool_choice when selected tools lack that schema', async () => {
+  process.env.OPENAI_API_FORMAT = 'responses'
+  process.env.OPENAI_SHIM_TOOL_MODE = 'predict'
+  let sentBody: Record<string, unknown> | undefined
+
+  globalThis.fetch = (async (_url, options) => {
+    sentBody = JSON.parse(String(options?.body))
+    const assistantMessage = {
+      type: 'message',
+      role: 'assistant',
+      content: [{ type: 'output_text', text: 'ok' }],
+    }
+    const reply = {
+      id: 'resp-shim-missing-forced-tool',
+      model: 'fake-model',
+      output: [assistantMessage],
+      usage: { input_tokens: 1, output_tokens: 1, total_tokens: 2 },
+    }
+    return new Response(JSON.stringify(reply), {
+      headers: { 'Content-Type': 'application/json' },
+    })
+  }) as FetchType
+
+  const shim = createOpenAIShimClient({}) as OpenAIShimClient
+
+  await shim.beta.messages.create({
+    model: 'fake-model',
+    system: 'test system',
+    messages: [{ role: 'user', content: 'What is 2+2?' }],
+    tools: makeShimToolFixtures().filter(tool => tool.name !== 'WebSearch'),
+    tool_choice: { type: 'tool', name: 'WebSearch' },
+    max_tokens: 32,
+    stream: false,
+  })
+
+  type ResponsesTool = { name: string }
+  const sentNames = ((sentBody?.tools ?? []) as ResponsesTool[]).map(tool => tool.name)
+  expect(sentNames).not.toContain('WebSearch')
+  expect(sentBody?.tool_choice).toBeUndefined()
+})
+
+// Responses transport + predict mode with tool_choice { type: 'any' }: per the
+// test title, predict selects no tools for this prompt, so the request must go
+// out with neither `tools` nor `tool_choice`.
+test('OpenAI-compatible responses transport drops scalar any when predict selects no tools', async () => {
+  let requestBody: Record<string, unknown> | undefined
+  process.env.OPENAI_API_FORMAT = 'responses'
+  process.env.OPENAI_SHIM_TOOL_MODE = 'predict'
+
+  // Capture the outgoing request body and answer with a minimal Responses payload.
+  globalThis.fetch = (async (_input, init) => {
+    requestBody = JSON.parse(String(init?.body))
+
+    return new Response(
+      JSON.stringify({
+        id: 'resp-shim-any-empty-tools',
+        model: 'fake-model',
+        output: [
+          {
+            type: 'message',
+            role: 'assistant',
+            content: [{ type: 'output_text', text: 'ok' }],
+          },
+        ],
+        usage: { input_tokens: 1, output_tokens: 1, total_tokens: 2 },
+      }),
+      { headers: { 'Content-Type': 'application/json' } },
+    )
+  }) as FetchType
+
+  const client = createOpenAIShimClient({}) as OpenAIShimClient
+
+  await client.beta.messages.create({
+    model: 'fake-model',
+    system: 'test system',
+    messages: [{ role: 'user', content: 'What is 2+2?' }],
+    tools: makeShimToolFixtures(),
+    tool_choice: { type: 'any' },
+    max_tokens: 32,
+    stream: false,
+  })
+
+  expect(requestBody?.tools).toBeUndefined()
+  expect(requestBody?.tool_choice).toBeUndefined()
+})
+
+// Responses transport + minify mode: a forced Anthropic tool_choice
+// ({ type: 'tool', name }) must be forwarded in the Responses shape
+// ({ type: 'function', name }).
+test('OpenAI-compatible responses transport preserves tool_choice', async () => {
+  let requestBody: Record<string, unknown> | undefined
+  process.env.OPENAI_API_FORMAT = 'responses'
+  process.env.OPENAI_SHIM_TOOL_MODE = 'minify'
+
+  // Capture the outgoing request body and answer with a minimal Responses payload.
+  globalThis.fetch = (async (_input, init) => {
+    requestBody = JSON.parse(String(init?.body))
+
+    return new Response(
+      JSON.stringify({
+        id: 'resp-shim-tool-choice',
+        model: 'fake-model',
+        output: [
+          {
+            type: 'message',
+            role: 'assistant',
+            content: [{ type: 'output_text', text: 'ok' }],
+          },
+        ],
+        usage: { input_tokens: 1, output_tokens: 1, total_tokens: 2 },
+      }),
+      { headers: { 'Content-Type': 'application/json' } },
+    )
+  }) as FetchType
+
+  const client = createOpenAIShimClient({}) as OpenAIShimClient
+
+  await client.beta.messages.create({
+    model: 'fake-model',
+    system: 'test system',
+    messages: [{ role: 'user', content: 'Read src/config.json' }],
+    tools: makeShimToolFixtures(),
+    tool_choice: { type: 'tool', name: 'Read' },
+    max_tokens: 32,
+    stream: false,
+  })
+
+  expect(requestBody?.tool_choice).toEqual({
+    type: 'function',
+    name: 'Read',
+  })
+})
+
+// Table-driven check that scalar Anthropic tool_choice modes map onto the
+// Responses string form in minify mode: auto -> 'auto', any -> 'required',
+// none -> 'none'.
+test('OpenAI-compatible responses transport preserves scalar tool_choice modes', async () => {
+  const cases: Array<{
+    anthropicToolChoice: Record<string, string>
+    expectedResponsesToolChoice: string
+  }> = [
+    { anthropicToolChoice: { type: 'auto' }, expectedResponsesToolChoice: 'auto' },
+    { anthropicToolChoice: { type: 'any' }, expectedResponsesToolChoice: 'required' },
+    { anthropicToolChoice: { type: 'none' }, expectedResponsesToolChoice: 'none' },
+  ]
+
+  for (const testCase of cases) {
+    // Fresh capture slot, env and fetch stub per case so cases cannot leak into each other.
+    let requestBody: Record<string, unknown> | undefined
+    process.env.OPENAI_API_FORMAT = 'responses'
+    process.env.OPENAI_SHIM_TOOL_MODE = 'minify'
+
+    globalThis.fetch = (async (_input, init) => {
+      requestBody = JSON.parse(String(init?.body))
+
+      return new Response(
+        JSON.stringify({
+          id: 'resp-shim-tool-choice-scalar',
+          model: 'fake-model',
+          output: [
+            {
+              type: 'message',
+              role: 'assistant',
+              content: [{ type: 'output_text', text: 'ok' }],
+            },
+          ],
+          usage: { input_tokens: 1, output_tokens: 1, total_tokens: 2 },
+        }),
+        { headers: { 'Content-Type': 'application/json' } },
+      )
+    }) as FetchType
+
+    const client = createOpenAIShimClient({}) as OpenAIShimClient
+
+    await client.beta.messages.create({
+      model: 'fake-model',
+      system: 'test system',
+      messages: [{ role: 'user', content: 'Read src/config.json' }],
+      tools: makeShimToolFixtures(),
+      tool_choice: testCase.anthropicToolChoice,
+      max_tokens: 32,
+      stream: false,
+    })
+
+    expect(requestBody?.tool_choice).toBe(testCase.expectedResponsesToolChoice)
+  }
+})
+
+// Lazy mode, chat-completions shape: when the model answers phase 1 directly
+// (no request_tools call) exactly one request is sent. That request offers
+// `request_tools` as its first tool and lists the real tools in the system
+// message; the direct answer is surfaced through withResponse().
+test('ShimToolSearch lazy phase 1 sends a valid request_tools schema through system prompt', async () => {
+  const requestBodies: Array<Record<string, unknown>> = []
+  process.env.OPENAI_SHIM_TOOL_MODE = 'lazy'
+
+  // Record every outgoing body; always reply with a plain assistant message.
+  globalThis.fetch = (async (_input, init) => {
+    requestBodies.push(JSON.parse(String(init?.body)))
+
+    return new Response(
+      JSON.stringify({
+        id: 'chatcmpl-shim-lazy',
+        model: 'fake-model',
+        choices: [{ message: { role: 'assistant', content: 'direct answer' }, finish_reason: 'stop' }],
+        usage: { prompt_tokens: 1, completion_tokens: 1, total_tokens: 2 },
+      }),
+      { headers: { 'Content-Type': 'application/json' } },
+    )
+  }) as FetchType
+
+  const client = createOpenAIShimClient({}) as OpenAIShimClient
+
+  const result = await client.beta.messages
+    .create({
+      model: 'fake-model',
+      system: 'test system',
+      messages: [{ role: 'user', content: 'What is 2+2?' }],
+      tools: makeShimToolFixtures(),
+      max_tokens: 32,
+      stream: false,
+    })
+    .withResponse()
+
+  expect(requestBodies).toHaveLength(1)
+  const phase1Body = requestBodies[0]
+  expect((phase1Body.tools as Array<{ function: { name: string } }>)[0].function.name).toBe('request_tools')
+  const systemMessage = (phase1Body.messages as Array<{ role: string; content: string }>).find(
+    message => message.role === 'system',
+  )
+  expect(systemMessage?.content).toContain('Available tools')
+  expect(systemMessage?.content).toContain('WebSearch')
+  expect(result.data.content).toEqual([{ type: 'text', text: 'direct answer' }])
+  expect(result.response.headers.get('content-type')).toContain('application/json')
+  // The stubbed response carries no x-request-id header; the id is expected to start with "msg_".
+  expect(result.request_id).toMatch(/^msg_/)
+})
+
+// Lazy mode with a forced tool_choice naming a real tool: the request_tools
+// phase is skipped, so a single request goes out that contains the forced
+// tool's schema, no `request_tools`, and the chat-completions tool_choice shape.
+test('ShimToolSearch lazy mode skips phase 1 for forced non-request_tools tool_choice', async () => {
+  const requestBodies: Array<Record<string, unknown>> = []
+  process.env.OPENAI_SHIM_TOOL_MODE = 'lazy'
+
+  // Record every outgoing body; always reply with a plain assistant message.
+  globalThis.fetch = (async (_input, init) => {
+    requestBodies.push(JSON.parse(String(init?.body)))
+
+    return new Response(
+      JSON.stringify({
+        id: 'chatcmpl-shim-lazy-forced-tool',
+        model: 'fake-model',
+        choices: [{ message: { role: 'assistant', content: 'ok' }, finish_reason: 'stop' }],
+        usage: { prompt_tokens: 1, completion_tokens: 1, total_tokens: 2 },
+      }),
+      { headers: { 'Content-Type': 'application/json' } },
+    )
+  }) as FetchType
+
+  const client = createOpenAIShimClient({}) as OpenAIShimClient
+
+  await client.beta.messages.create({
+    model: 'fake-model',
+    system: 'test system',
+    messages: [{ role: 'user', content: 'What is 2+2?' }],
+    tools: makeShimToolFixtures(),
+    tool_choice: { type: 'tool', name: 'WebSearch' },
+    max_tokens: 32,
+    stream: false,
+  })
+
+  expect(requestBodies).toHaveLength(1)
+  const toolNames = ((requestBodies[0].tools ?? []) as Array<{ function: { name: string } }>)
+    .map(tool => tool.function.name)
+  expect(toolNames).toContain('WebSearch')
+  expect(toolNames).not.toContain('request_tools')
+  expect(requestBodies[0].tool_choice).toEqual({
+    type: 'function',
+    function: { name: 'WebSearch' },
+  })
+})
+
+test('ShimToolSearch lazy phase 2 falls back to all tools on malformed request_tools JSON', async () => {
+  const requestBodies: Array<Record<string, unknown>> = []
+  process.env.OPENAI_SHIM_TOOL_MODE = 'lazy'
+
+  globalThis.fetch = (async (_input, init) => {
+    requestBodies.push(JSON.parse(String(init?.body)))
+
+    if (requestBodies.length === 1) {
+      return new Response(
+        JSON.stringify({
+          id: 'chatcmpl-shim-lazy-phase1',
+          model: 'fake-model',
+          choices: [
+            {
+              message: {
+                role: 'assistant',
+                content: null,
+                tool_calls: [
+                  {
+                    id: 'call_request_tools',
+                    type: 'function',
+                    function: { name: 'request_tools', arguments: '{not-json' },
+                  },
+                ],
+              },
+              finish_reason: 'tool_calls',
+            },
+          ],
+          usage: { prompt_tokens: 1, completion_tokens: 1, total_tokens: 2 },
+        }),
+        {
+          headers: {
+            'Content-Type': 'application/json',
+            'x-request-id': 'phase1-request',
           },
-        ],
-        usage: {
-          prompt_tokens: 5,
-          completion_tokens: 1,
-          total_tokens: 6,
         },
+      )
+    }
+
+    return new Response(
+      JSON.stringify({
+        id: 'chatcmpl-shim-lazy-phase2',
+        model: 'fake-model',
+        choices: [{ message: { role: 'assistant', content: 'done' }, finish_reason: 'stop' }],
+        usage: { prompt_tokens: 1, completion_tokens: 1, total_tokens: 2 },
       }),
       {
         headers: {
           'Content-Type': 'application/json',
+          'x-request-id': 'phase2-request',
         },
       },
     )
@@ -1002,171 +2021,156 @@ test('uses route-specific credential env vars for descriptor-backed openai-compa
 
   const client = createOpenAIShimClient({}) as OpenAIShimClient
 
-  await client.beta.messages.create({
-    model: 'openai/gpt-5-mini',
-    messages: [{ role: 'user', content: 'hello' }],
-    max_tokens: 64,
-    stream: false,
-  })
+  const result = await client.beta.messages
+    .create({
+      model: 'fake-model',
+      system: 'test system',
+      messages: [{ role: 'user', content: 'What is 2+2?' }],
+      tools: makeShimToolFixtures(),
+      max_tokens: 32,
+      stream: false,
+    })
+    .withResponse()
 
-  expect(capturedHeaders?.get('authorization')).toBe('Bearer or-route-key')
+  expect(requestBodies).toHaveLength(2)
+  const phase2ToolNames = ((requestBodies[1].tools ?? []) as Array<{ function: { name: string } }>)
+    .map(tool => tool.function.name)
+    .sort()
+  expect(phase2ToolNames).toEqual(['Bash', 'Read', 'Skill', 'WebFetch', 'WebSearch'])
+  expect(result.data.content).toEqual([{ type: 'text', text: 'done' }])
+  expect(result.response.headers.get('x-request-id')).toBe('phase2-request')
+  expect(result.request_id).toBe('phase2-request')
 })
 
-test('preserves Gemini tool call extra_content in follow-up requests', async () => {
-  let requestBody: Record<string, unknown> | undefined
+test('ShimToolSearch lazy phase 2 applies requested tool reduction for responses transport', async () => {
+  const requestBodies: Array<Record<string, unknown>> = []
+  process.env.OPENAI_API_FORMAT = 'responses'
+  process.env.OPENAI_SHIM_TOOL_MODE = 'lazy'
 
   globalThis.fetch = (async (_input, init) => {
-    requestBody = JSON.parse(String(init?.body))
+    requestBodies.push(JSON.parse(String(init?.body)))
+
+    if (requestBodies.length === 1) {
+      return new Response(
+        JSON.stringify({
+          id: 'resp-shim-lazy-phase1',
+          model: 'fake-model',
+          output: [
+            {
+              type: 'function_call',
+              name: 'request_tools',
+              arguments: '{"tools":["WebSearch"]}',
+              call_id: 'call_request_tools',
+            },
+          ],
+          usage: { input_tokens: 1, output_tokens: 1, total_tokens: 2 },
+        }),
+        { headers: { 'Content-Type': 'application/json' } },
+      )
+    }
 
     return new Response(
       JSON.stringify({
-        id: 'chatcmpl-1',
-        model: 'google/gemini-3.1-pro-preview',
-        choices: [
+        id: 'resp-shim-lazy-phase2',
+        model: 'fake-model',
+        output: [
           {
-            message: {
-              role: 'assistant',
-              content: 'done',
-            },
-            finish_reason: 'stop',
+            type: 'message',
+            role: 'assistant',
+            content: [{ type: 'output_text', text: 'done' }],
           },
         ],
-        usage: {
-          prompt_tokens: 12,
-          completion_tokens: 4,
-          total_tokens: 16,
-        },
+        usage: { input_tokens: 1, output_tokens: 1, total_tokens: 2 },
       }),
-      {
-        headers: {
-          'Content-Type': 'application/json',
-        },
-      },
+      { headers: { 'Content-Type': 'application/json' } },
     )
   }) as FetchType
 
   const client = createOpenAIShimClient({}) as OpenAIShimClient
 
   await client.beta.messages.create({
-    model: 'google/gemini-3.1-pro-preview',
+    model: 'fake-model',
     system: 'test system',
-    messages: [
-      { role: 'user', content: 'Use Bash' },
-      {
-        role: 'assistant',
-        content: [
-          {
-            type: 'tool_use',
-            id: 'call_1',
-            name: 'Bash',
-            input: { command: 'pwd' },
-            extra_content: {
-              google: {
-                thought_signature: 'sig-123',
-              },
-            },
-          },
-        ],
-      },
-      {
-        role: 'user',
-        content: [
-          {
-            type: 'tool_result',
-            tool_use_id: 'call_1',
-            content: 'D:\\repo',
-          },
-        ],
-      },
-    ],
-    max_tokens: 64,
+    messages: [{ role: 'user', content: 'What is 2+2?' }],
+    tools: makeShimToolFixtures(),
+    max_tokens: 32,
     stream: false,
   })
 
-  const assistantWithToolCall = (requestBody?.messages as Array<Record<string, unknown>>).find(
-    message => Array.isArray(message.tool_calls),
-  ) as { tool_calls?: Array<Record<string, unknown>> } | undefined
+  expect(requestBodies).toHaveLength(2)
+  const phase1ToolNames = ((requestBodies[0].tools ?? []) as Array<{ name: string }>)
+    .map(tool => tool.name)
+  expect(phase1ToolNames).toEqual(['request_tools'])
 
-  expect(assistantWithToolCall?.tool_calls?.[0]).toMatchObject({
-    id: 'call_1',
-    type: 'function',
-    function: {
-      name: 'Bash',
-      arguments: JSON.stringify({ command: 'pwd' }),
-    },
-    extra_content: {
-      google: {
-        thought_signature: 'sig-123',
-      },
-    },
-  })
+  const phase2Tools = (requestBodies[1].tools ?? []) as Array<{
+    name: string
+    parameters: { properties?: Record<string, Record<string, unknown>> }
+  }>
+  expect(phase2Tools.map(tool => tool.name).sort()).toEqual(['Bash', 'Read', 'WebSearch'])
+  const webSearchParameters = phase2Tools.find(tool => tool.name === 'WebSearch')?.parameters
+  expect(webSearchParameters?.properties?.query?.description).toBeUndefined()
 })
 
-test('preserves Grep tool pattern field in OpenAI-compatible schemas', async () => {
-  let requestBody: Record<string, unknown> | undefined
+test('ShimToolSearch lazy phase 2 falls back to all responses tools on malformed request_tools JSON', async () => {
+  const requestBodies: Array<Record<string, unknown>> = []
+  process.env.OPENAI_API_FORMAT = 'responses'
+  process.env.OPENAI_SHIM_TOOL_MODE = 'lazy'
 
   globalThis.fetch = (async (_input, init) => {
-    requestBody = JSON.parse(String(init?.body))
+    requestBodies.push(JSON.parse(String(init?.body)))
+
+    if (requestBodies.length === 1) {
+      return new Response(
+        JSON.stringify({
+          id: 'resp-shim-lazy-malformed-phase1',
+          model: 'fake-model',
+          output: [
+            {
+              type: 'function_call',
+              name: 'request_tools',
+              arguments: '{not-json',
+              call_id: 'call_request_tools',
+            },
+          ],
+          usage: { input_tokens: 1, output_tokens: 1, total_tokens: 2 },
+        }),
+        { headers: { 'Content-Type': 'application/json' } },
+      )
+    }
 
     return new Response(
       JSON.stringify({
-        id: 'chatcmpl-grep-schema',
-        model: 'qwen/qwen3.6-plus',
-        choices: [
+        id: 'resp-shim-lazy-malformed-phase2',
+        model: 'fake-model',
+        output: [
           {
-            message: {
-              role: 'assistant',
-              content: 'done',
-            },
-            finish_reason: 'stop',
+            type: 'message',
+            role: 'assistant',
+            content: [{ type: 'output_text', text: 'done' }],
           },
         ],
-        usage: {
-          prompt_tokens: 12,
-          completion_tokens: 4,
-          total_tokens: 16,
-        },
+        usage: { input_tokens: 1, output_tokens: 1, total_tokens: 2 },
       }),
-      {
-        headers: {
-          'Content-Type': 'application/json',
-        },
-      },
+      { headers: { 'Content-Type': 'application/json' } },
     )
   }) as FetchType
 
   const client = createOpenAIShimClient({}) as OpenAIShimClient
 
   await client.beta.messages.create({
-    model: 'qwen/qwen3.6-plus',
+    model: 'fake-model',
     system: 'test system',
-    messages: [{ role: 'user', content: 'Use Grep' }],
-    tools: [
-      {
-        name: 'Grep',
-        description: 'Search file contents',
-        input_schema: {
-          type: 'object',
-          properties: {
-            pattern: { type: 'string', description: 'Search pattern' },
-            path: { type: 'string' },
-          },
-          required: ['pattern'],
-          additionalProperties: false,
-        },
-      },
-    ],
-    max_tokens: 64,
+    messages: [{ role: 'user', content: 'What is 2+2?' }],
+    tools: makeShimToolFixtures(),
+    max_tokens: 32,
     stream: false,
   })
 
-  const tools = requestBody?.tools as Array<Record<string, unknown>> | undefined
-  const grepTool = tools?.find(tool => (tool.function as Record<string, unknown>)?.name === 'Grep') as
-    | { function?: { parameters?: { properties?: Record<string, unknown>; required?: string[] } } }
-    | undefined
-
-  expect(Object.keys(grepTool?.function?.parameters?.properties ?? {})).toContain('pattern')
-  expect(grepTool?.function?.parameters?.required).toContain('pattern')
+  expect(requestBodies).toHaveLength(2)
+  const phase2ToolNames = ((requestBodies[1].tools ?? []) as Array<{ name: string }>)
+    .map(tool => tool.name)
+    .sort()
+  expect(phase2ToolNames).toEqual(['Bash', 'Read', 'Skill', 'WebFetch', 'WebSearch'])
 })
 
 test('does not infer Gemini mode from OPENAI_BASE_URL path substrings', async () => {
@@ -3972,6 +4976,74 @@ test('self-heals tool-call incompatibility by retrying local Ollama requests wit
   expect(requestBodies[1]?.tool_choice).toBeUndefined()
 })
 
+// Responses transport against a local base URL in predict mode: the first
+// request is rejected with HTTP 400 "tool_calls are not supported"; the shim
+// is expected to retry once, and the retry must carry neither `tools` nor
+// `tool_choice` from the first attempt.
+test('self-heals responses transport tool-call incompatibility without stale tools', async () => {
+  process.env.OPENAI_BASE_URL = 'http://localhost:11434/v1'
+  process.env.OPENAI_API_FORMAT = 'responses'
+  process.env.OPENAI_SHIM_TOOL_MODE = 'predict'
+
+  const requestBodies: Array<Record<string, unknown>> = []
+  globalThis.fetch = (async (_input, init) => {
+    const requestBody = JSON.parse(String(init?.body)) as Record<string, unknown>
+    requestBodies.push(requestBody)
+
+    // First attempt: simulate a backend that cannot handle tool calls.
+    if (requestBodies.length === 1) {
+      return new Response('tool_calls are not supported', {
+        status: 400,
+        headers: {
+          'Content-Type': 'text/plain',
+        },
+      })
+    }
+
+    // Retry: succeed with a plain text answer.
+    return new Response(
+      JSON.stringify({
+        id: 'resp-local-toolless',
+        model: 'qwen2.5-coder:7b',
+        output: [
+          {
+            type: 'message',
+            role: 'assistant',
+            content: [{ type: 'output_text', text: 'fallback without tools' }],
+          },
+        ],
+        usage: {
+          input_tokens: 8,
+          output_tokens: 4,
+          total_tokens: 12,
+        },
+      }),
+      {
+        status: 200,
+        headers: {
+          'Content-Type': 'application/json',
+        },
+      },
+    )
+  }) as FetchType
+
+  const client = createOpenAIShimClient({}) as OpenAIShimClient
+
+  await expect(
+    client.beta.messages.create({
+      model: 'qwen2.5-coder:7b',
+      messages: [{ role: 'user', content: 'Read a file in this repository' }],
+      tools: makeShimToolFixtures(),
+      tool_choice: { type: 'tool', name: 'Read' },
+      max_tokens: 64,
+      stream: false,
+    }),
+  ).resolves.toBeDefined()
+
+  expect(requestBodies).toHaveLength(2)
+  // First attempt still carries the Read tool and the forced choice...
+  expect(((requestBodies[0]?.tools ?? []) as Array<{ name: string }>).map(tool => tool.name)).toContain('Read')
+  expect(requestBodies[0]?.tool_choice).toEqual({
+    type: 'function',
+    name: 'Read',
+  })
+  // ...while the retry is completely tool-free.
+  expect(requestBodies[1]?.tools).toBeUndefined()
+  expect(requestBodies[1]?.tool_choice).toBeUndefined()
+})
+
 test('preserves valid tool_result and drops orphan tool_result', async () => {
   let requestBody: Record<string, unknown> | undefined
 
diff --git a/src/services/api/openaiShim.ts b/src/services/api/openaiShim.ts
index 51b52108ba..f3486aff4e 100644
--- a/src/services/api/openaiShim.ts
+++ b/src/services/api/openaiShim.ts
@@ -199,6 +199,445 @@ function sleepMs(ms: number): Promise<void> {
   return new Promise(resolve => setTimeout(resolve, ms))
 }
 
+// ---------------------------------------------------------------------------
+// ShimToolSearch — opt-in tool schema reduction for 3P providers.
+// Off by default. Modes:
+//   minify  — send every tool, but strip verbose schema prose.
+//   predict — send a predicted subset only when confident; uncertain sends all.
+//   lazy    — two-phase request_tools protocol for conversational turns.
+// ENABLE_SHIM_TOOL_SEARCH=1 is kept as a backwards-compatible alias for lazy.
+// ---------------------------------------------------------------------------
+
+type ShimToolSearchMode = 'off' | 'minify' | 'predict' | 'lazy'
+
+/** One labelled slice of a request body in the token audit report. */
+interface OpenAIShimTokenAuditPart {
+  /** Label printed in the audit line (e.g. 'tool_schemas'). */
+  name: string
+  /** Size of the slice in characters. */
+  chars: number
+  /** Token estimate derived from `chars`. */
+  estimatedTokens: number
+}
+
+/**
+ * Resolve the ShimToolSearch mode from the environment.
+ *
+ * OPENAI_SHIM_TOOL_MODE is used whenever it is defined (even as an empty
+ * string); ENABLE_SHIM_TOOL_SEARCH is consulted only when it is undefined.
+ * The value is trimmed and lower-cased before matching; anything that is not
+ * a recognised mode or alias resolves to 'off'.
+ */
+function getShimToolSearchMode(): ShimToolSearchMode {
+  const configured =
+    process.env.OPENAI_SHIM_TOOL_MODE ?? process.env.ENABLE_SHIM_TOOL_SEARCH ?? ''
+
+  switch (configured.trim().toLowerCase()) {
+    case 'minify':
+      return 'minify'
+    case 'predict':
+    case 'safe-predict':
+    case 'predict-safe':
+      return 'predict'
+    case 'lazy':
+    case '1':
+    case 'true':
+    case 'on':
+      return 'lazy'
+    default:
+      // Covers '', '0', 'false', 'off' and every unrecognised value.
+      return 'off'
+  }
+}
+
+/** True when OPENAI_SHIM_TOKEN_AUDIT is set to a value `isEnvTruthy` accepts. */
+function isOpenAIShimTokenAuditEnabled(): boolean {
+  return isEnvTruthy(process.env.OPENAI_SHIM_TOKEN_AUDIT)
+}
+
+/**
+ * Write a `[ShimToolSearch]`-prefixed line to stderr, but only when
+ * OPENAI_SHIM_DEBUG or ENABLE_SHIM_TOOL_SEARCH_DEBUG is truthy.
+ */
+function debugShimToolSearch(message: string): void {
+  const debugEnabled =
+    isEnvTruthy(process.env.OPENAI_SHIM_DEBUG) ||
+    isEnvTruthy(process.env.ENABLE_SHIM_TOOL_SEARCH_DEBUG)
+  if (!debugEnabled) return
+
+  process.stderr.write(`[ShimToolSearch] ${message}\n`)
+}
+
+/** Rough token estimate: one token per four characters, rounded to nearest. */
+function estimateRequestTokensFromChars(chars: number): number {
+  const charsPerToken = 4
+  return Math.round(chars / charsPerToken)
+}
+
+/**
+ * Size of `value` once serialized with `stableStringify`, measured as the
+ * `.length` of the result (presumably a string — helper defined elsewhere).
+ */
+function serializedSizeOf(value: unknown): number {
+  return stableStringify(value).length
+}
+
+/** Total serialized size of the chat messages whose `role` equals `role`. */
+function sumSerializedMessageChars(
+  messages: OpenAIMessage[],
+  role: OpenAIMessage['role'],
+): number {
+  let total = 0
+  for (const message of messages) {
+    if (message.role === role) total += serializedSizeOf(message)
+  }
+  return total
+}
+
+/**
+ * Total serialized size of the Responses `input` items whose `role` equals
+ * `role`. Returns 0 when `input` is not an array; items that are not objects
+ * or carry no `role` are ignored.
+ */
+function sumResponsesInputMessageChars(
+  input: unknown,
+  role: string,
+): number {
+  if (!Array.isArray(input)) return 0
+
+  let total = 0
+  for (const item of input) {
+    if (!item || typeof item !== 'object') continue
+    if (!('role' in item) || item.role !== role) continue
+    total += serializedSizeOf(item)
+  }
+  return total
+}
+
+/** Build an audit part for `name`, deriving the token estimate from `chars`. */
+function makeTokenAuditPart(name: string, chars: number): OpenAIShimTokenAuditPart {
+  const estimatedTokens = estimateRequestTokensFromChars(chars)
+  return { name, chars, estimatedTokens }
+}
+
+/**
+ * Break a chat-completions request body into audit parts: one per message
+ * role (system / user / assistant / tool), one for the tool schemas, and one
+ * for everything else in the body ("response_config").
+ */
+function buildChatCompletionsTokenAuditParts(
+  body: Record<string, unknown>,
+): OpenAIShimTokenAuditPart[] {
+  // Everything except `messages` and `tools` counts as response configuration.
+  const { messages: rawMessages, tools, ...responseConfig } = body
+  const messages = Array.isArray(rawMessages) ? (rawMessages as OpenAIMessage[]) : []
+  const toolSchemaChars = Array.isArray(tools) ? serializedSizeOf(tools) : 0
+  const roleSlice = (label: string, role: OpenAIMessage['role']): OpenAIShimTokenAuditPart =>
+    makeTokenAuditPart(label, sumSerializedMessageChars(messages, role))
+
+  return [
+    roleSlice('messages.system', 'system'),
+    roleSlice('messages.user', 'user'),
+    roleSlice('messages.assistant', 'assistant'),
+    roleSlice('messages.tool_results', 'tool'),
+    makeTokenAuditPart('tool_schemas', toolSchemaChars),
+    makeTokenAuditPart('response_config', serializedSizeOf(responseConfig)),
+  ]
+}
+
+/**
+ * Break a Responses-API request body into audit parts: the instructions
+ * string, input items grouped by role, tool traffic in the input, the tool
+ * schemas, and everything else in the body ("response_config").
+ *
+ * In the Responses API, tool calls and tool results travel as role-less input
+ * items identified by `type` ('function_call' / 'function_call_output').
+ * They are reported as their own parts so they are not misattributed to
+ * "json_overhead"; this mirrors the 'messages.tool_results' part of the
+ * chat-completions audit.
+ */
+function buildResponsesTokenAuditParts(
+  body: Record<string, unknown>,
+): OpenAIShimTokenAuditPart[] {
+  const responseConfig = { ...body }
+  delete responseConfig.input
+  delete responseConfig.instructions
+  delete responseConfig.tools
+
+  // Sum the serialized size of input items whose `type` matches exactly.
+  const sumInputItemsOfType = (type: string): number =>
+    Array.isArray(body.input)
+      ? body.input
+          .filter(item =>
+            item &&
+            typeof item === 'object' &&
+            'type' in item &&
+            item.type === type,
+          )
+          .reduce((sum, item) => sum + serializedSizeOf(item), 0)
+      : 0
+
+  return [
+    makeTokenAuditPart('instructions', typeof body.instructions === 'string' ? body.instructions.length : 0),
+    makeTokenAuditPart('input.user', sumResponsesInputMessageChars(body.input, 'user')),
+    makeTokenAuditPart('input.assistant', sumResponsesInputMessageChars(body.input, 'assistant')),
+    makeTokenAuditPart('input.system', sumResponsesInputMessageChars(body.input, 'system')),
+    makeTokenAuditPart('input.tool_calls', sumInputItemsOfType('function_call')),
+    makeTokenAuditPart('input.tool_results', sumInputItemsOfType('function_call_output')),
+    makeTokenAuditPart('tool_schemas', Array.isArray(body.tools) ? serializedSizeOf(body.tools) : 0),
+    makeTokenAuditPart('response_config', serializedSizeOf(responseConfig)),
+  ]
+}
+
+/**
+ * Print a size breakdown of an outgoing request to stderr when
+ * OPENAI_SHIM_TOKEN_AUDIT is enabled; otherwise do nothing.
+ *
+ * Output, every line prefixed with `[OpenAIShimTokenAudit]`:
+ *   1. a total line (chars, estimated tokens, model, transport, tool count);
+ *   2. one line per non-empty part, largest first, with its share of the body;
+ *   3. up to ten `tool.<name>` lines for the largest tool schemas.
+ *
+ * @param args.body           Parsed request body.
+ * @param args.serializedBody The body as serialized for sending; its length is the 100% baseline.
+ * @param args.model          Model name, echoed in the total line.
+ * @param args.transport      Selects the Responses or chat-completions breakdown.
+ */
+function logOpenAIShimTokenAudit(args: {
+  body: Record<string, unknown>
+  serializedBody: string
+  model: string
+  transport: ReturnType<typeof resolveProviderRequest>['transport']
+}): void {
+  if (!isOpenAIShimTokenAuditEnabled()) return
+
+  const baseParts = args.transport === 'responses'
+    ? buildResponsesTokenAuditParts(args.body)
+    : buildChatCompletionsTokenAuditParts(args.body)
+  // Whatever the named parts do not account for is reported as JSON overhead
+  // (clamped at zero in case the parts add up to more than the body).
+  const usedChars = baseParts.reduce((sum, part) => sum + part.chars, 0)
+  const remainingChars = Math.max(args.serializedBody.length - usedChars, 0)
+  const parts = [
+    ...baseParts,
+    makeTokenAuditPart('json_overhead', remainingChars),
+  ]
+    .filter(part => part.chars > 0)
+    .sort((a, b) => b.chars - a.chars)
+
+  const totalEstimatedTokens = estimateRequestTokensFromChars(args.serializedBody.length)
+  const toolCount = Array.isArray(args.body.tools) ? args.body.tools.length : 0
+  process.stderr.write(
+    `[OpenAIShimTokenAudit] total chars=${args.serializedBody.length} est_tokens=${totalEstimatedTokens} model=${args.model} transport=${args.transport} tools=${toolCount}\n`,
+  )
+
+  for (const part of parts) {
+    // Guard the division so an empty body reports 0% instead of NaN.
+    const percent = args.serializedBody.length > 0
+      ? Math.round((part.chars / args.serializedBody.length) * 100)
+      : 0
+    process.stderr.write(
+      `[OpenAIShimTokenAudit]   ${part.name}: chars=${part.chars} est_tokens=${part.estimatedTokens} pct=${percent}\n`,
+    )
+  }
+
+  // Accept both tool shapes: chat-completions nests the name under
+  // `function.name`, the Responses shape has `name` at the top level.
+  const getToolNameForAudit = (tool: unknown): string | null => {
+    if (!tool || typeof tool !== 'object' || Array.isArray(tool)) return null
+
+    const record = tool as Record<string, unknown>
+    const functionValue = record.function
+    if (functionValue && typeof functionValue === 'object' && !Array.isArray(functionValue)) {
+      const functionName = (functionValue as Record<string, unknown>).name
+      if (typeof functionName === 'string') return functionName
+    }
+
+    return typeof record.name === 'string' ? record.name : null
+  }
+
+  // Report the ten largest named tool schemas; unnamed entries are skipped.
+  const tools = Array.isArray(args.body.tools) ? args.body.tools : []
+  for (const tool of tools
+    .map(tool => ({
+      name: getToolNameForAudit(tool),
+      chars: serializedSizeOf(tool),
+    }))
+    .filter((tool): tool is { name: string; chars: number } => tool.name !== null)
+    .sort((a, b) => b.chars - a.chars)
+    .slice(0, 10)) {
+    process.stderr.write(
+      `[OpenAIShimTokenAudit]   tool.${tool.name}: chars=${tool.chars} est_tokens=${estimateRequestTokensFromChars(tool.chars)}\n`,
+    )
+  }
+}
+
+/**
+ * Shorten a tool description for 3P providers.
+ *
+ * - Falsy input, or text no longer than `maxLen`, is returned unchanged
+ *   (even if it holds several sentences).
+ * - Otherwise the first sentence is returned (trimmed) when it ends — on
+ *   `.`, `!` or `?` followed by whitespace or the end of the inspected
+ *   window — after at least 30 characters and within `maxLen + 20`.
+ * - Failing that, the text is cut at `maxLen`, the trailing partial word is
+ *   dropped and `…` is appended.
+ *
+ * @param text   Original description.
+ * @param maxLen Soft length limit (default 200); a sentence cut may exceed it by up to 20.
+ */
+function truncateToolDescription(text: string, maxLen = 200): string {
+  if (!text || text.length <= maxLen) return text
+
+  // Look a little past maxLen so a sentence ending just beyond it is still found.
+  const head = text.slice(0, maxLen + 80)
+  const firstSentence = /^[\s\S]{30,}?[.!?](\s|\n|$)/.exec(head)?.[0]
+  if (firstSentence !== undefined && firstSentence.length <= maxLen + 20) {
+    return firstSentence.trim()
+  }
+
+  // No usable sentence boundary: cut at the last whitespace before maxLen.
+  const clipped = text.slice(0, maxLen).replace(/\s\S*$/, '')
+  return `${clipped}…`
+}
+
+/**
+ * Strip `description`/`title` annotations from a JSON Schema while preserving
+ * its structure. Models still get type/required/properties/enum — enough to
+ * generate valid calls.
+ *
+ * Only annotation *keywords* are removed. Two kinds of nested values are
+ * treated specially so caller-chosen names and literal data survive:
+ *   - name maps (`properties`, `patternProperties`, `$defs`, `definitions`,
+ *     `dependentSchemas`): their keys are parameter/definition names, so a
+ *     parameter that happens to be called `description` or `title` is kept
+ *     (dropping it would leave `required` pointing at a missing property);
+ *     each value is still stripped as a schema;
+ *   - literal data (`default`, `const`, `enum`, `examples`): copied verbatim.
+ *
+ * @param schema JSON Schema object; it is not mutated.
+ * @returns A new schema object without description/title annotations.
+ */
+function stripParamDescriptions(schema: Record<string, unknown>): Record<string, unknown> {
+  const nameMapKeywords = ['properties', 'patternProperties', '$defs', 'definitions', 'dependentSchemas']
+  const literalKeywords = ['default', 'const', 'enum', 'examples']
+  const isSchemaObject = (value: unknown): value is Record<string, unknown> =>
+    Boolean(value) && typeof value === 'object' && !Array.isArray(value)
+
+  const out: Record<string, unknown> = {}
+  for (const [k, v] of Object.entries(schema)) {
+    if (k === 'description' || k === 'title') continue
+    if (literalKeywords.includes(k)) {
+      // Data, not schema: keys inside must never be interpreted as keywords.
+      out[k] = v
+    } else if (nameMapKeywords.includes(k) && isSchemaObject(v)) {
+      // Keys here are names; keep every one and strip only the subschemas.
+      out[k] = Object.fromEntries(
+        Object.entries(v).map(([name, sub]) => [
+          name,
+          isSchemaObject(sub) ? stripParamDescriptions(sub) : sub,
+        ]),
+      )
+    } else if (Array.isArray(v)) {
+      out[k] = v.map(item => (isSchemaObject(item) ? stripParamDescriptions(item) : item))
+    } else if (isSchemaObject(v)) {
+      out[k] = stripParamDescriptions(v)
+    } else {
+      out[k] = v
+    }
+  }
+  return out
+}
+
+/**
+ * Minify tool schemas for 3P providers — dramatically reduces token usage:
+ *   Bash: 11.4KB → ~0.4KB, TodoWrite: 9.6KB → ~1.2KB, Aggregate: ~63KB → ~5KB
+ *
+ * Each tool keeps its name and every other field; only the function
+ * description is truncated and parameter-level description/title prose is
+ * stripped. The input array and its tools are not mutated.
+ *
+ * @param tools Tools in OpenAI function-tool shape.
+ * @returns New tool objects with reduced schemas, in the same order.
+ */
+function minifyToolSchemas(tools: OpenAITool[]): OpenAITool[] {
+  return tools.map(tool => {
+    const { description, parameters } = tool.function
+    return {
+      ...tool,
+      function: {
+        ...tool.function,
+        description: truncateToolDescription(description),
+        // `parameters` may be absent on a function tool; pass it through
+        // untouched instead of crashing in Object.entries().
+        parameters:
+          parameters && typeof parameters === 'object'
+            ? stripParamDescriptions(parameters as Record<string, unknown>)
+            : parameters,
+      },
+    }
+  })
+}
+
+/**
+ * One-line descriptions for the tool directory injected during phase-1.
+ * Keyed by tool name; each value is a short summary of that tool.
+ */
+const TOOL_DIRECTORY: Record<string, string> = {
+  Bash:            'Execute shell commands (build, test, install, git, etc.)',
+  Read:            'Read file contents',
+  Write:           'Create or overwrite a file',
+  Edit:            'Make targeted edits to an existing file',
+  MultiEdit:       'Make multiple coordinated edits to an existing file',
+  Glob:            'List files matching a pattern',
+  Grep:            'Search file contents with regex',
+  WebSearch:       'Search the web for current information',
+  WebFetch:        'Fetch and read web page content',
+  NotebookEdit:    'Edit Jupyter notebook cells',
+  Skill:           'Load and use a named skill',
+  TodoWrite:       'Create/update structured task list',
+  AskUserQuestion: 'Ask the user a clarifying question',
+  Agent:           'Spawn a sub-agent to handle a complex sub-task',
+  TaskCreate:      'Create a tracked task for teammate/coordinator workflows',
+  TaskGet:         'Read a tracked task',
+  TaskUpdate:      'Update a tracked task status or ownership',
+  TaskList:        'List tracked tasks',
+  TaskOutput:      'Read output from a background task',
+  TaskStop:        'Stop a running task',
+  EnterPlanMode:   'Enter plan mode when implementation should wait',
+  ExitPlanMode:    'Exit plan mode with an implementation plan',
+  EnterWorktree:   'Enter a worktree session',
+  ExitWorktree:    'Exit a worktree session',
+  SendMessage:     'Send a message to a teammate or coordinator',
+  ListPeers:       'List connected peers or teammates',
+  LSP:             'Use language server intelligence',
+}
+
+const CORE_TOOL_NAMES = new Set([ // always added to the wanted set when tools are filtered by name
+  'Bash',
+  'Read',
+  'Write',
+  'Edit',
+  'MultiEdit',
+  'Glob',
+  'Grep',
+  'TodoWrite',
+  'AskUserQuestion',
+  'Agent',
+])
+
+/** The single meta-tool sent during phase-1, in Anthropic tool shape (name / description / input_schema). */
+const REQUEST_TOOLS_TOOL = {
+  name: 'request_tools', // matched by name when phase-1 tool calls are inspected
+  description: 'Request the full schema for one or more tools before using them. Call this first if you need to use any tools.',
+  input_schema: {
+    type: 'object',
+    properties: {
+      tools: {
+        type: 'array',
+        description: 'Names of the tools needed for the next step.',
+        items: {
+          type: 'string',
+        },
+      },
+      rationale: {
+        type: 'string',
+        description: 'Brief reason these tools are needed.',
+      },
+    },
+    required: ['tools'], // `rationale` is optional
+    additionalProperties: false,
+  },
+}
+
+/**
+ * Keyword heuristics to predict which tools a request needs, based on the
+ * newest user message that still has text once <system-reminder> and
+ * <context> blocks are removed.
+ *
+ * @returns A Set of tool names, an empty Set for a conversational request, or
+ *   null when there is no usable user text or nothing could be decided.
+ */
+function predictNeededTools(messages: unknown[]): Set<string> | null {
+  function extractUserQuery(text: string): string {
+    return text
+      .replace(/<system-reminder[\s\S]*?<\/system-reminder>/gi, '')
+      .replace(/<context[\s\S]*?<\/context>/gi, '')
+      .trim()
+  }
+
+  let lastUserText = ''
+  for (let i = messages.length - 1; i >= 0; i--) {
+    const m = messages[i] as { role?: string; content?: unknown } | null | undefined
+    if (m?.role !== 'user') continue
+    let rawText = ''
+    if (typeof m.content === 'string') {
+      rawText = m.content
+    } else if (Array.isArray(m.content)) {
+      rawText = (m.content as Array<{ type?: string; text?: string } | null>)
+        .filter(p => p?.type === 'text')
+        .map(p => p?.text ?? '')
+        .join(' ')
+    }
+    const clean = extractUserQuery(rawText)
+    if (clean) {
+      lastUserText = clean
+      break
+    }
+  }
+  if (!lastUserText) return null
+
+  const t = lastUserText.toLowerCase()
+  const tools = new Set<string>()
+
+  if (/\brun\b|\bexecut|\bbuild|\btest\b|\binstall\b|\bnpm\b|\bpip\b|\bgit\b|\bcompil|\bscript|\bdocker|\bpython\b|\bnode\b/.test(t)) tools.add('Bash')
+  if (/\bread\b|\bshow\b|\bcontent|\blook at|\bopen\b|\bcat\b|\bwhat is in|\bwhat does.*file/.test(t)) tools.add('Read')
+  if (/\bcreate\b|\bwrite\b|\bnew file|\bgenerat|\bscaffold|\binitializ|\btouch\b/.test(t)) tools.add('Write')
+  if (/\bedit\b|\bmodif|\bchange\b|\bfix\b|\bupdat|\brefactor|\breplace|\bimpleme|\badd.*to\b|\bremove\b|\bdelet.*from/.test(t)) tools.add('Edit')
+  if (/\bsearch\b|\bfind\b|\bgrep\b|\blook for\b|\bwhere is\b|\boccurrenc|\bwhich file/.test(t)) { tools.add('Grep'); tools.add('Glob') }
+  if (/\bweb\b|\binternet\b|\bonline\b|\bsearch the web\b|\bbrowse\b|\blatest\b|\bcurrent\b|\btoday\b|\bfetch.*https?:\/\//.test(t)) { tools.add('WebSearch'); tools.add('WebFetch') }
+  if (/\blist.*file|\bfind.*file|\bfiles in|\bwhat files|\blist.*dir|\bls\b/.test(t)) tools.add('Glob')
+  if (/\bnotebook\b|\bjupyter\b|\bipynb\b/.test(t)) tools.add('NotebookEdit')
+  if (/\bskill\b|\bskills\b/.test(t)) tools.add('Skill')
+  if (/\btask\b|\bteammate\b|\bcoordinator\b|\bassign\b|\bowner\b/.test(t)) { tools.add('TaskCreate'); tools.add('TaskUpdate'); tools.add('TaskList') }
+  if (/\btodo\b|\bplan\b|\btask list|\btrack\b|\bprogress\b/.test(t)) tools.add('TodoWrite')
+
+  if (/\bimplement\b|\bbuild.*feature|\badd.*feature|\bwrite.*function|\bwrite.*class|\bcreate.*function|\bcreate.*class/.test(t)) {
+    tools.add('Bash'); tools.add('Write'); tools.add('Edit'); tools.add('Read')
+  }
+  if (tools.has('Edit') || tools.has('Write')) { tools.add('Bash'); tools.add('Read') }
+
+  // A matched tool keyword outranks a question-style opener; otherwise a request
+  // such as "can you update the changelog" would be answered with no tools at all.
+  if (tools.size > 0) return tools
+  const isConversational = /^(what|who|why|how|when|where|explain|describe|tell me|is it|can you|do you|list|summarize|overview|difference between|compare|pros and cons)\b/.test(t)
+    && !/file|code|codebase|repository|repo|project|source|module|component|function|class|test|build|run|install|create|write|edit|implement|fix|debug/.test(t)
+  return isConversational ? new Set<string>() : null
+}
+
+// Appends `note` to a system prompt that is a string, an array of blocks, or neither.
+function appendSystemPrompt(system: unknown, note: string): ShimCreateParams['system'] {
+  if (Array.isArray(system)) {
+    // Block form: the note becomes a trailing text block, minus leading whitespace.
+    const noteBlock = { type: 'text', text: note.trimStart() }
+    return [...system, noteBlock] as ShimCreateParams['system']
+  }
+  // A string is extended verbatim; any other value is replaced by a default prompt.
+  return `${typeof system === 'string' ? system : 'You are a helpful AI assistant.'}${note}`
+}
+
+// Renders one "- name: summary" line per tool, preferring TOOL_DIRECTORY over the tool's own text.
+function getToolDirectoryLines(tools: OpenAITool[]): string {
+  const lines: string[] = []
+  for (const { function: fn } of tools) {
+    const summary = TOOL_DIRECTORY[fn.name] ?? truncateToolDescription(fn.description, 120)
+    lines.push(`- ${fn.name}: ${summary || 'Available tool'}`)
+  }
+  return lines.join('\n')
+}
+
+// Returns the name carried by a `{ type: 'tool', name }` tool_choice, otherwise undefined.
+function getForcedToolChoiceName(toolChoice: unknown): string | undefined {
+  const isPlainObject =
+    Boolean(toolChoice) && typeof toolChoice === 'object' && !Array.isArray(toolChoice)
+  if (!isPlainObject) return undefined
+
+  const { type, name } = toolChoice as Record<string, unknown>
+  if (type !== 'tool') return undefined
+  return typeof name === 'string' ? name : undefined
+}
+
+function hasOpenAIToolNamed(tools: OpenAITool[], name: string): boolean {
+  return tools.findIndex(candidate => candidate.function.name === name) !== -1 // exact name match
+}
+
+function selectShimToolSet(
+  converted: OpenAITool[],
+  messages: unknown[],
+  mode: Exclude<ShimToolSearchMode, 'off' | 'lazy'>,
+  forcedToolName?: string,
+): OpenAITool[] {
+  // 'minify' keeps every tool; only the other mode consults the keyword heuristics.
+  const predicted = mode === 'minify' ? null : predictNeededTools(messages)
+  // No prediction available: send everything, minified.
+  if (predicted === null) return minifyToolSchemas(converted)
+  // Predicted conversational and nothing is forced: send no tools at all.
+  if (predicted.size === 0 && !forcedToolName) return []
+
+  // Core tools are always kept, plus the predicted ones and any forced tool.
+  const keep = new Set<string>([...CORE_TOOL_NAMES, ...predicted])
+  if (forcedToolName) keep.add(forcedToolName)
+  const subset = converted.filter(({ function: fn }) => keep.has(fn.name))
+
+  // If the filter matched nothing, fall back to the complete list.
+  if (subset.length === 0) return minifyToolSchemas(converted)
+  return minifyToolSchemas(subset)
+}
+
+function isRequestToolsOnly(tools: OpenAITool[]): boolean {
+  return tools.length !== 1 ? false : tools[0]?.function.name === 'request_tools' // lone meta-tool
+}
+
 // ---------------------------------------------------------------------------
 // Types — minimal subset of Anthropic SDK types we need to produce
 // ---------------------------------------------------------------------------
@@ -238,6 +677,138 @@ interface OpenAITool {
   }
 }
 
+interface OpenAIResponsesTool { // flat tool shape produced by convertOpenAIToolsToResponsesTools
+  type: 'function'
+  name: string
+  description: string
+  parameters: Record<string, unknown>
+  strict: boolean // set to true when the source tool leaves `strict` unset
+}
+
+interface ShimPhase1ToolCall { // tool call as read from a phase-1 reply; every field may be absent
+  function?: {
+    name?: string
+    arguments?: string // JSON text; _shimToolSearchCreate feeds it to JSON.parse
+  }
+}
+
+interface ShimPhase1Response { // loose union of the two phase-1 reply shapes read by the extractShimPhase1* helpers
+  choices?: Array<{ // chat-completions shape; only choices[0].message is read
+    message?: {
+      role?: string
+      content?: string | null
+      tool_calls?: ShimPhase1ToolCall[]
+    }
+    finish_reason?: string
+  }>
+  output?: Array<{ // responses shape; 'message' and 'function_call' / 'tool_call' items are read
+    type?: string
+    role?: string
+    content?: string | null | Array<{ type?: string; text?: string }>
+    name?: string
+    arguments?: string
+  }>
+  output_text?: string | null // used as the message text when `output` has no assistant message item
+}
+
+// Flattens chat-completions tools into Responses-API tools, dropping 'ToolSearchTool'.
+function convertOpenAIToolsToResponsesTools(tools: OpenAITool[]): OpenAIResponsesTool[] {
+  const flattened: OpenAIResponsesTool[] = []
+  for (const { function: fn } of tools) {
+    if (fn.name === 'ToolSearchTool') continue
+    const { name, description, parameters } = fn
+    // `strict` defaults to true when the source tool does not specify it.
+    flattened.push({ type: 'function', name, description, parameters, strict: fn.strict ?? true })
+  }
+  return flattened
+}
+
+/**
+ * Translates a chat-completions `tool_choice` into its Responses-API form.
+ *
+ * - 'auto' / 'required' / 'none' are returned as-is.
+ * - `{ type: 'function', function: { name } }` becomes `{ type: 'function', name }`.
+ * - Every other value yields undefined.
+ */
+function convertOpenAIToolChoiceToResponsesToolChoice(toolChoice: unknown): unknown {
+  switch (toolChoice) {
+    case 'auto':
+    case 'required':
+    case 'none':
+      return toolChoice
+  }
+
+  const isRecord = (value: unknown): value is Record<string, unknown> =>
+    Boolean(value) && typeof value === 'object' && !Array.isArray(value)
+
+  if (!isRecord(toolChoice) || toolChoice.type !== 'function') {
+    return undefined
+  }
+
+  const fn = toolChoice.function
+  if (!isRecord(fn)) {
+    return undefined
+  }
+
+  // Only a string name is usable as a forced function choice.
+  return typeof fn.name === 'string'
+    ? { type: 'function', name: fn.name }
+    : undefined
+}
+
+function extractResponsesMessageContent(
+  content: string | null | Array<{ type?: string; text?: string }> | undefined,
+): string {
+  if (typeof content === 'string') return content
+  if (!Array.isArray(content)) return ''
+  // Concatenate string `text` fields in order; other parts contribute nothing.
+  const pieces = content.filter(part => typeof part.text === 'string')
+  return pieces.reduce((joined, part) => joined + part.text, '')
+}
+
+// Collects tool calls from a phase-1 reply in either chat-completions or Responses shape.
+function extractShimPhase1ToolCalls(phase1Json: ShimPhase1Response): ShimPhase1ToolCall[] {
+  // Chat-completions shape wins whenever the first choice carries a tool_calls array.
+  const fromChat = phase1Json.choices?.[0]?.message?.tool_calls
+  if (Array.isArray(fromChat)) return fromChat
+
+  const items = phase1Json.output
+  if (!Array.isArray(items)) return []
+
+  // Responses shape: re-wrap call items so both shapes expose `function.name/arguments`.
+  const calls: ShimPhase1ToolCall[] = []
+  for (const item of items) {
+    const isCall = item.type === 'function_call' || item.type === 'tool_call'
+    if (isCall) {
+      calls.push({ function: { name: item.name, arguments: item.arguments } })
+    }
+  }
+  return calls
+}
+
+/**
+ * Picks the assistant message out of a phase-1 reply.
+ *
+ * Order of preference: the first chat-completions choice, then the first
+ * assistant `message` item in `output`, then `output_text` (or '').
+ */
+function extractShimPhase1Message(phase1Json: ShimPhase1Response): Record<string, unknown> {
+  const fromChat = phase1Json.choices?.[0]?.message
+  if (fromChat) return fromChat
+
+  const items = Array.isArray(phase1Json.output) ? phase1Json.output : []
+  const assistantItem = items.find(
+    item => item.type === 'message' && item.role === 'assistant',
+  )
+  if (!assistantItem) {
+    return { role: 'assistant', content: phase1Json.output_text ?? '' }
+  }
+
+  // `role` already equals 'assistant' because of the find() predicate above.
+  const role = assistantItem.role ?? 'assistant'
+  return { role, content: extractResponsesMessageContent(assistantItem.content) }
+}
+
 function convertSystemPrompt(
   system: unknown,
 ): string {
@@ -1378,66 +1949,40 @@ class OpenAIShimMessages {
 
     const promise = (async () => {
       const request = resolveProviderRequest({ model: self.providerOverride?.model ?? params.model, baseUrl: self.providerOverride?.baseURL, reasoningEffortOverride: self.reasoningEffort })
-      const response = await self._doRequest(request, params, options)
-      httpResponse = response
 
-      if (params.stream) {
-        const isResponsesStream = response.url?.includes('/responses')
-        return new OpenAIShimStream(
-          (
-            request.transport === 'codex_responses' ||
-            request.transport === 'responses' ||
-            isResponsesStream
-          )
-            ? codexStreamToAnthropic(response, request.resolvedModel, options?.signal)
-            : openaiStreamToAnthropic(response, request.resolvedModel, options?.signal),
-        )
-      }
-
-      if (request.transport === 'codex_responses') {
-        const data = await collectCodexCompletedResponse(response, options?.signal)
-        return convertCodexResponseToAnthropicMessage(
-          data,
-          request.resolvedModel,
-        )
-      }
-
-      const isResponsesNonStream = response.url?.includes('/responses')
+      // ShimToolSearch lazy mode: for conversational turns, send only the
+      // request_tools meta-tool and let the model ask for schemas if needed.
+      const shimToolSearchMode = getShimToolSearchMode()
       if (
-        request.transport === 'responses' ||
-        isResponsesNonStream ||
-        (request.transport === 'chat_completions' && isGithubModelsMode())
+        shimToolSearchMode === 'lazy' &&
+        params.tools && (params.tools as unknown[]).length > 0
       ) {
-        const contentType = response.headers.get('content-type') ?? ''
-        if (contentType.includes('application/json')) {
-          const parsed = await response.json() as Record<string, unknown>
-          if (
-            parsed &&
-            typeof parsed === 'object' &&
-            ('output' in parsed || 'incomplete_details' in parsed)
-          ) {
-            return convertCodexResponseToAnthropicMessage(
-              parsed,
-              request.resolvedModel,
-            )
+        const forcedToolName = getForcedToolChoiceName(params.tool_choice)
+        const msgs = Array.isArray(params.messages) ? params.messages as unknown[] : []
+        const predicted = predictNeededTools(msgs)
+        if (
+          predicted !== null &&
+          predicted.size === 0 &&
+          (!forcedToolName || forcedToolName === 'request_tools')
+        ) {
+          const shimResult = await self._shimToolSearchCreate(request, params, options)
+          httpResponse = shimResult.response
+          if (shimResult.converted) {
+            return shimResult.data
           }
-          return self._convertNonStreamingResponse(parsed, request.resolvedModel)
+          return await self._convertCreateResponse(
+            request,
+            shimResult.data as Response,
+            params,
+            options,
+          )
         }
       }
 
-      const contentType = response.headers.get('content-type') ?? ''
-      if (contentType.includes('application/json')) {
-        const data = await response.json()
-        return self._convertNonStreamingResponse(data, request.resolvedModel)
-      }
+      const response = await self._doRequest(request, params, options)
+      httpResponse = response
 
-      const textBody = await response.text().catch(() => '')
-      throw APIError.generate(
-        response.status,
-        undefined,
-        `OpenAI API error ${response.status}: unexpected response: ${textBody.slice(0, 500)}`,
-        response.headers as unknown as Headers,
-      )
+      return await self._convertCreateResponse(request, response, params, options)
     })()
 
       ; (promise as unknown as Record<string, unknown>).withResponse =
@@ -1454,6 +1999,71 @@ class OpenAIShimMessages {
     return promise
   }
 
+  /**
+   * Converts a raw provider Response into a stream wrapper or a converted message:
+   *   - streaming requests are wrapped in an OpenAIShimStream,
+   *   - 'codex_responses' replies are collected and then converted,
+   *   - JSON bodies are converted according to their shape,
+   *   - anything else raises an APIError quoting up to 500 body characters.
+   */
+  private async _convertCreateResponse(
+    request: ReturnType<typeof resolveProviderRequest>,
+    response: Response,
+    params: ShimCreateParams,
+    options?: { signal?: AbortSignal; headers?: Record<string, string> },
+  ) {
+    const model = request.resolvedModel
+    const signal = options?.signal
+    const transport = request.transport
+    // The response URL is consulted as well as the configured transport.
+    const hitResponsesEndpoint = response.url?.includes('/responses')
+
+    if (params.stream) {
+      const useCodexParser =
+        transport === 'codex_responses' ||
+        transport === 'responses' ||
+        hitResponsesEndpoint
+      const events = useCodexParser
+        ? codexStreamToAnthropic(response, model, signal)
+        : openaiStreamToAnthropic(response, model, signal)
+      return new OpenAIShimStream(events)
+    }
+
+    // Codex replies go through collectCodexCompletedResponse even when not streaming.
+    if (transport === 'codex_responses') {
+      const completed = await collectCodexCompletedResponse(response, signal)
+      return convertCodexResponseToAnthropicMessage(completed, model)
+    }
+
+    const contentType = response.headers.get('content-type') ?? ''
+    if (contentType.includes('application/json')) {
+      const payload = await response.json()
+      // Only these routes are checked for a Responses-style body.
+      const mayBeResponsesBody =
+        transport === 'responses' ||
+        hitResponsesEndpoint ||
+        (transport === 'chat_completions' && isGithubModelsMode())
+      if (
+        mayBeResponsesBody &&
+        payload &&
+        typeof payload === 'object' &&
+        ('output' in payload || 'incomplete_details' in payload)
+      ) {
+        return convertCodexResponseToAnthropicMessage(payload, model)
+      }
+      return this._convertNonStreamingResponse(payload, model)
+    }
+
+    // Non-JSON body: report it; a failed body read degrades to ''.
+    const textBody = await response.text().catch(() => '')
+    throw APIError.generate(
+      response.status,
+      undefined,
+      `OpenAI API error ${response.status}: unexpected response: ${textBody.slice(0, 500)}`,
+      response.headers as unknown as Headers,
+    )
+  }
+
   private async _doRequest(
     request: ReturnType<typeof resolveProviderRequest>,
     params: ShimCreateParams,
@@ -1536,10 +2146,158 @@ class OpenAIShimMessages {
     return this._doOpenAIRequest(request, params, options)
   }
 
+  // ---------------------------------------------------------------------------
+  // ShimToolSearch — two-phase protocol
+  // ---------------------------------------------------------------------------
+
+  /**
+   * Builds a one-shot SSE byte stream (a single chat.completion.chunk event
+   * followed by `[DONE]`) that carries `msg` as the delta, so an already
+   * complete message can go through the streaming conversion path.
+   */
+  private _syntheticStream(msg: Record<string, unknown>): ReadableStream<Uint8Array> {
+    const event = {
+      id: `chatcmpl-shim-${Date.now()}`,
+      object: 'chat.completion.chunk',
+      created: Math.floor(Date.now() / 1000),
+      model: 'shim-tool-search',
+      choices: [{ index: 0, delta: msg, finish_reason: 'stop' }],
+    }
+    const sseText = `data: ${JSON.stringify(event)}\n\ndata: [DONE]\n\n`
+    const bytes = new TextEncoder().encode(sseText)
+    return new ReadableStream({
+      start(controller) {
+        controller.enqueue(bytes)
+        controller.close()
+      },
+    })
+  }
+
+  /**
+   * Two-phase ShimToolSearch protocol:
+   *   Phase 1 — send only the request_tools meta-tool + tool directory in the
+   *             system prompt, always non-streaming. If the model calls
+   *             request_tools, go to phase 2.
+   *   Phase 2 — re-request with the original params and the requested + core
+   *             tools (minified). All tools are sent instead when the arguments
+   *             are malformed, list no names, or the filter matches nothing.
+   *
+   * @returns `response` is the HTTP response of the last request made. When
+   *   `converted` is true, `data` is a ready OpenAIShimStream; when false,
+   *   `data` is a raw Response the caller still has to convert.
+   */
+  private async _shimToolSearchCreate(
+    request: ReturnType<typeof resolveProviderRequest>,
+    params: ShimCreateParams,
+    options?: { signal?: AbortSignal; headers?: Record<string, string> },
+  ): Promise<{
+    data: OpenAIShimStream | Response
+    response: Response
+    converted: boolean
+  }> {
+    debugShimToolSearch('Phase 1: conversational prediction — sending meta-tool only')
+
+    const allConverted = convertTools(
+      params.tools as Array<{ name: string; description?: string; input_schema?: Record<string, unknown> }>,
+    )
+
+    // Build directory listing for the system prompt
+    const directoryLines = getToolDirectoryLines(allConverted)
+    const directoryNote = `\n\nAvailable tools (call request_tools to use any):\n${directoryLines}`
+
+    const phase1Messages = JSON.parse(JSON.stringify(params.messages)) as Array<{ role: string; content: unknown }>
+    const phase1System = appendSystemPrompt(params.system, directoryNote)
+
+    // Phase 1 request — non-streaming, single meta-tool
+    const phase1Params = {
+      ...params,
+      stream: false,
+      system: phase1System,
+      messages: phase1Messages,
+      tools: [REQUEST_TOOLS_TOOL] as typeof params.tools,
+    }
+    const phase1Response = await this._doRequest(request, phase1Params, options)
+    const phase1Json = await phase1Response.json() as ShimPhase1Response
+    const toolCalls = extractShimPhase1ToolCalls(phase1Json)
+    const requestToolsCall = toolCalls.find(tc => tc.function?.name === 'request_tools')
+
+    if (!requestToolsCall) {
+      // Model chose to respond conversationally
+      debugShimToolSearch('Phase 1 result: conversational (no tools requested)')
+      if (params.stream) {
+        // The caller wants a stream but phase 1 was not streamed: replay the message as synthetic SSE.
+        const msg = extractShimPhase1Message(phase1Json)
+        return {
+          data: new OpenAIShimStream(
+            openaiStreamToAnthropic(
+              new Response(this._syntheticStream(msg), {
+                status: 200,
+                headers: { 'content-type': 'text/event-stream' },
+              }),
+              request.resolvedModel,
+            ),
+          ),
+          response: phase1Response,
+          converted: true,
+        }
+      }
+      // phase1Response's body was consumed by json() above, so re-wrap the parsed JSON for the caller.
+      return {
+        data: new Response(JSON.stringify(phase1Json), {
+          status: 200,
+          headers: { 'content-type': 'application/json' },
+        }),
+        response: phase1Response,
+        converted: false,
+      }
+    }
+
+    // Model requested tools — parse and do phase 2 (non-string entries are ignored)
+    let requestedNames: string[] = []
+    let parsedRequestedNames = true
+    try {
+      const args = JSON.parse(requestToolsCall.function?.arguments ?? '{}')
+      requestedNames = Array.isArray(args?.tools)
+        ? args.tools.filter((name: unknown): name is string => typeof name === 'string')
+        : []
+    } catch {
+      debugShimToolSearch('Phase 1 returned malformed request_tools JSON; falling back to all tools')
+      parsedRequestedNames = false
+      requestedNames = []
+    }
+    debugShimToolSearch(`Phase 2: model requested tools: ${requestedNames.join(', ')}`)
+
+    // Build full tool set from the original params, filtered + minified
+    const forcedToolName = getForcedToolChoiceName(params.tool_choice)
+    const wanted = new Set([...requestedNames, ...CORE_TOOL_NAMES])
+    if (forcedToolName && forcedToolName !== 'request_tools') wanted.add(forcedToolName)
+    const filtered = allConverted.filter(t => wanted.has(t.function.name))
+    const toolSet = minifyToolSchemas(
+      parsedRequestedNames && requestedNames.length > 0 && filtered.length > 0
+        ? filtered
+        : allConverted,
+    )
+    debugShimToolSearch(`Phase 2: sending ${toolSet.length} tools (${JSON.stringify(toolSet).length} chars)`)
+
+    // Phase 2 — re-request with the actual tools
+    const response = await this._doOpenAIRequest(request, params, options, toolSet)
+
+    // Hand the raw response back unconverted. The caller runs unconverted
+    // results through _convertCreateResponse, which covers streaming and
+    // non-streaming replies, checks every Responses transport, and forwards
+    // options.signal to the stream parser.
+    return {
+      data: response,
+      response,
+      converted: false,
+    }
+  }
+
   private async _doOpenAIRequest(
     request: ReturnType<typeof resolveProviderRequest>,
     params: ShimCreateParams,
     options?: { signal?: AbortSignal; headers?: Record<string, string> },
+    convertedToolOverride?: OpenAITool[],
   ): Promise<Response> {
     // Local backends (llama.cpp, vLLM, Ollama, LM Studio, …) do not implement
     // the cloud-side caching/strict-validation behaviours that several of our
@@ -1653,8 +2411,8 @@ class OpenAIShimMessages {
       }
     }
 
-    if (params.tools && params.tools.length > 0) {
-      const converted = convertTools(
+    if ((params.tools && params.tools.length > 0) || convertedToolOverride) {
+      const converted = convertedToolOverride ?? convertTools(
         params.tools as Array<{
           name: string
           description?: string
@@ -1663,12 +2421,34 @@ class OpenAIShimMessages {
         { skipStrict: fastPath.skipStrictTools },
       )
       if (converted.length > 0) {
-        body.tools = converted
-        if (params.tool_choice) {
+        const shimToolSearchMode = getShimToolSearchMode()
+        const forcedToolName = getForcedToolChoiceName(params.tool_choice)
+        if (convertedToolOverride || isRequestToolsOnly(converted)) {
+          body.tools = convertedToolOverride
+            ? convertedToolOverride
+            : converted
+        } else if (shimToolSearchMode === 'minify' || shimToolSearchMode === 'predict') {
+          const msgs = Array.isArray(params.messages) ? params.messages as unknown[] : []
+          const toolSet = selectShimToolSet(converted, msgs, shimToolSearchMode, forcedToolName)
+          body.tools = toolSet
+          const names = toolSet.map(t => t.function.name)
+          const totalChars = JSON.stringify(toolSet).length
+          debugShimToolSearch(`${shimToolSearchMode}: ${toolSet.length} tools (${totalChars} chars): ${names.join(', ')}`)
+        } else if (shimToolSearchMode === 'lazy') {
+          const msgs = Array.isArray(params.messages) ? params.messages as unknown[] : []
+          body.tools = selectShimToolSet(converted, msgs, 'predict', forcedToolName)
+        } else {
+          body.tools = converted
+        }
+        if (params.tool_choice && Array.isArray(body.tools) && body.tools.length > 0) {
           const tc = params.tool_choice as { type?: string; name?: string }
           if (tc.type === 'auto') {
             body.tool_choice = 'auto'
-          } else if (tc.type === 'tool' && tc.name) {
+          } else if (
+            tc.type === 'tool' &&
+            tc.name &&
+            hasOpenAIToolNamed(body.tools as OpenAITool[], tc.name)
+          ) {
             body.tool_choice = {
               type: 'function',
               function: { name: tc.name },
@@ -1725,7 +2505,14 @@ class OpenAIShimMessages {
       if (params.temperature !== undefined) responsesBody.temperature = params.temperature
       if (params.top_p !== undefined) responsesBody.top_p = params.top_p
 
-      if (!omitResponsesTools && params.tools && params.tools.length > 0) {
+      if (!omitResponsesTools && Array.isArray(body.tools)) {
+        if (body.tools.length > 0) {
+          const convertedTools = convertOpenAIToolsToResponsesTools(body.tools as OpenAITool[])
+          if (convertedTools.length > 0) {
+            responsesBody.tools = convertedTools
+          }
+        }
+      } else if (!omitResponsesTools && params.tools && params.tools.length > 0) {
         const convertedTools = convertToolsToResponsesTools(
           params.tools as Array<{
             name?: string
@@ -1738,6 +2525,13 @@ class OpenAIShimMessages {
         }
       }
 
+      if (responsesBody.tools && body.tool_choice !== undefined) {
+        const responsesToolChoice = convertOpenAIToolChoiceToResponsesToolChoice(body.tool_choice)
+        if (responsesToolChoice !== undefined) {
+          responsesBody.tool_choice = responsesToolChoice
+        }
+      }
+
       return responsesBody
     }
 
@@ -1899,12 +2693,12 @@ class OpenAIShimMessages {
     // Local backends do not implement prefix caching, so the deep key-sort
     // is pure CPU overhead per request (issue #1016). Drop to the native
     // `JSON.stringify` fast path when the fast-path config opts out.
+    let outgoingBody = request.transport === 'responses' ? buildResponsesBody() : body
     const serializeBody = (): string => {
-      const payload =
-        request.transport === 'responses' ? buildResponsesBody() : body
+      outgoingBody = request.transport === 'responses' ? buildResponsesBody() : body
       return fastPath.skipStableStringify
-        ? JSON.stringify(payload)
-        : stableStringify(payload)
+        ? JSON.stringify(outgoingBody)
+        : stableStringify(outgoingBody)
     }
     let serializedBody = serializeBody()
 
@@ -1912,6 +2706,13 @@ class OpenAIShimMessages {
       serializedBody = serializeBody()
     }
 
+    logOpenAIShimTokenAudit({
+      body: outgoingBody,
+      serializedBody,
+      model: request.resolvedModel,
+      transport: request.transport,
+    })
+
     const buildFetchInit = () => ({
       method: 'POST' as const,
       headers,
@@ -2185,10 +2986,10 @@ class OpenAIShimMessages {
           role?: string
           content?:
             | string
-            | null
-            | Array<{ type?: string; text?: string }>
-          reasoning_content?: string | null
-          tool_calls?: Array<{
+          | null
+          | Array<{ type?: string; text?: string }>
+        reasoning_content?: string | null
+        tool_calls?: Array<{
             id: string
             function: { name: string; arguments: string }
             extra_content?: Record<string, unknown>