Skip to content

Commit 4b38a0a

Browse files
author
Aeshma-Daeva
committed
feat: opt-in ShimToolSearch — lazy tool loading for 3P providers
Adds keyword-heuristic tool prediction and schema minification, gated behind ENABLE_SHIM_TOOL_SEARCH=1 (off by default; zero behavioral change without it).
When enabled:
- Conversational turns: send only a request_tools meta-tool (phase 1), then re-request with the tools the model asked for, if any (phase 2).
- Tool-requiring turns: filter to essential + predicted tools, strip verbose descriptions/param docs, and truncate tool descriptions to their first sentence.
Measured: 63KB → 5KB tool payloads (90%+ reduction) on Cerebras. Single file, no new dependencies.
1 parent 1e79624 commit 4b38a0a

1 file changed

Lines changed: 319 additions & 1 deletion

File tree

src/services/api/openaiShim.ts

Lines changed: 319 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -125,6 +125,160 @@ function sleepMs(ms: number): Promise<void> {
125125
return new Promise(resolve => setTimeout(resolve, ms))
126126
}
127127

128+
// ---------------------------------------------------------------------------
129+
// ShimToolSearch — opt-in lazy tool loading for 3P providers
130+
// Gated behind ENABLE_SHIM_TOOL_SEARCH=1. Off by default.
131+
// ---------------------------------------------------------------------------
132+
133+
/**
 * Shorten a tool description to roughly its first sentence.
 *
 * Large models already know what Bash/Read/Write/etc. do by name, so the long
 * upstream descriptions mostly waste tokens on third-party providers.
 *
 * Strategy:
 *  1. Empty text, or text already within `maxLen`, is returned untouched.
 *  2. Otherwise the first sentence (at least 30 characters, ending in `.`, `!`
 *     or `?` followed by whitespace or the end of the text) is returned,
 *     provided it is at most `maxLen + 20` characters long.
 *  3. Failing that, the text is cut at the last word boundary at or before
 *     `maxLen` and an ellipsis is appended.
 *
 * @param text   Original description.
 * @param maxLen Soft length limit in characters (default 200). The result may
 *               exceed it by up to 20 characters (sentence cut) or by the
 *               trailing ellipsis (word cut).
 * @returns The shortened description.
 */
function truncateToolDescription(text: string, maxLen = 200): string {
  if (!text || text.length <= maxLen) return text

  // Look for a sentence end inside a generous window past the soft limit.
  const window = text.slice(0, maxLen + 80)
  const sentence = window.match(/^[\s\S]{30,}?[.!?](\s|\n|$)/)
  if (sentence && sentence[0].length <= maxLen + 20) return sentence[0].trim()

  // Fall back to a word boundary. Only drop the last word when the cut really
  // lands in the middle of it; a cut that falls exactly between two words
  // keeps the complete word.
  const head = text.slice(0, maxLen)
  const cutsMidWord = /\S$/.test(head) && /^\S/.test(text.slice(maxLen, maxLen + 1))
  // `\s+` (rather than a single `\s`) also removes runs of whitespace so the
  // ellipsis is never preceded by a dangling space.
  const atWordBoundary = cutsMidWord ? head.replace(/\s+\S*$/, '') : head.trimEnd()
  // If stripping the partial word left nothing, hard-cut instead.
  return (atWordBoundary || head) + '…'
}
147+
148+
/**
 * Return a copy of a JSON-Schema object with the `description` and `title`
 * annotations removed at every schema level, preserving structure.
 *
 * Models still get type/required/properties/enum — enough to generate valid
 * calls.
 *
 * Only annotation keywords are removed. Keys that are *names* rather than
 * keywords are kept, so a tool parameter that is itself called `description`
 * or `title` (listed under `properties`) survives and stays consistent with
 * `required`. Literal data (`enum`, `const`, `default`, `examples`,
 * `required`) is copied through unchanged.
 *
 * @param schema Schema to minify; it is not mutated.
 * @returns A new schema object. A non-object input is returned as-is.
 */
function stripParamDescriptions(schema: Record<string, unknown>): Record<string, unknown> {
  // Keywords whose value maps user-chosen names to sub-schemas.
  const nameMapKeywords = new Set([
    'properties',
    'patternProperties',
    'definitions',
    '$defs',
    'dependentSchemas',
  ])
  // Keywords whose value is plain data, never a schema.
  const literalKeywords = new Set(['enum', 'const', 'default', 'examples', 'required'])

  const isPlainObject = (value: unknown): value is Record<string, unknown> =>
    value !== null && typeof value === 'object' && !Array.isArray(value)

  /** Strip a value found in schema position (sub-schema, array of schemas, or scalar). */
  const stripValue = (value: unknown): unknown => {
    if (Array.isArray(value)) {
      return value.map(item => (isPlainObject(item) ? stripSchema(item) : item))
    }
    return isPlainObject(value) ? stripSchema(value) : value
  }

  /** Strip one schema object; keys here are JSON-Schema keywords. */
  const stripSchema = (node: Record<string, unknown>): Record<string, unknown> => {
    const out: Record<string, unknown> = {}
    for (const [keyword, value] of Object.entries(node)) {
      if (keyword === 'description' || keyword === 'title') continue
      if (literalKeywords.has(keyword)) {
        out[keyword] = value
      } else if (nameMapKeywords.has(keyword) && isPlainObject(value)) {
        // Keys are parameter/definition names: keep every one of them and
        // strip only inside the schema each name points to.
        const named: Record<string, unknown> = {}
        for (const [name, subSchema] of Object.entries(value)) {
          named[name] = stripValue(subSchema)
        }
        out[keyword] = named
      } else {
        out[keyword] = stripValue(value)
      }
    }
    return out
  }

  return isPlainObject(schema) ? stripSchema(schema) : schema
}
170+
171+
/**
 * Minify tool schemas for 3P providers to reduce token usage.
 *
 * For every tool, the function description is shortened with
 * `truncateToolDescription` and the parameter schema is passed through
 * `stripParamDescriptions`. All other fields are copied unchanged and the
 * input array and its tools are not mutated.
 *
 * Sizes reported by the original author:
 * Bash: 11.4KB → ~0.4KB, TodoWrite: 9.6KB → ~1.2KB, Aggregate: ~63KB → ~5KB
 *
 * @param tools Tools in OpenAI function-calling format.
 * @returns A new array of minified tools, in the same order.
 */
function minifyToolSchemas(tools: OpenAITool[]): OpenAITool[] {
  return tools.map(tool => {
    const fn = tool.function
    const rawParameters = fn.parameters as Record<string, unknown> | null | undefined
    return {
      ...tool,
      function: {
        ...fn,
        description: truncateToolDescription(fn.description),
        // A tool without a parameter schema is passed through untouched
        // instead of crashing inside Object.entries().
        parameters:
          rawParameters && typeof rawParameters === 'object'
            ? stripParamDescriptions(rawParameters)
            : fn.parameters,
      },
    }
  })
}
185+
186+
/**
 * Short, one-line summaries of the tools advertised to the model in the
 * phase-1 system prompt. Entry order is the order in which they are listed.
 */
const TOOL_DIRECTORY: Record<string, string> = Object.fromEntries<string>([
  ['Bash', 'Execute shell commands (build, test, install, git, etc.)'],
  ['Read', 'Read file contents'],
  ['Write', 'Create or overwrite a file'],
  ['Edit', 'Make targeted edits to an existing file'],
  ['Glob', 'List files matching a pattern'],
  ['Grep', 'Search file contents with regex'],
  ['TodoWrite', 'Create/update structured task list'],
  ['AskUserQuestion', 'Ask the user a clarifying question'],
  ['Agent', 'Spawn a sub-agent to handle a complex sub-task'],
])

/**
 * Names of the tools that are always kept when filtering.
 * Computed from the keys of TOOL_DIRECTORY so the two cannot drift apart.
 */
const ESSENTIAL_TOOL_NAMES = new Set<string>(Object.keys(TOOL_DIRECTORY))
201+
202+
/**
 * Meta-tool offered on its own during phase 1 of ShimToolSearch.
 * It takes one required argument, `tools`: an array of tool-name strings.
 */
const REQUEST_TOOLS_SCHEMA: OpenAITool = {
  type: 'function',
  function: {
    name: 'request_tools',
    description:
      'Request the full schema for one or more tools before using them. Call this first if you need to use any tools.',
    parameters: {
      type: 'object',
      properties: { tools: { type: 'array', items: { type: 'string' } } },
      required: ['tools'],
    },
  },
}
220+
221+
/**
 * Keyword heuristics that guess which tools the latest user request needs.
 *
 * The most recent `user` message that still has text after removing
 * `<system-reminder>…</system-reminder>` and `<context>…</context>` blocks is
 * lower-cased and matched against keyword patterns.
 *
 * @param messages Conversation messages. Each entry is expected to look like
 *   `{ role, content }`, where `content` is a string or an array of parts with
 *   `{ type: 'text', text }`. Entries and parts of any other shape are ignored.
 * @returns
 *   - an empty Set when the request opens like a question and mentions no
 *     work-related keyword (treated as purely conversational);
 *   - a non-empty Set of predicted tool names;
 *   - `null` when there is no usable user text or no keyword matched
 *     (uncertain).
 */
function predictNeededTools(messages: unknown[]): Set<string> | null {
  /** Remove injected reminder/context blocks so only the user's own words remain. */
  const extractUserQuery = (text: string): string =>
    text
      .replace(/<system-reminder[\s\S]*?<\/system-reminder>/gi, '')
      .replace(/<context[\s\S]*?<\/context>/gi, '')
      .trim()

  /** Concatenate the text of a message's content; non-text parts contribute nothing. */
  const textOfContent = (content: unknown): string => {
    if (typeof content === 'string') return content
    if (!Array.isArray(content)) return ''
    const pieces: string[] = []
    for (const part of content as unknown[]) {
      // Skip null/primitive parts instead of dereferencing them.
      if (part === null || typeof part !== 'object') continue
      const { type, text } = part as { type?: unknown; text?: unknown }
      if (type === 'text') pieces.push(typeof text === 'string' ? text : '')
    }
    return pieces.join(' ')
  }

  // Walk backwards to the latest user message that carries actual text.
  let lastUserText = ''
  for (let i = messages.length - 1; i >= 0; i--) {
    const entry = messages[i]
    // Tolerate malformed entries (null, strings, …) rather than throwing.
    if (entry === null || typeof entry !== 'object') continue
    const { role, content } = entry as { role?: unknown; content?: unknown }
    if (role !== 'user') continue
    const clean = extractUserQuery(textOfContent(content))
    if (clean) {
      lastUserText = clean
      break
    }
  }
  if (!lastUserText) return null

  const t = lastUserText.toLowerCase()

  // The trailing \b keeps openers such as "listen", "however" or "whatever"
  // from being mistaken for "list", "how" or "what". Misclassifying a request
  // as conversational withholds every real tool, so err on the cautious side.
  const opensLikeQuestion = /^(what|who|why|how|when|where|explain|describe|tell me|is it|can you|do you|list|summarize|overview|difference between|compare|pros and cons)\b/.test(t)
  const mentionsWork = /file|code|function|class|test|build|run|install|create|write|edit|implement|fix|debug/.test(t)

  if (opensLikeQuestion && !mentionsWork) return new Set<string>()

  const tools = new Set<string>()

  if (/\brun\b|\bexecut|\bbuild|\btest\b|\binstall\b|\bnpm\b|\bpip\b|\bgit\b|\bcompil|\bscript|\bdocker|\bpython\b|\bnode\b/.test(t)) tools.add('Bash')
  if (/\bread\b|\bshow\b|\bcontent|\blook at|\bopen\b|\bcat\b|\bwhat is in|\bwhat does.*file/.test(t)) tools.add('Read')
  if (/\bcreate\b|\bwrite\b|\bnew file|\bgenerat|\bscaffold|\binitializ|\btouch\b/.test(t)) tools.add('Write')
  if (/\bedit\b|\bmodif|\bchange\b|\bfix\b|\bupdat|\brefactor|\breplace|\bimpleme|\badd.*to\b|\bremove\b|\bdelet.*from/.test(t)) tools.add('Edit')
  if (/\bsearch\b|\bfind\b|\bgrep\b|\blook for\b|\bwhere is\b|\boccurrenc|\bwhich file/.test(t)) {
    tools.add('Grep')
    tools.add('Glob')
  }
  if (/\blist.*file|\bfind.*file|\bfiles in|\bwhat files|\blist.*dir|\bls\b/.test(t)) tools.add('Glob')
  if (/\btodo\b|\bplan\b|\btask list|\btrack\b|\bprogress\b/.test(t)) tools.add('TodoWrite')

  // Feature work usually needs the whole read/edit/run loop.
  if (/\bimplement\b|\bbuild.*feature|\badd.*feature|\bwrite.*function|\bwrite.*class|\bcreate.*function|\bcreate.*class/.test(t)) {
    tools.add('Bash')
    tools.add('Write')
    tools.add('Edit')
    tools.add('Read')
  }

  // Any file modification implies inspecting the file and running something afterwards.
  if (tools.has('Edit') || tools.has('Write')) {
    tools.add('Bash')
    tools.add('Read')
  }

  return tools.size > 0 ? tools : null
}
281+
128282
// ---------------------------------------------------------------------------
129283
// Types — minimal subset of Anthropic SDK types we need to produce
130284
// ---------------------------------------------------------------------------
@@ -1033,6 +1187,20 @@ class OpenAIShimMessages {
10331187

10341188
const promise = (async () => {
10351189
const request = resolveProviderRequest({ model: self.providerOverride?.model ?? params.model, baseUrl: self.providerOverride?.baseURL, reasoningEffortOverride: self.reasoningEffort })
1190+
1191+
// ShimToolSearch: for turns predicted to be purely conversational, use the two-phase flow (phase 1 sends only the request_tools meta-tool)
1192+
if (
1193+
isEnvTruthy(process.env.ENABLE_SHIM_TOOL_SEARCH) &&
1194+
params.tools && (params.tools as unknown[]).length > 0
1195+
) {
1196+
const msgs = Array.isArray(params.messages) ? params.messages as unknown[] : []
1197+
const predicted = predictNeededTools(msgs)
1198+
if (predicted !== null && predicted.size === 0) {
1199+
// Pure conversational — send with request_tools meta-tool only
1200+
return await self._shimToolSearchCreate(request, params, options)
1201+
}
1202+
}
1203+
10361204
const response = await self._doRequest(request, params, options)
10371205
httpResponse = response
10381206

@@ -1169,6 +1337,137 @@ class OpenAIShimMessages {
11691337
return this._doOpenAIRequest(request, params, options)
11701338
}
11711339

1340+
// ---------------------------------------------------------------------------
1341+
// ShimToolSearch — two-phase protocol
1342+
// ---------------------------------------------------------------------------
1343+
1344+
/**
 * Build a one-shot SSE byte stream around a single OpenAI-format message.
 *
 * The message is placed in the `delta` of one `chat.completion.chunk` frame
 * with `finish_reason: 'stop'`, followed by the `[DONE]` frame. Both frames
 * are enqueued together and the stream is closed immediately, so the result
 * can be fed into the existing stream-to-Anthropic pipeline.
 *
 * @param msg Message object to embed as the chunk's `delta`.
 * @returns A ReadableStream that yields the encoded SSE payload once.
 */
private _syntheticStream(msg: Record<string, unknown>): ReadableStream<Uint8Array> {
  const completionChunk = {
    id: `chatcmpl-shim-${Date.now()}`,
    object: 'chat.completion.chunk',
    created: Math.floor(Date.now() / 1000),
    model: 'shim-tool-search',
    choices: [{ index: 0, delta: msg, finish_reason: 'stop' }],
  }

  // Each SSE frame is terminated by a blank line; the trailing '' supplies
  // the terminator for the final [DONE] frame.
  const sseText = [`data: ${JSON.stringify(completionChunk)}`, 'data: [DONE]', ''].join('\n\n')
  const sseBytes = new TextEncoder().encode(sseText)

  return new ReadableStream<Uint8Array>({
    start: controller => {
      controller.enqueue(sseBytes)
      controller.close()
    },
  })
}
1366+
1367+
/**
 * Two-phase ShimToolSearch protocol.
 *
 * Phase 1 — send a non-streaming request whose only tool is the
 *   `request_tools` meta-tool, with the TOOL_DIRECTORY listing appended to the
 *   system prompt.
 *   - If the model replies without any tool call, that reply is returned: as a
 *     synthetic stream when `params.stream` is set, otherwise as a JSON
 *     Response wrapping the phase-1 body.
 *   - If the model calls `request_tools`, or tries to call a tool directly by
 *     name, continue with phase 2.
 * Phase 2 — re-issue the original request with the requested tools plus the
 *   essential tools, minified.
 *
 * @param request Resolved provider request.
 * @param params  Original create() parameters; not mutated.
 * @param options Optional abort signal and extra headers, passed to `_doRequest`.
 * @returns An OpenAIShimStream when `params.stream` is set, otherwise a Response.
 */
private async _shimToolSearchCreate(
  request: ReturnType<typeof resolveProviderRequest>,
  params: ShimCreateParams,
  options?: { signal?: AbortSignal; headers?: Record<string, string> },
): Promise<OpenAIShimStream | Response> {
  process.stderr.write('[ShimToolSearch] Phase 1: conversational prediction — sending meta-tool only\n')

  // Build directory listing for the system prompt
  const directoryLines = Object.entries(TOOL_DIRECTORY)
    .map(([name, desc]) => `- ${name}: ${desc}`)
    .join('\n')
  const directoryNote = `\n\nAvailable tools (call request_tools to use any):\n${directoryLines}`

  // Deep-clone the messages so the caller's array is never modified, then
  // inject the directory into the first system message.
  // NOTE(review): this looks for a `role: 'system'` entry inside params.messages.
  // If the system prompt is carried elsewhere on params, the else-branch always
  // runs and prepends a generic system message — confirm against ShimCreateParams.
  const phase1Messages = JSON.parse(JSON.stringify(params.messages)) as Array<{ role: string; content: unknown }>
  const sysIdx = phase1Messages.findIndex(m => m.role === 'system')
  if (sysIdx >= 0 && typeof phase1Messages[sysIdx].content === 'string') {
    phase1Messages[sysIdx].content += directoryNote
  } else {
    phase1Messages.unshift({ role: 'system', content: `You are a helpful AI assistant.${directoryNote}` })
  }

  // Phase 1 request — non-streaming, single meta-tool
  const phase1Params = { ...params, stream: false, messages: phase1Messages, tools: [REQUEST_TOOLS_SCHEMA] as unknown as typeof params.tools }
  const phase1Response = await this._doRequest(request, phase1Params, options)
  // NOTE(review): the HTTP status is not inspected here; presumably _doRequest
  // rejects on provider errors — verify, otherwise an error body is treated as
  // an empty conversational reply.
  const phase1Json = await phase1Response.json() as {
    choices?: Array<{
      message?: {
        role?: string
        content?: string | null
        tool_calls?: Array<{ function?: { name?: string; arguments?: string } }>
      }
      finish_reason?: string
    }>
  }

  const choice = phase1Json.choices?.[0]
  const toolCalls = choice?.message?.tool_calls ?? []

  if (toolCalls.length === 0) {
    // Model chose to respond conversationally — return as synthetic stream
    process.stderr.write('[ShimToolSearch] Phase 1 result: conversational (no tools requested)\n')
    const msg = choice?.message ?? { role: 'assistant', content: '' }
    if (params.stream) {
      return new OpenAIShimStream(
        openaiStreamToAnthropic(
          new Response(this._syntheticStream(msg as Record<string, unknown>), {
            status: 200,
            headers: { 'content-type': 'text/event-stream' },
          }),
          request.resolvedModel,
        ),
      )
    }
    return new Response(JSON.stringify(phase1Json), {
      status: 200,
      headers: { 'content-type': 'application/json' },
    })
  }

  // Model wants tools — collect the names and do phase 2.
  // Besides request_tools, a model may call a directory tool directly by name.
  // Its arguments were produced without a schema, so that call is not
  // forwarded; the tool is treated as requested and the turn is redone with
  // the real schema.
  const requestedNames: string[] = []
  for (const call of toolCalls) {
    const calledName = call.function?.name
    if (calledName !== 'request_tools') {
      if (typeof calledName === 'string' && calledName) requestedNames.push(calledName)
      continue
    }
    try {
      const args: unknown = JSON.parse(call.function?.arguments ?? '{}')
      const listed = args !== null && typeof args === 'object' ? (args as { tools?: unknown }).tools : undefined
      if (Array.isArray(listed)) {
        // The argument list is model output: keep only non-empty strings.
        for (const entry of listed) {
          if (typeof entry === 'string' && entry.trim()) requestedNames.push(entry.trim())
        }
      }
    } catch {
      // Malformed arguments: request nothing extra; essential tools are still sent.
    }
  }
  process.stderr.write(`[ShimToolSearch] Phase 2: model requested tools: ${requestedNames.join(', ')}\n`)

  // Build full tool set from the original params, filtered + minified
  const allConverted = convertTools(
    params.tools as Array<{ name: string; description?: string; input_schema?: Record<string, unknown> }>,
  )
  const wanted = new Set([...requestedNames, ...ESSENTIAL_TOOL_NAMES])
  const filtered = allConverted.filter(t => wanted.has(t.function.name))
  // If nothing matched, fall back to every tool rather than sending none.
  const toolSet = minifyToolSchemas(filtered.length > 0 ? filtered : allConverted)
  process.stderr.write(`[ShimToolSearch] Phase 2: sending ${toolSet.length} tools (${JSON.stringify(toolSet).length} chars)\n`)

  // Phase 2 — re-request with the actual tools
  // NOTE(review): toolSet is already in converted (OpenAI) form but is handed
  // back through params.tools. Confirm the downstream request builder neither
  // converts it a second time nor re-filters away requested non-essential tools.
  const phase2Params = { ...params, tools: toolSet as unknown as typeof params.tools }
  const response = await this._doRequest(request, phase2Params, options)

  if (params.stream) {
    const isResponsesStream = response.url?.includes('/responses')
    return new OpenAIShimStream(
      (request.transport === 'codex_responses' || isResponsesStream)
        ? codexStreamToAnthropic(response, request.resolvedModel)
        : openaiStreamToAnthropic(response, request.resolvedModel),
    )
  }
  return response
}
1470+
11721471
private async _doOpenAIRequest(
11731472
request: ReturnType<typeof resolveProviderRequest>,
11741473
params: ShimCreateParams,
@@ -1231,7 +1530,26 @@ class OpenAIShimMessages {
12311530
}>,
12321531
)
12331532
if (converted.length > 0) {
1234-
body.tools = converted
1533+
// ShimToolSearch: filter + minify tool schemas when enabled
1534+
if (isEnvTruthy(process.env.ENABLE_SHIM_TOOL_SEARCH)) {
1535+
const msgs = Array.isArray(params.messages) ? params.messages as unknown[] : []
1536+
const predicted = predictNeededTools(msgs)
1537+
// Start from essential tools, then add whatever the heuristic predicts
1538+
const wanted = new Set(ESSENTIAL_TOOL_NAMES)
1539+
if (predicted) {
1540+
for (const t of predicted) wanted.add(t)
1541+
}
1542+
const filtered = converted.filter(t => wanted.has(t.function.name))
1543+
const toolSet = minifyToolSchemas(filtered.length > 0 ? filtered : converted)
1544+
body.tools = toolSet
1545+
const names = toolSet.map(t => t.function.name)
1546+
const totalChars = JSON.stringify(toolSet).length
1547+
process.stderr.write(
1548+
`[ShimToolSearch] ${toolSet.length} tools (${totalChars} chars): ${names.join(', ')}\n`,
1549+
)
1550+
} else {
1551+
body.tools = converted
1552+
}
12351553
if (params.tool_choice) {
12361554
const tc = params.tool_choice as { type?: string; name?: string }
12371555
if (tc.type === 'auto') {

0 commit comments

Comments
 (0)