Skip to content

Commit 47c7027

Browse files
committed
tool-call auto
1 parent d876479 commit 47c7027

3 files changed

Lines changed: 676 additions & 53 deletions

File tree

src/kvboost/server/__main__.py

Lines changed: 14 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -198,9 +198,17 @@ def parse_args():
198198
"the server forwards 'tools' to the chat template and parses "
199199
"tool calls out of the model output using --tool-call-parser.")
200200
p.add_argument("--tool-call-parser", default="hermes",
201-
choices=["hermes"],
202-
help="Format used by the model to emit tool calls. "
203-
"'hermes' = <tool_call>{json}</tool_call> (Qwen2.5/3, Hermes 2/3).")
201+
choices=["hermes", "json_codeblock", "qwen3_coder",
202+
"llama", "mistral", "auto"],
203+
help="Format used by the model to emit tool calls.\n"
204+
" hermes = <tool_call>{json}</tool_call> (Qwen2.5/3, Hermes 2/3)\n"
205+
" json_codeblock = ```json\\n{...}\\n``` (Qwen2.5-Coder agent, mixed-format models)\n"
206+
" qwen3_coder = <function=X><parameter=K>V</parameter> XML attr style\n"
207+
" llama = <|python_tag|>{json}<|eom_id|> (Llama 3.1/3.2/3.3)\n"
208+
" mistral = [TOOL_CALLS][...] prefix + JSON array\n"
209+
" auto = try each parser in order; first match wins.\n"
210+
"Recommended: 'auto' when serving mixed/unknown formats; pin to a specific one\n"
211+
"for the model you're serving.")
204212

205213
# Pre-warm
206214
p.add_argument("--warm", default=None,
@@ -455,8 +463,9 @@ def main():
455463
max_retries=args.oom_max_retries,
456464
)
457465
log.info(
458-
"OOM recovery enabled: max_retries=%d, streaming=%s",
459-
args.oom_max_retries, args.awq_streaming,
466+
"OOM recovery enabled: max_retries=%s, streaming=%s",
467+
args.oom_max_retries if args.oom_max_retries is not None else "default",
468+
args.awq_streaming,
460469
)
461470

462471
worker = EngineWorker(

src/kvboost/server/app.py

Lines changed: 17 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -292,8 +292,15 @@ async def chat_completions(req: ChatCompletionRequest, response: Response):
292292
and req.tool_choice != "none"
293293
)
294294
if tools_active:
295+
# Build an allowlist of declared tool names — the parser uses it
296+
# to drop calls naming hallucinated / unrelated functions, which
297+
# also prevents misclassifying incidental ```json blocks as calls.
298+
tool_names = {
299+
t.function.name for t in (req.tools or [])
300+
if t.function and t.function.name
301+
}
295302
cleaned_text, parsed_calls = tool_parsers.parse(
296-
result.output_text, _parser_name,
303+
result.output_text, _parser_name, tool_names=tool_names,
297304
)
298305
parsed_calls = _filter_tool_choice(parsed_calls, req.tool_choice)
299306
if parsed_calls:
@@ -489,8 +496,16 @@ async def _stream_chat(
489496
tools_active = (
490497
auto_tools and bool(req.tools) and req.tool_choice != "none"
491498
)
499+
stream_tool_names = (
500+
{
501+
t.function.name for t in (req.tools or [])
502+
if t.function and t.function.name
503+
}
504+
if tools_active else None
505+
)
492506
stream_parser = (
493-
tool_parsers.make_streaming_parser(parser_name) if tools_active else None
507+
tool_parsers.make_streaming_parser(parser_name, tool_names=stream_tool_names)
508+
if tools_active else None
494509
)
495510
emitted_tool_calls = [] # list of ToolCall, used to set finish_reason
496511
tool_call_index = 0 # OpenAI delta indexing within this completion

0 commit comments

Comments
 (0)