|
54 | 54 | "source": [
|
55 | 55 | "Note that `--tool-call-parser` defines the parser used to interpret responses. Currently supported parsers include:\n",
|
56 | 56 | "\n",
|
57 |
| - "- llama3: Llama 3.1 / 3.2 (e.g. meta-llama/Llama-3.1-8B-Instruct, meta-llama/Llama-3.2-1B-Instruct).\n", |
| 57 | + "- llama3: Llama 3.1 / 3.2 / 3.3 (e.g. meta-llama/Llama-3.1-8B-Instruct, meta-llama/Llama-3.2-1B-Instruct, meta-llama/Llama-3.3-70B-Instruct).\n", |
| 58 | + "- llama4: Llama 4 (e.g. meta-llama/Llama-4-Scout-17B-16E-Instruct).\n", |
58 | 59 | "- mistral: Mistral (e.g. mistralai/Mistral-7B-Instruct-v0.3, mistralai/\n",

59 | 60 | "Mistral-Nemo-Instruct-2407, mistralai/Mistral-7B-v0.3).\n",
|
60 |
| - "- qwen25: Qwen 2.5 (e.g. Qwen/Qwen2.5-1.5B-Instruct, Qwen/Qwen2.5-7B-Instruct) and QwQ (i.e. Qwen/QwQ-32B). Especially, for QwQ, we can enable the reasoning parser together with tool call parser, details about reasoning parser can be found in [reasoning parser](https://docs.sglang.ai/backend/separate_reasoning.html)." |
| 61 | + "- qwen25: Qwen 2.5 (e.g. Qwen/Qwen2.5-1.5B-Instruct, Qwen/Qwen2.5-7B-Instruct) and QwQ (i.e. Qwen/QwQ-32B). In particular, for QwQ, we can enable the reasoning parser together with the tool call parser; details about the reasoning parser can be found in [reasoning parser](https://docs.sglang.ai/backend/separate_reasoning.html).\n", |
| 62 | + "- deepseekv3: DeepSeek-v3 (e.g., deepseek-ai/DeepSeek-V3-0324).\n" |
61 | 63 | ]
|
62 | 64 | },
|
63 | 65 | {
|
|
360 | 362 | "print(final_response.choices[0].message.content)"
|
361 | 363 | ]
|
362 | 364 | },
|
| 365 | + { |
| 366 | + "cell_type": "markdown", |
| 367 | + "metadata": {}, |
| 368 | + "source": [ |
| 369 | + "## Tool Choice Mode\n", |
| 370 | + "\n", |
| 371 | + "SGLang supports OpenAI's `tool_choice` parameter to control when and which tools the model should call. This feature is implemented using EBNF (Extended Backus-Naur Form) grammar to ensure reliable tool calling behavior.\n", |
| 372 | + "\n", |
| 373 | + "### Supported Tool Choice Options\n", |
| 374 | + "\n", |
| 375 | + "- **`tool_choice=\"required\"`**: Forces the model to call at least one tool\n", |
| 376 | + "- **`tool_choice={\"type\": \"function\", \"function\": {\"name\": \"specific_function\"}}`**: Forces the model to call a specific function\n", |
| 377 | + "\n", |
| 378 | + "### Backend Compatibility\n", |
| 379 | + "\n", |
| 380 | + "Tool choice is fully supported with the **Xgrammar backend**, which is the default grammar backend (`--grammar-backend xgrammar`). However, it may not be fully supported with other backends such as `outlines`.\n", |
| 381 | + "\n", |
| 382 | + "### Example: Required Tool Choice" |
| 383 | + ] |
| 384 | + }, |
| 385 | + { |
| 386 | + "cell_type": "code", |
| 387 | + "execution_count": null, |
| 388 | + "metadata": {}, |
| 389 | + "outputs": [ |
| 390 | + { |
| 391 | + "name": "stdout", |
| 392 | + "output_type": "stream", |
| 393 | + "text": [ |
| 394 | + "Response with tool_choice='required':\n", |
| 395 | + "Content: None\n", |
| 396 | + "Tool calls: [ChatCompletionMessageToolCall(id='call_NFO3TSZuRRO8Eu3Cv79uiQ', function=Function(arguments='{\"city\": \"Paris\", \"unit\": \"celsius\"}', name='get_current_weather'), type='function', index=0)]\n" |
| 397 | + ] |
| 398 | + } |
| 399 | + ], |
| 400 | + "source": [ |
| 401 | + "from openai import OpenAI\n", |
| 402 | + "import json\n", |
| 403 | + "from sglang.utils import wait_for_server, print_highlight, terminate_process\n", |
| 404 | + "from sglang.test.test_utils import is_in_ci\n", |
| 405 | + "\n", |
| 406 | + "if is_in_ci():\n", |
| 407 | + " from patch import launch_server_cmd\n", |
| 408 | + "else:\n", |
| 409 | + " from sglang.utils import launch_server_cmd\n", |
| 410 | + " import nest_asyncio\n", |
| 411 | + "\n", |
| 412 | + " nest_asyncio.apply()\n", |
| 413 | + "\n", |
| 414 | + "# Start a new server session for tool choice examples\n", |
| 415 | + "server_process_tool_choice, port_tool_choice = launch_server_cmd(\n", |
| 416 | + " \"python3 -m sglang.launch_server --model-path Qwen/Qwen2.5-7B-Instruct --tool-call-parser qwen25 --host 0.0.0.0\"\n", |
| 417 | + ")\n", |
| 418 | + "wait_for_server(f\"http://localhost:{port_tool_choice}\")\n", |
| 419 | + "\n", |
| 420 | + "# Initialize client for tool choice examples\n", |
| 421 | + "client_tool_choice = OpenAI(\n", |
| 422 | + " api_key=\"None\", base_url=f\"http://0.0.0.0:{port_tool_choice}/v1\"\n", |
| 423 | + ")\n", |
| 424 | + "model_name_tool_choice = client_tool_choice.models.list().data[0].id\n", |
| 425 | + "\n", |
| 426 | + "# Example with tool_choice=\"required\" - forces the model to call a tool\n", |
| 427 | + "messages_required = [\n", |
| 428 | + " {\"role\": \"user\", \"content\": \"Hello, what is the capital of France?\"}\n", |
| 429 | + "]\n", |
| 430 | + "\n", |
| 431 | + "# Define tools\n", |
| 432 | + "tools = [\n", |
| 433 | + " {\n", |
| 434 | + " \"type\": \"function\",\n", |
| 435 | + " \"function\": {\n", |
| 436 | + " \"name\": \"get_current_weather\",\n", |
| 437 | + " \"description\": \"Get the current weather in a given location\",\n", |
| 438 | + " \"parameters\": {\n", |
| 439 | + " \"type\": \"object\",\n", |
| 440 | + " \"properties\": {\n", |
| 441 | + " \"city\": {\n", |
| 442 | + " \"type\": \"string\",\n", |
| 443 | + " \"description\": \"The city to find the weather for, e.g. 'San Francisco'\",\n", |
| 444 | + " },\n", |
| 445 | + " \"unit\": {\n", |
| 446 | + " \"type\": \"string\",\n", |
| 447 | + " \"description\": \"The unit to fetch the temperature in\",\n", |
| 448 | + " \"enum\": [\"celsius\", \"fahrenheit\"],\n", |
| 449 | + " },\n", |
| 450 | + " },\n", |
| 451 | + " \"required\": [\"city\", \"unit\"],\n", |
| 452 | + " },\n", |
| 453 | + " },\n", |
| 454 | + " }\n", |
| 455 | + "]\n", |
| 456 | + "\n", |
| 457 | + "response_required = client_tool_choice.chat.completions.create(\n", |
| 458 | + " model=model_name_tool_choice,\n", |
| 459 | + " messages=messages_required,\n", |
| 460 | + " temperature=0,\n", |
| 461 | + " max_tokens=1024,\n", |
| 462 | + " tools=tools,\n", |
| 463 | + " tool_choice=\"required\", # Force the model to call a tool\n", |
| 464 | + ")\n", |
| 465 | + "\n", |
| 466 | + "print_highlight(\"Response with tool_choice='required':\")\n", |
| 467 | + "print(\"Content:\", response_required.choices[0].message.content)\n", |
| 468 | + "print(\"Tool calls:\", response_required.choices[0].message.tool_calls)" |
| 469 | + ] |
| 470 | + }, |
| 471 | + { |
| 472 | + "cell_type": "markdown", |
| 473 | + "metadata": {}, |
| 474 | + "source": [ |
| 475 | + "### Example: Specific Function Choice\n" |
| 476 | + ] |
| 477 | + }, |
| 478 | + { |
| 479 | + "cell_type": "code", |
| 480 | + "execution_count": null, |
| 481 | + "metadata": {}, |
| 482 | + "outputs": [ |
| 483 | + { |
| 484 | + "name": "stdout", |
| 485 | + "output_type": "stream", |
| 486 | + "text": [ |
| 487 | + "Response with specific function choice:\n", |
| 488 | + "Content: None\n", |
| 489 | + "Tool calls: [ChatCompletionMessageToolCall(id='call_fGL_1qsPQFqntNBPkSynJw', function=Function(arguments='{\"city\": \"Sophia Antipolis\", \"unit\": \"celsius\"}', name='get_current_weather'), type='function', index=0)]\n", |
| 490 | + "Called function: get_current_weather\n", |
| 491 | + "Arguments: {\"city\": \"Sophia Antipolis\", \"unit\": \"celsius\"}\n" |
| 492 | + ] |
| 493 | + } |
| 494 | + ], |
| 495 | + "source": [ |
| 496 | + "# Example with specific function choice - forces the model to call a specific function\n", |
| 497 | + "messages_specific = [\n", |
| 498 | + "    {\"role\": \"user\", \"content\": \"What are the most attractive places in France?\"}\n", |
| 499 | + "]\n", |
| 500 | + "\n", |
| 501 | + "response_specific = client_tool_choice.chat.completions.create(\n", |
| 502 | + " model=model_name_tool_choice,\n", |
| 503 | + " messages=messages_specific,\n", |
| 504 | + " temperature=0,\n", |
| 505 | + " max_tokens=1024,\n", |
| 506 | + " tools=tools,\n", |
| 507 | + " tool_choice={\n", |
| 508 | + " \"type\": \"function\",\n", |
| 509 | + " \"function\": {\"name\": \"get_current_weather\"},\n", |
| 510 | + " }, # Force the model to call the specific get_current_weather function\n", |
| 511 | + ")\n", |
| 512 | + "\n", |
| 513 | + "print_highlight(\"Response with specific function choice:\")\n", |
| 514 | + "print(\"Content:\", response_specific.choices[0].message.content)\n", |
| 515 | + "print(\"Tool calls:\", response_specific.choices[0].message.tool_calls)\n", |
| 516 | + "\n", |
| 517 | + "if response_specific.choices[0].message.tool_calls:\n", |
| 518 | + " tool_call = response_specific.choices[0].message.tool_calls[0]\n", |
| 519 | + " print(f\"Called function: {tool_call.function.name}\")\n", |
| 520 | + " print(f\"Arguments: {tool_call.function.arguments}\")" |
| 521 | + ] |
| 522 | + }, |
363 | 523 | {
|
364 | 524 | "cell_type": "markdown",
|
365 | 525 | "metadata": {},
|
|
444 | 604 | "outputs": [],
|
445 | 605 | "source": [
|
446 | 606 | "import sglang as sgl\n",
|
447 |
| - "from sglang.srt.function_call_parser import FunctionCallParser\n", |
| 607 | + "from sglang.srt.function_call.function_call_parser import FunctionCallParser\n", |
448 | 608 | "from sglang.srt.managers.io_struct import Tool, Function\n",
|
449 | 609 | "\n",
|
450 | 610 | "llm = sgl.Engine(model_path=\"Qwen/Qwen2.5-7B-Instruct\")\n",
|
|
0 commit comments