Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions docs/features/tool_calling.md
Original file line number Diff line number Diff line change
Expand Up @@ -420,7 +420,7 @@ Flags: `--tool-call-parser pythonic --chat-template {see_above}`

## How to Write a Tool Parser Plugin

A tool parser plugin is a Python file containing one or more ToolParser implementations. You can write a ToolParser similar to the `Hermes2ProToolParser` in [vllm/entrypoints/openai/tool_parsers/hermes_tool_parser.py](../../vllm/entrypoints/openai/tool_parsers/hermes_tool_parser.py).
A tool parser plugin is a Python file containing one or more ToolParser implementations. You can write a ToolParser similar to the `Hermes2ProToolParser` in [vllm/tool_parsers/hermes_tool_parser.py](../../vllm/tool_parsers/hermes_tool_parser.py).

Here is a summary of a plugin file:

Expand Down Expand Up @@ -468,7 +468,7 @@ Here is a summary of a plugin file:
# register the tool parser to ToolParserManager
ToolParserManager.register_lazy_module(
name="example",
module_path="vllm.entrypoints.openai.tool_parsers.example",
module_path="vllm.tool_parsers.example",
class_name="ExampleToolParser",
)

Expand Down
2 changes: 1 addition & 1 deletion tests/entrypoints/openai/test_serving_chat.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,9 +19,9 @@
)
from vllm.entrypoints.openai.serving_chat import OpenAIServingChat
from vllm.entrypoints.openai.serving_models import BaseModelPath, OpenAIServingModels
from vllm.entrypoints.openai.tool_parsers import ToolParserManager
from vllm.outputs import CompletionOutput, RequestOutput
from vllm.tokenizers import get_tokenizer
from vllm.tool_parsers import ToolParserManager
from vllm.v1.engine.async_llm import AsyncLLM

from ...utils import RemoteOpenAIServer
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -10,8 +10,8 @@
run_tool_extraction_streaming,
)
from vllm.entrypoints.openai.protocol import FunctionCall
from vllm.entrypoints.openai.tool_parsers import ToolParser, ToolParserManager
from vllm.tokenizers import TokenizerLike
from vllm.tool_parsers import ToolParser, ToolParserManager

SIMPLE_ARGS_DICT = {
"action": "create",
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -6,8 +6,8 @@
import pytest

from vllm.entrypoints.openai.protocol import ChatCompletionRequest
from vllm.entrypoints.openai.tool_parsers.hermes_tool_parser import Hermes2ProToolParser
from vllm.tokenizers import TokenizerLike
from vllm.tool_parsers.hermes_tool_parser import Hermes2ProToolParser

from ....utils import RemoteOpenAIServer

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@
run_tool_extraction_streaming,
)
from vllm.entrypoints.openai.protocol import FunctionCall, ToolCall
from vllm.entrypoints.openai.tool_parsers import ToolParser, ToolParserManager
from vllm.tool_parsers import ToolParser, ToolParserManager


def make_tool_call(name, arguments):
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -6,8 +6,8 @@
import pytest

from vllm.entrypoints.openai.protocol import ExtractedToolCallInformation
from vllm.entrypoints.openai.tool_parsers.llama_tool_parser import Llama3JsonToolParser
from vllm.tokenizers import TokenizerLike
from vllm.tool_parsers.llama_tool_parser import Llama3JsonToolParser


@pytest.fixture
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -10,8 +10,8 @@
run_tool_extraction_streaming,
)
from vllm.entrypoints.openai.protocol import FunctionCall
from vllm.entrypoints.openai.tool_parsers import ToolParser, ToolParserManager
from vllm.tokenizers import TokenizerLike
from vllm.tool_parsers import ToolParser, ToolParserManager

# Test cases similar to pythonic parser but with Llama4 specific format
SIMPLE_FUNCTION_OUTPUT = "[get_weather(city='LA', metric='C')]"
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -10,8 +10,8 @@
run_tool_extraction_streaming,
)
from vllm.entrypoints.openai.protocol import FunctionCall
from vllm.entrypoints.openai.tool_parsers import ToolParser, ToolParserManager
from vllm.tokenizers import TokenizerLike
from vllm.tool_parsers import ToolParser, ToolParserManager

# https://github.com/meta-llama/llama-models/blob/main/models/llama3_2/text_prompt_format.md#model-response-format-1
SIMPLE_FUNCTION_OUTPUT = "get_weather(city='San Francisco', metric='celsius')"
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -10,8 +10,8 @@
run_tool_extraction_streaming,
)
from vllm.entrypoints.openai.protocol import FunctionCall
from vllm.entrypoints.openai.tool_parsers import ToolParser, ToolParserManager
from vllm.tokenizers import TokenizerLike
from vllm.tool_parsers import ToolParser, ToolParserManager

# https://github.com/meta-llama/llama-models/blob/main/models/llama3_2/text_prompt_format.md#model-response-format-1
SIMPLE_FUNCTION_OUTPUT = "get_weather(city='San Francisco', metric='celsius')"
Expand Down
2 changes: 1 addition & 1 deletion tests/entrypoints/openai/tool_parsers/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,8 +10,8 @@
FunctionCall,
ToolCall,
)
from vllm.entrypoints.openai.tool_parsers import ToolParser
from vllm.tokenizers import TokenizerLike
from vllm.tool_parsers import ToolParser


class StreamingToolReconstructor:
Expand Down
6 changes: 3 additions & 3 deletions tests/models/language/generation/test_mistral.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,12 +5,12 @@

import pytest

from vllm.entrypoints.openai.tool_parsers.mistral_tool_parser import (
from vllm.sampling_params import SamplingParams
from vllm.tokenizers.mistral import MistralTokenizer
from vllm.tool_parsers.mistral_tool_parser import (
MistralToolCall,
MistralToolParser,
)
from vllm.sampling_params import SamplingParams
from vllm.tokenizers.mistral import MistralTokenizer

from ...utils import check_logprobs_close

Expand Down
4 changes: 2 additions & 2 deletions tests/tool_use/test_deepseekv31_tool_parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,10 +3,10 @@

import pytest

from vllm.entrypoints.openai.tool_parsers.deepseekv31_tool_parser import (
from vllm.tokenizers import get_tokenizer
from vllm.tool_parsers.deepseekv31_tool_parser import (
DeepSeekV31ToolParser,
)
from vllm.tokenizers import get_tokenizer

MODEL = "deepseek-ai/DeepSeek-V3.1"

Expand Down
2 changes: 1 addition & 1 deletion tests/tool_use/test_ernie45_moe_tool_parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,9 +13,9 @@
FunctionCall,
ToolCall,
)
from vllm.entrypoints.openai.tool_parsers.ernie45_tool_parser import Ernie45ToolParser
from vllm.tokenizers import TokenizerLike, get_tokenizer
from vllm.tokenizers.detokenizer_utils import detokenize_incrementally
from vllm.tool_parsers.ernie45_tool_parser import Ernie45ToolParser

# Use a common model that is likely to be available
MODEL = "baidu/ERNIE-4.5-21B-A3B-Thinking"
Expand Down
4 changes: 2 additions & 2 deletions tests/tool_use/test_glm4_moe_tool_parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,10 +7,10 @@
import pytest

from vllm.entrypoints.openai.protocol import FunctionCall, ToolCall
from vllm.entrypoints.openai.tool_parsers.glm4_moe_tool_parser import (
from vllm.tokenizers import get_tokenizer
from vllm.tool_parsers.glm4_moe_tool_parser import (
Glm4MoeModelToolParser,
)
from vllm.tokenizers import get_tokenizer

pytestmark = pytest.mark.cpu_test

Expand Down
2 changes: 1 addition & 1 deletion tests/tool_use/test_jamba_tool_parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,9 +9,9 @@
from partial_json_parser.core.options import Allow

from vllm.entrypoints.openai.protocol import DeltaMessage, FunctionCall, ToolCall
from vllm.entrypoints.openai.tool_parsers.jamba_tool_parser import JambaToolParser
from vllm.tokenizers import TokenizerLike, get_tokenizer
from vllm.tokenizers.detokenizer_utils import detokenize_incrementally
from vllm.tool_parsers.jamba_tool_parser import JambaToolParser

pytestmark = pytest.mark.cpu_test

Expand Down
2 changes: 1 addition & 1 deletion tests/tool_use/test_kimi_k2_tool_parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,8 +7,8 @@
import pytest

from vllm.entrypoints.openai.protocol import FunctionCall, ToolCall
from vllm.entrypoints.openai.tool_parsers.kimi_k2_tool_parser import KimiK2ToolParser
from vllm.tokenizers import get_tokenizer
from vllm.tool_parsers.kimi_k2_tool_parser import KimiK2ToolParser

pytestmark = pytest.mark.cpu_test

Expand Down
2 changes: 1 addition & 1 deletion tests/tool_use/test_minimax_tool_parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,8 +12,8 @@
FunctionCall,
ToolCall,
)
from vllm.entrypoints.openai.tool_parsers.minimax_tool_parser import MinimaxToolParser
from vllm.tokenizers import get_tokenizer
from vllm.tool_parsers.minimax_tool_parser import MinimaxToolParser

pytestmark = pytest.mark.cpu_test

Expand Down
2 changes: 1 addition & 1 deletion tests/tool_use/test_mistral_tool_parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,10 +12,10 @@
from partial_json_parser.core.options import Allow

from vllm.entrypoints.openai.protocol import DeltaMessage, DeltaToolCall
from vllm.entrypoints.openai.tool_parsers.mistral_tool_parser import MistralToolParser
from vllm.tokenizers import TokenizerLike, get_tokenizer
from vllm.tokenizers.detokenizer_utils import detokenize_incrementally
from vllm.tokenizers.mistral import MistralTokenizer
from vllm.tool_parsers.mistral_tool_parser import MistralToolParser


@pytest.fixture(scope="module")
Expand Down
2 changes: 1 addition & 1 deletion tests/tool_use/test_openai_tool_parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,8 +15,8 @@
)

from vllm.entrypoints.openai.protocol import FunctionCall, ToolCall
from vllm.entrypoints.openai.tool_parsers.openai_tool_parser import OpenAIToolParser
from vllm.tokenizers import get_tokenizer
from vllm.tool_parsers.openai_tool_parser import OpenAIToolParser

MODEL = "gpt2"

Expand Down
8 changes: 4 additions & 4 deletions tests/tool_use/test_qwen3coder_tool_parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,12 +13,12 @@
FunctionCall,
ToolCall,
)
from vllm.entrypoints.openai.tool_parsers.qwen3coder_tool_parser import (
Qwen3CoderToolParser,
)
from vllm.entrypoints.openai.tool_parsers.qwen3xml_tool_parser import Qwen3XMLToolParser
from vllm.tokenizers import TokenizerLike, get_tokenizer
from vllm.tokenizers.detokenizer_utils import detokenize_incrementally
from vllm.tool_parsers.qwen3coder_tool_parser import (
Qwen3CoderToolParser,
)
from vllm.tool_parsers.qwen3xml_tool_parser import Qwen3XMLToolParser

pytestmark = pytest.mark.cpu_test

Expand Down
2 changes: 1 addition & 1 deletion tests/tool_use/test_seed_oss_tool_parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,9 +14,9 @@
FunctionCall,
ToolCall,
)
from vllm.entrypoints.openai.tool_parsers.seed_oss_tool_parser import SeedOssToolParser
from vllm.tokenizers import TokenizerLike, get_tokenizer
from vllm.tokenizers.detokenizer_utils import detokenize_incrementally
from vllm.tool_parsers.seed_oss_tool_parser import SeedOssToolParser

pytestmark = pytest.mark.cpu_test

Expand Down
2 changes: 1 addition & 1 deletion tests/tool_use/test_tool_choice_required.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@
ChatCompletionToolsParam,
)
from vllm.entrypoints.openai.serving_chat import OpenAIServingChat
from vllm.entrypoints.openai.tool_parsers.utils import get_json_schema_from_tools
from vllm.tool_parsers.utils import get_json_schema_from_tools

pytestmark = pytest.mark.cpu_test

Expand Down
2 changes: 1 addition & 1 deletion tests/tool_use/test_xlam_tool_parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,9 +12,9 @@
FunctionCall,
ToolCall,
)
from vllm.entrypoints.openai.tool_parsers.xlam_tool_parser import xLAMToolParser
from vllm.tokenizers import TokenizerLike, get_tokenizer
from vllm.tokenizers.detokenizer_utils import detokenize_incrementally
from vllm.tool_parsers.xlam_tool_parser import xLAMToolParser

pytestmark = pytest.mark.cpu_test

Expand Down
2 changes: 1 addition & 1 deletion vllm/entrypoints/context.py
Original file line number Diff line number Diff line change
Expand Up @@ -34,13 +34,13 @@
ResponseRawMessageAndToken,
ResponsesRequest,
)
from vllm.entrypoints.openai.tool_parsers.abstract_tool_parser import ToolParser
from vllm.entrypoints.responses_utils import construct_tool_dicts
from vllm.entrypoints.tool import Tool
from vllm.entrypoints.tool_server import ToolServer
from vllm.outputs import RequestOutput
from vllm.reasoning.abs_reasoning_parsers import ReasoningParser
from vllm.tokenizers.protocol import TokenizerLike
from vllm.tool_parsers.abstract_tool_parser import ToolParser
from vllm.transformers_utils.tokenizer import AnyTokenizer
from vllm.utils import random_uuid

Expand Down
2 changes: 1 addition & 1 deletion vllm/entrypoints/openai/api_server.py
Original file line number Diff line number Diff line change
Expand Up @@ -72,7 +72,6 @@
OpenAIServingTranscription,
OpenAIServingTranslation,
)
from vllm.entrypoints.openai.tool_parsers import ToolParserManager
from vllm.entrypoints.openai.utils import validate_json_request
from vllm.entrypoints.pooling.classify.serving import ServingClassification
from vllm.entrypoints.pooling.embed.serving import OpenAIServingEmbedding
Expand All @@ -95,6 +94,7 @@
from vllm.logger import init_logger
from vllm.reasoning import ReasoningParserManager
from vllm.tasks import POOLING_TASKS
from vllm.tool_parsers import ToolParserManager
from vllm.usage.usage_lib import UsageContext
from vllm.utils.argparse_utils import FlexibleArgumentParser
from vllm.utils.gc_utils import freeze_gc_heap
Expand Down
2 changes: 1 addition & 1 deletion vllm/entrypoints/openai/cli_args.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,8 +27,8 @@
H11_MAX_INCOMPLETE_EVENT_SIZE_DEFAULT,
)
from vllm.entrypoints.openai.serving_models import LoRAModulePath
from vllm.entrypoints.openai.tool_parsers import ToolParserManager
from vllm.logger import init_logger
from vllm.tool_parsers import ToolParserManager
from vllm.utils.argparse_utils import FlexibleArgumentParser

logger = init_logger(__name__)
Expand Down
2 changes: 1 addition & 1 deletion vllm/entrypoints/openai/parser/responses_parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,10 +12,10 @@
)

from vllm.entrypoints.openai.protocol import ResponseInputOutputItem, ResponsesRequest
from vllm.entrypoints.openai.tool_parsers.abstract_tool_parser import ToolParser
from vllm.outputs import CompletionOutput
from vllm.reasoning.abs_reasoning_parsers import ReasoningParser
from vllm.tokenizers.protocol import TokenizerLike
from vllm.tool_parsers.abstract_tool_parser import ToolParser
from vllm.transformers_utils.tokenizer import AnyTokenizer
from vllm.utils import random_uuid

Expand Down
4 changes: 2 additions & 2 deletions vllm/entrypoints/openai/serving_chat.py
Original file line number Diff line number Diff line change
Expand Up @@ -57,8 +57,6 @@
clamp_prompt_logprobs,
)
from vllm.entrypoints.openai.serving_models import OpenAIServingModels
from vllm.entrypoints.openai.tool_parsers import ToolParser
from vllm.entrypoints.openai.tool_parsers.mistral_tool_parser import MistralToolCall
from vllm.entrypoints.openai.utils import maybe_filter_parallel_tool_calls
from vllm.entrypoints.utils import get_max_tokens, should_include_usage
from vllm.inputs.data import TokensPrompt
Expand All @@ -73,6 +71,8 @@
truncate_tool_call_ids,
validate_request_params,
)
from vllm.tool_parsers import ToolParser
from vllm.tool_parsers.mistral_tool_parser import MistralToolCall
from vllm.utils.collection_utils import as_list
from vllm.v1.sample.logits_processor import validate_logits_processors_parameters

Expand Down
2 changes: 1 addition & 1 deletion vllm/entrypoints/openai/serving_engine.py
Original file line number Diff line number Diff line change
Expand Up @@ -59,7 +59,6 @@
TranslationRequest,
)
from vllm.entrypoints.openai.serving_models import OpenAIServingModels
from vllm.entrypoints.openai.tool_parsers import ToolParser, ToolParserManager
from vllm.entrypoints.pooling.classify.protocol import (
ClassificationChatRequest,
ClassificationCompletionRequest,
Expand Down Expand Up @@ -104,6 +103,7 @@
from vllm.tokenizers import TokenizerLike
from vllm.tokenizers.deepseek_v32 import DeepseekV32Tokenizer
from vllm.tokenizers.mistral import MistralTokenizer
from vllm.tool_parsers import ToolParser, ToolParserManager
from vllm.tracing import (
contains_trace_headers,
extract_trace_headers,
Expand Down
Loading
Loading