Skip to content
Merged
Show file tree
Hide file tree
Changes from 8 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions docs/features/tool_calling.md
Original file line number Diff line number Diff line change
Expand Up @@ -420,7 +420,7 @@ Flags: `--tool-call-parser pythonic --chat-template {see_above}`

## How to Write a Tool Parser Plugin

A tool parser plugin is a Python file containing one or more ToolParser implementations. You can write a ToolParser similar to the `Hermes2ProToolParser` in [vllm/entrypoints/openai/tool_parsers/hermes_tool_parser.py](../../vllm/entrypoints/openai/tool_parsers/hermes_tool_parser.py).
A tool parser plugin is a Python file containing one or more ToolParser implementations. You can write a ToolParser similar to the `Hermes2ProToolParser` in [vllm/tool_parsers/hermes_tool_parser.py](../../vllm/tool_parsers/hermes_tool_parser.py).

Here is a summary of a plugin file:

Expand Down Expand Up @@ -468,7 +468,7 @@ Here is a summary of a plugin file:
# register the tool parser to ToolParserManager
ToolParserManager.register_lazy_module(
name="example",
module_path="vllm.entrypoints.openai.tool_parsers.example",
module_path="vllm.tool_parsers.example",
class_name="ExampleToolParser",
)

Expand Down
2 changes: 1 addition & 1 deletion tests/entrypoints/openai/test_serving_chat.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,9 +19,9 @@
)
from vllm.entrypoints.openai.serving_chat import OpenAIServingChat
from vllm.entrypoints.openai.serving_models import BaseModelPath, OpenAIServingModels
from vllm.entrypoints.openai.tool_parsers import ToolParserManager
from vllm.outputs import CompletionOutput, RequestOutput
from vllm.tokenizers import get_tokenizer
from vllm.tool_parsers import ToolParserManager
from vllm.v1.engine.async_llm import AsyncLLM

from ...utils import RemoteOpenAIServer
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -10,8 +10,8 @@
run_tool_extraction_streaming,
)
from vllm.entrypoints.openai.protocol import FunctionCall
from vllm.entrypoints.openai.tool_parsers import ToolParser, ToolParserManager
from vllm.tokenizers import TokenizerLike
from vllm.tool_parsers import ToolParser, ToolParserManager

SIMPLE_ARGS_DICT = {
"action": "create",
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -6,8 +6,8 @@
import pytest

from vllm.entrypoints.openai.protocol import ChatCompletionRequest
from vllm.entrypoints.openai.tool_parsers.hermes_tool_parser import Hermes2ProToolParser
from vllm.tokenizers import TokenizerLike
from vllm.tool_parsers.hermes_tool_parser import Hermes2ProToolParser

from ....utils import RemoteOpenAIServer

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@
run_tool_extraction_streaming,
)
from vllm.entrypoints.openai.protocol import FunctionCall, ToolCall
from vllm.entrypoints.openai.tool_parsers import ToolParser, ToolParserManager
from vllm.tool_parsers import ToolParser, ToolParserManager


def make_tool_call(name, arguments):
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -6,8 +6,8 @@
import pytest

from vllm.entrypoints.openai.protocol import ExtractedToolCallInformation
from vllm.entrypoints.openai.tool_parsers.llama_tool_parser import Llama3JsonToolParser
from vllm.tokenizers import TokenizerLike
from vllm.tool_parsers.llama_tool_parser import Llama3JsonToolParser


@pytest.fixture
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -10,8 +10,8 @@
run_tool_extraction_streaming,
)
from vllm.entrypoints.openai.protocol import FunctionCall
from vllm.entrypoints.openai.tool_parsers import ToolParser, ToolParserManager
from vllm.tokenizers import TokenizerLike
from vllm.tool_parsers import ToolParser, ToolParserManager

# Test cases similar to pythonic parser but with Llama4 specific format
SIMPLE_FUNCTION_OUTPUT = "[get_weather(city='LA', metric='C')]"
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -10,8 +10,8 @@
run_tool_extraction_streaming,
)
from vllm.entrypoints.openai.protocol import FunctionCall
from vllm.entrypoints.openai.tool_parsers import ToolParser, ToolParserManager
from vllm.tokenizers import TokenizerLike
from vllm.tool_parsers import ToolParser, ToolParserManager

# https://github.com/meta-llama/llama-models/blob/main/models/llama3_2/text_prompt_format.md#model-response-format-1
SIMPLE_FUNCTION_OUTPUT = "get_weather(city='San Francisco', metric='celsius')"
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -10,8 +10,8 @@
run_tool_extraction_streaming,
)
from vllm.entrypoints.openai.protocol import FunctionCall
from vllm.entrypoints.openai.tool_parsers import ToolParser, ToolParserManager
from vllm.tokenizers import TokenizerLike
from vllm.tool_parsers import ToolParser, ToolParserManager

# https://github.com/meta-llama/llama-models/blob/main/models/llama3_2/text_prompt_format.md#model-response-format-1
SIMPLE_FUNCTION_OUTPUT = "get_weather(city='San Francisco', metric='celsius')"
Expand Down
2 changes: 1 addition & 1 deletion tests/entrypoints/openai/tool_parsers/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,8 +10,8 @@
FunctionCall,
ToolCall,
)
from vllm.entrypoints.openai.tool_parsers import ToolParser
from vllm.tokenizers import TokenizerLike
from vllm.tool_parsers import ToolParser


class StreamingToolReconstructor:
Expand Down
6 changes: 3 additions & 3 deletions tests/models/language/generation/test_mistral.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,12 +5,12 @@

import pytest

from vllm.entrypoints.openai.tool_parsers.mistral_tool_parser import (
from vllm.sampling_params import SamplingParams
from vllm.tokenizers.mistral import MistralTokenizer
from vllm.tool_parsers.mistral_tool_parser import (
MistralToolCall,
MistralToolParser,
)
from vllm.sampling_params import SamplingParams
from vllm.tokenizers.mistral import MistralTokenizer

from ...utils import check_logprobs_close

Expand Down
4 changes: 2 additions & 2 deletions tests/tool_use/test_deepseekv31_tool_parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,10 +3,10 @@

import pytest

from vllm.entrypoints.openai.tool_parsers.deepseekv31_tool_parser import (
from vllm.tokenizers import get_tokenizer
from vllm.tool_parsers.deepseekv31_tool_parser import (
DeepSeekV31ToolParser,
)
from vllm.tokenizers import get_tokenizer

MODEL = "deepseek-ai/DeepSeek-V3.1"

Expand Down
2 changes: 1 addition & 1 deletion tests/tool_use/test_ernie45_moe_tool_parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,9 +13,9 @@
FunctionCall,
ToolCall,
)
from vllm.entrypoints.openai.tool_parsers.ernie45_tool_parser import Ernie45ToolParser
from vllm.tokenizers import TokenizerLike, get_tokenizer
from vllm.tokenizers.detokenizer_utils import detokenize_incrementally
from vllm.tool_parsers.ernie45_tool_parser import Ernie45ToolParser

# Use a common model that is likely to be available
MODEL = "baidu/ERNIE-4.5-21B-A3B-Thinking"
Expand Down
4 changes: 2 additions & 2 deletions tests/tool_use/test_glm4_moe_tool_parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,10 +7,10 @@
import pytest

from vllm.entrypoints.openai.protocol import FunctionCall, ToolCall
from vllm.entrypoints.openai.tool_parsers.glm4_moe_tool_parser import (
from vllm.tokenizers import get_tokenizer
from vllm.tool_parsers.glm4_moe_tool_parser import (
Glm4MoeModelToolParser,
)
from vllm.tokenizers import get_tokenizer

pytestmark = pytest.mark.cpu_test

Expand Down
2 changes: 1 addition & 1 deletion tests/tool_use/test_jamba_tool_parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,9 +9,9 @@
from partial_json_parser.core.options import Allow

from vllm.entrypoints.openai.protocol import DeltaMessage, FunctionCall, ToolCall
from vllm.entrypoints.openai.tool_parsers.jamba_tool_parser import JambaToolParser
from vllm.tokenizers import TokenizerLike, get_tokenizer
from vllm.tokenizers.detokenizer_utils import detokenize_incrementally
from vllm.tool_parsers.jamba_tool_parser import JambaToolParser

pytestmark = pytest.mark.cpu_test

Expand Down
2 changes: 1 addition & 1 deletion tests/tool_use/test_kimi_k2_tool_parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,8 +7,8 @@
import pytest

from vllm.entrypoints.openai.protocol import FunctionCall, ToolCall
from vllm.entrypoints.openai.tool_parsers.kimi_k2_tool_parser import KimiK2ToolParser
from vllm.tokenizers import get_tokenizer
from vllm.tool_parsers.kimi_k2_tool_parser import KimiK2ToolParser

pytestmark = pytest.mark.cpu_test

Expand Down
2 changes: 1 addition & 1 deletion tests/tool_use/test_minimax_tool_parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,8 +12,8 @@
FunctionCall,
ToolCall,
)
from vllm.entrypoints.openai.tool_parsers.minimax_tool_parser import MinimaxToolParser
from vllm.tokenizers import get_tokenizer
from vllm.tool_parsers.minimax_tool_parser import MinimaxToolParser

pytestmark = pytest.mark.cpu_test

Expand Down
2 changes: 1 addition & 1 deletion tests/tool_use/test_mistral_tool_parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,10 +12,10 @@
from partial_json_parser.core.options import Allow

from vllm.entrypoints.openai.protocol import DeltaMessage, DeltaToolCall
from vllm.entrypoints.openai.tool_parsers.mistral_tool_parser import MistralToolParser
from vllm.tokenizers import TokenizerLike, get_tokenizer
from vllm.tokenizers.detokenizer_utils import detokenize_incrementally
from vllm.tokenizers.mistral import MistralTokenizer
from vllm.tool_parsers.mistral_tool_parser import MistralToolParser


@pytest.fixture(scope="module")
Expand Down
2 changes: 1 addition & 1 deletion tests/tool_use/test_openai_tool_parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,8 +15,8 @@
)

from vllm.entrypoints.openai.protocol import FunctionCall, ToolCall
from vllm.entrypoints.openai.tool_parsers.openai_tool_parser import OpenAIToolParser
from vllm.tokenizers import get_tokenizer
from vllm.tool_parsers.openai_tool_parser import OpenAIToolParser

MODEL = "gpt2"

Expand Down
8 changes: 4 additions & 4 deletions tests/tool_use/test_qwen3coder_tool_parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,12 +13,12 @@
FunctionCall,
ToolCall,
)
from vllm.entrypoints.openai.tool_parsers.qwen3coder_tool_parser import (
Qwen3CoderToolParser,
)
from vllm.entrypoints.openai.tool_parsers.qwen3xml_tool_parser import Qwen3XMLToolParser
from vllm.tokenizers import TokenizerLike, get_tokenizer
from vllm.tokenizers.detokenizer_utils import detokenize_incrementally
from vllm.tool_parsers.qwen3coder_tool_parser import (
Qwen3CoderToolParser,
)
from vllm.tool_parsers.qwen3xml_tool_parser import Qwen3XMLToolParser

pytestmark = pytest.mark.cpu_test

Expand Down
2 changes: 1 addition & 1 deletion tests/tool_use/test_seed_oss_tool_parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,9 +14,9 @@
FunctionCall,
ToolCall,
)
from vllm.entrypoints.openai.tool_parsers.seed_oss_tool_parser import SeedOssToolParser
from vllm.tokenizers import TokenizerLike, get_tokenizer
from vllm.tokenizers.detokenizer_utils import detokenize_incrementally
from vllm.tool_parsers.seed_oss_tool_parser import SeedOssToolParser

pytestmark = pytest.mark.cpu_test

Expand Down
2 changes: 1 addition & 1 deletion tests/tool_use/test_tool_choice_required.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@
ChatCompletionToolsParam,
)
from vllm.entrypoints.openai.serving_chat import OpenAIServingChat
from vllm.entrypoints.openai.tool_parsers.utils import get_json_schema_from_tools
from vllm.tool_parsers.utils import get_json_schema_from_tools

pytestmark = pytest.mark.cpu_test

Expand Down
2 changes: 1 addition & 1 deletion tests/tool_use/test_xlam_tool_parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,9 +12,9 @@
FunctionCall,
ToolCall,
)
from vllm.entrypoints.openai.tool_parsers.xlam_tool_parser import xLAMToolParser
from vllm.tokenizers import TokenizerLike, get_tokenizer
from vllm.tokenizers.detokenizer_utils import detokenize_incrementally
from vllm.tool_parsers.xlam_tool_parser import xLAMToolParser

pytestmark = pytest.mark.cpu_test

Expand Down
2 changes: 1 addition & 1 deletion vllm/entrypoints/context.py
Original file line number Diff line number Diff line change
Expand Up @@ -34,13 +34,13 @@
ResponseRawMessageAndToken,
ResponsesRequest,
)
from vllm.entrypoints.openai.tool_parsers.abstract_tool_parser import ToolParser
from vllm.entrypoints.responses_utils import construct_tool_dicts
from vllm.entrypoints.tool import Tool
from vllm.entrypoints.tool_server import ToolServer
from vllm.outputs import RequestOutput
from vllm.reasoning.abs_reasoning_parsers import ReasoningParser
from vllm.tokenizers.protocol import TokenizerLike
from vllm.tool_parsers.abstract_tool_parser import ToolParser
from vllm.transformers_utils.tokenizer import AnyTokenizer
from vllm.utils import random_uuid

Expand Down
2 changes: 1 addition & 1 deletion vllm/entrypoints/openai/api_server.py
Original file line number Diff line number Diff line change
Expand Up @@ -72,7 +72,6 @@
OpenAIServingTranscription,
OpenAIServingTranslation,
)
from vllm.entrypoints.openai.tool_parsers import ToolParserManager
from vllm.entrypoints.openai.utils import validate_json_request
from vllm.entrypoints.pooling.classify.serving import ServingClassification
from vllm.entrypoints.pooling.embed.serving import OpenAIServingEmbedding
Expand All @@ -95,6 +94,7 @@
from vllm.logger import init_logger
from vllm.reasoning import ReasoningParserManager
from vllm.tasks import POOLING_TASKS
from vllm.tool_parsers import ToolParserManager
from vllm.usage.usage_lib import UsageContext
from vllm.utils.argparse_utils import FlexibleArgumentParser
from vllm.utils.gc_utils import freeze_gc_heap
Expand Down
2 changes: 1 addition & 1 deletion vllm/entrypoints/openai/cli_args.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,8 +27,8 @@
H11_MAX_INCOMPLETE_EVENT_SIZE_DEFAULT,
)
from vllm.entrypoints.openai.serving_models import LoRAModulePath
from vllm.entrypoints.openai.tool_parsers import ToolParserManager
from vllm.logger import init_logger
from vllm.tool_parsers import ToolParserManager
from vllm.utils.argparse_utils import FlexibleArgumentParser

logger = init_logger(__name__)
Expand Down
2 changes: 1 addition & 1 deletion vllm/entrypoints/openai/parser/responses_parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,10 +12,10 @@
)

from vllm.entrypoints.openai.protocol import ResponseInputOutputItem, ResponsesRequest
from vllm.entrypoints.openai.tool_parsers.abstract_tool_parser import ToolParser
from vllm.outputs import CompletionOutput
from vllm.reasoning.abs_reasoning_parsers import ReasoningParser
from vllm.tokenizers.protocol import TokenizerLike
from vllm.tool_parsers.abstract_tool_parser import ToolParser
from vllm.transformers_utils.tokenizer import AnyTokenizer
from vllm.utils import random_uuid

Expand Down
4 changes: 2 additions & 2 deletions vllm/entrypoints/openai/serving_chat.py
Original file line number Diff line number Diff line change
Expand Up @@ -57,8 +57,6 @@
clamp_prompt_logprobs,
)
from vllm.entrypoints.openai.serving_models import OpenAIServingModels
from vllm.entrypoints.openai.tool_parsers import ToolParser
from vllm.entrypoints.openai.tool_parsers.mistral_tool_parser import MistralToolCall
from vllm.entrypoints.openai.utils import maybe_filter_parallel_tool_calls
from vllm.entrypoints.utils import get_max_tokens, should_include_usage
from vllm.inputs.data import TokensPrompt
Expand All @@ -73,6 +71,8 @@
truncate_tool_call_ids,
validate_request_params,
)
from vllm.tool_parsers import ToolParser
from vllm.tool_parsers.mistral_tool_parser import MistralToolCall
from vllm.utils.collection_utils import as_list
from vllm.v1.sample.logits_processor import validate_logits_processors_parameters

Expand Down
2 changes: 1 addition & 1 deletion vllm/entrypoints/openai/serving_engine.py
Original file line number Diff line number Diff line change
Expand Up @@ -59,7 +59,6 @@
TranslationRequest,
)
from vllm.entrypoints.openai.serving_models import OpenAIServingModels
from vllm.entrypoints.openai.tool_parsers import ToolParser, ToolParserManager
from vllm.entrypoints.pooling.classify.protocol import (
ClassificationChatRequest,
ClassificationCompletionRequest,
Expand Down Expand Up @@ -104,6 +103,7 @@
from vllm.tokenizers import TokenizerLike
from vllm.tokenizers.deepseek_v32 import DeepseekV32Tokenizer
from vllm.tokenizers.mistral import MistralTokenizer
from vllm.tool_parsers import ToolParser, ToolParserManager
from vllm.tracing import (
contains_trace_headers,
extract_trace_headers,
Expand Down
Loading