
Commit a29a146

updated vllm toolparser imports that changed in 0.14.0 + fixed LMCache integration tests (deepjavalibrary#2989)

1 parent: 5b27521

File tree: 6 files changed, +18 −9 lines

engines/python/setup/djl_python/chat_completions/vllm_chat_utils.py
Lines changed: 1 addition & 1 deletion

@@ -15,7 +15,7 @@
 from pydantic import Field
 from vllm import TokensPrompt
 from vllm.entrypoints.openai.serving_engine import RequestPrompt, TextTokensPrompt
-from vllm.entrypoints.openai.tool_parsers import ToolParser
+from vllm.tool_parsers import ToolParser
 from vllm.tokenizers.mistral import maybe_serialize_tool_calls
 from vllm.transformers_utils.tokenizer import AnyTokenizer
 from vllm.entrypoints.openai.protocol import ChatCompletionRequest
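
Downstream code that has to run on both sides of this rename can guard the import; a minimal sketch (not part of this commit), assuming only the two paths shown in the diff:

```python
# Compatibility shim: vLLM 0.14.0 moved the tool-parser classes to a
# top-level package. Try the new location first, fall back to the old one.
try:
    from vllm.tool_parsers import ToolParser  # vLLM >= 0.14.0
except ImportError:
    from vllm.entrypoints.openai.tool_parsers import ToolParser  # older vLLM
```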

engines/python/setup/djl_python/properties_manager/vllm_rb_properties.py
Lines changed: 1 addition & 1 deletion

@@ -121,7 +121,7 @@ def validate_pipeline_parallel(self):
     @model_validator(mode='after')
     def validate_tool_call_parser(self):
         if self.enable_auto_tool_choice:
-            from vllm.entrypoints.openai.tool_parsers import ToolParserManager
+            from vllm.tool_parsers import ToolParserManager
             valid_tool_parses = ToolParserManager.list_registered()
             if self.tool_call_parser not in valid_tool_parses:
                 raise ValueError(

engines/python/setup/djl_python/rolling_batch/vllm_rolling_batch.py
Lines changed: 1 addition & 1 deletion

@@ -55,7 +55,7 @@ def __init__(self, model_id_or_path: str, properties: dict,
         self.tool_parser = None
         self.reasoning_parser = None
         if self.vllm_configs.enable_auto_tool_choice:
-            from vllm.entrypoints.openai.tool_parsers import ToolParserManager
+            from vllm.tool_parsers import ToolParserManager
             try:
                 self.tool_parser = ToolParserManager.get_tool_parser(
                     self.vllm_configs.tool_call_parser)
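
Taken together, the two ToolParserManager call sites in this commit first validate the configured parser name and then resolve it. A condensed sketch of that pattern, assuming the post-0.14.0 import path and a hypothetical parser name:

```python
from vllm.tool_parsers import ToolParserManager  # vLLM >= 0.14.0 path

tool_call_parser = "hermes"  # hypothetical example value
valid_tool_parsers = ToolParserManager.list_registered()
if tool_call_parser not in valid_tool_parsers:
    raise ValueError(f"Invalid tool_call_parser '{tool_call_parser}'; "
                     f"registered parsers: {valid_tool_parsers}")

# Resolve the parser class; it is instantiated with a tokenizer at runtime.
tool_parser = ToolParserManager.get_tool_parser(tool_call_parser)
```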

tests/integration/llm/client.py
Lines changed: 2 additions & 2 deletions

@@ -284,10 +284,10 @@ def get_model_name():
         "seq_length": [256],
         "tokenizer": "Qwen/Qwen3-8B"
     },
-    "qwen2.5-72b-lmcache-auto": {
+    "qwen2.5-32b-lmcache-auto": {
         "batch_size": [1, 4],
         "seq_length": [256],
-        "tokenizer": "Qwen/Qwen2.5-72B"
+        "tokenizer": "Qwen/Qwen2.5-32B"
     },
 }

tests/integration/llm/prepare.py
Lines changed: 10 additions & 1 deletion

@@ -529,7 +529,7 @@
     },
     "qwen3-8b-no-cache": {
         "option.model_id": "Qwen/Qwen3-8B",
-        "option.tensor_parallel_degree": 1,
+        "option.tensor_parallel_degree": 2,
         "option.load_format": "dummy",
         "option.max_new_tokens": 100,
         "option.enable_prefix_caching": False,

@@ -672,6 +672,15 @@
         "option.kv_transfer_config":
         '{"kv_connector":"LMCacheConnectorV1", "kv_role":"kv_both"}',
     },
+    "qwen2.5-32b": {
+        "option.model_id": "Qwen/Qwen2.5-32B",
+        "option.tensor_parallel_degree": 4,
+        "option.load_format": "dummy",
+        "option.max_new_tokens": 100,
+        "option.max_model_len": 16384,
+        "option.enable_prefix_caching": False,
+        "load_on_devices": 0,
+    },
 }

 vllm_neo_model_list = {
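
For reference, the `option.kv_transfer_config` JSON in the hunk above maps onto vLLM's KV-transfer settings. Outside DJL Serving, an equivalent stand-alone setup looks roughly like this (a sketch using vLLM's KVTransferConfig; the model and tensor-parallel degree mirror the qwen2.5-32b entry above and are illustrative, not the test's exact launch):

```python
from vllm import LLM
from vllm.config import KVTransferConfig

# Attach the LMCache connector the same way the test property does:
# kv_role "kv_both" means this engine both produces and consumes KV cache.
llm = LLM(
    model="Qwen/Qwen2.5-32B",
    tensor_parallel_size=4,
    kv_transfer_config=KVTransferConfig(
        kv_connector="LMCacheConnectorV1",
        kv_role="kv_both",
    ),
)
```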

tests/integration/tests.py
Lines changed: 3 additions & 3 deletions

@@ -715,12 +715,12 @@ def test_lmcache_auto_config(self):
         client.run("vllm_lmcache qwen3-8b-lmcache-auto".split())

     def test_lmcache_auto_config_larger_model(self):
-        with Runner("lmi", "qwen2.5-72b-no-cache") as r:
-            prepare.build_vllm_async_model("qwen2.5-72b")
+        with Runner("lmi", "qwen2.5-32b") as r:
+            prepare.build_vllm_async_model("qwen2.5-32b")
             r.launch(env_vars=[
                 "PYTHONHASHSEED=0", "OPTION_LMCACHE_AUTO_CONFIG=True"
             ])
-            client.run("vllm_lmcache qwen2.5-72b-lmcache-auto".split())
+            client.run("vllm_lmcache qwen2.5-32b-lmcache-auto".split())

     @pytest.mark.vllm
