Skip to content

Commit 0f51317

Browse files
Fixed vllm test suite Automation Failures (#998)
* Fixed vllm test suite Automation Failures * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * Added failure message for speculative decoding lab_draft tests * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --------- Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com>
1 parent 502d97b commit 0f51317

12 files changed

+16
-12
lines changed

tests/model_serving/model_runtime/vllm/basic_model_deployment/test_elyza_japanese_llama_2_7b_instruct.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -21,7 +21,7 @@
2121
"--model=/mnt/models",
2222
"--uvicorn-log-level=debug",
2323
"--dtype=float16",
24-
"--chat-template=/app/data/template/template_chatglm.jinja",
24+
"--chat-template=/opt/app-root/template/template_chatglm.jinja",
2525
]
2626

2727
MODEL_PATH: str = "ELYZA-japanese-Llama-2-7b-instruct-hf"

tests/model_serving/model_runtime/vllm/basic_model_deployment/test_granite_7b_redhat_lab.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -21,7 +21,7 @@
2121
"--model=/mnt/models",
2222
"--uvicorn-log-level=debug",
2323
"--dtype=float16",
24-
"--chat-template=/app/data/template/template_chatglm.jinja",
24+
"--chat-template=/opt/app-root/template/template_chatglm.jinja",
2525
]
2626

2727
MODEL_PATH: str = "granite-7b-redhat-lab"

tests/model_serving/model_runtime/vllm/basic_model_deployment/test_granite_7b_starter.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -21,7 +21,7 @@
2121
"--model=/mnt/models",
2222
"--uvicorn-log-level=debug",
2323
"--dtype=float16",
24-
"--chat-template=/app/data/template/template_chatglm.jinja",
24+
"--chat-template=/opt/app-root/template/template_chatglm.jinja",
2525
]
2626

2727
MODEL_PATH: str = "granite-7b-starter"

tests/model_serving/model_runtime/vllm/basic_model_deployment/test_llama31_8B_instruct.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -21,7 +21,7 @@
2121
"--model=/mnt/models",
2222
"--uvicorn-log-level=debug",
2323
"--dtype=float16",
24-
"--chat-template=/app/data/template/tool_chat_template_llama3.1_json.jinja",
24+
"--chat-template=/opt/app-root/template/tool_chat_template_llama3.1_json.jinja",
2525
]
2626

2727
MODEL_PATH: str = "Meta-Llama-3.1-8B"

tests/model_serving/model_runtime/vllm/basic_model_deployment/test_llama3_8B_instruct.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -21,7 +21,7 @@
2121
"--model=/mnt/models",
2222
"--uvicorn-log-level=debug",
2323
"--dtype=float16",
24-
"--chat-template=/app/data/template/template_chatglm.jinja",
24+
"--chat-template=/opt/app-root/template/template_chatglm.jinja",
2525
]
2626

2727
MODEL_PATH: str = "Meta-Llama-3-8B-Instruct"

tests/model_serving/model_runtime/vllm/basic_model_deployment/test_llama_2_13b_chat.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -22,7 +22,7 @@
2222
"--model=/mnt/models",
2323
"--uvicorn-log-level=debug",
2424
"--dtype=float16",
25-
"--chat-template=/app/data/template/template_chatglm.jinja",
25+
"--chat-template=/opt/app-root/template/template_chatglm.jinja",
2626
]
2727

2828
MODEL_PATH: str = "Llama-2-13b-chat-hf"

tests/model_serving/model_runtime/vllm/basic_model_deployment/test_merlinite_7b_lab.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -21,7 +21,7 @@
2121
"--model=/mnt/models",
2222
"--uvicorn-log-level=debug",
2323
"--dtype=float16",
24-
"--chat-template=/app/data/template/template_chatglm.jinja",
24+
"--chat-template=/opt/app-root/template/template_chatglm.jinja",
2525
]
2626

2727
MODEL_PATH: str = "merlinite-7b-lab"

tests/model_serving/model_runtime/vllm/multimodal/test_granite_31_2b_vision.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -13,7 +13,7 @@
1313
LOGGER = get_logger(name=__name__)
1414

1515

16-
SERVING_ARGUMENT: List[str] = ["--model=/mnt/models", "--uvicorn-log-level=debug", "--limit-mm-per-prompt", "image=2"]
16+
SERVING_ARGUMENT: List[str] = ["--model=/mnt/models", "--uvicorn-log-level=debug", '--limit-mm-per-prompt={"image": 2}']
1717

1818
MODEL_PATH: str = "ibm-granite/granite-vision-3.1-2b-preview"
1919

tests/model_serving/model_runtime/vllm/quantization/test_openhermes-2_5_mistral-7b_awq.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -13,7 +13,7 @@
1313
SERVING_ARGUMENT = [
1414
"--model=/mnt/models",
1515
"--uvicorn-log-level=debug",
16-
"--chat-template=/app/data/template/tool_chat_template_mistral.jinja",
16+
"--chat-template=/opt/app-root/template/tool_chat_template_mistral.jinja",
1717
]
1818

1919
MODEL_PATH = "TheBloke/OpenHermes-2.5-Mistral-7B-AWQ"

tests/model_serving/model_runtime/vllm/speculative_decoding/test_granite_7b_lab_draft.py

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -19,7 +19,6 @@
1919
"--dtype=float16",
2020
"--speculative_config",
2121
'{ "model": "/mnt/models/granite-7b-instruct-accelerator", "num_speculative_tokens": 5 }',
22-
"--use-v2-block-manager",
2322
]
2423

2524
MODEL_PATH: str = "speculative_decoding"
@@ -48,6 +47,9 @@
4847
indirect=True,
4948
)
5049
class TestGraniteLabDraftModel:
50+
@pytest.mark.xfail(
51+
reason="vLLM does not support MLPSpeculatorPreTrainedModel architecture for draft model speculative decoding"
52+
)
5153
def test_spec_draft_inference(
5254
self,
5355
vllm_inference_service: Generator[InferenceService, Any, Any],
@@ -97,6 +99,9 @@ def test_spec_draft_inference(
9799
indirect=True,
98100
)
99101
class TestMultiGraniteLabDraftModel:
102+
@pytest.mark.xfail(
103+
reason="vLLM does not support MLPSpeculatorPreTrainedModel architecture for draft model speculative decoding"
104+
)
100105
def test_multi_spec_draft_inference(
101106
self,
102107
vllm_inference_service: Generator[InferenceService, Any, Any],

0 commit comments

Comments (0)