Skip to content

Commit 0f51317

Browse files
Fixed vllm test suite Automation Failures (#998)
* Fixed vllm test suite Automation Failures * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * Added failure message for speculative decoding lab_draft tests * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --------- Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com>
1 parent 502d97b commit 0f51317

12 files changed

+16
-12
lines changed

tests/model_serving/model_runtime/vllm/basic_model_deployment/test_elyza_japanese_llama_2_7b_instruct.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -21,7 +21,7 @@
2121
"--model=/mnt/models",
2222
"--uvicorn-log-level=debug",
2323
"--dtype=float16",
24-
"--chat-template=/app/data/template/template_chatglm.jinja",
24+
"--chat-template=/opt/app-root/template/template_chatglm.jinja",
2525
]
2626

2727
MODEL_PATH: str = "ELYZA-japanese-Llama-2-7b-instruct-hf"

tests/model_serving/model_runtime/vllm/basic_model_deployment/test_granite_7b_redhat_lab.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -21,7 +21,7 @@
2121
"--model=/mnt/models",
2222
"--uvicorn-log-level=debug",
2323
"--dtype=float16",
24-
"--chat-template=/app/data/template/template_chatglm.jinja",
24+
"--chat-template=/opt/app-root/template/template_chatglm.jinja",
2525
]
2626

2727
MODEL_PATH: str = "granite-7b-redhat-lab"

tests/model_serving/model_runtime/vllm/basic_model_deployment/test_granite_7b_starter.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -21,7 +21,7 @@
2121
"--model=/mnt/models",
2222
"--uvicorn-log-level=debug",
2323
"--dtype=float16",
24-
"--chat-template=/app/data/template/template_chatglm.jinja",
24+
"--chat-template=/opt/app-root/template/template_chatglm.jinja",
2525
]
2626

2727
MODEL_PATH: str = "granite-7b-starter"

tests/model_serving/model_runtime/vllm/basic_model_deployment/test_llama31_8B_instruct.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -21,7 +21,7 @@
2121
"--model=/mnt/models",
2222
"--uvicorn-log-level=debug",
2323
"--dtype=float16",
24-
"--chat-template=/app/data/template/tool_chat_template_llama3.1_json.jinja",
24+
"--chat-template=/opt/app-root/template/tool_chat_template_llama3.1_json.jinja",
2525
]
2626

2727
MODEL_PATH: str = "Meta-Llama-3.1-8B"

tests/model_serving/model_runtime/vllm/basic_model_deployment/test_llama3_8B_instruct.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -21,7 +21,7 @@
2121
"--model=/mnt/models",
2222
"--uvicorn-log-level=debug",
2323
"--dtype=float16",
24-
"--chat-template=/app/data/template/template_chatglm.jinja",
24+
"--chat-template=/opt/app-root/template/template_chatglm.jinja",
2525
]
2626

2727
MODEL_PATH: str = "Meta-Llama-3-8B-Instruct"

tests/model_serving/model_runtime/vllm/basic_model_deployment/test_llama_2_13b_chat.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -22,7 +22,7 @@
2222
"--model=/mnt/models",
2323
"--uvicorn-log-level=debug",
2424
"--dtype=float16",
25-
"--chat-template=/app/data/template/template_chatglm.jinja",
25+
"--chat-template=/opt/app-root/template/template_chatglm.jinja",
2626
]
2727

2828
MODEL_PATH: str = "Llama-2-13b-chat-hf"

tests/model_serving/model_runtime/vllm/basic_model_deployment/test_merlinite_7b_lab.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -21,7 +21,7 @@
2121
"--model=/mnt/models",
2222
"--uvicorn-log-level=debug",
2323
"--dtype=float16",
24-
"--chat-template=/app/data/template/template_chatglm.jinja",
24+
"--chat-template=/opt/app-root/template/template_chatglm.jinja",
2525
]
2626

2727
MODEL_PATH: str = "merlinite-7b-lab"

tests/model_serving/model_runtime/vllm/multimodal/test_granite_31_2b_vision.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -13,7 +13,7 @@
1313
LOGGER = get_logger(name=__name__)
1414

1515

16-
SERVING_ARGUMENT: List[str] = ["--model=/mnt/models", "--uvicorn-log-level=debug", "--limit-mm-per-prompt", "image=2"]
16+
SERVING_ARGUMENT: List[str] = ["--model=/mnt/models", "--uvicorn-log-level=debug", '--limit-mm-per-prompt={"image": 2}']
1717

1818
MODEL_PATH: str = "ibm-granite/granite-vision-3.1-2b-preview"
1919

tests/model_serving/model_runtime/vllm/quantization/test_openhermes-2_5_mistral-7b_awq.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -13,7 +13,7 @@
1313
SERVING_ARGUMENT = [
1414
"--model=/mnt/models",
1515
"--uvicorn-log-level=debug",
16-
"--chat-template=/app/data/template/tool_chat_template_mistral.jinja",
16+
"--chat-template=/opt/app-root/template/tool_chat_template_mistral.jinja",
1717
]
1818

1919
MODEL_PATH = "TheBloke/OpenHermes-2.5-Mistral-7B-AWQ"

tests/model_serving/model_runtime/vllm/speculative_decoding/test_granite_7b_lab_draft.py

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -19,7 +19,6 @@
1919
"--dtype=float16",
2020
"--speculative_config",
2121
'{ "model": "/mnt/models/granite-7b-instruct-accelerator", "num_speculative_tokens": 5 }',
22-
"--use-v2-block-manager",
2322
]
2423

2524
MODEL_PATH: str = "speculative_decoding"
@@ -48,6 +47,9 @@
4847
indirect=True,
4948
)
5049
class TestGraniteLabDraftModel:
50+
@pytest.mark.xfail(
51+
reason="vLLM does not support MLPSpeculatorPreTrainedModel architecture for draft model speculative decoding"
52+
)
5153
def test_spec_draft_inference(
5254
self,
5355
vllm_inference_service: Generator[InferenceService, Any, Any],
@@ -97,6 +99,9 @@ def test_spec_draft_inference(
9799
indirect=True,
98100
)
99101
class TestMultiGraniteLabDraftModel:
102+
@pytest.mark.xfail(
103+
reason="vLLM does not support MLPSpeculatorPreTrainedModel architecture for draft model speculative decoding"
104+
)
100105
def test_multi_spec_draft_inference(
101106
self,
102107
vllm_inference_service: Generator[InferenceService, Any, Any],

0 commit comments

Comments (0)