Skip to content

Commit f45dba3

Browse files
committed
fix: Fix qwen model name in llama-stack core tests
Signed-off-by: Jorge Garcia Oncins <jgarciao@redhat.com>
1 parent f074dcf commit f45dba3

File tree

3 files changed

+7
-10
lines changed

3 files changed

+7
-10
lines changed

tests/fixtures/inference.py

Lines changed: 3 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,7 @@
99
from ocp_resources.service import Service
1010
from ocp_resources.serving_runtime import ServingRuntime
1111

12-
from utilities.constants import RuntimeTemplates, KServeDeploymentType
12+
from utilities.constants import RuntimeTemplates, KServeDeploymentType, QWEN_MODEL_NAME
1313
from utilities.inference_utils import create_isvc
1414
from utilities.serving_runtime import ServingRuntimeFromTemplate
1515

@@ -32,10 +32,7 @@ def vllm_cpu_runtime(
3232
"@sha256:ada6b3ba98829eb81ae4f89364d9b431c0222671eafb9a04aa16f31628536af2",
3333
containers={
3434
"kserve-container": {
35-
"args": [
36-
"--port=8032",
37-
"--model=/mnt/models",
38-
],
35+
"args": ["--port=8032", "--model=/mnt/models", "--served-model-name={{.Name}}"],
3936
"ports": [{"containerPort": 8032, "protocol": "TCP"}],
4037
"volumeMounts": [{"mountPath": "/dev/shm", "name": "shm"}],
4138
}
@@ -56,7 +53,7 @@ def qwen_isvc(
5653
) -> Generator[InferenceService, Any, Any]:
5754
with create_isvc(
5855
client=admin_client,
59-
name="qwen-isvc",
56+
name=QWEN_MODEL_NAME,
6057
namespace=model_namespace.name,
6158
deployment_mode=KServeDeploymentType.RAW_DEPLOYMENT,
6259
model_format="vLLM",

tests/llama_stack/core/test_llamastack_core.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -46,9 +46,9 @@ def test_model_list(self, minio_pod, minio_data_connection, llama_stack_client):
4646
models = llama_stack_client.models.list()
4747

4848
# We only need to check the first model;
49-
# second is a granite embedding model present by default
50-
assert len(models) == 2
51-
assert models[0].identifier == QWEN_MODEL_NAME
49+
# Second and third are embedding models present by default
50+
assert len(models) >= 2
51+
assert models[0].identifier == f"{LlamaStackProviders.Inference.VLLM_INFERENCE.value}/{QWEN_MODEL_NAME}"
5252
assert models[0].model_type == "llm"
5353
assert models[0].provider_id == LlamaStackProviders.Inference.VLLM_INFERENCE
5454

utilities/constants.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -372,4 +372,4 @@ class RunTimeConfig:
372372
"service": {"hostname": f"{QWEN_ISVC_NAME}-predictor", "port": 8032, "request_timeout": 600}
373373
}
374374
TRUSTYAI_SERVICE_NAME: str = "trustyai-service"
375-
QWEN_MODEL_NAME: str = "qwen2.5-0.5b-instruct"
375+
QWEN_MODEL_NAME: str = "qwen25-05b-instruct"

0 commit comments

Comments (0)