From f45dba39c13c89688d6bc160b51d4141b77873b9 Mon Sep 17 00:00:00 2001
From: Jorge Garcia Oncins
Date: Fri, 5 Sep 2025 12:20:24 +0200
Subject: [PATCH] fix: Fix qwen model name in llama-stack core tests

Signed-off-by: Jorge Garcia Oncins
---
 tests/fixtures/inference.py                    | 9 +++------
 tests/llama_stack/core/test_llamastack_core.py | 6 +++---
 utilities/constants.py                         | 2 +-
 3 files changed, 7 insertions(+), 10 deletions(-)

diff --git a/tests/fixtures/inference.py b/tests/fixtures/inference.py
index aaebd8ffa..2616ce747 100644
--- a/tests/fixtures/inference.py
+++ b/tests/fixtures/inference.py
@@ -9,7 +9,7 @@
 from ocp_resources.service import Service
 from ocp_resources.serving_runtime import ServingRuntime
 
-from utilities.constants import RuntimeTemplates, KServeDeploymentType
+from utilities.constants import RuntimeTemplates, KServeDeploymentType, QWEN_MODEL_NAME
 from utilities.inference_utils import create_isvc
 from utilities.serving_runtime import ServingRuntimeFromTemplate
 
@@ -32,10 +32,7 @@ def vllm_cpu_runtime(
         "@sha256:ada6b3ba98829eb81ae4f89364d9b431c0222671eafb9a04aa16f31628536af2",
         containers={
             "kserve-container": {
-                "args": [
-                    "--port=8032",
-                    "--model=/mnt/models",
-                ],
+                "args": ["--port=8032", "--model=/mnt/models", "--served-model-name={{.Name}}"],
                 "ports": [{"containerPort": 8032, "protocol": "TCP"}],
                 "volumeMounts": [{"mountPath": "/dev/shm", "name": "shm"}],
             }
@@ -56,7 +53,7 @@ def qwen_isvc(
 ) -> Generator[InferenceService, Any, Any]:
     with create_isvc(
         client=admin_client,
-        name="qwen-isvc",
+        name=QWEN_MODEL_NAME,
         namespace=model_namespace.name,
         deployment_mode=KServeDeploymentType.RAW_DEPLOYMENT,
         model_format="vLLM",
diff --git a/tests/llama_stack/core/test_llamastack_core.py b/tests/llama_stack/core/test_llamastack_core.py
index f070059d7..d687cc356 100644
--- a/tests/llama_stack/core/test_llamastack_core.py
+++ b/tests/llama_stack/core/test_llamastack_core.py
@@ -46,9 +46,9 @@ def test_model_list(self, minio_pod, minio_data_connection, llama_stack_client)
         models = llama_stack_client.models.list()
 
         # We only need to check the first model;
-        # second is a granite embedding model present by default
-        assert len(models) == 2
-        assert models[0].identifier == QWEN_MODEL_NAME
+        # Second and third are embedding models present by default
+        assert len(models) >= 2
+        assert models[0].identifier == f"{LlamaStackProviders.Inference.VLLM_INFERENCE.value}/{QWEN_MODEL_NAME}"
         assert models[0].model_type == "llm"
         assert models[0].provider_id == LlamaStackProviders.Inference.VLLM_INFERENCE
 
diff --git a/utilities/constants.py b/utilities/constants.py
index 2569ae6f7..d194a28d7 100644
--- a/utilities/constants.py
+++ b/utilities/constants.py
@@ -372,4 +372,4 @@ class RunTimeConfig:
     "service": {"hostname": f"{QWEN_ISVC_NAME}-predictor", "port": 8032, "request_timeout": 600}
 }
 TRUSTYAI_SERVICE_NAME: str = "trustyai-service"
-QWEN_MODEL_NAME: str = "qwen2.5-0.5b-instruct"
+QWEN_MODEL_NAME: str = "qwen25-05b-instruct"