9 changes: 3 additions & 6 deletions tests/fixtures/inference.py
@@ -9,7 +9,7 @@
from ocp_resources.service import Service
from ocp_resources.serving_runtime import ServingRuntime

from utilities.constants import RuntimeTemplates, KServeDeploymentType
from utilities.constants import RuntimeTemplates, KServeDeploymentType, QWEN_MODEL_NAME
from utilities.inference_utils import create_isvc
from utilities.serving_runtime import ServingRuntimeFromTemplate

@@ -32,10 +32,7 @@ def vllm_cpu_runtime(
"@sha256:ada6b3ba98829eb81ae4f89364d9b431c0222671eafb9a04aa16f31628536af2",
containers={
"kserve-container": {
"args": [
"--port=8032",
"--model=/mnt/models",
],
"args": ["--port=8032", "--model=/mnt/models", "--served-model-name={{.Name}}"],
"ports": [{"containerPort": 8032, "protocol": "TCP"}],
"volumeMounts": [{"mountPath": "/dev/shm", "name": "shm"}],
}
@@ -56,7 +53,7 @@ def qwen_isvc(
) -> Generator[InferenceService, Any, Any]:
with create_isvc(
client=admin_client,
name="qwen-isvc",
name=QWEN_MODEL_NAME,
namespace=model_namespace.name,
deployment_mode=KServeDeploymentType.RAW_DEPLOYMENT,
model_format="vLLM",
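Note on the runtime change above: KServe substitutes the {{.Name}} template variable with the InferenceService name, so adding --served-model-name={{.Name}} makes vLLM expose the model under the ISVC name rather than the model path. A minimal sketch of how the kserve-container args render, assuming the ISVC is created with QWEN_MODEL_NAME as in the qwen_isvc fixture above:

# Illustrative only: rendered args after KServe fills in the template.
isvc_name = "qwen25-05b-instruct"  # QWEN_MODEL_NAME, also the qwen_isvc name
rendered_args = [
    "--port=8032",
    "--model=/mnt/models",
    f"--served-model-name={isvc_name}",
]
# Without --served-model-name, vLLM would typically report the model as "/mnt/models".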
6 changes: 3 additions & 3 deletions tests/llama_stack/core/test_llamastack_core.py
@@ -46,9 +46,9 @@ def test_model_list(self, minio_pod, minio_data_connection, llama_stack_client):
models = llama_stack_client.models.list()

# We only need to check the first model;
# second is a granite embedding model present by default
assert len(models) == 2
assert models[0].identifier == QWEN_MODEL_NAME
# Second and third are embedding models present by default
assert len(models) >= 2
assert models[0].identifier == f"{LlamaStackProviders.Inference.VLLM_INFERENCE.value}/{QWEN_MODEL_NAME}"
assert models[0].model_type == "llm"
assert models[0].provider_id == LlamaStackProviders.Inference.VLLM_INFERENCE

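For reference, a hedged usage sketch of the same model listing outside the test harness, assuming the llama_stack_client package and a locally reachable llama-stack server (the base URL is an assumption):

from llama_stack_client import LlamaStackClient

client = LlamaStackClient(base_url="http://localhost:8321")  # endpoint is an assumption
# Mirrors the test above: the first entry is the vLLM-served LLM, the remaining
# entries are embedding models registered by default.
llms = [m for m in client.models.list() if m.model_type == "llm"]
print([m.identifier for m in llms])  # expected to include "<provider_id>/qwen25-05b-instruct"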
2 changes: 1 addition & 1 deletion utilities/constants.py
@@ -372,4 +372,4 @@ class RunTimeConfig:
"service": {"hostname": f"{QWEN_ISVC_NAME}-predictor", "port": 8032, "request_timeout": 600}
}
TRUSTYAI_SERVICE_NAME: str = "trustyai-service"
QWEN_MODEL_NAME: str = "qwen2.5-0.5b-instruct"
QWEN_MODEL_NAME: str = "qwen25-05b-instruct"