We read every piece of feedback and take your input very seriously.
To see all available qualifiers, see our documentation.
1 parent a49ee47 · commit 031e1b6 — Copy full SHA for 031e1b6
tests/fixtures/inference.py
@@ -9,7 +9,7 @@
9
from ocp_resources.service import Service
10
from ocp_resources.serving_runtime import ServingRuntime
11
12
-from utilities.constants import RuntimeTemplates, KServeDeploymentType
+from utilities.constants import RuntimeTemplates, KServeDeploymentType, QWEN_MODEL_NAME
13
from utilities.inference_utils import create_isvc
14
from utilities.serving_runtime import ServingRuntimeFromTemplate
15
@@ -35,6 +35,7 @@ def vllm_cpu_runtime(
35
"args": [
36
"--port=8032",
37
"--model=/mnt/models",
38
+ f"--served-model-name={QWEN_MODEL_NAME}",
39
],
40
"ports": [{"containerPort": 8032, "protocol": "TCP"}],
41
"volumeMounts": [{"mountPath": "/dev/shm", "name": "shm"}],
0 commit comments