File tree Expand file tree Collapse file tree 2 files changed +2
-2
lines changed
model_explainability/guardrails
model_serving/model_runtime/vllm Expand file tree Collapse file tree 2 files changed +2
-2
lines changed Original file line number Diff line number Diff line change @@ -100,7 +100,7 @@ def vllm_runtime(
100100 client = admin_client ,
101101 name = "vllm-runtime-cpu-fp16" ,
102102 namespace = model_namespace .name ,
103- template_name = "vllm-cpu-runtime-template" ,
103+ template_name = "vllm-cuda-runtime-template" ,
104104 deployment_type = KServeDeploymentType .RAW_DEPLOYMENT ,
105105 runtime_image = "quay.io/rh-aiservices-bu/vllm-cpu-openai-ubi9"
106106 "@sha256:d680ff8becb6bbaf83dfee7b2d9b8a2beb130db7fd5aa7f9a6d8286a58cebbfd" ,
Original file line number Diff line number Diff line change 77VLLM_SUPPORTED_QUANTIZATION : list [str ] = ["marlin" , "awq" ]
88# Configurations
99TEMPLATE_MAP : dict [str , str ] = {
10- AcceleratorType .NVIDIA : "vllm-runtime-template" ,
10+ AcceleratorType .NVIDIA : "vllm-cuda-runtime-template" ,
1111 AcceleratorType .AMD : "vllm-rocm-runtime-template" ,
1212 AcceleratorType .GAUDI : "vllm-gaudi-runtime-template" ,
1313}
You can’t perform that action at this time.
0 commit comments