2 files changed: +23 -17 lines changed
Original file line number | Diff line number | Diff line change
@@ -22,7 +22,7 @@ objects:
2222 name : vllm-spyre-s390x-runtime
2323 annotations :
2424 openshift.io/display-name : vLLM Spyre s390x ServingRuntime for KServe
25- opendatahub.io/recommended-accelerators : ' ["ibm.com/spyre_pf "]'
25+ opendatahub.io/recommended-accelerators : ' ["ibm.com/spyre_vf "]'
2626 opendatahub.io/runtime-version : ' v0.10.2.0'
2727 labels :
2828 opendatahub.io/dashboard : ' true'
@@ -34,37 +34,43 @@ objects:
3434 - image : $(vllm-spyre-s390x-image)
3535 name : kserve-container
3636 command :
37- - python3
38- - ' -m'
39- - vllm_tgis_adapter
37+ - /bin/bash
38+ - -c
39+ - source /etc/profile.d/ibm-aiu-setup.sh && exec python3 -m vllm.entrypoints.openai.api_server "$@"
40+ - --
4041 args :
41- - /mnt/models
42+ - ' --model= /mnt/models'
4243 - ' --port=8000'
4344 - ' --served-model-name={{.Name}}'
44- - ' --grpc-port=8033'
4545 env :
4646 - name : HF_HOME
4747 value : /tmp/hf_home
48- - name : FLEX_COMPUTE
49- value : SENTIENT
5048 - name : FLEX_DEVICE
51- value : PF
49+ value : VF
5250 - name : TOKENIZERS_PARALLELISM
5351 value : ' false'
5452 - name : DTLOG_LEVEL
5553 value : error
5654 - name : TORCH_SENDNN_LOG
5755 value : CRITICAL
58- - name : VLLM_SPYRE_WARMUP_BATCH_SIZES
59- value : ' 4 '
60- - name : VLLM_SPYRE_WARMUP_PROMPT_LENS
61- value : ' 1024 '
62- - name : VLLM_SPYRE_WARMUP_NEW_TOKENS
63- value : ' 256 '
56+ - name : VLLM_SPYRE_USE_CB
57+ value : " 1 "
58+ - name : VLLM_SPYRE_REQUIRE_PRECOMPILED_DECODERS
59+ value : " 1 "
60+ - name : TORCH_SENDNN_CACHE_ENABLE
61+ value : " 1 "
6462 ports :
6563 - containerPort : 8000
6664 protocol : TCP
65+ volumeMounts :
66+ - name : shm
67+ mountPath : /dev/shm
6768 multiModel : false
6869 supportedModelFormats :
6970 - autoSelect : true
70- name : vLLM
71+ name : vLLM
72+ volumes :
73+ - name : shm
74+ emptyDir :
75+ medium : Memory
76+ sizeLimit : 2Gi
Original file line number | Diff line number | Diff line change
@@ -34,7 +34,7 @@ objects:
3434 - image : $(vllm-spyre-x86-image)
3535 name : kserve-container
3636 args :
37- - /mnt/models
37+ - ' --model= /mnt/models'
3838 - ' --port=8000'
3939 - ' --served-model-name={{.Name}}'
4040 env :
You can’t perform that action at this time.
0 commit comments