Skip to content

Commit 7f2f255

Browse files
Merge pull request opendatahub-io#590 from brettmthompson/sync-incubating-to-main-102825
Sync incubating to main 102825
2 parents 84511df + bec6934 commit 7f2f255

File tree

2 files changed

Lines changed: 23 additions & 17 deletions

2 files changed

Lines changed: 23 additions & 17 deletions

config/runtimes/vllm-spyre-s390x-template.yaml

Lines changed: 22 additions & 16 deletions
Original file line number | Diff line number | Diff line change
@@ -22,7 +22,7 @@ objects:
2222
name: vllm-spyre-s390x-runtime
2323
annotations:
2424
openshift.io/display-name: vLLM Spyre s390x ServingRuntime for KServe
25-
opendatahub.io/recommended-accelerators: '["ibm.com/spyre_pf"]'
25+
opendatahub.io/recommended-accelerators: '["ibm.com/spyre_vf"]'
2626
opendatahub.io/runtime-version: 'v0.10.2.0'
2727
labels:
2828
opendatahub.io/dashboard: 'true'
@@ -34,37 +34,43 @@ objects:
3434
- image: $(vllm-spyre-s390x-image)
3535
name: kserve-container
3636
command:
37-
- python3
38-
- '-m'
39-
- vllm_tgis_adapter
37+
- /bin/bash
38+
- -c
39+
- source /etc/profile.d/ibm-aiu-setup.sh && exec python3 -m vllm.entrypoints.openai.api_server "$@"
40+
- --
4041
args:
41-
- /mnt/models
42+
- '--model=/mnt/models'
4243
- '--port=8000'
4344
- '--served-model-name={{.Name}}'
44-
- '--grpc-port=8033'
4545
env:
4646
- name: HF_HOME
4747
value: /tmp/hf_home
48-
- name: FLEX_COMPUTE
49-
value: SENTIENT
5048
- name: FLEX_DEVICE
51-
value: PF
49+
value: VF
5250
- name: TOKENIZERS_PARALLELISM
5351
value: 'false'
5452
- name: DTLOG_LEVEL
5553
value: error
5654
- name: TORCH_SENDNN_LOG
5755
value: CRITICAL
58-
- name: VLLM_SPYRE_WARMUP_BATCH_SIZES
59-
value: '4'
60-
- name: VLLM_SPYRE_WARMUP_PROMPT_LENS
61-
value: '1024'
62-
- name: VLLM_SPYRE_WARMUP_NEW_TOKENS
63-
value: '256'
56+
- name: VLLM_SPYRE_USE_CB
57+
value: "1"
58+
- name: VLLM_SPYRE_REQUIRE_PRECOMPILED_DECODERS
59+
value: "1"
60+
- name: TORCH_SENDNN_CACHE_ENABLE
61+
value: "1"
6462
ports:
6563
- containerPort: 8000
6664
protocol: TCP
65+
volumeMounts:
66+
- name: shm
67+
mountPath: /dev/shm
6768
multiModel: false
6869
supportedModelFormats:
6970
- autoSelect: true
70-
name: vLLM
71+
name: vLLM
72+
volumes:
73+
- name: shm
74+
emptyDir:
75+
medium: Memory
76+
sizeLimit: 2Gi

config/runtimes/vllm-spyre-x86-template.yaml

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -34,7 +34,7 @@ objects:
3434
- image: $(vllm-spyre-x86-image)
3535
name: kserve-container
3636
args:
37-
- /mnt/models
37+
- '--model=/mnt/models'
3838
- '--port=8000'
3939
- '--served-model-name={{.Name}}'
4040
env:

0 commit comments

Comments
 (0)