9 changes: 6 additions & 3 deletions config/llmisvcconfig/config-llm-template.yaml
@@ -12,9 +12,12 @@ spec:
   - containerPort: 8000
     protocol: TCP
   command:
-    - vllm
-    - serve
-    - /mnt/models
+    - /bin/bash
+    - -c
+    - |
+      [ -f /etc/profile.d/ibm-aiu-setup.sh ] && source /etc/profile.d/ibm-aiu-setup.sh
+      exec vllm serve /mnt/models "$@"
+    - --
   args:
     - --served-model-name
     - "{{ .Spec.Model.Name }}"
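The trailing `--` works because arguments placed after the `bash -c` script string are bound to `$0`, `$1`, … inside the script, so the pod's `args` land in `"$@"` and are forwarded to `exec vllm serve /mnt/models "$@"`. A minimal demonstration of that binding (the model-name value is illustrative):

```shell
# `bash -c '<script>' arg0 arg1 ...`: arg0 becomes $0, the rest become "$@".
# Passing `--` as $0 lets the real args flow through untouched.
bash -c 'echo "forwarded: $@"' -- --served-model-name demo-model
# prints: forwarded: --served-model-name demo-model
```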
12 changes: 12 additions & 0 deletions config/overlays/odh/accelerators/amd-rocm-config-llm-template.yaml
@@ -0,0 +1,12 @@
apiVersion: serving.kserve.io/v1alpha2
kind: LLMInferenceServiceConfig
metadata:
  name: kserve-config-llm-template-amd-rocm
  annotations:
    openshift.io/display-name: vLLM AMD ROCm GPU LLMInferenceServiceConfig
    description: vLLM AMD ROCm GPU LLMInferenceServiceConfig for LLMInferenceService.
spec:
  template:
    containers:
      - name: main
        image: placeholder
15 changes: 15 additions & 0 deletions config/overlays/odh/accelerators/ibm-spyre-ppc64le-config-llm-template.yaml
@@ -0,0 +1,15 @@
apiVersion: serving.kserve.io/v1alpha2
kind: LLMInferenceServiceConfig
metadata:
  name: kserve-config-llm-template-ibm-spyre-ppc64le
  annotations:
    openshift.io/display-name: vLLM IBM Spyre ppc64le LLMInferenceServiceConfig
    description: vLLM IBM Spyre ppc64le LLMInferenceServiceConfig for LLMInferenceService.
spec:
  template:
    containers:
      - name: main
        image: placeholder
        env:
          - name: HF_HOME
            value: /tmp/hf_home
Comment on lines +1 to +15
⚠️ Potential issue | 🟡 Minor

🧩 Analysis chain

🏁 Script executed:

#!/bin/bash
# Find all IBM Spyre LLM template config files
fd ".*spyre.*config.*template.*yaml" --type f

Repository: opendatahub-io/kserve

Length of output: 286


🏁 Script executed:

#!/bin/bash
# Read each variant file to compare env var configurations
echo "=== ppc64le ===" && cat config/overlays/odh/accelerators/ibm-spyre-ppc64le-config-llm-template.yaml
echo -e "\n=== s390x ===" && cat config/overlays/odh/accelerators/ibm-spyre-s390x-config-llm-template.yaml
echo -e "\n=== x86 ===" && cat config/overlays/odh/accelerators/ibm-spyre-x86-config-llm-template.yaml

Repository: opendatahub-io/kserve

Length of output: 2543


🏁 Script executed:

#!/bin/bash
# Check for any TODO/FIXME/comments about ppc64le config or IBM Power
rg -i "ppc64le|ibm.*power" --type yaml -B 2 -A 2 config/overlays/odh/accelerators/
rg -i "TODO|FIXME" config/overlays/odh/accelerators/ibm-spyre-ppc64le-config-llm-template.yaml

Repository: opendatahub-io/kserve

Length of output: 1259


🏁 Script executed:

#!/bin/bash
# Check for README or documentation about accelerators configs
fd "README|ACCELERATOR" --type f config/overlays/odh/accelerators/
# Also search for any documentation directory
fd "doc|docs" --type d config/overlays/odh/accelerators/
# Check if there are any comments or docs at top-level about accelerators
rg -i "accelerator|spyre" README.md --type markdown -A 5 -B 5 2>/dev/null || echo "No match in README"

Repository: opendatahub-io/kserve

Length of output: 83


ppc64le template lacks the Spyre-specific env vars defined in the s390x and x86 variants; confirm whether this is intentional or document it as incomplete.

The s390x variant defines 8 environment variables (HF_HOME plus FLEX_DEVICE, TOKENIZERS_PARALLELISM, DTLOG_LEVEL, TORCH_SENDNN_LOG, VLLM_SPYRE_USE_CB, VLLM_SPYRE_REQUIRE_PRECOMPILED_DECODERS, and TORCH_SENDNN_CACHE_ENABLE) and x86 defines 10; ppc64le sets only HF_HOME. If this sparse configuration is intentional for ppc64le hardware, add a comment explaining why these knobs are not needed. If it is a placeholder pending input from the IBM Power team, add a `# TODO:` annotation so the gap is not silently merged as final.

🤖 Prompt for AI Agents
Verify each finding against the current code and only fix it if needed.

In `@config/overlays/odh/accelerators/ibm-spyre-ppc64le-config-llm-template.yaml`
around lines 1 - 15, The ppc64le LLMInferenceServiceConfig template (kind
LLMInferenceServiceConfig, spec.template.containers[name: main]) currently only
sets HF_HOME while s390x/x86 variants set Spyre-specific env vars (FLEX_DEVICE,
TOKENIZERS_PARALLELISM, DTLOG_LEVEL, TORCH_SENDNN_LOG, VLLM_SPYRE_USE_CB,
VLLM_SPYRE_REQUIRE_PRECOMPILED_DECODERS, TORCH_SENDNN_CACHE_ENABLE, etc.);
update this template to either include the same Spyre-specific environment
variables with appropriate ppc64le values or add a clear inline comment/TODO in
the template explaining that the reduced env set is intentional or awaiting IBM
Power team input so the omission is not merged silently (modify
spec.template.containers -> name: main to add the comment/TODO or the env
entries).
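One way to make the omission explicit, as the review suggests (a sketch only; the TODO owner and wording are hypothetical):

```yaml
spec:
  template:
    containers:
      - name: main
        image: placeholder
        env:
          - name: HF_HOME
            value: /tmp/hf_home
          # TODO(ibm-power): confirm whether the Spyre env vars used by the
          # s390x/x86 variants (FLEX_DEVICE, VLLM_SPYRE_*, TORCH_SENDNN_*)
          # are required on ppc64le, or document why they are omitted.
```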

29 changes: 29 additions & 0 deletions config/overlays/odh/accelerators/ibm-spyre-s390x-config-llm-template.yaml
@@ -0,0 +1,29 @@
apiVersion: serving.kserve.io/v1alpha2
kind: LLMInferenceServiceConfig
metadata:
  name: kserve-config-llm-template-ibm-spyre-s390x
  annotations:
    openshift.io/display-name: vLLM IBM Spyre s390x LLMInferenceServiceConfig
    description: vLLM IBM Spyre s390x LLMInferenceServiceConfig for LLMInferenceService.
spec:
  template:
    containers:
      - name: main
        image: placeholder
        env:
          - name: HF_HOME
            value: /tmp/hf_home
          - name: FLEX_DEVICE
            value: VF
          - name: TOKENIZERS_PARALLELISM
            value: "false"
          - name: DTLOG_LEVEL
            value: error
          - name: TORCH_SENDNN_LOG
            value: CRITICAL
          - name: VLLM_SPYRE_USE_CB
            value: "1"
          - name: VLLM_SPYRE_REQUIRE_PRECOMPILED_DECODERS
            value: "1"
          - name: TORCH_SENDNN_CACHE_ENABLE
            value: "1"
33 changes: 33 additions & 0 deletions config/overlays/odh/accelerators/ibm-spyre-x86-config-llm-template.yaml
@@ -0,0 +1,33 @@
apiVersion: serving.kserve.io/v1alpha2
kind: LLMInferenceServiceConfig
metadata:
  name: kserve-config-llm-template-ibm-spyre-x86
  annotations:
    openshift.io/display-name: vLLM IBM Spyre x86 LLMInferenceServiceConfig
    description: vLLM IBM Spyre x86 LLMInferenceServiceConfig for LLMInferenceService.
spec:
  template:
    containers:
      - name: main
        image: placeholder
        env:
          - name: HF_HOME
            value: /tmp/hf_home
          - name: FLEX_COMPUTE
            value: SENTIENT
          - name: FLEX_DEVICE
            value: PF
          - name: TOKENIZERS_PARALLELISM
            value: "false"
          - name: DTLOG_LEVEL
            value: error
          - name: TORCH_SENDNN_LOG
            value: CRITICAL
          - name: VLLM_SPYRE_WARMUP_BATCH_SIZES
            value: "4"
          - name: VLLM_SPYRE_WARMUP_PROMPT_LENS
            value: "1024"
          - name: VLLM_SPYRE_WARMUP_NEW_TOKENS
            value: "256"
          - name: VLLM_SPYRE_REQUIRE_PRECOMPILED_DECODERS
            value: "0"
12 changes: 12 additions & 0 deletions config/overlays/odh/accelerators/kustomization.yaml
@@ -0,0 +1,12 @@
apiVersion: kustomize.config.k8s.io/v1beta1
kind: Kustomization

commonLabels:
  opendatahub.io/config-type: accelerator

resources:
  - nvidia-cuda-config-llm-template.yaml
  - amd-rocm-config-llm-template.yaml
  - ibm-spyre-s390x-config-llm-template.yaml
  - ibm-spyre-x86-config-llm-template.yaml
  - ibm-spyre-ppc64le-config-llm-template.yaml
12 changes: 12 additions & 0 deletions config/overlays/odh/accelerators/nvidia-cuda-config-llm-template.yaml
@@ -0,0 +1,12 @@
apiVersion: serving.kserve.io/v1alpha2
kind: LLMInferenceServiceConfig
metadata:
  name: kserve-config-llm-template-nvidia-cuda
  annotations:
    openshift.io/display-name: vLLM NVIDIA CUDA GPU LLMInferenceServiceConfig
    description: vLLM NVIDIA CUDA GPU LLMInferenceServiceConfig for LLMInferenceService.
spec:
  template:
    containers:
      - name: main
        image: placeholder
44 changes: 44 additions & 0 deletions config/overlays/odh/kustomization.yaml
@@ -7,6 +7,7 @@ resources:
 # - ../../crd/full/localmodel
 - user-cluster-roles.yaml
 - network-policies.yaml
+- accelerators/
 
 components:
 - ../../components/kserve
@@ -55,6 +56,49 @@ replacements:
       fieldPaths:
         - spec.template.spec.containers.[name=manager].image
 
+  - source:
+      kind: ConfigMap
+      name: kserve-parameters
+      fieldPath: data.kserve-llm-d-nvidia-cuda
+    targets:
+      - select:
+          kind: LLMInferenceServiceConfig
+          name: kserve-config-llm-template-nvidia-cuda
+        fieldPaths:
+          - spec.template.containers.[name=main].image
+
+  - source:
+      kind: ConfigMap
+      name: kserve-parameters
+      fieldPath: data.kserve-llm-d-amd-rocm
+    targets:
+      - select:
+          kind: LLMInferenceServiceConfig
+          name: kserve-config-llm-template-amd-rocm
+        fieldPaths:
+          - spec.template.containers.[name=main].image
+
+  - source:
+      kind: ConfigMap
+      name: kserve-parameters
+      fieldPath: data.kserve-llm-d-ibm-spyre
+    targets:
+      - select:
+          kind: LLMInferenceServiceConfig
+          name: kserve-config-llm-template-ibm-spyre-s390x
+        fieldPaths:
+          - spec.template.containers.[name=main].image
+      - select:
+          kind: LLMInferenceServiceConfig
+          name: kserve-config-llm-template-ibm-spyre-x86
+        fieldPaths:
+          - spec.template.containers.[name=main].image
+      - select:
+          kind: LLMInferenceServiceConfig
+          name: kserve-config-llm-template-ibm-spyre-ppc64le
+        fieldPaths:
+          - spec.template.containers.[name=main].image
Comment on lines +81 to +100

⚠️ Potential issue | 🔴 Critical

🧩 Analysis chain

🏁 Script executed:

fd -g 'params.env' --exec grep -n 'kserve-llm-d-ibm-spyre\|kserve-llm-d-nvidia-cuda\|kserve-llm-d-amd-rocm' {}

Repository: opendatahub-io/kserve

Length of output: 483


🏁 Script executed:

# First, verify the file structure and see the context around lines 81-100
fd -g 'kustomization.yaml' | grep odh | head -5

Repository: opendatahub-io/kserve

Length of output: 199


🏁 Script executed:

# Search for any README or documentation about image architecture support
fd -g 'README*' -o -g '*.md' | xargs grep -l 'spyre\|s390x\|ppc64le' 2>/dev/null | head -10

Repository: opendatahub-io/kserve

Length of output: 181


🏁 Script executed:

# Look for any other references to architecture-specific images or configs
rg 's390x|ppc64le' -t yaml -t md | head -20

Repository: opendatahub-io/kserve

Length of output: 1338


🏁 Script executed:

cat -n config/overlays/odh/kustomization.yaml | sed -n '75,105p'

Repository: opendatahub-io/kserve

Length of output: 1137


🏁 Script executed:

# Check if there's an x86 variant
fd -g '*ibm-spyre*' -o -g '*x86*' config/overlays/odh/accelerators/

Repository: opendatahub-io/kserve

Length of output: 181


🏁 Script executed:

# Also check what's in the accelerators directory
ls -la config/overlays/odh/accelerators/ | grep ibm

Repository: opendatahub-io/kserve

Length of output: 334


🏁 Script executed:

cat config/overlays/odh/accelerators/ibm-spyre-s390x-config-llm-template.yaml

Repository: opendatahub-io/kserve

Length of output: 974


🏁 Script executed:

cat config/overlays/odh/accelerators/ibm-spyre-x86-config-llm-template.yaml

Repository: opendatahub-io/kserve

Length of output: 1118


Verify that a single Spyre image is correct for all three architectures.

All three Spyre variants (s390x, x86, ppc64le) pull their container image from the same data.kserve-llm-d-ibm-spyre key, yet their LLMInferenceServiceConfig resources define different environment variables per architecture: s390x sets FLEX_DEVICE=VF, VLLM_SPYRE_USE_CB=1, and VLLM_SPYRE_REQUIRE_PRECOMPILED_DECODERS=1, while x86 sets FLEX_COMPUTE=SENTIENT, FLEX_DEVICE=PF, and VLLM_SPYRE_REQUIRE_PRECOMPILED_DECODERS=0. These architectures (s390x is IBM Z mainframe, ppc64le is IBM Power, x86 is Intel/AMD) require different CPU instruction sets and cannot share a single binary image.

The params.env file defines only one Spyre image key pinned to a specific digest (vllm-spyre-rhel9@sha256:...), which is a single-architecture image, not a multi-arch manifest list. This will cause deployment failures on s390x and ppc64le systems. Define separate image keys for each architecture (kserve-llm-d-ibm-spyre-s390x, kserve-llm-d-ibm-spyre-x86, kserve-llm-d-ibm-spyre-ppc64le) in params.env and update the kustomization.yaml replacement sources accordingly.
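A quick way to check this claim is to inspect the pinned reference's manifest mediaType (for example with `skopeo inspect --raw docker://<image> | jq -r .mediaType`, assuming skopeo and jq are available and the registry is reachable); only a manifest list/index can serve multiple architectures from one reference. A small helper classifying the result:

```shell
# Classify a manifest mediaType: multi-arch index/list vs single-arch manifest.
is_multi_arch() {
  case "$1" in
    application/vnd.oci.image.index.v1+json) echo yes ;;
    application/vnd.docker.distribution.manifest.list.v2+json) echo yes ;;
    *) echo no ;;
  esac
}

is_multi_arch "application/vnd.docker.distribution.manifest.list.v2+json"  # yes
is_multi_arch "application/vnd.oci.image.manifest.v1+json"                 # no
```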

🤖 Prompt for AI Agents
Verify each finding against the current code and only fix it if needed.

In `@config/overlays/odh/kustomization.yaml` around lines 81 - 100, The
kustomization currently replaces image for three LLMInferenceServiceConfig
resources from a single ConfigMap key (ConfigMap name kserve-parameters,
fieldpath data.kserve-llm-d-ibm-spyre), but the three targets
(LLMInferenceServiceConfig names kserve-config-llm-template-ibm-spyre-s390x,
kserve-config-llm-template-ibm-spyre-x86,
kserve-config-llm-template-ibm-spyre-ppc64le) require distinct
architecture-specific images; update params.env to define three separate keys
(kserve-llm-d-ibm-spyre-s390x, kserve-llm-d-ibm-spyre-x86,
kserve-llm-d-ibm-spyre-ppc64le) and modify this kustomization replacement block
so each target uses the corresponding ConfigMap key instead of the single
data.kserve-llm-d-ibm-spyre entry to ensure the correct arch-specific image is
injected for each LLMInferenceServiceConfig.
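A sketch of the suggested fix, shown for one architecture (key names follow the review's proposal; the digest is a placeholder until per-arch images are published):

```yaml
# params.env (one pinned image per architecture; digests are placeholders):
#   kserve-llm-d-ibm-spyre-s390x=registry.redhat.io/rhaiis/vllm-spyre-rhel9@sha256:<s390x-digest>
#   kserve-llm-d-ibm-spyre-x86=registry.redhat.io/rhaiis/vllm-spyre-rhel9@sha256:<x86-digest>
#   kserve-llm-d-ibm-spyre-ppc64le=registry.redhat.io/rhaiis/vllm-spyre-rhel9@sha256:<ppc64le-digest>

# kustomization.yaml: one replacement per architecture instead of one shared source.
- source:
    kind: ConfigMap
    name: kserve-parameters
    fieldPath: data.kserve-llm-d-ibm-spyre-s390x
  targets:
    - select:
        kind: LLMInferenceServiceConfig
        name: kserve-config-llm-template-ibm-spyre-s390x
      fieldPaths:
        - spec.template.containers.[name=main].image
```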


configMapGenerator:
  - envs:
      - params.env
3 changes: 3 additions & 0 deletions config/overlays/odh/params.env
@@ -3,5 +3,8 @@ llmisvc-controller=quay.io/opendatahub/llmisvc-controller:latest
kserve-agent=quay.io/opendatahub/kserve-agent:latest
kserve-router=quay.io/opendatahub/kserve-router:latest
kserve-storage-initializer=quay.io/opendatahub/kserve-storage-initializer:latest
kserve-llm-d-nvidia-cuda=registry.redhat.io/rhaiis/vllm-cuda-rhel9@sha256:fc68d623d1bfc36c8cb2fe4a71f19c8578cfb420ce8ce07b20a02c1ee0be0cf3
kserve-llm-d-amd-rocm=registry.redhat.io/rhaiis/vllm-rocm-rhel9@sha256:d9a48add238cc095fa43eeee17c8c4d104de60c4dc623e0bc7f8c4b53b2b2e97
kserve-llm-d-ibm-spyre=registry.redhat.io/rhaiis/vllm-spyre-rhel9@sha256:80ae3e435a5be2c1f117f36599103ab05357917dd6e37f0df6613cb3ac2c13ea
# TODO update when our changes are introduced in the official image
kube-rbac-proxy=quay.io/opendatahub/odh-kube-auth-proxy@sha256:dcb09fbabd8811f0956ef612a0c9ddd5236804b9bd6548a0647d2b531c9d01b3
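The configMapGenerator turns each params.env line into a data key on the generated kserve-parameters ConfigMap, which the replacement sources read. A rough sketch of that env-file parsing (kustomize's real implementation handles quoting and other edge cases; this is illustrative only):

```shell
# render_params: turn params.env-style lines on stdin into ConfigMap data keys,
# skipping blank lines and comments; the value keeps everything after the
# first '=', so image references containing '=' would survive intact.
render_params() {
  while IFS='=' read -r key value; do
    case "$key" in ''|'#'*) continue ;; esac
    printf 'data[%s]=%s\n' "$key" "$value"
  done
}

render_params <<'EOF'
# TODO update when our changes are introduced in the official image
kserve-llm-d-nvidia-cuda=registry.redhat.io/rhaiis/vllm-cuda-rhel9@sha256:fc68d623d1bfc36c8cb2fe4a71f19c8578cfb420ce8ce07b20a02c1ee0be0cf3
EOF
```

The comment line is dropped and the image line becomes a single `data[...]=...` entry.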