diff --git a/Makefile b/Makefile
index 30adbd3..90ce3a3 100644
--- a/Makefile
+++ b/Makefile
@@ -1,9 +1,8 @@
.PHONY: deploy deploy-all undeploy undeploy-kserve status help check-kubeconfig sync clear-cache
-.PHONY: deploy-cert-manager deploy-istio deploy-lws deploy-kserve
+.PHONY: deploy-cert-manager deploy-istio deploy-lws deploy-kserve deploy-opendatahub-prerequisites deploy-cert-manager-pki
.PHONY: test conformance
HELMFILE_CACHE := $(HOME)/.cache/helmfile
-KSERVE_REF ?= release-v0.15
KSERVE_NAMESPACE ?= opendatahub
check-kubeconfig:
@@ -13,8 +12,8 @@ help:
@echo "rhaii-on-xks - Infrastructure for llm-d on xKS (AKS/CoreWeave)"
@echo ""
@echo "Deploy:"
- @echo " make deploy - Deploy cert-manager + istio"
- @echo " make deploy-all - Deploy all (cert-manager + istio + lws)"
+ @echo " make deploy - Deploy cert-manager + istio + lws"
+ @echo " make deploy-all - Deploy all (cert-manager + istio + lws + kserve)"
@echo " make deploy-kserve - Deploy KServe"
@echo ""
@echo "Undeploy:"
@@ -40,10 +39,10 @@ sync: clear-cache
deploy: check-kubeconfig clear-cache
helmfile apply --selector name=cert-manager-operator
helmfile apply --selector name=sail-operator
+ helmfile apply --selector name=lws-operator
@$(MAKE) status
-deploy-all: check-kubeconfig clear-cache
- helmfile apply
+deploy-all: check-kubeconfig deploy-cert-manager deploy-istio deploy-lws deploy-kserve
@$(MAKE) status
deploy-cert-manager: check-kubeconfig clear-cache
@@ -55,35 +54,44 @@ deploy-istio: check-kubeconfig clear-cache
deploy-lws: check-kubeconfig clear-cache
helmfile apply --selector name=lws-operator
-deploy-kserve: check-kubeconfig
- @echo "=== Deploying KServe (ref=$(KSERVE_REF)) ==="
+deploy-opendatahub-prerequisites: check-kubeconfig
+ @echo "=== Deploying OpenDataHub prerequisites ==="
kubectl create namespace $(KSERVE_NAMESPACE) --dry-run=client -o yaml | kubectl apply -f -
-kubectl get secret redhat-pull-secret -n istio-system -o yaml 2>/dev/null | \
sed 's/namespace: istio-system/namespace: $(KSERVE_NAMESPACE)/' | \
kubectl apply -f - 2>/dev/null || true
- kubectl apply -k "https://github.com/opendatahub-io/kserve/config/overlays/odh-test/cert-manager?ref=$(KSERVE_REF)"
+
+deploy-cert-manager-pki: check-kubeconfig deploy-opendatahub-prerequisites
+ @kubectl get crd clusterissuers.cert-manager.io >/dev/null 2>&1 || \
+ (echo "ERROR: cert-manager CRDs not found. Run 'make deploy-cert-manager' first." && exit 1)
+ @echo "Waiting for cert-manager webhook..."
+ -kubectl delete secret cert-manager-webhook-ca -n cert-manager --ignore-not-found 2>/dev/null || true
+ kubectl rollout restart deployment/cert-manager-webhook -n cert-manager
+ kubectl rollout status deployment/cert-manager-webhook -n cert-manager --timeout=120s
+ @sleep 5
+ kubectl apply -f ./charts/kserve/pki-prereq.yaml
kubectl wait --for=condition=Ready clusterissuer/opendatahub-ca-issuer --timeout=120s
- @echo "Applying CRDs and deployment (CR errors expected, will retry)..."
- -kustomize build "https://github.com/opendatahub-io/kserve/config/overlays/odh-xks?ref=$(KSERVE_REF)" | kubectl apply --server-side --force-conflicts -f - 2>/dev/null || true
- @echo "Removing webhooks to allow controller startup..."
- -kubectl delete validatingwebhookconfiguration llminferenceservice.serving.kserve.io llminferenceserviceconfig.serving.kserve.io --ignore-not-found 2>/dev/null || true
- kubectl wait --for=condition=Available deployment/kserve-controller-manager -n $(KSERVE_NAMESPACE) --timeout=300s
- @echo "Controller ready, applying CRs..."
- kustomize build "https://github.com/opendatahub-io/kserve/config/overlays/odh-xks?ref=$(KSERVE_REF)" | kubectl apply --server-side --force-conflicts -f -
+
+deploy-kserve: check-kubeconfig deploy-cert-manager-pki
+ @echo "Applying KServe via Helm..."
+ helmfile sync --wait --selector name=kserve-rhaii-xks --skip-crds
@echo "=== KServe deployed ==="
# Undeploy
-undeploy: check-kubeconfig
+undeploy: check-kubeconfig undeploy-kserve
@./scripts/cleanup.sh -y
undeploy-kserve: check-kubeconfig
-@kubectl delete llminferenceservice --all -A --ignore-not-found 2>/dev/null || true
-@kubectl delete inferencepool --all -A --ignore-not-found 2>/dev/null || true
- -@kubectl delete deployment kserve-controller-manager -n $(KSERVE_NAMESPACE) --ignore-not-found 2>/dev/null || true
+ -@helm uninstall kserve-rhaii-xks --namespace $(KSERVE_NAMESPACE) 2>/dev/null || true
-@kubectl delete validatingwebhookconfiguration llminferenceservice.serving.kserve.io llminferenceserviceconfig.serving.kserve.io --ignore-not-found 2>/dev/null || true
- -@# Removes KServe CRDs and Inference Extension CRDs (InferencePool, InferenceModel)
+ -@# Removes KServe CRDs and Inference Extension CRDs (Helm does not remove CRDs on uninstall)
-@kubectl get crd -o name | grep -E "serving.kserve.io|inference.networking" | xargs -r kubectl delete --ignore-not-found 2>/dev/null || true
- -@kubectl delete clusterissuer opendatahub-ca-issuer --ignore-not-found 2>/dev/null || true
+ -@# Removes cluster-scoped RBAC resources
+ -@kubectl get clusterrole,clusterrolebinding -o name | grep -i kserve | xargs -r kubectl delete --ignore-not-found 2>/dev/null || true
+ -@kubectl delete clusterissuer opendatahub-ca-issuer opendatahub-selfsigned-issuer --ignore-not-found 2>/dev/null || true
+ -@kubectl delete certificate opendatahub-ca -n cert-manager --ignore-not-found 2>/dev/null || true
-@kubectl delete namespace $(KSERVE_NAMESPACE) --ignore-not-found --wait=false 2>/dev/null || true
@echo "=== KServe removed ==="
@@ -103,6 +111,12 @@ status: check-kubeconfig
@echo "lws-operator:"
@kubectl get pods -n openshift-lws-operator 2>/dev/null || echo " Not deployed"
@echo ""
+ @echo "kserve:"
+ @kubectl get pods -n $(KSERVE_NAMESPACE) -l control-plane=kserve-controller-manager 2>/dev/null || echo " Not deployed"
+ @echo ""
+ @echo "kserve config:"
+ @kubectl get llminferenceserviceconfig -n $(KSERVE_NAMESPACE) 2>/dev/null || echo " Not deployed"
+ @echo ""
@echo "=== API Versions ==="
@echo -n "InferencePool API: "
@if kubectl get crd inferencepools.inference.networking.k8s.io >/dev/null 2>&1; then \
diff --git a/README.md b/README.md
index ac68425..edff8e0 100644
--- a/README.md
+++ b/README.md
@@ -9,7 +9,6 @@ Infrastructure Helm charts for deploying Red Hat AI Inference Server (KServe LLM
| Repository | Purpose |
|------------|---------|
| [llm-d-xks-aks](https://github.com/kwozyman/llm-d-xks-aks) | AKS cluster provisioning (creates cluster + GPU nodes + GPU Operator) |
-| [rhaii-xks-kserve](https://github.com/pierDipi/rhaii-xks-kserve) | KServe Helm charts (WIP) |
## Overview
@@ -18,6 +17,7 @@ Infrastructure Helm charts for deploying Red Hat AI Inference Server (KServe LLM
| cert-manager-operator | 1.15.2 | TLS certificate management |
| sail-operator (Istio) | 3.2.x | Gateway API for inference routing |
| lws-operator | 1.0 | LeaderWorkerSet controller for multi-node workloads |
+| kserve | 3.4.0-ea.1 | KServe controller for LLMInferenceService lifecycle |
### Version Compatibility
@@ -26,14 +26,16 @@ Infrastructure Helm charts for deploying Red Hat AI Inference Server (KServe LLM
| OSSM (Sail Operator) | 3.2.x | Gateway API for inference routing |
| Istio | v1.27.x | Service mesh |
| InferencePool API | v1 | `inference.networking.k8s.io/v1` |
-| KServe | release-v0.15 | LLMInferenceService controller |
+| KServe | rhoai-3.4+ | LLMInferenceService controller |
## Prerequisites
- Kubernetes cluster (AKS or CoreWeave) - see [llm-d-xks-aks](https://github.com/kwozyman/llm-d-xks-aks) for AKS provisioning
-- `kubectl`, `helm` (v3.17+), `helmfile`, `kustomize` (v5.7+)
+- `kubectl`, `helm` (v3.17+), `helmfile`
- Red Hat account (for Sail Operator and vLLM images from `registry.redhat.io`)
+**Cluster readiness check (optional):** Run `cd validation && make container && make run` to verify cloud provider, GPU availability, and instance types before deploying. CRD checks will pass only after operators are deployed. See [Preflight Validation](./validation/README.md).
+
### Red Hat Pull Secret Setup
The Sail Operator and RHAIIS vLLM images are hosted on `registry.redhat.io` which requires authentication.
@@ -93,210 +95,24 @@ useSystemPodmanAuth: true
## Quick Start
-### Step 1: Deploy Infrastructure
-
```bash
git clone https://github.com/opendatahub-io/rhaii-on-xks.git
cd rhaii-on-xks
-# Deploy cert-manager + istio + lws
+# 1. Deploy all components (cert-manager + Istio + LWS + KServe)
make deploy-all
-# Check status
-make status
-```
-
-### Step 2: Deploy KServe
-
-```bash
-make deploy-kserve
-
-# Verify
-kubectl get pods -n opendatahub
-kubectl get llminferenceserviceconfig -n opendatahub
-```
-
-
-Manual steps (click to expand)
-
-```bash
-# Create opendatahub namespace
-kubectl create namespace opendatahub --dry-run=client -o yaml | kubectl apply -f -
-
-# Copy pull secret from istio-system (created by infrastructure deployment)
-kubectl get secret redhat-pull-secret -n istio-system -o yaml | \
- sed 's/namespace: istio-system/namespace: opendatahub/' | \
- kubectl apply -f -
-
-# Apply cert-manager PKI resources first (required for webhook certificates)
-kubectl apply -k "https://github.com/opendatahub-io/kserve/config/overlays/odh-test/cert-manager?ref=release-v0.15"
-kubectl wait --for=condition=Ready clusterissuer/opendatahub-ca-issuer --timeout=120s
-
-# First apply - creates CRDs and deployment (CR errors expected due to webhook)
-kustomize build "https://github.com/opendatahub-io/kserve/config/overlays/odh-xks?ref=release-v0.15" | kubectl apply --server-side --force-conflicts -f - || true
-
-# Delete webhooks to allow controller startup
-kubectl delete validatingwebhookconfiguration llminferenceservice.serving.kserve.io llminferenceserviceconfig.serving.kserve.io --ignore-not-found
-
-# Wait for controller to be ready
-kubectl wait --for=condition=Available deployment/kserve-controller-manager -n opendatahub --timeout=300s
-
-# Second apply - now webhooks work, applies CRs
-kustomize build "https://github.com/opendatahub-io/kserve/config/overlays/odh-xks?ref=release-v0.15" | kubectl apply --server-side --force-conflicts -f -
-
-# Verify LLMInferenceServiceConfig templates exist
-kubectl get llminferenceserviceconfig -n opendatahub
-```
-
-
-
-### Step 3: Set up Gateway
-
-```bash
+# 2. Set up inference gateway
./scripts/setup-gateway.sh
-# Verify
-kubectl get gateway -n opendatahub
-```
-
-
-What the script does (click to expand)
+# 3. Validate deployment
+cd validation && make container && make run
-The script:
-1. Copies the CA bundle from cert-manager to opendatahub namespace
-2. Creates a Gateway with the CA bundle mounted for mTLS to backend services
-3. Patches the Gateway pod to use the pull secret
-
-
-
-### Step 4: Deploy LLMInferenceService
-
-#### Hardware Requirements
-
-| Resource | Per Replica | Notes |
-|----------|-------------|-------|
-| GPU | 1x NVIDIA GPU | A10, A100, H100, or similar |
-| CPU | 2-4 cores | |
-| Memory | 16-32 Gi | Depends on model size |
-
-#### Node Requirements
-
-Ensure your cluster has GPU nodes with the NVIDIA device plugin installed:
-
-```bash
-# Verify GPU nodes are available
-kubectl get nodes -l nvidia.com/gpu.present=true
-
-# Check GPU resources
-kubectl describe nodes | grep -A5 "nvidia.com/gpu"
-```
-
-For AKS, create a GPU node pool:
-```bash
-az aks nodepool add \
- --resource-group \
- --cluster-name \
- --name gpunp \
- --node-count 2 \
- --node-vm-size Standard_NC24ads_A100_v4 \
- --node-taints sku=gpu:NoSchedule \
- --labels nvidia.com/gpu.present=true
-```
-
-#### Deploy Sample Model
-
-```bash
-# Create namespace first
-export NAMESPACE=llm-d-test
-kubectl create namespace $NAMESPACE --dry-run=client -o yaml | kubectl apply -f -
-
-# Copy pull secret from istio-system (created by infrastructure deployment)
-kubectl get secret redhat-pull-secret -n istio-system -o yaml | \
- sed "s/namespace: istio-system/namespace: $NAMESPACE/" | \
- kubectl apply -f -
-
-# Patch default ServiceAccount to use pull secret (all pods will inherit it)
-kubectl patch serviceaccount default -n $NAMESPACE \
- -p '{"imagePullSecrets": [{"name": "redhat-pull-secret"}]}'
-
-# Deploy Qwen2.5-7B model with scheduler
-kubectl apply -n $NAMESPACE -f - <<'EOF'
-apiVersion: serving.kserve.io/v1alpha1
-kind: LLMInferenceService
-metadata:
- name: qwen2-7b-instruct
-spec:
- model:
- name: Qwen/Qwen2.5-7B-Instruct
- uri: hf://Qwen/Qwen2.5-7B-Instruct
- replicas: 1
- router:
- gateway: {}
- route: {}
- scheduler: {} # Enable EPP scheduler for intelligent routing
- template:
- containers:
- - name: main
- resources:
- limits:
- cpu: "4"
- memory: 32Gi
- nvidia.com/gpu: "1"
- requests:
- cpu: "2"
- memory: 16Gi
- nvidia.com/gpu: "1"
- livenessProbe:
- httpGet:
- path: /health
- port: 8000
- scheme: HTTPS
- initialDelaySeconds: 120
- periodSeconds: 30
- timeoutSeconds: 30
- failureThreshold: 5
-EOF
-
-# Watch deployment status
-kubectl get llmisvc -n $NAMESPACE -w
-```
-
-#### Check Deployment Status
-
-```bash
-# Check pods
-kubectl get pods -n $NAMESPACE
-
-# Check events if pods are not starting
-kubectl describe llmisvc qwen2-7b-instruct -n $NAMESPACE
-```
-
-#### Test Inference
-
-```bash
-# Get the service URL
-SERVICE_URL=$(kubectl get llmisvc qwen2-7b-instruct -n $NAMESPACE -o jsonpath='{.status.url}')
-
-# Test with curl (use external gateway IP)
-curl -X POST "${SERVICE_URL}/v1/chat/completions" \
- -H "Content-Type: application/json" \
- -d '{
- "model": "Qwen/Qwen2.5-7B-Instruct",
- "messages": [{"role": "user", "content": "What is Kubernetes?"}],
- "max_tokens": 100
- }'
+# 4. Check status
+make status
```
-#### More Examples
-
-| Example | Description | Path |
-|---------|-------------|------|
-| CPU (OPT-125M) | Simple CPU deployment for testing | `docs/samples/llmisvc/opt-125m-cpu/` |
-| GPU with Scheduler | Intelligent request routing | `docs/samples/llmisvc/single-node-gpu/` |
-| Prefill-Decode | Disaggregated serving | `docs/samples/llmisvc/single-node-gpu/llm-inference-service-pd-qwen2-7b-gpu.yaml` |
-| Multi-node MoE | DeepSeek with expert parallelism | `docs/samples/llmisvc/dp-ep/` |
-
-See the [KServe samples](https://github.com/opendatahub-io/kserve/tree/main/docs/samples/llmisvc) for more examples.
+For deploying LLM inference services, GPU requirements, and testing inference, see the [full deployment guide](./docs/deploying-llm-d-on-managed-kubernetes.md).
---
@@ -304,8 +120,8 @@ See the [KServe samples](https://github.com/opendatahub-io/kserve/tree/main/docs
```bash
# Deploy
-make deploy # cert-manager + istio
-make deploy-all # cert-manager + istio + lws
+make deploy # cert-manager + istio + lws
+make deploy-all # cert-manager + istio + lws + kserve
make deploy-kserve # Deploy KServe
# Undeploy
@@ -345,43 +161,6 @@ lwsOperator:
---
-## KServe Controller Settings
-
-The odh-xks overlay disables several OpenShift-specific features for vanilla Kubernetes (AKS/CoreWeave) compatibility:
-
-```yaml
-# Disabled by default in odh-xks overlay
-- name: LLMISVC_MONITORING_DISABLED
- value: "true" # No Prometheus Operator dependency
-- name: LLMISVC_AUTH_DISABLED
- value: "true" # No Kuadrant/RHCL dependency
-- name: LLMISVC_SCC_DISABLED
- value: "true" # No OpenShift SecurityContextConstraints
-```
-
-| Setting | Why Disabled on xKS |
-|---------|---------------------|
-| `LLMISVC_MONITORING_DISABLED` | Prometheus Operator not required for basic inference |
-| `LLMISVC_AUTH_DISABLED` | Authorino/Kuadrant (Red Hat Connectivity Link) is OpenShift-only |
-| `LLMISVC_SCC_DISABLED` | SecurityContextConstraints are OpenShift-specific |
-
-### Enabling Monitoring
-
-To enable Prometheus monitoring for KServe-managed workloads:
-
-1. Deploy Prometheus Operator on your cluster (see [monitoring-stack/](./monitoring-stack/))
-
-2. Patch the KServe controller to enable monitoring:
-```bash
-kubectl set env deployment/kserve-controller-manager \
- -n opendatahub \
- LLMISVC_MONITORING_DISABLED=false
-```
-
-This enables KServe to automatically create `PodMonitor` resources for vLLM pods.
-
----
-
## Collecting Debug Information
If you encounter issues, collect diagnostic information for troubleshooting or to share with Red Hat support:
@@ -390,92 +169,13 @@ If you encounter issues, collect diagnostic information for troubleshooting or t
./scripts/collect-debug-info.sh
```
-This collects logs, status, and events from all components (cert-manager, Istio, LWS, KServe) into a single directory. See the [Collecting Debug Information](./docs/collecting-debug-information.md) guide for details.
+See the [Collecting Debug Information](./docs/collecting-debug-information.md) guide for details.
---
## Troubleshooting
-### KServe Controller Issues
-
-If the controller pod is stuck in `ContainerCreating` (waiting for certificate):
-```bash
-# Apply cert-manager resources separately first
-kubectl apply -k "https://github.com/opendatahub-io/kserve/config/overlays/odh-test/cert-manager?ref=release-v0.15"
-kubectl wait --for=condition=Ready certificate/kserve-webhook-server -n opendatahub --timeout=120s
-
-# Then re-apply the overlay
-kustomize build "https://github.com/opendatahub-io/kserve/config/overlays/odh-xks?ref=release-v0.15" | kubectl apply --server-side --force-conflicts -f -
-```
-
-If webhook validation blocks apply (manual deployment only - `make deploy-kserve` handles this automatically):
-```bash
-kubectl delete validatingwebhookconfiguration llminferenceservice.serving.kserve.io llminferenceserviceconfig.serving.kserve.io
-kustomize build "https://github.com/opendatahub-io/kserve/config/overlays/odh-xks?ref=release-v0.15" | kubectl apply --server-side --force-conflicts -f -
-```
-
-If you get "no matches for kind LLMInferenceServiceConfig" errors:
-```bash
-# This is a CRD timing issue - run the apply command again after CRDs are registered
-sleep 5
-kustomize build "https://github.com/opendatahub-io/kserve/config/overlays/odh-xks?ref=release-v0.15" | kubectl apply --server-side --force-conflicts -f -
-```
-
-### Gateway Issues
-
-If Gateway pod has `ErrImagePull`:
-```bash
-# Copy pull secret to opendatahub namespace
-kubectl get secret redhat-pull-secret -n istio-system -o yaml | \
- sed 's/namespace: istio-system/namespace: opendatahub/' | kubectl apply -f -
-
-# Patch the gateway ServiceAccount
-kubectl patch sa inference-gateway-istio -n opendatahub \
- -p '{"imagePullSecrets": [{"name": "redhat-pull-secret"}]}'
-
-# Delete the failing pod to trigger restart
-kubectl delete pod -n opendatahub -l gateway.networking.k8s.io/gateway-name=inference-gateway
-```
-
----
-
-## Reinstalling Istio
-
-If you need to do a clean reinstall of Istio:
-
-```bash
-# 1. Delete the Istio CR (triggers istiod cleanup)
-kubectl delete istio default -n istio-system
-
-# 2. Wait for istiod to be removed
-kubectl wait --for=delete pod -l app=istiod -n istio-system --timeout=120s
-
-# 3. Redeploy
-make deploy-istio
-```
-
----
-
-## Architecture
-
-### TLS Certificate Architecture
-
-The odh-xks overlay creates an OpenDataHub-scoped CA:
-1. Self-signed bootstrap issuer creates root CA in cert-manager namespace
-2. ClusterIssuer (`opendatahub-ca-issuer`) uses this CA to sign certificates
-3. KServe controller generates certificates for LLM workload mTLS automatically
-4. Gateway needs CA bundle mounted at `/var/run/secrets/opendatahub/ca.crt`
-
-### Key Differences from OpenShift (ODH) Overlay
-
-| Component | OpenShift (ODH) | Vanilla K8s (odh-xks) |
-|-----------|-----------------|----------------------|
-| Certificates | OpenShift service-ca | cert-manager |
-| Security constraints | SCC included | Removed |
-| Traffic routing | Istio VirtualService | Gateway API |
-| Webhook CA injection | Service annotations | cert-manager annotations |
-| Auth | Authorino/Kuadrant | Disabled |
-| Monitoring | Prometheus included | Disabled (optional) |
+For detailed troubleshooting steps (KServe controller issues, gateway errors, webhook problems, monitoring setup), see the [full deployment guide - Troubleshooting](./docs/deploying-llm-d-on-managed-kubernetes.md#9-troubleshooting).
---
@@ -490,18 +190,24 @@ rhaii-on-xks/
├── charts/
│ ├── cert-manager-operator/ # cert-manager operator Helm chart
│ ├── sail-operator/ # Sail/Istio operator Helm chart
-│ └── lws-operator/ # LWS operator Helm chart
+│ ├── lws-operator/ # LWS operator Helm chart
+│ └── kserve/ # KServe controller Helm chart (auto-generated)
+├── validation/ # Preflight validation checks
+│ ├── llmd_xks_checks.py # Validation script
+│ ├── Containerfile # Container build
+│ └── Makefile # Build and run helpers
└── scripts/
├── cleanup.sh # Cleanup infrastructure (helmfile destroy + finalizers)
└── setup-gateway.sh # Set up Gateway with CA bundle for mTLS
```
-## Operator Charts
+## Charts
-Operator Helm charts are included locally under `charts/`:
+Helm charts are included locally under `charts/`:
- `charts/cert-manager-operator/` — cert-manager operator
- `charts/sail-operator/` — Sail/Istio operator
- `charts/lws-operator/` — LeaderWorkerSet operator
+- `charts/kserve/` — KServe controller (auto-generated from Kustomize overlays, all images from `registry.redhat.io`)
-The helmfile imports these local charts including presync hooks for CRD installation.
+The helmfile imports the infrastructure charts (cert-manager, sail-operator, lws-operator) including presync hooks for CRD installation. The KServe OCI chart is deployed via helmfile from `ghcr.io/opendatahub-io/kserve-rhaii-xks`.
diff --git a/charts/kserve/pki-prereq.yaml b/charts/kserve/pki-prereq.yaml
new file mode 100644
index 0000000..c888ee7
--- /dev/null
+++ b/charts/kserve/pki-prereq.yaml
@@ -0,0 +1,39 @@
+# cert-manager PKI prerequisites for KServe
+# These resources must be applied before installing the KServe chart.
+# The default uses a self-signed CA for internal TLS between inference
+# components. For production, replace opendatahub-selfsigned-issuer with
+# a ClusterIssuer backed by your organization's CA (e.g., Vault, ACM PCA).
+---
+apiVersion: cert-manager.io/v1
+kind: ClusterIssuer
+metadata:
+ name: opendatahub-selfsigned-issuer
+spec:
+ selfSigned: {}
+---
+apiVersion: cert-manager.io/v1
+kind: ClusterIssuer
+metadata:
+ name: opendatahub-ca-issuer
+spec:
+ ca:
+ secretName: opendatahub-ca
+---
+apiVersion: cert-manager.io/v1
+kind: Certificate
+metadata:
+ name: opendatahub-ca
+ namespace: cert-manager
+spec:
+ secretName: opendatahub-ca
+ isCA: true
+ commonName: opendatahub-ca
+ duration: 87600h # 10 years
+ renewBefore: 2160h # 90 days
+ privateKey:
+ algorithm: RSA
+ size: 4096
+ issuerRef:
+ name: opendatahub-selfsigned-issuer
+ kind: ClusterIssuer
+ group: cert-manager.io
diff --git a/docs/deploying-llm-d-on-managed-kubernetes.md b/docs/deploying-llm-d-on-managed-kubernetes.md
index aa15309..be782e2 100644
--- a/docs/deploying-llm-d-on-managed-kubernetes.md
+++ b/docs/deploying-llm-d-on-managed-kubernetes.md
@@ -1,7 +1,7 @@
# Deploying Red Hat AI Inference Server on Managed Kubernetes
**Product:** Red Hat AI Inference Server (RHAIIS)
-**Version:** 0.15
+**Version:** 3.4
**Platforms:** Azure Kubernetes Service (AKS), CoreWeave Kubernetes Service (CKS)
---
@@ -21,16 +21,16 @@ This guide provides step-by-step instructions for deploying Red Hat AI Inference
## Table of Contents
1. [Prerequisites](#1-prerequisites)
+ - [Preflight Validation](#15-preflight-validation-recommended)
2. [Architecture Overview](#2-architecture-overview)
-3. [Deploying Infrastructure Components](#3-deploying-infrastructure-components)
-4. [Deploying the Inference Controller](#4-deploying-the-inference-controller)
-5. [Configuring the Inference Gateway](#5-configuring-the-inference-gateway)
-6. [Deploying an LLM Inference Service](#6-deploying-an-llm-inference-service)
-7. [Verifying the Deployment](#7-verifying-the-deployment)
-8. [Optional: Enabling Monitoring](#8-optional-enabling-monitoring)
-9. [Collecting Debug Information](#9-collecting-debug-information)
-10. [Troubleshooting](#10-troubleshooting)
-11. [Appendix: Component Versions](#appendix-component-versions)
+3. [Deploying All Components](#3-deploying-all-components)
+4. [Configuring the Inference Gateway](#4-configuring-the-inference-gateway)
+5. [Deploying an LLM Inference Service](#5-deploying-an-llm-inference-service)
+6. [Verifying the Deployment](#6-verifying-the-deployment)
+7. [Optional: Enabling Monitoring](#7-optional-enabling-monitoring)
+8. [Collecting Debug Information](#8-collecting-debug-information)
+9. [Troubleshooting](#9-troubleshooting)
+10. [Appendix: Component Versions](#appendix-component-versions)
---
@@ -54,7 +54,6 @@ Install the following tools on your workstation:
| `kubectl` | 1.28+ | Kubernetes CLI |
| `helm` | 3.17+ | Helm package manager |
| `helmfile` | 0.160+ | Declarative Helm deployments |
-| `kustomize` | 5.7+ | Kubernetes manifest customization |
### 1.3 Red Hat Registry Authentication
@@ -122,6 +121,36 @@ kubectl describe nodes | grep -A5 "nvidia.com/gpu"
---
+## 1.5 Preflight Validation (Recommended)
+
+Run the preflight validation checks to verify your cluster is properly configured:
+
+```bash
+# Build the validation container
+cd validation && make container
+
+# Run preflight checks against your cluster
+make run
+```
+
+The preflight tool automatically detects your cloud provider and validates:
+
+| Check | When it passes |
+|-------|----------------|
+| Cloud provider | Cluster is reachable and provider detected (pre-deployment) |
+| Instance type | Supported GPU instance types are present (pre-deployment) |
+| GPU availability | GPU drivers and node labels found (pre-deployment) |
+| cert-manager CRDs | After `make deploy-all` |
+| Sail Operator CRDs | After `make deploy-all` |
+| LWS Operator CRDs | After `make deploy-all` |
+| KServe CRDs | After `make deploy-all` |
+
+> **Tip:** Run before deploying to verify cluster readiness (cloud provider, GPU, instance types). Run again after deployment to confirm all CRDs are installed. See Section 6.4 for full post-deployment validation.
+
+See the [Preflight Validation README](../validation/README.md) for configuration options and standalone usage.
+
+---
+
## 2. Architecture Overview
Red Hat AI Inference Server on managed Kubernetes consists of the following components:
@@ -152,7 +181,7 @@ Red Hat AI Inference Server on managed Kubernetes consists of the following comp
---
-## 3. Deploying Infrastructure Components
+## 3. Deploying All Components
### 3.1 Clone the Deployment Repository
@@ -161,14 +190,16 @@ git clone https://github.com/opendatahub-io/rhaii-on-xks.git
cd rhaii-on-xks
```
-### 3.2 Deploy Infrastructure
+### 3.2 Deploy
-Deploy cert-manager, Istio (Sail Operator), and LeaderWorkerSet:
+Deploy cert-manager, Istio (Sail Operator), LeaderWorkerSet, and KServe:
```bash
make deploy-all
```
+> **Note:** To deploy components individually, use `make deploy-cert-manager`, `make deploy-istio`, `make deploy-lws`, and `make deploy-kserve`.
+
### 3.3 Verify Infrastructure Deployment
```bash
@@ -202,46 +233,13 @@ InferencePool API: v1 (inference.networking.k8s.io)
Istio version: v1.27.5
```
----
-
-## 4. Deploying the Inference Controller
-
-### 4.1 Deploy KServe Controller
-
-```bash
-make deploy-kserve
-```
-
-This command performs the following actions:
-- Creates the `opendatahub` namespace
-- Applies cert-manager PKI resources for webhook certificates
-- Deploys the KServe controller with LLMInferenceService support
-- Configures validating webhooks
-
-### 4.2 Verify Controller Deployment
-
-```bash
-kubectl get pods -n opendatahub
-```
-
-**Expected output:**
-
-```text
-NAME READY STATUS RESTARTS AGE
-kserve-controller-manager-xxxxxxxxx-xxxxx 1/1 Running 0 2m
-```
-
-Verify the LLMInferenceServiceConfig templates are installed:
-
-```bash
-kubectl get llminferenceserviceconfig -n opendatahub
-```
+> **TLS Certificates:** The default configuration uses a self-signed CA for internal mTLS between inference components (router, scheduler, vLLM). This is sufficient for most deployments because the certificates are only used for pod-to-pod communication within the cluster. If your organization requires certificates issued by a corporate PKI, replace the `opendatahub-selfsigned-issuer` with a cert-manager ClusterIssuer backed by your CA (e.g., Vault, AWS ACM PCA, or an external PKI). The KServe chart version is configured in `values.yaml` (`kserveChartVersion`). See the [KServe Chart README](https://github.com/opendatahub-io/rhaii-on-xks/blob/main/charts/kserve/README.md) for chart details and the cert-manager PKI prerequisites.
---
-## 5. Configuring the Inference Gateway
+## 4. Configuring the Inference Gateway
-### 5.1 Create the Gateway
+### 4.1 Create the Gateway
Run the gateway setup script:
@@ -254,7 +252,7 @@ This script:
2. Creates an Istio Gateway with the CA bundle mounted for mTLS
3. Configures the Gateway pod with registry authentication
-### 5.2 Verify Gateway Deployment
+### 4.2 Verify Gateway Deployment
```bash
kubectl get gateway -n opendatahub
@@ -275,16 +273,16 @@ kubectl get pods -n opendatahub -l gateway.networking.k8s.io/gateway-name=infere
---
-## 6. Deploying an LLM Inference Service
+## 5. Deploying an LLM Inference Service
-### 6.1 Create the Application Namespace
+### 5.1 Create the Application Namespace
```bash
export NAMESPACE=llm-inference
kubectl create namespace $NAMESPACE
```
-### 6.2 Configure Registry Authentication
+### 5.2 Configure Registry Authentication
Copy the pull secret to your application namespace:
@@ -301,7 +299,7 @@ kubectl patch serviceaccount default -n $NAMESPACE \
-p '{"imagePullSecrets": [{"name": "redhat-pull-secret"}]}'
```
-### 6.3 Deploy the LLMInferenceService
+### 5.3 Deploy the LLMInferenceService
Create the LLMInferenceService resource:
@@ -349,7 +347,7 @@ spec:
EOF
```
-### 6.4 Monitor Deployment Progress
+### 5.4 Monitor Deployment Progress
Watch the LLMInferenceService status:
@@ -361,9 +359,9 @@ The service is ready when the `READY` column shows `True`.
---
-## 7. Verifying the Deployment
+## 6. Verifying the Deployment
-### 7.1 Check Service Status
+### 6.1 Check Service Status
```bash
kubectl get llmisvc -n $NAMESPACE
@@ -376,7 +374,7 @@ NAME READY URL AGE
qwen2-7b-instruct True http://20.xx.xx.xx/llm-inference/... 5m
```
-### 7.2 Check Pod Status
+### 6.2 Check Pod Status
```bash
kubectl get pods -n $NAMESPACE
@@ -384,7 +382,7 @@ kubectl get pods -n $NAMESPACE
All pods should show `Running` status with `1/1` or `2/2` ready containers.
-### 7.3 Test Inference
+### 6.3 Test Inference
Retrieve the service URL:
@@ -405,19 +403,41 @@ curl -X POST "${SERVICE_URL}/v1/chat/completions" \
}'
```
+### 6.4 Run Preflight Validation
+
+Run the full validation suite to confirm all components are properly installed:
+
+```bash
+cd validation && make container && make run
+```
+
+All checks should show `PASSED`:
+
+```text
+cloud_provider PASSED
+instance_type PASSED
+gpu_availability PASSED
+crd_certmanager PASSED
+crd_sailoperator PASSED
+crd_lwsoperator PASSED
+crd_kserve PASSED
+```
+
+If any checks fail, review the suggested actions in the output. See the [Preflight Validation README](../validation/README.md) for configuration options.
+
---
-## 8. Optional: Enabling Monitoring
+## 7. Optional: Enabling Monitoring
Monitoring is disabled by default. Enable it if you need:
- Grafana dashboards for inference metrics
- Workload Variant Autoscaler (WVA) for auto-scaling
-### 8.1 Prerequisites
+### 7.1 Prerequisites
Install Prometheus with ServiceMonitor/PodMonitor CRD support. See the [Monitoring Setup Guide](../monitoring-stack/) for platform-specific instructions.
-### 8.2 Enable Monitoring in KServe
+### 7.2 Enable Monitoring in KServe
```bash
kubectl set env deployment/kserve-controller-manager \
@@ -427,7 +447,7 @@ kubectl set env deployment/kserve-controller-manager \
When enabled, KServe automatically creates `PodMonitor` resources for vLLM pods.
-### 8.3 Verify
+### 7.3 Verify
```bash
# Check PodMonitors created by KServe
@@ -436,7 +456,7 @@ kubectl get podmonitors -n
---
-## 9. Collecting Debug Information
+## 8. Collecting Debug Information
If you encounter issues during or after deployment, collect diagnostic data for troubleshooting:
@@ -465,9 +485,9 @@ See the full guide: [Collecting Debug Information](./collecting-debug-informatio
---
-## 10. Troubleshooting
+## 9. Troubleshooting
-### 10.1 Controller Pod Stuck in ContainerCreating
+### 9.1 Controller Pod Stuck in ContainerCreating
**Symptom:** The `kserve-controller-manager` pod remains in `ContainerCreating` state.
@@ -475,12 +495,17 @@ See the full guide: [Collecting Debug Information](./collecting-debug-informatio
**Resolution:**
+Verify the cert-manager PKI resources are applied (the KServe chart expects the `opendatahub-ca-issuer` ClusterIssuer):
+
```bash
-kubectl apply -k "https://github.com/opendatahub-io/kserve/config/overlays/odh-test/cert-manager?ref=release-v0.15"
-kubectl wait --for=condition=Ready certificate/kserve-webhook-server -n opendatahub --timeout=120s
+kubectl get clusterissuer opendatahub-ca-issuer
+kubectl get certificate -n cert-manager
+
+# If missing, re-run the deployment
+make deploy-kserve
```
-### 10.2 Gateway Pod Shows ErrImagePull
+### 9.2 Gateway Pod Shows ErrImagePull
**Symptom:** The Gateway pod fails with `ErrImagePull` or `ImagePullBackOff`.
@@ -499,7 +524,7 @@ kubectl patch sa inference-gateway-istio -n opendatahub \
kubectl delete pod -n opendatahub -l gateway.networking.k8s.io/gateway-name=inference-gateway
```
-### 10.3 LLMInferenceService Pod Shows FailedScheduling
+### 9.3 LLMInferenceService Pod Shows FailedScheduling
**Symptom:** The inference pod shows `FailedScheduling` with message "Insufficient nvidia.com/gpu".
@@ -517,9 +542,9 @@ kubectl delete pod -n opendatahub -l gateway.networking.k8s.io/gateway-name=infe
kubectl get nodes -o jsonpath='{range .items[*]}{.metadata.name}: {.spec.taints}{"\n"}{end}'
```
-3. Add matching tolerations to the LLMInferenceService spec (see Section 6.3).
+3. Add matching tolerations to the LLMInferenceService spec (see Section 5.3).
-### 10.4 Webhook Validation Errors During Deployment
+### 9.4 Webhook Validation Errors During Deployment
**Symptom:** Deployment fails with "no endpoints available for service" webhook errors.
@@ -528,11 +553,13 @@ kubectl delete pod -n opendatahub -l gateway.networking.k8s.io/gateway-name=infe
**Resolution:**
```bash
+# Delete stale webhooks
kubectl delete validatingwebhookconfiguration \
llminferenceservice.serving.kserve.io \
llminferenceserviceconfig.serving.kserve.io \
--ignore-not-found
+# Re-deploy KServe
make deploy-kserve
```
@@ -546,7 +573,7 @@ make deploy-kserve
| Sail Operator (Istio) | 3.2.1 | `registry.redhat.io/openshift-service-mesh/istio-sail-operator-bundle:3.2` |
| Istio | 1.27.x | Dynamic resolution via `v1.27-latest` |
| LeaderWorkerSet | 1.0 | `registry.k8s.io/lws/lws-controller` |
-| KServe Controller | 0.15 | `quay.io/opendatahub/kserve-controller` |
+| KServe Controller | 0.15 (chart 3.4.0-ea.1) | `registry.redhat.io` (via `charts/kserve/`) |
| vLLM | Latest | `registry.redhat.io/rhaiis-tech-preview/vllm-openai-rhel9` |
### API Versions
@@ -565,7 +592,7 @@ make deploy-kserve
For assistance with Red Hat AI Inference Server deployments, contact Red Hat Support or consult the product documentation.
**Additional Resources:**
+- [KServe Chart README](https://github.com/opendatahub-io/rhaii-on-xks/blob/main/charts/kserve/README.md) - KServe Helm chart details, PKI prerequisites, and OCI registry install
+- [Preflight Validation](https://github.com/opendatahub-io/rhaii-on-xks/blob/main/validation/README.md) - Cluster readiness and post-deployment validation checks
- [Monitoring Setup Guide](../monitoring-stack/) - Optional Prometheus/Grafana configuration for dashboards and autoscaling
-- [KServe LLMInferenceService Samples](https://github.com/opendatahub-io/kserve/tree/main/docs/samples/llmisvc)
-- [Gateway API Documentation](https://gateway-api.sigs.k8s.io/)
-- [Istio Documentation](https://istio.io/latest/docs/)
+- [KServe LLMInferenceService Samples](https://github.com/red-hat-data-services/kserve/tree/rhoai-3.4/docs/samples/llmisvc)
diff --git a/helmfile.yaml.gotmpl b/helmfile.yaml.gotmpl
index 7a68b41..7874b90 100644
--- a/helmfile.yaml.gotmpl
+++ b/helmfile.yaml.gotmpl
@@ -5,6 +5,7 @@
# helmfile apply --selector name=cert-manager-operator # Deploy only cert-manager
# helmfile apply --selector name=sail-operator # Deploy only istio
# helmfile apply --selector name=lws-operator # Deploy only lws
+# helmfile sync --selector name=kserve-rhaii-xks # Deploy only KServe
# helmfile destroy # Remove all
environments:
@@ -37,3 +38,30 @@ helmfiles:
values:
- useSystemPodmanAuth: {{ .Values.useSystemPodmanAuth | default true }}
- pullSecretFile: {{ .Values.pullSecretFile | default "" | quote }}
+
+---
+
+{{ $kserveChart := "oci://ghcr.io/opendatahub-io/kserve-rhaii-xks" }}
+{{ $kserveVersion := .Values.kserveChartVersion | default "3.4.0-ea.1-dev-8a30e66" }}
+
+releases:
+ - name: kserve-rhaii-xks
+ chart: {{ $kserveChart }}
+ # Use dev variant until official builds are released to registry.redhat.io
+ version: {{ $kserveVersion }}
+ namespace: opendatahub
+ disableValidation: true
+ # CRDs are applied separately via presync hook to avoid 1MB secret size limit
+ hooks:
+ # Apply CRDs before helm install via presync
+ - events: ["presync"]
+ showlogs: true
+ command: "sh"
+ args:
+ - "-c"
+ - |
+ set -e
+ CHART_DIR=$(mktemp -d)
+ trap 'rm -rf "$CHART_DIR"' EXIT
+ helm pull {{ $kserveChart }} --version {{ $kserveVersion }} --untar --untardir "$CHART_DIR"
+ kubectl apply -f "$CHART_DIR"/kserve-rhaii-xks/crds/ --server-side
diff --git a/scripts/cleanup.sh b/scripts/cleanup.sh
index 3e3fe51..7b3d3d0 100755
--- a/scripts/cleanup.sh
+++ b/scripts/cleanup.sh
@@ -91,13 +91,21 @@ done
echo "$CRDS" | grep -E "\.cert-manager\.io" | while read -r crd; do
kubectl delete "$crd" --ignore-not-found 2>/dev/null || true
done
-# LWS and cert-manager operator CRDs (exact names to avoid matching other OpenShift operators)
-kubectl delete crd certmanagers.operator.openshift.io leaderworkersetoperators.operator.openshift.io --ignore-not-found 2>/dev/null || true
+# LWS CRDs
+echo "$CRDS" | grep -E "leaderworkerset\.x-k8s\.io" | while read -r crd; do
+ kubectl delete "$crd" --ignore-not-found 2>/dev/null || true
+done
+# Operator CRDs (exact names to avoid matching other OpenShift operators)
+kubectl delete crd certmanagers.operator.openshift.io leaderworkersetoperators.operator.openshift.io istiocsrs.operator.openshift.io --ignore-not-found 2>/dev/null || true
# Gateway API CRDs and Inference Extension CRDs (InferencePool, InferenceModel)
# Matches both inference.networking.k8s.io (v1) and inference.networking.x-k8s.io (v1alpha2)
echo "$CRDS" | grep -E "gateway\.networking\.k8s\.io|inference\.networking\.k8s\.io|inference\.networking\.x-k8s\.io" | while read -r crd; do
kubectl delete "$crd" --ignore-not-found 2>/dev/null || true
done
+# KServe CRDs
+echo "$CRDS" | grep -E "serving\.kserve\.io" | while read -r crd; do
+ kubectl delete "$crd" --ignore-not-found 2>/dev/null || true
+done
# Infrastructure stub CRD
kubectl delete crd infrastructures.config.openshift.io --ignore-not-found 2>/dev/null || true
@@ -107,5 +115,6 @@ kubectl delete namespace cert-manager --ignore-not-found --wait=false 2>/dev/nul
kubectl delete namespace cert-manager-operator --ignore-not-found --wait=false 2>/dev/null || true
kubectl delete namespace istio-system --ignore-not-found --wait=false 2>/dev/null || true
kubectl delete namespace openshift-lws-operator --ignore-not-found --wait=false 2>/dev/null || true
+kubectl delete namespace opendatahub --ignore-not-found --wait=false 2>/dev/null || true
log "=== Cleanup Complete ==="
diff --git a/test/conformance/verify-llm-d-deployment.sh b/test/conformance/verify-llm-d-deployment.sh
index 72f3bd1..aa7b4dd 100755
--- a/test/conformance/verify-llm-d-deployment.sh
+++ b/test/conformance/verify-llm-d-deployment.sh
@@ -703,7 +703,7 @@ while [[ $# -gt 0 ]]; do
done
echo ""
echo "Upstream guides: https://github.com/llm-d/llm-d/tree/main/guides"
- echo "KServe docs: https://github.com/opendatahub-io/kserve/tree/release-v0.15/docs/samples/llmisvc"
+ echo "KServe docs: https://github.com/red-hat-data-services/kserve/tree/rhoai-3.4/docs/samples/llmisvc"
exit 0
;;
--help|-h)
@@ -745,7 +745,7 @@ Profiles:
Documentation:
Upstream: https://github.com/llm-d/llm-d/tree/main/guides
- KServe: https://github.com/opendatahub-io/kserve/tree/release-v0.15/docs/samples/llmisvc
+ KServe: https://github.com/red-hat-data-services/kserve/tree/rhoai-3.4/docs/samples/llmisvc
EOF
exit 0
;;
diff --git a/values.yaml b/values.yaml
index f233dec..c051b52 100644
--- a/values.yaml
+++ b/values.yaml
@@ -26,3 +26,10 @@ sailOperator:
# lws-operator (optional - for multi-node workloads)
lwsOperator:
enabled: true
+
+# =============================================================================
+# KServe
+# =============================================================================
+
+# KServe OCI chart version (dev variant until official builds are on registry.redhat.io)
+kserveChartVersion: "3.4.0-ea.1-dev-8a30e66"