
Commit 0229c80

update: bump version for llm-d and all images + add support for podman

- use env variable LLM_D_RELEASE to control all images in deploy/install.sh
- clone llm-d locally, re-cloning when the local version does not match the required release version
- use env variable CONTAINER_TOOL to support podman on Fedora
- remove/update *ignore files

Signed-off-by: Wen Zhou <wenzhou@redhat.com>

1 parent 11c3130, commit 0229c80

7 files changed

Lines changed: 73 additions & 24 deletions

.dockerignore

Lines changed: 1 addition & 0 deletions

```diff
@@ -14,6 +14,7 @@ vendor/
 
 # Submodules and sibling repos (not needed for building the manager binary)
 sample-data/
+llm-d/
 llm-d-infra/
 # If building from a parent repo that includes llmd or GAIE, add:
 # llmd/
```

.gitignore

Lines changed: 0 additions & 2 deletions

```diff
@@ -30,8 +30,6 @@ gpu.cluster
 # llm-d and llm-d-infra directories
 llm-d/
 llm-d-infra/
-llmd/
-llmd-infra/
 
 *.tgz
 actionlint
```

Makefile

Lines changed: 3 additions & 1 deletion

```diff
@@ -7,7 +7,7 @@ CLUSTER_GPU_TYPE ?= nvidia-mix
 CLUSTER_NODES ?= 3
 CLUSTER_GPUS ?= 4
 KUBECONFIG ?= $(HOME)/.kube/config
-K8S_VERSION ?= v1.32.0
+K8S_VERSION ?= v1.32.0 # match OCP 4.19
 
 CONTROLLER_NAMESPACE ?= workload-variant-autoscaler-system
 MONITORING_NAMESPACE ?= openshift-user-workload-monitoring
@@ -194,6 +194,7 @@ deploy-e2e-infra: ## Deploy e2e test infrastructure (infra-only: WVA + llm-d, no
 		WVA_IMAGE_REPO=$$IMAGE_REPO \
 		WVA_IMAGE_TAG=$$IMAGE_TAG \
 		WVA_IMAGE_PULL_POLICY=IfNotPresent \
+		CONTAINER_TOOL=$(CONTAINER_TOOL) \
 		./deploy/install.sh; \
 	else \
 		echo "IMG not set - using default image from registry (latest)"; \
@@ -204,6 +205,7 @@ deploy-e2e-infra: ## Deploy e2e test infrastructure (infra-only: WVA + llm-d, no
 		SCALER_BACKEND=$(SCALER_BACKEND) \
 		INSTALL_GATEWAY_CTRLPLANE=true \
 		NAMESPACE_SCOPED=false \
+		CONTAINER_TOOL=$(CONTAINER_TOOL) \
 		./deploy/install.sh; \
 	fi
```
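The Makefile passes `CONTAINER_TOOL` down into the install scripts, which then fall back to `docker` when the variable is unset. A minimal sketch of that fallback pattern (same `${VAR:-default}` idiom the scripts use; the echo is illustrative only):

```shell
#!/usr/bin/env sh
# Sketch of the pass-through the diff relies on: the Makefile exports
# CONTAINER_TOOL, and each script defaults to docker when it is unset/empty.
CONTAINER_TOOL=${CONTAINER_TOOL:-docker}
echo "container tool: $CONTAINER_TOOL"
```

Running it with `CONTAINER_TOOL=podman` in the environment prints `container tool: podman`; with nothing set it prints `container tool: docker`.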

deploy/README.md

Lines changed: 25 additions & 1 deletion

````diff
@@ -32,6 +32,7 @@ All deployment methods require:
 - **kubectl** (v1.24+) - Kubernetes CLI
 - **helm** (v3.8+) - Package manager for Kubernetes
 - **git** - Git CLI
+- **docker** or **podman** - Container tool for building and loading images
 
 Optional but recommended:
 
@@ -42,6 +43,8 @@ Platform-specific requirements:
 - **OpenShift**: `oc` CLI (v4.12+)
 - **Kind**: `kind` CLI for local testing
 
+**Container Tool Support**: The deployment scripts support both Docker and Podman. Set `CONTAINER_TOOL=podman` to use Podman, or leave unset to use the default (`docker`).
+
 ### Cluster Requirements
 
 **Minimum cluster specifications**:
@@ -269,6 +272,22 @@ kubectl get hpa --all-namespaces | grep -v kube-system # Should be empty (except
 - ❌ Model services (tests create these)
 ```
 
+##### Example 8: Using specific llm-d release and Podman
+
+Deploy with a specific llm-d release version and use Podman instead of Docker:
+
+```bash
+export HF_TOKEN="hf_xxxxx"
+export LLM_D_RELEASE="v0.5.0"  # Pin to specific llm-d version
+export CONTAINER_TOOL=podman   # Use Podman instead of Docker
+make deploy-wva-emulated-on-kind
+
+# The LLM_D_RELEASE variable automatically sets:
+# - LLM_D_INFERENCE_SCHEDULER_IMG=ghcr.io/llm-d/llm-d-inference-scheduler:v0.5.0
+# - LLM_D_INFERENCE_SIM_IMG=ghcr.io/llm-d/llm-d-inference-sim:v0.5.0
+# - llm-d repository clone version
+```
+
 ### Method 2: Helm Chart
 
 The WVA can be deployed as a standalone using Helm, assuming you have:
@@ -621,6 +640,12 @@ Each guide includes platform-specific examples, troubleshooting, and quick start
 | `WVA_IMAGE_REPO` | WVA image repository | `ghcr.io/llm-d/llm-d-workload-variant-autoscaler` |
 | `WVA_IMAGE_TAG` | WVA image tag | `latest` |
 | `WVA_IMAGE_PULL_POLICY` | Image pull policy | `Always` |
+| `LLM_D_RELEASE` | llm-d release version (controls all llm-d images) | `v0.5.1` |
+| `LLM_D_INFERENCE_SCHEDULER_IMG` | Override llm-d inference scheduler image | `ghcr.io/llm-d/llm-d-inference-scheduler:$LLM_D_RELEASE` |
+| `LLM_D_INFERENCE_SIM_IMG` | Override llm-d inference simulator image | `ghcr.io/llm-d/llm-d-inference-sim:$LLM_D_RELEASE` |
+| `CONTAINER_TOOL` | Container tool to use (docker or podman) | `docker` |
+
+**Centralized llm-d Version Management**: Setting `LLM_D_RELEASE` automatically configures all llm-d component images to use the same release version. This ensures version consistency across the llm-d inference scheduler and simulator. Individual image variables can override this if needed.
 
 #### Namespace Configuration
 
@@ -682,7 +707,6 @@ HPA_STABILIZATION_SECONDS=30 ./deploy/install.sh
 | `WVA_LOG_LEVEL` | WVA logging level | `info` |
 | `VLLM_SVC_ENABLED` | Enable vLLM Service | `true` |
 | `VLLM_SVC_NODEPORT` | vLLM NodePort | `30000` |
-| `LLM_D_RELEASE` | llm-d version | `v0.3.0` |
 | `VLLM_MAX_NUM_SEQS` | vLLM max concurrent sequences per replica | (unset - uses vLLM default) |
 
 **vLLM Performance Tuning:**
````
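The centralized version scheme described in the README is just two default expansions: each image tag follows `LLM_D_RELEASE` unless the image variable is set explicitly. A minimal sketch, assuming the same variable names as deploy/install.sh:

```shell
#!/usr/bin/env bash
# Each image defaults to the LLM_D_RELEASE tag; exporting either image
# variable beforehand overrides the derived value (variable names taken
# from deploy/install.sh).
LLM_D_RELEASE=${LLM_D_RELEASE:-"v0.5.1"}
LLM_D_INFERENCE_SCHEDULER_IMG=${LLM_D_INFERENCE_SCHEDULER_IMG:-"ghcr.io/llm-d/llm-d-inference-scheduler:$LLM_D_RELEASE"}
LLM_D_INFERENCE_SIM_IMG=${LLM_D_INFERENCE_SIM_IMG:-"ghcr.io/llm-d/llm-d-inference-sim:$LLM_D_RELEASE"}
echo "$LLM_D_INFERENCE_SCHEDULER_IMG"
echo "$LLM_D_INFERENCE_SIM_IMG"
```

With nothing exported this prints both images tagged `v0.5.1`; exporting `LLM_D_INFERENCE_SIM_IMG` alone pins only the simulator while the scheduler still tracks the release.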

deploy/install.sh

Lines changed: 21 additions & 11 deletions

```diff
@@ -47,7 +47,7 @@ CONTROLLER_INSTANCE=${CONTROLLER_INSTANCE:-""}
 # llm-d Configuration
 LLM_D_OWNER=${LLM_D_OWNER:-"llm-d"}
 LLM_D_PROJECT=${LLM_D_PROJECT:-"llm-d"}
-LLM_D_RELEASE=${LLM_D_RELEASE:-"v0.3.0"}
+LLM_D_RELEASE=${LLM_D_RELEASE:-"v0.5.1"}
 LLM_D_MODELSERVICE_NAME=${LLM_D_MODELSERVICE_NAME:-"ms-$WELL_LIT_PATH_NAME-llm-d-modelservice"}
 LLM_D_EPP_NAME=${LLM_D_EPP_NAME:-"gaie-$WELL_LIT_PATH_NAME-epp"}
 CLIENT_PREREQ_DIR=${CLIENT_PREREQ_DIR:-"$WVA_PROJECT/$LLM_D_PROJECT/guides/prereq/client-setup"}
@@ -57,9 +57,8 @@ LLM_D_MODELSERVICE_VALUES=${LLM_D_MODELSERVICE_VALUES:-"$EXAMPLE_DIR/ms-$WELL_LI
 ITL_AVERAGE_LATENCY_MS=${ITL_AVERAGE_LATENCY_MS:-20}
 TTFT_AVERAGE_LATENCY_MS=${TTFT_AVERAGE_LATENCY_MS:-200}
 ENABLE_SCALE_TO_ZERO=${ENABLE_SCALE_TO_ZERO:-true}
-# llm-d-inference scheduler with image with flowcontrol support
-# TODO: update once the llm-d-inference-scheduler v0.5.0 is released
-LLM_D_INFERENCE_SCHEDULER_IMG=${LLM_D_INFERENCE_SCHEDULER_IMG:-"ghcr.io/llm-d/llm-d-inference-scheduler:v0.5.0-rc.1"}
+LLM_D_INFERENCE_SCHEDULER_IMG=${LLM_D_INFERENCE_SCHEDULER_IMG:-"ghcr.io/llm-d/llm-d-inference-scheduler:$LLM_D_RELEASE"}
+LLM_D_INFERENCE_SIM_IMG=${LLM_D_INFERENCE_SIM_IMG:-"ghcr.io/llm-d/llm-d-inference-sim:$LLM_D_RELEASE"}
 
 # Gateway Configuration
 GATEWAY_PROVIDER=${GATEWAY_PROVIDER:-"istio"} # Options: kgateway, istio
@@ -616,7 +615,7 @@ spec:
       serviceAccountName: gaie-sim-sa
       containers:
       - name: epp
-        image: ghcr.io/llm-d/llm-d-inference-scheduler:v0.3.2
+        image: $LLM_D_INFERENCE_SCHEDULER_IMG
         imagePullPolicy: Always
         args:
         - --poolName=$POOL_NAME_2
@@ -687,7 +686,7 @@ spec:
     spec:
       containers:
       - name: vllm
-        image: ghcr.io/llm-d/llm-d-inference-sim:v0.5.1
+        image: $LLM_D_INFERENCE_SIM_IMG
         imagePullPolicy: Always
         args:
        - --model=$MODEL_ID_2
@@ -787,12 +786,23 @@ EOF
 deploy_llm_d_infrastructure() {
     log_info "Deploying llm-d infrastructure..."
 
-    # Clone llm-d repo if not exists
+    # Clone llm-d repo if not exists or if has older version locally
+    if [ -d "$LLM_D_PROJECT/.git" ]; then
+        CURRENT_TAG=$(cd "$LLM_D_PROJECT" && git describe --tags --exact-match 2>/dev/null || echo "unknown")
+        if [ "$CURRENT_TAG" != "$LLM_D_RELEASE" ]; then
+            log_warning "$LLM_D_PROJECT exists but has version '$CURRENT_TAG' (expected: $LLM_D_RELEASE)"
+            rm -rf "$LLM_D_PROJECT"
+        else
+            log_info "$LLM_D_PROJECT directory already exists with correct version ($LLM_D_RELEASE)"
+        fi
+    elif [ -d "$LLM_D_PROJECT" ]; then
+        log_warning "$LLM_D_PROJECT exists but is not a git repository - removing it"
+        rm -rf "$LLM_D_PROJECT"
+    fi
+
     if [ ! -d "$LLM_D_PROJECT" ]; then
         log_info "Cloning $LLM_D_PROJECT repository (release: $LLM_D_RELEASE)"
         git clone -b $LLM_D_RELEASE -- https://github.com/$LLM_D_OWNER/$LLM_D_PROJECT.git $LLM_D_PROJECT &> /dev/null
-    else
-        log_warning "$LLM_D_PROJECT directory already exists, skipping clone"
     fi
 
     # Check for HF_TOKEN (use dummy for emulated deployments)
@@ -839,7 +849,7 @@ deploy_llm_d_infrastructure() {
     # Install Gateway control plane if enabled
     if [[ "$INSTALL_GATEWAY_CTRLPLANE" == "true" ]]; then
         log_info "Installing Gateway control plane ($GATEWAY_PROVIDER)"
-        helmfile apply -f "$GATEWAY_PREREQ_DIR/$GATEWAY_PROVIDER.helmfile.yaml"
+        helmfile apply -f "$GATEWAY_PREREQ_DIR/$GATEWAY_PROVIDER.helmfile.yaml" --suppress-diff
     else
         log_info "Skipping Gateway control plane installation (INSTALL_GATEWAY_CTRLPLANE=false)"
     fi
@@ -930,7 +940,7 @@ deploy_llm_d_infrastructure() {
         helmfile_selector="--selector kind!=autoscaling"
         log_info "Skipping WVA in helmfile (will be deployed separately from local chart)"
     fi
-    helmfile apply -e $GATEWAY_PROVIDER -n ${LLMD_NS} $helmfile_selector
+    helmfile apply -e $GATEWAY_PROVIDER -n ${LLMD_NS} $helmfile_selector --suppress-diff
 
     # Post-deploy: align the WVA vllm-service selector and ServiceMonitor to match
     # the actual pod labels. The llm-d-modelservice chart sets pod labels from
```
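The new clone guard hinges on `git describe --tags --exact-match`, which prints a tag name only when HEAD sits exactly on a tagged commit and fails otherwise, so the `|| echo "unknown"` fallback triggers removal of a stale clone. A self-contained demonstration in a throwaway repository (the temp repo and tag are illustrative, not part of the commit):

```shell
#!/usr/bin/env bash
# Demonstrate the tag check used by deploy_llm_d_infrastructure:
# describe --exact-match succeeds only when HEAD is at the tag.
set -e
tmp=$(mktemp -d)
git init -q "$tmp"
git -C "$tmp" -c user.email=ci@example.com -c user.name=ci \
    commit -q --allow-empty -m "init"
git -C "$tmp" tag v0.5.1
CURRENT_TAG=$(cd "$tmp" && git describe --tags --exact-match 2>/dev/null || echo "unknown")
echo "$CURRENT_TAG"   # the tag name, since HEAD is exactly on it
rm -rf "$tmp"
```

If the clone had moved past the tag (or had no tag at all), `CURRENT_TAG` would be `unknown`, mismatching `LLM_D_RELEASE` and causing the script to delete and re-clone.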

deploy/kind-emulator/install.sh

Lines changed: 18 additions & 6 deletions

```diff
@@ -39,6 +39,9 @@ WVA_LOG_LEVEL="debug" # WVA log level set to debug for emulated environments
 # Initial WVA pool group; install.sh auto-detects the actual InferencePool API group after llm-d deploy and upgrades WVA (scale-from-zero).
 POOL_GROUP=${POOL_GROUP:-"inference.networking.k8s.io"}
 
+# Container tool (docker or podman can pass from Makefile)
+CONTAINER_TOOL=${CONTAINER_TOOL:-docker}
+
 # llm-d Configuration
 LLM_D_INFERENCE_SIM_IMG_REPO=${LLM_D_INFERENCE_SIM_IMG_REPO:-"ghcr.io/llm-d/llm-d-inference-sim"}
 LLM_D_INFERENCE_SIM_IMG_TAG=${LLM_D_INFERENCE_SIM_IMG_TAG:-"latest"}
@@ -173,14 +176,14 @@ load_image() {
         log_info "Using local image only (WVA_IMAGE_PULL_POLICY=IfNotPresent)"
 
         # Check if the image exists locally
-        if ! docker image inspect "$WVA_IMAGE_REPO:$WVA_IMAGE_TAG" >/dev/null 2>&1; then
-            log_error "Image '$WVA_IMAGE_REPO:$WVA_IMAGE_TAG' not found locally - Please build the image first (e.g., 'make docker-build IMG=$WVA_IMAGE_REPO:$WVA_IMAGE_TAG')"
+        if ! $CONTAINER_TOOL image inspect "$WVA_IMAGE_REPO:$WVA_IMAGE_TAG" >/dev/null 2>&1; then
+            log_error "Image '$WVA_IMAGE_REPO:$WVA_IMAGE_TAG' not found locally - Please build the image first (e.g., 'make $CONTAINER_TOOL-build IMG=$WVA_IMAGE_REPO:$WVA_IMAGE_TAG')"
         else
             log_success "Found local image '$WVA_IMAGE_REPO:$WVA_IMAGE_TAG'"
         fi
     else
         # Pull a single-platform image so kind load does not hit "content digest not found"
-        # (multi-platform manifests can reference blobs that are not in the docker save stream).
+        # (multi-platform manifests can reference blobs that are not in the $CONTAINER_TOOL save stream).
         local platform="${KIND_IMAGE_PLATFORM:-}"
         if [ -z "$platform" ]; then
             case "$(uname -m)" in
@@ -202,9 +205,18 @@ load_image() {
     fi
 
     # Load the image into the KIND cluster
-    kind load docker-image "$WVA_IMAGE_REPO:$WVA_IMAGE_TAG" --name "$CLUSTER_NAME"
-
-    log_success "Image '$WVA_IMAGE_REPO:$WVA_IMAGE_TAG' loaded into KIND cluster '$CLUSTER_NAME'"
+    if [ "$CONTAINER_TOOL" = "podman" ]; then
+        # Podman requires a different approach - save to tar and load archive
+        log_info "Using Podman - saving image to tar archive for Kind loading..."
+        local tmp_tar="/tmp/wva-image-$(date +%s).tar"
+        $CONTAINER_TOOL save -o "$tmp_tar" "$WVA_IMAGE_REPO:$WVA_IMAGE_TAG"
+        kind load image-archive "$tmp_tar" --name "$CLUSTER_NAME"
+        rm -f "$tmp_tar"
+        log_success "Image '$WVA_IMAGE_REPO:$WVA_IMAGE_TAG' loaded into KIND cluster '$CLUSTER_NAME' (via archive)"
+    else
+        kind load docker-image "$WVA_IMAGE_REPO:$WVA_IMAGE_TAG" --name "$CLUSTER_NAME"
+        log_success "Image '$WVA_IMAGE_REPO:$WVA_IMAGE_TAG' loaded into KIND cluster '$CLUSTER_NAME'"
+    fi
 }
 
 #### REQUIRED FUNCTION used by deploy/install.sh ####
```
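The Podman branch above exists because `kind load docker-image` talks to the Docker daemon, which Podman does not provide, so the image must go through a tar archive and `kind load image-archive` instead. A dry-run sketch of that dispatch (commands are echoed rather than executed, so no cluster or images are needed; the image name below is illustrative):

```shell
#!/usr/bin/env bash
# Dry-run of the load_image dispatch: podman exports to a tar archive and
# loads it via `kind load image-archive`; docker uses `kind load docker-image`.
CONTAINER_TOOL=${CONTAINER_TOOL:-docker}
IMG="ghcr.io/llm-d/llm-d-workload-variant-autoscaler:latest"
CLUSTER_NAME=${CLUSTER_NAME:-kind}
if [ "$CONTAINER_TOOL" = "podman" ]; then
    tmp_tar="/tmp/wva-image.tar"
    echo "$CONTAINER_TOOL save -o $tmp_tar $IMG"
    echo "kind load image-archive $tmp_tar --name $CLUSTER_NAME"
else
    echo "kind load docker-image $IMG --name $CLUSTER_NAME"
fi
```

With `CONTAINER_TOOL=podman` this prints the two-step save/load sequence; otherwise it prints the single `kind load docker-image` command.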

deploy/kubernetes/create-kind-cluster-with-nvidia.sh

Lines changed: 5 additions & 3 deletions

```diff
@@ -1,8 +1,10 @@
 #!/usr/bin/env bash
-
 set -e
 set -o pipefail
 
+# Container tool (docker or podman)
+CONTAINER_TOOL=${CONTAINER_TOOL:-docker}
+
 GPU_OPERATOR_NS=gpu-operator
 
 echo "> Creating Kind cluster"
@@ -22,10 +24,10 @@ echo "> Deploying cert manager"
 kubectl apply -f https://github.com/cert-manager/cert-manager/releases/download/v1.15.3/cert-manager.yaml
 
 echo "> Creating symlink in the control-plane container"
-docker exec -ti kind-control-plane ln -s /sbin/ldconfig /sbin/ldconfig.real
+$CONTAINER_TOOL exec -ti kind-control-plane ln -s /sbin/ldconfig /sbin/ldconfig.real
 
 echo "> Unmounting the nvidia devices in the control-plane container"
-docker exec -ti kind-control-plane umount -R /proc/driver/nvidia
+$CONTAINER_TOOL exec -ti kind-control-plane umount -R /proc/driver/nvidia
 
 # According to https://docs.nvidia.com/datacenter/cloud-native/gpu-operator/latest/getting-started.html
 echo "> Adding/updateding the NVIDIA Helm repository"
```
