🐛 Enable scale-from-zero on CKS and OCP with KEDA support

clubanderson · clubanderson · commit dbab13c5d84b · 2026-03-09T23:02:18.000-04:00
- Remove environment skip in scale_from_zero_test.go — test now runs on
  all platforms (KEDA must be pre-installed on the cluster)
- Add retry logic to detect_inference_pool_api_group() to handle the race
  where InferencePool instances haven't been created yet after helmfile deploy
- Make deploy_keda() skip helm install when KEDA CRD already exists
  (pre-installed on OCP via CMA operator, on CKS via helm)
- Remove environment guard on SCALER_BACKEND=keda — supported everywhere

Signed-off-by: Andy Anderson <andy@clubanderson.com>
Signed-off-by: Andrew Anderson <andy@clubanderson.com>
diff --git a/deploy/install.sh b/deploy/install.sh
@@ -505,13 +505,32 @@ set_wva_logging_level() {
 # Detect which InferencePool API group is in use in the cluster (v1 vs v1alpha2).
 # Sets DETECTED_POOL_GROUP to inference.networking.k8s.io or inference.networking.x-k8s.io
 # so WVA can be upgraded to watch the correct group (required for scale-from-zero datastore).
+# Makes up to POOL_DETECT_RETRIES attempts (default 6, 10s apart) to handle the race where
+# InferencePool instances haven't been created yet after helmfile deploy; DETECTED_POOL_GROUP stays empty if none is found.
 detect_inference_pool_api_group() {
     DETECTED_POOL_GROUP=""
-    if [ -n "$(kubectl get inferencepools.inference.networking.k8s.io -A -o name --request-timeout=10s 2>/dev/null | head -1)" ]; then
-        DETECTED_POOL_GROUP="inference.networking.k8s.io"
-    elif [ -n "$(kubectl get inferencepools.inference.networking.x-k8s.io -A -o name --request-timeout=10s 2>/dev/null | head -1)" ]; then
-        DETECTED_POOL_GROUP="inference.networking.x-k8s.io"
-    fi
+    local max_retries=${POOL_DETECT_RETRIES:-6}
+    local retry_interval_s=10
+    local attempt=0
+    # Search only the target namespace when LLMD_NS is set (avoids cluster-wide RBAC issues); otherwise search all namespaces (-A).
+    local ns_flag="-A"
+    if [ -n "${LLMD_NS:-}" ]; then
+        ns_flag="-n $LLMD_NS"
+    fi
+    while [ $attempt -lt $max_retries ]; do
+        if [ -n "$(kubectl get inferencepools.inference.networking.k8s.io $ns_flag -o name --request-timeout=10s 2>/dev/null | head -1)" ]; then
+            DETECTED_POOL_GROUP="inference.networking.k8s.io"
+            return
+        elif [ -n "$(kubectl get inferencepools.inference.networking.x-k8s.io $ns_flag -o name --request-timeout=10s 2>/dev/null | head -1)" ]; then
+            DETECTED_POOL_GROUP="inference.networking.x-k8s.io"
+            return
+        fi
+        attempt=$((attempt + 1))
+        if [ $attempt -lt $max_retries ]; then
+            log_info "InferencePool not found yet, retrying in ${retry_interval_s}s ($attempt/$max_retries)..."
+            sleep $retry_interval_s
+        fi
+    done
 }
 
 deploy_wva_controller() {
@@ -1074,6 +1093,12 @@ deploy_llm_d_infrastructure() {
 deploy_keda() {
     log_info "Deploying KEDA (scaler backend)..."
 
+    # Skip install if KEDA ScaledObject CRD already exists (pre-installed on cluster)
+    if kubectl get crd scaledobjects.keda.sh >/dev/null 2>&1; then
+        log_success "KEDA is already installed on this cluster — skipping helm install"
+        return
+    fi
+
     kubectl create namespace "$KEDA_NAMESPACE" --dry-run=client -o yaml | kubectl apply -f -
 
     helm repo add kedacore https://kedacore.github.io/charts 2>/dev/null || true
@@ -1689,12 +1714,9 @@ main() {
     fi
 
     # Deploy scaler backend: KEDA or Prometheus Adapter
-    # KEDA in this script is for kind-emulator e2e only; on OpenShift use the platform CMA / Prometheus Adapter.
+    # KEDA is supported on all environments. On OpenShift and CKS it is typically
+    # pre-installed on the cluster; deploy_keda will detect and skip the install.
     if [ "$SCALER_BACKEND" = "keda" ]; then
-        if [ "$ENVIRONMENT" != "kind-emulator" ]; then
-            log_error "KEDA scaler backend is only supported for kind-emulator environment (ENVIRONMENT=kind-emulator). Current: ENVIRONMENT=$ENVIRONMENT. Use SCALER_BACKEND=prometheus-adapter or run with ENVIRONMENT=kind-emulator."
-            exit 1
-        fi
         deploy_keda
     elif [ "$DEPLOY_PROMETHEUS_ADAPTER" = "true" ]; then
         deploy_prometheus_adapter
diff --git a/test/e2e/scale_from_zero_test.go b/test/e2e/scale_from_zero_test.go
@@ -35,10 +35,9 @@ var _ = Describe("Scale-From-Zero Feature", Label("smoke", "full"), Ordered, fun
 	)
 
 	BeforeAll(func() {
-		// Scale-from-zero is not validated on OpenShift (POOL_GROUP / flow control setup differs; HPA minReplicas=0 often unsupported).
-		if cfg.Environment == "openshift" {
-			Skip("Scale-from-zero test is disabled on OpenShift")
-		}
+		// Scale-from-zero requires GIE flow control, InferenceObjective, and KEDA
+		// (ScaledObject with minReplicas=0). KEDA must be pre-installed on the cluster.
+		// Only kind-emulator installs KEDA at runtime via install.sh.
 
 		// Note: InferencePool should already exist from infra-only deployment
 		// We no longer create InferencePools in individual tests
diff --git a/test/e2e/suite_test.go b/test/e2e/suite_test.go
@@ -53,10 +53,8 @@ var _ = BeforeSuite(func() {
 	By("Loading configuration from environment")
 	cfg = LoadConfigFromEnv()
 
-	// KEDA scaler backend is only supported for kind-emulator (emulated) e2e; on OpenShift use platform CMA / Prometheus Adapter.
-	if cfg.ScalerBackend == "keda" && cfg.Environment != "kind-emulator" {
-		Fail("KEDA scaler backend is only supported for kind-emulator environment. Use ENVIRONMENT=kind-emulator or SCALER_BACKEND=prometheus-adapter.")
-	}
+	// KEDA is supported on all environments — pre-installed on OCP (CMA operator)
+	// and CKS (helm), installed at runtime on kind-emulator via install.sh.
 
 	GinkgoWriter.Printf("=== E2E Test Configuration ===\n")
 	GinkgoWriter.Printf("Environment: %s\n", cfg.Environment)