Skip to content

Commit 7a39fb2

Browse files
committed
πŸ› Enable scale-from-zero on CKS and OCP with KEDA support
- Remove environment skip in scale_from_zero_test.go — test now runs on all platforms (KEDA must be pre-installed on the cluster)
- Add retry logic to detect_inference_pool_api_group() to handle the race where InferencePool instances haven't been created yet after helmfile deploy
- Make deploy_keda() skip helm install when KEDA CRD already exists (pre-installed on OCP via CMA operator, on CKS via helm)
- Remove environment guard on SCALER_BACKEND=keda — supported everywhere

Signed-off-by: Andy Anderson <andy@clubanderson.com>
Signed-off-by: Andrew Anderson <andy@clubanderson.com>
1 parent 4af3ef9 commit 7a39fb2

2 files changed

Lines changed: 30 additions & 14 deletions

File tree

deploy/install.sh

Lines changed: 27 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -505,13 +505,27 @@ set_wva_logging_level() {
505505
# Detect which InferencePool API group is in use in the cluster (v1 vs v1alpha2).
506506
# Sets DETECTED_POOL_GROUP to inference.networking.k8s.io or inference.networking.x-k8s.io
507507
# so WVA can be upgraded to watch the correct group (required for scale-from-zero datastore).
508+
# Retries up to POOL_DETECT_RETRIES times (default 6, 10s apart) to handle the race where
509+
# InferencePool instances haven't been created yet after helmfile deploy.
508510
detect_inference_pool_api_group() {
509511
DETECTED_POOL_GROUP=""
510-
if [ -n "$(kubectl get inferencepools.inference.networking.k8s.io -A -o name --request-timeout=10s 2>/dev/null | head -1)" ]; then
511-
DETECTED_POOL_GROUP="inference.networking.k8s.io"
512-
elif [ -n "$(kubectl get inferencepools.inference.networking.x-k8s.io -A -o name --request-timeout=10s 2>/dev/null | head -1)" ]; then
513-
DETECTED_POOL_GROUP="inference.networking.x-k8s.io"
514-
fi
512+
local max_retries=${POOL_DETECT_RETRIES:-6}
513+
local retry_interval_s=10
514+
local attempt=0
515+
while [ $attempt -lt $max_retries ]; do
516+
if [ -n "$(kubectl get inferencepools.inference.networking.k8s.io -A -o name --request-timeout=10s 2>/dev/null | head -1)" ]; then
517+
DETECTED_POOL_GROUP="inference.networking.k8s.io"
518+
return
519+
elif [ -n "$(kubectl get inferencepools.inference.networking.x-k8s.io -A -o name --request-timeout=10s 2>/dev/null | head -1)" ]; then
520+
DETECTED_POOL_GROUP="inference.networking.x-k8s.io"
521+
return
522+
fi
523+
attempt=$((attempt + 1))
524+
if [ $attempt -lt $max_retries ]; then
525+
log_info "InferencePool not found yet, retrying in ${retry_interval_s}s ($attempt/$max_retries)..."
526+
sleep $retry_interval_s
527+
fi
528+
done
515529
}
516530

517531
deploy_wva_controller() {
@@ -1074,6 +1088,12 @@ deploy_llm_d_infrastructure() {
10741088
deploy_keda() {
10751089
log_info "Deploying KEDA (scaler backend)..."
10761090

1091+
# Skip install if KEDA ScaledObject CRD already exists (pre-installed on cluster)
1092+
if kubectl get crd scaledobjects.keda.sh >/dev/null 2>&1; then
1093+
log_success "KEDA is already installed on this cluster — skipping helm install"
1094+
return
1095+
fi
1096+
10771097
kubectl create namespace "$KEDA_NAMESPACE" --dry-run=client -o yaml | kubectl apply -f -
10781098

10791099
helm repo add kedacore https://kedacore.github.io/charts 2>/dev/null || true
@@ -1689,12 +1709,9 @@ main() {
16891709
fi
16901710

16911711
# Deploy scaler backend: KEDA or Prometheus Adapter
1692-
# KEDA in this script is for kind-emulator e2e only; on OpenShift use the platform CMA / Prometheus Adapter.
1712+
# KEDA is supported on all environments. On OpenShift and CKS it is typically
1713+
# pre-installed on the cluster; deploy_keda will detect and skip the install.
16931714
if [ "$SCALER_BACKEND" = "keda" ]; then
1694-
if [ "$ENVIRONMENT" != "kind-emulator" ]; then
1695-
log_error "KEDA scaler backend is only supported for kind-emulator environment (ENVIRONMENT=kind-emulator). Current: ENVIRONMENT=$ENVIRONMENT. Use SCALER_BACKEND=prometheus-adapter or run with ENVIRONMENT=kind-emulator."
1696-
exit 1
1697-
fi
16981715
deploy_keda
16991716
elif [ "$DEPLOY_PROMETHEUS_ADAPTER" = "true" ]; then
17001717
deploy_prometheus_adapter

test/e2e/scale_from_zero_test.go

Lines changed: 3 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -35,10 +35,9 @@ var _ = Describe("Scale-From-Zero Feature", Label("smoke", "full"), Ordered, fun
3535
)
3636

3737
BeforeAll(func() {
38-
// Scale-from-zero is not validated on OpenShift (POOL_GROUP / flow control setup differs; HPA minReplicas=0 often unsupported).
39-
if cfg.Environment == "openshift" {
40-
Skip("Scale-from-zero test is disabled on OpenShift")
41-
}
38+
// Scale-from-zero requires GIE flow control, InferenceObjective, and KEDA
39+
// (ScaledObject with minReplicas=0). KEDA must be pre-installed on the cluster.
40+
// Only kind-emulator installs KEDA at runtime via install.sh.
4241

4342
// Note: InferencePool should already exist from infra-only deployment
4443
// We no longer create InferencePools in individual tests

0 commit comments

Comments
(0)