Skip to content

Commit dbab13c

Browse files
committed
πŸ› Enable scale-from-zero on CKS and OCP with KEDA support
- Remove environment skip in scale_from_zero_test.go — test now runs on all platforms (KEDA must be pre-installed on the cluster) - Add retry logic to detect_inference_pool_api_group() to handle the race where InferencePool instances haven't been created yet after helmfile deploy - Make deploy_keda() skip helm install when KEDA CRD already exists (pre-installed on OCP via CMA operator, on CKS via helm) - Remove environment guard on SCALER_BACKEND=keda — supported everywhere Signed-off-by: Andy Anderson <andy@clubanderson.com> Signed-off-by: Andrew Anderson <andy@clubanderson.com>
1 parent 4af3ef9 commit dbab13c

3 files changed

Lines changed: 37 additions & 18 deletions

File tree

deploy/install.sh

Lines changed: 32 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -505,13 +505,32 @@ set_wva_logging_level() {
505505
# Detect which InferencePool API group is in use in the cluster (v1 vs v1alpha2).
506506
# Sets DETECTED_POOL_GROUP to inference.networking.k8s.io or inference.networking.x-k8s.io
507507
# so WVA can be upgraded to watch the correct group (required for scale-from-zero datastore).
508+
# Retries up to POOL_DETECT_RETRIES times (default 6, 10s apart) to handle the race where
509+
# InferencePool instances haven't been created yet after helmfile deploy.
508510
detect_inference_pool_api_group() {
509511
DETECTED_POOL_GROUP=""
510-
if [ -n "$(kubectl get inferencepools.inference.networking.k8s.io -A -o name --request-timeout=10s 2>/dev/null | head -1)" ]; then
511-
DETECTED_POOL_GROUP="inference.networking.k8s.io"
512-
elif [ -n "$(kubectl get inferencepools.inference.networking.x-k8s.io -A -o name --request-timeout=10s 2>/dev/null | head -1)" ]; then
513-
DETECTED_POOL_GROUP="inference.networking.x-k8s.io"
514-
fi
512+
local max_retries=${POOL_DETECT_RETRIES:-6}
513+
local retry_interval_s=10
514+
local attempt=0
515+
# Search in the target namespace first (avoids cluster-wide RBAC issues), then fall back to -A.
516+
local ns_flag="-A"
517+
if [ -n "${LLMD_NS:-}" ]; then
518+
ns_flag="-n $LLMD_NS"
519+
fi
520+
while [ $attempt -lt $max_retries ]; do
521+
if [ -n "$(kubectl get inferencepools.inference.networking.k8s.io $ns_flag -o name --request-timeout=10s 2>/dev/null | head -1)" ]; then
522+
DETECTED_POOL_GROUP="inference.networking.k8s.io"
523+
return
524+
elif [ -n "$(kubectl get inferencepools.inference.networking.x-k8s.io $ns_flag -o name --request-timeout=10s 2>/dev/null | head -1)" ]; then
525+
DETECTED_POOL_GROUP="inference.networking.x-k8s.io"
526+
return
527+
fi
528+
attempt=$((attempt + 1))
529+
if [ $attempt -lt $max_retries ]; then
530+
log_info "InferencePool not found yet, retrying in ${retry_interval_s}s ($attempt/$max_retries)..."
531+
sleep $retry_interval_s
532+
fi
533+
done
515534
}
516535

517536
deploy_wva_controller() {
@@ -1074,6 +1093,12 @@ deploy_llm_d_infrastructure() {
10741093
deploy_keda() {
10751094
log_info "Deploying KEDA (scaler backend)..."
10761095

1096+
# Skip install if KEDA ScaledObject CRD already exists (pre-installed on cluster)
1097+
if kubectl get crd scaledobjects.keda.sh >/dev/null 2>&1; then
1098+
log_success "KEDA is already installed on this cluster — skipping helm install"
1099+
return
1100+
fi
1101+
10771102
kubectl create namespace "$KEDA_NAMESPACE" --dry-run=client -o yaml | kubectl apply -f -
10781103

10791104
helm repo add kedacore https://kedacore.github.io/charts 2>/dev/null || true
@@ -1689,12 +1714,9 @@ main() {
16891714
fi
16901715

16911716
# Deploy scaler backend: KEDA or Prometheus Adapter
1692-
# KEDA in this script is for kind-emulator e2e only; on OpenShift use the platform CMA / Prometheus Adapter.
1717+
# KEDA is supported on all environments. On OpenShift and CKS it is typically
1718+
# pre-installed on the cluster; deploy_keda will detect and skip the install.
16931719
if [ "$SCALER_BACKEND" = "keda" ]; then
1694-
if [ "$ENVIRONMENT" != "kind-emulator" ]; then
1695-
log_error "KEDA scaler backend is only supported for kind-emulator environment (ENVIRONMENT=kind-emulator). Current: ENVIRONMENT=$ENVIRONMENT. Use SCALER_BACKEND=prometheus-adapter or run with ENVIRONMENT=kind-emulator."
1696-
exit 1
1697-
fi
16981720
deploy_keda
16991721
elif [ "$DEPLOY_PROMETHEUS_ADAPTER" = "true" ]; then
17001722
deploy_prometheus_adapter

test/e2e/scale_from_zero_test.go

Lines changed: 3 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -35,10 +35,9 @@ var _ = Describe("Scale-From-Zero Feature", Label("smoke", "full"), Ordered, fun
3535
)
3636

3737
BeforeAll(func() {
38-
// Scale-from-zero is not validated on OpenShift (POOL_GROUP / flow control setup differs; HPA minReplicas=0 often unsupported).
39-
if cfg.Environment == "openshift" {
40-
Skip("Scale-from-zero test is disabled on OpenShift")
41-
}
38+
// Scale-from-zero requires GIE flow control, InferenceObjective, and KEDA
39+
// (ScaledObject with minReplicas=0). KEDA must be pre-installed on the cluster.
40+
// Only kind-emulator installs KEDA at runtime via install.sh.
4241

4342
// Note: InferencePool should already exist from infra-only deployment
4443
// We no longer create InferencePools in individual tests

test/e2e/suite_test.go

Lines changed: 2 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -53,10 +53,8 @@ var _ = BeforeSuite(func() {
5353
By("Loading configuration from environment")
5454
cfg = LoadConfigFromEnv()
5555

56-
// KEDA scaler backend is only supported for kind-emulator (emulated) e2e; on OpenShift use platform CMA / Prometheus Adapter.
57-
if cfg.ScalerBackend == "keda" && cfg.Environment != "kind-emulator" {
58-
Fail("KEDA scaler backend is only supported for kind-emulator environment. Use ENVIRONMENT=kind-emulator or SCALER_BACKEND=prometheus-adapter.")
59-
}
56+
// KEDA is supported on all environments — pre-installed on OCP (CMA operator)
57+
// and CKS (helm), installed at runtime on kind-emulator via install.sh.
6058

6159
GinkgoWriter.Printf("=== E2E Test Configuration ===\n")
6260
GinkgoWriter.Printf("Environment: %s\n", cfg.Environment)

0 commit comments

Comments
(0)