Skip to content

Commit f2d2e7b

Browse files
committed
phase 1 clean up
Signed-off-by: Mohammed Abdi <mohammed.munir.abdi@ibm.com>
1 parent 2a6f718 commit f2d2e7b

17 files changed

Lines changed: 328 additions & 253 deletions

Makefile

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -27,6 +27,8 @@ E2E_MONITORING_NAMESPACE ?= workload-variant-autoscaler-monitoring
2727
E2E_EMULATED_LLMD_NAMESPACE ?= llm-d-sim
2828

2929
# Flags for deploy/install.sh installation script
30+
# Full e2e / CI-style cluster infra (WVA + llm-d, no chart VA/HPA): prefer `make deploy-e2e-infra`
31+
# (wraps ./deploy/install.sh with INFRA_ONLY=true; set ENVIRONMENT=kubernetes|openshift|kind-emulator).
3032
CREATE_CLUSTER ?= false
3133
DEPLOY_LLM_D ?= true
3234
DELETE_CLUSTER ?= false

deploy/README.md

Lines changed: 17 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -139,8 +139,8 @@ export DEPLOY_PROMETHEUS=true # Deploy Prometheus stack
139139
export DEPLOY_WVA=true # Deploy WVA controller
140140
export DEPLOY_LLM_D=true # Deploy llm-d infrastructure
141141
export DEPLOY_PROMETHEUS_ADAPTER=true # Deploy Prometheus Adapter
142-
export DEPLOY_VA=true # Deploy VariantAutoscaling CR
143-
export DEPLOY_HPA=true # Deploy HPA
142+
export DEPLOY_VA=true # Chart-managed VariantAutoscaling (default off; e2e often creates its own)
143+
export DEPLOY_HPA=true # Chart-managed HPA (default off; enable with DEPLOY_VA for demos)
144144

145145
# HPA Configuration
146146
export HPA_STABILIZATION_SECONDS=240 # HPA stabilization window (default: 240s)
@@ -188,6 +188,9 @@ bash install.sh
188188

189189
```bash
190190
export HF_TOKEN="hf_xxxxx"
191+
# Optional: chart-managed VA + HPA for a single-variant demo (install.sh defaults skip these)
192+
export DEPLOY_VA=true
193+
export DEPLOY_HPA=true
191194
make deploy-wva-on-k8s
192195
```
193196

@@ -198,6 +201,8 @@ export HF_TOKEN="hf_xxxxx"
198201
export MODEL_ID="meta-llama/Llama-2-7b-hf"
199202
export SLO_TPOT=5
200203
export SLO_TTFT=500
204+
export DEPLOY_VA=true
205+
export DEPLOY_HPA=true
201206
make deploy-wva-on-k8s
202207
```
203208

@@ -208,6 +213,7 @@ export DEPLOY_WVA=true
208213
export DEPLOY_LLM_D=false
209214
export DEPLOY_PROMETHEUS=true # Prometheus is needed for metrics - disable if it is already installed in your cluster
210215
export DEPLOY_PROMETHEUS_ADAPTER=false
216+
export DEPLOY_VA=true # Create a VariantAutoscaling CR for the existing model service
211217
export DEPLOY_HPA=false
212218
make deploy-wva-on-k8s
213219
```
@@ -216,6 +222,8 @@ make deploy-wva-on-k8s
216222

217223
```bash
218224
export HF_TOKEN="hf_xxxxx"
225+
export DEPLOY_VA=true
226+
export DEPLOY_HPA=true
219227
export HPA_STABILIZATION_SECONDS=30 # Fast scaling for dev/test (default: 240)
220228
make deploy-wva-on-k8s
221229
```
@@ -224,9 +232,10 @@ make deploy-wva-on-k8s
224232

225233
```bash
226234
export HF_TOKEN="hf_xxxxx"
227-
export HPA_STABILIZATION_SECONDS=0 # Immediate scaling for e2e tests
228-
export VLLM_MAX_NUM_SEQS=8 # Low batch size for easy saturation
229235
export E2E_TESTS_ENABLED=true
236+
export INFRA_ONLY=true # Tests create VA/HPA; see also make deploy-e2e-infra
237+
export HPA_STABILIZATION_SECONDS=0 # Only applies if chart HPA is enabled
238+
export VLLM_MAX_NUM_SEQS=8 # Low batch size for easy saturation
230239
make deploy-wva-on-k8s
231240
```
232241

@@ -236,6 +245,8 @@ make deploy-wva-on-k8s
236245
export HF_TOKEN="hf_xxxxx"
237246
export VLLM_MAX_NUM_SEQS=64 # Match desired max batch size
238247
export MODEL_ID="unsloth/Meta-Llama-3.1-8B"
248+
export DEPLOY_VA=true
249+
export DEPLOY_HPA=true
239250
make deploy-wva-on-k8s
240251
```
241252

@@ -650,8 +661,8 @@ Each guide includes platform-specific examples, troubleshooting, and quick start
650661
| `DEPLOY_WVA` | Deploy WVA controller | `true` |
651662
| `DEPLOY_LLM_D` | Deploy llm-d infrastructure | `true` |
652663
| `DEPLOY_PROMETHEUS_ADAPTER` | Deploy Prometheus Adapter | `true` |
653-
| `DEPLOY_VA` | Deploy VariantAutoscaling CR | `true` |
654-
| `DEPLOY_HPA` | Deploy HPA | `true` |
664+
| `DEPLOY_VA` | Deploy VariantAutoscaling CR via WVA Helm chart | `false` |
665+
| `DEPLOY_HPA` | Deploy HPA via WVA Helm chart | `false` |
655666
| `INFRA_ONLY` | Deploy only infrastructure (skip VA/HPA) | `false` |
656667
| `SKIP_CHECKS` | Skip prerequisite checks | `false` |
657668

deploy/inference-objective-e2e.yaml

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
1-
# InferenceObjective for GIE queuing (scale-from-zero e2e and flow control).
2-
# Applied when E2E_TESTS_ENABLED or ENABLE_SCALE_TO_ZERO is true.
1+
# InferenceObjective for GIE queuing (scale-from-zero flow control).
2+
# install.sh applies this when ENABLE_SCALE_TO_ZERO=true and E2E_TESTS_ENABLED is not true (e2e tests apply e2e-default from Go instead).
33
# poolRef.name is templated by install.sh to match the deployed InferencePool.
44
apiVersion: inference.networking.x-k8s.io/v1alpha2
55
kind: InferenceObjective

deploy/install.sh

Lines changed: 9 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -92,8 +92,10 @@ DEPLOY_PROMETHEUS=${DEPLOY_PROMETHEUS:-true}
9292
DEPLOY_WVA=${DEPLOY_WVA:-true}
9393
DEPLOY_LLM_D=${DEPLOY_LLM_D:-true}
9494
DEPLOY_PROMETHEUS_ADAPTER=${DEPLOY_PROMETHEUS_ADAPTER:-true}
95-
DEPLOY_VA=${DEPLOY_VA:-true}
96-
DEPLOY_HPA=${DEPLOY_HPA:-true}
95+
# Infra-first: chart-managed VariantAutoscaling / HPA are opt-in (e2e and operators
96+
# typically create their own CRs). Set DEPLOY_VA=true and DEPLOY_HPA=true for a demo stack.
97+
DEPLOY_VA=${DEPLOY_VA:-false}
98+
DEPLOY_HPA=${DEPLOY_HPA:-false}
9799
HPA_STABILIZATION_SECONDS=${HPA_STABILIZATION_SECONDS:-240}
98100
# HPA minReplicas: 0 enables scale-to-zero (requires HPAScaleToZero feature gate)
99101
# Default to 1 for safety; set to 0 for scale-to-zero testing
@@ -251,8 +253,8 @@ Environment Variables:
251253
DEPLOY_WVA Deploy WVA controller (default: true)
252254
DEPLOY_LLM_D Deploy llm-d infrastructure (default: true)
253255
DEPLOY_PROMETHEUS_ADAPTER Deploy Prometheus Adapter (default: true)
254-
DEPLOY_VA Deploy VariantAutoscaling (default: true)
255-
DEPLOY_HPA Deploy HPA (default: true)
256+
DEPLOY_VA Deploy VariantAutoscaling via chart (default: false)
257+
DEPLOY_HPA Deploy HPA via chart (default: false)
256258
HPA_STABILIZATION_SECONDS HPA stabilization window in seconds (default: 240)
257259
HPA_MIN_REPLICAS HPA minReplicas (default: 1, set to 0 for scale-to-zero)
258260
INFRA_ONLY Deploy only infrastructure (default: false, same as --infra-only flag)
@@ -1067,9 +1069,9 @@ deploy_llm_d_infrastructure() {
10671069
fi
10681070
fi
10691071

1070-
# Deploy InferenceObjective for GIE queuing when flow control is enabled (scale-from-zero / e2e).
1071-
# Enables gateway-level queuing so inference_extension_flow_control_queue_size is populated.
1072-
if [ "$ENABLE_SCALE_TO_ZERO" == "true" ] || [ "$E2E_TESTS_ENABLED" == "true" ]; then
1072+
# Deploy InferenceObjective for GIE queuing when flow control is enabled (scale-from-zero).
1073+
# E2E applies e2e-default from Go (test/e2e/fixtures) so tests do not depend on install.sh for this CR.
1074+
if [ "$E2E_TESTS_ENABLED" != "true" ] && [ "$ENABLE_SCALE_TO_ZERO" == "true" ]; then
10731075
if kubectl get crd inferenceobjectives.inference.networking.x-k8s.io &>/dev/null; then
10741076
local infobj_file="${WVA_PROJECT}/deploy/inference-objective-e2e.yaml"
10751077
if [ -f "$infobj_file" ]; then

deploy/kind-emulator/README.md

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -72,7 +72,8 @@ export DEPLOY_PROMETHEUS=true # Deploy Prometheus stack
7272
export DEPLOY_WVA=true # Deploy WVA controller
7373
export DEPLOY_LLM_D=true # Deploy llm-d infrastructure (emulated)
7474
export DEPLOY_PROMETHEUS_ADAPTER=true # Deploy Prometheus Adapter
75-
export DEPLOY_HPA=true # Deploy HPA
75+
export DEPLOY_VA=true # Opt in: chart VariantAutoscaling (default in script: false)
76+
export DEPLOY_HPA=true # Opt in: chart HPA (default in script: false)
7677
```
7778

7879
### Step-by-Step Setup
@@ -97,6 +98,7 @@ export DEPLOY_LLM_D=false
9798
export DEPLOY_PROMETHEUS=true # Prometheus is needed for WVA to scrape metrics
9899
export VLLM_SVC_ENABLED=true
99100
export DEPLOY_PROMETHEUS_ADAPTER=false
101+
export DEPLOY_VA=false
100102
export DEPLOY_HPA=false
101103
make deploy-wva-emulated-on-kind
102104
```
@@ -110,6 +112,8 @@ make deploy-wva-emulated-on-kind
110112
**4. Testing configuration with fast saturation:**
111113

112114
```bash
115+
export DEPLOY_VA=true
116+
export DEPLOY_HPA=true
113117
export VLLM_MAX_NUM_SEQS=8 # Low batch size for easy saturation
114118
export HPA_STABILIZATION_SECONDS=30 # Fast scaling for testing
115119
make deploy-wva-emulated-on-kind

deploy/kind-emulator/install.sh

Lines changed: 19 additions & 88 deletions
Original file line numberDiff line numberDiff line change
@@ -207,75 +207,29 @@ load_image() {
207207
log_success "Image '$WVA_IMAGE_REPO:$WVA_IMAGE_TAG' loaded into KIND cluster '$CLUSTER_NAME'"
208208
}
209209

210+
materialize_namespace() {
211+
kubectl create namespace "$1"
212+
}
213+
210214
#### REQUIRED FUNCTION used by deploy/install.sh ####
211215
create_namespaces() {
212-
log_info "Creating namespaces..."
213-
214-
for ns in $WVA_NS $MONITORING_NAMESPACE $LLMD_NS; do
215-
if kubectl get namespace $ns &> /dev/null; then
216-
log_warning "Namespace $ns already exists"
217-
else
218-
kubectl create namespace $ns
219-
log_success "Namespace $ns created"
220-
fi
221-
done
216+
local _deploy_lib_dir
217+
_deploy_lib_dir="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)/../lib"
218+
# shellcheck source=create_namespaces.sh
219+
source "${_deploy_lib_dir}/create_namespaces.sh"
220+
create_namespaces_shared_loop
222221
}
223222

223+
_wva_deploy_lib="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)/../lib"
224+
# shellcheck source=deploy_prometheus_kube_stack.sh
225+
source "${_wva_deploy_lib}/deploy_prometheus_kube_stack.sh"
226+
# shellcheck source=delete_namespaces_kube_like.sh
227+
source "${_wva_deploy_lib}/delete_namespaces_kube_like.sh"
228+
224229
#### REQUIRED FUNCTION used by deploy/install.sh ####
225-
# Deploy Prometheus stack with TLS for Kubernetes
230+
# Deploy Prometheus stack with TLS (shared with deploy/kubernetes/install.sh)
226231
deploy_prometheus_stack() {
227-
log_info "Deploying kube-prometheus-stack with TLS..."
228-
229-
# Add helm repo
230-
helm repo add prometheus-community https://prometheus-community.github.io/helm-charts || true
231-
if [ "${SKIP_HELM_REPO_UPDATE:-}" = "true" ]; then
232-
log_info "Skipping helm repo update (SKIP_HELM_REPO_UPDATE=true)"
233-
else
234-
helm repo update
235-
fi
236-
237-
# Create self-signed TLS certificate for Prometheus
238-
log_info "Creating self-signed TLS certificate for Prometheus"
239-
openssl req -x509 -newkey rsa:2048 -nodes \
240-
-keyout /tmp/prometheus-tls.key \
241-
-out /tmp/prometheus-tls.crt \
242-
-days 365 \
243-
-subj "/CN=prometheus" \
244-
-addext "subjectAltName=DNS:kube-prometheus-stack-prometheus.${MONITORING_NAMESPACE}.svc.cluster.local,DNS:kube-prometheus-stack-prometheus.${MONITORING_NAMESPACE}.svc,DNS:prometheus,DNS:localhost" \
245-
&> /dev/null
246-
247-
# Create Kubernetes secret with TLS certificate
248-
log_info "Creating Kubernetes secret for Prometheus TLS"
249-
kubectl create secret tls $PROMETHEUS_SECRET_NAME \
250-
--cert=/tmp/prometheus-tls.crt \
251-
--key=/tmp/prometheus-tls.key \
252-
-n $MONITORING_NAMESPACE \
253-
--dry-run=client -o yaml | kubectl apply -f - &> /dev/null
254-
255-
# Clean up temp files
256-
rm -f /tmp/prometheus-tls.{key,crt}
257-
258-
# Install kube-prometheus-stack with TLS enabled
259-
# Disable Grafana and Alertmanager — WVA only needs Prometheus for metrics collection.
260-
# Use a 10m timeout — 5m is insufficient on busy clusters (e.g. CKS with preemption).
261-
log_info "Installing kube-prometheus-stack with TLS configuration"
262-
helm upgrade --install kube-prometheus-stack prometheus-community/kube-prometheus-stack \
263-
-n $MONITORING_NAMESPACE \
264-
--set prometheus.prometheusSpec.serviceMonitorSelectorNilUsesHelmValues=false \
265-
--set prometheus.prometheusSpec.podMonitorSelectorNilUsesHelmValues=false \
266-
--set prometheus.service.type=ClusterIP \
267-
--set prometheus.service.port=$PROMETHEUS_PORT \
268-
--set prometheus.prometheusSpec.web.tlsConfig.cert.secret.name=$PROMETHEUS_SECRET_NAME \
269-
--set prometheus.prometheusSpec.web.tlsConfig.cert.secret.key=tls.crt \
270-
--set prometheus.prometheusSpec.web.tlsConfig.keySecret.name=$PROMETHEUS_SECRET_NAME \
271-
--set prometheus.prometheusSpec.web.tlsConfig.keySecret.key=tls.key \
272-
--set grafana.enabled=false \
273-
--set alertmanager.enabled=false \
274-
--timeout=10m \
275-
--wait
276-
277-
log_success "kube-prometheus-stack deployed with TLS"
278-
log_info "Prometheus URL: $PROMETHEUS_URL"
232+
deploy_prometheus_kube_stack
279233
}
280234

281235
#### REQUIRED FUNCTION - only for emulated environments ####
@@ -317,36 +271,13 @@ apply_llm_d_infrastructure_fixes() {
317271
fi
318272
}
319273

320-
# Kubernetes-specific Undeployment functions
321274
undeploy_prometheus_stack() {
322-
log_info "Uninstalling kube-prometheus-stack..."
323-
324-
helm uninstall kube-prometheus-stack -n $MONITORING_NAMESPACE 2>/dev/null || \
325-
log_warning "Prometheus stack not found or already uninstalled"
326-
327-
kubectl delete secret $PROMETHEUS_SECRET_NAME -n $MONITORING_NAMESPACE --ignore-not-found
328-
329-
log_success "Prometheus stack uninstalled"
275+
undeploy_prometheus_kube_stack
330276
}
331277

332278
#### REQUIRED FUNCTION used by deploy/install.sh ####
333279
delete_namespaces() {
334-
log_info "Deleting namespaces..."
335-
336-
for ns in $LLMD_NS $WVA_NS $MONITORING_NAMESPACE; do
337-
if kubectl get namespace $ns &> /dev/null; then
338-
if [[ "$ns" == "$LLMD_NS" && "$DEPLOY_LLM_D" == "false" ]] || [[ "$ns" == "$WVA_NS" && "$DEPLOY_WVA" == "false" ]] || [[ "$ns" == "$MONITORING_NAMESPACE" && "$DEPLOY_PROMETHEUS" == "false" ]] ; then
339-
log_info "Skipping deletion of namespace $ns as it was not deployed"
340-
else
341-
log_info "Deleting namespace $ns..."
342-
kubectl delete namespace $ns 2>/dev/null || \
343-
log_warning "Failed to delete namespace $ns"
344-
fi
345-
fi
346-
done
347-
348-
log_success "Namespaces deleted"
349-
280+
delete_namespaces_kube_like
350281
if [ "$DELETE_CLUSTER" = true ]; then
351282
delete_kind_cluster
352283
fi

deploy/kubernetes/README.md

Lines changed: 10 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -118,7 +118,8 @@ export DEPLOY_PROMETHEUS=true # Deploy kube-prometheus-stack
118118
export DEPLOY_WVA=true # Deploy WVA controller
119119
export DEPLOY_LLM_D=true # Deploy llm-d infrastructure
120120
export DEPLOY_PROMETHEUS_ADAPTER=true # Deploy Prometheus Adapter
121-
export DEPLOY_HPA=true # Deploy HPA
121+
export DEPLOY_VA=true # Opt in: chart VariantAutoscaling (install.sh default: false)
122+
export DEPLOY_HPA=true # Opt in: chart HPA (install.sh default: false)
122123
```
123124

124125
## Usage Examples
@@ -127,6 +128,8 @@ export DEPLOY_HPA=true # Deploy HPA
127128

128129
```bash
129130
export HF_TOKEN="hf_xxxxx"
131+
export DEPLOY_VA=true
132+
export DEPLOY_HPA=true
130133
make deploy-wva-on-k8s
131134
```
132135

@@ -136,16 +139,19 @@ make deploy-wva-on-k8s
136139
export HF_TOKEN="hf_xxxxx"
137140
export BASE_NAME="my-inference"
138141
export MODEL_ID="meta-llama/Llama-2-7b-hf"
142+
export DEPLOY_VA=true
143+
export DEPLOY_HPA=true
139144
make deploy-wva-on-k8s
140145
```
141146

142147
### Example 3: E2E Testing Configuration
143148

144149
```bash
145150
export HF_TOKEN="hf_xxxxx"
146-
export HPA_STABILIZATION_SECONDS=30 # Fast scaling for testing
147-
export VLLM_MAX_NUM_SEQS=8 # Low batch size for easy saturation
148151
export E2E_TESTS_ENABLED=true
152+
export INFRA_ONLY=true
153+
export HPA_STABILIZATION_SECONDS=30 # Only applies if chart HPA is enabled
154+
export VLLM_MAX_NUM_SEQS=8 # Low batch size for easy saturation
149155
make deploy-wva-on-k8s
150156
```
151157

@@ -157,6 +163,7 @@ export DEPLOY_LLM_D=false
157163
export DEPLOY_PROMETHEUS=true # Prometheus is needed for WVA to scrape metrics
158164
export VLLM_SVC_ENABLED=true
159165
export DEPLOY_PROMETHEUS_ADAPTER=false
166+
export DEPLOY_VA=true
160167
export DEPLOY_HPA=false
161168
make deploy-wva-on-k8s
162169
```

0 commit comments

Comments
 (0)