Skip to content

Commit 669f5c4

Browse files
authored
cleanup/refactor deploy scripts (llm-d#959)
* phase 1 clean up
Signed-off-by: Mohammed Abdi <mohammed.munir.abdi@ibm.com>
* phase 2 decouple
Signed-off-by: Mohammed Abdi <mohammed.munir.abdi@ibm.com>
* skip modelservice install for e2es
Signed-off-by: Mohammed Abdi <mohammed.munir.abdi@ibm.com>
* apply review changes from copilot
Signed-off-by: Mohammed Abdi <mohammed.munir.abdi@ibm.com>
---------
Signed-off-by: Mohammed Abdi <mohammed.munir.abdi@ibm.com>
1 parent f097ac8 commit 669f5c4

32 files changed

Lines changed: 2231 additions & 2118 deletions

Makefile

Lines changed: 18 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -27,6 +27,8 @@ E2E_MONITORING_NAMESPACE ?= workload-variant-autoscaler-monitoring
2727
E2E_EMULATED_LLMD_NAMESPACE ?= llm-d-sim
2828

2929
# Flags for deploy/install.sh installation script
30+
# Full e2e / CI-style cluster infra (WVA + llm-d, no chart VA/HPA): prefer `make deploy-e2e-infra`
31+
# (wraps ./deploy/install.sh with INFRA_ONLY=true; set ENVIRONMENT=kubernetes|openshift|kind-emulator).
3032
CREATE_CLUSTER ?= false
3133
DEPLOY_LLM_D ?= true
3234
DELETE_CLUSTER ?= false
@@ -111,7 +113,7 @@ destroy-kind-cluster:
111113
.PHONY: deploy-wva-emulated-on-kind
112114
deploy-wva-emulated-on-kind: ## Deploy WVA + llm-d on Kind (Prometheus Adapter as scaler backend)
113115
@echo ">>> Deploying workload-variant-autoscaler (cluster args: $(KIND_ARGS), image: $(IMG))"
114-
KIND=$(KIND) KUBECTL=$(KUBECTL) IMG=$(IMG) DEPLOY_LLM_D=$(DEPLOY_LLM_D) ENVIRONMENT=kind-emulator CREATE_CLUSTER=$(CREATE_CLUSTER) CLUSTER_GPU_TYPE=$(CLUSTER_GPU_TYPE) CLUSTER_NODES=$(CLUSTER_NODES) CLUSTER_GPUS=$(CLUSTER_GPUS) MULTI_MODEL_TESTING=$(MULTI_MODEL_TESTING) NAMESPACE_SCOPED=false SCALER_BACKEND=$(SCALER_BACKEND) \
116+
KIND=$(KIND) KUBECTL=$(KUBECTL) IMG=$(IMG) DEPLOY_LLM_D=$(DEPLOY_LLM_D) ENVIRONMENT=kind-emulator CREATE_CLUSTER=$(CREATE_CLUSTER) CLUSTER_GPU_TYPE=$(CLUSTER_GPU_TYPE) CLUSTER_NODES=$(CLUSTER_NODES) CLUSTER_GPUS=$(CLUSTER_GPUS) NAMESPACE_SCOPED=false SCALER_BACKEND=$(SCALER_BACKEND) \
115117
deploy/install.sh
116118

117119
## Undeploy WVA from the emulated environment on Kind.
@@ -296,6 +298,21 @@ test-benchmark-with-setup: deploy-e2e-infra test-benchmark
296298
lint: golangci-lint ## Run golangci-lint linter
297299
$(GOLANGCI_LINT) run
298300

301+
.PHONY: lint-deploy-scripts
302+
lint-deploy-scripts: ## Run bash -n for deploy/install.sh, deploy/lib/*.sh, and deploy plugins
303+
@echo "Syntax-checking deploy shell scripts..."
304+
@bash -n deploy/install.sh
305+
@for script in deploy/lib/*.sh; do bash -n "$$script"; done
306+
@for script in deploy/*/install.sh; do if [ -f "$$script" ]; then bash -n "$$script"; fi; done
307+
@for script in deploy/kind-emulator/*.sh; do if [ -f "$$script" ]; then bash -n "$$script"; fi; done
308+
@echo "deploy script syntax OK"
309+
310+
.PHONY: smoke-deploy-scripts
311+
smoke-deploy-scripts: lint-deploy-scripts ## Non-interactive deploy script smoke check (source order + arg parsing)
312+
@echo "Running deploy script smoke check..."
313+
@SKIP_CHECKS=true E2E_TESTS_ENABLED=true INSTALL_GATEWAY_CTRLPLANE=true ENVIRONMENT=kubernetes ./deploy/install.sh --help >/dev/null
314+
@echo "deploy script smoke OK"
315+
299316
.PHONY: lint-fix
300317
lint-fix: golangci-lint ## Run golangci-lint linter and perform fixes
301318
$(GOLANGCI_LINT) run --fix

deploy/README.md

Lines changed: 17 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -139,8 +139,8 @@ export DEPLOY_PROMETHEUS=true # Deploy Prometheus stack
139139
export DEPLOY_WVA=true # Deploy WVA controller
140140
export DEPLOY_LLM_D=true # Deploy llm-d infrastructure
141141
export DEPLOY_PROMETHEUS_ADAPTER=true # Deploy Prometheus Adapter
142-
export DEPLOY_VA=true # Deploy VariantAutoscaling CR
143-
export DEPLOY_HPA=true # Deploy HPA
142+
export DEPLOY_VA=true # Chart-managed VariantAutoscaling (default off; e2e often creates its own)
143+
export DEPLOY_HPA=true # Chart-managed HPA (default off; enable with DEPLOY_VA for demos)
144144

145145
# HPA Configuration
146146
export HPA_STABILIZATION_SECONDS=240 # HPA stabilization window (default: 240s)
@@ -188,6 +188,9 @@ bash install.sh
188188

189189
```bash
190190
export HF_TOKEN="hf_xxxxx"
191+
# Optional: chart-managed VA + HPA for a single-variant demo (install.sh defaults skip these)
192+
export DEPLOY_VA=true
193+
export DEPLOY_HPA=true
191194
make deploy-wva-on-k8s
192195
```
193196

@@ -198,6 +201,8 @@ export HF_TOKEN="hf_xxxxx"
198201
export MODEL_ID="meta-llama/Llama-2-7b-hf"
199202
export SLO_TPOT=5
200203
export SLO_TTFT=500
204+
export DEPLOY_VA=true
205+
export DEPLOY_HPA=true
201206
make deploy-wva-on-k8s
202207
```
203208

@@ -208,6 +213,7 @@ export DEPLOY_WVA=true
208213
export DEPLOY_LLM_D=false
209214
export DEPLOY_PROMETHEUS=true # Prometheus is needed for metrics - disable if it is already installed in your cluster
210215
export DEPLOY_PROMETHEUS_ADAPTER=false
216+
export DEPLOY_VA=true # Create a VariantAutoscaling CR for the existing model service
211217
export DEPLOY_HPA=false
212218
make deploy-wva-on-k8s
213219
```
@@ -216,6 +222,8 @@ make deploy-wva-on-k8s
216222

217223
```bash
218224
export HF_TOKEN="hf_xxxxx"
225+
export DEPLOY_VA=true
226+
export DEPLOY_HPA=true
219227
export HPA_STABILIZATION_SECONDS=30 # Fast scaling for dev/test (default: 240)
220228
make deploy-wva-on-k8s
221229
```
@@ -224,9 +232,10 @@ make deploy-wva-on-k8s
224232

225233
```bash
226234
export HF_TOKEN="hf_xxxxx"
227-
export HPA_STABILIZATION_SECONDS=0 # Immediate scaling for e2e tests
228-
export VLLM_MAX_NUM_SEQS=8 # Low batch size for easy saturation
229235
export E2E_TESTS_ENABLED=true
236+
export INFRA_ONLY=true # Tests create VA/HPA; see also make deploy-e2e-infra
237+
export HPA_STABILIZATION_SECONDS=0 # Only applies if chart HPA is enabled
238+
export VLLM_MAX_NUM_SEQS=8 # Low batch size for easy saturation
230239
make deploy-wva-on-k8s
231240
```
232241

@@ -236,6 +245,8 @@ make deploy-wva-on-k8s
236245
export HF_TOKEN="hf_xxxxx"
237246
export VLLM_MAX_NUM_SEQS=64 # Match desired max batch size
238247
export MODEL_ID="unsloth/Meta-Llama-3.1-8B"
248+
export DEPLOY_VA=true
249+
export DEPLOY_HPA=true
239250
make deploy-wva-on-k8s
240251
```
241252

@@ -650,8 +661,8 @@ Each guide includes platform-specific examples, troubleshooting, and quick start
650661
| `DEPLOY_WVA` | Deploy WVA controller | `true` |
651662
| `DEPLOY_LLM_D` | Deploy llm-d infrastructure | `true` |
652663
| `DEPLOY_PROMETHEUS_ADAPTER` | Deploy Prometheus Adapter | `true` |
653-
| `DEPLOY_VA` | Deploy VariantAutoscaling CR | `true` |
654-
| `DEPLOY_HPA` | Deploy HPA | `true` |
664+
| `DEPLOY_VA` | Deploy VariantAutoscaling CR via WVA Helm chart | `false` |
665+
| `DEPLOY_HPA` | Deploy HPA via WVA Helm chart | `false` |
655666
| `INFRA_ONLY` | Deploy only infrastructure (skip VA/HPA) | `false` |
656667
| `SKIP_CHECKS` | Skip prerequisite checks | `false` |
657668

deploy/inference-objective-e2e.yaml

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
1-
# InferenceObjective for GIE queuing (scale-from-zero e2e and flow control).
2-
# Applied when E2E_TESTS_ENABLED or ENABLE_SCALE_TO_ZERO is true.
1+
# InferenceObjective for GIE queuing (scale-from-zero flow control).
2+
# install.sh applies this when ENABLE_SCALE_TO_ZERO=true and E2E_TESTS_ENABLED is false (e2e tests apply the e2e-default objective from Go).
33
# poolRef.name is templated by install.sh to match the deployed InferencePool.
44
apiVersion: inference.networking.x-k8s.io/v1alpha2
55
kind: InferenceObjective

0 commit comments

Comments (0)