llm-d · asm582 · Apr 15, 2026 · Apr 15, 2026 · Apr 15, 2026 · Apr 15, 2026
diff --git a/Makefile b/Makefile
@@ -34,6 +34,10 @@ DEPLOY_LLM_D ?= true
 DELETE_CLUSTER ?= false
 DELETE_NAMESPACES ?= false
 
+# Multi-model configuration: MODELS (comma-separated) is read by deploy-multi-model-infra, undeploy-multi-model-infra and test-multi-model-scaling; NAMESPACE_SCOPED by the first two.
+MODELS           ?= Qwen/Qwen3-0.6B,unsloth/Meta-Llama-3.1-8B
+NAMESPACE_SCOPED ?= false
+
 # Get the currently used golang install path (in GOPATH/bin, unless GOBIN is set)
 ifeq (,$(shell go env GOBIN))
 GOBIN=$(shell go env GOPATH)/bin
@@ -214,6 +218,95 @@ deploy-e2e-infra: ## Deploy e2e test infrastructure (infra-only: WVA + llm-d, no
 		./deploy/install.sh; \
 	fi
 
+.PHONY: deploy-e2e-infra-multi-model
+deploy-e2e-infra-multi-model: ## Deploy e2e test infrastructure with two concurrent model services
+	@printf '%s\n' "Deploying multi-model e2e test infrastructure..."
+	./deploy/install-multi-model.sh
+
+# Configurable multi-model deployment for any environment.
+# Builds the WVA image unless SKIP_BUILD=true, splits IMG into repo and tag (a ":" that belongs
+# to a registry port, e.g. localhost:5000/wva, is not a tag), then runs deploy/install-multi-model.sh.
+# Usage:
+#   make deploy-multi-model-infra \
+#     ENVIRONMENT=openshift \
+#     WVA_NS=my-namespace LLMD_NS=my-namespace \
+#     NAMESPACE_SCOPED=true \
+#     SKIP_BUILD=true DECODE_REPLICAS=1 \
+#     IMG_TAG=v0.6.0 LLM_D_RELEASE=v0.6.0 \
+#     MODELS="Qwen/Qwen3-0.6B,unsloth/Meta-Llama-3.1-8B"
+.PHONY: deploy-multi-model-infra
+deploy-multi-model-infra: ## Deploy multi-model infra with N models. Set MODELS=m1,m2,... (comma-separated).
+	@echo "Deploying multi-model infrastructure (MODELS=$(MODELS))..."
+	@if [ "$(SKIP_BUILD)" != "true" ]; then \
+		echo "Building WVA image $(IMG)..."; \
+		$(MAKE) docker-build IMG="$(IMG)" || exit 1; \
+	else \
+		echo "Skipping image build (SKIP_BUILD=true)"; \
+	fi; \
+	WVA_IMG="$(IMG)"; \
+	case "$${WVA_IMG##*/}" in \
+		*:*) IMAGE_REPO="$${WVA_IMG%:*}"; IMAGE_TAG="$${WVA_IMG##*:}" ;; \
+		*)   IMAGE_REPO="$$WVA_IMG"; IMAGE_TAG="latest" ;; \
+	esac; \
+	echo "Using WVA image: $$IMAGE_REPO:$$IMAGE_TAG"; \
+	ENVIRONMENT="$(ENVIRONMENT)" \
+	WVA_NS="$(WVA_NS)" \
+	LLMD_NS="$(LLMD_NS)" \
+	NAMESPACE_SCOPED="$(NAMESPACE_SCOPED)" \
+	DECODE_REPLICAS="$(DECODE_REPLICAS)" \
+	LLM_D_RELEASE="$(LLM_D_RELEASE)" \
+	WVA_IMAGE_REPO="$$IMAGE_REPO" \
+	WVA_IMAGE_TAG="$$IMAGE_TAG" \
+	WVA_IMAGE_PULL_POLICY=IfNotPresent \
+	MODELS="$(MODELS)" \
+	./deploy/install-multi-model.sh
+
+# Undeploy multi-model infrastructure by running deploy/install-multi-model.sh --undeploy.
+# Must use the same MODELS list that was used during deployment.
+.PHONY: undeploy-multi-model-infra
+undeploy-multi-model-infra: ## Undeploy multi-model infra. Use same MODELS=m1,m2,... as deploy.
+	@echo "Undeploying multi-model infrastructure (MODELS=$(MODELS))..."
+	ENVIRONMENT="$(ENVIRONMENT)" \
+	WVA_NS="$(WVA_NS)" \
+	LLMD_NS="$(LLMD_NS)" \
+	NAMESPACE_SCOPED="$(NAMESPACE_SCOPED)" \
+	DELETE_NAMESPACES="$(DELETE_NAMESPACES)" \
+	MODELS="$(MODELS)" \
+	./deploy/install-multi-model.sh --undeploy
+
+# Multi-model scaling test parameters; test-multi-model-scaling passes both to `go test` as environment variables.
+MM_MIN_REPLICAS ?= 1
+MM_MAX_REPLICAS ?= 5
+
+# Runs the Go tests in ./test/benchmark/ filtered to the "multi-model" Ginkgo label and reports the exit code.
+# TODO: Merge test-multi-model-scaling into test-benchmark by detecting MODELS env var:
+#   $(eval LABEL_FILTER := $(if $(MODELS),multi-model,phase3a))
+# Then `make test-benchmark MODELS="Qwen/Qwen3-0.6B,unsloth/Meta-Llama-3.1-8B"` removes the need for a separate target.
+.PHONY: test-multi-model-scaling
+test-multi-model-scaling: manifests generate fmt vet ## Run multi-model scaling benchmark (VA + HPA + GuideLLM per model)
+	@echo "Running multi-model scaling benchmark (MODELS=$(MODELS))..."
+	KUBECONFIG="$(KUBECONFIG)" \
+	ENVIRONMENT="$(ENVIRONMENT)" \
+	WVA_NAMESPACE="$(CONTROLLER_NAMESPACE)" \
+	LLMD_NAMESPACE="$(LLMD_NS)" \
+	MONITORING_NAMESPACE="$(E2E_MONITORING_NAMESPACE)" \
+	USE_SIMULATOR="$(USE_SIMULATOR)" \
+	SCALER_BACKEND="$(SCALER_BACKEND)" \
+	MODEL_ID="$(MODEL_ID)" \
+	MODELS="$(MODELS)" \
+	MM_MIN_REPLICAS="$(MM_MIN_REPLICAS)" \
+	MM_MAX_REPLICAS="$(MM_MAX_REPLICAS)" \
+	GATEWAY_SERVICE_NAME=multi-model-inference-gateway-istio \
+	PROMETHEUS_TOKEN=$$(oc whoami -t 2>/dev/null || echo "") \
+	go test ./test/benchmark/ -timeout 75m -v -ginkgo.v \
+		-ginkgo.label-filter="multi-model"; \
+	TEST_EXIT_CODE=$$?; \
+	echo ""; \
+	echo "=========================================="; \
+	echo "Multi-model benchmark completed. Exit code: $$TEST_EXIT_CODE"; \
+	echo "=========================================="; \
+	exit $$TEST_EXIT_CODE
+
 # Deploy e2e infrastructure with KEDA as scaler backend (installs KEDA, skips Prometheus Adapter).
 # Runs a subset of smoke tests from the e2e suite.
 .PHONY: test-e2e-smoke
@@ -325,6 +418,7 @@ lint: golangci-lint ## Run golangci-lint linter
 lint-deploy-scripts: ## Run bash -n for deploy/install.sh, deploy/lib/*.sh, and deploy plugins
 	@echo "Syntax-checking deploy shell scripts..."
 	@bash -n deploy/install.sh
+	@bash -n deploy/install-multi-model.sh
 	@for script in deploy/lib/*.sh; do bash -n "$$script"; done
 	@for script in deploy/*/install.sh; do if [ -f "$$script" ]; then bash -n "$$script"; fi; done
 	@for script in deploy/kind-emulator/*.sh; do if [ -f "$$script" ]; then bash -n "$$script"; fi; done