Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
94 changes: 94 additions & 0 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,10 @@ DEPLOY_LLM_D ?= true
DELETE_CLUSTER ?= false
DELETE_NAMESPACES ?= false

# Multi-model deployment configuration.
# MODELS: comma-separated list of model IDs. Passed as MODELS to
#   deploy-multi-model-infra, undeploy-multi-model-infra and
#   test-multi-model-scaling.
# NAMESPACE_SCOPED: passed through unchanged to deploy/install-multi-model.sh
#   by the deploy-multi-model-infra and undeploy-multi-model-infra targets.
MODELS ?= Qwen/Qwen3-0.6B,unsloth/Meta-Llama-3.1-8B
NAMESPACE_SCOPED ?= false

# Get the currently used golang install path (in GOPATH/bin, unless GOBIN is set)
ifeq (,$(shell go env GOBIN))
GOBIN=$(shell go env GOPATH)/bin
Expand Down Expand Up @@ -214,6 +218,95 @@ deploy-e2e-infra: ## Deploy e2e test infrastructure (infra-only: WVA + llm-d, no
./deploy/install.sh; \
fi

# Runs deploy/install-multi-model.sh with no extra environment set by this
# rule; the script sees only what the caller's environment already provides.
.PHONY: deploy-e2e-infra-multi-model
deploy-e2e-infra-multi-model: ## Deploy e2e test infrastructure with two concurrent model services
	@printf '%s\n' "Deploying multi-model e2e test infrastructure..."
	./deploy/install-multi-model.sh

# Configurable multi-model deployment for any environment.
#
# Steps performed by the recipe:
#   1. Unless SKIP_BUILD=true, build the WVA image via `$(MAKE) docker-build`.
#      A failed build aborts the recipe (`|| exit 1`) so that a stale or
#      missing image is never deployed, regardless of the shell's -e setting.
#   2. Split IMG into repository and tag. Only a colon in the LAST path
#      component is treated as the tag separator, so a registry port such as
#      localhost:5000/wva:dev yields repo=localhost:5000/wva, tag=dev, and
#      localhost:5000/wva (no tag) falls back to tag "latest".
#      Digest references (name@sha256:...) are not handled specially.
#   3. Run deploy/install-multi-model.sh with the settings below in its
#      environment. WVA_IMAGE_PULL_POLICY is always IfNotPresent.
#
# Usage:
#   make deploy-multi-model-infra \
#     ENVIRONMENT=openshift \
#     WVA_NS=my-namespace LLMD_NS=my-namespace \
#     NAMESPACE_SCOPED=true \
#     SKIP_BUILD=true DECODE_REPLICAS=1 \
#     IMG_TAG=v0.6.0 LLM_D_RELEASE=v0.6.0 \
#     MODELS="Qwen/Qwen3-0.6B,unsloth/Meta-Llama-3.1-8B"
.PHONY: deploy-multi-model-infra
deploy-multi-model-infra: ## Deploy multi-model infra with N models. Set MODELS=m1,m2,... (comma-separated).
	@echo "Deploying multi-model infrastructure (MODELS=$(MODELS))..."
	@if [ "$(SKIP_BUILD)" != "true" ]; then \
		echo "Building WVA image $(IMG)..."; \
		$(MAKE) docker-build IMG=$(IMG) || exit 1; \
	else \
		echo "Skipping image build (SKIP_BUILD=true)"; \
	fi; \
	IMG_REF="$(IMG)"; \
	case "$${IMG_REF##*/}" in \
		*:*) IMAGE_REPO="$${IMG_REF%:*}"; IMAGE_TAG="$${IMG_REF##*:}" ;; \
		*)   IMAGE_REPO="$$IMG_REF"; IMAGE_TAG="latest" ;; \
	esac; \
	echo "Using WVA image: $$IMAGE_REPO:$$IMAGE_TAG"; \
	ENVIRONMENT=$(ENVIRONMENT) \
	WVA_NS="$(WVA_NS)" \
	LLMD_NS="$(LLMD_NS)" \
	NAMESPACE_SCOPED=$(NAMESPACE_SCOPED) \
	DECODE_REPLICAS=$(DECODE_REPLICAS) \
	LLM_D_RELEASE=$(LLM_D_RELEASE) \
	WVA_IMAGE_REPO="$$IMAGE_REPO" \
	WVA_IMAGE_TAG="$$IMAGE_TAG" \
	WVA_IMAGE_PULL_POLICY=IfNotPresent \
	MODELS="$(MODELS)" \
	./deploy/install-multi-model.sh

# Tear down what deploy-multi-model-infra installed by running
# deploy/install-multi-model.sh with --undeploy.
# Pass the same MODELS list that was used for the deployment.
# DELETE_NAMESPACES is forwarded to the script as-is.
.PHONY: undeploy-multi-model-infra
undeploy-multi-model-infra: ## Undeploy multi-model infra. Use same MODELS=m1,m2,... as deploy.
	@echo "Undeploying multi-model infrastructure (MODELS=$(MODELS))..."
	ENVIRONMENT=$(ENVIRONMENT) NAMESPACE_SCOPED=$(NAMESPACE_SCOPED) \
	WVA_NS="$(WVA_NS)" LLMD_NS="$(LLMD_NS)" \
	DELETE_NAMESPACES=$(DELETE_NAMESPACES) \
	MODELS="$(MODELS)" \
		./deploy/install-multi-model.sh --undeploy

# Multi-model scaling test parameters.
# Both values are exported unchanged to `go test` by test-multi-model-scaling
# and can be overridden on the make command line.
# NOTE(review): presumably the per-model replica bounds used by the benchmark;
# confirm against test/benchmark.
MM_MIN_REPLICAS ?= 1
MM_MAX_REPLICAS ?= 5

# TODO: Merge test-multi-model-scaling into test-benchmark by detecting MODELS env var:
#   $(eval LABEL_FILTER := $(if $(MODELS),multi-model,phase3a))
# Then: make test-benchmark MODELS="Qwen/Qwen3-0.6B,unsloth/Meta-Llama-3.1-8B"
# This eliminates the need for a separate target.
#
# Runs the Ginkgo specs labelled "multi-model" under ./test/benchmark/ with a
# 75 minute timeout. The settings are handed to `go test` as environment
# variables; PROMETHEUS_TOKEN is taken from `oc whoami -t` and is empty when
# that command fails. The go test exit status is captured, reported in a
# banner, and then returned as the recipe's exit status.
.PHONY: test-multi-model-scaling
test-multi-model-scaling: manifests generate fmt vet ## Run multi-model scaling benchmark (VA + HPA + GuideLLM per model)
	@echo "Running multi-model scaling benchmark (MODELS=$(MODELS))..."
	KUBECONFIG=$(KUBECONFIG) ENVIRONMENT=$(ENVIRONMENT) \
	WVA_NAMESPACE=$(CONTROLLER_NAMESPACE) LLMD_NAMESPACE=$(LLMD_NS) \
	MONITORING_NAMESPACE=$(E2E_MONITORING_NAMESPACE) \
	USE_SIMULATOR=$(USE_SIMULATOR) SCALER_BACKEND=$(SCALER_BACKEND) \
	MODEL_ID=$(MODEL_ID) MODELS="$(MODELS)" \
	MM_MIN_REPLICAS=$(MM_MIN_REPLICAS) MM_MAX_REPLICAS=$(MM_MAX_REPLICAS) \
	GATEWAY_SERVICE_NAME=multi-model-inference-gateway-istio \
	PROMETHEUS_TOKEN=$$(oc whoami -t 2>/dev/null || echo "") \
		go test ./test/benchmark/ -timeout 75m -v -ginkgo.v \
			-ginkgo.label-filter="multi-model"; \
	rc=$$?; \
	printf '\n%s\n%s\n%s\n' \
		"==========================================" \
		"Multi-model benchmark completed. Exit code: $$rc" \
		"=========================================="; \
	exit $$rc

# Deploy e2e infrastructure with KEDA as scaler backend (installs KEDA, skips Prometheus Adapter).
# Runs a subset of smoke tests from the e2e suite.
.PHONY: test-e2e-smoke
Expand Down Expand Up @@ -325,6 +418,7 @@ lint: golangci-lint ## Run golangci-lint linter
lint-deploy-scripts: ## Run bash -n for deploy/install.sh, deploy/lib/*.sh, and deploy plugins
@echo "Syntax-checking deploy shell scripts..."
@bash -n deploy/install.sh
@bash -n deploy/install-multi-model.sh
@for script in deploy/lib/*.sh; do bash -n "$$script"; done
@for script in deploy/*/install.sh; do if [ -f "$$script" ]; then bash -n "$$script"; fi; done
@for script in deploy/kind-emulator/*.sh; do if [ -f "$$script" ]; then bash -n "$$script"; fi; done
Expand Down
Loading
Loading