Skip to content

Commit 2394e3e

Browse files
committed
rm
Signed-off-by: Mohammed Abdi <mohammed.munir.abdi@ibm.com>
1 parent 2a8eaeb commit 2394e3e

3 files changed

Lines changed: 1 addition & 232 deletions

File tree

Makefile

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -113,7 +113,7 @@ destroy-kind-cluster:
113113
.PHONY: deploy-wva-emulated-on-kind
114114
deploy-wva-emulated-on-kind: ## Deploy WVA + llm-d on Kind (Prometheus Adapter as scaler backend)
115115
@echo ">>> Deploying workload-variant-autoscaler (cluster args: $(KIND_ARGS), image: $(IMG))"
116-
KIND=$(KIND) KUBECTL=$(KUBECTL) IMG=$(IMG) DEPLOY_LLM_D=$(DEPLOY_LLM_D) ENVIRONMENT=kind-emulator CREATE_CLUSTER=$(CREATE_CLUSTER) CLUSTER_GPU_TYPE=$(CLUSTER_GPU_TYPE) CLUSTER_NODES=$(CLUSTER_NODES) CLUSTER_GPUS=$(CLUSTER_GPUS) MULTI_MODEL_TESTING=$(MULTI_MODEL_TESTING) NAMESPACE_SCOPED=false SCALER_BACKEND=$(SCALER_BACKEND) \
116+
KIND=$(KIND) KUBECTL=$(KUBECTL) IMG=$(IMG) DEPLOY_LLM_D=$(DEPLOY_LLM_D) ENVIRONMENT=kind-emulator CREATE_CLUSTER=$(CREATE_CLUSTER) CLUSTER_GPU_TYPE=$(CLUSTER_GPU_TYPE) CLUSTER_NODES=$(CLUSTER_NODES) CLUSTER_GPUS=$(CLUSTER_GPUS) NAMESPACE_SCOPED=false SCALER_BACKEND=$(SCALER_BACKEND) \
117117
deploy/install.sh
118118

119119
## Undeploy WVA from the emulated environment on Kind.

deploy/install.sh

Lines changed: 0 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -74,11 +74,6 @@ ACCELERATOR_TYPE=${ACCELERATOR_TYPE:-"H100"}
7474
SLO_TPOT=${SLO_TPOT:-10} # Target time-per-output-token SLO (in ms)
7575
SLO_TTFT=${SLO_TTFT:-1000} # Target time-to-first-token SLO (in ms)
7676

77-
# Multi-model testing configuration (for limiter e2e tests)
78-
# When enabled, deploys a second InferencePool with a different model
79-
MULTI_MODEL_TESTING=${MULTI_MODEL_TESTING:-false}
80-
MODEL_ID_2=${MODEL_ID_2:-"unsloth/Llama-3.2-1B"}
81-
8277
# Prometheus Configuration
8378
PROM_CA_CERT_PATH=${PROM_CA_CERT_PATH:-"/tmp/prometheus-ca.crt"}
8479
PROMETHEUS_SECRET_NAME=${PROMETHEUS_SECRET_NAME:-"prometheus-web-tls"}

deploy/lib/infra_llmd.sh

Lines changed: 0 additions & 226 deletions
Original file line number | Diff line number | Diff line change
@@ -7,227 +7,6 @@
77
# containsElement(), wait_deployment_available_nonfatal(), detect_inference_pool_api_group().
88
#
99

10-
# Deploy second model infrastructure for multi-model/limiter testing
11-
# Creates a second InferencePool, modelservice deployment, and updates HTTPRoute
12-
deploy_second_model_infrastructure() {
13-
log_info "Deploying second model infrastructure for multi-model testing..."
14-
log_info "Second model: $MODEL_ID_2"
15-
16-
local POOL_NAME_2="gaie-sim-2"
17-
local MS_NAME_2="ms-sim-2"
18-
local MODEL_LABEL_2="model-2"
19-
# Sanitize model name for use in Kubernetes labels (replace / with -)
20-
local MODEL_ID_2_SANITIZED=$(echo "$MODEL_ID_2" | tr '/' '-')
21-
22-
# Create second InferencePool with different selector
23-
log_info "Creating second InferencePool: $POOL_NAME_2"
24-
cat <<EOF | kubectl apply -n "$LLMD_NS" -f -
25-
apiVersion: inference.networking.x-k8s.io/v1alpha2
26-
kind: InferencePool
27-
metadata:
28-
name: $POOL_NAME_2
29-
spec:
30-
targetPortNumber: 8000
31-
selector:
32-
llm-d.ai/model-pool: "$MODEL_LABEL_2"
33-
extensionRef:
34-
name: ${POOL_NAME_2}-epp
35-
EOF
36-
37-
# Create EPP deployment for second pool
38-
log_info "Creating EPP deployment for second pool"
39-
cat <<EOF | kubectl apply -n "$LLMD_NS" -f -
40-
apiVersion: apps/v1
41-
kind: Deployment
42-
metadata:
43-
name: ${POOL_NAME_2}-epp
44-
spec:
45-
replicas: 1
46-
selector:
47-
matchLabels:
48-
app: ${POOL_NAME_2}-epp
49-
template:
50-
metadata:
51-
labels:
52-
app: ${POOL_NAME_2}-epp
53-
spec:
54-
serviceAccountName: gaie-sim-sa
55-
containers:
56-
- name: epp
57-
image: ghcr.io/llm-d/llm-d-inference-scheduler:v0.3.2
58-
imagePullPolicy: Always
59-
args:
60-
- --poolName=$POOL_NAME_2
61-
- --poolNamespace=$LLMD_NS
62-
- --extProcPort=9002
63-
- --grpcHealthPort=9003
64-
ports:
65-
- containerPort: 9002
66-
name: grpc
67-
- containerPort: 9003
68-
name: grpc-health
69-
- containerPort: 9090
70-
name: metrics
71-
readinessProbe:
72-
grpc:
73-
port: 9003
74-
initialDelaySeconds: 5
75-
periodSeconds: 10
76-
livenessProbe:
77-
grpc:
78-
port: 9003
79-
initialDelaySeconds: 15
80-
periodSeconds: 20
81-
---
82-
apiVersion: v1
83-
kind: Service
84-
metadata:
85-
name: ${POOL_NAME_2}-epp
86-
spec:
87-
selector:
88-
app: ${POOL_NAME_2}-epp
89-
ports:
90-
- name: grpc
91-
port: 9002
92-
targetPort: 9002
93-
- name: grpc-health
94-
port: 9003
95-
targetPort: 9003
96-
- name: metrics
97-
port: 9090
98-
targetPort: 9090
99-
EOF
100-
101-
# Wait for second EPP to be ready
102-
log_info "Waiting for second EPP deployment to be ready..."
103-
wait_deployment_available_nonfatal \
104-
"$LLMD_NS" \
105-
"${POOL_NAME_2}-epp" \
106-
"120s" \
107-
"Second EPP deployment not ready yet - check 'kubectl get pods -n $LLMD_NS -l app=${POOL_NAME_2}-epp'"
108-
109-
# Create second modelservice deployment (using llm-d-inference-sim)
110-
log_info "Creating second modelservice deployment: $MS_NAME_2"
111-
cat <<EOF | kubectl apply -n "$LLMD_NS" -f -
112-
apiVersion: apps/v1
113-
kind: Deployment
114-
metadata:
115-
name: ${MS_NAME_2}-decode
116-
spec:
117-
replicas: 2
118-
selector:
119-
matchLabels:
120-
app: ${MS_NAME_2}-decode
121-
llm-d.ai/model-pool: "$MODEL_LABEL_2"
122-
template:
123-
metadata:
124-
labels:
125-
app: ${MS_NAME_2}-decode
126-
llm-d.ai/model-pool: "$MODEL_LABEL_2"
127-
llm-d.ai/model: "${MODEL_ID_2_SANITIZED}"
128-
spec:
129-
containers:
130-
- name: vllm
131-
image: ghcr.io/llm-d/llm-d-inference-sim:v0.5.1
132-
imagePullPolicy: Always
133-
args:
134-
- --model=$MODEL_ID_2
135-
- --time-to-first-token=$TTFT_AVERAGE_LATENCY_MS
136-
- --inter-token-latency=$ITL_AVERAGE_LATENCY_MS
137-
- --enable-kvcache
138-
- --kv-cache-size=1024
139-
- --block-size=16
140-
ports:
141-
- containerPort: 8000
142-
name: http
143-
- containerPort: 8200
144-
name: metrics
145-
env:
146-
- name: POD_NAME
147-
valueFrom:
148-
fieldRef:
149-
fieldPath: metadata.name
150-
- name: POD_NAMESPACE
151-
valueFrom:
152-
fieldRef:
153-
fieldPath: metadata.namespace
154-
readinessProbe:
155-
httpGet:
156-
path: /health
157-
port: 8000
158-
periodSeconds: 5
159-
---
160-
apiVersion: v1
161-
kind: Service
162-
metadata:
163-
name: ${MS_NAME_2}-decode
164-
labels:
165-
llm-d.ai/model-pool: "$MODEL_LABEL_2"
166-
spec:
167-
selector:
168-
app: ${MS_NAME_2}-decode
169-
ports:
170-
- name: http
171-
port: 8000
172-
targetPort: 8000
173-
- name: metrics
174-
port: 8200
175-
targetPort: 8200
176-
EOF
177-
178-
# Create InferenceModel for second model (maps model name to pool)
179-
# Note: InferenceModel CRD may not be available in all environments
180-
if kubectl get crd inferencemodels.inference.networking.x-k8s.io &>/dev/null; then
181-
log_info "Creating InferenceModel for second model"
182-
cat <<EOF | kubectl apply -n "$LLMD_NS" -f -
183-
apiVersion: inference.networking.x-k8s.io/v1alpha2
184-
kind: InferenceModel
185-
metadata:
186-
name: ${MS_NAME_2}-model
187-
spec:
188-
modelName: $MODEL_ID_2
189-
criticality: Critical
190-
poolRef:
191-
name: $POOL_NAME_2
192-
targetModels:
193-
- name: $MODEL_ID_2
194-
weight: 100
195-
EOF
196-
else
197-
log_warning "InferenceModel CRD not available - skipping InferenceModel creation for second model"
198-
log_warning "Model routing may need to be configured manually or via HTTPRoute"
199-
fi
200-
201-
# Create PodMonitor for second model metrics
202-
log_info "Creating PodMonitor for second model"
203-
cat <<EOF | kubectl apply -n "$LLMD_NS" -f -
204-
apiVersion: monitoring.coreos.com/v1
205-
kind: PodMonitor
206-
metadata:
207-
name: ${MS_NAME_2}-podmonitor
208-
labels:
209-
release: kube-prometheus-stack
210-
spec:
211-
selector:
212-
matchLabels:
213-
app: ${MS_NAME_2}-decode
214-
podMetricsEndpoints:
215-
- port: metrics
216-
path: /metrics
217-
interval: 15s
218-
EOF
219-
220-
# Wait for second model deployment to be ready
221-
log_info "Waiting for second model deployment to be ready..."
222-
wait_deployment_available_nonfatal \
223-
"$LLMD_NS" \
224-
"${MS_NAME_2}-decode" \
225-
"120s" \
226-
"Second model deployment not ready yet - check 'kubectl get pods -n $LLMD_NS'"
227-
228-
log_success "Second model infrastructure deployed successfully"
229-
}
230-
23110
deploy_llm_d_infrastructure() {
23211
log_info "Deploying llm-d infrastructure..."
23312

@@ -556,11 +335,6 @@ deploy_llm_d_infrastructure() {
556335
fi
557336
fi
558337

559-
# Deploy second model infrastructure for multi-model testing (limiter e2e tests)
560-
if [ "$MULTI_MODEL_TESTING" == "true" ]; then
561-
deploy_second_model_infrastructure
562-
fi
563-
564338
cd "$WVA_PROJECT"
565339
log_success "llm-d infrastructure deployment complete"
566340
}

0 commit comments

Comments
 (0)