Skip to content

Commit b7f3750

Browse files
committed
Sync upstream llm-d/llm-d-workload-variant-autoscaler 52c12e2
# Conflicts: # test/e2e/config.go
2 parents 6b98a1c + 52c12e2 commit b7f3750

18 files changed

Lines changed: 2051 additions & 139 deletions

File tree

.github/workflows/ci-benchmark.yaml

Lines changed: 410 additions & 0 deletions
Large diffs are not rendered by default.

Makefile

Lines changed: 27 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -91,7 +91,7 @@ vet: ## Run go vet against code.
9191

9292
.PHONY: test
9393
test: manifests generate fmt vet setup-envtest helm ## Run tests.
94-
KUBEBUILDER_ASSETS="$(shell $(ENVTEST) use $(ENVTEST_K8S_VERSION) --bin-dir $(LOCALBIN) -p path)" PATH=$(LOCALBIN):$(PATH) go test $$(go list ./... | grep -v /e2e) -coverprofile cover.out
94+
KUBEBUILDER_ASSETS="$(shell $(ENVTEST) use $(ENVTEST_K8S_VERSION) --bin-dir $(LOCALBIN) -p path)" PATH=$(LOCALBIN):$(PATH) go test $$(go list ./... | grep -v /e2e | grep -v /benchmark) -coverprofile cover.out
9595

9696
# Creates a multi-node Kind cluster
9797
# Adds emulated GPU labels and capacities per node
@@ -269,7 +269,32 @@ test-e2e-smoke-with-setup: deploy-e2e-infra test-e2e-smoke
269269
# Convenience target that deploys infra + runs full test suite.
270270
# Set DELETE_CLUSTER=true to delete Kind cluster after tests (default: keep cluster for debugging).
271271
.PHONY: test-e2e-full-with-setup
272-
test-e2e-full-with-setup: deploy-e2e-infra test-e2e-full
272+
test-e2e-full-with-setup: deploy-e2e-infra test-e2e-full
273+
274+
# Benchmark targets
275+
.PHONY: test-benchmark
276+
test-benchmark: manifests generate fmt vet ## Run benchmark tests (scale-up-latency scenario)
277+
@echo "Running benchmark tests..."
278+
KUBECONFIG=$(KUBECONFIG) \
279+
ENVIRONMENT=$(ENVIRONMENT) \
280+
WVA_NAMESPACE=$(CONTROLLER_NAMESPACE) \
281+
LLMD_NAMESPACE=$(E2E_EMULATED_LLMD_NAMESPACE) \
282+
MONITORING_NAMESPACE=$(E2E_MONITORING_NAMESPACE) \
283+
USE_SIMULATOR=$(USE_SIMULATOR) \
284+
SCALER_BACKEND=$(SCALER_BACKEND) \
285+
MODEL_ID=$(MODEL_ID) \
286+
go test ./test/benchmark/ -timeout 30m -v -ginkgo.v \
287+
-ginkgo.label-filter="benchmark"; \
288+
TEST_EXIT_CODE=$$?; \
289+
echo ""; \
290+
echo "=========================================="; \
291+
echo "Benchmark execution completed. Exit code: $$TEST_EXIT_CODE"; \
292+
echo "=========================================="; \
293+
exit $$TEST_EXIT_CODE
294+
295+
# Convenience target that deploys infra + runs benchmark tests.
296+
.PHONY: test-benchmark-with-setup
297+
test-benchmark-with-setup: deploy-e2e-infra test-benchmark
273298

274299
.PHONY: lint
275300
lint: golangci-lint ## Run golangci-lint linter

config/default/kustomization.yaml

Lines changed: 18 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -41,6 +41,24 @@ patches:
4141
target:
4242
kind: Deployment
4343

44+
# [NAMESPACE-SELECTOR] Ensure the ServiceMonitor namespaceSelector matches the
45+
# deployment namespace. Without this, the hardcoded value in monitor.yaml won't
46+
# update when an overlay changes the namespace (e.g. deploying into 'opendatahub'
47+
# or 'redhat-ods-operator').
48+
replacements:
49+
- source:
50+
kind: Deployment
51+
name: controller-manager
52+
fieldPath: metadata.namespace
53+
targets:
54+
- select:
55+
kind: ServiceMonitor
56+
group: monitoring.coreos.com
57+
version: v1
58+
name: controller-manager-metrics-monitor
59+
fieldPaths:
60+
- spec.namespaceSelector.matchNames.0
61+
4462
# Uncomment the patches line if you enable Metrics and CertManager
4563
# [METRICS-WITH-CERTS] To enable metrics protected with certManager, uncomment the following line.
4664
# This patch will protect the metrics with certManager self-signed certs.
Lines changed: 212 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,212 @@
1+
{
2+
"annotations": {
3+
"list": [
4+
{
5+
"builtIn": 1,
6+
"datasource": { "type": "grafana", "uid": "-- Grafana --" },
7+
"enable": true,
8+
"hide": true,
9+
"iconColor": "rgba(0, 211, 255, 1)",
10+
"name": "Annotations & Alerts",
11+
"type": "dashboard"
12+
}
13+
]
14+
},
15+
"editable": true,
16+
"fiscalYearStartMonth": 0,
17+
"graphTooltip": 1,
18+
"id": null,
19+
"links": [],
20+
"panels": [
21+
{
22+
"id": 1,
23+
"title": "Deployment Replicas",
24+
"type": "timeseries",
25+
"gridPos": { "h": 8, "w": 12, "x": 0, "y": 0 },
26+
"datasource": { "type": "prometheus" },
27+
"fieldConfig": {
28+
"defaults": {
29+
"color": { "mode": "palette-classic" },
30+
"custom": {
31+
"drawStyle": "line",
32+
"lineWidth": 2,
33+
"fillOpacity": 10,
34+
"pointSize": 5,
35+
"showPoints": "auto",
36+
"spanNulls": true
37+
},
38+
"unit": "short",
39+
"min": 0
40+
},
41+
"overrides": []
42+
},
43+
"targets": [
44+
{
45+
"expr": "wva_desired_replicas",
46+
"legendFormat": "desired {{variant_name}}",
47+
"refId": "A"
48+
},
49+
{
50+
"expr": "wva_current_replicas",
51+
"legendFormat": "current {{variant_name}}",
52+
"refId": "B"
53+
}
54+
],
55+
"options": { "legend": { "displayMode": "list", "placement": "bottom" } }
56+
},
57+
{
58+
"id": 2,
59+
"title": "WVA Desired Ratio",
60+
"type": "timeseries",
61+
"gridPos": { "h": 8, "w": 12, "x": 12, "y": 0 },
62+
"datasource": { "type": "prometheus" },
63+
"fieldConfig": {
64+
"defaults": {
65+
"color": { "mode": "palette-classic" },
66+
"custom": {
67+
"drawStyle": "line",
68+
"lineWidth": 2,
69+
"fillOpacity": 10,
70+
"spanNulls": true
71+
},
72+
"unit": "short",
73+
"min": 0
74+
},
75+
"overrides": []
76+
},
77+
"targets": [
78+
{
79+
"expr": "wva_desired_ratio",
80+
"legendFormat": "ratio {{variant_name}}",
81+
"refId": "A"
82+
}
83+
],
84+
"options": { "legend": { "displayMode": "list", "placement": "bottom" } }
85+
},
86+
{
87+
"id": 3,
88+
"title": "KV Cache Usage",
89+
"type": "timeseries",
90+
"gridPos": { "h": 8, "w": 12, "x": 0, "y": 8 },
91+
"datasource": { "type": "prometheus" },
92+
"fieldConfig": {
93+
"defaults": {
94+
"color": { "mode": "palette-classic" },
95+
"custom": {
96+
"drawStyle": "line",
97+
"lineWidth": 2,
98+
"fillOpacity": 20,
99+
"spanNulls": true
100+
},
101+
"unit": "percentunit",
102+
"min": 0,
103+
"max": 1,
104+
"thresholds": {
105+
"mode": "absolute",
106+
"steps": [
107+
{ "color": "green", "value": null },
108+
{ "color": "yellow", "value": 0.7 },
109+
{ "color": "red", "value": 0.9 }
110+
]
111+
}
112+
},
113+
"overrides": []
114+
},
115+
"targets": [
116+
{
117+
"expr": "vllm:kv_cache_usage_perc{namespace=~\"llm-d.*\"}",
118+
"legendFormat": "{{pod}}",
119+
"refId": "A"
120+
},
121+
{
122+
"expr": "avg(vllm:kv_cache_usage_perc{namespace=~\"llm-d.*\"})",
123+
"legendFormat": "avg",
124+
"refId": "B"
125+
}
126+
],
127+
"options": { "legend": { "displayMode": "list", "placement": "bottom" } }
128+
},
129+
{
130+
"id": 4,
131+
"title": "Queue Depth (Requests Waiting)",
132+
"type": "timeseries",
133+
"gridPos": { "h": 8, "w": 12, "x": 12, "y": 8 },
134+
"datasource": { "type": "prometheus" },
135+
"fieldConfig": {
136+
"defaults": {
137+
"color": { "mode": "palette-classic" },
138+
"custom": {
139+
"drawStyle": "line",
140+
"lineWidth": 2,
141+
"fillOpacity": 10,
142+
"spanNulls": true
143+
},
144+
"unit": "short",
145+
"min": 0
146+
},
147+
"overrides": []
148+
},
149+
"targets": [
150+
{
151+
"expr": "vllm:num_requests_waiting{namespace=~\"llm-d.*\"}",
152+
"legendFormat": "{{pod}} waiting",
153+
"refId": "A"
154+
},
155+
{
156+
"expr": "vllm:num_requests_running{namespace=~\"llm-d.*\"}",
157+
"legendFormat": "{{pod}} running",
158+
"refId": "B"
159+
}
160+
],
161+
"options": { "legend": { "displayMode": "list", "placement": "bottom" } }
162+
},
163+
{
164+
"id": 5,
165+
"title": "Scaling Activity",
166+
"type": "timeseries",
167+
"gridPos": { "h": 8, "w": 24, "x": 0, "y": 16 },
168+
"datasource": { "type": "prometheus" },
169+
"fieldConfig": {
170+
"defaults": {
171+
"color": { "mode": "palette-classic" },
172+
"custom": {
173+
"drawStyle": "line",
174+
"lineWidth": 2,
175+
"fillOpacity": 10,
176+
"spanNulls": true
177+
},
178+
"unit": "short",
179+
"min": 0
180+
},
181+
"overrides": []
182+
},
183+
"targets": [
184+
{
185+
"expr": "wva_desired_replicas",
186+
"legendFormat": "desired {{variant_name}}",
187+
"refId": "A"
188+
},
189+
{
190+
"expr": "wva_current_replicas",
191+
"legendFormat": "current {{variant_name}}",
192+
"refId": "B"
193+
},
194+
{
195+
"expr": "rate(wva_replica_scaling_total[2m])",
196+
"legendFormat": "scaling rate {{variant_name}} {{direction}}",
197+
"refId": "C"
198+
}
199+
],
200+
"options": { "legend": { "displayMode": "list", "placement": "bottom" } }
201+
}
202+
],
203+
"schemaVersion": 39,
204+
"tags": ["benchmark", "wva", "autoscaling"],
205+
"templating": { "list": [] },
206+
"time": { "from": "now-30m", "to": "now" },
207+
"timepicker": {},
208+
"timezone": "utc",
209+
"title": "WVA Benchmark: Scale-Up Latency",
210+
"uid": "wva-benchmark-scaleup",
211+
"version": 1
212+
}

0 commit comments

Comments
 (0)