Skip to content

Commit 1729424

Browse files
committed
Fix wrong Prometheus metric name in benchmark (gpu_cache → kv_cache)
The benchmark queried `vllm:gpu_cache_usage_perc`, which doesn't exist. The actual metric emitted by the vLLM simulator is `vllm:kv_cache_usage_perc`, as defined in `internal/constants/metrics.go`.
1 parent ae16698 commit 1729424

2 files changed

Lines changed: 5 additions & 5 deletions

File tree

deploy/grafana/benchmark-dashboard.json

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -114,12 +114,12 @@
114114
},
115115
"targets": [
116116
{
117-
"expr": "vllm:gpu_cache_usage_perc{namespace=~\"llm-d.*\"}",
117+
"expr": "vllm:kv_cache_usage_perc{namespace=~\"llm-d.*\"}",
118118
"legendFormat": "{{pod}}",
119119
"refId": "A"
120120
},
121121
{
122-
"expr": "avg(vllm:gpu_cache_usage_perc{namespace=~\"llm-d.*\"})",
122+
"expr": "avg(vllm:kv_cache_usage_perc{namespace=~\"llm-d.*\"})",
123123
"legendFormat": "avg",
124124
"refId": "B"
125125
}

test/benchmark/benchmark_test.go

Lines changed: 3 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -147,9 +147,9 @@ var _ = Describe("Scale-Up Latency Benchmark", Label("benchmark"), Ordered, func
147147

148148
By("Waiting for Prometheus to scrape simulator metrics")
149149
Eventually(func(g Gomega) {
150-
_, err := promClient.QueryWithRetry(ctx, `vllm:gpu_cache_usage_perc`)
150+
_, err := promClient.QueryWithRetry(ctx, `vllm:kv_cache_usage_perc`)
151151
g.Expect(err).NotTo(HaveOccurred(), "Prometheus should have KV cache metrics from simulator")
152-
GinkgoWriter.Println("Prometheus confirmed: vllm:gpu_cache_usage_perc is available")
152+
GinkgoWriter.Println("Prometheus confirmed: vllm:kv_cache_usage_perc is available")
153153
}, 5*time.Minute, 15*time.Second).Should(Succeed())
154154

155155
scenarioStart = time.Now()
@@ -345,7 +345,7 @@ var _ = Describe("Scale-Up Latency Benchmark", Label("benchmark"), Ordered, func
345345
sustainedEnd := time.Now()
346346
kvAvg, err := QueryRangeAvg(
347347
promClient.API(),
348-
`avg(vllm:gpu_cache_usage_perc)`,
348+
`avg(vllm:kv_cache_usage_perc)`,
349349
sustainedStart, sustainedEnd,
350350
30*time.Second,
351351
)

0 commit comments

Comments
 (0)