Fix wrong Prometheus metric name in benchmark (gpu_cache → kv_cache)

ev-shindin · ev-shindin · commit 1729424baef8 · 2026-03-18T17:04:10.000+02:00
The benchmark test and its Grafana dashboard queried vllm:gpu_cache_usage_perc, which doesn't exist.
The actual metric emitted by the vLLM simulator is vllm:kv_cache_usage_perc,
as defined in internal/constants/metrics.go.
diff --git a/deploy/grafana/benchmark-dashboard.json b/deploy/grafana/benchmark-dashboard.json
@@ -114,12 +114,12 @@
       },
       "targets": [
         {
-          "expr": "vllm:gpu_cache_usage_perc{namespace=~\"llm-d.*\"}",
+          "expr": "vllm:kv_cache_usage_perc{namespace=~\"llm-d.*\"}",
           "legendFormat": "{{pod}}",
           "refId": "A"
         },
         {
-          "expr": "avg(vllm:gpu_cache_usage_perc{namespace=~\"llm-d.*\"})",
+          "expr": "avg(vllm:kv_cache_usage_perc{namespace=~\"llm-d.*\"})",
           "legendFormat": "avg",
           "refId": "B"
         }
diff --git a/test/benchmark/benchmark_test.go b/test/benchmark/benchmark_test.go
@@ -147,9 +147,9 @@ var _ = Describe("Scale-Up Latency Benchmark", Label("benchmark"), Ordered, func
 
 		By("Waiting for Prometheus to scrape simulator metrics")
 		Eventually(func(g Gomega) {
-			_, err := promClient.QueryWithRetry(ctx, `vllm:gpu_cache_usage_perc`)
+			_, err := promClient.QueryWithRetry(ctx, `vllm:kv_cache_usage_perc`)
 			g.Expect(err).NotTo(HaveOccurred(), "Prometheus should have KV cache metrics from simulator")
-			GinkgoWriter.Println("Prometheus confirmed: vllm:gpu_cache_usage_perc is available")
+			GinkgoWriter.Println("Prometheus confirmed: vllm:kv_cache_usage_perc is available")
 		}, 5*time.Minute, 15*time.Second).Should(Succeed())
 
 		scenarioStart = time.Now()
@@ -345,7 +345,7 @@ var _ = Describe("Scale-Up Latency Benchmark", Label("benchmark"), Ordered, func
 		sustainedEnd := time.Now()
 		kvAvg, err := QueryRangeAvg(
 			promClient.API(),
-			`avg(vllm:gpu_cache_usage_perc)`,
+			`avg(vllm:kv_cache_usage_perc)`,
 			sustainedStart, sustainedEnd,
 			30*time.Second,
 		)

Original file line number	Diff line number	Diff line change
`@@ -114,12 +114,12 @@`
`114`	`114`	`},`
`115`	`115`	`"targets": [`
`116`	`116`	`{`
`117`		`- "expr": "vllm:gpu_cache_usage_perc{namespace=~\"llm-d.*\"}",`
	`117`	`+ "expr": "vllm:kv_cache_usage_perc{namespace=~\"llm-d.*\"}",`
`118`	`118`	`"legendFormat": "{{pod}}",`
`119`	`119`	`"refId": "A"`
`120`	`120`	`},`
`121`	`121`	`{`
`122`		`- "expr": "avg(vllm:gpu_cache_usage_perc{namespace=~\"llm-d.*\"})",`
	`122`	`+ "expr": "avg(vllm:kv_cache_usage_perc{namespace=~\"llm-d.*\"})",`
`123`	`123`	`"legendFormat": "avg",`
`124`	`124`	`"refId": "B"`
`125`	`125`	`}`