@@ -129,32 +129,72 @@ var _ = Describe("Scale-Up Latency Benchmark", Label("benchmark"), Ordered, func
129129 Name : vaName ,
130130 }, currentVA )
131131 g .Expect (err ).NotTo (HaveOccurred ())
132- optimized := int32 (currentVA .Status .DesiredOptimizedAlloc .NumReplicas )
133- g .Expect (optimized ).To (BeNumerically (">=" , 1 ), "VA should have optimized >= 1" )
132+ g . Expect (currentVA .Status .DesiredOptimizedAlloc .NumReplicas ). NotTo ( BeNil (), "NumReplicas should be set" )
133+ g .Expect (* currentVA . Status . DesiredOptimizedAlloc . NumReplicas ).To (BeNumerically (">=" , 1 ), "VA should have optimized >= 1" )
134134 }, 5 * time .Minute , 10 * time .Second ).Should (Succeed ())
135135
136+ By ("Verifying external metrics API serves wva_desired_replicas" )
137+ Eventually (func (g Gomega ) {
138+ result , err := k8sClient .RESTClient ().
139+ Get ().
140+ AbsPath ("/apis/external.metrics.k8s.io/v1beta1/namespaces/" + benchCfg .LLMDNamespace + "/wva_desired_replicas" ).
141+ DoRaw (ctx )
142+ g .Expect (err ).NotTo (HaveOccurred (), "External metrics API should be accessible" )
143+ g .Expect (string (result )).To (ContainSubstring ("wva_desired_replicas" ), "Metric should be available" )
144+ g .Expect (string (result )).To (ContainSubstring (vaName ), "Metric should reference the benchmark VA" )
145+ GinkgoWriter .Printf ("External metrics API confirmed: wva_desired_replicas available for %s\n " , vaName )
146+ }, 5 * time .Minute , 10 * time .Second ).Should (Succeed ())
147+
148+ By ("Waiting for Prometheus to scrape simulator metrics" )
149+ Eventually (func (g Gomega ) {
150+ _ , err := promClient .QueryWithRetry (ctx , `vllm:gpu_cache_usage_perc` )
151+ g .Expect (err ).NotTo (HaveOccurred (), "Prometheus should have KV cache metrics from simulator" )
152+ GinkgoWriter .Println ("Prometheus confirmed: vllm:gpu_cache_usage_perc is available" )
153+ }, 5 * time .Minute , 15 * time .Second ).Should (Succeed ())
154+
136155 scenarioStart = time .Now ()
137- GinkgoWriter .Println ("BeforeAll completed — benchmark scenario starting" )
156+ GinkgoWriter .Println ("BeforeAll completed — metrics pipeline verified, benchmark scenario starting" )
138157 })
139158
140159 AfterAll (func () {
141160 results .TotalDurationSec = time .Since (scenarioStart ).Seconds ()
142161
143162 if grafanaClient != nil && benchCfg .GrafanaEnabled {
144163 By ("Capturing Grafana snapshot of benchmark dashboard" )
145- snapshotURL , snapErr := grafanaClient .CreateSnapshot (scenarioStart )
164+ snapResult , snapErr := grafanaClient .CreateSnapshot (scenarioStart )
146165 if snapErr != nil {
147166 GinkgoWriter .Printf ("Warning: failed to create Grafana snapshot: %v\n " , snapErr )
148167 } else {
149- results .GrafanaSnapshotURL = snapshotURL
150- GinkgoWriter .Printf ("Grafana snapshot: %s\n " , snapshotURL )
168+ results .GrafanaSnapshotURL = snapResult . URL
169+ GinkgoWriter .Printf ("Grafana snapshot: %s\n " , snapResult . URL )
151170
152171 if benchCfg .GrafanaSnapshotFile != "" {
153- if writeErr := os .WriteFile (benchCfg .GrafanaSnapshotFile , []byte (snapshotURL + "\n " ), 0644 ); writeErr != nil {
154- GinkgoWriter .Printf ("Warning: failed to write snapshot file: %v\n " , writeErr )
172+ if writeErr := os .WriteFile (benchCfg .GrafanaSnapshotFile , []byte (snapResult .URL + "\n " ), 0644 ); writeErr != nil {
173+ GinkgoWriter .Printf ("Warning: failed to write snapshot URL file: %v\n " , writeErr )
174+ }
175+ }
176+
177+ // Export full snapshot JSON for offline re-import
178+ if benchCfg .GrafanaSnapshotJSONFile != "" {
179+ By ("Exporting Grafana snapshot JSON" )
180+ if exportErr := grafanaClient .ExportSnapshotJSON (snapResult .Key , benchCfg .GrafanaSnapshotJSONFile ); exportErr != nil {
181+ GinkgoWriter .Printf ("Warning: failed to export snapshot JSON: %v\n " , exportErr )
182+ } else {
183+ GinkgoWriter .Printf ("Snapshot JSON exported to %s\n " , benchCfg .GrafanaSnapshotJSONFile )
155184 }
156185 }
157186 }
187+
188+ // Render all panels to PNG
189+ if benchCfg .GrafanaPanelDir != "" {
190+ By ("Rendering dashboard panels to PNG" )
191+ panelFiles , renderErr := grafanaClient .RenderAllPanels (scenarioStart , time .Now (), benchCfg .GrafanaPanelDir )
192+ if renderErr != nil {
193+ GinkgoWriter .Printf ("Warning: panel rendering failed: %v\n " , renderErr )
194+ } else {
195+ GinkgoWriter .Printf ("Rendered %d panels to %s\n " , len (panelFiles ), benchCfg .GrafanaPanelDir )
196+ }
197+ }
158198 }
159199
160200 By ("Writing benchmark results to file" )
@@ -234,7 +274,7 @@ var _ = Describe("Scale-Up Latency Benchmark", Label("benchmark"), Ordered, func
234274 }
235275 }
236276 g .Expect (runningCount ).To (BeNumerically (">=" , benchLoadWorkers ))
237- }, 3 * time .Minute , 5 * time .Second ).Should (Succeed ())
277+ }, 5 * time .Minute , 5 * time .Second ).Should (Succeed ())
238278
239279 By ("Polling replicas to detect scale-up" )
240280 scaleUpDetected := false
0 commit comments