ev-shindin
diff --git a/.github/workflows/ci-benchmark.yaml b/.github/workflows/ci-benchmark.yaml
Lines changed: 45 additions & 5 deletions
diff --git a/Makefile b/Makefile
Lines changed: 1 addition & 1 deletion
diff --git a/deploy/grafana/benchmark-dashboard.json b/deploy/grafana/benchmark-dashboard.json
Lines changed: 5 additions & 0 deletions
diff --git a/deploy/grafana/benchmark-grafana-values.yaml b/deploy/grafana/benchmark-grafana-values.yaml
Lines changed: 16 additions & 6 deletions
diff --git a/test/benchmark/benchmark_test.go b/test/benchmark/benchmark_test.go
Lines changed: 49 additions & 9 deletions
diff --git a/test/benchmark/config.go b/test/benchmark/config.go
Lines changed: 8 additions & 4 deletions
@@ -117,6 +117,8 @@ jobs:
     permissions:
       contents: read
       statuses: write
+      pull-requests: write
+      actions: read
     steps:
       - name: Set pending status on PR head
         uses: actions/github-script@v7
@@ -193,6 +195,7 @@ jobs:
           USE_SIMULATOR: "true"
           CREATE_CLUSTER: "true"
           INSTALL_GATEWAY_CTRLPLANE: "true"
+          E2E_TESTS_ENABLED: "true"
           IMG: ${{ steps.build-image.outputs.image }}
           SKIP_BUILD: "true"
           KV_SPARE_TRIGGER: "0.5"
@@ -207,6 +210,8 @@ jobs:
           BENCHMARK_RESULTS_FILE: /tmp/benchmark-results.json
           BENCHMARK_GRAFANA_ENABLED: "true"
           BENCHMARK_GRAFANA_SNAPSHOT_FILE: /tmp/benchmark-grafana-snapshot.txt
+          BENCHMARK_GRAFANA_SNAPSHOT_JSON: /tmp/benchmark-grafana-snapshot.json
+          BENCHMARK_GRAFANA_PANEL_DIR: /tmp/benchmark-panels
           KV_SPARE_TRIGGER: "0.5"
           QUEUE_SPARE_TRIGGER: "4.5"
         run: make test-benchmark
@@ -219,6 +224,8 @@ jobs:
           path: |
             /tmp/benchmark-results.json
             /tmp/benchmark-grafana-snapshot.txt
+            /tmp/benchmark-grafana-snapshot.json
+            /tmp/benchmark-panels/
           if-no-files-found: warn
 
       - name: Post benchmark results as PR comment
@@ -229,6 +236,24 @@ jobs:
             const fs = require('fs');
             const prNumber = parseInt('${{ needs.gate.outputs.pr_number }}');
             const sha = '${{ needs.gate.outputs.pr_head_sha }}';
+            const runId = context.runId;
+            const repoUrl = `https://github.com/${context.repo.owner}/${context.repo.repo}`;
+
+            // Look up the uploaded artifact to get a direct download link
+            let artifactUrl = `${repoUrl}/actions/runs/${runId}`;
+            try {
+              const { data: { artifacts } } = await github.rest.actions.listWorkflowRunArtifacts({
+                owner: context.repo.owner,
+                repo: context.repo.repo,
+                run_id: runId
+              });
+              const benchArtifact = artifacts.find(a => a.name === 'benchmark-results');
+              if (benchArtifact) {
+                artifactUrl = `${repoUrl}/actions/runs/${runId}/artifacts/${benchArtifact.id}`;
+              }
+            } catch (e) {
+              console.log(`Could not look up artifact: ${e.message}`);
+            }
 
             let resultsTable = '⚠️ Benchmark results file not found or could not be parsed.';
 
@@ -246,17 +271,31 @@ jobs:
             | Avg queue depth | ${data.avgQueueDepth.toFixed(1)} |
             | Replica oscillation (σ) | ${data.replicaOscillation.toFixed(2)} |
             | Total duration | ${data.totalDurationSec.toFixed(0)}s |`;
-
-              if (data.grafanaSnapshotUrl) {
-                resultsTable += `\n| Grafana snapshot | [View dashboard](${data.grafanaSnapshotUrl}) |`;
-              }
             } catch (e) {
               console.log(`Could not read results: ${e.message}`);
             }
 
+            // Check which Grafana artifacts exist
+            const hasSnapshotJson = fs.existsSync('/tmp/benchmark-grafana-snapshot.json');
+            const hasPanels = fs.existsSync('/tmp/benchmark-panels') &&
+              fs.readdirSync('/tmp/benchmark-panels').some(f => f.endsWith('.png'));
+
+            let artifactsSection = '';
+            if (hasSnapshotJson || hasPanels) {
+              const items = [];
+              if (hasSnapshotJson) {
+                items.push('Grafana snapshot JSON (re-import via `POST /api/snapshots`)');
+              }
+              if (hasPanels) {
+                const pngs = fs.readdirSync('/tmp/benchmark-panels').filter(f => f.endsWith('.png'));
+                items.push(`${pngs.length} dashboard panel PNGs`);
+              }
+              artifactsSection = `\n\n📎 **[Download artifacts](${artifactUrl})** — ${items.join(', ')}`;
+            }
+
             const body = `## Benchmark: scale-up-latency (Kind)
 
-            ${resultsTable}
+            ${resultsTable}${artifactsSection}
 
             <details>
             <summary>Environment</summary>
@@ -265,6 +304,7 @@ jobs:
             - Model: unsloth/Meta-Llama-3.1-8B (simulator)
             - Commit: ${sha.substring(0, 7)}
             - Scaler: prometheus-adapter
+            - [Workflow run](${repoUrl}/actions/runs/${runId})
 
             </details>`;
 
 
@@ -283,7 +283,7 @@ test-benchmark: manifests generate fmt vet ## Run benchmark tests (scale-up-late
 	USE_SIMULATOR=$(USE_SIMULATOR) \
 	SCALER_BACKEND=$(SCALER_BACKEND) \
 	MODEL_ID=$(MODEL_ID) \
-	go test ./test/benchmark/ -timeout 25m -v -ginkgo.v \
+	go test ./test/benchmark/ -timeout 30m -v -ginkgo.v \
 		-ginkgo.label-filter="benchmark"; \
 	TEST_EXIT_CODE=$$?; \
 	echo ""; \
 
@@ -19,6 +19,7 @@
   "links": [],
   "panels": [
     {
+      "id": 1,
       "title": "Deployment Replicas",
       "type": "timeseries",
       "gridPos": { "h": 8, "w": 12, "x": 0, "y": 0 },
@@ -54,6 +55,7 @@
       "options": { "legend": { "displayMode": "list", "placement": "bottom" } }
     },
     {
+      "id": 2,
       "title": "WVA Desired Replicas",
       "type": "timeseries",
       "gridPos": { "h": 8, "w": 12, "x": 12, "y": 0 },
@@ -82,6 +84,7 @@
       "options": { "legend": { "displayMode": "list", "placement": "bottom" } }
     },
     {
+      "id": 3,
       "title": "KV Cache Usage",
       "type": "timeseries",
       "gridPos": { "h": 8, "w": 12, "x": 0, "y": 8 },
@@ -124,6 +127,7 @@
       "options": { "legend": { "displayMode": "list", "placement": "bottom" } }
     },
     {
+      "id": 4,
       "title": "Queue Depth (Requests Waiting)",
       "type": "timeseries",
       "gridPos": { "h": 8, "w": 12, "x": 12, "y": 8 },
@@ -157,6 +161,7 @@
       "options": { "legend": { "displayMode": "list", "placement": "bottom" } }
     },
     {
+      "id": 5,
       "title": "Saturation Metrics",
       "type": "timeseries",
       "gridPos": { "h": 8, "w": 24, "x": 0, "y": 16 },
 
@@ -5,9 +5,6 @@ grafana:
   enabled: true
   adminUser: admin
   adminPassword: admin
-  service:
-    type: ClusterIP
-    port: 3000
   # Allow anonymous access so the benchmark can use the API without auth tokens
   grafana.ini:
     auth.anonymous:
@@ -17,6 +14,19 @@ grafana:
       allow_embedding: true
     snapshots:
       external_enabled: false
+    rendering:
+      server_url: http://kube-prometheus-stack-grafana-image-renderer:8081/render
+      callback_url: http://kube-prometheus-stack-grafana:80/
+  # Image renderer for PNG panel exports (separate Deployment — best-effort)
+  imageRenderer:
+    enabled: true
+    resources:
+      requests:
+        cpu: 50m
+        memory: 100Mi
+      limits:
+        cpu: 200m
+        memory: 256Mi
   # Auto-provision Prometheus as a datasource
   additionalDataSources:
     - name: Prometheus
@@ -37,7 +47,7 @@ grafana:
   resources:
     requests:
       cpu: 100m
-      memory: 128Mi
-    limits:
-      cpu: 300m
       memory: 256Mi
+    limits:
+      cpu: 500m
+      memory: 512Mi
@@ -129,32 +129,72 @@ var _ = Describe("Scale-Up Latency Benchmark", Label("benchmark"), Ordered, func
 				Name:      vaName,
 			}, currentVA)
 			g.Expect(err).NotTo(HaveOccurred())
-			optimized := int32(currentVA.Status.DesiredOptimizedAlloc.NumReplicas)
-			g.Expect(optimized).To(BeNumerically(">=", 1), "VA should have optimized >= 1")
+			g.Expect(currentVA.Status.DesiredOptimizedAlloc.NumReplicas).NotTo(BeNil(), "NumReplicas should be set")
+			g.Expect(*currentVA.Status.DesiredOptimizedAlloc.NumReplicas).To(BeNumerically(">=", 1), "VA should have optimized >= 1")
 		}, 5*time.Minute, 10*time.Second).Should(Succeed())
 
+		By("Verifying external metrics API serves wva_desired_replicas")
+		Eventually(func(g Gomega) {
+			result, err := k8sClient.RESTClient().
+				Get().
+				AbsPath("/apis/external.metrics.k8s.io/v1beta1/namespaces/" + benchCfg.LLMDNamespace + "/wva_desired_replicas").
+				DoRaw(ctx)
+			g.Expect(err).NotTo(HaveOccurred(), "External metrics API should be accessible")
+			g.Expect(string(result)).To(ContainSubstring("wva_desired_replicas"), "Metric should be available")
+			g.Expect(string(result)).To(ContainSubstring(vaName), "Metric should reference the benchmark VA")
+			GinkgoWriter.Printf("External metrics API confirmed: wva_desired_replicas available for %s\n", vaName)
+		}, 5*time.Minute, 10*time.Second).Should(Succeed())
+
+		By("Waiting for Prometheus to scrape simulator metrics")
+		Eventually(func(g Gomega) {
+			_, err := promClient.QueryWithRetry(ctx, `vllm:gpu_cache_usage_perc`)
+			g.Expect(err).NotTo(HaveOccurred(), "Prometheus should have KV cache metrics from simulator")
+			GinkgoWriter.Println("Prometheus confirmed: vllm:gpu_cache_usage_perc is available")
+		}, 5*time.Minute, 15*time.Second).Should(Succeed())
+
 		scenarioStart = time.Now()
-		GinkgoWriter.Println("BeforeAll completed — benchmark scenario starting")
+		GinkgoWriter.Println("BeforeAll completed — metrics pipeline verified, benchmark scenario starting")
 	})
 
 	AfterAll(func() {
 		results.TotalDurationSec = time.Since(scenarioStart).Seconds()
 
 		if grafanaClient != nil && benchCfg.GrafanaEnabled {
 			By("Capturing Grafana snapshot of benchmark dashboard")
-			snapshotURL, snapErr := grafanaClient.CreateSnapshot(scenarioStart)
+			snapResult, snapErr := grafanaClient.CreateSnapshot(scenarioStart)
 			if snapErr != nil {
 				GinkgoWriter.Printf("Warning: failed to create Grafana snapshot: %v\n", snapErr)
 			} else {
-				results.GrafanaSnapshotURL = snapshotURL
-				GinkgoWriter.Printf("Grafana snapshot: %s\n", snapshotURL)
+				results.GrafanaSnapshotURL = snapResult.URL
+				GinkgoWriter.Printf("Grafana snapshot: %s\n", snapResult.URL)
 
 				if benchCfg.GrafanaSnapshotFile != "" {
-					if writeErr := os.WriteFile(benchCfg.GrafanaSnapshotFile, []byte(snapshotURL+"\n"), 0644); writeErr != nil {
-						GinkgoWriter.Printf("Warning: failed to write snapshot file: %v\n", writeErr)
+					if writeErr := os.WriteFile(benchCfg.GrafanaSnapshotFile, []byte(snapResult.URL+"\n"), 0644); writeErr != nil {
+						GinkgoWriter.Printf("Warning: failed to write snapshot URL file: %v\n", writeErr)
+					}
+				}
+
+				// Export full snapshot JSON for offline re-import
+				if benchCfg.GrafanaSnapshotJSONFile != "" {
+					By("Exporting Grafana snapshot JSON")
+					if exportErr := grafanaClient.ExportSnapshotJSON(snapResult.Key, benchCfg.GrafanaSnapshotJSONFile); exportErr != nil {
+						GinkgoWriter.Printf("Warning: failed to export snapshot JSON: %v\n", exportErr)
+					} else {
+						GinkgoWriter.Printf("Snapshot JSON exported to %s\n", benchCfg.GrafanaSnapshotJSONFile)
 					}
 				}
 			}
+
+			// Render all panels to PNG
+			if benchCfg.GrafanaPanelDir != "" {
+				By("Rendering dashboard panels to PNG")
+				panelFiles, renderErr := grafanaClient.RenderAllPanels(scenarioStart, time.Now(), benchCfg.GrafanaPanelDir)
+				if renderErr != nil {
+					GinkgoWriter.Printf("Warning: panel rendering failed: %v\n", renderErr)
+				} else {
+					GinkgoWriter.Printf("Rendered %d panels to %s\n", len(panelFiles), benchCfg.GrafanaPanelDir)
+				}
+			}
 		}
 
 		By("Writing benchmark results to file")
@@ -234,7 +274,7 @@ var _ = Describe("Scale-Up Latency Benchmark", Label("benchmark"), Ordered, func
 				}
 			}
 			g.Expect(runningCount).To(BeNumerically(">=", benchLoadWorkers))
-		}, 3*time.Minute, 5*time.Second).Should(Succeed())
+		}, 5*time.Minute, 5*time.Second).Should(Succeed())
 
 		By("Polling replicas to detect scale-up")
 		scaleUpDetected := false
 
@@ -51,8 +51,10 @@ type BenchmarkConfig struct {
 	BenchmarkResultsFile string
 
 	// Grafana
-	GrafanaEnabled      bool   // Deploy ephemeral Grafana and capture snapshot
-	GrafanaSnapshotFile string // Path to write snapshot URL
+	GrafanaEnabled          bool   // Deploy ephemeral Grafana and capture snapshot
+	GrafanaSnapshotFile     string // Path to write snapshot URL
+	GrafanaSnapshotJSONFile string // Path to export full snapshot JSON (re-importable)
+	GrafanaPanelDir         string // Directory to write rendered panel PNGs
 
 	// Phase durations (seconds, overridable via env for tuning)
 	BaselineDurationSec  int
@@ -102,8 +104,10 @@ func LoadConfigFromEnv() BenchmarkConfig {
 
 		BenchmarkResultsFile: getEnv("BENCHMARK_RESULTS_FILE", "/tmp/benchmark-results.json"),
 
-		GrafanaEnabled:      getEnvBool("BENCHMARK_GRAFANA_ENABLED", true),
-		GrafanaSnapshotFile: getEnv("BENCHMARK_GRAFANA_SNAPSHOT_FILE", "/tmp/benchmark-grafana-snapshot.txt"),
+		GrafanaEnabled:          getEnvBool("BENCHMARK_GRAFANA_ENABLED", true),
+		GrafanaSnapshotFile:     getEnv("BENCHMARK_GRAFANA_SNAPSHOT_FILE", "/tmp/benchmark-grafana-snapshot.txt"),
+		GrafanaSnapshotJSONFile: getEnv("BENCHMARK_GRAFANA_SNAPSHOT_JSON", "/tmp/benchmark-grafana-snapshot.json"),
+		GrafanaPanelDir:         getEnv("BENCHMARK_GRAFANA_PANEL_DIR", "/tmp/benchmark-panels"),
 
 		BaselineDurationSec:  getEnvInt("BENCHMARK_BASELINE_DURATION", 120),
 		SpikeDurationSec:     getEnvInt("BENCHMARK_SPIKE_DURATION", 300),