Add benchmark workflow to main for issue_comment trigger

ev-shindin · ev-shindin · commit 085be2b99bae · 2026-03-18T15:48:01.000+02:00
diff --git a/.github/workflows/ci-benchmark.yaml b/.github/workflows/ci-benchmark.yaml
@@ -0,0 +1,368 @@
+name: CI - Benchmark
+
+# Concurrency grouping:
+#   - issue_comment events whose body does not contain '/benchmark kind' get a
+#     per-run group (keyed on run_id).
+#   - every other event shares one group keyed on the issue/PR number, falling
+#     back to run_id when the event carries no issue number.
+# With cancel-in-progress enabled, a newer run in the same group cancels the
+# older one.
+# NOTE(review): contains() is a substring match, whereas the gate job accepts
+# only a comment that is exactly '/benchmark kind' after trimming. A comment
+# that merely mentions the command therefore appears to join the per-PR group
+# and may cancel a benchmark that is already running — confirm this is intended.
+concurrency:
+  group: >-
+    ${{
+      github.event_name == 'issue_comment' &&
+      !contains(github.event.comment.body, '/benchmark kind')
+      && format('benchmark-isolated-{0}', github.run_id)
+      || format('benchmark-kind-{0}',
+           github.event.issue.number
+           || github.run_id)
+    }}
+  cancel-in-progress: true
+
+# Triggered by every newly created comment; the gate job decides whether the
+# comment is actually a benchmark request.
+on:
+  issue_comment:
+    types:
+      - created
+
+jobs:
+  # Decides whether a new comment is an authorized benchmark request and, when
+  # it is, publishes the PR number, head SHA and head repository as job outputs.
+  gate:
+    runs-on: ubuntu-latest
+    permissions:
+      contents: read
+      pull-requests: write
+    outputs:
+      run_benchmark: ${{ steps.check.outputs.run_benchmark }}
+      pr_number: ${{ steps.check.outputs.pr_number }}
+      pr_head_sha: ${{ steps.check.outputs.pr_head_sha }}
+      pr_head_repo: ${{ steps.check.outputs.pr_head_repo }}
+    steps:
+      - name: Check if benchmark requested
+        id: check
+        uses: actions/github-script@v7
+        with:
+          script: |
+            // Emit the "do not run" output, logging the reason first when one is given.
+            const decline = (reason) => {
+              if (reason !== undefined) {
+                console.log(reason);
+              }
+              core.setOutput('run_benchmark', 'false');
+            };
+
+            if (context.eventName !== 'issue_comment') {
+              decline();
+              return;
+            }
+
+            const { owner, repo } = context.repo;
+            const ACCEPTED_COMMANDS = ['/benchmark kind'];
+            const WRITE_ROLES = ['admin', 'maintain', 'write'];
+
+            // True when the collaborator permission lookup reports one of WRITE_ROLES;
+            // any lookup error is logged and treated as "no access".
+            const canWrite = async (login) => {
+              try {
+                const response = await github.rest.repos.getCollaboratorPermissionLevel({
+                  owner,
+                  repo,
+                  username: login
+                });
+                return WRITE_ROLES.includes(response.data.permission);
+              } catch (err) {
+                console.log(`Could not get permissions for ${login}: ${err.message}`);
+                return false;
+              }
+            };
+
+            const commandText = context.payload.comment.body.trim();
+            const issue = context.payload.issue;
+
+            // Only comments on pull requests are eligible.
+            if (!issue.pull_request) {
+              decline('Comment is not on a PR, skipping');
+              return;
+            }
+
+            // The trimmed comment must be exactly one of the accepted commands.
+            if (!ACCEPTED_COMMANDS.includes(commandText)) {
+              decline(`Comment "${commandText}" is not a valid benchmark command, skipping`);
+              return;
+            }
+
+            const requester = context.payload.comment.user.login;
+            if (!(await canWrite(requester))) {
+              decline(`User ${requester} does not have write access, ignoring ${commandText}`);
+              return;
+            }
+
+            const { data: pullRequest } = await github.rest.pulls.get({
+              owner,
+              repo,
+              pull_number: issue.number
+            });
+
+            // Fall back to the base repository when the head repository is missing.
+            const upstreamRepo = `${owner}/${repo}`;
+            const sourceRepo = pullRequest.head.repo ? pullRequest.head.repo.full_name : upstreamRepo;
+
+            console.log(`/benchmark kind approved by ${requester} for PR #${issue.number}`);
+            console.log(`PR head SHA: ${pullRequest.head.sha}`);
+
+            // Acknowledge the request: react to the comment, then link the run.
+            await github.rest.reactions.createForIssueComment({
+              owner,
+              repo,
+              comment_id: context.payload.comment.id,
+              content: 'rocket'
+            });
+
+            const runLink = `https://github.com/${owner}/${repo}/actions/runs/${context.runId}`;
+            await github.rest.issues.createComment({
+              owner,
+              repo,
+              issue_number: issue.number,
+              body: `🚀 **Benchmark (Kind)** triggered by \`/benchmark kind\`\n\n[View the benchmark workflow run](${runLink})`
+            });
+
+            core.setOutput('run_benchmark', 'true');
+            core.setOutput('pr_number', issue.number.toString());
+            core.setOutput('pr_head_sha', pullRequest.head.sha);
+            core.setOutput('pr_head_repo', sourceRepo);
+
+  # Runs the benchmark against the PR head commit published by the gate job.
+  benchmark-kind:
+    runs-on: ubuntu-latest
+    needs: [gate]
+    if: needs.gate.outputs.run_benchmark == 'true'
+    timeout-minutes: 45
+    permissions:
+      contents: read
+      # commit statuses on the PR head
+      statuses: write
+      # results comment on the PR
+      pull-requests: write
+      # artifact lookup when building the results comment
+      actions: read
+    steps:
+      # Validate before anything touches the API, so an empty SHA fails with this
+      # explicit message instead of a createCommitStatus error. The value is
+      # passed through env rather than interpolated into the shell source.
+      - name: Validate PR head SHA
+        env:
+          PR_HEAD_SHA: ${{ needs.gate.outputs.pr_head_sha }}
+        run: |
+          if [ -z "${PR_HEAD_SHA}" ]; then
+            echo "::error::pr_head_sha is empty — refusing to fall back to main"
+            exit 1
+          fi
+          echo "Checkout will use PR head SHA: ${PR_HEAD_SHA}"
+
+      # Mark the PR head commit as pending; the status context string must stay
+      # identical to the one used when the final status is reported.
+      - name: Set pending status on PR head
+        uses: actions/github-script@v7
+        env:
+          PR_HEAD_SHA: ${{ needs.gate.outputs.pr_head_sha }}
+        with:
+          script: |
+            await github.rest.repos.createCommitStatus({
+              owner: context.repo.owner,
+              repo: context.repo.repo,
+              sha: process.env.PR_HEAD_SHA,
+              state: 'pending',
+              target_url: `https://github.com/${context.repo.owner}/${context.repo.repo}/actions/runs/${context.runId}`,
+              description: 'Benchmark running...',
+              context: '${{ github.workflow }} / benchmark-kind'
+            });
+
+      # Check out exactly the commit published by the gate job, from the PR's
+      # head repository (falling back to this repository when none was reported).
+      - name: Checkout source
+        uses: actions/checkout@v4
+        with:
+          repository: ${{ needs.gate.outputs.pr_head_repo || github.repository }}
+          ref: ${{ needs.gate.outputs.pr_head_sha }}
+          token: ${{ secrets.GITHUB_TOKEN }}
+          # Later steps build and run the checked-out PR code; do not leave the
+          # token stored in the local git configuration where that code can read it.
+          persist-credentials: false
+
+      # Export GO_VERSION from the `go <version>` directive in go.mod so the
+      # next step installs the matching toolchain. The redirect target is quoted
+      # so the append still works if the env-file path contains spaces.
+      - name: Extract Go version from go.mod
+        run: sed -En 's/^go (.*)$/GO_VERSION=\1/p' go.mod >> "$GITHUB_ENV"
+
+      - name: Set up Go with cache
+        uses: actions/setup-go@v6
+        with:
+          go-version: "${{ env.GO_VERSION }}"
+          cache-dependency-path: ./go.sum
+
+      - name: Install dependencies
+        run: go mod download
+
+      # Download the Kind binary for the runner architecture and install it on
+      # PATH. The release is pinned in KIND_VERSION.
+      - name: Install Kind
+        env:
+          KIND_VERSION: "v0.25.0"
+        run: |
+          set -euo pipefail
+          ARCH=$(uname -m)
+          case "$ARCH" in
+            x86_64)  KIND_ARCH="amd64" ;;
+            aarch64) KIND_ARCH="arm64" ;;
+            *)       echo "Unsupported architecture: $ARCH"; exit 1 ;;
+          esac
+          # --fail makes an HTTP error abort the step instead of saving the error
+          # page as the "binary"; --retry covers transient download failures.
+          curl -fsSL --retry 3 -o ./kind \
+            "https://kind.sigs.k8s.io/dl/${KIND_VERSION}/kind-linux-${KIND_ARCH}"
+          chmod +x ./kind
+          sudo mv ./kind /usr/local/bin/kind
+          kind version
+
+      - name: Set up Docker Buildx
+        uses: docker/setup-buildx-action@v3
+
+      # Build the image from the checked-out commit and expose its full reference
+      # as the `image` step output. The tag is "bench-" plus the first seven
+      # characters of the PR head SHA.
+      - name: Build WVA image locally
+        id: build-image
+        env:
+          CHECKOUT_SHA: ${{ needs.gate.outputs.pr_head_sha }}
+        run: |
+          IMAGE_NAME="llm-d-workload-variant-autoscaler"
+          IMAGE_TAG="bench-${CHECKOUT_SHA:0:7}"
+          FULL_IMAGE="localhost/${IMAGE_NAME}:${IMAGE_TAG}"
+          echo "Building local image: $FULL_IMAGE"
+          make docker-build IMG="$FULL_IMAGE"
+          # Quote the output-file path so the append is robust to spaces in it.
+          echo "image=$FULL_IMAGE" >> "$GITHUB_OUTPUT"
+
+      # Environment for `make deploy-e2e-infra`. The meaning of each variable is
+      # defined by the Makefile, which is not visible here; IMG is the image
+      # reference produced by the build-image step.
+      - name: Deploy e2e infrastructure
+        run: make deploy-e2e-infra
+        env:
+          # Target environment
+          ENVIRONMENT: kind-emulator
+          USE_SIMULATOR: "true"
+          CREATE_CLUSTER: "true"
+          INSTALL_GATEWAY_CTRLPLANE: "true"
+          E2E_TESTS_ENABLED: "true"
+          # Image built earlier in this job
+          IMG: ${{ steps.build-image.outputs.image }}
+          SKIP_BUILD: "true"
+          # Trigger thresholds (same values as the benchmark step)
+          KV_SPARE_TRIGGER: "0.5"
+          QUEUE_SPARE_TRIGGER: "4.5"
+
+      # Environment for `make test-benchmark`. All BENCHMARK_* paths point under
+      # /tmp and are the files later steps upload and read.
+      - name: Run benchmark
+        run: make test-benchmark
+        env:
+          # Target environment
+          ENVIRONMENT: kind-emulator
+          USE_SIMULATOR: "true"
+          SCALER_BACKEND: prometheus-adapter
+          # Trigger thresholds (same values as the deploy step)
+          KV_SPARE_TRIGGER: "0.5"
+          QUEUE_SPARE_TRIGGER: "4.5"
+          # Output locations
+          BENCHMARK_RESULTS_FILE: /tmp/benchmark-results.json
+          BENCHMARK_GRAFANA_ENABLED: "true"
+          BENCHMARK_GRAFANA_SNAPSHOT_FILE: /tmp/benchmark-grafana-snapshot.txt
+          BENCHMARK_GRAFANA_SNAPSHOT_JSON: /tmp/benchmark-grafana-snapshot.json
+          BENCHMARK_GRAFANA_PANEL_DIR: /tmp/benchmark-panels
+
+      # Publish whichever result files exist, even when an earlier step failed;
+      # missing files only produce a warning.
+      - name: Upload benchmark results
+        uses: actions/upload-artifact@v4
+        if: always()
+        with:
+          if-no-files-found: warn
+          name: benchmark-results
+          path: |
+            /tmp/benchmark-results.json
+            /tmp/benchmark-grafana-snapshot.txt
+            /tmp/benchmark-grafana-snapshot.json
+            /tmp/benchmark-panels/
+
+      # Summarize the benchmark as a PR comment. Runs even after failures, in
+      # which case the table is replaced by a warning line. Gate outputs are
+      # passed through env rather than interpolated into the script source.
+      - name: Post benchmark results as PR comment
+        if: always()
+        uses: actions/github-script@v7
+        env:
+          PR_NUMBER: ${{ needs.gate.outputs.pr_number }}
+          PR_HEAD_SHA: ${{ needs.gate.outputs.pr_head_sha }}
+        with:
+          script: |
+            const fs = require('fs');
+            const prNumber = Number.parseInt(process.env.PR_NUMBER, 10);
+            const sha = process.env.PR_HEAD_SHA || '';
+            const runId = context.runId;
+            const repoUrl = `https://github.com/${context.repo.owner}/${context.repo.repo}`;
+
+            // Without a valid PR number there is nowhere to post; fail explicitly.
+            if (Number.isNaN(prNumber)) {
+              core.setFailed(`Invalid PR number: "${process.env.PR_NUMBER}"`);
+              return;
+            }
+
+            // Look up the uploaded artifact to get a direct download link;
+            // fall back to the run page when the lookup fails or finds nothing.
+            let artifactUrl = `${repoUrl}/actions/runs/${runId}`;
+            try {
+              const { data: { artifacts } } = await github.rest.actions.listWorkflowRunArtifacts({
+                owner: context.repo.owner,
+                repo: context.repo.repo,
+                run_id: runId
+              });
+              const benchArtifact = artifacts.find(a => a.name === 'benchmark-results');
+              if (benchArtifact) {
+                artifactUrl = `${repoUrl}/actions/runs/${runId}/artifacts/${benchArtifact.id}`;
+              }
+            } catch (e) {
+              console.log(`Could not look up artifact: ${e.message}`);
+            }
+
+            let resultsTable = '⚠️ Benchmark results file not found or could not be parsed.';
+
+            // Any read, parse or formatting error keeps the warning text above.
+            try {
+              const data = JSON.parse(fs.readFileSync('/tmp/benchmark-results.json', 'utf8'));
+
+              // Negative durations are rendered as N/A.
+              const fmtTime = (v) => v < 0 ? 'N/A' : `${v.toFixed(1)}s`;
+
+              resultsTable = `| Metric | Value |
+            |--------|-------|
+            | Scale-up time | ${fmtTime(data.scaleUpTimeSec)} |
+            | Scale-down time | ${fmtTime(data.scaleDownTimeSec)} |
+            | Max replicas | ${data.maxReplicas} |
+            | Avg KV cache usage | ${data.avgKVCacheUsage.toFixed(3)} |
+            | Avg queue depth | ${data.avgQueueDepth.toFixed(1)} |
+            | Replica oscillation (σ) | ${data.replicaOscillation.toFixed(2)} |
+            | Total duration | ${data.totalDurationSec.toFixed(0)}s |`;
+            } catch (e) {
+              console.log(`Could not read results: ${e.message}`);
+            }
+
+            // Check which Grafana artifacts exist (panel directory is listed once).
+            const panelDir = '/tmp/benchmark-panels';
+            const hasSnapshotJson = fs.existsSync('/tmp/benchmark-grafana-snapshot.json');
+            const panelPngs = fs.existsSync(panelDir)
+              ? fs.readdirSync(panelDir).filter(f => f.endsWith('.png'))
+              : [];
+            const hasPanels = panelPngs.length > 0;
+
+            let artifactsSection = '';
+            if (hasSnapshotJson || hasPanels) {
+              const items = [];
+              if (hasSnapshotJson) {
+                items.push('Grafana snapshot JSON (re-import via `POST /api/snapshots`)');
+              }
+              if (hasPanels) {
+                items.push(`${panelPngs.length} dashboard panel PNGs`);
+              }
+              artifactsSection = `\n\n📎 **[Download artifacts](${artifactUrl})** — ${items.join(', ')}`;
+            }
+
+            const body = `## Benchmark: scale-up-latency (Kind)
+
+            ${resultsTable}${artifactsSection}
+
+            <details>
+            <summary>Environment</summary>
+
+            - Cluster: Kind (emulated GPUs)
+            - Model: unsloth/Meta-Llama-3.1-8B (simulator)
+            - Commit: ${sha.substring(0, 7)}
+            - Scaler: prometheus-adapter
+            - [Workflow run](${repoUrl}/actions/runs/${runId})
+
+            </details>`;
+
+            await github.rest.issues.createComment({
+              owner: context.repo.owner,
+              repo: context.repo.repo,
+              issue_number: prNumber,
+              body: body
+            });
+
+      # Always attempt teardown; a failed delete is deliberately ignored.
+      - name: Cleanup Kind cluster
+        if: always()
+        run: |
+          kind delete cluster --name kind-wva-gpu-cluster || true
+
+  # Publishes the final commit status for the PR head once the benchmark job
+  # has finished in any state (success, failure, cancelled or skipped).
+  report-status:
+    runs-on: ubuntu-latest
+    needs: [gate, benchmark-kind]
+    if: always() && needs.gate.outputs.run_benchmark == 'true'
+    permissions:
+      statuses: write
+    steps:
+      - name: Report status to PR
+        uses: actions/github-script@v7
+        with:
+          script: |
+            const headSha = '${{ needs.gate.outputs.pr_head_sha }}';
+            const jobResult = '${{ needs.benchmark-kind.result }}';
+
+            if (!headSha) {
+              console.log('No PR head SHA available, skipping status report');
+              return;
+            }
+
+            // Job result -> [commit state, description]; any result not listed
+            // here is reported as a plain failure.
+            const outcomes = new Map([
+              ['success', ['success', 'Benchmark completed successfully']],
+              ['skipped', ['failure', 'Benchmark did not run (prerequisite failed or skipped)']],
+              ['cancelled', ['failure', 'Benchmark cancelled']],
+            ]);
+            const [state, description] = outcomes.get(jobResult) ?? ['failure', 'Benchmark failed'];
+
+            console.log(`Reporting status to PR commit ${headSha}: ${state} - ${description}`);
+
+            const { owner, repo } = context.repo;
+            await github.rest.repos.createCommitStatus({
+              owner,
+              repo,
+              sha: headSha,
+              state,
+              target_url: `https://github.com/${owner}/${repo}/actions/runs/${context.runId}`,
+              description,
+              context: '${{ github.workflow }} / benchmark-kind'
+            });
+
+            console.log('Status reported successfully');