|
# Benchmark workflow driven by comments: every newly created issue/PR comment
# starts a run, and the `gate` job decides whether a benchmark actually executes.
name: CI - Benchmark

on:
  issue_comment:
    types:
      - created

# Concurrency grouping:
#   * issue_comment events whose body does NOT contain '/benchmark kind' get a
#     group unique to the run (benchmark-isolated-<run_id>), so they share a
#     group with nothing else.
#   * everything else is grouped per issue/PR number (falling back to the run id
#     when no issue number is present), with cancel-in-progress enabled.
# NOTE(review): grouping uses a substring match on the comment body, while the
# gate job requires the trimmed body to equal '/benchmark kind' exactly — a
# comment that merely mentions the command still joins the per-PR group.
concurrency:
  group: >-
    ${{
    (
    github.event_name == 'issue_comment'
    && !contains(github.event.comment.body, '/benchmark kind')
    && format('benchmark-isolated-{0}', github.run_id)
    )
    || format('benchmark-kind-{0}', github.event.issue.number || github.run_id)
    }}
  cancel-in-progress: true

jobs:
  # gate: inspects the triggering comment and publishes four outputs.
  #   run_benchmark  'true' only when the comment is exactly `/benchmark kind`,
  #                  was posted on a pull request, and its author has
  #                  admin/maintain/write permission on this repository;
  #                  'false' on every rejection path.
  #   pr_number      number of the pull request the comment was posted on.
  #   pr_head_sha    head commit SHA of that pull request at gate time.
  #   pr_head_repo   full name of the head repository (base repo when the PR
  #                  reports no head repo).
  gate:
    runs-on: ubuntu-latest
    permissions:
      contents: read
      pull-requests: write
    outputs:
      run_benchmark: ${{ steps.check.outputs.run_benchmark }}
      pr_number: ${{ steps.check.outputs.pr_number }}
      pr_head_sha: ${{ steps.check.outputs.pr_head_sha }}
      pr_head_repo: ${{ steps.check.outputs.pr_head_repo }}
    steps:
      - name: Check if benchmark requested
        id: check
        uses: actions/github-script@v7
        with:
          script: |
            const { owner, repo } = context.repo;

            // Records a negative decision; downstream jobs key off this output.
            function skip(reason) {
              if (reason) {
                console.log(reason);
              }
              core.setOutput('run_benchmark', 'false');
            }

            // True when `username` holds admin, maintain or write permission on
            // this repository. Lookup failures are logged and treated as "no access".
            async function hasWriteAccess(username) {
              try {
                const { data: permission } = await github.rest.repos.getCollaboratorPermissionLevel({
                  owner,
                  repo,
                  username,
                });
                const privilegedRoles = ['admin', 'maintain', 'write'];
                return privilegedRoles.includes(permission.permission);
              } catch (e) {
                console.log(`Could not get permissions for ${username}: ${e.message}`);
                return false;
              }
            }

            if (context.eventName !== 'issue_comment') {
              skip();
              return;
            }

            const comment = context.payload.comment.body.trim();
            const issue = context.payload.issue;

            if (!issue.pull_request) {
              skip('Comment is not on a PR, skipping');
              return;
            }

            const validCommands = ['/benchmark kind'];
            if (!validCommands.includes(comment)) {
              skip(`Comment "${comment}" is not a valid benchmark command, skipping`);
              return;
            }

            const commenter = context.payload.comment.user.login;
            if (!(await hasWriteAccess(commenter))) {
              skip(`User ${commenter} does not have write access, ignoring ${comment}`);
              return;
            }

            const { data: pr } = await github.rest.pulls.get({
              owner,
              repo,
              pull_number: issue.number,
            });

            const baseRepo = `${owner}/${repo}`;
            const headRepo = pr.head.repo ? pr.head.repo.full_name : baseRepo;

            console.log(`/benchmark kind approved by ${commenter} for PR #${issue.number}`);
            console.log(`PR head SHA: ${pr.head.sha}`);

            // Publish the decision before acknowledging it, so a failed reaction or
            // comment can never prevent an approved benchmark from running.
            core.setOutput('run_benchmark', 'true');
            core.setOutput('pr_number', issue.number.toString());
            core.setOutput('pr_head_sha', pr.head.sha);
            core.setOutput('pr_head_repo', headRepo);

            // Acknowledgement is cosmetic: failures are downgraded to a warning.
            try {
              await github.rest.reactions.createForIssueComment({
                owner,
                repo,
                comment_id: context.payload.comment.id,
                content: 'rocket',
              });

              const runUrl = `https://github.com/${owner}/${repo}/actions/runs/${context.runId}`;
              await github.rest.issues.createComment({
                owner,
                repo,
                issue_number: issue.number,
                body: `🚀 **Benchmark (Kind)** triggered by \`/benchmark kind\`\n\n[View the benchmark workflow run](${runUrl})`,
              });
            } catch (e) {
              core.warning(`Could not acknowledge benchmark request: ${e.message}`);
            }

| 111 | +
|
# ----------------------------------------------------------------------------
# Job: benchmark-kind
# ----------------------------------------------------------------------------
  # Runs only when the gate job approved the request. Checks out the PR head
  # commit, builds the image locally, deploys the Kind-based e2e infrastructure,
  # runs `make test-benchmark`, uploads the result files and posts a summary
  # comment on the pull request.
  #
  # Values coming from the gate job are handed to shell and script steps through
  # `env` instead of being interpolated into the source text of those steps.
  benchmark-kind:
    runs-on: ubuntu-latest
    needs: [gate]
    if: needs.gate.outputs.run_benchmark == 'true'
    timeout-minutes: 45
    permissions:
      contents: read
      statuses: write
      pull-requests: write
      actions: read
    steps:
      # Validate first: every later step (including the pending status) needs
      # a non-empty SHA.
      - name: Validate PR head SHA
        env:
          PR_HEAD_SHA: ${{ needs.gate.outputs.pr_head_sha }}
        run: |
          if [ -z "$PR_HEAD_SHA" ]; then
            echo "::error::pr_head_sha is empty — refusing to fall back to main"
            exit 1
          fi
          echo "Checkout will use PR head SHA: $PR_HEAD_SHA"

      - name: Set pending status on PR head
        uses: actions/github-script@v7
        env:
          PR_HEAD_SHA: ${{ needs.gate.outputs.pr_head_sha }}
        with:
          script: |
            const { owner, repo } = context.repo;
            await github.rest.repos.createCommitStatus({
              owner,
              repo,
              sha: process.env.PR_HEAD_SHA,
              state: 'pending',
              target_url: `https://github.com/${owner}/${repo}/actions/runs/${context.runId}`,
              description: 'Benchmark running...',
              context: '${{ github.workflow }} / benchmark-kind',
            });

      - name: Checkout source
        uses: actions/checkout@v4
        with:
          repository: ${{ needs.gate.outputs.pr_head_repo || github.repository }}
          ref: ${{ needs.gate.outputs.pr_head_sha }}
          token: ${{ secrets.GITHUB_TOKEN }}
          # The steps below execute code from the PR (make targets); do not
          # leave the token stored in the checked-out repository's git config.
          persist-credentials: false

      - name: Extract Go version from go.mod
        run: |
          sed -En 's/^go (.*)$/GO_VERSION=\1/p' go.mod >> "$GITHUB_ENV"

      - name: Set up Go with cache
        uses: actions/setup-go@v6
        with:
          go-version: "${{ env.GO_VERSION }}"
          cache-dependency-path: ./go.sum

      - name: Install dependencies
        run: go mod download

      - name: Install Kind
        run: |
          ARCH=$(uname -m)
          case "$ARCH" in
            x86_64) KIND_ARCH="amd64" ;;
            aarch64) KIND_ARCH="arm64" ;;
            *) echo "Unsupported architecture: $ARCH"; exit 1 ;;
          esac
          # --fail: abort on an HTTP error instead of saving the error page as the binary.
          curl -fsSL -o ./kind "https://kind.sigs.k8s.io/dl/v0.25.0/kind-linux-${KIND_ARCH}"
          chmod +x ./kind
          sudo mv ./kind /usr/local/bin/kind
          kind version

      - name: Set up Docker Buildx
        uses: docker/setup-buildx-action@v3

      - name: Build WVA image locally
        id: build-image
        env:
          CHECKOUT_SHA: ${{ needs.gate.outputs.pr_head_sha }}
        run: |
          IMAGE_NAME="llm-d-workload-variant-autoscaler"
          # Tag with the first 7 characters of the PR head SHA.
          IMAGE_TAG="bench-${CHECKOUT_SHA:0:7}"
          FULL_IMAGE="localhost/${IMAGE_NAME}:${IMAGE_TAG}"
          echo "Building local image: $FULL_IMAGE"
          make docker-build IMG="$FULL_IMAGE"
          echo "image=$FULL_IMAGE" >> "$GITHUB_OUTPUT"

      - name: Deploy e2e infrastructure
        env:
          ENVIRONMENT: kind-emulator
          USE_SIMULATOR: "true"
          CREATE_CLUSTER: "true"
          INSTALL_GATEWAY_CTRLPLANE: "true"
          E2E_TESTS_ENABLED: "true"
          IMG: ${{ steps.build-image.outputs.image }}
          SKIP_BUILD: "true"
          KV_SPARE_TRIGGER: "0.5"
          QUEUE_SPARE_TRIGGER: "4.5"
        run: make deploy-e2e-infra

      - name: Run benchmark
        env:
          ENVIRONMENT: kind-emulator
          USE_SIMULATOR: "true"
          SCALER_BACKEND: prometheus-adapter
          BENCHMARK_RESULTS_FILE: /tmp/benchmark-results.json
          BENCHMARK_GRAFANA_ENABLED: "true"
          BENCHMARK_GRAFANA_SNAPSHOT_FILE: /tmp/benchmark-grafana-snapshot.txt
          BENCHMARK_GRAFANA_SNAPSHOT_JSON: /tmp/benchmark-grafana-snapshot.json
          BENCHMARK_GRAFANA_PANEL_DIR: /tmp/benchmark-panels
          KV_SPARE_TRIGGER: "0.5"
          QUEUE_SPARE_TRIGGER: "4.5"
        run: make test-benchmark

      - name: Upload benchmark results
        if: always()
        uses: actions/upload-artifact@v4
        with:
          name: benchmark-results
          path: |
            /tmp/benchmark-results.json
            /tmp/benchmark-grafana-snapshot.txt
            /tmp/benchmark-grafana-snapshot.json
            /tmp/benchmark-panels/
          if-no-files-found: warn

      - name: Post benchmark results as PR comment
        if: always()
        uses: actions/github-script@v7
        env:
          PR_NUMBER: ${{ needs.gate.outputs.pr_number }}
          PR_HEAD_SHA: ${{ needs.gate.outputs.pr_head_sha }}
        with:
          script: |
            const fs = require('fs');

            const RESULTS_FILE = '/tmp/benchmark-results.json';
            const SNAPSHOT_JSON_FILE = '/tmp/benchmark-grafana-snapshot.json';
            const PANEL_DIR = '/tmp/benchmark-panels';

            const { owner, repo } = context.repo;
            const prNumber = Number.parseInt(process.env.PR_NUMBER, 10);
            const sha = process.env.PR_HEAD_SHA || '';
            const runId = context.runId;
            const repoUrl = `https://github.com/${owner}/${repo}`;
            const runUrl = `${repoUrl}/actions/runs/${runId}`;

            if (!Number.isInteger(prNumber)) {
              core.warning(`PR number "${process.env.PR_NUMBER}" is not valid, skipping results comment`);
              return;
            }

            // Look up the uploaded artifact to get a direct download link;
            // fall back to the run page when it cannot be found.
            let artifactUrl = runUrl;
            try {
              const { data: { artifacts } } = await github.rest.actions.listWorkflowRunArtifacts({
                owner,
                repo,
                run_id: runId,
              });
              const benchArtifact = artifacts.find((a) => a.name === 'benchmark-results');
              if (benchArtifact) {
                artifactUrl = `${runUrl}/artifacts/${benchArtifact.id}`;
              }
            } catch (e) {
              console.log(`Could not look up artifact: ${e.message}`);
            }

            // Formats a numeric metric; anything that is not a finite number
            // renders as N/A so one missing field cannot discard the whole table.
            const fmtNum = (v, digits, suffix = '') =>
              typeof v === 'number' && Number.isFinite(v) ? `${v.toFixed(digits)}${suffix}` : 'N/A';
            // Negative durations are rendered as N/A.
            const fmtTime = (v) => (typeof v === 'number' && v < 0 ? 'N/A' : fmtNum(v, 1, 's'));

            let resultsTable = '⚠️ Benchmark results file not found or could not be parsed.';
            try {
              const data = JSON.parse(fs.readFileSync(RESULTS_FILE, 'utf8'));
              // Built line by line so the Markdown never inherits the YAML block's indentation.
              resultsTable = [
                '| Metric | Value |',
                '|--------|-------|',
                `| Scale-up time | ${fmtTime(data.scaleUpTimeSec)} |`,
                `| Scale-down time | ${fmtTime(data.scaleDownTimeSec)} |`,
                `| Max replicas | ${data.maxReplicas ?? 'N/A'} |`,
                `| Avg KV cache usage | ${fmtNum(data.avgKVCacheUsage, 3)} |`,
                `| Avg queue depth | ${fmtNum(data.avgQueueDepth, 1)} |`,
                `| Replica oscillation (σ) | ${fmtNum(data.replicaOscillation, 2)} |`,
                `| Total duration | ${fmtNum(data.totalDurationSec, 0, 's')} |`,
              ].join('\n');
            } catch (e) {
              console.log(`Could not read results: ${e.message}`);
            }

            // Check which Grafana artifacts exist (panel directory is read once).
            const hasSnapshotJson = fs.existsSync(SNAPSHOT_JSON_FILE);
            const panelPngs = fs.existsSync(PANEL_DIR)
              ? fs.readdirSync(PANEL_DIR).filter((f) => f.endsWith('.png'))
              : [];

            const items = [];
            if (hasSnapshotJson) {
              items.push('Grafana snapshot JSON (re-import via `POST /api/snapshots`)');
            }
            if (panelPngs.length > 0) {
              items.push(`${panelPngs.length} dashboard panel PNGs`);
            }
            const artifactsSection = items.length > 0
              ? `\n\n📎 **[Download artifacts](${artifactUrl})** — ${items.join(', ')}`
              : '';

            const body = [
              '## Benchmark: scale-up-latency (Kind)',
              '',
              `${resultsTable}${artifactsSection}`,
              '',
              '<details>',
              '<summary>Environment</summary>',
              '',
              '- Cluster: Kind (emulated GPUs)',
              '- Model: unsloth/Meta-Llama-3.1-8B (simulator)',
              `- Commit: ${sha.substring(0, 7)}`,
              '- Scaler: prometheus-adapter',
              `- [Workflow run](${runUrl})`,
              '',
              '</details>',
            ].join('\n');

            await github.rest.issues.createComment({
              owner,
              repo,
              issue_number: prNumber,
              body,
            });

      - name: Cleanup Kind cluster
        if: always()
        run: kind delete cluster --name kind-wva-gpu-cluster || true

| 321 | + |
# ----------------------------------------------------------------------------
# Job: report-status
# ----------------------------------------------------------------------------
  # Runs whenever the gate approved a benchmark, regardless of how the
  # benchmark-kind job ended, and writes the final commit status
  # ("<workflow name> / benchmark-kind") onto the PR head commit.
  # Only a 'success' result maps to a success status; every other result
  # (skipped, cancelled, anything else) is reported as failure.
  report-status:
    runs-on: ubuntu-latest
    needs:
      - gate
      - benchmark-kind
    if: always() && needs.gate.outputs.run_benchmark == 'true'
    permissions:
      statuses: write
    steps:
      - name: Report status to PR
        uses: actions/github-script@v7
        with:
          script: |
            const prHeadSha = '${{ needs.gate.outputs.pr_head_sha }}';
            const benchResult = '${{ needs.benchmark-kind.result }}';

            // Without a commit to attach to there is nothing to report.
            if (!prHeadSha) {
              console.log('No PR head SHA available, skipping status report');
              return;
            }

            // Description per known job result; unknown results fall through
            // to the generic failure text.
            const descriptionByResult = new Map([
              ['success', 'Benchmark completed successfully'],
              ['skipped', 'Benchmark did not run (prerequisite failed or skipped)'],
              ['cancelled', 'Benchmark cancelled'],
            ]);

            const state = benchResult === 'success' ? 'success' : 'failure';
            const description = descriptionByResult.get(benchResult) ?? 'Benchmark failed';

            console.log(`Reporting status to PR commit ${prHeadSha}: ${state} - ${description}`);

            const { owner, repo } = context.repo;
            const targetUrl = `https://github.com/${owner}/${repo}/actions/runs/${context.runId}`;

            await github.rest.repos.createCommitStatus({
              owner,
              repo,
              sha: prHeadSha,
              state,
              target_url: targetUrl,
              description,
              context: '${{ github.workflow }} / benchmark-kind',
            });

            console.log('Status reported successfully');
0 commit comments