Skip to content

Commit 085be2b

Browse files
committed
Add benchmark workflow to main for issue_comment trigger
1 parent 0871dfa commit 085be2b

1 file changed

Lines changed: 368 additions & 0 deletions

File tree

Lines changed: 368 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,368 @@
1+
# Runs the Kind-based benchmark for a pull request when `/benchmark kind` is
# posted as a PR comment. The `gate` job decides whether the comment is a
# valid, authorized request; everything else in the workflow depends on it.
name: CI - Benchmark

# Concurrency strategy
# --------------------
# `issue_comment` fires for EVERY comment on every issue and PR, and
# `cancel-in-progress: true` cancels whatever is already running in the same
# group. To keep unrelated comments from cancelling an in-flight benchmark:
#
#   * A run joins the shared per-PR group `benchmark-kind-<PR number>` only
#     when the comment contains the command text AND GitHub reports the
#     commenter as OWNER, MEMBER or COLLABORATOR.
#   * Every other run gets a unique `benchmark-isolated-<run id>` group, so it
#     can never cancel anything.
#
# The `author_association` check matters because the gate job only accepts an
# exact `/benchmark kind` comment from a user with write access, whereas
# `contains()` here also matches e.g. "please run /benchmark kind". Without
# the extra check, any commenter could cancel a running benchmark and then be
# rejected by the gate.
#
# NOTE(review): `author_association` is only an approximation of the gate's
# permission check. A legitimate maintainer whose association is reported
# differently still gets a benchmark (the gate is authoritative); the run
# simply will not cancel an older one for the same PR.
concurrency:
  group: >-
    ${{
      github.event_name == 'issue_comment' &&
      (
        !contains(github.event.comment.body, '/benchmark kind') ||
        !contains(
          fromJSON('["OWNER", "MEMBER", "COLLABORATOR"]'),
          github.event.comment.author_association
        )
      )
      && format('benchmark-isolated-{0}', github.run_id)
      || format('benchmark-kind-{0}',
          github.event.issue.number
          || github.run_id)
    }}
  cancel-in-progress: true

on:
  issue_comment:
    types: [created]
18+
19+
jobs:
  # ---------------------------------------------------------------------------
  # gate: decides whether this comment is an authorized benchmark request.
  #
  # Outputs:
  #   run_benchmark - 'true' only for an exact `/benchmark kind` comment on a
  #                   PR from a user with admin/maintain/write permission.
  #   pr_number     - PR number the comment was posted on.
  #   pr_head_sha   - PR head commit at the time the comment was processed;
  #                   later jobs check out and report status on this SHA.
  #   pr_head_repo  - full name of the repository holding the PR head
  #                   (differs from the base repository for fork PRs).
  # ---------------------------------------------------------------------------
  gate:
    runs-on: ubuntu-latest
    permissions:
      contents: read
      pull-requests: write
    outputs:
      run_benchmark: ${{ steps.check.outputs.run_benchmark }}
      pr_number: ${{ steps.check.outputs.pr_number }}
      pr_head_sha: ${{ steps.check.outputs.pr_head_sha }}
      pr_head_repo: ${{ steps.check.outputs.pr_head_repo }}
    steps:
      - name: Check if benchmark requested
        id: check
        uses: actions/github-script@v7
        with:
          script: |
            // Returns true when `username` has admin, maintain or write
            // permission on this repository. Lookup failures are treated as
            // "no access" so an API error can never authorize a run.
            async function hasWriteAccess(username) {
              try {
                const { data: permission } = await github.rest.repos.getCollaboratorPermissionLevel({
                  owner: context.repo.owner,
                  repo: context.repo.repo,
                  username: username
                });
                const privilegedRoles = ['admin', 'maintain', 'write'];
                return privilegedRoles.includes(permission.permission);
              } catch (e) {
                console.log(`Could not get permissions for ${username}: ${e.message}`);
                return false;
              }
            }

            if (context.eventName !== 'issue_comment') {
              core.setOutput('run_benchmark', 'false');
              return;
            }

            const comment = context.payload.comment.body.trim();
            const issue = context.payload.issue;

            // issue_comment also fires for plain issues; only PRs qualify.
            if (!issue.pull_request) {
              console.log('Comment is not on a PR, skipping');
              core.setOutput('run_benchmark', 'false');
              return;
            }

            // The whole (trimmed) comment must be the command.
            const validCommands = ['/benchmark kind'];
            if (!validCommands.includes(comment)) {
              console.log(`Comment "${comment}" is not a valid benchmark command, skipping`);
              core.setOutput('run_benchmark', 'false');
              return;
            }

            const commenter = context.payload.comment.user.login;
            const hasAccess = await hasWriteAccess(commenter);
            if (!hasAccess) {
              console.log(`User ${commenter} does not have write access, ignoring ${comment}`);
              core.setOutput('run_benchmark', 'false');
              return;
            }

            // Resolve the PR head now so later jobs use exactly this commit.
            const { data: pr } = await github.rest.pulls.get({
              owner: context.repo.owner,
              repo: context.repo.repo,
              pull_number: issue.number
            });

            const baseRepo = `${context.repo.owner}/${context.repo.repo}`;
            // pr.head.repo can be null; fall back to the base repository.
            const headRepo = pr.head.repo ? pr.head.repo.full_name : baseRepo;

            console.log(`/benchmark kind approved by ${commenter} for PR #${issue.number}`);
            console.log(`PR head SHA: ${pr.head.sha}`);

            // Acknowledge the request on the PR.
            await github.rest.reactions.createForIssueComment({
              owner: context.repo.owner,
              repo: context.repo.repo,
              comment_id: context.payload.comment.id,
              content: 'rocket'
            });

            const runUrl = `https://github.com/${context.repo.owner}/${context.repo.repo}/actions/runs/${context.runId}`;
            await github.rest.issues.createComment({
              owner: context.repo.owner,
              repo: context.repo.repo,
              issue_number: issue.number,
              body: `🚀 **Benchmark (Kind)** triggered by \`/benchmark kind\`\n\n[View the benchmark workflow run](${runUrl})`
            });

            core.setOutput('run_benchmark', 'true');
            core.setOutput('pr_number', issue.number.toString());
            core.setOutput('pr_head_sha', pr.head.sha);
            core.setOutput('pr_head_repo', headRepo);

  # ---------------------------------------------------------------------------
  # benchmark-kind: builds the PR head, deploys it to a Kind cluster, runs the
  # benchmark, uploads the results and posts a summary comment on the PR.
  #
  # Gate outputs are handed to scripts through `env:` instead of being
  # interpolated into the script text, so a value can never be parsed as
  # shell or JavaScript source.
  # ---------------------------------------------------------------------------
  benchmark-kind:
    runs-on: ubuntu-latest
    needs: [gate]
    if: needs.gate.outputs.run_benchmark == 'true'
    timeout-minutes: 45
    permissions:
      contents: read
      statuses: write
      pull-requests: write
      actions: read
    steps:
      # Runs first so that an empty SHA fails with a clear message before any
      # step tries to use it.
      - name: Validate PR head SHA
        env:
          PR_HEAD_SHA: ${{ needs.gate.outputs.pr_head_sha }}
        run: |
          if [ -z "$PR_HEAD_SHA" ]; then
            echo "::error::pr_head_sha is empty — refusing to fall back to main"
            exit 1
          fi
          echo "Checkout will use PR head SHA: $PR_HEAD_SHA"

      - name: Set pending status on PR head
        uses: actions/github-script@v7
        env:
          PR_HEAD_SHA: ${{ needs.gate.outputs.pr_head_sha }}
        with:
          script: |
            await github.rest.repos.createCommitStatus({
              owner: context.repo.owner,
              repo: context.repo.repo,
              sha: process.env.PR_HEAD_SHA,
              state: 'pending',
              target_url: `https://github.com/${context.repo.owner}/${context.repo.repo}/actions/runs/${context.runId}`,
              description: 'Benchmark running...',
              context: '${{ github.workflow }} / benchmark-kind'
            });

      - name: Checkout source
        uses: actions/checkout@v4
        with:
          repository: ${{ needs.gate.outputs.pr_head_repo || github.repository }}
          ref: ${{ needs.gate.outputs.pr_head_sha }}
          token: ${{ secrets.GITHUB_TOKEN }}
          # The following steps execute code from the PR (make, go). Do not
          # leave the job token in .git/config where that code could read it.
          persist-credentials: false

      # Emits GO_VERSION=<value of the `go` directive in go.mod>.
      - name: Extract Go version from go.mod
        run: sed -En 's/^go (.*)$/GO_VERSION=\1/p' go.mod >> "$GITHUB_ENV"

      - name: Set up Go with cache
        uses: actions/setup-go@v6
        with:
          go-version: "${{ env.GO_VERSION }}"
          cache-dependency-path: ./go.sum

      - name: Install dependencies
        run: go mod download

      - name: Install Kind
        run: |
          ARCH=$(uname -m)
          case "$ARCH" in
            x86_64) KIND_ARCH="amd64" ;;
            aarch64) KIND_ARCH="arm64" ;;
            *) echo "Unsupported architecture: $ARCH"; exit 1 ;;
          esac
          # -f: fail on HTTP errors instead of saving an error page as the binary.
          curl -fsSLo ./kind "https://kind.sigs.k8s.io/dl/v0.25.0/kind-linux-${KIND_ARCH}"
          chmod +x ./kind
          sudo mv ./kind /usr/local/bin/kind
          kind version

      - name: Set up Docker Buildx
        uses: docker/setup-buildx-action@v3

      # Builds the image from the checked-out PR head and exposes its full
      # reference as the step output `image`.
      - name: Build WVA image locally
        id: build-image
        env:
          CHECKOUT_SHA: ${{ needs.gate.outputs.pr_head_sha }}
        run: |
          IMAGE_NAME="llm-d-workload-variant-autoscaler"
          IMAGE_TAG="bench-${CHECKOUT_SHA:0:7}"
          FULL_IMAGE="localhost/${IMAGE_NAME}:${IMAGE_TAG}"
          echo "Building local image: $FULL_IMAGE"
          make docker-build IMG="$FULL_IMAGE"
          echo "image=$FULL_IMAGE" >> "$GITHUB_OUTPUT"

      - name: Deploy e2e infrastructure
        env:
          ENVIRONMENT: kind-emulator
          USE_SIMULATOR: "true"
          CREATE_CLUSTER: "true"
          INSTALL_GATEWAY_CTRLPLANE: "true"
          E2E_TESTS_ENABLED: "true"
          IMG: ${{ steps.build-image.outputs.image }}
          SKIP_BUILD: "true"
          KV_SPARE_TRIGGER: "0.5"
          QUEUE_SPARE_TRIGGER: "4.5"
        run: make deploy-e2e-infra

      - name: Run benchmark
        env:
          ENVIRONMENT: kind-emulator
          USE_SIMULATOR: "true"
          SCALER_BACKEND: prometheus-adapter
          BENCHMARK_RESULTS_FILE: /tmp/benchmark-results.json
          BENCHMARK_GRAFANA_ENABLED: "true"
          BENCHMARK_GRAFANA_SNAPSHOT_FILE: /tmp/benchmark-grafana-snapshot.txt
          BENCHMARK_GRAFANA_SNAPSHOT_JSON: /tmp/benchmark-grafana-snapshot.json
          BENCHMARK_GRAFANA_PANEL_DIR: /tmp/benchmark-panels
          KV_SPARE_TRIGGER: "0.5"
          QUEUE_SPARE_TRIGGER: "4.5"
        run: make test-benchmark

      # `always()` so partial results are kept when the benchmark fails.
      - name: Upload benchmark results
        if: always()
        uses: actions/upload-artifact@v4
        with:
          name: benchmark-results
          path: |
            /tmp/benchmark-results.json
            /tmp/benchmark-grafana-snapshot.txt
            /tmp/benchmark-grafana-snapshot.json
            /tmp/benchmark-panels/
          if-no-files-found: warn

      - name: Post benchmark results as PR comment
        if: always()
        uses: actions/github-script@v7
        env:
          PR_NUMBER: ${{ needs.gate.outputs.pr_number }}
          PR_HEAD_SHA: ${{ needs.gate.outputs.pr_head_sha }}
        with:
          script: |
            const fs = require('fs');
            const prNumber = Number.parseInt(process.env.PR_NUMBER, 10);
            const sha = process.env.PR_HEAD_SHA || '';
            const runId = context.runId;
            const repoUrl = `https://github.com/${context.repo.owner}/${context.repo.repo}`;

            // Look up the uploaded artifact to get a direct download link;
            // fall back to the run page when the lookup fails or finds nothing.
            let artifactUrl = `${repoUrl}/actions/runs/${runId}`;
            try {
              const { data: { artifacts } } = await github.rest.actions.listWorkflowRunArtifacts({
                owner: context.repo.owner,
                repo: context.repo.repo,
                run_id: runId
              });
              const benchArtifact = artifacts.find(a => a.name === 'benchmark-results');
              if (benchArtifact) {
                artifactUrl = `${repoUrl}/actions/runs/${runId}/artifacts/${benchArtifact.id}`;
              }
            } catch (e) {
              console.log(`Could not look up artifact: ${e.message}`);
            }

            let resultsTable = '⚠️ Benchmark results file not found or could not be parsed.';

            // Any missing file, invalid JSON or missing field throws and
            // leaves the warning above in place.
            try {
              const data = JSON.parse(fs.readFileSync('/tmp/benchmark-results.json', 'utf8'));

              // Negative durations are rendered as N/A.
              const fmtTime = (v) => v < 0 ? 'N/A' : `${v.toFixed(1)}s`;

              const rows = [
                ['Scale-up time', fmtTime(data.scaleUpTimeSec)],
                ['Scale-down time', fmtTime(data.scaleDownTimeSec)],
                ['Max replicas', `${data.maxReplicas}`],
                ['Avg KV cache usage', data.avgKVCacheUsage.toFixed(3)],
                ['Avg queue depth', data.avgQueueDepth.toFixed(1)],
                ['Replica oscillation (σ)', data.replicaOscillation.toFixed(2)],
                ['Total duration', `${data.totalDurationSec.toFixed(0)}s`]
              ];

              // Joined line by line so the Markdown never depends on how this
              // script is indented inside the YAML file.
              resultsTable = [
                '| Metric | Value |',
                '|--------|-------|',
                ...rows.map(([metric, value]) => `| ${metric} | ${value} |`)
              ].join('\n');
            } catch (e) {
              console.log(`Could not read results: ${e.message}`);
            }

            // Check which Grafana artifacts exist
            const panelDir = '/tmp/benchmark-panels';
            const hasSnapshotJson = fs.existsSync('/tmp/benchmark-grafana-snapshot.json');
            const pngs = fs.existsSync(panelDir)
              ? fs.readdirSync(panelDir).filter(f => f.endsWith('.png'))
              : [];
            const hasPanels = pngs.length > 0;

            let artifactsSection = '';
            if (hasSnapshotJson || hasPanels) {
              const items = [];
              if (hasSnapshotJson) {
                items.push('Grafana snapshot JSON (re-import via `POST /api/snapshots`)');
              }
              if (hasPanels) {
                items.push(`${pngs.length} dashboard panel PNGs`);
              }
              artifactsSection = `\n\n📎 **[Download artifacts](${artifactUrl})** — ${items.join(', ')}`;
            }

            const body = [
              '## Benchmark: scale-up-latency (Kind)',
              '',
              `${resultsTable}${artifactsSection}`,
              '',
              '<details>',
              '<summary>Environment</summary>',
              '',
              '- Cluster: Kind (emulated GPUs)',
              '- Model: unsloth/Meta-Llama-3.1-8B (simulator)',
              `- Commit: ${sha.substring(0, 7)}`,
              '- Scaler: prometheus-adapter',
              `- [Workflow run](${repoUrl}/actions/runs/${runId})`,
              '',
              '</details>'
            ].join('\n');

            await github.rest.issues.createComment({
              owner: context.repo.owner,
              repo: context.repo.repo,
              issue_number: prNumber,
              body: body
            });

      # Best effort: `|| true` keeps a failed deletion from failing the job.
      - name: Cleanup Kind cluster
        if: always()
        run: kind delete cluster --name kind-wva-gpu-cluster || true

  # ---------------------------------------------------------------------------
  # report-status: replaces the pending commit status with the final outcome.
  # `always()` makes it run even when benchmark-kind failed or was cancelled.
  # ---------------------------------------------------------------------------
  report-status:
    runs-on: ubuntu-latest
    needs: [gate, benchmark-kind]
    if: always() && needs.gate.outputs.run_benchmark == 'true'
    permissions:
      statuses: write
    steps:
      - name: Report status to PR
        uses: actions/github-script@v7
        env:
          PR_HEAD_SHA: ${{ needs.gate.outputs.pr_head_sha }}
          BENCH_RESULT: ${{ needs.benchmark-kind.result }}
        with:
          script: |
            const prHeadSha = process.env.PR_HEAD_SHA || '';
            const benchResult = process.env.BENCH_RESULT || '';

            if (!prHeadSha) {
              console.log('No PR head SHA available, skipping status report');
              return;
            }

            // Only 'success' maps to a success status; every other job
            // result is reported as a failure with a specific description.
            let state, description;
            if (benchResult === 'success') {
              state = 'success';
              description = 'Benchmark completed successfully';
            } else if (benchResult === 'skipped') {
              state = 'failure';
              description = 'Benchmark did not run (prerequisite failed or skipped)';
            } else if (benchResult === 'cancelled') {
              state = 'failure';
              description = 'Benchmark cancelled';
            } else {
              state = 'failure';
              description = 'Benchmark failed';
            }

            console.log(`Reporting status to PR commit ${prHeadSha}: ${state} - ${description}`);

            await github.rest.repos.createCommitStatus({
              owner: context.repo.owner,
              repo: context.repo.repo,
              sha: prHeadSha,
              state: state,
              target_url: `https://github.com/${context.repo.owner}/${context.repo.repo}/actions/runs/${context.runId}`,
              description: description,
              context: '${{ github.workflow }} / benchmark-kind'
            });

            console.log('Status reported successfully');

0 commit comments

Comments
 (0)