8181 const platform = context.payload.inputs.platform;
8282 const runUrl = `https://github.com/${context.repo.owner}/${context.repo.repo}/actions/runs/${context.runId}`;
8383 console.log(`Manual benchmark dispatch for ${platform}`);
84-
8584 core.setOutput('run_benchmark', 'true');
8685 core.setOutput('platform', platform);
8786
@@ -190,10 +189,8 @@ jobs:
190189 IMAGE_TAG="bench-$(printf '%s' "$GIT_REF" | cut -c1-8)"
191190 FULL_IMAGE="${REGISTRY}/${IMAGE_NAME}:${IMAGE_TAG}"
192191 echo "Building image: $FULL_IMAGE"
193-
194192 make docker-build IMG="$FULL_IMAGE"
195193 make docker-push IMG="$FULL_IMAGE"
196-
197194 echo "image_tag=${IMAGE_TAG}" >> $GITHUB_OUTPUT
198195
199196 benchmark-kind :
@@ -292,6 +289,90 @@ jobs:
292289 INSTALL_GRAFANA : " true"
293290 run : make deploy-e2e-infra
294291
# Installs ko (used for the FMA image builds below). The action is referenced by
# full commit SHA and the ko release is fixed to v0.15.2.
292+ - name : Install ko for FMA image builds
293+ uses : ko-build/setup-ko@d006021bd0c28d1ce33a07e7943d48b079944c8d
294+ with :
295+ version : v0.15.2
296+
# Checks out the FMA sources into ./fma-repo so later steps can run its Makefile.
# NOTE(review): actions/checkout is referenced by the mutable tag v4 while the
# setup-ko step in this job is pinned to a commit SHA, and no `ref` is given, so
# the default branch HEAD of the FMA repo is built — consider pinning both.
- name: Clone FMA repository
  uses: actions/checkout@v4
  with:
    repository: llm-d-incubation/llm-d-fast-model-actuation
    path: fma-repo
    token: ${{ secrets.GITHUB_TOKEN }}
    # Later steps execute this repository's Makefile targets; do not leave the
    # token behind in fma-repo/.git/config where that code could read it.
    persist-credentials: false
303+
# Builds the four FMA images from ./fma-repo, loads them into the Kind cluster,
# and publishes the launcher/requester image names as step outputs
# (`launcher`, `requester`) for the benchmark step.
- name: Build and load FMA images into Kind
  id: fma-images
  run: |
    cd fma-repo
    # Build all FMA images locally and load into Kind
    make build-test-requester-local build-test-launcher-local build-controller-local build-populator-local
    make load-test-requester-local load-test-launcher-local load-controller-local load-populator-local CLUSTER_NAME=kind-wva-gpu-cluster

    # Resolve the image names into variables first: a plain assignment takes the
    # exit status of its command substitution, so a failing `make echo-var`
    # aborts the step. Inside `echo "...$(make ...)"` the failure would be
    # masked by echo's own zero exit status.
    launcher_img="$(make echo-var VAR=TEST_LAUNCHER_IMG)"
    requester_img="$(make echo-var VAR=TEST_REQUESTER_IMG)"

    # An empty name would silently reach the benchmark as an empty env var.
    if [ -z "$launcher_img" ] || [ -z "$requester_img" ]; then
      echo "::error::Could not resolve FMA image names from the fma-repo Makefile"
      exit 1
    fi

    # Export image names for the benchmark step
    {
      echo "launcher=${launcher_img}"
      echo "requester=${requester_img}"
    } >> "$GITHUB_OUTPUT"
314+
# Prepares the Kind cluster for FMA: labels every node as a GPU node, creates the
# gpu-map ConfigMap with fake GPU mappings, and creates the testreq/testlauncher
# service accounts with their Roles and RoleBindings in $FMA_NAMESPACE.
- name: Set up FMA prerequisites in Kind cluster
  env:
    FMA_NAMESPACE: llm-d-sim
  run: |
    set -euo pipefail

    # Create-or-update a namespaced object. Rendering with a client-side dry run
    # and piping into `kubectl apply` keeps re-runs idempotent without hiding
    # real failures the way `kubectl create ... || true` does.
    ensure() {
      kubectl create "$@" -n "$FMA_NAMESPACE" --dry-run=client -o yaml \
        | kubectl apply -n "$FMA_NAMESPACE" -f -
    }

    # Node names without the "node/" prefix; looked up once and reused below.
    nodes="$(kubectl get nodes -o name | sed 's%^node/%%')"

    # Label GPU nodes
    for node in $nodes; do
      kubectl label node "$node" nvidia.com/gpu.present=true nvidia.com/gpu.product=NVIDIA-L40S nvidia.com/gpu.count=2 --overwrite=true
    done

    # Create gpu-map ConfigMap with fake GPU mappings
    ensure cm gpu-map
    for node in $nodes; do
      kubectl patch cm gpu-map -n "$FMA_NAMESPACE" --type=merge -p="{\"data\":{\"$node\":\"{\\\"GPU-0\\\": 0, \\\"GPU-1\\\": 1}\"}}"
    done

    # Create service accounts and RBAC
    ensure sa testreq
    ensure sa testlauncher

    kubectl apply -n "$FMA_NAMESPACE" -f - <<'EOF'
    apiVersion: rbac.authorization.k8s.io/v1
    kind: Role
    metadata:
      name: testreq
    rules:
    - apiGroups: ["fma.llm-d.ai"]
      resources: ["inferenceserverconfigs", "launcherconfigs"]
      verbs: ["get", "list", "watch"]
    - apiGroups: [""]
      resourceNames: ["gpu-map", "gpu-allocs"]
      resources: ["configmaps"]
      verbs: ["update", "patch", "get", "list", "watch"]
    - apiGroups: [""]
      resources: ["configmaps"]
      verbs: ["create"]
    - apiGroups: [""]
      resources: ["pods"]
      verbs: ["get", "list", "watch"]
    EOF
    ensure rolebinding testreq --role=testreq --serviceaccount="$FMA_NAMESPACE":testreq

    kubectl apply -n "$FMA_NAMESPACE" -f - <<'EOF'
    apiVersion: rbac.authorization.k8s.io/v1
    kind: Role
    metadata:
      name: testlauncher
    rules:
    - apiGroups: [""]
      resourceNames: ["gpu-map"]
      resources: ["configmaps"]
      verbs: ["get", "list", "watch"]
    - apiGroups: [""]
      resources: ["pods"]
      verbs: ["get", "patch"]
    EOF
    ensure rolebinding testlauncher --role=testlauncher --serviceaccount="$FMA_NAMESPACE":testlauncher
370+
# Runs the `deploy-fma` make target with FMA_REPO_PATH set to ./fma-repo, the
# path the "Clone FMA repository" step checks the FMA sources out to.
371+ - name : Deploy FMA controllers
372+ env :
373+ FMA_REPO_PATH : ./fma-repo
374+ run : make deploy-fma
375+
295376 - name : Run benchmark
296377 env :
297378 ENVIRONMENT : kind-emulator
@@ -304,6 +385,9 @@ jobs:
304385 BENCHMARK_GRAFANA_PANEL_DIR : /tmp/benchmark-panels
305386 KV_SPARE_TRIGGER : " 0.1"
306387 QUEUE_SPARE_TRIGGER : " 3"
388+ FMA_LAUNCHER_IMAGE : ${{ steps.fma-images.outputs.launcher }}
389+ FMA_REQUESTER_IMAGE : ${{ steps.fma-images.outputs.requester }}
390+ FMA_BENCHMARK_RESULTS_FILE : /tmp/fma-benchmark-results.json
307391 run : make test-benchmark
308392
309393 - name : Upload benchmark results
@@ -314,6 +398,7 @@ jobs:
314398 path : |
315399 /tmp/benchmark-results.json
316400 /tmp/prefill-benchmark-results.json
401+ /tmp/fma-benchmark-results.json
317402 /tmp/benchmark-grafana-snapshot.txt
318403 /tmp/benchmark-grafana-snapshot.json
319404 /tmp/benchmark-panels/
@@ -353,6 +438,22 @@ jobs:
353438 const data = JSON.parse(fs.readFileSync('/tmp/benchmark-results.json', 'utf8'));
354439
355440 const fmtTime = (v) => v < 0 ? 'N/A' : `${v.toFixed(1)}s`;
// Formats a millisecond duration for the results table. Anything that is not a
// positive finite number (missing field, string, NaN, 0, negative) renders as
// 'N/A' instead of throwing on `.toFixed`.
const fmtMs = (v) => (Number.isFinite(v) && v > 0 ? `${v.toFixed(0)}ms` : 'N/A');

// Renders a counter field; a missing or non-numeric value becomes 'N/A' rather
// than the literal text "undefined".
const fmaCount = (v) => (Number.isFinite(v) ? String(v) : 'N/A');

// Renders a 0..1 ratio as a percentage with one decimal; 'N/A' instead of "NaN%".
const fmaPercent = (v) => (Number.isFinite(v) ? `${(v * 100).toFixed(1)}%` : 'N/A');

// Builds the extra markdown table rows for the FMA section. The result starts
// with a newline so it can be appended directly after the last existing row.
const renderFmaRows = (fmaData) =>
  [
    '',
    '| **FMA Actuation** | |',
    `| Cold starts | ${fmaCount(fmaData.coldStarts)} |`,
    `| Avg cold actuation | ${fmtMs(fmaData.avgColdActuationMs)} |`,
    `| Warm hits (sleeping woken) | ${fmaCount(fmaData.warmHits)} |`,
    `| Avg warm actuation | ${fmtMs(fmaData.avgWarmActuationMs)} |`,
    `| Hit rate | ${fmaPercent(fmaData.hitRate)} |`,
    `| Total iterations | ${fmaCount(fmaData.totalIterations)} |`,
  ].join('\n');

// FMA results are optional: when the file is missing or unparsable the rows stay
// empty and the main results table is rendered without the FMA section.
let fmaRows = '';
try {
  const fmaData = JSON.parse(fs.readFileSync('/tmp/fma-benchmark-results.json', 'utf8'));
  fmaRows = renderFmaRows(fmaData);
} catch (e) {
  console.log(`Could not read FMA results: ${e.message}`);
}
356457
357458 resultsTable = `| Metric | Value |
358459 |--------|-------|
@@ -362,7 +463,7 @@ jobs:
362463 | Avg KV cache usage | ${data.avgKVCacheUsage.toFixed(3)} |
363464 | Avg queue depth | ${data.avgQueueDepth.toFixed(1)} |
364465 | Replica oscillation (σ) | ${data.replicaOscillation.toFixed(2)} |
365- | Total duration | ${data.totalDurationSec.toFixed(0)}s |`;
466+ | Total duration | ${data.totalDurationSec.toFixed(0)}s |${fmaRows} `;
366467 } catch (e) {
367468 console.log(`Could not read results: ${e.message}`);
368469 }
@@ -1204,7 +1305,6 @@ jobs:
12041305 script : |
12051306 const prHeadSha = '${{ needs.gate.outputs.pr_head_sha }}';
12061307 const platform = '${{ needs.gate.outputs.platform }}';
1207-
12081308 let benchResult;
12091309 if (platform === 'openshift') {
12101310 benchResult = '${{ needs.benchmark-openshift.result }}';
0 commit comments