QVAC-? feat[api]: surface model name + GPU in LLM perf report (QVAC-17830 follow-up) #114

Workflow file for this run

.github/workflows/on-pr-llm-llamacpp.yml at 7d5e649

	# Umbrella pull-request workflow for the llm-llamacpp package. It declares the
	# triggers, the default token permissions and one shared env value; the jobs
	# that follow perform authorisation, checks, builds, tests and perf reporting.
	name: On PR Trigger (LLM)

	on:
	# `pull_request_target` runs in the context of the base repository, so the
	# jobs below gate themselves on the authorisation jobs' outputs.
	pull_request_target:
	types:
	- opened
	- synchronize
	- reopened
	- labeled
	branches:
	- main
	- release-*
	- feature-*
	- tmp-*
	paths:
	- "packages/llm-llamacpp/**"
	# NOTE(review): this workflow appears to live at
	# .github/workflows/on-pr-llm-llamacpp.yml, which the entry below does not
	# match — confirm whether llamacpp-llm.yml is the intended trigger path.
	- ".github/workflows/llamacpp-llm.yml"
	# Also runnable manually and callable from other workflows.
	workflow_dispatch:
	workflow_call:

	# Workflow-level token permissions; several jobs below declare their own
	# `permissions:` block, which takes the place of these for that job.
	permissions:
	contents: read
	pull-requests: read
	packages: read
	id-token: write

	env:
	# Package directory under test; the `workdir` / `working-directory` values
	# used by the jobs below point at this same path.
	PKG_DIR: packages/llm-llamacpp

	jobs:
	# Gate job: runs the repository-local label-gate action and republishes its
	# `authorised` step output as a job output for downstream `if:` conditions.
	label-gate:
	  name: Authorise (label-gate)
	  runs-on: ubuntu-latest
	  permissions:
	    pull-requests: write
	    contents: read
	  outputs:
	    authorised: ${{ steps.gate.outputs.authorised }}
	  steps:
	    # Fetch only the action's own directory, from the default branch.
	    - name: Checkout (label-gate action only)
	      uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # 6.0.2
	      with:
	        sparse-checkout-cone-mode: false
	        sparse-checkout: .github/actions/label-gate
	        ref: ${{ github.event.repository.default_branch }}
	    - id: gate
	      name: Run label-gate
	      uses: ./.github/actions/label-gate
	      with:
	        github-token: ${{ secrets.PAT_TOKEN }}
	# Runs the repository-local authorize-pr action and exposes its `allowed`
	# step output as a job output; most later jobs test it in their `if:`.
	authorize:
	  runs-on: ubuntu-latest
	  outputs:
	    allowed: ${{ steps.auth.outputs.allowed }}
	  permissions:
	    pull-requests: write
	    contents: read
	  steps:
	    # No `ref:` / `repository:` override is given to the checkout.
	    - name: Checkout
	      uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # 6.0.2
	    - id: auth
	      name: Authorize
	      uses: ./.github/actions/authorize-pr
	      with:
	        github-token: ${{ github.token }}

	# Runs the repository-local verify-qvac-fabric-lockstep action once the PR
	# has been authorised, then logs the version that action reports.
	verify-fabric-lockstep:
	  needs: [authorize]
	  if: needs.authorize.outputs.allowed == 'true'
	  runs-on: ubuntu-latest
	  steps:
	    - name: Checkout code
	      uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # 6.0.2

	    - name: Verify qvac-fabric versions are lockstep
	      id: lockstep
	      uses: ./.github/actions/verify-qvac-fabric-lockstep

	    # The step output reaches the shell through an environment variable
	    # rather than being expanded into the script text, so whatever the
	    # value contains is treated as data and never parsed as shell syntax.
	    - name: Report verified version
	      env:
	        LOCKSTEP_VERSION: ${{ steps.lockstep.outputs.version }}
	      run: 'echo "Verified qvac-fabric version: ${LOCKSTEP_VERSION}"'

	# Runs the repository-local sanity-checks action for the package once both
	# gates (label-gate and authorize) have passed and the lockstep job is done.
	sanity-checks:
	  needs:
	    - authorize
	    - verify-fabric-lockstep
	    - label-gate
	  if: needs.label-gate.outputs.authorised == 'true' && (needs.authorize.outputs.allowed == 'true')
	  runs-on: ubuntu-latest
	  steps:
	    # Full history is fetched (fetch-depth: 0).
	    - name: Checkout code
	      uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # 6.0.2
	      with:
	        fetch-depth: 0

	    - name: Run Sanity checks
	      uses: ./.github/actions/sanity-checks
	      with:
	        secret-token: ${{ secrets.GITHUB_TOKEN }}
	        pat-token: ${{ secrets.PAT_TOKEN }}
	        run-integration: ${{ needs.authorize.outputs.allowed == 'true' }}
	        # Taken from the workflow-level `env.PKG_DIR` so the package path
	        # is declared in one place instead of being repeated as a literal.
	        workdir: ${{ env.PKG_DIR }}

	# Delegates to the reusable C++ test workflow, pointing it at the PR head
	# repository and branch. Runs only after both gates and sanity-checks.
	# NOTE(review): `head.ref` is a branch name, not a commit; confirm the called
	# workflow is meant to follow the branch rather than pin `head.sha`.
	cpp-tests:
	  needs:
	    - authorize
	    - sanity-checks
	    - label-gate
	  if: needs.label-gate.outputs.authorised == 'true' && (needs.authorize.outputs.allowed == 'true')
	  uses: ./.github/workflows/cpp-tests-llm.yml
	  with:
	    repository: ${{ github.event.pull_request.head.repo.full_name }}
	    ref: ${{ github.event.pull_request.head.ref }}
	    workdir: packages/llm-llamacpp
	  secrets: inherit

	# Delegates to the reusable C++ lint workflow, handing it the PR's base and
	# head commit SHAs plus the package directory. Gated on both authorisation
	# outputs; it does not wait for sanity-checks.
	cpp-lint:
	  needs:
	    - authorize
	    - label-gate
	  if: needs.label-gate.outputs.authorised == 'true' && (needs.authorize.outputs.allowed == 'true')
	  uses: ./.github/workflows/cpp-lint.yaml
	  with:
	    workdir: packages/llm-llamacpp
	    sha: ${{ github.event.pull_request.base.sha }}
	    pr_head_sha: ${{ github.event.pull_request.head.sha }}
	  secrets: inherit

	# Node-side checks for the package: install dependencies, run the type
	# declaration check, then the repository-local lint + unit-test action.
	# The package path comes from the workflow-level `env.PKG_DIR` instead of
	# being repeated as a literal in every step.
	# NOTE(review): unlike its sibling jobs this one has no `if:` on the
	# authorize / label-gate outputs — confirm that is deliberate.
	ts-checks:
	  needs: authorize
	  runs-on: ubuntu-latest
	  steps:
	    - name: Checkout code
	      uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # 6.0.2

	    - name: Set up Node.js
	      uses: actions/setup-node@53b83947a5a98c8d113130e565377fae1a50d02f # 6.3.0
	      with:
	        node-version: 20

	    - name: Install dependencies
	      working-directory: ${{ env.PKG_DIR }}
	      run: npm install

	    - name: Type declaration check
	      working-directory: ${{ env.PKG_DIR }}
	      run: npm run test:dts

	    # Both token inputs receive the workflow's GITHUB_TOKEN.
	    - name: Run lint and unit tests
	      id: run_lint_and_unit_tests
	      uses: ./.github/actions/run-lint-and-unit-tests
	      with:
	        gpr-token: ${{ secrets.GITHUB_TOKEN }}
	        pat-token: ${{ secrets.GITHUB_TOKEN }}
	        registry-type: gpr
	        workdir: ${{ env.PKG_DIR }}

	# Delegates to the reusable prebuilds workflow for the PR head repository
	# and branch. This job grants write-level token permissions, so it runs only
	# after both gates and sanity-checks have passed.
	# NOTE(review): `head.ref` is a branch name, not a commit; confirm the called
	# workflow is meant to follow the branch rather than pin `head.sha`.
	prebuild:
	  needs:
	    - authorize
	    - sanity-checks
	    - label-gate
	  if: needs.label-gate.outputs.authorised == 'true' && (needs.authorize.outputs.allowed == 'true')
	  permissions:
	    id-token: write
	    pull-requests: write
	    packages: write
	    contents: write
	  uses: ./.github/workflows/prebuilds-llm-llamacpp.yml
	  with:
	    repository: ${{ github.event.pull_request.head.repo.full_name }}
	    ref: ${{ github.event.pull_request.head.ref }}
	  secrets: inherit

	# Delegates to the reusable integration-test workflow once prebuild has
	# finished, targeting the PR head repository and branch.
	# NOTE(review): `head.ref` is a branch name, not a commit; confirm the called
	# workflow is meant to follow the branch rather than pin `head.sha`.
	run-integration-tests:
	  needs:
	    - authorize
	    - prebuild
	    - label-gate
	  if: needs.label-gate.outputs.authorised == 'true' && (needs.authorize.outputs.allowed == 'true')
	  permissions:
	    id-token: write
	    packages: read
	    contents: read
	  uses: ./.github/workflows/integration-test-llm-llamacpp.yml
	  with:
	    repository: ${{ github.event.pull_request.head.repo.full_name }}
	    ref: ${{ github.event.pull_request.head.ref }}
	  secrets: inherit

	# Delegates to the reusable mobile integration-test workflow once prebuild
	# has finished, targeting the PR head repository and branch.
	# NOTE(review): `head.ref` is a branch name, not a commit; confirm the called
	# workflow is meant to follow the branch rather than pin `head.sha`.
	run-mobile-integration-tests:
	  needs:
	    - authorize
	    - prebuild
	    - label-gate
	  if: needs.label-gate.outputs.authorised == 'true' && (needs.authorize.outputs.allowed == 'true')
	  permissions:
	    id-token: write
	    pull-requests: write # Allow commenting on PRs
	    packages: read
	    contents: read
	  uses: ./.github/workflows/integration-mobile-test-llm-llamacpp.yml
	  with:
	    repository: ${{ github.event.pull_request.head.repo.full_name }}
	    ref: ${{ github.event.pull_request.head.ref }}
	  secrets: inherit

	# QVAC-17830: per-run joint perf reporter. Lives in the umbrella (not in
	# either child workflow) so it can block on BOTH `run-integration-tests`
	# (desktop matrix) and `run-mobile-integration-tests` (Android / iOS)
	# before scanning for perf-report artifacts. Fixes the race where
	# test-darwin-x64 (or any slow desktop job) finished AFTER the old
	# mobile-local combine-reports had already shipped the summary.
	# SECURITY (CodeQL js/cache-poisoning, alerts 735/736):
	# This job runs in the privileged `pull_request_target` context (see
	# `on:` at the top of this file). The integration matrix it depends
	# on already runs PR-author code on real hardware, so this aggregator
	# by definition consumes attacker-controllable artifacts (the per-leg
	# `performance-report.json` files). The mitigations are:
	# - The checkout below has NO `repository:` / `ref:` override, so
	# in `pull_request_target` context `actions/checkout` falls back
	# to the base-branch SHA. `aggregate.js` therefore runs from the
	# trusted default branch, not from PR-author code. On
	# `workflow_dispatch` runs, `github.ref` resolves to the dispatch
	# branch, so manual verification still uses the latest
	# `aggregate.js` from that branch.
	# - `permissions: contents: read` only — no write access, no
	# packages / id-token / pull-requests permissions.
	# - No use of `actions/cache` here, so there is no shared cache
	# for a malicious PR to poison for the default branch.
	# - `aggregate.js` does not touch GITHUB_TOKEN, secrets, or any
	# external network endpoints; its only outputs are artifact files.
	# - `device.name` (the only attacker-controllable string we let
	# near a filesystem path) is sanitised through a strict
	# `[A-Za-z0-9-]` allowlist + 64-char cap below before use.
	# Aggregator job: waits for both integration jobs, then checks out the
	# report tooling, installs Node.js and downloads this run's perf artifacts
	# into `combined-reports/` for the steps that follow.
	combine-perf-reports:
	name: Combined Performance Report
	needs: [authorize, run-integration-tests, run-mobile-integration-tests]
	# `always()` lets the job run even when an integration job failed or was
	# skipped; it is still conditional on the authorize job's `allowed` output.
	if: always() && needs.authorize.outputs.allowed == 'true'
	runs-on: ubuntu-latest
	timeout-minutes: 10
	# Read-only contents access is the only permission granted to this job.
	permissions:
	contents: read
	steps:
	- name: Checkout addon repository
	# No explicit `repository:` / `ref:` — see SECURITY block above.
	# In `pull_request_target` context this resolves to the base
	# branch SHA (trusted code); on `workflow_dispatch` it resolves
	# to the dispatch branch.
	# NOTE(review): the second sparse-checkout path below names
	# `packages/qvac-lib-infer-llamacpp-llm`, while the workflow-level PKG_DIR
	# is `packages/llm-llamacpp` — confirm the media path is still current.
	uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # 6.0.2
	with:
	sparse-checkout: \|
	scripts/perf-report
	packages/qvac-lib-infer-llamacpp-llm/media

	# NOTE(review): pinned to setup-node 4.4.0 here whereas the ts-checks job
	# pins 6.3.0 — confirm the older pin is deliberate before aligning them.
	- name: Setup Node.js
	uses: actions/setup-node@49933ea5288caeca8642d1e84afbd3f7d6820020 # 4.4.0
	with:
	node-version: lts/*

	# Only artifacts whose name ends in this run's number are fetched. A failed
	# download does not fail the job (`continue-on-error`); the later steps
	# handle the case where no report is present.
	- name: Download all perf report artifacts
	uses: actions/download-artifact@3e5f45b2cfb9172054b4087a40e8e0b5a5461e7c # 8.0.1
	with:
	pattern: perf-report-llamacpp-llm-*-${{ github.run_number }}
	path: combined-reports
	continue-on-error: true

	- name: Fix desktop device names
	  shell: bash
	  env:
	    # Passed through the environment rather than expanded into the script.
	    RUN_NUMBER: ${{ github.run_number }}
	  run: |
	    # Rewrites `.device.name` in every desktop performance-report.json so
	    # that sibling matrix legs share one device column in the summary.
	    #
	    # Every desktop matrix entry uses a unique `label` in its artifact
	    # name (e.g. linux-x64-cpu / linux-x64-gpu). Stripping our fixed
	    # prefix and the run-number suffix from the directory name leaves
	    # the raw matrix label.
	    #
	    # QVAC-17830 (combined-report fix): the CI matrix intentionally runs
	    # two legs on the same physical Linux x64 target (one no_gpu=true
	    # for pure CPU data, one on a GPU runner for CPU+GPU data) and
	    # likewise two ubuntu versions for linux-arm64. In the combined
	    # summary those should show up as ONE column per platform —
	    # otherwise the GPU row on the "-cpu" / "-u22" / "*-u24" columns
	    # renders as "-" and looks like missing data even though the data
	    # lives in the sibling leg. We therefore fold them to the physical
	    # platform name so `aggregate.js` buckets CPU and GPU measurements
	    # under one device. Test labels already carry [CPU]/[GPU], so the
	    # rows stay distinct.

	    # jq availability does not change between iterations: probe once.
	    if ! command -v jq >/dev/null 2>&1; then
	      echo "::warning::jq not found; desktop device names were left unpatched"
	      exit 0
	    fi

	    prefix="perf-report-llamacpp-llm-"
	    for dir in combined-reports/"$prefix"*/; do
	      [ -d "$dir" ] || continue
	      platform=$(basename "$dir")
	      platform=${platform#"$prefix"}
	      platform=${platform%"-$RUN_NUMBER"}

	      # Mobile legs keep the device name they reported themselves.
	      case "$platform" in
	        Android|iOS) continue ;;
	      esac

	      case "$platform" in
	        linux-x64-cpu|linux-x64-gpu) device_name="linux-x64" ;;
	        linux-arm64-u22|linux-arm64-u24) device_name="linux-arm64" ;;
	        *) device_name="$platform" ;;
	      esac

	      # NUL-delimited read on fd 3: paths inside downloaded artifacts are
	      # not under our control, so they must not be word-split or globbed,
	      # and commands in the loop body keep their normal stdin.
	      while IFS= read -r -d '' json <&3; do
	        if jq --arg name "$device_name" '.device.name = $name' "$json" > "${json}.tmp"; then
	          mv "${json}.tmp" "$json"
	          echo "Patched device name in $json -> $device_name (was matrix label $platform)"
	        else
	          # Leave the original file untouched, drop the partial output and
	          # report the failure instead of logging a patch that never happened.
	          rm -f "${json}.tmp"
	          echo "::warning::Could not patch a performance report (invalid JSON?)"
	          printf '  unpatched: %q\n' "$json"
	        fi
	      done 3< <(find "$dir" -name "performance-report.json" -type f -print0 2>/dev/null)
	    done

	# Runs `aggregate.js` over everything under `combined-reports/` and writes the
	# combined Markdown, HTML and JSON outputs into `combined-output/`.
	- name: Generate combined report
	shell: bash
	run: \|
	# SECURITY (CodeQL alerts 735/736, actions/cache-poisoning/poisonable-step):
	# `aggregate.js` is checked out from the base branch (no
	# `repository:` / `ref:` override on the checkout above), so
	# this step does not execute PR-author code. The job-level
	# `permissions: contents: read` (no actions/cache, no secrets,
	# no GITHUB_TOKEN write usage) bounds the blast radius further
	# — see the block comment on the `combine-perf-reports` job
	# above. The script's only side effect is writing artifact
	# files to `combined-output/`; the artifacts themselves are
	# uploaded via SHA-pinned `actions/upload-artifact` and
	# consumed only by reviewers from the run page.
	# Nothing to aggregate is treated as success: the step exits 0 without
	# creating `combined-output/`, and the later upload / summary steps cope
	# with the missing files.
	if ! find combined-reports -name "performance-report.json" -type f 2>/dev/null \| grep -q .; then
	echo "No performance reports found."
	exit 0
	fi

	echo "=== Reports found ==="
	find combined-reports -name "performance-report.json" -type f

	mkdir -p combined-output

	# QVAC-17830: combined PR summary surfaces BOTH the squashed
	# Mean ± std mini-tables (Total Time / TTFT / TPS, grouped
	# by scenario) AND the per-device detail tables underneath
	# them. Detail-table cells now render `mean ±std` themselves
	# so the std is visible at every metric, not just the
	# rolled-up ones. HTML keeps the same content.
	# NOTE(review): `--addon-type vision` is passed for this LLM workflow;
	# presumably that is the type covering LLM / VLM reports — confirm
	# against aggregate.js.
	node scripts/perf-report/aggregate.js \
	--dir combined-reports \
	--addon-type vision \
	--device-details \
	--output-html combined-output/performance-report-combined.html \
	--output-json combined-output/performance-summary-combined.json \
	--output combined-output/performance-report-combined.md

	- name: Generate per-device HTML reports
	  if: always()
	  shell: bash
	  run: |
	    # Produces one HTML report per performance-report.json, named after a
	    # sanitised slug of the report's `device.name`. Best-effort: a report
	    # that cannot be read or rendered is skipped with a warning.
	    #
	    # SECURITY: `aggregate.js` runs from the base branch (see SECURITY
	    # block on the job above), but the `performance-report.json` files
	    # it consumes are still produced by PR-author code in the matrix
	    # legs, so `device.name` is attacker-controlled. We do not
	    # interpolate it into shell, pass it as code, or print it to the
	    # log; we sanitise it to an alphanumeric+hyphen filename slug capped
	    # at 64 chars before using it as a path suffix, which prevents path
	    # traversal and any kind of shell-meta exposure even if the JSON
	    # were crafted. Only the slug is ever echoed.
	    mkdir -p per-device-reports

	    # NUL-delimited read on fd 3: artifact paths must not be word-split
	    # or globbed, and node keeps its normal stdin inside the loop.
	    while IFS= read -r -d '' json <&3; do
	      # This script runs under `bash -e`; a bare `var=$(node …)` that
	      # fails on malformed JSON would abort the whole step, so the
	      # failure is tested explicitly and only this report is skipped.
	      if ! device_name=$(node -e "const d=JSON.parse(require('fs').readFileSync(process.argv[1],'utf8'));process.stdout.write((d.device&&typeof d.device.name==='string'?d.device.name:'Unknown'))" "$json" 2>/dev/null); then
	        echo "::warning::Skipping a performance report that could not be parsed"
	        printf '  skipped: %q\n' "$json"
	        continue
	      fi
	      if [ -z "$device_name" ] || [ "$device_name" = "Unknown" ]; then
	        continue
	      fi

	      # Strict allowlist: collapse anything that is not [A-Za-z0-9-]
	      # to '_', then truncate to 64 characters.
	      safe_name=$(printf '%s' "$device_name" | LC_ALL=C tr -c 'A-Za-z0-9-' '_' | cut -c1-64)
	      [ -n "$safe_name" ] || continue

	      # aggregate.js is pointed at a directory, so stage this single
	      # report on its own inside a throw-away one.
	      tmp_dir=$(mktemp -d)
	      mkdir -p "$tmp_dir/device"
	      cp "$json" "$tmp_dir/device/performance-report.json"

	      echo "Generating HTML for device slug: ${safe_name}..."
	      if ! node scripts/perf-report/aggregate.js \
	        --dir "$tmp_dir" \
	        --addon-type vision \
	        --device-details \
	        --output-html "per-device-reports/HTML-Report-${safe_name}.html" \
	        2>/dev/null; then
	        # Still non-fatal, but no longer silent.
	        echo "::warning::Per-device HTML report failed for slug ${safe_name}"
	      fi

	      rm -rf "$tmp_dir"
	    done 3< <(find combined-reports -name "performance-report.json" -type f -print0 2>/dev/null)

	    echo "=== Per-device reports generated ==="
	    ls -la per-device-reports/ 2>/dev/null || echo "No per-device reports"

	# Publishes the combined report as a run artifact. Runs regardless of
	# earlier step results and stays quiet when the file was never produced.
	- name: Upload combined HTML report
	  uses: actions/upload-artifact@bbbca2ddaa5d8feaa63e36b76fdaad77386f024f # 7.0.0
	  if: always()
	  with:
	    name: HTML-Report-All-Platforms-${{ github.run_number }}
	    path: combined-output/performance-report-combined.html
	    if-no-files-found: ignore
	    retention-days: 90

	# Same policy for the directory of per-device reports.
	- name: Upload per-device HTML reports
	  uses: actions/upload-artifact@bbbca2ddaa5d8feaa63e36b76fdaad77386f024f # 7.0.0
	  if: always()
	  with:
	    name: HTML-Reports-Per-Device-${{ github.run_number }}
	    path: per-device-reports/
	    if-no-files-found: ignore
	    retention-days: 90

	- name: Write combined summary
	  if: always()
	  shell: bash
	  run: |
	    # Appends the combined Markdown report (or a placeholder) to the job
	    # summary, followed by pointers to the downloadable HTML artifacts.
	    # Errors are tolerated (`set +e`) so a partial summary is still written.
	    set +e
	    combined_md="combined-output/performance-report-combined.md"
	    combined_html="combined-output/performance-report-combined.html"

	    # One grouped redirect instead of a redirect per line.
	    {
	      echo "## LLM / VLM Performance Report (All Platforms)"
	      echo ""
	      if [ -f "$combined_md" ]; then
	        cat "$combined_md"
	      else
	        echo "No combined performance report available."
	      fi
	      echo ""
	      echo "---"
	      echo ""
	      echo "### Downloadable HTML Reports"
	      echo ""
	      if [ -f "$combined_html" ]; then
	        echo "> Full Combined Report (all platforms): download artifact \`HTML-Report-All-Platforms-${{ github.run_number }}\`"
	        echo ""
	      fi

	      # The artifact pointer is printed once, ahead of the first device.
	      header_written=0
	      for report in per-device-reports/HTML-Report-*.html; do
	        [ -f "$report" ] || continue
	        if [ "$header_written" -eq 0 ]; then
	          echo "> Individual Device Reports: download artifact \`HTML-Reports-Per-Device-${{ github.run_number }}\`"
	          echo ">"
	          echo "> Includes:"
	          header_written=1
	        fi
	        # File name -> display label: drop directory, extension and the
	        # fixed prefix, then show hyphens as spaces.
	        label=${report##*/}
	        label=${label%.html}
	        label=${label#HTML-Report-}
	        label=${label//-/ }
	        echo "> - ${label}"
	      done
	      echo ""
	    } >> "$GITHUB_STEP_SUMMARY"

	# Final job: always runs after every other check and forwards two booleans
	# to the reusable public-pr workflow. Only the lockstep, sanity-checks and
	# prebuild results feed those inputs; the remaining `needs` entries only
	# order this job after the other checks.
	merge-guard:
	  needs:
	    - authorize
	    - verify-fabric-lockstep
	    - run-integration-tests
	    - run-mobile-integration-tests
	    - sanity-checks
	    - prebuild
	    - cpp-tests
	    - cpp-lint
	    - ts-checks
	  if: always()
	  uses: ./.github/workflows/public-pr.yml
	  with:
	    build-status: ${{ needs.prebuild.result == 'success' }}
	    sanity-checks-status: ${{ needs.verify-fabric-lockstep.result == 'success' && needs.sanity-checks.result == 'success' }}

Provide feedback

Saved searches

Use saved searches to filter your results more quickly

QVAC-? feat[api]: surface model name + GPU in LLM perf report (QVAC-17830 follow-up) #114

Workflow file

QVAC-? feat[api]: surface model name + GPU in LLM perf report (QVAC-17830 follow-up) #114

Uh oh!

Workflow file for this run