name: Benchmark Performance (LLM)
# Manually-triggered benchmark workflow. The umbrella on-pr workflow
# runs perf tests at the cheap default; this is where iteration
# counts are cranked up to get mean ± std numbers. Covers desktop +
# mobile (Android / iOS via Device Farm).
on:
  workflow_dispatch:
    inputs:
      repository:
        description: "Repository to benchmark"
        required: false
        type: string
      ref:
        description: "Git ref (branch/tag/SHA) to benchmark"
        required: false
        type: string
      qvac_perf_runs:
        description: "QVAC_PERF_RUNS — counted iterations per perf test"
        required: false
        type: string
        default: "3"
      qvac_perf_warmup_runs:
        description: "QVAC_PERF_WARMUP_RUNS — warmup iterations per perf test"
        required: false
        type: string
        default: "1"
      run_desktop:
        description: "Run desktop matrix (Linux / macOS / Windows)"
        required: false
        type: boolean
        default: true
      run_mobile:
        description: "Run mobile matrix (Android / iOS via Device Farm)"
        required: false
        type: boolean
        default: true
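# Example dispatch from the CLI (the workflow file name here is
# illustrative; substitute the actual file name under .github/workflows):
#   gh workflow run benchmark-llm.yml \
#     -f qvac_perf_runs=10 -f qvac_perf_warmup_runs=2 -f run_mobile=false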
permissions:
  contents: read
  packages: read
  id-token: write
jobs:
  label-gate:
    name: Authorise (label-gate)
    runs-on: ubuntu-latest
    permissions:
      contents: read
      pull-requests: write
    outputs:
      authorised: ${{ steps.gate.outputs.authorised }}
    steps:
      - name: Checkout (label-gate action only)
        uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # 6.0.2
        with:
          ref: ${{ github.event.repository.default_branch }}
          sparse-checkout: .github/actions/label-gate
          sparse-checkout-cone-mode: false
      - name: Run label-gate
        id: gate
        uses: ./.github/actions/label-gate
        with:
          github-token: ${{ secrets.PAT_TOKEN }}
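
  # The gate action is always checked out from the default branch, never
  # from the benchmarked repository/ref, so an untrusted ref cannot
  # rewrite the authorisation logic it is being judged by.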
  context:
    runs-on: ubuntu-latest
    outputs:
      repository: ${{ steps.ctx.outputs.repository }}
      ref: ${{ steps.ctx.outputs.ref }}
    steps:
      - id: ctx
        shell: bash
        env:
          INPUT_REPO: ${{ inputs.repository }}
          INPUT_REF: ${{ inputs.ref }}
          REPO: ${{ github.repository }}
          REF_NAME: ${{ github.ref_name }}
        run: |
          repo="${INPUT_REPO:-$REPO}"
          ref="${INPUT_REF:-$REF_NAME}"
          echo "repository=$repo" >> "$GITHUB_OUTPUT"
          echo "ref=$ref" >> "$GITHUB_OUTPUT"
  prebuild:
    needs:
      - context
      - label-gate
    if: needs.label-gate.outputs.authorised == 'true'
    permissions:
      contents: write
      packages: write
      pull-requests: write
      id-token: write
    uses: ./.github/workflows/prebuilds-llm-llamacpp.yml
    secrets: inherit
    with:
      repository: ${{ needs.context.outputs.repository }}
      ref: ${{ needs.context.outputs.ref }}
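
  # prebuild transitively gates both benchmark matrices: each one also
  # lists it in `needs`, so a skipped or failed prebuild skips them too.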
  desktop-benchmarks:
    needs:
      - context
      - prebuild
      - label-gate
    if: needs.label-gate.outputs.authorised == 'true' && inputs.run_desktop
    permissions:
      contents: read
      packages: read
      id-token: write
    uses: ./.github/workflows/integration-test-llm-llamacpp.yml
    secrets: inherit
    with:
      repository: ${{ needs.context.outputs.repository }}
      ref: ${{ needs.context.outputs.ref }}
      qvac_perf_runs: ${{ inputs.qvac_perf_runs }}
      qvac_perf_warmup_runs: ${{ inputs.qvac_perf_warmup_runs }}
      qvac_perf_only: true
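
  # qvac_perf_only presumably tells the reusable integration workflow to
  # skip the functional suite and run only the perf-tagged tests; the
  # iteration-count inputs above then control the statistics gathered.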
  mobile-benchmarks:
    needs:
      - context
      - prebuild
      - label-gate
    if: needs.label-gate.outputs.authorised == 'true' && inputs.run_mobile
    permissions:
      contents: read
      packages: read
      pull-requests: write
      id-token: write
    uses: ./.github/workflows/integration-mobile-test-llm-llamacpp.yml
    secrets: inherit
    with:
      repository: ${{ needs.context.outputs.repository }}
      ref: ${{ needs.context.outputs.ref }}
      qvac_perf_runs: ${{ inputs.qvac_perf_runs }}
      qvac_perf_warmup_runs: ${{ inputs.qvac_perf_warmup_runs }}
      qvac_perf_only: true
  summarize:
    # The always() term lets summarize run even when a benchmark job was
    # skipped via the run_desktop / run_mobile toggles or failed mid-run;
    # we still want the partial report. The needs.context.result guard
    # still skips it when the repo/ref to report on were never resolved.
    needs:
      - context
      - desktop-benchmarks
      - mobile-benchmarks
      - label-gate
    if: needs.label-gate.outputs.authorised == 'true' && always() && needs.context.result == 'success'
    runs-on: ubuntu-latest
    timeout-minutes: 10
    permissions:
      contents: read
    steps:
      - name: Checkout repository
        uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # 6.0.2
        with:
          repository: ${{ needs.context.outputs.repository }}
          ref: ${{ needs.context.outputs.ref }}
          token: ${{ secrets.PAT_TOKEN }}
          sparse-checkout: |
            scripts/perf-report
            packages/llm-llamacpp/media
      - name: Setup Node.js
        uses: actions/setup-node@49933ea5288caeca8642d1e84afbd3f7d6820020 # 4.4.0
        with:
          node-version: lts/*
      - name: Download all perf report artifacts
        uses: actions/download-artifact@3e5f45b2cfb9172054b4087a40e8e0b5a5461e7c # 8.0.1
        with:
          pattern: perf-report-llamacpp-llm-*-${{ github.run_number }}
          path: combined-reports
        continue-on-error: true
      - name: Fix desktop device names
        shell: bash
        run: |
          # Collapse sibling matrix legs (linux-x64-cpu/gpu,
          # linux-arm64-u22/u24) onto one device name so [CPU]/[GPU]
          # rows sit in the same column.
          for dir in combined-reports/perf-report-llamacpp-llm-*/; do
            [ -d "$dir" ] || continue
            base=$(basename "$dir")
            platform=$(echo "$base" | sed "s/^perf-report-llamacpp-llm-//" | sed "s/-${{ github.run_number }}$//")
            case "$platform" in Android|iOS) continue ;; esac
            case "$platform" in
              linux-x64-cpu|linux-x64-gpu) device_name="linux-x64" ;;
              linux-arm64-u22|linux-arm64-u24) device_name="linux-arm64" ;;
              *) device_name="$platform" ;;
            esac
            # while-read instead of `for f in $(find ...)` so paths with
            # whitespace survive word splitting.
            while IFS= read -r json; do
              if command -v jq >/dev/null 2>&1; then
                jq --arg name "$device_name" '.device.name = $name' "$json" > "${json}.tmp" && mv "${json}.tmp" "$json"
                echo "Patched device name in $json -> $device_name (was matrix label $platform)"
              fi
            done < <(find "$dir" -name "performance-report.json" 2>/dev/null)
          done
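      # Example of the patch above for a report uploaded from the
      # linux-x64-gpu leg (JSON shape beyond .device.name is illustrative):
      #   {"device": {"name": "linux-x64-gpu"}, ...}
      # becomes
      #   {"device": {"name": "linux-x64"}, ...}
      # so CPU and GPU rows aggregate under one device column.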
      - name: Generate consolidated benchmark report
        run: |
          if ! find combined-reports -name "performance-report.json" -type f 2>/dev/null | grep -q .; then
            echo "No performance reports found."
            exit 0
          fi
          mkdir -p benchmark-artifacts
          node scripts/perf-report/aggregate.js \
            --dir combined-reports \
            --addon-type vision \
            --device-details \
            --output-html benchmark-artifacts/llamacpp-llm-performance-findings.html \
            --output-json benchmark-artifacts/llamacpp-llm-performance-findings.json \
            --output benchmark-artifacts/llamacpp-llm-performance-findings.md
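      # aggregate.js is repo-local, fetched by the sparse checkout above;
      # its three outputs feed the step summary and the artifact upload below.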
      - name: Add summary
        if: always()
        shell: bash
        run: |
          set +e
          MD_FILE="benchmark-artifacts/llamacpp-llm-performance-findings.md"
          {
            echo "## LLM / VLM Benchmark Report"
            echo ""
            echo "> \`QVAC_PERF_RUNS=${{ inputs.qvac_perf_runs }}\`, \`QVAC_PERF_WARMUP_RUNS=${{ inputs.qvac_perf_warmup_runs }}\`."
            echo ""
            if [ -f "$MD_FILE" ]; then
              cat "$MD_FILE"
            else
              echo "No combined performance report available."
            fi
          } >> "$GITHUB_STEP_SUMMARY"
      - name: Upload consolidated benchmark report
        if: always()
        uses: actions/upload-artifact@bbbca2ddaa5d8feaa63e36b76fdaad77386f024f # 7.0.0
        with:
          name: llamacpp-llm-performance-findings
          path: |
            benchmark-artifacts/llamacpp-llm-performance-findings.md
            benchmark-artifacts/llamacpp-llm-performance-findings.json
            benchmark-artifacts/llamacpp-llm-performance-findings.html
          retention-days: 30
          if-no-files-found: ignore