ci(perf-gate): block PRs that regress kernel performance against main #86

Workflow file for this run

.github/workflows/conformance.yml at 81c4393

	name: OpenVX Conformance Tests

	# Run for pushes and pull requests that target the long-lived branches, but
	# skip runs in which every changed file is documentation, repository
	# metadata, or an image asset. `**/*.ext` matches that extension at any
	# directory depth (a bare `*/.ext` would only match a file literally named
	# `.ext` one directory down, so nothing would ever be ignored).
	on:
	  push:
	    branches: [master, main, develop]
	    paths-ignore:
	      - '**/*.md'
	      - 'docs/**'
	      - 'LICENSE'
	      - '.gitignore'
	      - '.gitattributes'
	      - '.editorconfig'
	      - '**/*.svg'
	      - '**/*.png'
	      - '**/*.jpg'
	      - '**/*.jpeg'
	      - '**/*.gif'
	      - '**/*.webp'
	  pull_request:
	    branches: [master, main, develop]
	    paths-ignore:
	      - '**/*.md'
	      - 'docs/**'
	      - 'LICENSE'
	      - '.gitignore'
	      - '.gitattributes'
	      - '.editorconfig'
	      - '**/*.svg'
	      - '**/*.png'
	      - '**/*.jpg'
	      - '**/*.jpeg'
	      - '**/*.gif'
	      - '**/*.webp'

	# Workflow-wide environment, inherited by every job and step.
	# Values are quoted so they are unambiguously strings.
	env:
	  # Keep cargo's coloured diagnostics in the CI log.
	  CARGO_TERM_COLOR: "always"
	  # Print a backtrace when a Rust binary panics.
	  RUST_BACKTRACE: "1"

	jobs:
	# Builds the rustVX FFI library (libopenvx_ffi.so) and the OpenVX CTS
	# binary once, then publishes both (plus test data and headers) as the
	# `build-artifacts` artifact.
	build:
	  runs-on: ubuntu-22.04
	  steps:
	    - uses: actions/checkout@v4
	      with:
	        fetch-depth: 0
	        submodules: recursive
	    - name: Install system dependencies
	      run: |
	        sudo apt-get update
	        sudo apt-get install -y build-essential cmake
	    - name: Install Rust
	      run: |
	        curl --proto '=https' --tlsv1.2 -sSf https://sh.rustup.rs | sh -s -- -y --default-toolchain stable
	        source "$HOME/.cargo/env"
	        rustc --version
	        cargo --version
	    - name: Detect host CPU features
	      id: cpu
	      # GitHub-hosted Linux runners can be backed by either Intel or AMD
	      # x86_64 silicon (and ARM-based pools exist too). Rather than
	      # hard-coding a vendor, we read /proc/cpuinfo / uname and turn on
	      # the matching openvx-vision SIMD Cargo features (sse2 / avx2 on
	      # x86_64, neon on aarch64).
	      #
	      # IMPORTANT: do NOT use `-C target-cpu=native` here. The build,
	      # CTS, and benchmark jobs run on independent runner VMs; the build
	      # host may expose ISA extensions (e.g. AVX-512) that a downstream
	      # CTS runner does not, which produces SIGILL ("Illegal instruction")
	      # when the artifact is loaded on a less-capable host. Instead we
	      # pin x86_64 to the portable `x86-64-v3` microarch level (SSE4.2 +
	      # AVX + AVX2 + BMI1/2 + FMA + F16C) which every modern AMD EPYC
	      # and Intel Xeon in GitHub's Azure pool supports, and rely on
	      # `#[target_feature]`-gated intrinsics (already in openvx-vision)
	      # for anything beyond that — those paths dispatch via
	      # `is_x86_feature_detected!` at runtime.
	      #
	      # Step outputs: arch, vendor, cargo_features, rustflags.
	      run: |
	        set -euo pipefail
	        ARCH=$(uname -m)
	        VENDOR="unknown"
	        FLAGS=""
	        if [ -r /proc/cpuinfo ]; then
	          # `|| true` keeps `set -e`/pipefail from aborting when grep
	          # finds no matching line (e.g. no vendor_id on non-x86 hosts).
	          VENDOR=$(grep -m1 '^vendor_id' /proc/cpuinfo | awk '{print $3}' || true)
	          VENDOR=${VENDOR:-unknown}
	          FLAGS=$(grep -m1 '^flags' /proc/cpuinfo | cut -d: -f2 || true)
	        fi
	        echo "Architecture : $ARCH"
	        echo "CPU vendor : $VENDOR"

	        CARGO_FEATURES=""
	        RUSTFLAGS_VAL=""

	        case "$ARCH" in
	          x86_64|amd64)
	            HAS_SSE2=false
	            HAS_AVX2=false
	            # openvx-core hosts the C-API kernel callbacks (vxAdd /
	            # vxSubtract / vxBox3x3 / vxGaussian3x3 / vxColorConvert
	            # → crate::simd_kernels). openvx-vision hosts the public
	            # Rust-API SIMD kernels. Both crates need the matching
	            # feature flag for the SIMD path to actually compile in.
	            if echo "$FLAGS" | grep -qw sse2; then
	              CARGO_FEATURES="$CARGO_FEATURES openvx-core/sse2 openvx-vision/sse2"
	              HAS_SSE2=true
	              echo " + sse2 detected"
	            fi
	            if echo "$FLAGS" | grep -qw avx2; then
	              CARGO_FEATURES="$CARGO_FEATURES openvx-core/avx2 openvx-vision/avx2"
	              HAS_AVX2=true
	              echo " + avx2 detected"
	            fi
	            # Pick the most permissive portable microarch level that
	            # the build host satisfies. v3 is safe on every GitHub
	            # Linux runner today (AMD EPYC Milan/Genoa, Intel Cascade
	            # Lake / Ice Lake), v2 is universal on x86_64 cloud silicon.
	            if [ "$HAS_AVX2" = true ]; then
	              RUSTFLAGS_VAL="-C target-cpu=x86-64-v3"
	            elif [ "$HAS_SSE2" = true ]; then
	              RUSTFLAGS_VAL="-C target-cpu=x86-64-v2"
	            fi
	            ;;
	          aarch64|arm64)
	            CARGO_FEATURES="$CARGO_FEATURES openvx-core/neon openvx-vision/neon"
	            echo " + neon (mandatory on aarch64)"
	            ;;
	          *)
	            echo " (no SIMD features enabled for $ARCH — scalar build)"
	            ;;
	        esac

	        # xargs with no command trims and collapses the whitespace left
	        # over from the incremental string concatenation above.
	        CARGO_FEATURES=$(echo "$CARGO_FEATURES" | xargs)
	        echo "Cargo features: ${CARGO_FEATURES:-<none>}"
	        echo "RUSTFLAGS : ${RUSTFLAGS_VAL:-<none>}"

	        {
	          echo "arch=$ARCH"
	          echo "vendor=$VENDOR"
	          echo "cargo_features=$CARGO_FEATURES"
	          echo "rustflags=$RUSTFLAGS_VAL"
	        } >> "$GITHUB_OUTPUT"
	    - name: Build rustVX
	      env:
	        RUSTFLAGS: ${{ steps.cpu.outputs.rustflags }}
	      run: |
	        source "$HOME/.cargo/env"
	        FEATURES="${{ steps.cpu.outputs.cargo_features }}"
	        # Build only the FFI cdylib that downstream jobs consume; this
	        # is the artifact that becomes libopenvx_ffi.so. Building this
	        # one package (instead of the whole workspace) keeps the build
	        # tight and lets us forward openvx-vision SIMD features via
	        # the `pkg/feature` syntax without needing a passthrough in
	        # openvx-ffi's Cargo.toml.
	        if [ -n "$FEATURES" ]; then
	          echo "Building openvx-ffi with features: $FEATURES"
	          cargo build --release -p openvx-ffi --features "$FEATURES"
	        else
	          echo "Building openvx-ffi with no extra SIMD features"
	          cargo build --release -p openvx-ffi
	        fi
	    - name: Build OpenVX CTS
	      run: |
	        cd OpenVX-cts
	        mkdir -p include
	        if [ -d "../include" ]; then
	          # Best effort: an empty ../include must not fail the step.
	          cp -r ../include/* include/ 2>/dev/null || true
	        fi
	        mkdir -p build
	        cd build
	        cmake .. \
	          -DCMAKE_BUILD_TYPE=Release \
	          -DCMAKE_C_STANDARD_LIBRARIES="-lm" \
	          -DCMAKE_CXX_STANDARD_LIBRARIES="-lm" \
	          -DOPENVX_INCLUDES="${{ github.workspace }}/include;${{ github.workspace }}/OpenVX-cts/include" \
	          -DOPENVX_LIBRARIES="${{ github.workspace }}/target/release/libopenvx_ffi.so;m" \
	          -DOPENVX_CONFORMANCE_VISION=ON \
	          -DOPENVX_USE_ENHANCED_VISION=ON
	        make -j$(nproc)
	    - name: Upload build artifacts
	      uses: actions/upload-artifact@v4
	      with:
	        name: build-artifacts
	        # `include/` is bundled so the downstream benchmark job can build
	        # openvx-mark against rustVX without needing to check out the
	        # rustVX source tree.
	        path: |
	          target/release/libopenvx_ffi.so
	          OpenVX-cts/build/bin/vx_test_conformance
	          OpenVX-cts/test_data/
	          include/
	        retention-days: 1

	# Build the Khronos OpenVX sample implementation in its own phase, in
	# parallel with the rustVX `build` job, and upload the resulting library
	# + headers as a self-contained archive. The benchmark job below pulls
	# both archives down onto a single runner so rustVX and the Khronos
	# sample are exercised on identical hardware.
	build-khronos-sample:
	  name: Build Khronos OpenVX sample
	  runs-on: ubuntu-22.04
	  steps:
	    - name: Install system dependencies
	      run: |
	        sudo apt-get update
	        sudo apt-get install -y build-essential cmake git python3
	    - name: Build Khronos OpenVX sample
	      run: |
	        git clone --recursive --depth 1 \
	          https://github.com/KhronosGroup/OpenVX-sample-impl.git khronos-sample
	        cd khronos-sample
	        python3 Build.py --os=Linux --arch=64 --conf=Release
	    - name: Stage Khronos sample archive
	      run: |
	        set -euo pipefail
	        # Take the first libopenvx.so that is not under a `build/`
	        # directory. `-print -quit` stops at the first hit, and the
	        # explicit check gives a clear error instead of an obscure
	        # "dirname: missing operand" when nothing is found.
	        LIB_PATH=$(find khronos-sample -name "libopenvx.so" -not -path "*/build/*" -print -quit)
	        if [ -z "$LIB_PATH" ]; then
	          echo "::error::libopenvx.so not found under khronos-sample (did Build.py succeed?)"
	          exit 1
	        fi
	        LIB_SRC=$(dirname "$LIB_PATH")
	        echo "Khronos libraries discovered in: $LIB_SRC"
	        mkdir -p khronos-stage/lib
	        cp "$LIB_SRC"/libopenvx.so "$LIB_SRC"/libvxu.so khronos-stage/lib/
	        cp -r khronos-sample/api-docs/include khronos-stage/include
	        ls -R khronos-stage
	    - name: Upload Khronos sample artifacts
	      uses: actions/upload-artifact@v4
	      with:
	        name: khronos-sample-artifacts
	        path: khronos-stage/
	        retention-days: 1

	# Baseline CTS suites, run against the library and test binary produced
	# by `build`. Unlike most of the CTS jobs this one has no
	# `continue-on-error`, so a failure here fails the workflow.
	baseline:
	  runs-on: ubuntu-22.04
	  needs: build
	  steps:
	    - uses: actions/checkout@v4
	      with:
	        submodules: recursive
	    - name: Download build artifacts
	      uses: actions/download-artifact@v4
	      with:
	        name: build-artifacts
	    - name: Run baseline tests
	      run: |
	        # Artifacts do not keep the executable bit; restore it.
	        chmod +x OpenVX-cts/build/bin/vx_test_conformance
	        cd OpenVX-cts/build
	        export LD_LIBRARY_PATH=${{ github.workspace }}/target/release
	        export VX_TEST_DATA_PATH=${{ github.workspace }}/OpenVX-cts/test_data/
	        # Colon-separated list of `Suite.*` wildcards; 300 s hard limit.
	        timeout 300 ./bin/vx_test_conformance --filter="GraphBase.*:Logging.*:SmokeTestBase.*:SmokeTest.*:TargetBase.*:Target.*"

	# Graph CTS suites. `continue-on-error: true` means a failure is reported
	# on the job but does not fail the workflow run.
	graph:
	  runs-on: ubuntu-22.04
	  needs: build
	  continue-on-error: true
	  steps:
	    - uses: actions/checkout@v4
	      with:
	        submodules: recursive
	    - name: Download build artifacts
	      uses: actions/download-artifact@v4
	      with:
	        name: build-artifacts
	    - name: Run graph tests
	      run: |
	        # Artifacts do not keep the executable bit; restore it.
	        chmod +x OpenVX-cts/build/bin/vx_test_conformance
	        cd OpenVX-cts/build
	        export LD_LIBRARY_PATH=${{ github.workspace }}/target/release
	        export VX_TEST_DATA_PATH=${{ github.workspace }}/OpenVX-cts/test_data/
	        # Colon-separated list of `Suite.*` wildcards; 600 s hard limit.
	        timeout 600 ./bin/vx_test_conformance --filter="Graph.*:GraphCallback.*:GraphDelay.*:GraphROI.*:UserNode.*"

	# Data-object CTS suites (scalars, arrays, matrices, LUTs, ...).
	# `continue-on-error: true` means a failure does not fail the workflow run.
	data-objects:
	  runs-on: ubuntu-22.04
	  needs: build
	  continue-on-error: true
	  steps:
	    - uses: actions/checkout@v4
	      with:
	        submodules: recursive
	    - name: Download build artifacts
	      uses: actions/download-artifact@v4
	      with:
	        name: build-artifacts
	    - name: Run data object tests
	      run: |
	        # Artifacts do not keep the executable bit; restore it.
	        chmod +x OpenVX-cts/build/bin/vx_test_conformance
	        cd OpenVX-cts/build
	        export LD_LIBRARY_PATH=${{ github.workspace }}/target/release
	        export VX_TEST_DATA_PATH=${{ github.workspace }}/OpenVX-cts/test_data/
	        # Colon-separated list of `Suite.*` wildcards; 300 s hard limit.
	        timeout 300 ./bin/vx_test_conformance --filter="Scalar.*:Array.*:ObjectArray.*:Matrix.*:Convolution.*:Distribution.*:LUT.*:Histogram.*"

	# Image-object and patch-access CTS suites.
	# `continue-on-error: true` means a failure does not fail the workflow run.
	image-ops:
	  runs-on: ubuntu-22.04
	  needs: build
	  continue-on-error: true
	  steps:
	    - uses: actions/checkout@v4
	      with:
	        submodules: recursive
	    - name: Download build artifacts
	      uses: actions/download-artifact@v4
	      with:
	        name: build-artifacts
	    - name: Run image operation tests
	      run: |
	        # Artifacts do not keep the executable bit; restore it.
	        chmod +x OpenVX-cts/build/bin/vx_test_conformance
	        cd OpenVX-cts/build
	        export LD_LIBRARY_PATH=${{ github.workspace }}/target/release
	        export VX_TEST_DATA_PATH=${{ github.workspace }}/OpenVX-cts/test_data/
	        # Colon-separated list of `Suite.*` wildcards; 600 s hard limit.
	        timeout 600 ./bin/vx_test_conformance --filter="Image.*:vxCopyImagePatch.*:vxMapImagePatch.*:vxCreateImageFromChannel.*:vxCopyRemapPatch.*:vxMapRemapPatch.*"

	# Colour-conversion, channel and depth-conversion CTS suites.
	# `continue-on-error: true` means a failure does not fail the workflow run.
	vision-color:
	  runs-on: ubuntu-22.04
	  needs: build
	  continue-on-error: true
	  steps:
	    - uses: actions/checkout@v4
	      with:
	        submodules: recursive
	    - name: Download build artifacts
	      uses: actions/download-artifact@v4
	      with:
	        name: build-artifacts
	    - name: Run color and channel tests
	      run: |
	        # Artifacts do not keep the executable bit; restore it.
	        chmod +x OpenVX-cts/build/bin/vx_test_conformance
	        cd OpenVX-cts/build
	        export LD_LIBRARY_PATH=${{ github.workspace }}/target/release
	        export VX_TEST_DATA_PATH=${{ github.workspace }}/OpenVX-cts/test_data/
	        # Colon-separated list of `Suite.*` wildcards; 300 s hard limit.
	        timeout 300 ./bin/vx_test_conformance --filter="ColorConvert.*:ChannelExtract.*:ChannelCombine.*:vxConvertDepth.*:vxuConvertDepth.*"

	# Filter, morphology and gradient CTS suites.
	# `continue-on-error: true` means a failure does not fail the workflow run.
	vision-filters:
	  runs-on: ubuntu-22.04
	  needs: build
	  continue-on-error: true
	  steps:
	    - uses: actions/checkout@v4
	      with:
	        submodules: recursive
	    - name: Download build artifacts
	      uses: actions/download-artifact@v4
	      with:
	        name: build-artifacts
	    - name: Run filter and morphology tests
	      run: |
	        # Artifacts do not keep the executable bit; restore it.
	        chmod +x OpenVX-cts/build/bin/vx_test_conformance
	        cd OpenVX-cts/build
	        export LD_LIBRARY_PATH=${{ github.workspace }}/target/release
	        export VX_TEST_DATA_PATH=${{ github.workspace }}/OpenVX-cts/test_data/
	        # Colon-separated list of `Suite.*` wildcards; 600 s hard limit.
	        timeout 600 ./bin/vx_test_conformance --filter="Box3x3.*:Gaussian3x3.*:Median3x3.*:Dilate3x3.*:Erode3x3.*:Sobel3x3.*:Magnitude.*:Phase.*:NonLinearFilter.*:Convolve.*:EqualizeHistogram.*"

	# Arithmetic, bitwise, weighted-average and threshold CTS suites.
	# `continue-on-error: true` means a failure does not fail the workflow run.
	vision-arithmetic:
	  runs-on: ubuntu-22.04
	  needs: build
	  continue-on-error: true
	  steps:
	    - uses: actions/checkout@v4
	      with:
	        submodules: recursive
	    - name: Download build artifacts
	      uses: actions/download-artifact@v4
	      with:
	        name: build-artifacts
	    - name: Run arithmetic and bitwise tests
	      run: |
	        # Artifacts do not keep the executable bit; restore it.
	        chmod +x OpenVX-cts/build/bin/vx_test_conformance
	        cd OpenVX-cts/build
	        export LD_LIBRARY_PATH=${{ github.workspace }}/target/release
	        export VX_TEST_DATA_PATH=${{ github.workspace }}/OpenVX-cts/test_data/
	        # Colon-separated list of `Suite.*` wildcards; 600 s hard limit.
	        timeout 600 ./bin/vx_test_conformance --filter="vxAddSub.*:vxuAddSub.*:vxMultiply.*:vxuMultiply.*:vxBinOp8u.*:vxuBinOp8u.*:vxBinOp16s.*:vxuBinOp16s.*:vxNot.*:vxuNot.*:WeightedAverage.*:Threshold.*"

	# Geometric-transform CTS suites (scale, warp, remap).
	# `continue-on-error: true` means a failure does not fail the workflow run.
	vision-geometric:
	  runs-on: ubuntu-22.04
	  needs: build
	  continue-on-error: true
	  steps:
	    - uses: actions/checkout@v4
	      with:
	        submodules: recursive
	    - name: Download build artifacts
	      uses: actions/download-artifact@v4
	      with:
	        name: build-artifacts
	    - name: Run geometric transform tests
	      run: |
	        # Artifacts do not keep the executable bit; restore it.
	        chmod +x OpenVX-cts/build/bin/vx_test_conformance
	        cd OpenVX-cts/build
	        export LD_LIBRARY_PATH=${{ github.workspace }}/target/release
	        export VX_TEST_DATA_PATH=${{ github.workspace }}/OpenVX-cts/test_data/
	        # Colon-separated list of `Suite.*` wildcards; 600 s hard limit.
	        timeout 600 ./bin/vx_test_conformance --filter="Scale.*:WarpAffine.*:WarpPerspective.*:Remap.*:HalfScaleGaussian.*"

	# Corner- and edge-detection CTS suites.
	# `continue-on-error: true` means a failure does not fail the workflow run.
	vision-features:
	  runs-on: ubuntu-22.04
	  needs: build
	  continue-on-error: true
	  steps:
	    - uses: actions/checkout@v4
	      with:
	        submodules: recursive
	    - name: Download build artifacts
	      uses: actions/download-artifact@v4
	      with:
	        name: build-artifacts
	    - name: Run feature and edge detection tests
	      run: |
	        # Artifacts do not keep the executable bit; restore it.
	        chmod +x OpenVX-cts/build/bin/vx_test_conformance
	        cd OpenVX-cts/build
	        export LD_LIBRARY_PATH=${{ github.workspace }}/target/release
	        export VX_TEST_DATA_PATH=${{ github.workspace }}/OpenVX-cts/test_data/
	        # Colon-separated list of `Suite.*` wildcards; 600 s hard limit.
	        timeout 600 ./bin/vx_test_conformance --filter="HarrisCorners.*:FastCorners.*:vxCanny.*:vxuCanny.*"

	# Statistics and analysis CTS suites.
	# `continue-on-error: true` means a failure does not fail the workflow run.
	vision-statistics:
	  runs-on: ubuntu-22.04
	  needs: build
	  continue-on-error: true
	  steps:
	    - uses: actions/checkout@v4
	      with:
	        submodules: recursive
	    - name: Download build artifacts
	      uses: actions/download-artifact@v4
	      with:
	        name: build-artifacts
	    - name: Run statistics and analysis tests
	      run: |
	        # Artifacts do not keep the executable bit; restore it.
	        chmod +x OpenVX-cts/build/bin/vx_test_conformance
	        cd OpenVX-cts/build
	        export LD_LIBRARY_PATH=${{ github.workspace }}/target/release
	        export VX_TEST_DATA_PATH=${{ github.workspace }}/OpenVX-cts/test_data/
	        # Colon-separated list of `Suite.*` wildcards; 300 s hard limit.
	        timeout 300 ./bin/vx_test_conformance --filter="MeanStdDev.*:MinMaxLoc.*:Integral.*"

	# Pyramid and optical-flow CTS suites.
	# `continue-on-error: true` means a failure does not fail the workflow run.
	vision-pyramid:
	  runs-on: ubuntu-22.04
	  needs: build
	  continue-on-error: true
	  steps:
	    - uses: actions/checkout@v4
	      with:
	        submodules: recursive
	    - name: Download build artifacts
	      uses: actions/download-artifact@v4
	      with:
	        name: build-artifacts
	    - name: Run pyramid and optical flow tests
	      run: |
	        # Artifacts do not keep the executable bit; restore it.
	        chmod +x OpenVX-cts/build/bin/vx_test_conformance
	        cd OpenVX-cts/build
	        export LD_LIBRARY_PATH=${{ github.workspace }}/target/release
	        export VX_TEST_DATA_PATH=${{ github.workspace }}/OpenVX-cts/test_data/
	        # Colon-separated list of `Suite.*` wildcards; 300 s hard limit.
	        timeout 300 ./bin/vx_test_conformance --filter="GaussianPyramid.*:LaplacianPyramid.*:LaplacianReconstruct.*:OptFlowPyrLK.*"

	# Enhanced Vision Phase 1 — only the kernels rustVX has actually
	# implemented from the OpenVX 1.3 Enhanced Vision feature set. The CTS
	# binary is built with `OPENVX_USE_ENHANCED_VISION=ON`, but this job
	# filters strictly to the kernels Phase 1 ships (vxMin / vxMax). The
	# remaining Enhanced Vision symbols are exposed as link stubs in
	# rustVX so the binary can build; they are not exercised here and will
	# be replaced by real kernels in subsequent phases.
	#
	# This job has no `continue-on-error`, so a Min/Max failure fails the
	# workflow run.
	enhanced-vision:
	  name: "enhanced-vision (Phase 1 — Min/Max)"
	  runs-on: ubuntu-22.04
	  needs: build
	  steps:
	    - uses: actions/checkout@v4
	      with:
	        submodules: recursive
	    - name: Download build artifacts
	      uses: actions/download-artifact@v4
	      with:
	        name: build-artifacts
	    - name: Run Enhanced Vision Phase 1 tests
	      run: |
	        # Artifacts do not keep the executable bit; restore it.
	        chmod +x OpenVX-cts/build/bin/vx_test_conformance
	        cd OpenVX-cts/build
	        export LD_LIBRARY_PATH=${{ github.workspace }}/target/release
	        export VX_TEST_DATA_PATH=${{ github.workspace }}/OpenVX-cts/test_data/
	        # Colon-separated list of `Suite.*` wildcards; 120 s hard limit.
	        timeout 120 ./bin/vx_test_conformance --filter="Min.*:Max.*"

	# Performance benchmark using openvx-mark, comparing rustVX against the
	# Khronos OpenVX sample implementation on the SAME runner so the two
	# numbers come from identical hardware. This job does NOT rebuild either
	# implementation — it just downloads the archives produced by the
	# `build` and `build-khronos-sample` phases above, builds the openvx-mark
	# tool against each, runs the same workload, and compares the JSON
	# reports. The CTS jobs above use `continue-on-error: true`, so this
	# job effectively gates on `build`, `build-khronos-sample`, and
	# `baseline` succeeding (matching the existing CTS gate).
	benchmark:
	  name: Benchmark & compare (rustVX vs Khronos sample)
	  runs-on: ubuntu-22.04
	  needs:
	    - build
	    - build-khronos-sample
	    - baseline
	    - graph
	    - data-objects
	    - image-ops
	    - vision-color
	    - vision-filters
	    - vision-arithmetic
	    - vision-geometric
	    - vision-features
	    - vision-statistics
	    - vision-pyramid
	  continue-on-error: true
	  steps:
	    - name: Install system dependencies
	      run: |
	        sudo apt-get update
	        sudo apt-get install -y build-essential cmake git python3

	    - name: Download rustVX archive
	      uses: actions/download-artifact@v4
	      with:
	        name: build-artifacts
	        path: ${{ github.workspace }}/rustvx-pkg

	    - name: Download Khronos sample archive
	      uses: actions/download-artifact@v4
	      with:
	        name: khronos-sample-artifacts
	        path: ${{ github.workspace }}/khronos-pkg

	    - name: Expose rustVX as libopenvx / libvxu
	      id: rustvx
	      # openvx-mark uses `find_library(NAMES openvx)` and
	      # `find_library(NAMES vxu)`. rustVX ships a single
	      # `libopenvx_ffi.so` that exports the full set of `vx`/`vxu`
	      # symbols, so symlink the two classic Khronos library names to
	      # it without changing rustVX's own build output.
	      #
	      # Step outputs: lib_dir, include_dir.
	      run: |
	        set -euo pipefail
	        LIB_DIR=${{ github.workspace }}/rustvx-pkg/target/release
	        chmod -R u+rwX "$LIB_DIR"
	        cd "$LIB_DIR"
	        ln -sf libopenvx_ffi.so libopenvx.so
	        ln -sf libopenvx_ffi.so libvxu.so
	        ls -la libopenvx.so libvxu.so
	        echo "lib_dir=$LIB_DIR" >> "$GITHUB_OUTPUT"
	        echo "include_dir=${{ github.workspace }}/rustvx-pkg/include" >> "$GITHUB_OUTPUT"

	    - name: Inspect Khronos sample archive
	      id: khronos
	      # Step outputs: lib_dir, include_dir.
	      run: |
	        set -euo pipefail
	        LIB_DIR=${{ github.workspace }}/khronos-pkg/lib
	        INCLUDE_DIR=${{ github.workspace }}/khronos-pkg/include
	        ls -la "$LIB_DIR"
	        echo "lib_dir=$LIB_DIR" >> "$GITHUB_OUTPUT"
	        echo "include_dir=$INCLUDE_DIR" >> "$GITHUB_OUTPUT"

	    - name: Clone openvx-mark
	      run: |
	        git clone --depth 1 https://github.com/kiritigowda/openvx-mark.git \
	          ${{ github.workspace }}/openvx-mark

	    # ---------------------------------------------------------------------
	    # rustVX benchmark
	    # ---------------------------------------------------------------------
	    - name: Build openvx-mark against rustVX
	      run: |
	        mkdir -p ${{ github.workspace }}/openvx-mark/build-rustvx
	        cd ${{ github.workspace }}/openvx-mark/build-rustvx
	        cmake \
	          -DCMAKE_BUILD_TYPE=Release \
	          -DOPENVX_INCLUDES=${{ steps.rustvx.outputs.include_dir }} \
	          -DOPENVX_LIB_DIR=${{ steps.rustvx.outputs.lib_dir }} \
	          ..
	        cmake --build . -j$(nproc)

	    - name: Run benchmark (rustVX)
	      run: |
	        cd ${{ github.workspace }}/openvx-mark/build-rustvx
	        # Append the inherited path only when it is non-empty: a trailing
	        # ":" would add an empty entry, which the loader treats as the
	        # current directory.
	        export LD_LIBRARY_PATH=${{ steps.rustvx.outputs.lib_dir }}${LD_LIBRARY_PATH:+:$LD_LIBRARY_PATH}
	        ./openvx-mark --resolution FHD --iterations 20 --warmup 5

	    # ---------------------------------------------------------------------
	    # Khronos sample benchmark
	    # ---------------------------------------------------------------------
	    - name: Build openvx-mark against Khronos sample
	      run: |
	        mkdir -p ${{ github.workspace }}/openvx-mark/build-khronos
	        cd ${{ github.workspace }}/openvx-mark/build-khronos
	        cmake \
	          -DCMAKE_BUILD_TYPE=Release \
	          -DOPENVX_INCLUDES=${{ steps.khronos.outputs.include_dir }} \
	          -DOPENVX_LIB_DIR=${{ steps.khronos.outputs.lib_dir }} \
	          ..
	        cmake --build . -j$(nproc)

	    - name: Run benchmark (Khronos sample)
	      run: |
	        cd ${{ github.workspace }}/openvx-mark/build-khronos
	        # See the rustVX run step: avoid a trailing empty path entry.
	        export LD_LIBRARY_PATH=${{ steps.khronos.outputs.lib_dir }}${LD_LIBRARY_PATH:+:$LD_LIBRARY_PATH}
	        ./openvx-mark --resolution FHD --iterations 20 --warmup 5

	    # ---------------------------------------------------------------------
	    # rustVX-on-main benchmark (perf-gate input)
	    #
	    # On pull_request runs we additionally build the PR's base branch
	    # (`github.base_ref`, i.e. main, in practice) on the same runner VM,
	    # run openvx-mark against it with the same workload as the PR run,
	    # and upload the resulting JSON. The downstream `perf-gate` job
	    # consumes that JSON together with the PR's
	    # `benchmark-results-rustvx` artifact to decide whether the PR
	    # regresses against main. Same-VM is the whole point — hardware
	    # variance between separate runs would swamp any real regression.
	    #
	    # Skipped on push events to main (there is no "merge target" to
	    # diff against, and we do not want pushes to main to gate against
	    # themselves).
	    # ---------------------------------------------------------------------
	    - name: Check out merge-base ref (main)
	      if: github.event_name == 'pull_request'
	      uses: actions/checkout@v4
	      with:
	        # NOTE: this is the current tip of the base branch, not the git
	        # merge-base of the PR.
	        ref: ${{ github.base_ref }}
	        path: rustvx-main-src
	        fetch-depth: 1
	        submodules: recursive

	    - name: Build rustVX from merge-base ref
	      if: github.event_name == 'pull_request'
	      id: rustvx_main
	      env:
	        # Reuse the same auto-detected SIMD feature set the PR build
	        # used. `steps.cpu` is the CPU-detect step from the `build`
	        # job — its outputs are not visible across jobs, so we
	        # re-detect here. The detection is a few hundred ms and the
	        # logic is identical to the build job's.
	        CARGO_TERM_COLOR: always
	        RUST_BACKTRACE: 1
	      # Step outputs: lib_dir, include_dir.
	      run: |
	        set -euo pipefail
	        # Install Rust if not present (runner image normally has it,
	        # but the Khronos-only path above doesn't depend on it, so
	        # we install defensively).
	        if ! command -v cargo >/dev/null 2>&1; then
	          curl --proto '=https' --tlsv1.2 -sSf https://sh.rustup.rs \
	            | sh -s -- -y --default-toolchain stable
	        fi
	        source "$HOME/.cargo/env" || true

	        # Re-detect host CPU features (mirrors `build` job).
	        ARCH=$(uname -m)
	        FLAGS=""
	        if [ -r /proc/cpuinfo ]; then
	          FLAGS=$(grep -m1 '^flags' /proc/cpuinfo | cut -d: -f2 || true)
	        fi
	        CARGO_FEATURES=""
	        RUSTFLAGS_VAL=""
	        case "$ARCH" in
	          x86_64|amd64)
	            if echo "$FLAGS" | grep -qw sse2; then
	              CARGO_FEATURES="$CARGO_FEATURES openvx-core/sse2 openvx-vision/sse2"
	            fi
	            if echo "$FLAGS" | grep -qw avx2; then
	              CARGO_FEATURES="$CARGO_FEATURES openvx-core/avx2 openvx-vision/avx2"
	              RUSTFLAGS_VAL="-C target-cpu=x86-64-v3"
	            elif echo "$FLAGS" | grep -qw sse2; then
	              RUSTFLAGS_VAL="-C target-cpu=x86-64-v2"
	            fi
	            ;;
	          aarch64|arm64)
	            CARGO_FEATURES="openvx-core/neon openvx-vision/neon"
	            ;;
	        esac
	        CARGO_FEATURES=$(echo "$CARGO_FEATURES" | xargs)

	        cd ${{ github.workspace }}/rustvx-main-src
	        export RUSTFLAGS="$RUSTFLAGS_VAL"
	        if [ -n "$CARGO_FEATURES" ]; then
	          cargo build --release -p openvx-ffi --features "$CARGO_FEATURES"
	        else
	          cargo build --release -p openvx-ffi
	        fi

	        # Stage main's lib alongside the PR's, with the libopenvx /
	        # libvxu symlinks openvx-mark needs to find at link time.
	        MAIN_LIB_DIR=${{ github.workspace }}/rustvx-main-pkg
	        mkdir -p "$MAIN_LIB_DIR"
	        cp target/release/libopenvx_ffi.so "$MAIN_LIB_DIR/"
	        cd "$MAIN_LIB_DIR"
	        ln -sf libopenvx_ffi.so libopenvx.so
	        ln -sf libopenvx_ffi.so libvxu.so
	        ls -la
	        echo "lib_dir=$MAIN_LIB_DIR" >> "$GITHUB_OUTPUT"
	        echo "include_dir=${{ github.workspace }}/rustvx-main-src/include" >> "$GITHUB_OUTPUT"

	    - name: Build openvx-mark against rustVX-on-main
	      if: github.event_name == 'pull_request'
	      run: |
	        mkdir -p ${{ github.workspace }}/openvx-mark/build-rustvx-main
	        cd ${{ github.workspace }}/openvx-mark/build-rustvx-main
	        cmake \
	          -DCMAKE_BUILD_TYPE=Release \
	          -DOPENVX_INCLUDES=${{ steps.rustvx_main.outputs.include_dir }} \
	          -DOPENVX_LIB_DIR=${{ steps.rustvx_main.outputs.lib_dir }} \
	          ..
	        cmake --build . -j$(nproc)

	    - name: Run benchmark (rustVX-on-main)
	      if: github.event_name == 'pull_request'
	      run: |
	        cd ${{ github.workspace }}/openvx-mark/build-rustvx-main
	        # See the rustVX run step: avoid a trailing empty path entry.
	        export LD_LIBRARY_PATH=${{ steps.rustvx_main.outputs.lib_dir }}${LD_LIBRARY_PATH:+:$LD_LIBRARY_PATH}
	        ./openvx-mark --resolution FHD --iterations 20 --warmup 5

	    # ---------------------------------------------------------------------
	    # Compare results
	    # ---------------------------------------------------------------------
	    - name: Compare benchmark results (rustVX vs Khronos)
	      run: |
	        RUSTVX=${{ github.workspace }}/openvx-mark/build-rustvx/benchmark_results/benchmark_results.json
	        KHRONOS=${{ github.workspace }}/openvx-mark/build-khronos/benchmark_results/benchmark_results.json

	        if [ ! -f "$RUSTVX" ] || [ ! -f "$KHRONOS" ]; then
	          echo "Skipping comparison — one or both benchmark results missing"
	          ls -la "$(dirname "$RUSTVX")" 2>/dev/null || true
	          ls -la "$(dirname "$KHRONOS")" 2>/dev/null || true
	          exit 0
	        fi

	        # `compare_reports.py` defines Speedup as
	        # speedup = throughput(report_b) / throughput(report_a)
	        # i.e. ">1.00 means report_b is faster". To make the Speedup
	        # column read as "rustVX over Khronos" (>1.00x = rustVX wins),
	        # pass Khronos first (baseline / report_a) and rustVX second
	        # (candidate / report_b).
	        python3 ${{ github.workspace }}/openvx-mark/scripts/compare_reports.py \
	          "$KHRONOS" "$RUSTVX" \
	          --output ${{ github.workspace }}/openvx-mark/comparison

	    - name: Post comparison to job summary
	      if: always()
	      run: |
	        COMPARISON=${{ github.workspace }}/openvx-mark/comparison.md
	        RUSTVX=${{ github.workspace }}/openvx-mark/build-rustvx/benchmark_results/benchmark_results.json
	        KHRONOS=${{ github.workspace }}/openvx-mark/build-khronos/benchmark_results/benchmark_results.json

	        # ----- Headline: aggregate speedup of rustVX over Khronos sample -----
	        # The heredoc body and its terminator sit at the block's base
	        # indentation so Python sees column-0 statements and the shell
	        # sees an unindented `PY` terminator.
	        if [ -f "$RUSTVX" ] && [ -f "$KHRONOS" ]; then
	          python3 - "$RUSTVX" "$KHRONOS" >> "$GITHUB_STEP_SUMMARY" <<'PY'
	        import json, math, statistics, sys

	        rustvx_path, khronos_path = sys.argv[1], sys.argv[2]
	        with open(rustvx_path) as f: rustvx = json.load(f)
	        with open(khronos_path) as f: khronos = json.load(f)

	        def by_key(report):
	            # Index results by (kernel name, mode, resolution).
	            return {(r['name'], r['mode'], r['resolution']): r
	                    for r in report.get('results', [])}

	        a = by_key(rustvx)
	        b = by_key(khronos)
	        shared = sorted(set(a) & set(b))

	        speedups = []
	        wins, losses = 0, 0
	        best = (None, 0.0)
	        worst = (None, math.inf)

	        for key in shared:
	            ra, rb = a[key], b[key]
	            # Skip results either side explicitly marks as unverified.
	            if not (ra.get('verified', True) and rb.get('verified', True)):
	                continue
	            mps_r = ra.get('megapixels_per_sec', 0)
	            mps_k = rb.get('megapixels_per_sec', 0)
	            if mps_r <= 0 or mps_k <= 0:
	                continue
	            s = mps_r / mps_k  # >1.0 = rustVX faster than Khronos
	            speedups.append(s)
	            if s > 1.0: wins += 1
	            elif s < 1.0: losses += 1
	            if s > best[1]: best = (key, s)
	            if s < worst[1]: worst = (key, s)

	        print('# rustVX vs Khronos sample — headline')
	        print()
	        if not speedups:
	            print('_No verified benchmarks were directly comparable._')
	        else:
	            geomean = math.exp(sum(math.log(s) for s in speedups) / len(speedups))
	            # True median: averages the two middle values for an even
	            # count instead of reporting the upper one.
	            median = statistics.median(speedups)
	            print('| Metric | Value |')
	            print('|:---|---:|')
	            print(f'| Geomean speedup (rustVX / Khronos) | {geomean:.2f}x |')
	            print(f'| Median speedup (rustVX / Khronos) | {median:.2f}x |')
	            print(f'| Benchmarks compared | {len(speedups)} |')
	            print(f'| rustVX faster | {wins} |')
	            print(f'| Khronos sample faster | {losses} |')
	            if best[0]:
	                bk, bv = best
	                print(f'| Best rustVX speedup | {bv:.2f}x ({bk[0]} / {bk[1]} / {bk[2]}) |')
	            if worst[0] and worst[1] != math.inf:
	                wk, wv = worst
	                print(f'| Worst rustVX speedup | {wv:.2f}x ({wk[0]} / {wk[1]} / {wk[2]}) |')
	            print()
	            if geomean >= 1.0:
	                print(f'> rustVX is {geomean:.2f}x faster than the Khronos sample on average (geomean across {len(speedups)} verified benchmarks).')
	            else:
	                print(f'> rustVX is {1.0/geomean:.2f}x slower than the Khronos sample on average (geomean across {len(speedups)} verified benchmarks).')
	            print()
	        PY
	        fi

	        # ----- Detailed comparison table from compare_reports.py -----
	        if [ -f "$COMPARISON" ]; then
	          cat "$COMPARISON" >> "$GITHUB_STEP_SUMMARY"
	        else
	          echo "_No comparison report was produced._" >> "$GITHUB_STEP_SUMMARY"
	        fi

	    - name: Upload rustVX benchmark results
	      if: always()
	      uses: actions/upload-artifact@v4
	      with:
	        name: benchmark-results-rustvx
	        path: ${{ github.workspace }}/openvx-mark/build-rustvx/benchmark_results/
	        if-no-files-found: ignore

	    - name: Upload rustVX-on-main benchmark results
	      if: always() && github.event_name == 'pull_request'
	      uses: actions/upload-artifact@v4
	      with:
	        name: benchmark-results-rustvx-main
	        path: ${{ github.workspace }}/openvx-mark/build-rustvx-main/benchmark_results/
	        if-no-files-found: ignore

	    - name: Upload Khronos sample benchmark results
	      if: always()
	      uses: actions/upload-artifact@v4
	      with:
	        name: benchmark-results-khronos-sample
	        path: ${{ github.workspace }}/openvx-mark/build-khronos/benchmark_results/
	        if-no-files-found: ignore

	    - name: Upload comparison report
	      if: always()
	      uses: actions/upload-artifact@v4
	      with:
	        name: benchmark-comparison
	        path: ${{ github.workspace }}/openvx-mark/comparison.*
	        if-no-files-found: ignore

	# ---------------------------------------------------------------------------
	# Perf gate
	#
	# Direct rustVX-PR vs rustVX-main comparison on the SAME runner VM (the
	# `benchmark` job above benches both back-to-back, so hardware variance
	# between the two is zero). This job downloads the two JSON artifacts
	# the bench job produced, runs `.github/scripts/perf_gate.py` to apply
	# the regression thresholds, posts a markdown verdict to the PR's job
	# summary, and exits non-zero (i.e. fails the workflow) on regression.
	#
	# Threshold rationale (see `.github/scripts/perf_gate.py` for full
	# docstring and per-flag semantics):
	#
	#   * --geomean-floor 0.97 -> aggregate move > 3% slower fails. This
	#                             is the real signal for actual perf bugs
	#                             that affect multiple kernels.
	#   * --kernel-floor 0.75  -> a SINGLE-kernel hard fail requires
	#                             > 25% regression. This is intentionally
	#                             generous: we measured ~10-15% between-
	#                             run drift on otherwise-identical
	#                             binaries on the same VM (cache state,
	#                             thermal, VM-host neighbour load), well
	#                             above the within-run CV% the bench
	#                             itself reports. A tighter per-kernel
	#                             floor produced false positives on
	#                             no-op PRs (CI run 25614982597).
	#   * --warn-floor 0.90    -> soft-warn band [0.75, 0.90). Below 10%
	#                             we treat as noise.
	#   * --max-cv 5.0         -> auto-skip kernels above this within-
	#                             run CV%; combined with the looser
	#                             per-kernel floor this gives us a clean
	#                             signal-to-noise ratio.
	#
	# Trigger:
	#   * pull_request only — push events to main do not gate against
	#     themselves (there's no merge target to diff against).
	# ---------------------------------------------------------------------------
	perf-gate:
	  name: Perf gate (PR vs main)
	  if: github.event_name == 'pull_request'
	  runs-on: ubuntu-22.04
	  needs: benchmark
	  steps:
	    - name: Checkout repo (for the perf_gate script)
	      uses: actions/checkout@v4
	      with:
	        fetch-depth: 1

	    - name: Download PR rustVX benchmark results
	      uses: actions/download-artifact@v4
	      with:
	        name: benchmark-results-rustvx
	        path: ${{ github.workspace }}/bench-pr

	    - name: Download main rustVX benchmark results
	      uses: actions/download-artifact@v4
	      with:
	        name: benchmark-results-rustvx-main
	        path: ${{ github.workspace }}/bench-main

	    - name: Run perf gate
	      run: |
	        set -euo pipefail
	        PR=${{ github.workspace }}/bench-pr/benchmark_results.json
	        MAIN=${{ github.workspace }}/bench-main/benchmark_results.json
	        # Fail loudly, not silently, when either input is missing: a
	        # gate that cannot compare must not pass.
	        if [ ! -f "$PR" ] || [ ! -f "$MAIN" ]; then
	          echo "::error::Missing benchmark JSONs (PR=$PR, MAIN=$MAIN). Did the bench job upload them?"
	          ls -la ${{ github.workspace }}/bench-pr ${{ github.workspace }}/bench-main 2>/dev/null || true
	          exit 1
	        fi

	        # Argument order is baseline (main) first, candidate (PR) second.
	        python3 ${{ github.workspace }}/.github/scripts/perf_gate.py \
	          "$MAIN" "$PR" \
	          --geomean-floor 0.97 \
	          --kernel-floor 0.75 \
	          --warn-floor 0.90 \
	          --max-cv 5.0 \
	          --summary-out "$GITHUB_STEP_SUMMARY"

Provide feedback

Saved searches

Use saved searches to filter your results more quickly

ci(perf-gate): block PRs that regress kernel performance against main #86

Workflow file

ci(perf-gate): block PRs that regress kernel performance against main #86

Uh oh!

Workflow file for this run