Skip to content

ci(perf-gate): block PRs that regress kernel performance against main #86

ci(perf-gate): block PRs that regress kernel performance against main

ci(perf-gate): block PRs that regress kernel performance against main #86

Workflow file for this run

name: OpenVX Conformance Tests

# Triggers: pushes to, and pull requests targeting, the long-lived branches.
# Changes that only touch documentation, licence/metadata files or images are
# skipped for both events (the two `paths-ignore` lists are kept identical).
on:
  push:
    branches: [master, main, develop]
    paths-ignore:
      - '**/*.md'
      - 'docs/**'
      - 'LICENSE'
      - '.gitignore'
      - '.gitattributes'
      - '.editorconfig'
      - '**/*.svg'
      - '**/*.png'
      - '**/*.jpg'
      - '**/*.jpeg'
      - '**/*.gif'
      - '**/*.webp'
  pull_request:
    branches: [master, main, develop]
    paths-ignore:
      - '**/*.md'
      - 'docs/**'
      - 'LICENSE'
      - '.gitignore'
      - '.gitattributes'
      - '.editorconfig'
      - '**/*.svg'
      - '**/*.png'
      - '**/*.jpg'
      - '**/*.jpeg'
      - '**/*.gif'
      - '**/*.webp'

# Least privilege for GITHUB_TOKEN: the jobs in this workflow only check out
# code, exchange artifacts and write job summaries, so read access to the
# repository contents is sufficient.
permissions:
  contents: read

# Shared by every job. Values are quoted where YAML would otherwise infer a
# non-string type.
env:
  CARGO_TERM_COLOR: always
  RUST_BACKTRACE: "1"
jobs:
build:
  # Builds the rustVX FFI library and the OpenVX conformance-test binary once
  # and publishes them (plus test data and headers) as the `build-artifacts`
  # artifact that the downstream jobs download.
  runs-on: ubuntu-22.04
  steps:
    - uses: actions/checkout@v4
      with:
        fetch-depth: 0
        submodules: recursive
    - name: Install system dependencies
      run: |
        sudo apt-get update
        sudo apt-get install -y build-essential cmake
    - name: Install Rust
      run: |
        curl --proto '=https' --tlsv1.2 -sSf https://sh.rustup.rs | sh -s -- -y --default-toolchain stable
        source "$HOME/.cargo/env"
        rustc --version
        cargo --version
    - name: Detect host CPU features
      id: cpu
      # GitHub-hosted Linux runners can be backed by either Intel or AMD
      # x86_64 silicon (and ARM-based pools exist too). Rather than
      # hard-coding a vendor, we read /proc/cpuinfo / uname and turn on
      # the matching SIMD Cargo features (sse2 / avx2 on x86_64, neon on
      # aarch64).
      #
      # IMPORTANT: do NOT use `-C target-cpu=native` here. The build,
      # CTS, and benchmark jobs run on independent runner VMs; the build
      # host may expose ISA extensions (e.g. AVX-512) that a downstream
      # CTS runner does not, which produces SIGILL ("Illegal instruction")
      # when the artifact is loaded on a less-capable host. Instead we
      # pick a portable x86-64 microarchitecture level and rely on
      # `#[target_feature]`-gated intrinsics for anything beyond that.
      #
      # A level is selected only when the host advertises EVERY flag that
      # level requires. Selecting `x86-64-v2` from `sse2` alone (or
      # `x86-64-v3` from `avx2` alone) would let the compiler emit
      # SSE4.2 / POPCNT / BMI / FMA instructions the host never reported.
      #
      # Step outputs: arch, vendor, cargo_features, rustflags.
      run: |
        set -euo pipefail
        ARCH=$(uname -m)
        VENDOR="unknown"
        FLAGS=""
        if [ -r /proc/cpuinfo ]; then
          VENDOR=$(grep -m1 '^vendor_id' /proc/cpuinfo | awk '{print $3}' || true)
          VENDOR=${VENDOR:-unknown}
          FLAGS=$(grep -m1 '^flags' /proc/cpuinfo | cut -d: -f2 || true)
        fi
        # Succeeds only when every argument appears as a whole word in FLAGS.
        # A here-string is used instead of `echo | grep -q` so the result
        # cannot be affected by `pipefail`.
        has_flags() {
          local flag
          for flag in "$@"; do
            grep -qw -- "$flag" <<<"$FLAGS" || return 1
          done
          return 0
        }
        # /proc/cpuinfo spellings of the features each level requires
        # (`pni` is SSE3, `abm` is LZCNT).
        V2_FLAGS=(cx16 lahf_lm popcnt pni ssse3 sse4_1 sse4_2)
        V3_FLAGS=("${V2_FLAGS[@]}" avx avx2 bmi1 bmi2 f16c fma abm movbe xsave)
        echo "Architecture : $ARCH"
        echo "CPU vendor : $VENDOR"
        CARGO_FEATURES=""
        RUSTFLAGS_VAL=""
        case "$ARCH" in
          x86_64|amd64)
            # openvx-core hosts the C-API kernel callbacks (vxAdd /
            # vxSubtract / vxBox3x3 / vxGaussian3x3 / vxColorConvert
            # → crate::simd_kernels). openvx-vision hosts the public
            # Rust-API SIMD kernels. Both crates need the matching
            # feature flag for the SIMD path to actually compile in.
            if has_flags sse2; then
              CARGO_FEATURES="$CARGO_FEATURES openvx-core/sse2 openvx-vision/sse2"
              echo " + sse2 detected"
            fi
            if has_flags avx2; then
              CARGO_FEATURES="$CARGO_FEATURES openvx-core/avx2 openvx-vision/avx2"
              echo " + avx2 detected"
            fi
            # Pick the highest *portable* microarch level the build host
            # fully satisfies; otherwise leave RUSTFLAGS empty so the
            # toolchain's default x86_64 baseline is used.
            if has_flags "${V3_FLAGS[@]}"; then
              RUSTFLAGS_VAL="-C target-cpu=x86-64-v3"
            elif has_flags "${V2_FLAGS[@]}"; then
              RUSTFLAGS_VAL="-C target-cpu=x86-64-v2"
            fi
            ;;
          aarch64|arm64)
            CARGO_FEATURES="$CARGO_FEATURES openvx-core/neon openvx-vision/neon"
            echo " + neon (mandatory on aarch64)"
            ;;
          *)
            echo " (no SIMD features enabled for $ARCH — scalar build)"
            ;;
        esac
        # `xargs` trims and collapses the whitespace left by the appends.
        CARGO_FEATURES=$(echo "$CARGO_FEATURES" | xargs)
        echo "Cargo features: ${CARGO_FEATURES:-<none>}"
        echo "RUSTFLAGS : ${RUSTFLAGS_VAL:-<none>}"
        {
          echo "arch=$ARCH"
          echo "vendor=$VENDOR"
          echo "cargo_features=$CARGO_FEATURES"
          echo "rustflags=$RUSTFLAGS_VAL"
        } >> "$GITHUB_OUTPUT"
    - name: Build rustVX
      # The detect step's outputs are handed over as environment variables
      # rather than being spliced into the script text.
      env:
        RUSTFLAGS: ${{ steps.cpu.outputs.rustflags }}
        FEATURES: ${{ steps.cpu.outputs.cargo_features }}
      run: |
        source "$HOME/.cargo/env"
        # Build only the FFI cdylib that downstream jobs consume; this
        # is the artifact that becomes libopenvx_ffi.so. Building this
        # one package (instead of the whole workspace) keeps the build
        # tight and lets us forward openvx-vision SIMD features via
        # the `pkg/feature` syntax without needing a passthrough in
        # openvx-ffi's Cargo.toml.
        if [ -n "$FEATURES" ]; then
          echo "Building openvx-ffi with features: $FEATURES"
          cargo build --release -p openvx-ffi --features "$FEATURES"
        else
          echo "Building openvx-ffi with no extra SIMD features"
          cargo build --release -p openvx-ffi
        fi
    - name: Build OpenVX CTS
      # Copies the repository headers into the CTS tree (best effort), then
      # configures and builds the CTS against the freshly built
      # libopenvx_ffi.so.
      run: |
        cd OpenVX-cts
        mkdir -p include
        if [ -d "../include" ]; then
          cp -r ../include/* include/ 2>/dev/null || true
        fi
        mkdir -p build
        cd build
        cmake .. \
          -DCMAKE_BUILD_TYPE=Release \
          -DCMAKE_C_STANDARD_LIBRARIES="-lm" \
          -DCMAKE_CXX_STANDARD_LIBRARIES="-lm" \
          -DOPENVX_INCLUDES="${{ github.workspace }}/include;${{ github.workspace }}/OpenVX-cts/include" \
          -DOPENVX_LIBRARIES="${{ github.workspace }}/target/release/libopenvx_ffi.so;m" \
          -DOPENVX_CONFORMANCE_VISION=ON \
          -DOPENVX_USE_ENHANCED_VISION=ON
        make -j$(nproc)
    - name: Upload build artifacts
      uses: actions/upload-artifact@v4
      with:
        name: build-artifacts
        # `include/` is bundled so the downstream benchmark job can build
        # openvx-mark against rustVX without needing to check out the
        # rustVX source tree.
        path: |
          target/release/libopenvx_ffi.so
          OpenVX-cts/build/bin/vx_test_conformance
          OpenVX-cts/test_data/
          include/
        retention-days: 1
# Build the Khronos OpenVX sample implementation in its own phase, in
# parallel with the rustVX `build` job, and upload the resulting library
# + headers as a self-contained archive. The benchmark job below pulls
# both archives down onto a single runner so rustVX and the Khronos
# sample are exercised on identical hardware.
build-khronos-sample:
  # Clones and builds the Khronos sample implementation, then publishes its
  # libraries and headers as the `khronos-sample-artifacts` artifact.
  name: Build Khronos OpenVX sample
  runs-on: ubuntu-22.04
  steps:
    - name: Install system dependencies
      run: |
        sudo apt-get update
        sudo apt-get install -y build-essential cmake git python3
    - name: Build Khronos OpenVX sample
      run: |
        git clone --recursive --depth 1 \
          https://github.com/KhronosGroup/OpenVX-sample-impl.git khronos-sample
        cd khronos-sample
        python3 Build.py --os=Linux --arch=64 --conf=Release
    - name: Stage Khronos sample archive
      # Locates the built libopenvx.so (ignoring anything under a `build/`
      # directory) and copies the libraries next to it, plus the API
      # headers, into `khronos-stage/`.
      run: |
        set -euo pipefail
        # `-print -quit` stops at the first match, so no `head` pipeline is
        # needed and an empty result can be reported explicitly instead of
        # surfacing as a `dirname: missing operand` failure.
        LIB_PATH=$(find khronos-sample -name "libopenvx.so" -not -path "*/build/*" -print -quit)
        if [ -z "$LIB_PATH" ]; then
          echo "::error::libopenvx.so not found under khronos-sample/ (outside */build/*); did Build.py succeed?"
          exit 1
        fi
        LIB_SRC=$(dirname "$LIB_PATH")
        echo "Khronos libraries discovered in: $LIB_SRC"
        mkdir -p khronos-stage/lib
        cp "$LIB_SRC"/libopenvx*.so "$LIB_SRC"/libvxu*.so khronos-stage/lib/
        cp -r khronos-sample/api-docs/include khronos-stage/include
        ls -R khronos-stage
    - name: Upload Khronos sample artifacts
      uses: actions/upload-artifact@v4
      with:
        name: khronos-sample-artifacts
        path: khronos-stage/
        retention-days: 1
baseline:
  # Core conformance suites. Unlike the other CTS jobs this one has no
  # `continue-on-error`, so a failing or timed-out run fails the job.
  needs: build
  runs-on: ubuntu-22.04
  steps:
    - uses: actions/checkout@v4
      with:
        submodules: recursive
    - name: Download build artifacts
      uses: actions/download-artifact@v4
      with:
        name: build-artifacts
    - name: Run baseline tests
      # Marks the downloaded CTS binary executable, points the dynamic loader
      # and the CTS at the artifact's library and test data, and runs the
      # filtered suites under a 300-second `timeout`.
      run: |
        chmod +x OpenVX-cts/build/bin/vx_test_conformance
        cd OpenVX-cts/build
        export LD_LIBRARY_PATH=${{ github.workspace }}/target/release
        export VX_TEST_DATA_PATH=${{ github.workspace }}/OpenVX-cts/test_data/
        timeout 300 ./bin/vx_test_conformance --filter="GraphBase.*:Logging.*:SmokeTestBase.*:SmokeTest.*:TargetBase.*:Target.*"
graph:
  # Graph-related conformance suites. `continue-on-error` keeps a failure
  # here from failing the workflow.
  needs: build
  runs-on: ubuntu-22.04
  continue-on-error: true
  steps:
    - uses: actions/checkout@v4
      with:
        submodules: recursive
    - name: Download build artifacts
      uses: actions/download-artifact@v4
      with:
        name: build-artifacts
    - name: Run graph tests
      # Marks the downloaded CTS binary executable, exports the library and
      # test-data locations, and runs the filtered suites under a 600-second
      # `timeout`.
      run: |
        chmod +x OpenVX-cts/build/bin/vx_test_conformance
        cd OpenVX-cts/build
        export LD_LIBRARY_PATH=${{ github.workspace }}/target/release
        export VX_TEST_DATA_PATH=${{ github.workspace }}/OpenVX-cts/test_data/
        timeout 600 ./bin/vx_test_conformance --filter="Graph.*:GraphCallback.*:GraphDelay.*:GraphROI.*:UserNode.*"
data-objects:
  # Data-object conformance suites (scalars, arrays, matrices, LUTs, ...).
  # `continue-on-error` keeps a failure here from failing the workflow.
  needs: build
  runs-on: ubuntu-22.04
  continue-on-error: true
  steps:
    - uses: actions/checkout@v4
      with:
        submodules: recursive
    - name: Download build artifacts
      uses: actions/download-artifact@v4
      with:
        name: build-artifacts
    - name: Run data object tests
      # Marks the downloaded CTS binary executable, exports the library and
      # test-data locations, and runs the filtered suites under a 300-second
      # `timeout`.
      run: |
        chmod +x OpenVX-cts/build/bin/vx_test_conformance
        cd OpenVX-cts/build
        export LD_LIBRARY_PATH=${{ github.workspace }}/target/release
        export VX_TEST_DATA_PATH=${{ github.workspace }}/OpenVX-cts/test_data/
        timeout 300 ./bin/vx_test_conformance --filter="Scalar.*:Array.*:ObjectArray.*:Matrix.*:Convolution.*:Distribution.*:LUT.*:Histogram.*"
image-ops:
  # Image and patch-access conformance suites. `continue-on-error` keeps a
  # failure here from failing the workflow.
  needs: build
  runs-on: ubuntu-22.04
  continue-on-error: true
  steps:
    - uses: actions/checkout@v4
      with:
        submodules: recursive
    - name: Download build artifacts
      uses: actions/download-artifact@v4
      with:
        name: build-artifacts
    - name: Run image operation tests
      # Marks the downloaded CTS binary executable, exports the library and
      # test-data locations, and runs the filtered suites under a 600-second
      # `timeout`.
      run: |
        chmod +x OpenVX-cts/build/bin/vx_test_conformance
        cd OpenVX-cts/build
        export LD_LIBRARY_PATH=${{ github.workspace }}/target/release
        export VX_TEST_DATA_PATH=${{ github.workspace }}/OpenVX-cts/test_data/
        timeout 600 ./bin/vx_test_conformance --filter="Image.*:vxCopyImagePatch.*:vxMapImagePatch.*:vxCreateImageFromChannel.*:vxCopyRemapPatch.*:vxMapRemapPatch.*"
vision-color:
  # Colour-conversion, channel and depth-conversion conformance suites.
  # `continue-on-error` keeps a failure here from failing the workflow.
  needs: build
  runs-on: ubuntu-22.04
  continue-on-error: true
  steps:
    - uses: actions/checkout@v4
      with:
        submodules: recursive
    - name: Download build artifacts
      uses: actions/download-artifact@v4
      with:
        name: build-artifacts
    - name: Run color and channel tests
      # Marks the downloaded CTS binary executable, exports the library and
      # test-data locations, and runs the filtered suites under a 300-second
      # `timeout`.
      run: |
        chmod +x OpenVX-cts/build/bin/vx_test_conformance
        cd OpenVX-cts/build
        export LD_LIBRARY_PATH=${{ github.workspace }}/target/release
        export VX_TEST_DATA_PATH=${{ github.workspace }}/OpenVX-cts/test_data/
        timeout 300 ./bin/vx_test_conformance --filter="ColorConvert.*:ChannelExtract.*:ChannelCombine.*:vxConvertDepth.*:vxuConvertDepth.*"
vision-filters:
  # Filter and morphology conformance suites. `continue-on-error` keeps a
  # failure here from failing the workflow.
  needs: build
  runs-on: ubuntu-22.04
  continue-on-error: true
  steps:
    - uses: actions/checkout@v4
      with:
        submodules: recursive
    - name: Download build artifacts
      uses: actions/download-artifact@v4
      with:
        name: build-artifacts
    - name: Run filter and morphology tests
      # Marks the downloaded CTS binary executable, exports the library and
      # test-data locations, and runs the filtered suites under a 600-second
      # `timeout`.
      run: |
        chmod +x OpenVX-cts/build/bin/vx_test_conformance
        cd OpenVX-cts/build
        export LD_LIBRARY_PATH=${{ github.workspace }}/target/release
        export VX_TEST_DATA_PATH=${{ github.workspace }}/OpenVX-cts/test_data/
        timeout 600 ./bin/vx_test_conformance --filter="Box3x3.*:Gaussian3x3.*:Median3x3.*:Dilate3x3.*:Erode3x3.*:Sobel3x3.*:Magnitude.*:Phase.*:NonLinearFilter.*:Convolve.*:EqualizeHistogram.*"
vision-arithmetic:
  # Arithmetic, bitwise and threshold conformance suites.
  # `continue-on-error` keeps a failure here from failing the workflow.
  needs: build
  runs-on: ubuntu-22.04
  continue-on-error: true
  steps:
    - uses: actions/checkout@v4
      with:
        submodules: recursive
    - name: Download build artifacts
      uses: actions/download-artifact@v4
      with:
        name: build-artifacts
    - name: Run arithmetic and bitwise tests
      # Marks the downloaded CTS binary executable, exports the library and
      # test-data locations, and runs the filtered suites under a 600-second
      # `timeout`.
      run: |
        chmod +x OpenVX-cts/build/bin/vx_test_conformance
        cd OpenVX-cts/build
        export LD_LIBRARY_PATH=${{ github.workspace }}/target/release
        export VX_TEST_DATA_PATH=${{ github.workspace }}/OpenVX-cts/test_data/
        timeout 600 ./bin/vx_test_conformance --filter="vxAddSub.*:vxuAddSub.*:vxMultiply.*:vxuMultiply.*:vxBinOp8u.*:vxuBinOp8u.*:vxBinOp16s.*:vxuBinOp16s.*:vxNot.*:vxuNot.*:WeightedAverage.*:Threshold.*"
vision-geometric:
  # Geometric-transform conformance suites (scale, warp, remap).
  # `continue-on-error` keeps a failure here from failing the workflow.
  needs: build
  runs-on: ubuntu-22.04
  continue-on-error: true
  steps:
    - uses: actions/checkout@v4
      with:
        submodules: recursive
    - name: Download build artifacts
      uses: actions/download-artifact@v4
      with:
        name: build-artifacts
    - name: Run geometric transform tests
      # Marks the downloaded CTS binary executable, exports the library and
      # test-data locations, and runs the filtered suites under a 600-second
      # `timeout`.
      run: |
        chmod +x OpenVX-cts/build/bin/vx_test_conformance
        cd OpenVX-cts/build
        export LD_LIBRARY_PATH=${{ github.workspace }}/target/release
        export VX_TEST_DATA_PATH=${{ github.workspace }}/OpenVX-cts/test_data/
        timeout 600 ./bin/vx_test_conformance --filter="Scale.*:WarpAffine.*:WarpPerspective.*:Remap.*:HalfScaleGaussian.*"
vision-features:
  # Corner- and edge-detection conformance suites. `continue-on-error`
  # keeps a failure here from failing the workflow.
  needs: build
  runs-on: ubuntu-22.04
  continue-on-error: true
  steps:
    - uses: actions/checkout@v4
      with:
        submodules: recursive
    - name: Download build artifacts
      uses: actions/download-artifact@v4
      with:
        name: build-artifacts
    - name: Run feature and edge detection tests
      # Marks the downloaded CTS binary executable, exports the library and
      # test-data locations, and runs the filtered suites under a 600-second
      # `timeout`.
      run: |
        chmod +x OpenVX-cts/build/bin/vx_test_conformance
        cd OpenVX-cts/build
        export LD_LIBRARY_PATH=${{ github.workspace }}/target/release
        export VX_TEST_DATA_PATH=${{ github.workspace }}/OpenVX-cts/test_data/
        timeout 600 ./bin/vx_test_conformance --filter="HarrisCorners.*:FastCorners.*:vxCanny.*:vxuCanny.*"
vision-statistics:
  # Statistics and analysis conformance suites. `continue-on-error` keeps a
  # failure here from failing the workflow.
  needs: build
  runs-on: ubuntu-22.04
  continue-on-error: true
  steps:
    - uses: actions/checkout@v4
      with:
        submodules: recursive
    - name: Download build artifacts
      uses: actions/download-artifact@v4
      with:
        name: build-artifacts
    - name: Run statistics and analysis tests
      # Marks the downloaded CTS binary executable, exports the library and
      # test-data locations, and runs the filtered suites under a 300-second
      # `timeout`.
      run: |
        chmod +x OpenVX-cts/build/bin/vx_test_conformance
        cd OpenVX-cts/build
        export LD_LIBRARY_PATH=${{ github.workspace }}/target/release
        export VX_TEST_DATA_PATH=${{ github.workspace }}/OpenVX-cts/test_data/
        timeout 300 ./bin/vx_test_conformance --filter="MeanStdDev.*:MinMaxLoc.*:Integral.*"
vision-pyramid:
  # Pyramid and optical-flow conformance suites. `continue-on-error` keeps a
  # failure here from failing the workflow.
  needs: build
  runs-on: ubuntu-22.04
  continue-on-error: true
  steps:
    - uses: actions/checkout@v4
      with:
        submodules: recursive
    - name: Download build artifacts
      uses: actions/download-artifact@v4
      with:
        name: build-artifacts
    - name: Run pyramid and optical flow tests
      # Marks the downloaded CTS binary executable, exports the library and
      # test-data locations, and runs the filtered suites under a 300-second
      # `timeout`.
      run: |
        chmod +x OpenVX-cts/build/bin/vx_test_conformance
        cd OpenVX-cts/build
        export LD_LIBRARY_PATH=${{ github.workspace }}/target/release
        export VX_TEST_DATA_PATH=${{ github.workspace }}/OpenVX-cts/test_data/
        timeout 300 ./bin/vx_test_conformance --filter="GaussianPyramid.*:LaplacianPyramid.*:LaplacianReconstruct.*:OptFlowPyrLK.*"
# Enhanced Vision Phase 1 — only the kernels rustVX has actually
# implemented from the OpenVX 1.3 Enhanced Vision feature set. The CTS
# binary is built with `OPENVX_USE_ENHANCED_VISION=ON`, but this job
# filters strictly to the kernels Phase 1 ships (vxMin / vxMax). The
# remaining Enhanced Vision symbols are exposed as link stubs in
# rustVX so the binary can build; they are not exercised here and will
# be replaced by real kernels in subsequent phases.
enhanced-vision:
  # Runs only the `Min.*` / `Max.*` CTS suites. This job has no
  # `continue-on-error`, so a failing or timed-out run fails the job.
  name: "enhanced-vision (Phase 1 — Min/Max)"
  needs: build
  runs-on: ubuntu-22.04
  steps:
    - uses: actions/checkout@v4
      with:
        submodules: recursive
    - name: Download build artifacts
      uses: actions/download-artifact@v4
      with:
        name: build-artifacts
    - name: Run Enhanced Vision Phase 1 tests
      # Marks the downloaded CTS binary executable, exports the library and
      # test-data locations, and runs the filtered suites under a 120-second
      # `timeout`.
      run: |
        chmod +x OpenVX-cts/build/bin/vx_test_conformance
        cd OpenVX-cts/build
        export LD_LIBRARY_PATH=${{ github.workspace }}/target/release
        export VX_TEST_DATA_PATH=${{ github.workspace }}/OpenVX-cts/test_data/
        timeout 120 ./bin/vx_test_conformance --filter="Min.*:Max.*"
# Performance benchmark using openvx-mark, comparing rustVX against the
# Khronos OpenVX sample implementation on the SAME runner so the two
# numbers come from identical hardware. This job does NOT rebuild the PR's
# rustVX or the Khronos sample — it downloads the archives produced by the
# `build` and `build-khronos-sample` phases above, builds the openvx-mark
# tool against each, runs the same workload, and compares the JSON
# reports. (On pull requests it additionally builds rustVX from the base
# branch as the perf-gate baseline.) The CTS jobs above use
# `continue-on-error: true`, so this
# job effectively gates on `build`, `build-khronos-sample`, and
# `baseline` succeeding (matching the existing CTS gate).
benchmark:
  # Benchmarks the PR's rustVX build, the Khronos sample and (on pull
  # requests) rustVX built from the base branch with openvx-mark on one
  # runner, then publishes the JSON reports and a comparison summary.
  name: Benchmark & compare (rustVX vs Khronos sample)
  runs-on: ubuntu-22.04
  # Every CTS job is listed so the benchmark starts only once all of them
  # have finished.
  needs:
    - build
    - build-khronos-sample
    - baseline
    - graph
    - data-objects
    - image-ops
    - vision-color
    - vision-filters
    - vision-arithmetic
    - vision-geometric
    - vision-features
    - vision-statistics
    - vision-pyramid
  # A failing benchmark must not fail the workflow by itself.
  continue-on-error: true
  steps:
    - name: Install system dependencies
      run: |
        sudo apt-get update
        sudo apt-get install -y build-essential cmake git python3
    - name: Download rustVX archive
      uses: actions/download-artifact@v4
      with:
        name: build-artifacts
        path: ${{ github.workspace }}/rustvx-pkg
    - name: Download Khronos sample archive
      uses: actions/download-artifact@v4
      with:
        name: khronos-sample-artifacts
        path: ${{ github.workspace }}/khronos-pkg
    - name: Expose rustVX as libopenvx / libvxu
      id: rustvx
      # openvx-mark uses `find_library(NAMES openvx)` and
      # `find_library(NAMES vxu)`. rustVX ships a single
      # `libopenvx_ffi.so` that exports the full set of `vx*`/`vxu*`
      # symbols, so symlink the two classic Khronos library names to
      # it without changing rustVX's own build output.
      # Step outputs: lib_dir, include_dir.
      run: |
        set -euo pipefail
        LIB_DIR=${{ github.workspace }}/rustvx-pkg/target/release
        chmod -R u+rwX "$LIB_DIR"
        cd "$LIB_DIR"
        ln -sf libopenvx_ffi.so libopenvx.so
        ln -sf libopenvx_ffi.so libvxu.so
        ls -la libopenvx*.so libvxu*.so
        echo "lib_dir=$LIB_DIR" >> "$GITHUB_OUTPUT"
        echo "include_dir=${{ github.workspace }}/rustvx-pkg/include" >> "$GITHUB_OUTPUT"
    - name: Inspect Khronos sample archive
      id: khronos
      # Lists the downloaded libraries and publishes the lib / include
      # locations as step outputs (lib_dir, include_dir).
      run: |
        set -euo pipefail
        LIB_DIR=${{ github.workspace }}/khronos-pkg/lib
        INCLUDE_DIR=${{ github.workspace }}/khronos-pkg/include
        ls -la "$LIB_DIR"
        echo "lib_dir=$LIB_DIR" >> "$GITHUB_OUTPUT"
        echo "include_dir=$INCLUDE_DIR" >> "$GITHUB_OUTPUT"
    - name: Clone openvx-mark
      run: |
        git clone --depth 1 https://github.com/kiritigowda/openvx-mark.git \
          ${{ github.workspace }}/openvx-mark
    # ---------------------------------------------------------------------
    # rustVX benchmark
    # ---------------------------------------------------------------------
    - name: Build openvx-mark against rustVX
      run: |
        mkdir -p ${{ github.workspace }}/openvx-mark/build-rustvx
        cd ${{ github.workspace }}/openvx-mark/build-rustvx
        cmake \
          -DCMAKE_BUILD_TYPE=Release \
          -DOPENVX_INCLUDES=${{ steps.rustvx.outputs.include_dir }} \
          -DOPENVX_LIB_DIR=${{ steps.rustvx.outputs.lib_dir }} \
          ..
        cmake --build . -j$(nproc)
    - name: Run benchmark (rustVX)
      run: |
        cd ${{ github.workspace }}/openvx-mark/build-rustvx
        # Append the previous value only when it is non-empty: an empty
        # LD_LIBRARY_PATH entry makes the loader search the current directory.
        export LD_LIBRARY_PATH="${{ steps.rustvx.outputs.lib_dir }}${LD_LIBRARY_PATH:+:$LD_LIBRARY_PATH}"
        ./openvx-mark --resolution FHD --iterations 20 --warmup 5
    # ---------------------------------------------------------------------
    # Khronos sample benchmark
    # ---------------------------------------------------------------------
    - name: Build openvx-mark against Khronos sample
      run: |
        mkdir -p ${{ github.workspace }}/openvx-mark/build-khronos
        cd ${{ github.workspace }}/openvx-mark/build-khronos
        cmake \
          -DCMAKE_BUILD_TYPE=Release \
          -DOPENVX_INCLUDES=${{ steps.khronos.outputs.include_dir }} \
          -DOPENVX_LIB_DIR=${{ steps.khronos.outputs.lib_dir }} \
          ..
        cmake --build . -j$(nproc)
    - name: Run benchmark (Khronos sample)
      run: |
        cd ${{ github.workspace }}/openvx-mark/build-khronos
        export LD_LIBRARY_PATH="${{ steps.khronos.outputs.lib_dir }}${LD_LIBRARY_PATH:+:$LD_LIBRARY_PATH}"
        ./openvx-mark --resolution FHD --iterations 20 --warmup 5
    # ---------------------------------------------------------------------
    # rustVX-on-main benchmark (perf-gate input)
    #
    # On pull_request runs we additionally build the PR's base branch
    # (`github.base_ref`, i.e. main in practice) on the *same* runner VM,
    # run openvx-mark against it with the same workload as the PR run, and
    # upload the resulting JSON. The downstream `perf-gate` job consumes
    # that JSON together with the PR's `benchmark-results-rustvx` artifact
    # to decide whether the PR regresses against main. Same-VM is the
    # whole point — hardware variance between separate runs would
    # swamp any real regression.
    #
    # Skipped on push events to main (there is no "merge target" to
    # diff against, and we do not want pushes to main to gate against
    # themselves).
    # ---------------------------------------------------------------------
    - name: Check out merge-base ref (main)
      if: github.event_name == 'pull_request'
      # NOTE: `github.base_ref` names the PR's target branch; this checks out
      # that branch's current tip, which is not necessarily the Git merge
      # base of the PR.
      uses: actions/checkout@v4
      with:
        ref: ${{ github.base_ref }}
        path: rustvx-main-src
        fetch-depth: 1
        submodules: recursive
    - name: Build rustVX from merge-base ref
      if: github.event_name == 'pull_request'
      id: rustvx_main
      # Step outputs: lib_dir, include_dir.
      env:
        CARGO_TERM_COLOR: always
        RUST_BACKTRACE: 1
      run: |
        set -euo pipefail
        # Install Rust if not present (runner image normally has it,
        # but the Khronos-only path above doesn't depend on it, so
        # we install defensively).
        if ! command -v cargo >/dev/null 2>&1; then
          curl --proto '=https' --tlsv1.2 -sSf https://sh.rustup.rs \
            | sh -s -- -y --default-toolchain stable
        fi
        source "$HOME/.cargo/env" || true
        # Detect this runner's CPU features. The `build` job ran its own
        # detection on its own runner; nothing is forwarded from it here.
        ARCH=$(uname -m)
        FLAGS=""
        if [ -r /proc/cpuinfo ]; then
          FLAGS=$(grep -m1 '^flags' /proc/cpuinfo | cut -d: -f2 || true)
        fi
        # Succeeds only when every argument appears as a whole word in FLAGS.
        has_flags() {
          local flag
          for flag in "$@"; do
            grep -qw -- "$flag" <<<"$FLAGS" || return 1
          done
          return 0
        }
        # /proc/cpuinfo spellings of the features each microarch level
        # requires (`pni` is SSE3, `abm` is LZCNT). A level is used only
        # when the host advertises all of them, so the compiler cannot emit
        # instructions the host never reported.
        V2_FLAGS=(cx16 lahf_lm popcnt pni ssse3 sse4_1 sse4_2)
        V3_FLAGS=("${V2_FLAGS[@]}" avx avx2 bmi1 bmi2 f16c fma abm movbe xsave)
        CARGO_FEATURES=""
        RUSTFLAGS_VAL=""
        case "$ARCH" in
          x86_64|amd64)
            if has_flags sse2; then
              CARGO_FEATURES="$CARGO_FEATURES openvx-core/sse2 openvx-vision/sse2"
            fi
            if has_flags avx2; then
              CARGO_FEATURES="$CARGO_FEATURES openvx-core/avx2 openvx-vision/avx2"
            fi
            if has_flags "${V3_FLAGS[@]}"; then
              RUSTFLAGS_VAL="-C target-cpu=x86-64-v3"
            elif has_flags "${V2_FLAGS[@]}"; then
              RUSTFLAGS_VAL="-C target-cpu=x86-64-v2"
            fi
            ;;
          aarch64|arm64)
            CARGO_FEATURES="openvx-core/neon openvx-vision/neon"
            ;;
        esac
        CARGO_FEATURES=$(echo "$CARGO_FEATURES" | xargs)
        cd ${{ github.workspace }}/rustvx-main-src
        export RUSTFLAGS="$RUSTFLAGS_VAL"
        if [ -n "$CARGO_FEATURES" ]; then
          cargo build --release -p openvx-ffi --features "$CARGO_FEATURES"
        else
          cargo build --release -p openvx-ffi
        fi
        # Stage main's lib alongside the PR's, with the libopenvx /
        # libvxu symlinks openvx-mark needs to find at link time.
        MAIN_LIB_DIR=${{ github.workspace }}/rustvx-main-pkg
        mkdir -p "$MAIN_LIB_DIR"
        cp target/release/libopenvx_ffi.so "$MAIN_LIB_DIR/"
        cd "$MAIN_LIB_DIR"
        ln -sf libopenvx_ffi.so libopenvx.so
        ln -sf libopenvx_ffi.so libvxu.so
        ls -la
        echo "lib_dir=$MAIN_LIB_DIR" >> "$GITHUB_OUTPUT"
        echo "include_dir=${{ github.workspace }}/rustvx-main-src/include" >> "$GITHUB_OUTPUT"
    - name: Build openvx-mark against rustVX-on-main
      if: github.event_name == 'pull_request'
      run: |
        mkdir -p ${{ github.workspace }}/openvx-mark/build-rustvx-main
        cd ${{ github.workspace }}/openvx-mark/build-rustvx-main
        cmake \
          -DCMAKE_BUILD_TYPE=Release \
          -DOPENVX_INCLUDES=${{ steps.rustvx_main.outputs.include_dir }} \
          -DOPENVX_LIB_DIR=${{ steps.rustvx_main.outputs.lib_dir }} \
          ..
        cmake --build . -j$(nproc)
    - name: Run benchmark (rustVX-on-main)
      if: github.event_name == 'pull_request'
      run: |
        cd ${{ github.workspace }}/openvx-mark/build-rustvx-main
        export LD_LIBRARY_PATH="${{ steps.rustvx_main.outputs.lib_dir }}${LD_LIBRARY_PATH:+:$LD_LIBRARY_PATH}"
        ./openvx-mark --resolution FHD --iterations 20 --warmup 5
    # ---------------------------------------------------------------------
    # Compare results
    # ---------------------------------------------------------------------
    - name: Compare benchmark results (rustVX vs Khronos)
      run: |
        RUSTVX=${{ github.workspace }}/openvx-mark/build-rustvx/benchmark_results/benchmark_results.json
        KHRONOS=${{ github.workspace }}/openvx-mark/build-khronos/benchmark_results/benchmark_results.json
        if [ ! -f "$RUSTVX" ] || [ ! -f "$KHRONOS" ]; then
          echo "Skipping comparison — one or both benchmark results missing"
          ls -la "$(dirname "$RUSTVX")" 2>/dev/null || true
          ls -la "$(dirname "$KHRONOS")" 2>/dev/null || true
          exit 0
        fi
        # `compare_reports.py` defines Speedup as
        #   speedup = throughput(report_b) / throughput(report_a)
        # i.e. ">1.00 means report_b is faster". To make the Speedup
        # column read as "rustVX over Khronos" (>1.00x = rustVX wins),
        # pass Khronos first (baseline / report_a) and rustVX second
        # (candidate / report_b).
        python3 ${{ github.workspace }}/openvx-mark/scripts/compare_reports.py \
          "$KHRONOS" "$RUSTVX" \
          --output ${{ github.workspace }}/openvx-mark/comparison
    - name: Post comparison to job summary
      if: always()
      # Writes a headline table (computed by the inline Python below from
      # the two JSON reports) followed by the detailed comparison report,
      # when each is available, to the job summary.
      run: |
        COMPARISON=${{ github.workspace }}/openvx-mark/comparison.md
        RUSTVX=${{ github.workspace }}/openvx-mark/build-rustvx/benchmark_results/benchmark_results.json
        KHRONOS=${{ github.workspace }}/openvx-mark/build-khronos/benchmark_results/benchmark_results.json
        # ----- Headline: aggregate speedup of rustVX over Khronos sample -----
        if [ -f "$RUSTVX" ] && [ -f "$KHRONOS" ]; then
          python3 - "$RUSTVX" "$KHRONOS" >> "$GITHUB_STEP_SUMMARY" <<'PY'
        import json, math, statistics, sys
        rustvx_path, khronos_path = sys.argv[1], sys.argv[2]
        with open(rustvx_path) as f: rustvx = json.load(f)
        with open(khronos_path) as f: khronos = json.load(f)
        def by_key(report):
            # Index results by (kernel name, mode, resolution).
            return {(r['name'], r['mode'], r['resolution']): r
                    for r in report.get('results', [])}
        a = by_key(rustvx)
        b = by_key(khronos)
        shared = sorted(set(a) & set(b))
        speedups = []
        wins, losses = 0, 0
        best = (None, 0.0)
        worst = (None, math.inf)
        for key in shared:
            ra, rb = a[key], b[key]
            # Skip results either side flagged as unverified, and results
            # without a positive throughput.
            if not (ra.get('verified', True) and rb.get('verified', True)):
                continue
            mps_r = ra.get('megapixels_per_sec', 0)
            mps_k = rb.get('megapixels_per_sec', 0)
            if mps_r <= 0 or mps_k <= 0:
                continue
            s = mps_r / mps_k  # >1.0 = rustVX faster than Khronos
            speedups.append(s)
            if s > 1.0: wins += 1
            elif s < 1.0: losses += 1
            if s > best[1]: best = (key, s)
            if s < worst[1]: worst = (key, s)
        print('# rustVX vs Khronos sample — headline')
        print()
        if not speedups:
            print('_No verified benchmarks were directly comparable._')
        else:
            geomean = math.exp(sum(math.log(s) for s in speedups) / len(speedups))
            # statistics.median averages the two middle values for an
            # even-sized sample instead of reporting the upper one.
            median = statistics.median(speedups)
            print('| Metric | Value |')
            print('|:---|---:|')
            print(f'| Geomean speedup (rustVX / Khronos) | **{geomean:.2f}x** |')
            print(f'| Median speedup (rustVX / Khronos) | {median:.2f}x |')
            print(f'| Benchmarks compared | {len(speedups)} |')
            print(f'| rustVX faster | {wins} |')
            print(f'| Khronos sample faster | {losses} |')
            if best[0]:
                bk, bv = best
                print(f'| Best rustVX speedup | {bv:.2f}x ({bk[0]} / {bk[1]} / {bk[2]}) |')
            if worst[0] and worst[1] != math.inf:
                wk, wv = worst
                print(f'| Worst rustVX speedup | {wv:.2f}x ({wk[0]} / {wk[1]} / {wk[2]}) |')
            print()
            if geomean >= 1.0:
                print(f'> rustVX is **{geomean:.2f}x** faster than the Khronos sample on average (geomean across {len(speedups)} verified benchmarks).')
            else:
                print(f'> rustVX is **{1.0/geomean:.2f}x slower** than the Khronos sample on average (geomean across {len(speedups)} verified benchmarks).')
        print()
        PY
        fi
        # ----- Detailed comparison table from compare_reports.py -----
        if [ -f "$COMPARISON" ]; then
          cat "$COMPARISON" >> "$GITHUB_STEP_SUMMARY"
        else
          echo "_No comparison report was produced._" >> "$GITHUB_STEP_SUMMARY"
        fi
    # The uploads run even after a failed step (`always()`), and a missing
    # results directory is not an error (`if-no-files-found: ignore`).
    - name: Upload rustVX benchmark results
      if: always()
      uses: actions/upload-artifact@v4
      with:
        name: benchmark-results-rustvx
        path: ${{ github.workspace }}/openvx-mark/build-rustvx/benchmark_results/
        if-no-files-found: ignore
    - name: Upload rustVX-on-main benchmark results
      if: always() && github.event_name == 'pull_request'
      uses: actions/upload-artifact@v4
      with:
        name: benchmark-results-rustvx-main
        path: ${{ github.workspace }}/openvx-mark/build-rustvx-main/benchmark_results/
        if-no-files-found: ignore
    - name: Upload Khronos sample benchmark results
      if: always()
      uses: actions/upload-artifact@v4
      with:
        name: benchmark-results-khronos-sample
        path: ${{ github.workspace }}/openvx-mark/build-khronos/benchmark_results/
        if-no-files-found: ignore
    - name: Upload comparison report
      if: always()
      uses: actions/upload-artifact@v4
      with:
        name: benchmark-comparison
        path: ${{ github.workspace }}/openvx-mark/comparison.*
        if-no-files-found: ignore
# ---------------------------------------------------------------------------
# Perf gate
#
# Direct rustVX-PR vs rustVX-main comparison on the SAME runner VM (the
# `benchmark` job above benches both within one job, so the two runs
# share identical hardware; run-to-run drift on that VM still applies —
# see the thresholds below). This job downloads the two JSON artifacts
# the bench job produced, runs `.github/scripts/perf_gate.py` to apply
# the regression thresholds, posts a markdown verdict to the PR's job
# summary, and exits non-zero (i.e. fails the workflow) on regression.
#
# Threshold rationale (see `.github/scripts/perf_gate.py` for full
# docstring and per-flag semantics):
#
# * --geomean-floor 0.97 -> aggregate move > 3% slower fails. This
# is the real signal for actual perf bugs
# that affect multiple kernels.
# * --kernel-floor 0.75 -> a SINGLE-kernel hard fail requires
# > 25% regression. This is intentionally
# generous: we measured ~10-15% between-
# run drift on otherwise-identical
# binaries on the same VM (cache state,
# thermal, VM-host neighbour load), well
# above the within-run CV% the bench
# itself reports. A tighter per-kernel
# floor produced false positives on
# no-op PRs (CI run 25614982597).
# * --warn-floor 0.90 -> soft-warn band [0.75, 0.90). Per-kernel
# regressions smaller than 10% are treated as noise.
# * --max-cv 5.0 -> auto-skip kernels above this within-
# run CV%; combined with the looser
# per-kernel floor this gives us a clean
# signal-to-noise ratio.
#
# Trigger:
# * pull_request only — push events to main do not gate against
# themselves (there's no merge target to diff against).
# ---------------------------------------------------------------------------
perf-gate:
  # Pull-request-only gate: downloads the PR and main benchmark reports that
  # the `benchmark` job uploaded and hands them to perf_gate.py.
  name: Perf gate (PR vs main)
  needs: benchmark
  if: github.event_name == 'pull_request'
  runs-on: ubuntu-22.04
  steps:
    - name: Checkout repo (for the perf_gate script)
      uses: actions/checkout@v4
      with:
        fetch-depth: 1
    - name: Download PR rustVX benchmark results
      uses: actions/download-artifact@v4
      with:
        name: benchmark-results-rustvx
        path: ${{ github.workspace }}/bench-pr
    - name: Download main rustVX benchmark results
      uses: actions/download-artifact@v4
      with:
        name: benchmark-results-rustvx-main
        path: ${{ github.workspace }}/bench-main
    - name: Run perf gate
      # Exits 1 with an error annotation when either report is missing;
      # otherwise runs perf_gate.py (main report first, PR report second)
      # and has it write its verdict to the job summary.
      run: |
        set -euo pipefail
        PR=${{ github.workspace }}/bench-pr/benchmark_results.json
        MAIN=${{ github.workspace }}/bench-main/benchmark_results.json
        if ! { [ -f "$PR" ] && [ -f "$MAIN" ]; }; then
          echo "::error::Missing benchmark JSONs (PR=$PR, MAIN=$MAIN). Did the bench job upload them?"
          ls -la ${{ github.workspace }}/bench-pr ${{ github.workspace }}/bench-main 2>/dev/null || true
          exit 1
        fi
        python3 ${{ github.workspace }}/.github/scripts/perf_gate.py \
          "$MAIN" "$PR" \
          --geomean-floor 0.97 \
          --kernel-floor 0.75 \
          --warn-floor 0.90 \
          --max-cv 5.0 \
          --summary-out "$GITHUB_STEP_SUMMARY"