ci(perf-gate): block PRs that regress kernel performance against main #86
Workflow file for this run
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Triggers and shared environment for the OpenVX conformance workflow.
# Runs for pushes and pull requests that target master/main/develop; changes
# that touch only the ignored documentation / metadata / image paths do not
# trigger a run.
name: OpenVX Conformance Tests

on:
  push:
    branches:
      - master
      - main
      - develop
    paths-ignore:
      - "**/*.md"
      - "docs/**"
      - "LICENSE"
      - ".gitignore"
      - ".gitattributes"
      - ".editorconfig"
      - "**/*.svg"
      - "**/*.png"
      - "**/*.jpg"
      - "**/*.jpeg"
      - "**/*.gif"
      - "**/*.webp"
  pull_request:
    branches:
      - master
      - main
      - develop
    paths-ignore:
      - "**/*.md"
      - "docs/**"
      - "LICENSE"
      - ".gitignore"
      - ".gitattributes"
      - ".editorconfig"
      - "**/*.svg"
      - "**/*.png"
      - "**/*.jpg"
      - "**/*.jpeg"
      - "**/*.gif"
      - "**/*.webp"

# Exported to every step of every job.
env:
  CARGO_TERM_COLOR: always
  RUST_BACKTRACE: "1"
| jobs: | |
# Builds libopenvx_ffi.so and the CTS binary that every downstream job consumes.
build:
  runs-on: ubuntu-22.04
  steps:
    # Full history and all submodules are fetched.
    - uses: actions/checkout@v4
      with:
        submodules: recursive
        fetch-depth: 0

    - name: Install system dependencies
      run: |
        sudo apt-get update
        sudo apt-get install -y build-essential cmake

    # Installs the stable toolchain via rustup, then prints the versions so
    # they are recorded in the job log.
    - name: Install Rust
      run: |
        curl --proto '=https' --tlsv1.2 -sSf https://sh.rustup.rs | sh -s -- -y --default-toolchain stable
        . $HOME/.cargo/env
        rustc --version
        cargo --version
- name: Detect host CPU features
  id: cpu
  # GitHub-hosted Linux runners can be backed by either Intel or AMD
  # x86_64 silicon (and ARM-based pools exist too). Rather than
  # hard-coding a vendor, we read /proc/cpuinfo / uname and turn on
  # the matching SIMD Cargo features (sse2 / avx2 on x86_64, neon on
  # aarch64) for both openvx-core and openvx-vision.
  #
  # IMPORTANT: do NOT use `-C target-cpu=native` here. The build,
  # CTS, and benchmark jobs run on independent runner VMs; the build
  # host may expose ISA extensions (e.g. AVX-512) that a downstream
  # CTS runner does not, which produces SIGILL ("Illegal instruction")
  # when the artifact is loaded on a less-capable host. Instead we
  # pin x86_64 to a portable microarchitecture level and rely on
  # `#[target_feature]`-gated intrinsics (already in openvx-vision)
  # for anything beyond that — those paths dispatch via
  # `is_x86_feature_detected!` at runtime.
  #
  # A level is selected only when EVERY flag it implies is present in
  # /proc/cpuinfo: sse2 alone does not imply x86-64-v2 (which also needs
  # SSSE3 / SSE4.1 / SSE4.2 / POPCNT / CMPXCHG16B / LAHF-SAHF), and avx2
  # alone does not imply x86-64-v3 (AVX / BMI1 / BMI2 / FMA / F16C /
  # LZCNT / MOVBE). If neither level is satisfied, RUSTFLAGS stays empty
  # and rustc uses its default x86_64 baseline.
  #
  # Outputs: arch, vendor, cargo_features, rustflags.
  run: |
    set -euo pipefail
    ARCH=$(uname -m)
    VENDOR="unknown"
    FLAGS=""
    if [ -r /proc/cpuinfo ]; then
      VENDOR=$(grep -m1 '^vendor_id' /proc/cpuinfo | awk '{print $3}' || true)
      VENDOR=${VENDOR:-unknown}
      FLAGS=$(grep -m1 '^flags' /proc/cpuinfo | cut -d: -f2 || true)
    fi
    echo "Architecture : $ARCH"
    echo "CPU vendor : $VENDOR"

    # Succeeds only when every named flag is a whole word in $FLAGS.
    # A here-string is used instead of `echo | grep -q` so an early grep
    # exit can never surface as a SIGPIPE failure under `pipefail`.
    has_flags() {
      local flag
      for flag in "$@"; do
        grep -qw -- "$flag" <<<"$FLAGS" || return 1
      done
    }

    # /proc/cpuinfo spellings: SSE3 is reported as `pni`, LZCNT as `abm`.
    X86_64_V2_FLAGS=(cx16 lahf_lm popcnt pni sse4_1 sse4_2 ssse3)
    X86_64_V3_FLAGS=(avx avx2 bmi1 bmi2 f16c fma abm movbe)

    CARGO_FEATURES=""
    RUSTFLAGS_VAL=""
    case "$ARCH" in
      x86_64|amd64)
        # openvx-core hosts the C-API kernel callbacks (vxAdd /
        # vxSubtract / vxBox3x3 / vxGaussian3x3 / vxColorConvert
        # → crate::simd_kernels). openvx-vision hosts the public
        # Rust-API SIMD kernels. Both crates need the matching
        # feature flag for the SIMD path to actually compile in.
        if has_flags sse2; then
          CARGO_FEATURES="$CARGO_FEATURES openvx-core/sse2 openvx-vision/sse2"
          echo " + sse2 detected"
        fi
        if has_flags avx2; then
          CARGO_FEATURES="$CARGO_FEATURES openvx-core/avx2 openvx-vision/avx2"
          echo " + avx2 detected"
        fi
        # Pick the highest *portable* microarch level whose complete
        # flag set the build host satisfies.
        if has_flags "${X86_64_V2_FLAGS[@]}" "${X86_64_V3_FLAGS[@]}"; then
          RUSTFLAGS_VAL="-C target-cpu=x86-64-v3"
        elif has_flags "${X86_64_V2_FLAGS[@]}"; then
          RUSTFLAGS_VAL="-C target-cpu=x86-64-v2"
        fi
        ;;
      aarch64|arm64)
        CARGO_FEATURES="$CARGO_FEATURES openvx-core/neon openvx-vision/neon"
        echo " + neon (mandatory on aarch64)"
        ;;
      *)
        echo " (no SIMD features enabled for $ARCH — scalar build)"
        ;;
    esac
    # xargs collapses the leading / repeated spaces left by concatenation.
    CARGO_FEATURES=$(echo "$CARGO_FEATURES" | xargs)
    echo "Cargo features: ${CARGO_FEATURES:-<none>}"
    echo "RUSTFLAGS : ${RUSTFLAGS_VAL:-<none>}"
    {
      echo "arch=$ARCH"
      echo "vendor=$VENDOR"
      echo "cargo_features=$CARGO_FEATURES"
      echo "rustflags=$RUSTFLAGS_VAL"
    } >> "$GITHUB_OUTPUT"
# Compiles the release cdylib with the RUSTFLAGS and Cargo features chosen by
# the `cpu` step.
- name: Build rustVX
  env:
    RUSTFLAGS: ${{ steps.cpu.outputs.rustflags }}
  run: |
    source $HOME/.cargo/env
    FEATURES="${{ steps.cpu.outputs.cargo_features }}"
    # Only the FFI cdylib is built (it becomes libopenvx_ffi.so, the
    # artifact downstream jobs consume) rather than the whole workspace.
    # That keeps the build tight and lets the SIMD features be forwarded
    # with the `pkg/feature` syntax, with no passthrough needed in
    # openvx-ffi's Cargo.toml.
    CARGO_ARGS=(build --release -p openvx-ffi)
    if [ -z "$FEATURES" ]; then
      echo "Building openvx-ffi with no extra SIMD features"
    else
      echo "Building openvx-ffi with features: $FEATURES"
      CARGO_ARGS+=(--features "$FEATURES")
    fi
    cargo "${CARGO_ARGS[@]}"
# Configures and builds the conformance test suite against the freshly built
# libopenvx_ffi.so.
- name: Build OpenVX CTS
  working-directory: OpenVX-cts
  run: |
    mkdir -p include build
    # Best effort: copy the repository-level headers next to the CTS
    # sources; a missing or empty ../include is not an error.
    if [ -d "../include" ]; then
      cp -r ../include/* include/ 2>/dev/null || true
    fi
    cd build
    cmake .. \
      -DCMAKE_BUILD_TYPE=Release \
      -DCMAKE_C_STANDARD_LIBRARIES="-lm" \
      -DCMAKE_CXX_STANDARD_LIBRARIES="-lm" \
      -DOPENVX_INCLUDES="${{ github.workspace }}/include;${{ github.workspace }}/OpenVX-cts/include" \
      -DOPENVX_LIBRARIES="${{ github.workspace }}/target/release/libopenvx_ffi.so;m" \
      -DOPENVX_CONFORMANCE_VISION=ON \
      -DOPENVX_USE_ENHANCED_VISION=ON
    make -j$(nproc)
# Publishes everything the test and benchmark jobs need as one short-lived
# artifact.
- name: Upload build artifacts
  uses: actions/upload-artifact@v4
  with:
    name: build-artifacts
    retention-days: 1
    # `include/` rides along so the benchmark job can compile openvx-mark
    # against rustVX without checking out the rustVX source tree.
    path: |
      target/release/libopenvx_ffi.so
      OpenVX-cts/build/bin/vx_test_conformance
      OpenVX-cts/test_data/
      include/
| # Build the Khronos OpenVX sample implementation in its own phase, in | |
| # parallel with the rustVX `build` job, and upload the resulting library | |
| # + headers as a self-contained archive. The benchmark job below pulls | |
| # both archives down onto a single runner so rustVX and the Khronos | |
| # sample are exercised on identical hardware. | |
# Clones and builds the Khronos sample implementation, then stages its shared
# libraries and headers under `khronos-stage/` for upload.
build-khronos-sample:
  name: Build Khronos OpenVX sample
  runs-on: ubuntu-22.04
  steps:
    - name: Install system dependencies
      run: |
        sudo apt-get update
        sudo apt-get install -y build-essential cmake git python3
    - name: Build Khronos OpenVX sample
      run: |
        git clone --recursive --depth 1 \
          https://github.com/KhronosGroup/OpenVX-sample-impl.git khronos-sample
        cd khronos-sample
        python3 Build.py --os=Linux --arch=64 --conf=Release
    - name: Stage Khronos sample archive
      run: |
        set -euo pipefail
        # Locate the first libopenvx.so outside any */build/* directory.
        # `-print -quit` stops after the first hit, so there is no
        # `find | head` pipeline that could die of SIGPIPE under pipefail.
        LIB_PATH=$(find khronos-sample -name "libopenvx.so" -not -path "*/build/*" -print -quit)
        if [ -z "$LIB_PATH" ]; then
          # Fail with an actionable message instead of the opaque
          # "dirname: missing operand" an empty result would produce.
          echo "::error::libopenvx.so not found under khronos-sample/ — did Build.py succeed?"
          exit 1
        fi
        LIB_SRC=$(dirname "$LIB_PATH")
        echo "Khronos libraries discovered in: $LIB_SRC"
        mkdir -p khronos-stage/lib
        cp "$LIB_SRC"/libopenvx*.so "$LIB_SRC"/libvxu*.so khronos-stage/lib/
        cp -r khronos-sample/api-docs/include khronos-stage/include
        ls -R khronos-stage
    - name: Upload Khronos sample artifacts
      uses: actions/upload-artifact@v4
      with:
        name: khronos-sample-artifacts
        path: khronos-stage/
        retention-days: 1
# Smoke-level conformance subset. This job has no `continue-on-error`, so a
# failure here fails the workflow.
baseline:
  needs: build
  runs-on: ubuntu-22.04
  steps:
    - uses: actions/checkout@v4
      with:
        submodules: recursive
    - name: Download build artifacts
      uses: actions/download-artifact@v4
      with:
        name: build-artifacts
    - name: Run baseline tests
      working-directory: OpenVX-cts/build
      env:
        LD_LIBRARY_PATH: ${{ github.workspace }}/target/release
        VX_TEST_DATA_PATH: ${{ github.workspace }}/OpenVX-cts/test_data/
      run: |
        # The downloaded binary needs its executable bit set before it can run.
        chmod +x bin/vx_test_conformance
        timeout 300 ./bin/vx_test_conformance --filter="GraphBase.*:Logging.*:SmokeTestBase.*:SmokeTest.*:TargetBase.*:Target.*"
# Graph-level conformance tests; `continue-on-error` keeps a failure here from
# failing the workflow run.
graph:
  needs: build
  runs-on: ubuntu-22.04
  continue-on-error: true
  steps:
    - uses: actions/checkout@v4
      with:
        submodules: recursive
    - name: Download build artifacts
      uses: actions/download-artifact@v4
      with:
        name: build-artifacts
    - name: Run graph tests
      working-directory: OpenVX-cts/build
      env:
        LD_LIBRARY_PATH: ${{ github.workspace }}/target/release
        VX_TEST_DATA_PATH: ${{ github.workspace }}/OpenVX-cts/test_data/
      run: |
        chmod +x bin/vx_test_conformance
        timeout 600 ./bin/vx_test_conformance --filter="Graph.*:GraphCallback.*:GraphDelay.*:GraphROI.*:UserNode.*"
# Data-object conformance tests (scalars, arrays, matrices, LUTs, ...);
# failures are tolerated via `continue-on-error`.
data-objects:
  needs: build
  runs-on: ubuntu-22.04
  continue-on-error: true
  steps:
    - uses: actions/checkout@v4
      with:
        submodules: recursive
    - name: Download build artifacts
      uses: actions/download-artifact@v4
      with:
        name: build-artifacts
    - name: Run data object tests
      working-directory: OpenVX-cts/build
      env:
        LD_LIBRARY_PATH: ${{ github.workspace }}/target/release
        VX_TEST_DATA_PATH: ${{ github.workspace }}/OpenVX-cts/test_data/
      run: |
        chmod +x bin/vx_test_conformance
        timeout 300 ./bin/vx_test_conformance --filter="Scalar.*:Array.*:ObjectArray.*:Matrix.*:Convolution.*:Distribution.*:LUT.*:Histogram.*"
# Image object and patch-access conformance tests; failures are tolerated via
# `continue-on-error`.
image-ops:
  needs: build
  runs-on: ubuntu-22.04
  continue-on-error: true
  steps:
    - uses: actions/checkout@v4
      with:
        submodules: recursive
    - name: Download build artifacts
      uses: actions/download-artifact@v4
      with:
        name: build-artifacts
    - name: Run image operation tests
      working-directory: OpenVX-cts/build
      env:
        LD_LIBRARY_PATH: ${{ github.workspace }}/target/release
        VX_TEST_DATA_PATH: ${{ github.workspace }}/OpenVX-cts/test_data/
      run: |
        chmod +x bin/vx_test_conformance
        timeout 600 ./bin/vx_test_conformance --filter="Image.*:vxCopyImagePatch.*:vxMapImagePatch.*:vxCreateImageFromChannel.*:vxCopyRemapPatch.*:vxMapRemapPatch.*"
# Colour conversion, channel and depth-conversion conformance tests; failures
# are tolerated via `continue-on-error`.
vision-color:
  needs: build
  runs-on: ubuntu-22.04
  continue-on-error: true
  steps:
    - uses: actions/checkout@v4
      with:
        submodules: recursive
    - name: Download build artifacts
      uses: actions/download-artifact@v4
      with:
        name: build-artifacts
    - name: Run color and channel tests
      working-directory: OpenVX-cts/build
      env:
        LD_LIBRARY_PATH: ${{ github.workspace }}/target/release
        VX_TEST_DATA_PATH: ${{ github.workspace }}/OpenVX-cts/test_data/
      run: |
        chmod +x bin/vx_test_conformance
        timeout 300 ./bin/vx_test_conformance --filter="ColorConvert.*:ChannelExtract.*:ChannelCombine.*:vxConvertDepth.*:vxuConvertDepth.*"
# Filter and morphology conformance tests; failures are tolerated via
# `continue-on-error`.
vision-filters:
  needs: build
  runs-on: ubuntu-22.04
  continue-on-error: true
  steps:
    - uses: actions/checkout@v4
      with:
        submodules: recursive
    - name: Download build artifacts
      uses: actions/download-artifact@v4
      with:
        name: build-artifacts
    - name: Run filter and morphology tests
      working-directory: OpenVX-cts/build
      env:
        LD_LIBRARY_PATH: ${{ github.workspace }}/target/release
        VX_TEST_DATA_PATH: ${{ github.workspace }}/OpenVX-cts/test_data/
      run: |
        chmod +x bin/vx_test_conformance
        timeout 600 ./bin/vx_test_conformance --filter="Box3x3.*:Gaussian3x3.*:Median3x3.*:Dilate3x3.*:Erode3x3.*:Sobel3x3.*:Magnitude.*:Phase.*:NonLinearFilter.*:Convolve.*:EqualizeHistogram.*"
# Arithmetic, bitwise, weighted-average and threshold conformance tests;
# failures are tolerated via `continue-on-error`.
vision-arithmetic:
  needs: build
  runs-on: ubuntu-22.04
  continue-on-error: true
  steps:
    - uses: actions/checkout@v4
      with:
        submodules: recursive
    - name: Download build artifacts
      uses: actions/download-artifact@v4
      with:
        name: build-artifacts
    - name: Run arithmetic and bitwise tests
      working-directory: OpenVX-cts/build
      env:
        LD_LIBRARY_PATH: ${{ github.workspace }}/target/release
        VX_TEST_DATA_PATH: ${{ github.workspace }}/OpenVX-cts/test_data/
      run: |
        chmod +x bin/vx_test_conformance
        timeout 600 ./bin/vx_test_conformance --filter="vxAddSub.*:vxuAddSub.*:vxMultiply.*:vxuMultiply.*:vxBinOp8u.*:vxuBinOp8u.*:vxBinOp16s.*:vxuBinOp16s.*:vxNot.*:vxuNot.*:WeightedAverage.*:Threshold.*"
# Geometric transform conformance tests (scale / warp / remap); failures are
# tolerated via `continue-on-error`.
vision-geometric:
  needs: build
  runs-on: ubuntu-22.04
  continue-on-error: true
  steps:
    - uses: actions/checkout@v4
      with:
        submodules: recursive
    - name: Download build artifacts
      uses: actions/download-artifact@v4
      with:
        name: build-artifacts
    - name: Run geometric transform tests
      working-directory: OpenVX-cts/build
      env:
        LD_LIBRARY_PATH: ${{ github.workspace }}/target/release
        VX_TEST_DATA_PATH: ${{ github.workspace }}/OpenVX-cts/test_data/
      run: |
        chmod +x bin/vx_test_conformance
        timeout 600 ./bin/vx_test_conformance --filter="Scale.*:WarpAffine.*:WarpPerspective.*:Remap.*:HalfScaleGaussian.*"
# Corner and edge detection conformance tests; failures are tolerated via
# `continue-on-error`.
vision-features:
  needs: build
  runs-on: ubuntu-22.04
  continue-on-error: true
  steps:
    - uses: actions/checkout@v4
      with:
        submodules: recursive
    - name: Download build artifacts
      uses: actions/download-artifact@v4
      with:
        name: build-artifacts
    - name: Run feature and edge detection tests
      working-directory: OpenVX-cts/build
      env:
        LD_LIBRARY_PATH: ${{ github.workspace }}/target/release
        VX_TEST_DATA_PATH: ${{ github.workspace }}/OpenVX-cts/test_data/
      run: |
        chmod +x bin/vx_test_conformance
        timeout 600 ./bin/vx_test_conformance --filter="HarrisCorners.*:FastCorners.*:vxCanny.*:vxuCanny.*"
# Statistics conformance tests (mean/stddev, min/max location, integral
# image); failures are tolerated via `continue-on-error`.
vision-statistics:
  needs: build
  runs-on: ubuntu-22.04
  continue-on-error: true
  steps:
    - uses: actions/checkout@v4
      with:
        submodules: recursive
    - name: Download build artifacts
      uses: actions/download-artifact@v4
      with:
        name: build-artifacts
    - name: Run statistics and analysis tests
      working-directory: OpenVX-cts/build
      env:
        LD_LIBRARY_PATH: ${{ github.workspace }}/target/release
        VX_TEST_DATA_PATH: ${{ github.workspace }}/OpenVX-cts/test_data/
      run: |
        chmod +x bin/vx_test_conformance
        timeout 300 ./bin/vx_test_conformance --filter="MeanStdDev.*:MinMaxLoc.*:Integral.*"
# Pyramid and optical-flow conformance tests; failures are tolerated via
# `continue-on-error`.
vision-pyramid:
  needs: build
  runs-on: ubuntu-22.04
  continue-on-error: true
  steps:
    - uses: actions/checkout@v4
      with:
        submodules: recursive
    - name: Download build artifacts
      uses: actions/download-artifact@v4
      with:
        name: build-artifacts
    - name: Run pyramid and optical flow tests
      working-directory: OpenVX-cts/build
      env:
        LD_LIBRARY_PATH: ${{ github.workspace }}/target/release
        VX_TEST_DATA_PATH: ${{ github.workspace }}/OpenVX-cts/test_data/
      run: |
        chmod +x bin/vx_test_conformance
        timeout 300 ./bin/vx_test_conformance --filter="GaussianPyramid.*:LaplacianPyramid.*:LaplacianReconstruct.*:OptFlowPyrLK.*"
| # Enhanced Vision Phase 1 — only the kernels rustVX has actually | |
| # implemented from the OpenVX 1.3 Enhanced Vision feature set. The CTS | |
| # binary is built with `OPENVX_USE_ENHANCED_VISION=ON`, but this job | |
| # filters strictly to the kernels Phase 1 ships (vxMin / vxMax). The | |
| # remaining Enhanced Vision symbols are exposed as link stubs in | |
| # rustVX so the binary can build; they are not exercised here and will | |
| # be replaced by real kernels in subsequent phases. | |
# Runs only the Min / Max test groups of the conformance binary. This job has
# no `continue-on-error`, so a failure here fails the workflow.
enhanced-vision:
  name: "enhanced-vision (Phase 1 — Min/Max)"
  needs: build
  runs-on: ubuntu-22.04
  steps:
    - uses: actions/checkout@v4
      with:
        submodules: recursive
    - name: Download build artifacts
      uses: actions/download-artifact@v4
      with:
        name: build-artifacts
    - name: Run Enhanced Vision Phase 1 tests
      working-directory: OpenVX-cts/build
      env:
        LD_LIBRARY_PATH: ${{ github.workspace }}/target/release
        VX_TEST_DATA_PATH: ${{ github.workspace }}/OpenVX-cts/test_data/
      run: |
        chmod +x bin/vx_test_conformance
        timeout 120 ./bin/vx_test_conformance --filter="Min.*:Max.*"
| # Performance benchmark using openvx-mark, comparing rustVX against the | |
| # Khronos OpenVX sample implementation on the SAME runner so the two | |
| # numbers come from identical hardware. This job does NOT rebuild either | |
| # implementation — it just downloads the archives produced by the | |
| # `build` and `build-khronos-sample` phases above, builds the openvx-mark | |
| # tool against each, runs the same workload, and compares the JSON | |
| # reports. The CTS jobs above use `continue-on-error: true`, so this | |
| # job effectively gates on `build`, `build-khronos-sample`, and | |
| # `baseline` succeeding (matching the existing CTS gate). | |
# Benchmarks the prebuilt rustVX and Khronos libraries on one runner. The job
# waits for both build jobs and every CTS job listed under `needs`, and its own
# failure is tolerated via `continue-on-error`.
benchmark:
  name: "Benchmark & compare (rustVX vs Khronos sample)"
  runs-on: ubuntu-22.04
  continue-on-error: true
  needs:
    - build
    - build-khronos-sample
    - baseline
    - graph
    - data-objects
    - image-ops
    - vision-color
    - vision-filters
    - vision-arithmetic
    - vision-geometric
    - vision-features
    - vision-statistics
    - vision-pyramid
  steps:
    - name: Install system dependencies
      run: |
        sudo apt-get update
        sudo apt-get install -y build-essential cmake git python3

    # Each archive is unpacked into its own directory so the two
    # implementations never overwrite each other's files.
    - name: Download rustVX archive
      uses: actions/download-artifact@v4
      with:
        name: build-artifacts
        path: ${{ github.workspace }}/rustvx-pkg
    - name: Download Khronos sample archive
      uses: actions/download-artifact@v4
      with:
        name: khronos-sample-artifacts
        path: ${{ github.workspace }}/khronos-pkg
- name: Expose rustVX as libopenvx / libvxu
  id: rustvx
  # openvx-mark locates its backend with `find_library(NAMES openvx)` and
  # `find_library(NAMES vxu)`. rustVX ships one `libopenvx_ffi.so` that
  # exports the full set of `vx*`/`vxu*` symbols, so both classic Khronos
  # library names are created as symlinks to it; rustVX's own build
  # output is left unchanged.
  #
  # Outputs: lib_dir, include_dir.
  run: |
    set -euo pipefail
    PKG_ROOT=${{ github.workspace }}/rustvx-pkg
    LIB_DIR=$PKG_ROOT/target/release
    chmod -R u+rwX "$LIB_DIR"
    cd "$LIB_DIR"
    for alias in libopenvx.so libvxu.so; do
      ln -sf libopenvx_ffi.so "$alias"
    done
    ls -la libopenvx*.so libvxu*.so
    {
      echo "lib_dir=$LIB_DIR"
      echo "include_dir=$PKG_ROOT/include"
    } >> "$GITHUB_OUTPUT"
# Lists the downloaded Khronos libraries and publishes their locations.
# Outputs: lib_dir, include_dir.
- name: Inspect Khronos sample archive
  id: khronos
  run: |
    set -euo pipefail
    PKG_ROOT=${{ github.workspace }}/khronos-pkg
    LIB_DIR=$PKG_ROOT/lib
    INCLUDE_DIR=$PKG_ROOT/include
    ls -la "$LIB_DIR"
    {
      echo "lib_dir=$LIB_DIR"
      echo "include_dir=$INCLUDE_DIR"
    } >> "$GITHUB_OUTPUT"
# Shallow clone of the benchmark tool's default branch.
- name: Clone openvx-mark
  run: |
    git clone --depth 1 https://github.com/kiritigowda/openvx-mark.git \
      ${{ github.workspace }}/openvx-mark
# ---------------------------------------------------------------------
# rustVX benchmark: configure openvx-mark against the rustVX headers and
# library directory, then run the workload.
# ---------------------------------------------------------------------
- name: Build openvx-mark against rustVX
  run: |
    BUILD_DIR=${{ github.workspace }}/openvx-mark/build-rustvx
    mkdir -p "$BUILD_DIR"
    cd "$BUILD_DIR"
    cmake \
      -DCMAKE_BUILD_TYPE=Release \
      -DOPENVX_INCLUDES=${{ steps.rustvx.outputs.include_dir }} \
      -DOPENVX_LIB_DIR=${{ steps.rustvx.outputs.lib_dir }} \
      ..
    cmake --build . -j$(nproc)
- name: Run benchmark (rustVX)
  working-directory: ${{ github.workspace }}/openvx-mark/build-rustvx
  run: |
    # Put the rustVX library directory first on the loader path.
    export LD_LIBRARY_PATH=${{ steps.rustvx.outputs.lib_dir }}:$LD_LIBRARY_PATH
    ./openvx-mark --resolution FHD --iterations 20 --warmup 5
# ---------------------------------------------------------------------
# Khronos sample benchmark: same tool and same workload arguments, built
# against the Khronos headers and libraries instead.
# ---------------------------------------------------------------------
- name: Build openvx-mark against Khronos sample
  run: |
    BUILD_DIR=${{ github.workspace }}/openvx-mark/build-khronos
    mkdir -p "$BUILD_DIR"
    cd "$BUILD_DIR"
    cmake \
      -DCMAKE_BUILD_TYPE=Release \
      -DOPENVX_INCLUDES=${{ steps.khronos.outputs.include_dir }} \
      -DOPENVX_LIB_DIR=${{ steps.khronos.outputs.lib_dir }} \
      ..
    cmake --build . -j$(nproc)
- name: Run benchmark (Khronos sample)
  working-directory: ${{ github.workspace }}/openvx-mark/build-khronos
  run: |
    # Put the Khronos library directory first on the loader path.
    export LD_LIBRARY_PATH=${{ steps.khronos.outputs.lib_dir }}:$LD_LIBRARY_PATH
    ./openvx-mark --resolution FHD --iterations 20 --warmup 5
# ---------------------------------------------------------------------
# rustVX-on-main benchmark (perf-gate input)
#
# On pull_request runs the PR's target branch (`github.base_ref`, i.e.
# main in practice) is additionally checked out and built on the *same*
# runner VM, benchmarked with the same openvx-mark workload as the PR
# build, and its JSON report is uploaded. The downstream `perf-gate` job
# consumes that JSON together with the PR's `benchmark-results-rustvx`
# artifact to decide whether the PR regresses against main. Running both
# on one VM is the whole point — hardware variance between separate
# runs would swamp any real regression.
#
# Note: the checkout below takes the current tip of the target branch
# (`ref` is the branch name, depth 1), not the git merge-base.
#
# Skipped on push events (there is no "merge target" to diff against,
# and pushes to main should not gate against themselves).
# ---------------------------------------------------------------------
- name: Check out merge-base ref (main)
  if: github.event_name == 'pull_request'
  uses: actions/checkout@v4
  with:
    path: rustvx-main-src
    ref: ${{ github.base_ref }}
    submodules: recursive
    fetch-depth: 1
# Builds libopenvx_ffi.so from the base-branch checkout in `rustvx-main-src`
# and stages it (plus libopenvx / libvxu symlinks) in `rustvx-main-pkg`.
#
# The `build` job's `steps.cpu` outputs are not visible in this job, so the
# CPU feature detection is repeated here on this runner.
# NOTE(review): the PR library was built with features detected on the `build`
# job's runner; if that runner's CPU differs from this one, the two libraries
# may be compiled with different flags — confirm this is acceptable.
#
# Outputs: lib_dir, include_dir.
- name: Build rustVX from merge-base ref
  if: github.event_name == 'pull_request'
  id: rustvx_main
  env:
    CARGO_TERM_COLOR: always
    RUST_BACKTRACE: 1
  run: |
    set -euo pipefail
    # Install Rust only when cargo is not already on PATH.
    if ! command -v cargo >/dev/null 2>&1; then
      curl --proto '=https' --tlsv1.2 -sSf https://sh.rustup.rs \
        | sh -s -- -y --default-toolchain stable
    fi
    source $HOME/.cargo/env || true

    # Re-detect host CPU features.
    ARCH=$(uname -m)
    FLAGS=""
    if [ -r /proc/cpuinfo ]; then
      FLAGS=$(grep -m1 '^flags' /proc/cpuinfo | cut -d: -f2 || true)
    fi

    # Succeeds only when every named flag is a whole word in $FLAGS.
    # A here-string avoids an `echo | grep -q` pipeline that could fail
    # with SIGPIPE under `pipefail`.
    has_flags() {
      local flag
      for flag in "$@"; do
        grep -qw -- "$flag" <<<"$FLAGS" || return 1
      done
    }

    # A microarch level is chosen only when ALL the flags it implies are
    # present: sse2 alone does not imply x86-64-v2, and avx2 alone does
    # not imply x86-64-v3. /proc/cpuinfo reports SSE3 as `pni` and LZCNT
    # as `abm`.
    X86_64_V2_FLAGS=(cx16 lahf_lm popcnt pni sse4_1 sse4_2 ssse3)
    X86_64_V3_FLAGS=(avx avx2 bmi1 bmi2 f16c fma abm movbe)

    CARGO_FEATURES=""
    RUSTFLAGS_VAL=""
    case "$ARCH" in
      x86_64|amd64)
        if has_flags sse2; then
          CARGO_FEATURES="$CARGO_FEATURES openvx-core/sse2 openvx-vision/sse2"
        fi
        if has_flags avx2; then
          CARGO_FEATURES="$CARGO_FEATURES openvx-core/avx2 openvx-vision/avx2"
        fi
        if has_flags "${X86_64_V2_FLAGS[@]}" "${X86_64_V3_FLAGS[@]}"; then
          RUSTFLAGS_VAL="-C target-cpu=x86-64-v3"
        elif has_flags "${X86_64_V2_FLAGS[@]}"; then
          RUSTFLAGS_VAL="-C target-cpu=x86-64-v2"
        fi
        ;;
      aarch64|arm64)
        CARGO_FEATURES="openvx-core/neon openvx-vision/neon"
        ;;
    esac
    CARGO_FEATURES=$(echo "$CARGO_FEATURES" | xargs)

    cd "${{ github.workspace }}/rustvx-main-src"
    export RUSTFLAGS="$RUSTFLAGS_VAL"
    if [ -n "$CARGO_FEATURES" ]; then
      cargo build --release -p openvx-ffi --features "$CARGO_FEATURES"
    else
      cargo build --release -p openvx-ffi
    fi

    # Stage main's lib in its own directory, with the libopenvx /
    # libvxu symlinks openvx-mark needs to find at link time.
    MAIN_LIB_DIR="${{ github.workspace }}/rustvx-main-pkg"
    mkdir -p "$MAIN_LIB_DIR"
    cp target/release/libopenvx_ffi.so "$MAIN_LIB_DIR/"
    cd "$MAIN_LIB_DIR"
    ln -sf libopenvx_ffi.so libopenvx.so
    ln -sf libopenvx_ffi.so libvxu.so
    ls -la
    echo "lib_dir=$MAIN_LIB_DIR" >> "$GITHUB_OUTPUT"
    echo "include_dir=${{ github.workspace }}/rustvx-main-src/include" >> "$GITHUB_OUTPUT"
# Pull-request only: build openvx-mark against the base-branch library and run
# the same workload arguments as the PR benchmark.
- name: Build openvx-mark against rustVX-on-main
  if: github.event_name == 'pull_request'
  run: |
    BUILD_DIR=${{ github.workspace }}/openvx-mark/build-rustvx-main
    mkdir -p "$BUILD_DIR"
    cd "$BUILD_DIR"
    cmake \
      -DCMAKE_BUILD_TYPE=Release \
      -DOPENVX_INCLUDES=${{ steps.rustvx_main.outputs.include_dir }} \
      -DOPENVX_LIB_DIR=${{ steps.rustvx_main.outputs.lib_dir }} \
      ..
    cmake --build . -j$(nproc)
- name: Run benchmark (rustVX-on-main)
  if: github.event_name == 'pull_request'
  working-directory: ${{ github.workspace }}/openvx-mark/build-rustvx-main
  run: |
    export LD_LIBRARY_PATH=${{ steps.rustvx_main.outputs.lib_dir }}:$LD_LIBRARY_PATH
    ./openvx-mark --resolution FHD --iterations 20 --warmup 5
# ---------------------------------------------------------------------
# Compare results
#
# Runs with `if: always()` so that a failure in one of the PR-only
# rustVX-on-main steps does not suppress the unrelated rustVX-vs-Khronos
# report. The script exits 0 on its own when either input JSON is missing.
# ---------------------------------------------------------------------
- name: Compare benchmark results (rustVX vs Khronos)
  if: always()
  run: |
    RUSTVX=${{ github.workspace }}/openvx-mark/build-rustvx/benchmark_results/benchmark_results.json
    KHRONOS=${{ github.workspace }}/openvx-mark/build-khronos/benchmark_results/benchmark_results.json
    if [ ! -f "$RUSTVX" ] || [ ! -f "$KHRONOS" ]; then
      echo "Skipping comparison — one or both benchmark results missing"
      # Best-effort listing to help diagnose which side is missing.
      ls -la "$(dirname "$RUSTVX")" 2>/dev/null || true
      ls -la "$(dirname "$KHRONOS")" 2>/dev/null || true
      exit 0
    fi
    # `compare_reports.py` defines Speedup as
    #   speedup = throughput(report_b) / throughput(report_a)
    # i.e. ">1.00 means report_b is faster". To make the Speedup
    # column read as "rustVX over Khronos" (>1.00x = rustVX wins),
    # pass Khronos first (baseline / report_a) and rustVX second
    # (candidate / report_b).
    python3 ${{ github.workspace }}/openvx-mark/scripts/compare_reports.py \
      "$KHRONOS" "$RUSTVX" \
      --output ${{ github.workspace }}/openvx-mark/comparison
# Appends two sections to the job summary: a headline table computed inline
# from the two JSON reports (when both exist), then the detailed table written
# by compare_reports.py (or a placeholder when it is absent).
- name: Post comparison to job summary
  if: always()
  run: |
    COMPARISON=${{ github.workspace }}/openvx-mark/comparison.md
    RUSTVX=${{ github.workspace }}/openvx-mark/build-rustvx/benchmark_results/benchmark_results.json
    KHRONOS=${{ github.workspace }}/openvx-mark/build-khronos/benchmark_results/benchmark_results.json
    # ----- Headline: aggregate speedup of rustVX over Khronos sample -----
    # The heredoc body and its `PY` terminator must stay at the script's
    # base indentation: Python rejects an indented first statement and
    # `<<'PY'` only matches a terminator in column 0.
    if [ -f "$RUSTVX" ] && [ -f "$KHRONOS" ]; then
      python3 - "$RUSTVX" "$KHRONOS" >> "$GITHUB_STEP_SUMMARY" <<'PY'
    import json, math, statistics, sys

    rustvx_path, khronos_path = sys.argv[1], sys.argv[2]
    with open(rustvx_path) as f:
        rustvx = json.load(f)
    with open(khronos_path) as f:
        khronos = json.load(f)

    def by_key(report):
        """Index a report's results by (name, mode, resolution)."""
        return {(r['name'], r['mode'], r['resolution']): r
                for r in report.get('results', [])}

    a = by_key(rustvx)
    b = by_key(khronos)
    shared = sorted(set(a) & set(b))
    speedups = []
    wins, losses = 0, 0
    best = (None, 0.0)
    worst = (None, math.inf)
    for key in shared:
        ra, rb = a[key], b[key]
        # Entries explicitly marked unverified on either side are skipped.
        if not (ra.get('verified', True) and rb.get('verified', True)):
            continue
        mps_r = ra.get('megapixels_per_sec', 0)
        mps_k = rb.get('megapixels_per_sec', 0)
        if mps_r <= 0 or mps_k <= 0:
            continue
        s = mps_r / mps_k  # >1.0 = rustVX faster than Khronos
        speedups.append(s)
        if s > 1.0:
            wins += 1
        elif s < 1.0:
            losses += 1
        if s > best[1]:
            best = (key, s)
        if s < worst[1]:
            worst = (key, s)

    print('# rustVX vs Khronos sample — headline')
    print()
    if not speedups:
        print('_No verified benchmarks were directly comparable._')
    else:
        geomean = math.exp(sum(math.log(s) for s in speedups) / len(speedups))
        # statistics.median averages the two middle values for an even
        # count; indexing sorted(...)[n // 2] would report the upper one.
        median = statistics.median(speedups)
        print('| Metric | Value |')
        print('|:---|---:|')
        print(f'| Geomean speedup (rustVX / Khronos) | **{geomean:.2f}x** |')
        print(f'| Median speedup (rustVX / Khronos) | {median:.2f}x |')
        print(f'| Benchmarks compared | {len(speedups)} |')
        print(f'| rustVX faster | {wins} |')
        print(f'| Khronos sample faster | {losses} |')
        if best[0]:
            bk, bv = best
            print(f'| Best rustVX speedup | {bv:.2f}x ({bk[0]} / {bk[1]} / {bk[2]}) |')
        if worst[0] and worst[1] != math.inf:
            wk, wv = worst
            print(f'| Worst rustVX speedup | {wv:.2f}x ({wk[0]} / {wk[1]} / {wk[2]}) |')
        print()
        if geomean >= 1.0:
            print(f'> rustVX is **{geomean:.2f}x** faster than the Khronos sample on average (geomean across {len(speedups)} verified benchmarks).')
        else:
            print(f'> rustVX is **{1.0/geomean:.2f}x slower** than the Khronos sample on average (geomean across {len(speedups)} verified benchmarks).')
    print()
    PY
    fi
    # ----- Detailed comparison table from compare_reports.py -----
    if [ -f "$COMPARISON" ]; then
      cat "$COMPARISON" >> "$GITHUB_STEP_SUMMARY"
    else
      echo "_No comparison report was produced._" >> "$GITHUB_STEP_SUMMARY"
    fi
# Result uploads. Each runs even after an earlier step failed (`always()`),
# and a missing results directory is not an error (`if-no-files-found: ignore`).
- name: Upload rustVX benchmark results
  if: always()
  uses: actions/upload-artifact@v4
  with:
    name: benchmark-results-rustvx
    if-no-files-found: ignore
    path: ${{ github.workspace }}/openvx-mark/build-rustvx/benchmark_results/
# Pull-request only: the base-branch numbers that the perf gate reads.
- name: Upload rustVX-on-main benchmark results
  if: always() && github.event_name == 'pull_request'
  uses: actions/upload-artifact@v4
  with:
    name: benchmark-results-rustvx-main
    if-no-files-found: ignore
    path: ${{ github.workspace }}/openvx-mark/build-rustvx-main/benchmark_results/
- name: Upload Khronos sample benchmark results
  if: always()
  uses: actions/upload-artifact@v4
  with:
    name: benchmark-results-khronos-sample
    if-no-files-found: ignore
    path: ${{ github.workspace }}/openvx-mark/build-khronos/benchmark_results/
- name: Upload comparison report
  if: always()
  uses: actions/upload-artifact@v4
  with:
    name: benchmark-comparison
    if-no-files-found: ignore
    path: ${{ github.workspace }}/openvx-mark/comparison.*
| # --------------------------------------------------------------------------- | |
| # Perf gate | |
| # | |
# Direct rustVX-PR vs rustVX-main comparison on the SAME runner VM (the
# `benchmark` job above benches both back-to-back, so the two runs share
# identical hardware; the residual run-to-run drift is what the thresholds
# below are sized for). This job downloads the two JSON artifacts
| # the bench job produced, runs `.github/scripts/perf_gate.py` to apply | |
| # the regression thresholds, posts a markdown verdict to the PR's job | |
| # summary, and exits non-zero (i.e. fails the workflow) on regression. | |
| # | |
| # Threshold rationale (see `.github/scripts/perf_gate.py` for full | |
| # docstring and per-flag semantics): | |
| # | |
| # * --geomean-floor 0.97 -> aggregate move > 3% slower fails. This | |
| # is the real signal for actual perf bugs | |
| # that affect multiple kernels. | |
| # * --kernel-floor 0.75 -> a SINGLE-kernel hard fail requires | |
| # > 25% regression. This is intentionally | |
| # generous: we measured ~10-15% between- | |
| # run drift on otherwise-identical | |
| # binaries on the same VM (cache state, | |
| # thermal, VM-host neighbour load), well | |
| # above the within-run CV% the bench | |
| # itself reports. A tighter per-kernel | |
| # floor produced false positives on | |
| # no-op PRs (CI run 25614982597). | |
# * --warn-floor 0.90 -> soft-warn band [0.75, 0.90). Regressions
# smaller than 10% are treated as noise.
| # * --max-cv 5.0 -> auto-skip kernels above this within- | |
| # run CV%; combined with the looser | |
| # per-kernel floor this gives us a clean | |
| # signal-to-noise ratio. | |
| # | |
| # Trigger: | |
| # * pull_request only — push events to main do not gate against | |
| # themselves (there's no merge target to diff against). | |
| # --------------------------------------------------------------------------- | |
# Pull-request only. Downloads the PR and base-branch benchmark JSONs produced
# by the `benchmark` job and runs `.github/scripts/perf_gate.py` on them; the
# step fails when either JSON is missing.
perf-gate:
  name: Perf gate (PR vs main)
  needs: benchmark
  if: github.event_name == 'pull_request'
  runs-on: ubuntu-22.04
  steps:
    - name: Checkout repo (for the perf_gate script)
      uses: actions/checkout@v4
      with:
        fetch-depth: 1
    - name: Download PR rustVX benchmark results
      uses: actions/download-artifact@v4
      with:
        name: benchmark-results-rustvx
        path: ${{ github.workspace }}/bench-pr
    - name: Download main rustVX benchmark results
      uses: actions/download-artifact@v4
      with:
        name: benchmark-results-rustvx-main
        path: ${{ github.workspace }}/bench-main
    - name: Run perf gate
      run: |
        set -euo pipefail
        PR_DIR=${{ github.workspace }}/bench-pr
        MAIN_DIR=${{ github.workspace }}/bench-main
        PR=$PR_DIR/benchmark_results.json
        MAIN=$MAIN_DIR/benchmark_results.json
        # Both reports are required; list the download directories to
        # help diagnose a missing upload before failing.
        if ! { [ -f "$PR" ] && [ -f "$MAIN" ]; }; then
          echo "::error::Missing benchmark JSONs (PR=$PR, MAIN=$MAIN). Did the bench job upload them?"
          ls -la "$PR_DIR" "$MAIN_DIR" 2>/dev/null || true
          exit 1
        fi
        # Argument order: base-branch report first, PR report second.
        python3 ${{ github.workspace }}/.github/scripts/perf_gate.py \
          "$MAIN" "$PR" \
          --geomean-floor 0.97 \
          --kernel-floor 0.75 \
          --warn-floor 0.90 \
          --max-cv 5.0 \
          --summary-out "$GITHUB_STEP_SUMMARY"