ci(perf-gate): block PRs that regress kernel performance against main #87
Workflow file for this run
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| name: OpenVX Conformance Tests | |
| on: | |
| push: | |
| branches: [master, main, develop] | |
| paths-ignore: | |
| - '**/*.md' | |
| - 'docs/**' | |
| - 'LICENSE' | |
| - '.gitignore' | |
| - '.gitattributes' | |
| - '.editorconfig' | |
| - '**/*.svg' | |
| - '**/*.png' | |
| - '**/*.jpg' | |
| - '**/*.jpeg' | |
| - '**/*.gif' | |
| - '**/*.webp' | |
| pull_request: | |
| branches: [master, main, develop] | |
| paths-ignore: | |
| - '**/*.md' | |
| - 'docs/**' | |
| - 'LICENSE' | |
| - '.gitignore' | |
| - '.gitattributes' | |
| - '.editorconfig' | |
| - '**/*.svg' | |
| - '**/*.png' | |
| - '**/*.jpg' | |
| - '**/*.jpeg' | |
| - '**/*.gif' | |
| - '**/*.webp' | |
| env: | |
| CARGO_TERM_COLOR: always | |
| RUST_BACKTRACE: 1 | |
| jobs: | |
| build: | |
| runs-on: ubuntu-22.04 | |
| steps: | |
| - uses: actions/checkout@v4 | |
| with: | |
| fetch-depth: 0 | |
| submodules: recursive | |
| - name: Install system dependencies | |
| run: | | |
| sudo apt-get update | |
| sudo apt-get install -y build-essential cmake | |
| - name: Install Rust | |
| run: | | |
| curl --proto '=https' --tlsv1.2 -sSf https://sh.rustup.rs | sh -s -- -y --default-toolchain stable | |
| source $HOME/.cargo/env | |
| rustc --version | |
| cargo --version | |
| - name: Detect host CPU features | |
| id: cpu | |
| # GitHub-hosted Linux runners can be backed by either Intel or AMD | |
| # x86_64 silicon (and ARM-based pools exist too). Rather than | |
| # hard-coding a vendor, we read /proc/cpuinfo / uname and turn on | |
| # the matching SIMD Cargo features in both openvx-core and | |
| # openvx-vision (sse2 / avx2 on x86_64, neon on aarch64). | |
| # | |
| # IMPORTANT: do NOT use `-C target-cpu=native` here. The build, | |
| # CTS, and benchmark jobs run on independent runner VMs; the build | |
| # host may expose ISA extensions (e.g. AVX-512) that a downstream | |
| # CTS runner does not, which produces SIGILL ("Illegal instruction") | |
| # when the artifact is loaded on a less-capable host. Instead we | |
| # pin x86_64 to a portable microarch level: `x86-64-v3` (SSE4.2 + | |
| # AVX + AVX2 + BMI1/2 + FMA + F16C) when the build host reports | |
| # AVX2, which every modern AMD EPYC and Intel Xeon in GitHub's | |
| # Azure pool supports, falling back to `x86-64-v2` when only SSE2 | |
| # is reported. We rely on `#[target_feature]`-gated intrinsics | |
| # (already in openvx-vision) for anything beyond that — those | |
| # paths dispatch via `is_x86_feature_detected!` at runtime. | |
| run: | | |
| set -euo pipefail | |
| ARCH=$(uname -m) | |
| VENDOR="unknown" | |
| FLAGS="" | |
| if [ -r /proc/cpuinfo ]; then | |
| VENDOR=$(grep -m1 '^vendor_id' /proc/cpuinfo | awk '{print $3}' || true) | |
| VENDOR=${VENDOR:-unknown} | |
| FLAGS=$(grep -m1 '^flags' /proc/cpuinfo | cut -d: -f2 || true) | |
| fi | |
| echo "Architecture : $ARCH" | |
| echo "CPU vendor : $VENDOR" | |
| CARGO_FEATURES="" | |
| RUSTFLAGS_VAL="" | |
| case "$ARCH" in | |
| x86_64|amd64) | |
| HAS_SSE2=false | |
| HAS_AVX2=false | |
| # openvx-core hosts the C-API kernel callbacks (vxAdd / | |
| # vxSubtract / vxBox3x3 / vxGaussian3x3 / vxColorConvert | |
| # → crate::simd_kernels). openvx-vision hosts the public | |
| # Rust-API SIMD kernels. Both crates need the matching | |
| # feature flag for the SIMD path to actually compile in. | |
| if echo "$FLAGS" | grep -qw sse2; then | |
| CARGO_FEATURES="$CARGO_FEATURES openvx-core/sse2 openvx-vision/sse2" | |
| HAS_SSE2=true | |
| echo " + sse2 detected" | |
| fi | |
| if echo "$FLAGS" | grep -qw avx2; then | |
| CARGO_FEATURES="$CARGO_FEATURES openvx-core/avx2 openvx-vision/avx2" | |
| HAS_AVX2=true | |
| echo " + avx2 detected" | |
| fi | |
| # Pick the most permissive *portable* microarch level that | |
| # the build host satisfies. v3 is safe on every GitHub | |
| # Linux runner today (AMD EPYC Milan/Genoa, Intel Cascade | |
| # Lake / Ice Lake), v2 is universal on x86_64 cloud silicon. | |
| if [ "$HAS_AVX2" = true ]; then | |
| RUSTFLAGS_VAL="-C target-cpu=x86-64-v3" | |
| elif [ "$HAS_SSE2" = true ]; then | |
| RUSTFLAGS_VAL="-C target-cpu=x86-64-v2" | |
| fi | |
| ;; | |
| aarch64|arm64) | |
| CARGO_FEATURES="$CARGO_FEATURES openvx-core/neon openvx-vision/neon" | |
| echo " + neon (mandatory on aarch64)" | |
| ;; | |
| *) | |
| echo " (no SIMD features enabled for $ARCH — scalar build)" | |
| ;; | |
| esac | |
| CARGO_FEATURES=$(echo "$CARGO_FEATURES" | xargs) | |
| echo "Cargo features: ${CARGO_FEATURES:-<none>}" | |
| echo "RUSTFLAGS : ${RUSTFLAGS_VAL:-<none>}" | |
| { | |
| echo "arch=$ARCH" | |
| echo "vendor=$VENDOR" | |
| echo "cargo_features=$CARGO_FEATURES" | |
| echo "rustflags=$RUSTFLAGS_VAL" | |
| } >> "$GITHUB_OUTPUT" | |
| - name: Build rustVX | |
| env: | |
| RUSTFLAGS: ${{ steps.cpu.outputs.rustflags }} | |
| run: | | |
| source $HOME/.cargo/env | |
| FEATURES="${{ steps.cpu.outputs.cargo_features }}" | |
| # Build only the FFI cdylib that downstream jobs consume; this | |
| # is the artifact that becomes libopenvx_ffi.so. Building this | |
| # one package (instead of the whole workspace) keeps the build | |
| # tight and lets us forward openvx-vision SIMD features via | |
| # the `pkg/feature` syntax without needing a passthrough in | |
| # openvx-ffi's Cargo.toml. | |
| if [ -n "$FEATURES" ]; then | |
| echo "Building openvx-ffi with features: $FEATURES" | |
| cargo build --release -p openvx-ffi --features "$FEATURES" | |
| else | |
| echo "Building openvx-ffi with no extra SIMD features" | |
| cargo build --release -p openvx-ffi | |
| fi | |
| - name: Build OpenVX CTS | |
| run: | | |
| cd OpenVX-cts | |
| mkdir -p include | |
| if [ -d "../include" ]; then | |
| cp -r ../include/* include/ 2>/dev/null || true | |
| fi | |
| mkdir -p build | |
| cd build | |
| cmake .. \ | |
| -DCMAKE_BUILD_TYPE=Release \ | |
| -DCMAKE_C_STANDARD_LIBRARIES="-lm" \ | |
| -DCMAKE_CXX_STANDARD_LIBRARIES="-lm" \ | |
| -DOPENVX_INCLUDES="${{ github.workspace }}/include;${{ github.workspace }}/OpenVX-cts/include" \ | |
| -DOPENVX_LIBRARIES="${{ github.workspace }}/target/release/libopenvx_ffi.so;m" \ | |
| -DOPENVX_CONFORMANCE_VISION=ON \ | |
| -DOPENVX_USE_ENHANCED_VISION=ON | |
| make -j$(nproc) | |
| - name: Upload build artifacts | |
| uses: actions/upload-artifact@v4 | |
| with: | |
| name: build-artifacts | |
| # `include/` is bundled so the downstream benchmark job can build | |
| # openvx-mark against rustVX without needing to check out the | |
| # rustVX source tree. | |
| path: | | |
| target/release/libopenvx_ffi.so | |
| OpenVX-cts/build/bin/vx_test_conformance | |
| OpenVX-cts/test_data/ | |
| include/ | |
| retention-days: 1 | |
| # Build rustVX from the merge-target ref (i.e. main, in practice) in | |
| # its own phase, in parallel with the PR's `build` job. The downstream | |
| # `perf-gate` job pulls *both* archives down onto a single runner so | |
| # the PR-vs-main bench comparison runs on identical hardware against | |
| # binaries that were each built with their own branch's source tree. | |
| # | |
| # Both this job and `build` run on the same `ubuntu-22.04` runner pool | |
| # with the same auto-detection logic, so the resulting libopenvx_ffi.so | |
| # binaries should have matching compile-time SIMD feature sets in | |
| # practice. (If GitHub's pool ever produces a heterogeneous mix, the | |
| # gate will surface it as an obvious cross-the-board regression rather | |
| # than silently report nonsense; we have not seen this happen yet.) | |
| # | |
| # Skipped on all push events (no merge target to diff against). | |
| build-main: | |
| name: Build rustVX (main) | |
| if: github.event_name == 'pull_request' | |
| runs-on: ubuntu-22.04 | |
| steps: | |
| - name: Checkout merge target ref | |
| uses: actions/checkout@v4 | |
| with: | |
| ref: ${{ github.base_ref }} | |
| fetch-depth: 0 | |
| submodules: recursive | |
| - name: Install system dependencies | |
| run: | | |
| sudo apt-get update | |
| sudo apt-get install -y build-essential cmake | |
| - name: Install Rust | |
| run: | | |
| curl --proto '=https' --tlsv1.2 -sSf https://sh.rustup.rs | sh -s -- -y --default-toolchain stable | |
| source $HOME/.cargo/env | |
| rustc --version | |
| cargo --version | |
| - name: Detect host CPU features | |
| id: cpu | |
| # Identical to `build` job's detection step. Replicated rather | |
| # than refactored into a composite action to keep the workflow | |
| # self-contained. | |
| run: | | |
| set -euo pipefail | |
| ARCH=$(uname -m) | |
| VENDOR="unknown" | |
| FLAGS="" | |
| if [ -r /proc/cpuinfo ]; then | |
| VENDOR=$(grep -m1 '^vendor_id' /proc/cpuinfo | awk '{print $3}' || true) | |
| VENDOR=${VENDOR:-unknown} | |
| FLAGS=$(grep -m1 '^flags' /proc/cpuinfo | cut -d: -f2 || true) | |
| fi | |
| echo "Architecture : $ARCH" | |
| echo "CPU vendor : $VENDOR" | |
| CARGO_FEATURES="" | |
| RUSTFLAGS_VAL="" | |
| case "$ARCH" in | |
| x86_64|amd64) | |
| HAS_SSE2=false | |
| HAS_AVX2=false | |
| if echo "$FLAGS" | grep -qw sse2; then | |
| CARGO_FEATURES="$CARGO_FEATURES openvx-core/sse2 openvx-vision/sse2" | |
| HAS_SSE2=true | |
| echo " + sse2 detected" | |
| fi | |
| if echo "$FLAGS" | grep -qw avx2; then | |
| CARGO_FEATURES="$CARGO_FEATURES openvx-core/avx2 openvx-vision/avx2" | |
| HAS_AVX2=true | |
| echo " + avx2 detected" | |
| fi | |
| if [ "$HAS_AVX2" = true ]; then | |
| RUSTFLAGS_VAL="-C target-cpu=x86-64-v3" | |
| elif [ "$HAS_SSE2" = true ]; then | |
| RUSTFLAGS_VAL="-C target-cpu=x86-64-v2" | |
| fi | |
| ;; | |
| aarch64|arm64) | |
| CARGO_FEATURES="$CARGO_FEATURES openvx-core/neon openvx-vision/neon" | |
| echo " + neon (mandatory on aarch64)" | |
| ;; | |
| *) | |
| echo " (no SIMD features enabled for $ARCH — scalar build)" | |
| ;; | |
| esac | |
| CARGO_FEATURES=$(echo "$CARGO_FEATURES" | xargs) | |
| echo "Cargo features: ${CARGO_FEATURES:-<none>}" | |
| echo "RUSTFLAGS : ${RUSTFLAGS_VAL:-<none>}" | |
| { | |
| echo "arch=$ARCH" | |
| echo "vendor=$VENDOR" | |
| echo "cargo_features=$CARGO_FEATURES" | |
| echo "rustflags=$RUSTFLAGS_VAL" | |
| } >> "$GITHUB_OUTPUT" | |
| - name: Build rustVX (main) | |
| env: | |
| RUSTFLAGS: ${{ steps.cpu.outputs.rustflags }} | |
| run: | | |
| source $HOME/.cargo/env | |
| FEATURES="${{ steps.cpu.outputs.cargo_features }}" | |
| if [ -n "$FEATURES" ]; then | |
| echo "Building openvx-ffi (main) with features: $FEATURES" | |
| cargo build --release -p openvx-ffi --features "$FEATURES" | |
| else | |
| echo "Building openvx-ffi (main) with no extra SIMD features" | |
| cargo build --release -p openvx-ffi | |
| fi | |
| - name: Upload main build artifacts | |
| uses: actions/upload-artifact@v4 | |
| with: | |
| name: build-artifacts-main | |
| # No CTS payload here — only the perf-gate job consumes this | |
| # artifact, and it only needs the libopenvx_ffi.so + the | |
| # standard headers (for openvx-mark to compile against). | |
| path: | | |
| target/release/libopenvx_ffi.so | |
| include/ | |
| retention-days: 1 | |
| # Build the Khronos OpenVX sample implementation in its own phase, in | |
| # parallel with the rustVX `build` job, and upload the resulting library | |
| # + headers as a self-contained archive. The benchmark job below pulls | |
| # both archives down onto a single runner so rustVX and the Khronos | |
| # sample are exercised on identical hardware. | |
| build-khronos-sample: | |
| name: Build Khronos OpenVX sample | |
| runs-on: ubuntu-22.04 | |
| steps: | |
| - name: Install system dependencies | |
| run: | | |
| sudo apt-get update | |
| sudo apt-get install -y build-essential cmake git python3 | |
| - name: Build Khronos OpenVX sample | |
| run: | | |
| git clone --recursive --depth 1 \ | |
| https://github.com/KhronosGroup/OpenVX-sample-impl.git khronos-sample | |
| cd khronos-sample | |
| python3 Build.py --os=Linux --arch=64 --conf=Release | |
| - name: Stage Khronos sample archive | |
| # Copies the Khronos sample's libopenvx*/libvxu* shared objects and its | |
| # API headers into khronos-stage/ so the next step can upload them as | |
| # one archive. | |
| run: | | |
| set -euo pipefail | |
| # Take the first libopenvx.so that is not under a */build/* directory. | |
| # `-print -quit` stops at the first match without a pipe, so an empty | |
| # result can be reported explicitly instead of surfacing as | |
| # "dirname: missing operand". | |
| LIB_PATH=$(find khronos-sample -name "libopenvx.so" -not -path "*/build/*" -print -quit) | |
| if [ -z "$LIB_PATH" ]; then | |
| echo "::error::libopenvx.so not found under khronos-sample/ (outside */build/*)" | |
| exit 1 | |
| fi | |
| LIB_SRC=$(dirname "$LIB_PATH") | |
| echo "Khronos libraries discovered in: $LIB_SRC" | |
| mkdir -p khronos-stage/lib | |
| cp "$LIB_SRC"/libopenvx*.so "$LIB_SRC"/libvxu*.so khronos-stage/lib/ | |
| cp -r khronos-sample/api-docs/include khronos-stage/include | |
| ls -R khronos-stage | |
| - name: Upload Khronos sample artifacts | |
| uses: actions/upload-artifact@v4 | |
| with: | |
| name: khronos-sample-artifacts | |
| path: khronos-stage/ | |
| retention-days: 1 | |
| # Runs the core CTS groups (GraphBase, Logging, SmokeTest*, Target*) using the | |
| # library and test binary uploaded by the `build` job. Unlike the `graph`, | |
| # `data-objects`, `image-ops` and `vision-*` jobs, this job does not set | |
| # `continue-on-error`, so a failure here fails the workflow run. | |
| baseline: | |
| runs-on: ubuntu-22.04 | |
| needs: build | |
| steps: | |
| - uses: actions/checkout@v4 | |
| with: | |
| submodules: recursive | |
| # No `path:` is given, so the archive is unpacked into the workspace and | |
| # the files land at the same relative paths the `build` job uploaded | |
| # (target/release/, OpenVX-cts/build/bin/, OpenVX-cts/test_data/). | |
| - name: Download build artifacts | |
| uses: actions/download-artifact@v4 | |
| with: | |
| name: build-artifacts | |
| - name: Run baseline tests | |
| run: | | |
| # Restore the executable bit on the downloaded test binary | |
| # (presumably dropped by the artifact round-trip — confirm). | |
| chmod +x OpenVX-cts/build/bin/vx_test_conformance | |
| cd OpenVX-cts/build | |
| # Point the loader at the downloaded libopenvx_ffi.so and the CTS at its test data. | |
| export LD_LIBRARY_PATH=${{ github.workspace }}/target/release | |
| export VX_TEST_DATA_PATH=${{ github.workspace }}/OpenVX-cts/test_data/ | |
| # Abort the test run if it takes longer than 300 seconds. | |
| timeout 300 ./bin/vx_test_conformance --filter="GraphBase.*:Logging.*:SmokeTestBase.*:SmokeTest.*:TargetBase.*:Target.*" | |
| graph: | |
| runs-on: ubuntu-22.04 | |
| needs: build | |
| continue-on-error: true | |
| steps: | |
| - uses: actions/checkout@v4 | |
| with: | |
| submodules: recursive | |
| - name: Download build artifacts | |
| uses: actions/download-artifact@v4 | |
| with: | |
| name: build-artifacts | |
| - name: Run graph tests | |
| run: | | |
| chmod +x OpenVX-cts/build/bin/vx_test_conformance | |
| cd OpenVX-cts/build | |
| export LD_LIBRARY_PATH=${{ github.workspace }}/target/release | |
| export VX_TEST_DATA_PATH=${{ github.workspace }}/OpenVX-cts/test_data/ | |
| timeout 600 ./bin/vx_test_conformance --filter="Graph.*:GraphCallback.*:GraphDelay.*:GraphROI.*:UserNode.*" | |
| data-objects: | |
| runs-on: ubuntu-22.04 | |
| needs: build | |
| continue-on-error: true | |
| steps: | |
| - uses: actions/checkout@v4 | |
| with: | |
| submodules: recursive | |
| - name: Download build artifacts | |
| uses: actions/download-artifact@v4 | |
| with: | |
| name: build-artifacts | |
| - name: Run data object tests | |
| run: | | |
| chmod +x OpenVX-cts/build/bin/vx_test_conformance | |
| cd OpenVX-cts/build | |
| export LD_LIBRARY_PATH=${{ github.workspace }}/target/release | |
| export VX_TEST_DATA_PATH=${{ github.workspace }}/OpenVX-cts/test_data/ | |
| timeout 300 ./bin/vx_test_conformance --filter="Scalar.*:Array.*:ObjectArray.*:Matrix.*:Convolution.*:Distribution.*:LUT.*:Histogram.*" | |
| image-ops: | |
| runs-on: ubuntu-22.04 | |
| needs: build | |
| continue-on-error: true | |
| steps: | |
| - uses: actions/checkout@v4 | |
| with: | |
| submodules: recursive | |
| - name: Download build artifacts | |
| uses: actions/download-artifact@v4 | |
| with: | |
| name: build-artifacts | |
| - name: Run image operation tests | |
| run: | | |
| chmod +x OpenVX-cts/build/bin/vx_test_conformance | |
| cd OpenVX-cts/build | |
| export LD_LIBRARY_PATH=${{ github.workspace }}/target/release | |
| export VX_TEST_DATA_PATH=${{ github.workspace }}/OpenVX-cts/test_data/ | |
| timeout 600 ./bin/vx_test_conformance --filter="Image.*:vxCopyImagePatch.*:vxMapImagePatch.*:vxCreateImageFromChannel.*:vxCopyRemapPatch.*:vxMapRemapPatch.*" | |
| vision-color: | |
| runs-on: ubuntu-22.04 | |
| needs: build | |
| continue-on-error: true | |
| steps: | |
| - uses: actions/checkout@v4 | |
| with: | |
| submodules: recursive | |
| - name: Download build artifacts | |
| uses: actions/download-artifact@v4 | |
| with: | |
| name: build-artifacts | |
| - name: Run color and channel tests | |
| run: | | |
| chmod +x OpenVX-cts/build/bin/vx_test_conformance | |
| cd OpenVX-cts/build | |
| export LD_LIBRARY_PATH=${{ github.workspace }}/target/release | |
| export VX_TEST_DATA_PATH=${{ github.workspace }}/OpenVX-cts/test_data/ | |
| timeout 300 ./bin/vx_test_conformance --filter="ColorConvert.*:ChannelExtract.*:ChannelCombine.*:vxConvertDepth.*:vxuConvertDepth.*" | |
| vision-filters: | |
| runs-on: ubuntu-22.04 | |
| needs: build | |
| continue-on-error: true | |
| steps: | |
| - uses: actions/checkout@v4 | |
| with: | |
| submodules: recursive | |
| - name: Download build artifacts | |
| uses: actions/download-artifact@v4 | |
| with: | |
| name: build-artifacts | |
| - name: Run filter and morphology tests | |
| run: | | |
| chmod +x OpenVX-cts/build/bin/vx_test_conformance | |
| cd OpenVX-cts/build | |
| export LD_LIBRARY_PATH=${{ github.workspace }}/target/release | |
| export VX_TEST_DATA_PATH=${{ github.workspace }}/OpenVX-cts/test_data/ | |
| timeout 600 ./bin/vx_test_conformance --filter="Box3x3.*:Gaussian3x3.*:Median3x3.*:Dilate3x3.*:Erode3x3.*:Sobel3x3.*:Magnitude.*:Phase.*:NonLinearFilter.*:Convolve.*:EqualizeHistogram.*" | |
| vision-arithmetic: | |
| runs-on: ubuntu-22.04 | |
| needs: build | |
| continue-on-error: true | |
| steps: | |
| - uses: actions/checkout@v4 | |
| with: | |
| submodules: recursive | |
| - name: Download build artifacts | |
| uses: actions/download-artifact@v4 | |
| with: | |
| name: build-artifacts | |
| - name: Run arithmetic and bitwise tests | |
| run: | | |
| chmod +x OpenVX-cts/build/bin/vx_test_conformance | |
| cd OpenVX-cts/build | |
| export LD_LIBRARY_PATH=${{ github.workspace }}/target/release | |
| export VX_TEST_DATA_PATH=${{ github.workspace }}/OpenVX-cts/test_data/ | |
| timeout 600 ./bin/vx_test_conformance --filter="vxAddSub.*:vxuAddSub.*:vxMultiply.*:vxuMultiply.*:vxBinOp8u.*:vxuBinOp8u.*:vxBinOp16s.*:vxuBinOp16s.*:vxNot.*:vxuNot.*:WeightedAverage.*:Threshold.*" | |
| vision-geometric: | |
| runs-on: ubuntu-22.04 | |
| needs: build | |
| continue-on-error: true | |
| steps: | |
| - uses: actions/checkout@v4 | |
| with: | |
| submodules: recursive | |
| - name: Download build artifacts | |
| uses: actions/download-artifact@v4 | |
| with: | |
| name: build-artifacts | |
| - name: Run geometric transform tests | |
| run: | | |
| chmod +x OpenVX-cts/build/bin/vx_test_conformance | |
| cd OpenVX-cts/build | |
| export LD_LIBRARY_PATH=${{ github.workspace }}/target/release | |
| export VX_TEST_DATA_PATH=${{ github.workspace }}/OpenVX-cts/test_data/ | |
| timeout 600 ./bin/vx_test_conformance --filter="Scale.*:WarpAffine.*:WarpPerspective.*:Remap.*:HalfScaleGaussian.*" | |
| vision-features: | |
| runs-on: ubuntu-22.04 | |
| needs: build | |
| continue-on-error: true | |
| steps: | |
| - uses: actions/checkout@v4 | |
| with: | |
| submodules: recursive | |
| - name: Download build artifacts | |
| uses: actions/download-artifact@v4 | |
| with: | |
| name: build-artifacts | |
| - name: Run feature and edge detection tests | |
| run: | | |
| chmod +x OpenVX-cts/build/bin/vx_test_conformance | |
| cd OpenVX-cts/build | |
| export LD_LIBRARY_PATH=${{ github.workspace }}/target/release | |
| export VX_TEST_DATA_PATH=${{ github.workspace }}/OpenVX-cts/test_data/ | |
| timeout 600 ./bin/vx_test_conformance --filter="HarrisCorners.*:FastCorners.*:vxCanny.*:vxuCanny.*" | |
| vision-statistics: | |
| runs-on: ubuntu-22.04 | |
| needs: build | |
| continue-on-error: true | |
| steps: | |
| - uses: actions/checkout@v4 | |
| with: | |
| submodules: recursive | |
| - name: Download build artifacts | |
| uses: actions/download-artifact@v4 | |
| with: | |
| name: build-artifacts | |
| - name: Run statistics and analysis tests | |
| run: | | |
| chmod +x OpenVX-cts/build/bin/vx_test_conformance | |
| cd OpenVX-cts/build | |
| export LD_LIBRARY_PATH=${{ github.workspace }}/target/release | |
| export VX_TEST_DATA_PATH=${{ github.workspace }}/OpenVX-cts/test_data/ | |
| timeout 300 ./bin/vx_test_conformance --filter="MeanStdDev.*:MinMaxLoc.*:Integral.*" | |
| vision-pyramid: | |
| runs-on: ubuntu-22.04 | |
| needs: build | |
| continue-on-error: true | |
| steps: | |
| - uses: actions/checkout@v4 | |
| with: | |
| submodules: recursive | |
| - name: Download build artifacts | |
| uses: actions/download-artifact@v4 | |
| with: | |
| name: build-artifacts | |
| - name: Run pyramid and optical flow tests | |
| run: | | |
| chmod +x OpenVX-cts/build/bin/vx_test_conformance | |
| cd OpenVX-cts/build | |
| export LD_LIBRARY_PATH=${{ github.workspace }}/target/release | |
| export VX_TEST_DATA_PATH=${{ github.workspace }}/OpenVX-cts/test_data/ | |
| timeout 300 ./bin/vx_test_conformance --filter="GaussianPyramid.*:LaplacianPyramid.*:LaplacianReconstruct.*:OptFlowPyrLK.*" | |
| # Enhanced Vision Phase 1 — only the kernels rustVX has actually | |
| # implemented from the OpenVX 1.3 Enhanced Vision feature set. The CTS | |
| # binary is built with `OPENVX_USE_ENHANCED_VISION=ON`, but this job | |
| # filters strictly to the kernels Phase 1 ships (vxMin / vxMax). The | |
| # remaining Enhanced Vision symbols are exposed as link stubs in | |
| # rustVX so the binary can build; they are not exercised here and will | |
| # be replaced by real kernels in subsequent phases. | |
| enhanced-vision: | |
| name: "enhanced-vision (Phase 1 — Min/Max)" | |
| runs-on: ubuntu-22.04 | |
| needs: build | |
| steps: | |
| - uses: actions/checkout@v4 | |
| with: | |
| submodules: recursive | |
| - name: Download build artifacts | |
| uses: actions/download-artifact@v4 | |
| with: | |
| name: build-artifacts | |
| - name: Run Enhanced Vision Phase 1 tests | |
| run: | | |
| chmod +x OpenVX-cts/build/bin/vx_test_conformance | |
| cd OpenVX-cts/build | |
| export LD_LIBRARY_PATH=${{ github.workspace }}/target/release | |
| export VX_TEST_DATA_PATH=${{ github.workspace }}/OpenVX-cts/test_data/ | |
| timeout 120 ./bin/vx_test_conformance --filter="Min.*:Max.*" | |
| # Performance benchmark using openvx-mark, comparing rustVX against the | |
| # Khronos OpenVX sample implementation on the SAME runner so the two | |
| # numbers come from identical hardware. This job does NOT rebuild either | |
| # implementation — it just downloads the archives produced by the | |
| # `build` and `build-khronos-sample` phases above, builds the openvx-mark | |
| # tool against each, runs the same workload, and compares the JSON | |
| # reports. The CTS jobs above use `continue-on-error: true`, so this | |
| # job effectively gates on `build`, `build-khronos-sample`, and | |
| # `baseline` succeeding (matching the existing CTS gate). | |
| benchmark: | |
| name: Benchmark & compare (rustVX vs Khronos sample) | |
| runs-on: ubuntu-22.04 | |
| needs: | |
| - build | |
| - build-khronos-sample | |
| - baseline | |
| - graph | |
| - data-objects | |
| - image-ops | |
| - vision-color | |
| - vision-filters | |
| - vision-arithmetic | |
| - vision-geometric | |
| - vision-features | |
| - vision-statistics | |
| - vision-pyramid | |
| continue-on-error: true | |
| steps: | |
| - name: Install system dependencies | |
| run: | | |
| sudo apt-get update | |
| sudo apt-get install -y build-essential cmake git python3 | |
| - name: Download rustVX archive | |
| uses: actions/download-artifact@v4 | |
| with: | |
| name: build-artifacts | |
| path: ${{ github.workspace }}/rustvx-pkg | |
| - name: Download Khronos sample archive | |
| uses: actions/download-artifact@v4 | |
| with: | |
| name: khronos-sample-artifacts | |
| path: ${{ github.workspace }}/khronos-pkg | |
| - name: Expose rustVX as libopenvx / libvxu | |
| id: rustvx | |
| # openvx-mark uses `find_library(NAMES openvx)` and | |
| # `find_library(NAMES vxu)`. rustVX ships a single | |
| # `libopenvx_ffi.so` that exports the full set of `vx*`/`vxu*` | |
| # symbols, so symlink the two classic Khronos library names to | |
| # it without changing rustVX's own build output. | |
| run: | | |
| set -euo pipefail | |
| LIB_DIR=${{ github.workspace }}/rustvx-pkg/target/release | |
| chmod -R u+rwX "$LIB_DIR" | |
| cd "$LIB_DIR" | |
| ln -sf libopenvx_ffi.so libopenvx.so | |
| ln -sf libopenvx_ffi.so libvxu.so | |
| ls -la libopenvx*.so libvxu*.so | |
| echo "lib_dir=$LIB_DIR" >> "$GITHUB_OUTPUT" | |
| echo "include_dir=${{ github.workspace }}/rustvx-pkg/include" >> "$GITHUB_OUTPUT" | |
| - name: Inspect Khronos sample archive | |
| id: khronos | |
| run: | | |
| set -euo pipefail | |
| LIB_DIR=${{ github.workspace }}/khronos-pkg/lib | |
| INCLUDE_DIR=${{ github.workspace }}/khronos-pkg/include | |
| ls -la "$LIB_DIR" | |
| echo "lib_dir=$LIB_DIR" >> "$GITHUB_OUTPUT" | |
| echo "include_dir=$INCLUDE_DIR" >> "$GITHUB_OUTPUT" | |
| - name: Clone openvx-mark | |
| run: | | |
| git clone --depth 1 https://github.com/kiritigowda/openvx-mark.git \ | |
| ${{ github.workspace }}/openvx-mark | |
| # --------------------------------------------------------------------- | |
| # rustVX benchmark | |
| # --------------------------------------------------------------------- | |
| - name: Build openvx-mark against rustVX | |
| run: | | |
| mkdir -p ${{ github.workspace }}/openvx-mark/build-rustvx | |
| cd ${{ github.workspace }}/openvx-mark/build-rustvx | |
| cmake \ | |
| -DCMAKE_BUILD_TYPE=Release \ | |
| -DOPENVX_INCLUDES=${{ steps.rustvx.outputs.include_dir }} \ | |
| -DOPENVX_LIB_DIR=${{ steps.rustvx.outputs.lib_dir }} \ | |
| .. | |
| cmake --build . -j$(nproc) | |
| - name: Run benchmark (rustVX) | |
| run: | | |
| cd ${{ github.workspace }}/openvx-mark/build-rustvx | |
| export LD_LIBRARY_PATH=${{ steps.rustvx.outputs.lib_dir }}:$LD_LIBRARY_PATH | |
| ./openvx-mark --resolution FHD --iterations 20 --warmup 5 | |
| # --------------------------------------------------------------------- | |
| # Khronos sample benchmark | |
| # --------------------------------------------------------------------- | |
| - name: Build openvx-mark against Khronos sample | |
| run: | | |
| mkdir -p ${{ github.workspace }}/openvx-mark/build-khronos | |
| cd ${{ github.workspace }}/openvx-mark/build-khronos | |
| cmake \ | |
| -DCMAKE_BUILD_TYPE=Release \ | |
| -DOPENVX_INCLUDES=${{ steps.khronos.outputs.include_dir }} \ | |
| -DOPENVX_LIB_DIR=${{ steps.khronos.outputs.lib_dir }} \ | |
| .. | |
| cmake --build . -j$(nproc) | |
| - name: Run benchmark (Khronos sample) | |
| run: | | |
| cd ${{ github.workspace }}/openvx-mark/build-khronos | |
| export LD_LIBRARY_PATH=${{ steps.khronos.outputs.lib_dir }}:$LD_LIBRARY_PATH | |
| ./openvx-mark --resolution FHD --iterations 20 --warmup 5 | |
| # --------------------------------------------------------------------- | |
| # Compare results | |
| # --------------------------------------------------------------------- | |
| - name: Compare benchmark results (rustVX vs Khronos) | |
| run: | | |
| RUSTVX=${{ github.workspace }}/openvx-mark/build-rustvx/benchmark_results/benchmark_results.json | |
| KHRONOS=${{ github.workspace }}/openvx-mark/build-khronos/benchmark_results/benchmark_results.json | |
| if [ ! -f "$RUSTVX" ] || [ ! -f "$KHRONOS" ]; then | |
| echo "Skipping comparison — one or both benchmark results missing" | |
| ls -la "$(dirname $RUSTVX)" 2>/dev/null || true | |
| ls -la "$(dirname $KHRONOS)" 2>/dev/null || true | |
| exit 0 | |
| fi | |
| # `compare_reports.py` defines Speedup as | |
| # speedup = throughput(report_b) / throughput(report_a) | |
| # i.e. ">1.00 means report_b is faster". To make the Speedup | |
| # column read as "rustVX over Khronos" (>1.00x = rustVX wins), | |
| # pass Khronos first (baseline / report_a) and rustVX second | |
| # (candidate / report_b). | |
| python3 ${{ github.workspace }}/openvx-mark/scripts/compare_reports.py \ | |
| "$KHRONOS" "$RUSTVX" \ | |
| --output ${{ github.workspace }}/openvx-mark/comparison | |
| - name: Post comparison to job summary | |
| if: always() | |
| run: | | |
| COMPARISON=${{ github.workspace }}/openvx-mark/comparison.md | |
| RUSTVX=${{ github.workspace }}/openvx-mark/build-rustvx/benchmark_results/benchmark_results.json | |
| KHRONOS=${{ github.workspace }}/openvx-mark/build-khronos/benchmark_results/benchmark_results.json | |
| # ----- Headline: aggregate speedup of rustVX over Khronos sample ----- | |
| if [ -f "$RUSTVX" ] && [ -f "$KHRONOS" ]; then | |
| python3 - "$RUSTVX" "$KHRONOS" >> "$GITHUB_STEP_SUMMARY" <<'PY' | |
| import json, math, sys | |
| rustvx_path, khronos_path = sys.argv[1], sys.argv[2] | |
| with open(rustvx_path) as f: rustvx = json.load(f) | |
| with open(khronos_path) as f: khronos = json.load(f) | |
| def by_key(report): | |
| return {(r['name'], r['mode'], r['resolution']): r | |
| for r in report.get('results', [])} | |
| a = by_key(rustvx) | |
| b = by_key(khronos) | |
| shared = sorted(set(a) & set(b)) | |
| speedups = [] | |
| wins, losses = 0, 0 | |
| best = (None, 0.0) | |
| worst = (None, math.inf) | |
| for key in shared: | |
| ra, rb = a[key], b[key] | |
| if not (ra.get('verified', True) and rb.get('verified', True)): | |
| continue | |
| mps_r = ra.get('megapixels_per_sec', 0) | |
| mps_k = rb.get('megapixels_per_sec', 0) | |
| if mps_r <= 0 or mps_k <= 0: | |
| continue | |
| s = mps_r / mps_k # >1.0 = rustVX faster than Khronos | |
| speedups.append(s) | |
| if s > 1.0: wins += 1 | |
| elif s < 1.0: losses += 1 | |
| if s > best[1]: best = (key, s) | |
| if s < worst[1]: worst = (key, s) | |
| print('# rustVX vs Khronos sample — headline') | |
| print() | |
| if not speedups: | |
| print('_No verified benchmarks were directly comparable._') | |
| else: | |
| geomean = math.exp(sum(math.log(s) for s in speedups) / len(speedups)) | |
| median = sorted(speedups)[len(speedups) // 2] | |
| print('| Metric | Value |') | |
| print('|:---|---:|') | |
| print(f'| Geomean speedup (rustVX / Khronos) | **{geomean:.2f}x** |') | |
| print(f'| Median speedup (rustVX / Khronos) | {median:.2f}x |') | |
| print(f'| Benchmarks compared | {len(speedups)} |') | |
| print(f'| rustVX faster | {wins} |') | |
| print(f'| Khronos sample faster | {losses} |') | |
| if best[0]: | |
| bk, bv = best | |
| print(f'| Best rustVX speedup | {bv:.2f}x ({bk[0]} / {bk[1]} / {bk[2]}) |') | |
| if worst[0] and worst[1] != math.inf: | |
| wk, wv = worst | |
| print(f'| Worst rustVX speedup | {wv:.2f}x ({wk[0]} / {wk[1]} / {wk[2]}) |') | |
| print() | |
| if geomean >= 1.0: | |
| print(f'> rustVX is **{geomean:.2f}x** faster than the Khronos sample on average (geomean across {len(speedups)} verified benchmarks).') | |
| else: | |
| print(f'> rustVX is **{1.0/geomean:.2f}x slower** than the Khronos sample on average (geomean across {len(speedups)} verified benchmarks).') | |
| print() | |
| PY | |
| fi | |
| # ----- Detailed comparison table from compare_reports.py ----- | |
| if [ -f "$COMPARISON" ]; then | |
| cat "$COMPARISON" >> "$GITHUB_STEP_SUMMARY" | |
| else | |
| echo "_No comparison report was produced._" >> "$GITHUB_STEP_SUMMARY" | |
| fi | |
| - name: Upload rustVX benchmark results | |
| if: always() | |
| uses: actions/upload-artifact@v4 | |
| with: | |
| name: benchmark-results-rustvx | |
| path: ${{ github.workspace }}/openvx-mark/build-rustvx/benchmark_results/ | |
| if-no-files-found: ignore | |
| - name: Upload Khronos sample benchmark results | |
| if: always() | |
| uses: actions/upload-artifact@v4 | |
| with: | |
| name: benchmark-results-khronos-sample | |
| path: ${{ github.workspace }}/openvx-mark/build-khronos/benchmark_results/ | |
| if-no-files-found: ignore | |
| - name: Upload comparison report | |
| if: always() | |
| uses: actions/upload-artifact@v4 | |
| with: | |
| name: benchmark-comparison | |
| path: ${{ github.workspace }}/openvx-mark/comparison.* | |
| if-no-files-found: ignore | |
| # --------------------------------------------------------------------------- | |
| # Perf gate (PR rustVX vs main rustVX) | |
| # | |
| # This job is self-contained and runs in parallel with the existing | |
| # `benchmark` job (which still does the rustVX-vs-Khronos comparison | |
| # report). It pulls down the two rustVX `libopenvx_ffi.so` archives | |
| # produced in Phase 1 (`build` for the PR, `build-main` for the merge | |
| # target), builds openvx-mark twice (once per library), runs both | |
| # benches back-to-back on this single runner VM, and compares the | |
| # results with `.github/scripts/perf_gate.py`. Same-VM bench is the | |
| # whole point — hardware variance between separate runs would swamp | |
| # any real regression. | |
| # | |
| # Threshold rationale (see `.github/scripts/perf_gate.py` for full | |
| # docstring and per-flag semantics): | |
| # | |
| # * --geomean-floor 0.97 -> aggregate move > 3% slower fails; | |
| # this is the real signal for actual | |
| # perf bugs that touch multiple kernels. | |
| # * --kernel-floor 0.75 -> a SINGLE-kernel hard fail requires | |
| # > 25% regression. Generous to absorb | |
| # the ~10-15% between-run drift we've | |
| # measured on otherwise-identical | |
| # binaries on the same VM (cache state, | |
| # thermal, VM-host neighbour load). | |
| # * --warn-floor 0.90 -> soft-warn band [0.75, 0.90). Regressions | |
| # smaller than 10% are treated as noise. | |
| # * --max-cv 5.0 -> auto-skip kernels whose within-run | |
| # coefficient of variation (CV%) exceeds this. | |
| # | |
| # Trigger: | |
| # * pull_request only — push events to main do not gate against | |
| # themselves (there's no merge target to diff against). | |
| # --------------------------------------------------------------------------- | |
| perf-gate: | |
| name: Perf gate (PR vs main) | |
| # Only pull requests have a merge target to compare against, so the job | |
| # is skipped for every other event type. | |
| if: github.event_name == 'pull_request' | |
| runs-on: ubuntu-22.04 | |
| # Wait for both library builds: this job downloads the artifacts they | |
| # upload (`build-artifacts` and `build-artifacts-main`). | |
| needs: | |
| - build | |
| - build-main | |
| steps: | |
| # Shallow checkout (most recent commit only). This job needs the | |
| # repository just for `.github/scripts/perf_gate.py`; the libraries under | |
| # test arrive as artifacts instead of being rebuilt here. | |
| - name: Checkout repo (for the perf_gate script) | |
| uses: actions/checkout@v4 | |
| with: | |
| fetch-depth: 1 | |
| # Tooling used by the later steps of this job: a C/C++ toolchain and CMake | |
| # to build openvx-mark, git to clone it, and python3 to run perf_gate.py. | |
| - name: Install system dependencies | |
| run: | | |
| sudo apt-get update | |
| sudo apt-get install -y build-essential cmake git python3 | |
| # Fetch the PR build's artifact (`build-artifacts`) into rustvx-pr-pkg. | |
| # The staging step that follows expects `target/release/` and `include/` | |
| # directly under this directory. | |
| - name: Download PR rustVX archive | |
| uses: actions/download-artifact@v4 | |
| with: | |
| name: build-artifacts | |
| path: ${{ github.workspace }}/rustvx-pr-pkg | |
| # Fetch the merge-target build's artifact (`build-artifacts-main`) into | |
| # rustvx-main-pkg. The staging step that follows expects `target/release/` | |
| # and `include/` directly under this directory. | |
| - name: Download main rustVX archive | |
| uses: actions/download-artifact@v4 | |
| with: | |
| name: build-artifacts-main | |
| path: ${{ github.workspace }}/rustvx-main-pkg | |
| # Expose the PR build of libopenvx_ffi.so under the two names openvx-mark | |
| # links against (libopenvx.so and libvxu.so), and publish the library and | |
| # header directories as step outputs (`lib_dir`, `include_dir`) for the | |
| # build and run steps. | |
| - name: Stage PR rustVX (libopenvx / libvxu symlinks for openvx-mark) | |
| id: pr_rustvx | |
| run: | | |
| set -euo pipefail | |
| PKG_DIR="${{ github.workspace }}/rustvx-pr-pkg" | |
| LIB_DIR="$PKG_DIR/target/release" | |
| # `ln -sf` happily creates dangling symlinks and `ls -la` lists them | |
| # without error, so verify the real library exists before linking to it. | |
| # Otherwise a changed artifact layout only surfaces later as a link error. | |
| if [ ! -f "$LIB_DIR/libopenvx_ffi.so" ]; then | |
| echo "::error::libopenvx_ffi.so not found in $LIB_DIR (artifact 'build-artifacts')." | |
| ls -la "$PKG_DIR" "$LIB_DIR" 2>/dev/null || true | |
| exit 1 | |
| fi | |
| chmod -R u+rwX "$LIB_DIR" | |
| cd "$LIB_DIR" | |
| ln -sf libopenvx_ffi.so libopenvx.so | |
| ln -sf libopenvx_ffi.so libvxu.so | |
| ls -la libopenvx*.so libvxu*.so | |
| echo "lib_dir=$LIB_DIR" >> "$GITHUB_OUTPUT" | |
| echo "include_dir=$PKG_DIR/include" >> "$GITHUB_OUTPUT" | |
| # Expose the merge-target build of libopenvx_ffi.so under the two names | |
| # openvx-mark links against (libopenvx.so and libvxu.so), and publish the | |
| # library and header directories as step outputs (`lib_dir`, | |
| # `include_dir`) for the build and run steps. | |
| - name: Stage main rustVX (libopenvx / libvxu symlinks for openvx-mark) | |
| id: main_rustvx | |
| run: | | |
| set -euo pipefail | |
| PKG_DIR="${{ github.workspace }}/rustvx-main-pkg" | |
| LIB_DIR="$PKG_DIR/target/release" | |
| # `ln -sf` happily creates dangling symlinks and `ls -la` lists them | |
| # without error, so verify the real library exists before linking to it. | |
| # Otherwise a changed artifact layout only surfaces later as a link error. | |
| if [ ! -f "$LIB_DIR/libopenvx_ffi.so" ]; then | |
| echo "::error::libopenvx_ffi.so not found in $LIB_DIR (artifact 'build-artifacts-main')." | |
| ls -la "$PKG_DIR" "$LIB_DIR" 2>/dev/null || true | |
| exit 1 | |
| fi | |
| chmod -R u+rwX "$LIB_DIR" | |
| cd "$LIB_DIR" | |
| ln -sf libopenvx_ffi.so libopenvx.so | |
| ln -sf libopenvx_ffi.so libvxu.so | |
| ls -la libopenvx*.so libvxu*.so | |
| echo "lib_dir=$LIB_DIR" >> "$GITHUB_OUTPUT" | |
| echo "include_dir=$PKG_DIR/include" >> "$GITHUB_OUTPUT" | |
| # Shallow-clone the benchmark harness. The same checkout is built against | |
| # both libraries, so the PR-vs-main comparison within one run always uses | |
| # identical harness sources. | |
| # NOTE(review): no tag or commit is pinned, so upstream harness changes | |
| # can alter behaviour between workflow runs - consider pinning a ref. | |
| - name: Clone openvx-mark | |
| run: | | |
| git clone --depth 1 https://github.com/kiritigowda/openvx-mark.git \ | |
| ${{ github.workspace }}/openvx-mark | |
| # Build openvx-mark once per library. The two CMake configs differ | |
| # only in the include / lib paths, so we keep them in separate | |
| # build trees to avoid any re-link confusion. | |
| # | |
| # This tree (build-pr) is configured with the header and library | |
| # directories exported by the `pr_rustvx` staging step. | |
| - name: Build openvx-mark against rustVX-PR | |
| run: | | |
| mkdir -p ${{ github.workspace }}/openvx-mark/build-pr | |
| cd ${{ github.workspace }}/openvx-mark/build-pr | |
| cmake \ | |
| -DCMAKE_BUILD_TYPE=Release \ | |
| -DOPENVX_INCLUDES=${{ steps.pr_rustvx.outputs.include_dir }} \ | |
| -DOPENVX_LIB_DIR=${{ steps.pr_rustvx.outputs.lib_dir }} \ | |
| .. | |
| cmake --build . -j$(nproc) | |
| # Second openvx-mark build, in its own tree (build-main), configured with | |
| # the header and library directories exported by the `main_rustvx` staging | |
| # step. Build type and all other options match the PR build. | |
| - name: Build openvx-mark against rustVX-main | |
| run: | | |
| mkdir -p ${{ github.workspace }}/openvx-mark/build-main | |
| cd ${{ github.workspace }}/openvx-mark/build-main | |
| cmake \ | |
| -DCMAKE_BUILD_TYPE=Release \ | |
| -DOPENVX_INCLUDES=${{ steps.main_rustvx.outputs.include_dir }} \ | |
| -DOPENVX_LIB_DIR=${{ steps.main_rustvx.outputs.lib_dir }} \ | |
| .. | |
| cmake --build . -j$(nproc) | |
| # Throwaway warmup runs to neutralise the "first bench is slower | |
| # because of cold OS caches / first-time page faults / runtime | |
| # feature-detection initial dispatch" effect we observed in | |
| # practice. Both real-measurement runs below then start from a | |
| # warm-VM state. | |
| # | |
| # This step is best-effort by design: `set +e`, the `|| true` guards and | |
| # the final `true` ensure a failed warmup never fails the job, and stderr | |
| # is discarded. Each run passes `--output` pointing at a warmup-* directory | |
| # in the workspace root instead of the build tree. | |
| # | |
| # Both exports happen in the same shell, so by the second run | |
| # LD_LIBRARY_PATH holds the main lib dir followed by the PR lib dir; the | |
| # main directory is listed first and therefore takes precedence. | |
| - name: Warmup runs (throwaway) | |
| run: | | |
| set +e | |
| cd ${{ github.workspace }}/openvx-mark/build-pr | |
| export LD_LIBRARY_PATH=${{ steps.pr_rustvx.outputs.lib_dir }}:$LD_LIBRARY_PATH | |
| ./openvx-mark --resolution FHD --iterations 5 --warmup 0 \ | |
| --output ${{ github.workspace }}/warmup-pr 2>/dev/null || true | |
| cd ${{ github.workspace }}/openvx-mark/build-main | |
| export LD_LIBRARY_PATH=${{ steps.main_rustvx.outputs.lib_dir }}:$LD_LIBRARY_PATH | |
| ./openvx-mark --resolution FHD --iterations 5 --warmup 0 \ | |
| --output ${{ github.workspace }}/warmup-main 2>/dev/null || true | |
| true | |
| # Measured run against the PR library. It is executed from build-pr with | |
| # the PR lib dir prepended to LD_LIBRARY_PATH so the staged libopenvx.so / | |
| # libvxu.so symlinks are the ones resolved at load time. The perf-gate step | |
| # later reads build-pr/benchmark_results/benchmark_results.json. | |
| - name: Run benchmark (rustVX-PR) | |
| run: | | |
| cd ${{ github.workspace }}/openvx-mark/build-pr | |
| export LD_LIBRARY_PATH=${{ steps.pr_rustvx.outputs.lib_dir }}:$LD_LIBRARY_PATH | |
| ./openvx-mark --resolution FHD --iterations 20 --warmup 5 | |
| # Measured run against the merge-target library, with the same openvx-mark | |
| # arguments as the PR run. It is executed from build-main with the main lib | |
| # dir prepended to LD_LIBRARY_PATH. The perf-gate step later reads | |
| # build-main/benchmark_results/benchmark_results.json. | |
| - name: Run benchmark (rustVX-main) | |
| run: | | |
| cd ${{ github.workspace }}/openvx-mark/build-main | |
| export LD_LIBRARY_PATH=${{ steps.main_rustvx.outputs.lib_dir }}:$LD_LIBRARY_PATH | |
| ./openvx-mark --resolution FHD --iterations 20 --warmup 5 | |
| # Compare the two result files with perf_gate.py. The main-branch JSON is | |
| # passed first and the PR JSON second. If either file is missing the step | |
| # emits an error annotation, lists the result directories to aid debugging, | |
| # and fails before the script is invoked. `--summary-out` hands the script | |
| # the job-summary file path (presumably where it writes its report). | |
| - name: Run perf gate | |
| run: | | |
| set -euo pipefail | |
| PR=${{ github.workspace }}/openvx-mark/build-pr/benchmark_results/benchmark_results.json | |
| MAIN=${{ github.workspace }}/openvx-mark/build-main/benchmark_results/benchmark_results.json | |
| if [ ! -f "$PR" ] || [ ! -f "$MAIN" ]; then | |
| echo "::error::Missing benchmark JSONs (PR=$PR, MAIN=$MAIN)." | |
| ls -la "$(dirname "$PR")" "$(dirname "$MAIN")" 2>/dev/null || true | |
| exit 1 | |
| fi | |
| python3 ${{ github.workspace }}/.github/scripts/perf_gate.py \ | |
| "$MAIN" "$PR" \ | |
| --geomean-floor 0.97 \ | |
| --kernel-floor 0.75 \ | |
| --warn-floor 0.90 \ | |
| --max-cv 5.0 \ | |
| --summary-out "$GITHUB_STEP_SUMMARY" | |
| # Keep the PR-side raw results for inspection. `if: always()` lets the | |
| # upload run even when the gate (or any earlier step) failed, and a | |
| # missing directory is skipped without error. | |
| - name: Upload PR rustVX benchmark results (perf-gate) | |
| if: always() | |
| uses: actions/upload-artifact@v4 | |
| with: | |
| name: perf-gate-results-pr | |
| path: ${{ github.workspace }}/openvx-mark/build-pr/benchmark_results/ | |
| if-no-files-found: ignore | |
| # Keep the main-side raw results for inspection. `if: always()` lets the | |
| # upload run even when the gate (or any earlier step) failed, and a | |
| # missing directory is skipped without error. | |
| - name: Upload main rustVX benchmark results (perf-gate) | |
| if: always() | |
| uses: actions/upload-artifact@v4 | |
| with: | |
| name: perf-gate-results-main | |
| path: ${{ github.workspace }}/openvx-mark/build-main/benchmark_results/ | |
| if-no-files-found: ignore | |