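# NOTE(review): the lines below appear to be page text captured together with
# the workflow file; they are kept as comments so the file parses as YAML.
#   Skip to content
#   Benchmarks
#   Benchmarks #67
#   Workflow file for this run
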
# Workflow-level configuration: triggers, shared environment, default
# permissions and the default shell used by every `run` step.
name: Benchmarks

on:
  # Manual trigger; the caller may pick the branch and per-document timeout.
  workflow_dispatch:
    inputs:
      branch:
        description: 'Git branch to benchmark'
        required: false
        default: 'main'
        type: string
      timeout:
        description: 'Timeout per document in seconds'
        required: false
        default: '900'
        type: string
  # Pull requests targeting main, restricted to harness/workflow changes.
  pull_request:
    branches:
      - main
    paths:
      - "tools/benchmark-harness/**"
      - ".github/workflows/benchmarks.yaml"

# Environment values are exported as strings; numeric-looking ones are quoted
# so the YAML type matches what the steps actually receive.
env:
  ITERATIONS: '3'
  PDFIUM_VERSION: '7578'
  PDFIUM_STATIC_VERSION: '7442b'
  ORT_VERSION: '1.23.2'
  MACOSX_DEPLOYMENT_TARGET: '14.0'
  CARGO_TERM_COLOR: always
  CARGO_INCREMENTAL: '0'
  CARGO_PROFILE_DEV_DEBUG: '0'
  RUST_BACKTRACE: short
  RUST_MIN_STACK: '16777216'
  RUSTFLAGS: '-C strip=symbols'

permissions:
  contents: read

defaults:
  run:
    shell: bash

jobs:
# Builds the release tree once and publishes it as artifacts that the
# benchmark jobs download via the `artifact-name` output.
setup:
  name: Build harness + native libs
  runs-on: ubuntu-latest
  timeout-minutes: 180
  permissions:
    contents: read
  outputs:
    # Same name as the "Upload full build artifacts" step below.
    artifact-name: benchmarks-target
  steps:
    - uses: actions/checkout@v4
      with:
        # Manual runs may select a branch; otherwise use the triggering ref.
        ref: ${{ github.event.inputs.branch || github.ref }}

    - name: Free disk space
      run: scripts/ci/benchmarks/free-disk-space.sh

    - name: Ensure benchmark harness exists
      run: scripts/benchmarks/ensure-benchmark-harness-exists.sh

    # Toolchain and native dependency setup.
    - name: Install system dependencies
      uses: ./.github/actions/install-system-deps

    - name: Setup OpenSSL
      uses: ./.github/actions/setup-openssl

    - name: Setup Rust
      uses: ./.github/actions/setup-rust
      with:
        cache-key-prefix: benchmarks-setup
        use-sccache: true

    - name: Cache PDFium
      uses: ./.github/actions/cache-pdfium
      with:
        pdfium-version: ${{ env.PDFIUM_VERSION }}

    - name: Download PDFium runtime
      run: scripts/download_pdfium_runtime.sh

    - name: Setup ONNX Runtime
      uses: ./.github/actions/setup-onnx-runtime
      with:
        ort-version: ${{ env.ORT_VERSION }}

    - name: Install Task
      uses: ./.github/actions/install-task

    # Release builds.
    - name: Cache benchmark harness
      uses: ./.github/actions/cache-benchmark-harness
      with:
        build-profile: release

    - name: Build FFI library
      uses: ./.github/actions/build-rust-ffi
      with:
        crate-name: kreuzberg-ffi
        build-profile: release

    # Artifact publication, bracketed by disk-space reports.
    - name: Log disk space before artifact upload
      run: scripts/ci/validate/show-disk-space.sh "Disk space before artifact upload"

    - name: Upload full build artifacts (for Kreuzberg jobs)
      uses: actions/upload-artifact@v6
      with:
        name: benchmarks-target
        path: |
          target/release
        retention-days: 7

    - name: Upload benchmark harness binary (for third-party jobs)
      uses: actions/upload-artifact@v6
      with:
        name: benchmark-harness-binary
        path: |
          target/release/benchmark-harness
        retention-days: 7

    - name: Log disk space after artifact upload
      run: scripts/ci/validate/show-disk-space.sh "Disk space after artifact upload"
# Benchmarks the kreuzberg-native framework in single-file and batch modes,
# reusing the release tree published by the `setup` job.
bench-native:
  name: kreuzberg-native (${{ matrix.mode }})
  needs: setup
  runs-on: ubuntu-latest
  timeout-minutes: 180
  permissions:
    contents: read
  strategy:
    fail-fast: false
    matrix:
      mode: [single-file, batch]
  steps:
    - uses: actions/checkout@v4
      with:
        ref: ${{ github.event.inputs.branch || github.ref }}
    - name: Free disk space
      run: scripts/ci/benchmarks/free-disk-space.sh
    - name: Download build artifacts
      uses: actions/download-artifact@v7
      with:
        name: ${{ needs.setup.outputs.artifact-name }}
        path: target/release
    - name: Restore benchmark binary permissions
      run: scripts/benchmarks/restore-binary-permissions.sh
    - name: Install system dependencies
      uses: ./.github/actions/install-system-deps
    - name: Setup OpenSSL
      uses: ./.github/actions/setup-openssl
    - name: Setup Rust toolchain
      uses: ./.github/actions/setup-rust
      with:
        cache-key-prefix: benchmarks-native
    - name: Setup ONNX Runtime
      uses: ./.github/actions/setup-onnx-runtime
      with:
        ort-version: ${{ env.ORT_VERSION }}
    - name: Run benchmark
      env:
        FRAMEWORK: kreuzberg-native
        MODE: ${{ matrix.mode }}
        ITERATIONS: ${{ env.ITERATIONS }}
        # Dispatch inputs are empty on pull_request runs; fall back to the
        # workflow_dispatch default rather than exporting an empty TIMEOUT.
        TIMEOUT: ${{ github.event.inputs.timeout || '900' }}
      run: scripts/benchmarks/run-benchmark.sh
    # Results are uploaded even when the benchmark step fails.
    - name: Upload artifacts
      if: always()
      uses: actions/upload-artifact@v6
      with:
        name: benchmarks-kreuzberg-native-${{ matrix.mode }}-${{ github.run_id }}
        path: benchmark-results/
        retention-days: 30
# Benchmarks the kreuzberg-python-sync framework in single-file and batch
# modes after building the Python bindings.
bench-python-sync:
  name: kreuzberg-python-sync (${{ matrix.mode }})
  needs: setup
  runs-on: ubuntu-latest
  timeout-minutes: 180
  permissions:
    contents: read
  strategy:
    fail-fast: false
    matrix:
      mode: [single-file, batch]
  steps:
    - uses: actions/checkout@v4
      with:
        ref: ${{ github.event.inputs.branch || github.ref }}
    - name: Free disk space
      run: scripts/ci/benchmarks/free-disk-space.sh
    - name: Download build artifacts
      uses: actions/download-artifact@v7
      with:
        name: ${{ needs.setup.outputs.artifact-name }}
        path: target/release
    - name: Restore benchmark binary permissions
      run: scripts/benchmarks/restore-binary-permissions.sh
    - name: Install system dependencies
      uses: ./.github/actions/install-system-deps
    - name: Setup OpenSSL
      uses: ./.github/actions/setup-openssl
    - name: Setup Rust toolchain
      uses: ./.github/actions/setup-rust
      with:
        cache-key-prefix: benchmarks-python-sync
    - name: Setup Python
      uses: ./.github/actions/setup-python-env
      with:
        python-version: "3.11"
        cache-prefix: benchmark-python
    - name: Setup ONNX Runtime
      uses: ./.github/actions/setup-onnx-runtime
      with:
        ort-version: ${{ env.ORT_VERSION }}
    - name: Install Task
      uses: ./.github/actions/install-task
    - name: Build Python bindings
      run: task python:build:bindings
    - name: Run benchmark
      env:
        FRAMEWORK: kreuzberg-python-sync
        MODE: ${{ matrix.mode }}
        ITERATIONS: ${{ env.ITERATIONS }}
        # Dispatch inputs are empty on pull_request runs; fall back to the
        # workflow_dispatch default rather than exporting an empty TIMEOUT.
        TIMEOUT: ${{ github.event.inputs.timeout || '900' }}
      run: scripts/benchmarks/run-benchmark.sh
    # Results are uploaded even when the benchmark step fails.
    - name: Upload artifacts
      if: always()
      uses: actions/upload-artifact@v6
      with:
        name: benchmarks-kreuzberg-python-sync-${{ matrix.mode }}-${{ github.run_id }}
        path: benchmark-results/
        retention-days: 30
# Benchmarks the kreuzberg-python-async framework in single-file and batch
# modes after building the Python bindings.
bench-python-async:
  name: kreuzberg-python-async (${{ matrix.mode }})
  needs: setup
  runs-on: ubuntu-latest
  timeout-minutes: 180
  permissions:
    contents: read
  strategy:
    fail-fast: false
    matrix:
      mode: [single-file, batch]
  steps:
    - uses: actions/checkout@v4
      with:
        ref: ${{ github.event.inputs.branch || github.ref }}
    - name: Free disk space
      run: scripts/ci/benchmarks/free-disk-space.sh
    - name: Download build artifacts
      uses: actions/download-artifact@v7
      with:
        name: ${{ needs.setup.outputs.artifact-name }}
        path: target/release
    - name: Restore benchmark binary permissions
      run: scripts/benchmarks/restore-binary-permissions.sh
    - name: Install system dependencies
      uses: ./.github/actions/install-system-deps
    - name: Setup OpenSSL
      uses: ./.github/actions/setup-openssl
    - name: Setup Rust toolchain
      uses: ./.github/actions/setup-rust
      with:
        cache-key-prefix: benchmarks-python-async
    - name: Setup Python
      uses: ./.github/actions/setup-python-env
      with:
        python-version: "3.11"
        cache-prefix: benchmark-python
    - name: Setup ONNX Runtime
      uses: ./.github/actions/setup-onnx-runtime
      with:
        ort-version: ${{ env.ORT_VERSION }}
    - name: Install Task
      uses: ./.github/actions/install-task
    - name: Build Python bindings
      run: task python:build:bindings
    - name: Run benchmark
      env:
        FRAMEWORK: kreuzberg-python-async
        MODE: ${{ matrix.mode }}
        ITERATIONS: ${{ env.ITERATIONS }}
        # Dispatch inputs are empty on pull_request runs; fall back to the
        # workflow_dispatch default rather than exporting an empty TIMEOUT.
        TIMEOUT: ${{ github.event.inputs.timeout || '900' }}
      run: scripts/benchmarks/run-benchmark.sh
    # Results are uploaded even when the benchmark step fails.
    - name: Upload artifacts
      if: always()
      uses: actions/upload-artifact@v6
      with:
        name: benchmarks-kreuzberg-python-async-${{ matrix.mode }}-${{ github.run_id }}
        path: benchmark-results/
        retention-days: 30
# Benchmarks the kreuzberg-python-batch framework in single-file and batch
# modes after building the Python bindings.
bench-python-batch:
  name: kreuzberg-python-batch (${{ matrix.mode }})
  needs: setup
  runs-on: ubuntu-latest
  timeout-minutes: 180
  permissions:
    contents: read
  strategy:
    fail-fast: false
    matrix:
      mode: [single-file, batch]
  steps:
    - uses: actions/checkout@v4
      with:
        ref: ${{ github.event.inputs.branch || github.ref }}
    - name: Free disk space
      run: scripts/ci/benchmarks/free-disk-space.sh
    - name: Download build artifacts
      uses: actions/download-artifact@v7
      with:
        name: ${{ needs.setup.outputs.artifact-name }}
        path: target/release
    - name: Restore benchmark binary permissions
      run: scripts/benchmarks/restore-binary-permissions.sh
    - name: Install system dependencies
      uses: ./.github/actions/install-system-deps
    - name: Setup OpenSSL
      uses: ./.github/actions/setup-openssl
    - name: Setup Rust toolchain
      uses: ./.github/actions/setup-rust
      with:
        cache-key-prefix: benchmarks-python-batch
    - name: Setup Python
      uses: ./.github/actions/setup-python-env
      with:
        python-version: "3.11"
        cache-prefix: benchmark-python
    - name: Setup ONNX Runtime
      uses: ./.github/actions/setup-onnx-runtime
      with:
        ort-version: ${{ env.ORT_VERSION }}
    - name: Install Task
      uses: ./.github/actions/install-task
    - name: Build Python bindings
      run: task python:build:bindings
    - name: Run benchmark
      env:
        FRAMEWORK: kreuzberg-python-batch
        MODE: ${{ matrix.mode }}
        ITERATIONS: ${{ env.ITERATIONS }}
        # Dispatch inputs are empty on pull_request runs; fall back to the
        # workflow_dispatch default rather than exporting an empty TIMEOUT.
        TIMEOUT: ${{ github.event.inputs.timeout || '900' }}
      run: scripts/benchmarks/run-benchmark.sh
    # Results are uploaded even when the benchmark step fails.
    - name: Upload artifacts
      if: always()
      uses: actions/upload-artifact@v6
      with:
        name: benchmarks-kreuzberg-python-batch-${{ matrix.mode }}-${{ github.run_id }}
        path: benchmark-results/
        retention-days: 30
# Benchmarks the kreuzberg-node-async framework in single-file and batch
# modes after building the Node bindings.
bench-node-async:
  name: kreuzberg-node-async (${{ matrix.mode }})
  needs: setup
  runs-on: ubuntu-latest
  timeout-minutes: 180
  permissions:
    contents: read
  strategy:
    fail-fast: false
    matrix:
      mode: [single-file, batch]
  steps:
    - uses: actions/checkout@v4
      with:
        ref: ${{ github.event.inputs.branch || github.ref }}
    - name: Free disk space
      run: scripts/ci/benchmarks/free-disk-space.sh
    - name: Download build artifacts
      uses: actions/download-artifact@v7
      with:
        name: ${{ needs.setup.outputs.artifact-name }}
        path: target/release
    - name: Restore benchmark binary permissions
      run: scripts/benchmarks/restore-binary-permissions.sh
    - name: Install system dependencies
      uses: ./.github/actions/install-system-deps
    - name: Setup OpenSSL
      uses: ./.github/actions/setup-openssl
    - name: Setup Rust toolchain
      uses: ./.github/actions/setup-rust
      with:
        cache-key-prefix: benchmarks-node-async
        use-sccache: 'false'
    - name: Setup Node
      uses: ./.github/actions/setup-node-workspace
    - name: Verify Node setup (async)
      run: scripts/ci/benchmarks/verify-node-setup.sh "Node setup (async)"
    - name: Setup ONNX Runtime
      uses: ./.github/actions/setup-onnx-runtime
      with:
        ort-version: ${{ env.ORT_VERSION }}
    - name: Install Task
      uses: ./.github/actions/install-task
    - name: Build Node bindings and install locally
      env:
        TARGET: x86_64-unknown-linux-gnu
      run: task node:build:bindings
    - name: Run benchmark
      env:
        FRAMEWORK: kreuzberg-node-async
        MODE: ${{ matrix.mode }}
        ITERATIONS: ${{ env.ITERATIONS }}
        # Dispatch inputs are empty on pull_request runs; fall back to the
        # workflow_dispatch default rather than exporting an empty TIMEOUT.
        TIMEOUT: ${{ github.event.inputs.timeout || '900' }}
      run: scripts/benchmarks/run-benchmark.sh
    # Results are uploaded even when the benchmark step fails.
    - name: Upload artifacts
      if: always()
      uses: actions/upload-artifact@v6
      with:
        name: benchmarks-kreuzberg-node-async-${{ matrix.mode }}-${{ github.run_id }}
        path: benchmark-results/
        retention-days: 30
# Benchmarks the kreuzberg-node-batch framework in single-file and batch
# modes after building the Node bindings.
bench-node-batch:
  name: kreuzberg-node-batch (${{ matrix.mode }})
  needs: setup
  runs-on: ubuntu-latest
  timeout-minutes: 180
  permissions:
    contents: read
  strategy:
    fail-fast: false
    matrix:
      mode: [single-file, batch]
  steps:
    - uses: actions/checkout@v4
      with:
        ref: ${{ github.event.inputs.branch || github.ref }}
    - name: Free disk space
      run: scripts/ci/benchmarks/free-disk-space.sh
    - name: Download build artifacts
      uses: actions/download-artifact@v7
      with:
        name: ${{ needs.setup.outputs.artifact-name }}
        path: target/release
    - name: Restore benchmark binary permissions
      run: scripts/benchmarks/restore-binary-permissions.sh
    - name: Install system dependencies
      uses: ./.github/actions/install-system-deps
    - name: Setup OpenSSL
      uses: ./.github/actions/setup-openssl
    - name: Setup Rust toolchain
      uses: ./.github/actions/setup-rust
      with:
        cache-key-prefix: benchmarks-node-batch
        use-sccache: 'false'
    - name: Setup Node
      uses: ./.github/actions/setup-node-workspace
    - name: Verify Node setup (batch)
      run: scripts/ci/benchmarks/verify-node-setup.sh "Node setup (batch)"
    - name: Setup ONNX Runtime
      uses: ./.github/actions/setup-onnx-runtime
      with:
        ort-version: ${{ env.ORT_VERSION }}
    - name: Install Task
      uses: ./.github/actions/install-task
    - name: Build Node bindings and install locally
      env:
        TARGET: x86_64-unknown-linux-gnu
      run: task node:build:bindings
    - name: Run benchmark
      env:
        FRAMEWORK: kreuzberg-node-batch
        MODE: ${{ matrix.mode }}
        ITERATIONS: ${{ env.ITERATIONS }}
        # Dispatch inputs are empty on pull_request runs; fall back to the
        # workflow_dispatch default rather than exporting an empty TIMEOUT.
        TIMEOUT: ${{ github.event.inputs.timeout || '900' }}
      run: scripts/benchmarks/run-benchmark.sh
    # Results are uploaded even when the benchmark step fails.
    - name: Upload artifacts
      if: always()
      uses: actions/upload-artifact@v6
      with:
        name: benchmarks-kreuzberg-node-batch-${{ matrix.mode }}-${{ github.run_id }}
        path: benchmark-results/
        retention-days: 30
# Benchmarks the kreuzberg-wasm-async framework in single-file and batch
# modes after building the WASM bindings.
bench-wasm-async:
  name: kreuzberg-wasm-async (${{ matrix.mode }})
  needs: setup
  runs-on: ubuntu-latest
  timeout-minutes: 180
  permissions:
    contents: read
  strategy:
    fail-fast: false
    matrix:
      mode: [single-file, batch]
  steps:
    - uses: actions/checkout@v4
      with:
        ref: ${{ github.event.inputs.branch || github.ref }}
    - name: Free disk space
      run: scripts/ci/benchmarks/free-disk-space.sh
    - name: Download build artifacts
      uses: actions/download-artifact@v7
      with:
        name: ${{ needs.setup.outputs.artifact-name }}
        path: target/release
    - name: Restore benchmark binary permissions
      run: scripts/benchmarks/restore-binary-permissions.sh
    - name: Install system dependencies
      uses: ./.github/actions/install-system-deps
    - name: Setup OpenSSL
      uses: ./.github/actions/setup-openssl
    - name: Setup Rust toolchain
      uses: ./.github/actions/setup-rust
      with:
        cache-key-prefix: benchmarks-wasm-async
    - name: Setup Node
      uses: ./.github/actions/setup-node-workspace
    - name: Install Task
      uses: ./.github/actions/install-task
    - name: Build WASM bindings
      run: task wasm:build:bindings
    - name: Run benchmark
      env:
        FRAMEWORK: kreuzberg-wasm-async
        MODE: ${{ matrix.mode }}
        ITERATIONS: ${{ env.ITERATIONS }}
        # Dispatch inputs are empty on pull_request runs; fall back to the
        # workflow_dispatch default rather than exporting an empty TIMEOUT.
        TIMEOUT: ${{ github.event.inputs.timeout || '900' }}
      run: scripts/benchmarks/run-benchmark.sh
    # Results are uploaded even when the benchmark step fails.
    - name: Upload artifacts
      if: always()
      uses: actions/upload-artifact@v6
      with:
        name: benchmarks-kreuzberg-wasm-async-${{ matrix.mode }}-${{ github.run_id }}
        path: benchmark-results/
        retention-days: 30
# Benchmarks the kreuzberg-wasm-batch framework in single-file and batch
# modes after building the WASM bindings.
bench-wasm-batch:
  name: kreuzberg-wasm-batch (${{ matrix.mode }})
  needs: setup
  runs-on: ubuntu-latest
  timeout-minutes: 180
  permissions:
    contents: read
  strategy:
    fail-fast: false
    matrix:
      mode: [single-file, batch]
  steps:
    - uses: actions/checkout@v4
      with:
        ref: ${{ github.event.inputs.branch || github.ref }}
    - name: Free disk space
      run: scripts/ci/benchmarks/free-disk-space.sh
    - name: Download build artifacts
      uses: actions/download-artifact@v7
      with:
        name: ${{ needs.setup.outputs.artifact-name }}
        path: target/release
    - name: Restore benchmark binary permissions
      run: scripts/benchmarks/restore-binary-permissions.sh
    - name: Install system dependencies
      uses: ./.github/actions/install-system-deps
    - name: Setup OpenSSL
      uses: ./.github/actions/setup-openssl
    - name: Setup Rust toolchain
      uses: ./.github/actions/setup-rust
      with:
        cache-key-prefix: benchmarks-wasm-batch
    - name: Setup Node
      uses: ./.github/actions/setup-node-workspace
    - name: Install Task
      uses: ./.github/actions/install-task
    - name: Build WASM bindings
      run: task wasm:build:bindings
    - name: Run benchmark
      env:
        FRAMEWORK: kreuzberg-wasm-batch
        MODE: ${{ matrix.mode }}
        ITERATIONS: ${{ env.ITERATIONS }}
        # Dispatch inputs are empty on pull_request runs; fall back to the
        # workflow_dispatch default rather than exporting an empty TIMEOUT.
        TIMEOUT: ${{ github.event.inputs.timeout || '900' }}
      run: scripts/benchmarks/run-benchmark.sh
    # Results are uploaded even when the benchmark step fails.
    - name: Upload artifacts
      if: always()
      uses: actions/upload-artifact@v6
      with:
        name: benchmarks-kreuzberg-wasm-batch-${{ matrix.mode }}-${{ github.run_id }}
        path: benchmark-results/
        retention-days: 30
# Benchmarks the kreuzberg-ruby-sync framework in single-file and batch
# modes after building the FFI library and the Ruby native gem.
bench-ruby-sync:
  name: kreuzberg-ruby-sync (${{ matrix.mode }})
  needs: setup
  runs-on: ubuntu-latest
  timeout-minutes: 180
  permissions:
    contents: read
  strategy:
    fail-fast: false
    matrix:
      mode: [single-file, batch]
  steps:
    - uses: actions/checkout@v4
      with:
        ref: ${{ github.event.inputs.branch || github.ref }}
    - name: Free disk space
      run: scripts/ci/benchmarks/free-disk-space.sh
    - name: Download build artifacts
      uses: actions/download-artifact@v7
      with:
        name: ${{ needs.setup.outputs.artifact-name }}
        path: target/release
    - name: Restore benchmark binary permissions
      run: scripts/benchmarks/restore-binary-permissions.sh
    - name: Install system dependencies
      uses: ./.github/actions/install-system-deps
    - name: Setup OpenSSL
      uses: ./.github/actions/setup-openssl
    - name: Setup Rust toolchain
      uses: ./.github/actions/setup-rust
      with:
        cache-key-prefix: benchmarks-ruby-sync
    - name: Setup Ruby
      uses: ruby/setup-ruby@v1
      with:
        ruby-version: "3.2"
    - name: Setup ONNX Runtime
      uses: ./.github/actions/setup-onnx-runtime
      with:
        ort-version: ${{ env.ORT_VERSION }}
    - name: Install Task
      uses: ./.github/actions/install-task
    - name: Build FFI library
      id: ffi
      shell: bash
      run: |
        # The `if` below must see cargo's exit status, not tee's.
        set -o pipefail
        echo "=== Building FFI Library ==="
        echo "Building kreuzberg-ffi from source (no caching)"
        # Try building with sccache first (if enabled)
        if cargo build --release --package kreuzberg-ffi 2>&1 | tee /tmp/build.log; then
          echo "✓ Build succeeded"
        else
          # Check if it was an sccache error (DNS failures, cache storage issues)
          if grep -q "sccache.*error" /tmp/build.log || grep -q "cache storage failed" /tmp/build.log || grep -q "dns error" /tmp/build.log; then
            echo "⚠️ sccache failed (likely transient network issue), retrying without cache..."
            export RUSTC_WRAPPER=""
            export SCCACHE_GHA_ENABLED=false
            cargo build --release --package kreuzberg-ffi
          else
            # Not an sccache error, propagate the failure
            exit 1
          fi
        fi
    - name: Set library paths for FFI
      shell: bash
      run: |
        # Append the previous value only when it is non-empty; a trailing ':'
        # adds an empty search-path entry (ld.so reads that as the current
        # working directory).
        echo "PKG_CONFIG_PATH=${{ github.workspace }}/crates/kreuzberg-ffi${PKG_CONFIG_PATH:+:$PKG_CONFIG_PATH}" >> "$GITHUB_ENV"
        echo "LD_LIBRARY_PATH=${{ github.workspace }}/target/release${LD_LIBRARY_PATH:+:$LD_LIBRARY_PATH}" >> "$GITHUB_ENV"
    - name: Vendor kreuzberg core for Ruby
      run: scripts/ci/ruby/vendor-kreuzberg-core.sh
    - name: Build Ruby native gem
      env:
        PLATFORM: x86_64-linux
      run: task ruby:build:bindings
    - name: Run benchmark
      env:
        FRAMEWORK: kreuzberg-ruby-sync
        MODE: ${{ matrix.mode }}
        ITERATIONS: ${{ env.ITERATIONS }}
        # Dispatch inputs are empty on pull_request runs; fall back to the
        # workflow_dispatch default rather than exporting an empty TIMEOUT.
        TIMEOUT: ${{ github.event.inputs.timeout || '900' }}
      run: scripts/benchmarks/run-benchmark.sh
    # Results are uploaded even when the benchmark step fails.
    - name: Upload artifacts
      if: always()
      uses: actions/upload-artifact@v6
      with:
        name: benchmarks-kreuzberg-ruby-sync-${{ matrix.mode }}-${{ github.run_id }}
        path: benchmark-results/
        retention-days: 30
# Benchmarks the kreuzberg-ruby-batch framework in single-file and batch
# modes after building the FFI library and the Ruby native gem.
bench-ruby-batch:
  name: kreuzberg-ruby-batch (${{ matrix.mode }})
  needs: setup
  runs-on: ubuntu-latest
  timeout-minutes: 180
  permissions:
    contents: read
  strategy:
    fail-fast: false
    matrix:
      mode: [single-file, batch]
  steps:
    - uses: actions/checkout@v4
      with:
        ref: ${{ github.event.inputs.branch || github.ref }}
    - name: Free disk space
      run: scripts/ci/benchmarks/free-disk-space.sh
    - name: Download build artifacts
      uses: actions/download-artifact@v7
      with:
        name: ${{ needs.setup.outputs.artifact-name }}
        path: target/release
    - name: Restore benchmark binary permissions
      run: scripts/benchmarks/restore-binary-permissions.sh
    - name: Install system dependencies
      uses: ./.github/actions/install-system-deps
    - name: Setup OpenSSL
      uses: ./.github/actions/setup-openssl
    - name: Setup Rust toolchain
      uses: ./.github/actions/setup-rust
      with:
        cache-key-prefix: benchmarks-ruby-batch
    - name: Setup Ruby
      uses: ruby/setup-ruby@v1
      with:
        ruby-version: "3.2"
    - name: Setup ONNX Runtime
      uses: ./.github/actions/setup-onnx-runtime
      with:
        ort-version: ${{ env.ORT_VERSION }}
    - name: Install Task
      uses: ./.github/actions/install-task
    - name: Build FFI library
      id: ffi
      shell: bash
      run: |
        # The `if` below must see cargo's exit status, not tee's.
        set -o pipefail
        echo "=== Building FFI Library ==="
        echo "Building kreuzberg-ffi from source (no caching)"
        # Try building with sccache first (if enabled)
        if cargo build --release --package kreuzberg-ffi 2>&1 | tee /tmp/build.log; then
          echo "✓ Build succeeded"
        else
          # Check if it was an sccache error (DNS failures, cache storage issues)
          if grep -q "sccache.*error" /tmp/build.log || grep -q "cache storage failed" /tmp/build.log || grep -q "dns error" /tmp/build.log; then
            echo "⚠️ sccache failed (likely transient network issue), retrying without cache..."
            export RUSTC_WRAPPER=""
            export SCCACHE_GHA_ENABLED=false
            cargo build --release --package kreuzberg-ffi
          else
            # Not an sccache error, propagate the failure
            exit 1
          fi
        fi
    - name: Set library paths for FFI
      shell: bash
      run: |
        # Append the previous value only when it is non-empty; a trailing ':'
        # adds an empty search-path entry (ld.so reads that as the current
        # working directory).
        echo "PKG_CONFIG_PATH=${{ github.workspace }}/crates/kreuzberg-ffi${PKG_CONFIG_PATH:+:$PKG_CONFIG_PATH}" >> "$GITHUB_ENV"
        echo "LD_LIBRARY_PATH=${{ github.workspace }}/target/release${LD_LIBRARY_PATH:+:$LD_LIBRARY_PATH}" >> "$GITHUB_ENV"
    - name: Vendor kreuzberg core for Ruby
      run: scripts/ci/ruby/vendor-kreuzberg-core.sh
    - name: Build Ruby native gem
      env:
        PLATFORM: x86_64-linux
      run: task ruby:build:bindings
    - name: Run benchmark
      env:
        FRAMEWORK: kreuzberg-ruby-batch
        MODE: ${{ matrix.mode }}
        ITERATIONS: ${{ env.ITERATIONS }}
        # Dispatch inputs are empty on pull_request runs; fall back to the
        # workflow_dispatch default rather than exporting an empty TIMEOUT.
        TIMEOUT: ${{ github.event.inputs.timeout || '900' }}
      run: scripts/benchmarks/run-benchmark.sh
    # Results are uploaded even when the benchmark step fails.
    - name: Upload artifacts
      if: always()
      uses: actions/upload-artifact@v6
      with:
        name: benchmarks-kreuzberg-ruby-batch-${{ matrix.mode }}-${{ github.run_id }}
        path: benchmark-results/
        retention-days: 30
# Benchmarks the kreuzberg-go-sync framework in single-file and batch modes
# after building the FFI library and the Go bindings.
bench-go-sync:
  name: kreuzberg-go-sync (${{ matrix.mode }})
  needs: setup
  runs-on: ubuntu-latest
  timeout-minutes: 180
  permissions:
    contents: read
  strategy:
    fail-fast: false
    matrix:
      mode: [single-file, batch]
  steps:
    - uses: actions/checkout@v4
      with:
        ref: ${{ github.event.inputs.branch || github.ref }}
    - name: Free disk space
      run: scripts/ci/benchmarks/free-disk-space.sh
    - name: Download build artifacts
      uses: actions/download-artifact@v7
      with:
        name: ${{ needs.setup.outputs.artifact-name }}
        path: target/release
    - name: Restore benchmark binary permissions
      run: scripts/benchmarks/restore-binary-permissions.sh
    - name: Install system dependencies
      uses: ./.github/actions/install-system-deps
    - name: Setup OpenSSL
      uses: ./.github/actions/setup-openssl
    - name: Setup Rust toolchain
      uses: ./.github/actions/setup-rust
      with:
        cache-key-prefix: benchmarks-go-sync
    - name: Setup Go
      uses: actions/setup-go@v5
      with:
        go-version: "1.25.x"
        cache-dependency-path: packages/go/v4/go.sum
    - name: Setup ONNX Runtime
      uses: ./.github/actions/setup-onnx-runtime
      with:
        ort-version: ${{ env.ORT_VERSION }}
    - name: Install Task
      uses: ./.github/actions/install-task
    - name: Build FFI library
      id: ffi
      shell: bash
      run: |
        # The `if` below must see cargo's exit status, not tee's.
        set -o pipefail
        echo "=== Building FFI Library ==="
        echo "Building kreuzberg-ffi from source (no caching)"
        # Try building with sccache first (if enabled)
        if cargo build --release --package kreuzberg-ffi 2>&1 | tee /tmp/build.log; then
          echo "✓ Build succeeded"
        else
          # Check if it was an sccache error (DNS failures, cache storage issues)
          if grep -q "sccache.*error" /tmp/build.log || grep -q "cache storage failed" /tmp/build.log || grep -q "dns error" /tmp/build.log; then
            echo "⚠️ sccache failed (likely transient network issue), retrying without cache..."
            export RUSTC_WRAPPER=""
            export SCCACHE_GHA_ENABLED=false
            cargo build --release --package kreuzberg-ffi
          else
            # Not an sccache error, propagate the failure
            exit 1
          fi
        fi
    - name: Set library paths for FFI
      shell: bash
      run: |
        # Append the previous value only when it is non-empty; a trailing ':'
        # adds an empty search-path entry (ld.so reads that as the current
        # working directory).
        echo "PKG_CONFIG_PATH=${{ github.workspace }}/crates/kreuzberg-ffi${PKG_CONFIG_PATH:+:$PKG_CONFIG_PATH}" >> "$GITHUB_ENV"
        echo "LD_LIBRARY_PATH=${{ github.workspace }}/target/release${LD_LIBRARY_PATH:+:$LD_LIBRARY_PATH}" >> "$GITHUB_ENV"
    - name: Prime Go modules
      working-directory: packages/go/v4
      run: go mod download
    - name: Prime benchmark Go modules
      working-directory: tools/benchmark-harness/scripts
      run: go mod download
    - name: Build Go bindings
      run: task go:build:bindings
    - name: Run benchmark
      env:
        FRAMEWORK: kreuzberg-go-sync
        MODE: ${{ matrix.mode }}
        ITERATIONS: ${{ env.ITERATIONS }}
        KREUZBERG_BENCHMARK_DEBUG: "true"
        # Dispatch inputs are empty on pull_request runs; fall back to the
        # workflow_dispatch default rather than exporting an empty TIMEOUT.
        TIMEOUT: ${{ github.event.inputs.timeout || '900' }}
      run: scripts/benchmarks/run-benchmark.sh
    # Results are uploaded even when the benchmark step fails.
    - name: Upload artifacts
      if: always()
      uses: actions/upload-artifact@v6
      with:
        name: benchmarks-kreuzberg-go-sync-${{ matrix.mode }}-${{ github.run_id }}
        path: benchmark-results/
        retention-days: 30
# Benchmarks the kreuzberg-go-batch framework in single-file and batch modes
# after building the FFI library and the Go bindings.
bench-go-batch:
  name: kreuzberg-go-batch (${{ matrix.mode }})
  needs: setup
  runs-on: ubuntu-latest
  timeout-minutes: 180
  permissions:
    contents: read
  strategy:
    fail-fast: false
    matrix:
      mode: [single-file, batch]
  steps:
    - uses: actions/checkout@v4
      with:
        ref: ${{ github.event.inputs.branch || github.ref }}
    - name: Free disk space
      run: scripts/ci/benchmarks/free-disk-space.sh
    - name: Download build artifacts
      uses: actions/download-artifact@v7
      with:
        name: ${{ needs.setup.outputs.artifact-name }}
        path: target/release
    - name: Restore benchmark binary permissions
      run: scripts/benchmarks/restore-binary-permissions.sh
    - name: Install system dependencies
      uses: ./.github/actions/install-system-deps
    - name: Setup OpenSSL
      uses: ./.github/actions/setup-openssl
    - name: Setup Rust toolchain
      uses: ./.github/actions/setup-rust
      with:
        cache-key-prefix: benchmarks-go-batch
    - name: Setup Go
      uses: actions/setup-go@v5
      with:
        go-version: "1.25.x"
        cache-dependency-path: packages/go/v4/go.sum
    - name: Setup ONNX Runtime
      uses: ./.github/actions/setup-onnx-runtime
      with:
        ort-version: ${{ env.ORT_VERSION }}
    - name: Install Task
      uses: ./.github/actions/install-task
    - name: Build FFI library
      id: ffi
      shell: bash
      run: |
        # The `if` below must see cargo's exit status, not tee's.
        set -o pipefail
        echo "=== Building FFI Library ==="
        echo "Building kreuzberg-ffi from source (no caching)"
        # Try building with sccache first (if enabled)
        if cargo build --release --package kreuzberg-ffi 2>&1 | tee /tmp/build.log; then
          echo "✓ Build succeeded"
        else
          # Check if it was an sccache error (DNS failures, cache storage issues)
          if grep -q "sccache.*error" /tmp/build.log || grep -q "cache storage failed" /tmp/build.log || grep -q "dns error" /tmp/build.log; then
            echo "⚠️ sccache failed (likely transient network issue), retrying without cache..."
            export RUSTC_WRAPPER=""
            export SCCACHE_GHA_ENABLED=false
            cargo build --release --package kreuzberg-ffi
          else
            # Not an sccache error, propagate the failure
            exit 1
          fi
        fi
    - name: Set library paths for FFI
      shell: bash
      run: |
        # Append the previous value only when it is non-empty; a trailing ':'
        # adds an empty search-path entry (ld.so reads that as the current
        # working directory).
        echo "PKG_CONFIG_PATH=${{ github.workspace }}/crates/kreuzberg-ffi${PKG_CONFIG_PATH:+:$PKG_CONFIG_PATH}" >> "$GITHUB_ENV"
        echo "LD_LIBRARY_PATH=${{ github.workspace }}/target/release${LD_LIBRARY_PATH:+:$LD_LIBRARY_PATH}" >> "$GITHUB_ENV"
    - name: Prime Go modules
      working-directory: packages/go/v4
      run: go mod download
    - name: Prime benchmark Go modules
      working-directory: tools/benchmark-harness/scripts
      run: go mod download
    - name: Build Go bindings
      run: task go:build:bindings
    - name: Run benchmark
      env:
        FRAMEWORK: kreuzberg-go-batch
        MODE: ${{ matrix.mode }}
        ITERATIONS: ${{ env.ITERATIONS }}
        KREUZBERG_BENCHMARK_DEBUG: "true"
        # Dispatch inputs are empty on pull_request runs; fall back to the
        # workflow_dispatch default rather than exporting an empty TIMEOUT.
        TIMEOUT: ${{ github.event.inputs.timeout || '900' }}
      run: scripts/benchmarks/run-benchmark.sh
    # Results are uploaded even when the benchmark step fails.
    - name: Upload artifacts
      if: always()
      uses: actions/upload-artifact@v6
      with:
        name: benchmarks-kreuzberg-go-batch-${{ matrix.mode }}-${{ github.run_id }}
        path: benchmark-results/
        retention-days: 30
# Benchmarks the kreuzberg-java-sync framework in single-file and batch
# modes after building the FFI library and the Java bindings.
bench-java-sync:
  name: kreuzberg-java-sync (${{ matrix.mode }})
  needs: setup
  runs-on: ubuntu-latest
  timeout-minutes: 180
  permissions:
    contents: read
  strategy:
    fail-fast: false
    matrix:
      mode: [single-file, batch]
  steps:
    - uses: actions/checkout@v4
      with:
        ref: ${{ github.event.inputs.branch || github.ref }}
    - name: Free disk space
      run: scripts/ci/benchmarks/free-disk-space.sh
    - name: Download build artifacts
      uses: actions/download-artifact@v7
      with:
        name: ${{ needs.setup.outputs.artifact-name }}
        path: target/release
    - name: Restore benchmark binary permissions
      run: scripts/benchmarks/restore-binary-permissions.sh
    - name: Install system dependencies
      uses: ./.github/actions/install-system-deps
    - name: Setup OpenSSL
      uses: ./.github/actions/setup-openssl
    - name: Setup Rust toolchain
      uses: ./.github/actions/setup-rust
      with:
        cache-key-prefix: benchmarks-java-sync
    - name: Setup Java
      uses: actions/setup-java@v4
      id: setup-java
      with:
        distribution: 'temurin'
        java-version: '25'
    - name: Setup ONNX Runtime
      uses: ./.github/actions/setup-onnx-runtime
      with:
        ort-version: ${{ env.ORT_VERSION }}
    - name: Install Task
      uses: ./.github/actions/install-task
    - name: Build FFI library
      id: ffi
      shell: bash
      run: |
        # The `if` below must see cargo's exit status, not tee's.
        set -o pipefail
        echo "=== Building FFI Library ==="
        echo "Building kreuzberg-ffi from source (no caching)"
        # Try building with sccache first (if enabled)
        if cargo build --release --package kreuzberg-ffi 2>&1 | tee /tmp/build.log; then
          echo "✓ Build succeeded"
        else
          # Check if it was an sccache error (DNS failures, cache storage issues)
          if grep -q "sccache.*error" /tmp/build.log || grep -q "cache storage failed" /tmp/build.log || grep -q "dns error" /tmp/build.log; then
            echo "⚠️ sccache failed (likely transient network issue), retrying without cache..."
            export RUSTC_WRAPPER=""
            export SCCACHE_GHA_ENABLED=false
            cargo build --release --package kreuzberg-ffi
          else
            # Not an sccache error, propagate the failure
            exit 1
          fi
        fi
    - name: Set library paths for FFI
      shell: bash
      run: |
        # Append the previous value only when it is non-empty; a trailing ':'
        # adds an empty search-path entry (ld.so reads that as the current
        # working directory).
        echo "PKG_CONFIG_PATH=${{ github.workspace }}/crates/kreuzberg-ffi${PKG_CONFIG_PATH:+:$PKG_CONFIG_PATH}" >> "$GITHUB_ENV"
        echo "LD_LIBRARY_PATH=${{ github.workspace }}/target/release${LD_LIBRARY_PATH:+:$LD_LIBRARY_PATH}" >> "$GITHUB_ENV"
    - name: Build Java bindings
      run: task java:build:bindings
    - name: Run benchmark
      env:
        FRAMEWORK: kreuzberg-java-sync
        MODE: ${{ matrix.mode }}
        ITERATIONS: ${{ env.ITERATIONS }}
        # Dispatch inputs are empty on pull_request runs; fall back to the
        # workflow_dispatch default rather than exporting an empty TIMEOUT.
        TIMEOUT: ${{ github.event.inputs.timeout || '900' }}
      run: scripts/benchmarks/run-benchmark.sh
    # Results are uploaded even when the benchmark step fails.
    - name: Upload artifacts
      if: always()
      uses: actions/upload-artifact@v6
      with:
        name: benchmarks-kreuzberg-java-sync-${{ matrix.mode }}-${{ github.run_id }}
        path: benchmark-results/
        retention-days: 30
# Benchmarks the kreuzberg-csharp-sync framework in single-file and batch
# modes after building the FFI library and the C# bindings.
bench-csharp-sync:
  name: kreuzberg-csharp-sync (${{ matrix.mode }})
  needs: setup
  runs-on: ubuntu-latest
  timeout-minutes: 180
  permissions:
    contents: read
  strategy:
    fail-fast: false
    matrix:
      mode: [single-file, batch]
  steps:
    - uses: actions/checkout@v4
      with:
        ref: ${{ github.event.inputs.branch || github.ref }}
    - name: Free disk space
      run: scripts/ci/benchmarks/free-disk-space.sh
    - name: Download build artifacts
      uses: actions/download-artifact@v7
      with:
        name: ${{ needs.setup.outputs.artifact-name }}
        path: target/release
    - name: Restore benchmark binary permissions
      run: scripts/benchmarks/restore-binary-permissions.sh
    - name: Install system dependencies
      uses: ./.github/actions/install-system-deps
    - name: Setup OpenSSL
      uses: ./.github/actions/setup-openssl
    - name: Setup Rust toolchain
      uses: ./.github/actions/setup-rust
      with:
        cache-key-prefix: benchmarks-csharp-sync
    - name: Setup .NET
      uses: actions/setup-dotnet@v5
      with:
        dotnet-version: "10.0.x"
    - name: Setup ONNX Runtime
      uses: ./.github/actions/setup-onnx-runtime
      with:
        ort-version: ${{ env.ORT_VERSION }}
    - name: Install Task
      uses: ./.github/actions/install-task
    - name: Build FFI library
      id: ffi
      shell: bash
      run: |
        # The `if` below must see cargo's exit status, not tee's.
        set -o pipefail
        echo "=== Building FFI Library ==="
        echo "Building kreuzberg-ffi from source (no caching)"
        # Try building with sccache first (if enabled)
        if cargo build --release --package kreuzberg-ffi 2>&1 | tee /tmp/build.log; then
          echo "✓ Build succeeded"
        else
          # Check if it was an sccache error (DNS failures, cache storage issues)
          if grep -q "sccache.*error" /tmp/build.log || grep -q "cache storage failed" /tmp/build.log || grep -q "dns error" /tmp/build.log; then
            echo "⚠️ sccache failed (likely transient network issue), retrying without cache..."
            export RUSTC_WRAPPER=""
            export SCCACHE_GHA_ENABLED=false
            cargo build --release --package kreuzberg-ffi
          else
            # Not an sccache error, propagate the failure
            exit 1
          fi
        fi
    - name: Set library paths for FFI
      shell: bash
      run: |
        # Append the previous value only when it is non-empty; a trailing ':'
        # adds an empty search-path entry (ld.so reads that as the current
        # working directory).
        echo "PKG_CONFIG_PATH=${{ github.workspace }}/crates/kreuzberg-ffi${PKG_CONFIG_PATH:+:$PKG_CONFIG_PATH}" >> "$GITHUB_ENV"
        echo "LD_LIBRARY_PATH=${{ github.workspace }}/target/release${LD_LIBRARY_PATH:+:$LD_LIBRARY_PATH}" >> "$GITHUB_ENV"
    - name: Build C# bindings
      run: task csharp:build:bindings
    - name: Run benchmark
      env:
        FRAMEWORK: kreuzberg-csharp-sync
        MODE: ${{ matrix.mode }}
        ITERATIONS: ${{ env.ITERATIONS }}
        # Dispatch inputs are empty on pull_request runs; fall back to the
        # workflow_dispatch default rather than exporting an empty TIMEOUT.
        TIMEOUT: ${{ github.event.inputs.timeout || '900' }}
      run: scripts/benchmarks/run-benchmark.sh
    # Results are uploaded even when the benchmark step fails.
    - name: Upload artifacts
      if: always()
      uses: actions/upload-artifact@v6
      with:
        name: benchmarks-kreuzberg-csharp-sync-${{ matrix.mode }}-${{ github.run_id }}
        path: benchmark-results/
        retention-days: 30
# Benchmark job for the kreuzberg-elixir-sync framework; runs once per matrix mode.
bench-elixir-sync:
  name: kreuzberg-elixir-sync (${{ matrix.mode }})
  needs: setup
  runs-on: ubuntu-latest
  timeout-minutes: 180
  permissions:
    contents: read
  strategy:
    # Keep the other mode running when one of them fails.
    fail-fast: false
    matrix:
      mode: [single-file, batch]
  steps:
    - uses: actions/checkout@v4
      with:
        # The branch input only exists for workflow_dispatch; other events use github.ref.
        ref: ${{ github.event.inputs.branch || github.ref }}
    - name: Free disk space
      run: scripts/ci/benchmarks/free-disk-space.sh
    # Fetch the artifact named by the setup job's output into target/release.
    - name: Download build artifacts
      uses: actions/download-artifact@v7
      with:
        name: ${{ needs.setup.outputs.artifact-name }}
        path: target/release
    - name: Restore benchmark binary permissions
      run: scripts/benchmarks/restore-binary-permissions.sh
    - name: Install system dependencies
      uses: ./.github/actions/install-system-deps
    - name: Setup OpenSSL
      uses: ./.github/actions/setup-openssl
    - name: Setup Rust toolchain
      uses: ./.github/actions/setup-rust
      with:
        cache-key-prefix: benchmarks-elixir-sync
    - name: Setup Elixir
      uses: erlef/setup-beam@v1
      with:
        elixir-version: "1.17.0"
        otp-version: "27.0"
    - name: Setup ONNX Runtime
      uses: ./.github/actions/setup-onnx-runtime
      with:
        ort-version: ${{ env.ORT_VERSION }}
    - name: Install Task
      uses: ./.github/actions/install-task
    - name: Build Elixir bindings
      env:
        # NOTE(review): presumably forces a from-source native build — confirm in the task definition.
        KREUZBERG_BUILD: "1"
      run: task elixir:build:bindings
    - name: Run benchmark
      env:
        FRAMEWORK: kreuzberg-elixir-sync
        MODE: ${{ matrix.mode }}
        ITERATIONS: ${{ env.ITERATIONS }}
        # inputs.timeout is empty on pull_request events; fall back to the
        # workflow_dispatch default so the script never sees an empty value.
        TIMEOUT: ${{ github.event.inputs.timeout || '900' }}
      run: scripts/benchmarks/run-benchmark.sh
    - name: Upload artifacts
      # Upload whatever results exist even when an earlier step failed.
      if: always()
      uses: actions/upload-artifact@v6
      with:
        name: benchmarks-kreuzberg-elixir-sync-${{ matrix.mode }}-${{ github.run_id }}
        path: benchmark-results/
        retention-days: 30
# Benchmark job for the kreuzberg-elixir-batch framework; runs once per matrix mode.
bench-elixir-batch:
  name: kreuzberg-elixir-batch (${{ matrix.mode }})
  needs: setup
  runs-on: ubuntu-latest
  timeout-minutes: 180
  permissions:
    contents: read
  strategy:
    # Keep the other mode running when one of them fails.
    fail-fast: false
    matrix:
      mode: [single-file, batch]
  steps:
    - uses: actions/checkout@v4
      with:
        # The branch input only exists for workflow_dispatch; other events use github.ref.
        ref: ${{ github.event.inputs.branch || github.ref }}
    - name: Free disk space
      run: scripts/ci/benchmarks/free-disk-space.sh
    # Fetch the artifact named by the setup job's output into target/release.
    - name: Download build artifacts
      uses: actions/download-artifact@v7
      with:
        name: ${{ needs.setup.outputs.artifact-name }}
        path: target/release
    - name: Restore benchmark binary permissions
      run: scripts/benchmarks/restore-binary-permissions.sh
    - name: Install system dependencies
      uses: ./.github/actions/install-system-deps
    - name: Setup OpenSSL
      uses: ./.github/actions/setup-openssl
    - name: Setup Rust toolchain
      uses: ./.github/actions/setup-rust
      with:
        cache-key-prefix: benchmarks-elixir-batch
    - name: Setup Elixir
      uses: erlef/setup-beam@v1
      with:
        elixir-version: "1.17.0"
        otp-version: "27.0"
    - name: Setup ONNX Runtime
      uses: ./.github/actions/setup-onnx-runtime
      with:
        ort-version: ${{ env.ORT_VERSION }}
    - name: Install Task
      uses: ./.github/actions/install-task
    - name: Build Elixir bindings
      env:
        # NOTE(review): presumably forces a from-source native build — confirm in the task definition.
        KREUZBERG_BUILD: "1"
      run: task elixir:build:bindings
    - name: Run benchmark
      env:
        FRAMEWORK: kreuzberg-elixir-batch
        MODE: ${{ matrix.mode }}
        ITERATIONS: ${{ env.ITERATIONS }}
        # inputs.timeout is empty on pull_request events; fall back to the
        # workflow_dispatch default so the script never sees an empty value.
        TIMEOUT: ${{ github.event.inputs.timeout || '900' }}
      run: scripts/benchmarks/run-benchmark.sh
    - name: Upload artifacts
      # Upload whatever results exist even when an earlier step failed.
      if: always()
      uses: actions/upload-artifact@v6
      with:
        name: benchmarks-kreuzberg-elixir-batch-${{ matrix.mode }}-${{ github.run_id }}
        path: benchmark-results/
        retention-days: 30
# Benchmark job for the kreuzberg-php-sync framework; runs once per matrix mode.
bench-php-sync:
  name: kreuzberg-php-sync (${{ matrix.mode }})
  needs: setup
  runs-on: ubuntu-latest
  timeout-minutes: 180
  permissions:
    contents: read
  strategy:
    # Keep the other mode running when one of them fails.
    fail-fast: false
    matrix:
      mode: [single-file, batch]
  steps:
    - uses: actions/checkout@v4
      with:
        # The branch input only exists for workflow_dispatch; other events use github.ref.
        ref: ${{ github.event.inputs.branch || github.ref }}
    - name: Free disk space
      run: scripts/ci/benchmarks/free-disk-space.sh
    # Fetch the artifact named by the setup job's output into target/release.
    - name: Download build artifacts
      uses: actions/download-artifact@v7
      with:
        name: ${{ needs.setup.outputs.artifact-name }}
        path: target/release
    - name: Restore benchmark binary permissions
      run: scripts/benchmarks/restore-binary-permissions.sh
    - name: Install system dependencies
      uses: ./.github/actions/install-system-deps
    - name: Setup OpenSSL
      uses: ./.github/actions/setup-openssl
    - name: Setup Rust toolchain
      uses: ./.github/actions/setup-rust
      with:
        cache-key-prefix: benchmarks-php-sync
    - name: Setup PHP
      uses: shivammathur/setup-php@v2
      with:
        php-version: "8.3"
        extensions: ffi
    - name: Setup ONNX Runtime
      uses: ./.github/actions/setup-onnx-runtime
      with:
        ort-version: ${{ env.ORT_VERSION }}
    - name: Install Task
      uses: ./.github/actions/install-task
    # Build kreuzberg-ffi; when the failure log mentions sccache/cache/DNS errors,
    # retry once with the compiler wrapper disabled.
    - name: Build FFI library
      id: ffi
      # The `if` below needs cargo's exit status to survive the pipe into tee;
      # GitHub's bash shell runs with `-o pipefail`, which provides that.
      shell: bash
      run: |
        echo "=== Building FFI Library ==="
        echo "Building kreuzberg-ffi from source (no caching)"
        # Try building with sccache first (if enabled)
        if cargo build --release --package kreuzberg-ffi 2>&1 | tee /tmp/build.log; then
          echo "✓ Build succeeded"
        else
          # Check if it was an sccache error (DNS failures, cache storage issues)
          if grep -q "sccache.*error" /tmp/build.log || grep -q "cache storage failed" /tmp/build.log || grep -q "dns error" /tmp/build.log; then
            echo "⚠️ sccache failed (likely transient network issue), retrying without cache..."
            export RUSTC_WRAPPER=""
            export SCCACHE_GHA_ENABLED=false
            cargo build --release --package kreuzberg-ffi
          else
            # Not an sccache error, propagate the failure
            exit 1
          fi
        fi
    # Export search paths to later steps through GITHUB_ENV.
    - name: Set library paths for FFI
      shell: bash
      run: |
        # Append the previous value only when it is non-empty: a trailing ':'
        # adds an empty entry, which the dynamic loader reads as the current directory.
        echo "PKG_CONFIG_PATH=${{ github.workspace }}/crates/kreuzberg-ffi${PKG_CONFIG_PATH:+:$PKG_CONFIG_PATH}" >> "$GITHUB_ENV"
        echo "LD_LIBRARY_PATH=${{ github.workspace }}/target/release${LD_LIBRARY_PATH:+:$LD_LIBRARY_PATH}" >> "$GITHUB_ENV"
    - name: Build PHP bindings
      run: task php:build:ci
    - name: Run benchmark
      env:
        FRAMEWORK: kreuzberg-php-sync
        MODE: ${{ matrix.mode }}
        ITERATIONS: ${{ env.ITERATIONS }}
        # inputs.timeout is empty on pull_request events; fall back to the
        # workflow_dispatch default so the script never sees an empty value.
        TIMEOUT: ${{ github.event.inputs.timeout || '900' }}
      run: scripts/benchmarks/run-benchmark.sh
    - name: Upload artifacts
      # Upload whatever results exist even when an earlier step failed.
      if: always()
      uses: actions/upload-artifact@v6
      with:
        name: benchmarks-kreuzberg-php-sync-${{ matrix.mode }}-${{ github.run_id }}
        path: benchmark-results/
        retention-days: 30
# Benchmark job for the kreuzberg-php-batch framework; runs once per matrix mode.
bench-php-batch:
  name: kreuzberg-php-batch (${{ matrix.mode }})
  needs: setup
  runs-on: ubuntu-latest
  timeout-minutes: 180
  permissions:
    contents: read
  strategy:
    # Keep the other mode running when one of them fails.
    fail-fast: false
    matrix:
      mode: [single-file, batch]
  steps:
    - uses: actions/checkout@v4
      with:
        # The branch input only exists for workflow_dispatch; other events use github.ref.
        ref: ${{ github.event.inputs.branch || github.ref }}
    - name: Free disk space
      run: scripts/ci/benchmarks/free-disk-space.sh
    # Fetch the artifact named by the setup job's output into target/release.
    - name: Download build artifacts
      uses: actions/download-artifact@v7
      with:
        name: ${{ needs.setup.outputs.artifact-name }}
        path: target/release
    - name: Restore benchmark binary permissions
      run: scripts/benchmarks/restore-binary-permissions.sh
    - name: Install system dependencies
      uses: ./.github/actions/install-system-deps
    - name: Setup OpenSSL
      uses: ./.github/actions/setup-openssl
    - name: Setup Rust toolchain
      uses: ./.github/actions/setup-rust
      with:
        cache-key-prefix: benchmarks-php-batch
    - name: Setup PHP
      uses: shivammathur/setup-php@v2
      with:
        php-version: "8.3"
        extensions: ffi
    - name: Setup ONNX Runtime
      uses: ./.github/actions/setup-onnx-runtime
      with:
        ort-version: ${{ env.ORT_VERSION }}
    - name: Install Task
      uses: ./.github/actions/install-task
    # Build kreuzberg-ffi; when the failure log mentions sccache/cache/DNS errors,
    # retry once with the compiler wrapper disabled.
    - name: Build FFI library
      id: ffi
      # The `if` below needs cargo's exit status to survive the pipe into tee;
      # GitHub's bash shell runs with `-o pipefail`, which provides that.
      shell: bash
      run: |
        echo "=== Building FFI Library ==="
        echo "Building kreuzberg-ffi from source (no caching)"
        # Try building with sccache first (if enabled)
        if cargo build --release --package kreuzberg-ffi 2>&1 | tee /tmp/build.log; then
          echo "✓ Build succeeded"
        else
          # Check if it was an sccache error (DNS failures, cache storage issues)
          if grep -q "sccache.*error" /tmp/build.log || grep -q "cache storage failed" /tmp/build.log || grep -q "dns error" /tmp/build.log; then
            echo "⚠️ sccache failed (likely transient network issue), retrying without cache..."
            export RUSTC_WRAPPER=""
            export SCCACHE_GHA_ENABLED=false
            cargo build --release --package kreuzberg-ffi
          else
            # Not an sccache error, propagate the failure
            exit 1
          fi
        fi
    # Export search paths to later steps through GITHUB_ENV.
    - name: Set library paths for FFI
      shell: bash
      run: |
        # Append the previous value only when it is non-empty: a trailing ':'
        # adds an empty entry, which the dynamic loader reads as the current directory.
        echo "PKG_CONFIG_PATH=${{ github.workspace }}/crates/kreuzberg-ffi${PKG_CONFIG_PATH:+:$PKG_CONFIG_PATH}" >> "$GITHUB_ENV"
        echo "LD_LIBRARY_PATH=${{ github.workspace }}/target/release${LD_LIBRARY_PATH:+:$LD_LIBRARY_PATH}" >> "$GITHUB_ENV"
    - name: Build PHP bindings
      run: task php:build:ci
    - name: Run benchmark
      env:
        FRAMEWORK: kreuzberg-php-batch
        MODE: ${{ matrix.mode }}
        ITERATIONS: ${{ env.ITERATIONS }}
        # inputs.timeout is empty on pull_request events; fall back to the
        # workflow_dispatch default so the script never sees an empty value.
        TIMEOUT: ${{ github.event.inputs.timeout || '900' }}
      run: scripts/benchmarks/run-benchmark.sh
    - name: Upload artifacts
      # Upload whatever results exist even when an earlier step failed.
      if: always()
      uses: actions/upload-artifact@v6
      with:
        name: benchmarks-kreuzberg-php-batch-${{ matrix.mode }}-${{ github.run_id }}
        path: benchmark-results/
        retention-days: 30
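# Third-party framework benchmarks.
# NOTE(review): this header said these jobs are temporarily disabled while kreuzberg variants are stabilized,
# but the jobs below carry no `if:` guard and will run — confirm the intended state.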
# Benchmark job for the third-party docling framework; runs once per matrix mode.
bench-docling:
  name: docling (${{ matrix.mode }})
  needs: setup
  runs-on: ubuntu-latest
  timeout-minutes: 180
  permissions:
    contents: read
  strategy:
    # Keep the other mode running when one of them fails.
    fail-fast: false
    matrix:
      mode: [single-file, batch]
  steps:
    - uses: actions/checkout@v4
      with:
        # The branch input only exists for workflow_dispatch; other events use github.ref.
        ref: ${{ github.event.inputs.branch || github.ref }}
    - name: Free disk space
      run: scripts/ci/benchmarks/free-disk-space.sh
    # NOTE(review): hard-coded artifact name — confirm an upstream job uploads `benchmark-harness-binary`.
    - name: Download benchmark harness binary
      uses: actions/download-artifact@v7
      with:
        name: benchmark-harness-binary
        path: target/release
    - name: Restore benchmark binary permissions
      run: scripts/benchmarks/restore-binary-permissions.sh
    - name: Setup Python
      uses: ./.github/actions/setup-python-env
      with:
        python-version: "3.11"
        cache-prefix: benchmark-python
    - name: Sync benchmark dependencies
      run: uv sync --group benchmark --no-install-workspace
    - name: Run benchmark
      env:
        FRAMEWORK: docling
        MODE: ${{ matrix.mode }}
        ITERATIONS: ${{ env.ITERATIONS }}
        # inputs.timeout is empty on pull_request events; fall back to the
        # workflow_dispatch default so the script never sees an empty value.
        TIMEOUT: ${{ github.event.inputs.timeout || '900' }}
      run: scripts/benchmarks/run-benchmark.sh
    - name: Upload artifacts
      # Upload whatever results exist even when an earlier step failed.
      if: always()
      uses: actions/upload-artifact@v6
      with:
        name: benchmarks-docling-${{ matrix.mode }}-${{ github.run_id }}
        path: benchmark-results/
        retention-days: 30
# Benchmark job for the third-party docling-batch framework; runs once per matrix mode.
bench-docling-batch:
  name: docling-batch (${{ matrix.mode }})
  needs: setup
  runs-on: ubuntu-latest
  timeout-minutes: 180
  permissions:
    contents: read
  strategy:
    # Keep the other mode running when one of them fails.
    fail-fast: false
    matrix:
      mode: [single-file, batch]
  steps:
    - uses: actions/checkout@v4
      with:
        # The branch input only exists for workflow_dispatch; other events use github.ref.
        ref: ${{ github.event.inputs.branch || github.ref }}
    - name: Free disk space
      run: scripts/ci/benchmarks/free-disk-space.sh
    # NOTE(review): hard-coded artifact name — confirm an upstream job uploads `benchmark-harness-binary`.
    - name: Download benchmark harness binary
      uses: actions/download-artifact@v7
      with:
        name: benchmark-harness-binary
        path: target/release
    - name: Restore benchmark binary permissions
      run: scripts/benchmarks/restore-binary-permissions.sh
    - name: Setup Python
      uses: ./.github/actions/setup-python-env
      with:
        python-version: "3.11"
        cache-prefix: benchmark-python
    - name: Sync benchmark dependencies
      run: uv sync --group benchmark --no-install-workspace
    - name: Run benchmark
      env:
        FRAMEWORK: docling-batch
        MODE: ${{ matrix.mode }}
        ITERATIONS: ${{ env.ITERATIONS }}
        # inputs.timeout is empty on pull_request events; fall back to the
        # workflow_dispatch default so the script never sees an empty value.
        TIMEOUT: ${{ github.event.inputs.timeout || '900' }}
      run: scripts/benchmarks/run-benchmark.sh
    - name: Upload artifacts
      # Upload whatever results exist even when an earlier step failed.
      if: always()
      uses: actions/upload-artifact@v6
      with:
        name: benchmarks-docling-batch-${{ matrix.mode }}-${{ github.run_id }}
        path: benchmark-results/
        retention-days: 30
# Benchmark job for the third-party markitdown framework; runs once per matrix mode.
bench-markitdown:
  name: markitdown (${{ matrix.mode }})
  needs: setup
  runs-on: ubuntu-latest
  timeout-minutes: 180
  permissions:
    contents: read
  strategy:
    # Keep the other mode running when one of them fails.
    fail-fast: false
    matrix:
      mode: [single-file, batch]
  steps:
    - uses: actions/checkout@v4
      with:
        # The branch input only exists for workflow_dispatch; other events use github.ref.
        ref: ${{ github.event.inputs.branch || github.ref }}
    - name: Free disk space
      run: scripts/ci/benchmarks/free-disk-space.sh
    # NOTE(review): hard-coded artifact name — confirm an upstream job uploads `benchmark-harness-binary`.
    - name: Download benchmark harness binary
      uses: actions/download-artifact@v7
      with:
        name: benchmark-harness-binary
        path: target/release
    - name: Restore benchmark binary permissions
      run: scripts/benchmarks/restore-binary-permissions.sh
    - name: Setup Python
      uses: ./.github/actions/setup-python-env
      with:
        python-version: "3.11"
        cache-prefix: benchmark-python
    - name: Sync benchmark dependencies
      run: uv sync --group benchmark --no-install-workspace
    - name: Run benchmark
      env:
        FRAMEWORK: markitdown
        MODE: ${{ matrix.mode }}
        ITERATIONS: ${{ env.ITERATIONS }}
        # inputs.timeout is empty on pull_request events; fall back to the
        # workflow_dispatch default so the script never sees an empty value.
        TIMEOUT: ${{ github.event.inputs.timeout || '900' }}
      run: scripts/benchmarks/run-benchmark.sh
    - name: Upload artifacts
      # Upload whatever results exist even when an earlier step failed.
      if: always()
      uses: actions/upload-artifact@v6
      with:
        name: benchmarks-markitdown-${{ matrix.mode }}-${{ github.run_id }}
        path: benchmark-results/
        retention-days: 30
# Benchmark job for the third-party pandoc framework; runs once per matrix mode.
bench-pandoc:
  name: pandoc (${{ matrix.mode }})
  needs: setup
  runs-on: ubuntu-latest
  timeout-minutes: 180
  permissions:
    contents: read
  strategy:
    # Keep the other mode running when one of them fails.
    fail-fast: false
    matrix:
      mode: [single-file, batch]
  steps:
    - uses: actions/checkout@v4
      with:
        # The branch input only exists for workflow_dispatch; other events use github.ref.
        ref: ${{ github.event.inputs.branch || github.ref }}
    - name: Free disk space
      run: scripts/ci/benchmarks/free-disk-space.sh
    # NOTE(review): hard-coded artifact name — confirm an upstream job uploads `benchmark-harness-binary`.
    - name: Download benchmark harness binary
      uses: actions/download-artifact@v7
      with:
        name: benchmark-harness-binary
        path: target/release
    - name: Restore benchmark binary permissions
      run: scripts/benchmarks/restore-binary-permissions.sh
    - name: Setup Python
      uses: ./.github/actions/setup-python-env
      with:
        python-version: "3.11"
        cache-prefix: benchmark-python
    - name: Sync benchmark dependencies
      run: uv sync --group benchmark --no-install-workspace
    # Install the pandoc CLI from apt and print its version into the job log.
    - name: Install pandoc
      run: |
        sudo apt-get update
        sudo apt-get install -y pandoc
        pandoc --version
    - name: Run benchmark
      env:
        FRAMEWORK: pandoc
        MODE: ${{ matrix.mode }}
        ITERATIONS: ${{ env.ITERATIONS }}
        # inputs.timeout is empty on pull_request events; fall back to the
        # workflow_dispatch default so the script never sees an empty value.
        TIMEOUT: ${{ github.event.inputs.timeout || '900' }}
      run: scripts/benchmarks/run-benchmark.sh
    - name: Upload artifacts
      # Upload whatever results exist even when an earlier step failed.
      if: always()
      uses: actions/upload-artifact@v6
      with:
        name: benchmarks-pandoc-${{ matrix.mode }}-${{ github.run_id }}
        path: benchmark-results/
        retention-days: 30
# Benchmark job for the third-party unstructured framework; runs once per matrix mode.
bench-unstructured:
  name: unstructured (${{ matrix.mode }})
  needs: setup
  runs-on: ubuntu-latest
  timeout-minutes: 180
  permissions:
    contents: read
  strategy:
    # Keep the other mode running when one of them fails.
    fail-fast: false
    matrix:
      mode: [single-file, batch]
  steps:
    - uses: actions/checkout@v4
      with:
        # The branch input only exists for workflow_dispatch; other events use github.ref.
        ref: ${{ github.event.inputs.branch || github.ref }}
    - name: Free disk space
      run: scripts/ci/benchmarks/free-disk-space.sh
    # NOTE(review): hard-coded artifact name — confirm an upstream job uploads `benchmark-harness-binary`.
    - name: Download benchmark harness binary
      uses: actions/download-artifact@v7
      with:
        name: benchmark-harness-binary
        path: target/release
    - name: Restore benchmark binary permissions
      run: scripts/benchmarks/restore-binary-permissions.sh
    - name: Setup Python
      uses: ./.github/actions/setup-python-env
      with:
        python-version: "3.11"
        cache-prefix: benchmark-python
    - name: Sync benchmark dependencies
      run: uv sync --group benchmark --no-install-workspace
    - name: Run benchmark
      env:
        FRAMEWORK: unstructured
        MODE: ${{ matrix.mode }}
        ITERATIONS: ${{ env.ITERATIONS }}
        # inputs.timeout is empty on pull_request events; fall back to the
        # workflow_dispatch default so the script never sees an empty value.
        TIMEOUT: ${{ github.event.inputs.timeout || '900' }}
      run: scripts/benchmarks/run-benchmark.sh
    - name: Upload artifacts
      # Upload whatever results exist even when an earlier step failed.
      if: always()
      uses: actions/upload-artifact@v6
      with:
        name: benchmarks-unstructured-${{ matrix.mode }}-${{ github.run_id }}
        path: benchmark-results/
        retention-days: 30
# Benchmark job for the third-party tika-sync framework; runs once per matrix mode.
bench-tika-sync:
  name: tika-sync (${{ matrix.mode }})
  needs: setup
  runs-on: ubuntu-latest
  timeout-minutes: 180
  permissions:
    contents: read
  strategy:
    # Keep the other mode running when one of them fails.
    fail-fast: false
    matrix:
      mode: [single-file, batch]
  steps:
    - uses: actions/checkout@v4
      with:
        # The branch input only exists for workflow_dispatch; other events use github.ref.
        ref: ${{ github.event.inputs.branch || github.ref }}
    - name: Free disk space
      run: scripts/ci/benchmarks/free-disk-space.sh
    # NOTE(review): hard-coded artifact name — confirm an upstream job uploads `benchmark-harness-binary`.
    - name: Download benchmark harness binary
      uses: actions/download-artifact@v7
      with:
        name: benchmark-harness-binary
        path: target/release
    - name: Restore benchmark binary permissions
      run: scripts/benchmarks/restore-binary-permissions.sh
    - name: Setup Java
      uses: actions/setup-java@v4
      id: setup-java
      with:
        distribution: 'temurin'
        java-version: '25'
    - name: Run benchmark
      env:
        FRAMEWORK: tika-sync
        MODE: ${{ matrix.mode }}
        ITERATIONS: ${{ env.ITERATIONS }}
        # inputs.timeout is empty on pull_request events; fall back to the
        # workflow_dispatch default so the script never sees an empty value.
        TIMEOUT: ${{ github.event.inputs.timeout || '900' }}
      run: scripts/benchmarks/run-benchmark.sh
    - name: Upload artifacts
      # Upload whatever results exist even when an earlier step failed.
      if: always()
      uses: actions/upload-artifact@v6
      with:
        name: benchmarks-tika-sync-${{ matrix.mode }}-${{ github.run_id }}
        path: benchmark-results/
        retention-days: 30
# Benchmark job for the third-party tika-batch framework; runs once per matrix mode.
bench-tika-batch:
  name: tika-batch (${{ matrix.mode }})
  needs: setup
  runs-on: ubuntu-latest
  timeout-minutes: 180
  permissions:
    contents: read
  strategy:
    # Keep the other mode running when one of them fails.
    fail-fast: false
    matrix:
      mode: [single-file, batch]
  steps:
    - uses: actions/checkout@v4
      with:
        # The branch input only exists for workflow_dispatch; other events use github.ref.
        ref: ${{ github.event.inputs.branch || github.ref }}
    - name: Free disk space
      run: scripts/ci/benchmarks/free-disk-space.sh
    # NOTE(review): hard-coded artifact name — confirm an upstream job uploads `benchmark-harness-binary`.
    - name: Download benchmark harness binary
      uses: actions/download-artifact@v7
      with:
        name: benchmark-harness-binary
        path: target/release
    - name: Restore benchmark binary permissions
      run: scripts/benchmarks/restore-binary-permissions.sh
    - name: Setup Java
      uses: actions/setup-java@v4
      id: setup-java
      with:
        distribution: 'temurin'
        java-version: '25'
    - name: Run benchmark
      env:
        FRAMEWORK: tika-batch
        MODE: ${{ matrix.mode }}
        ITERATIONS: ${{ env.ITERATIONS }}
        # inputs.timeout is empty on pull_request events; fall back to the
        # workflow_dispatch default so the script never sees an empty value.
        TIMEOUT: ${{ github.event.inputs.timeout || '900' }}
      run: scripts/benchmarks/run-benchmark.sh
    - name: Upload artifacts
      # Upload whatever results exist even when an earlier step failed.
      if: always()
      uses: actions/upload-artifact@v6
      with:
        name: benchmarks-tika-batch-${{ matrix.mode }}-${{ github.run_id }}
        path: benchmark-results/
        retention-days: 30
# Benchmark job for the third-party pymupdf4llm framework; runs once per matrix mode.
bench-pymupdf4llm:
  name: pymupdf4llm (${{ matrix.mode }})
  needs: setup
  runs-on: ubuntu-latest
  timeout-minutes: 180
  permissions:
    contents: read
  strategy:
    # Keep the other mode running when one of them fails.
    fail-fast: false
    matrix:
      mode: [single-file, batch]
  steps:
    - uses: actions/checkout@v4
      with:
        # The branch input only exists for workflow_dispatch; other events use github.ref.
        ref: ${{ github.event.inputs.branch || github.ref }}
    - name: Free disk space
      run: scripts/ci/benchmarks/free-disk-space.sh
    # NOTE(review): hard-coded artifact name — confirm an upstream job uploads `benchmark-harness-binary`.
    - name: Download benchmark harness binary
      uses: actions/download-artifact@v7
      with:
        name: benchmark-harness-binary
        path: target/release
    - name: Restore benchmark binary permissions
      run: scripts/benchmarks/restore-binary-permissions.sh
    - name: Setup Python
      uses: ./.github/actions/setup-python-env
      with:
        python-version: "3.11"
        cache-prefix: benchmark-python
    - name: Sync benchmark dependencies
      run: uv sync --group benchmark --no-install-workspace
    - name: Run benchmark
      env:
        FRAMEWORK: pymupdf4llm
        MODE: ${{ matrix.mode }}
        ITERATIONS: ${{ env.ITERATIONS }}
        # inputs.timeout is empty on pull_request events; fall back to the
        # workflow_dispatch default so the script never sees an empty value.
        TIMEOUT: ${{ github.event.inputs.timeout || '900' }}
      run: scripts/benchmarks/run-benchmark.sh
    - name: Upload artifacts
      # Upload whatever results exist even when an earlier step failed.
      if: always()
      uses: actions/upload-artifact@v6
      with:
        name: benchmarks-pymupdf4llm-${{ matrix.mode }}-${{ github.run_id }}
        path: benchmark-results/
        retention-days: 30
# Benchmark job for the third-party pdfplumber framework; runs once per matrix mode.
bench-pdfplumber:
  name: pdfplumber (${{ matrix.mode }})
  needs: setup
  runs-on: ubuntu-latest
  timeout-minutes: 180
  permissions:
    contents: read
  strategy:
    # Keep the other mode running when one of them fails.
    fail-fast: false
    matrix:
      mode: [single-file, batch]
  steps:
    - uses: actions/checkout@v4
      with:
        # The branch input only exists for workflow_dispatch; other events use github.ref.
        ref: ${{ github.event.inputs.branch || github.ref }}
    - name: Free disk space
      run: scripts/ci/benchmarks/free-disk-space.sh
    # NOTE(review): hard-coded artifact name — confirm an upstream job uploads `benchmark-harness-binary`.
    - name: Download benchmark harness binary
      uses: actions/download-artifact@v7
      with:
        name: benchmark-harness-binary
        path: target/release
    - name: Restore benchmark binary permissions
      run: scripts/benchmarks/restore-binary-permissions.sh
    - name: Setup Python
      uses: ./.github/actions/setup-python-env
      with:
        python-version: "3.11"
        cache-prefix: benchmark-python
    - name: Sync benchmark dependencies
      run: uv sync --group benchmark --no-install-workspace
    - name: Run benchmark
      env:
        FRAMEWORK: pdfplumber
        MODE: ${{ matrix.mode }}
        ITERATIONS: ${{ env.ITERATIONS }}
        # inputs.timeout is empty on pull_request events; fall back to the
        # workflow_dispatch default so the script never sees an empty value.
        TIMEOUT: ${{ github.event.inputs.timeout || '900' }}
      run: scripts/benchmarks/run-benchmark.sh
    - name: Upload artifacts
      # Upload whatever results exist even when an earlier step failed.
      if: always()
      uses: actions/upload-artifact@v6
      with:
        name: benchmarks-pdfplumber-${{ matrix.mode }}-${{ github.run_id }}
        path: benchmark-results/
        retention-days: 30
# Benchmark job for the third-party pdfplumber-batch framework; runs once per matrix mode.
bench-pdfplumber-batch:
  name: pdfplumber-batch (${{ matrix.mode }})
  needs: setup
  runs-on: ubuntu-latest
  timeout-minutes: 180
  permissions:
    contents: read
  strategy:
    # Keep the other mode running when one of them fails.
    fail-fast: false
    matrix:
      mode: [single-file, batch]
  steps:
    - uses: actions/checkout@v4
      with:
        # The branch input only exists for workflow_dispatch; other events use github.ref.
        ref: ${{ github.event.inputs.branch || github.ref }}
    - name: Free disk space
      run: scripts/ci/benchmarks/free-disk-space.sh
    # NOTE(review): hard-coded artifact name — confirm an upstream job uploads `benchmark-harness-binary`.
    - name: Download benchmark harness binary
      uses: actions/download-artifact@v7
      with:
        name: benchmark-harness-binary
        path: target/release
    - name: Restore benchmark binary permissions
      run: scripts/benchmarks/restore-binary-permissions.sh
    - name: Setup Python
      uses: ./.github/actions/setup-python-env
      with:
        python-version: "3.11"
        cache-prefix: benchmark-python
    - name: Sync benchmark dependencies
      run: uv sync --group benchmark --no-install-workspace
    - name: Run benchmark
      env:
        FRAMEWORK: pdfplumber-batch
        MODE: ${{ matrix.mode }}
        ITERATIONS: ${{ env.ITERATIONS }}
        # inputs.timeout is empty on pull_request events; fall back to the
        # workflow_dispatch default so the script never sees an empty value.
        TIMEOUT: ${{ github.event.inputs.timeout || '900' }}
      run: scripts/benchmarks/run-benchmark.sh
    - name: Upload artifacts
      # Upload whatever results exist even when an earlier step failed.
      if: always()
      uses: actions/upload-artifact@v6
      with:
        name: benchmarks-pdfplumber-batch-${{ matrix.mode }}-${{ github.run_id }}
        path: benchmark-results/
        retention-days: 30
# Benchmark job for the third-party mineru framework; runs once per matrix mode.
bench-mineru:
  name: mineru (${{ matrix.mode }})
  needs: setup
  runs-on: ubuntu-latest
  timeout-minutes: 180
  permissions:
    contents: read
  strategy:
    # Keep the other mode running when one of them fails.
    fail-fast: false
    matrix:
      mode: [single-file, batch]
  steps:
    - uses: actions/checkout@v4
      with:
        # The branch input only exists for workflow_dispatch; other events use github.ref.
        ref: ${{ github.event.inputs.branch || github.ref }}
    - name: Free disk space
      run: scripts/ci/benchmarks/free-disk-space.sh
    # NOTE(review): hard-coded artifact name — confirm an upstream job uploads `benchmark-harness-binary`.
    - name: Download benchmark harness binary
      uses: actions/download-artifact@v7
      with:
        name: benchmark-harness-binary
        path: target/release
    - name: Restore benchmark binary permissions
      run: scripts/benchmarks/restore-binary-permissions.sh
    - name: Setup Python
      uses: ./.github/actions/setup-python-env
      with:
        python-version: "3.11"
        cache-prefix: benchmark-python
    - name: Sync benchmark dependencies
      run: uv sync --group benchmark --no-install-workspace
    - name: Run benchmark
      env:
        FRAMEWORK: mineru
        MODE: ${{ matrix.mode }}
        ITERATIONS: ${{ env.ITERATIONS }}
        # inputs.timeout is empty on pull_request events; fall back to the
        # workflow_dispatch default so the script never sees an empty value.
        TIMEOUT: ${{ github.event.inputs.timeout || '900' }}
      run: scripts/benchmarks/run-benchmark.sh
    - name: Upload artifacts
      # Upload whatever results exist even when an earlier step failed.
      if: always()
      uses: actions/upload-artifact@v6
      with:
        name: benchmarks-mineru-${{ matrix.mode }}-${{ github.run_id }}
        path: benchmark-results/
        retention-days: 30
# Benchmark job for the third-party mineru-batch framework; runs once per matrix mode.
bench-mineru-batch:
  name: mineru-batch (${{ matrix.mode }})
  needs: setup
  runs-on: ubuntu-latest
  timeout-minutes: 180
  permissions:
    contents: read
  strategy:
    # Keep the other mode running when one of them fails.
    fail-fast: false
    matrix:
      mode: [single-file, batch]
  steps:
    - uses: actions/checkout@v4
      with:
        # The branch input only exists for workflow_dispatch; other events use github.ref.
        ref: ${{ github.event.inputs.branch || github.ref }}
    - name: Free disk space
      run: scripts/ci/benchmarks/free-disk-space.sh
    # NOTE(review): hard-coded artifact name — confirm an upstream job uploads `benchmark-harness-binary`.
    - name: Download benchmark harness binary
      uses: actions/download-artifact@v7
      with:
        name: benchmark-harness-binary
        path: target/release
    - name: Restore benchmark binary permissions
      run: scripts/benchmarks/restore-binary-permissions.sh
    - name: Setup Python
      uses: ./.github/actions/setup-python-env
      with:
        python-version: "3.11"
        cache-prefix: benchmark-python
    - name: Sync benchmark dependencies
      run: uv sync --group benchmark --no-install-workspace
    - name: Run benchmark
      env:
        FRAMEWORK: mineru-batch
        MODE: ${{ matrix.mode }}
        ITERATIONS: ${{ env.ITERATIONS }}
        # inputs.timeout is empty on pull_request events; fall back to the
        # workflow_dispatch default so the script never sees an empty value.
        TIMEOUT: ${{ github.event.inputs.timeout || '900' }}
      run: scripts/benchmarks/run-benchmark.sh
    - name: Upload artifacts
      # Upload whatever results exist even when an earlier step failed.
      if: always()
      uses: actions/upload-artifact@v6
      with:
        name: benchmarks-mineru-batch-${{ matrix.mode }}-${{ github.run_id }}
        path: benchmark-results/
        retention-days: 30