Skip to content

chore: updated and linted #448

chore: updated and linted

chore: updated and linted #448

Workflow file for this run

# CI for the Ruby binding: builds the gem per OS, then installs and tests it.
name: CI Ruby

# The key is quoted so YAML 1.1 parsers do not read a bare `on` as a boolean.
'on':
  push:
    branches:
      - main
    # Run only when Ruby sources, the shared Rust crates, fixtures, or the
    # CI tooling itself change.
    paths:
      - '.github/actions/**'
      - '.github/workflows/ci-ruby.yaml'
      - '.cargo/config.toml'
      - 'rust-toolchain.toml'
      - 'Cargo.toml'
      - 'Cargo.lock'
      - 'crates/kreuzberg/**'
      - 'crates/kreuzberg-ffi/**'
      - 'crates/kreuzberg-tesseract/**'
      - 'packages/ruby/**'
      - 'e2e/ruby/**'
      - 'test_documents/**'
      - 'fixtures/**'
      - 'tools/e2e-generator/**'
      - 'Gemfile'
      - 'Gemfile.lock'
      - 'packages/ruby/Rakefile'
      - 'scripts/ci/ruby/**'
      - 'scripts/ci/cache/**'
      - 'scripts/ci/actions/**'
  pull_request:
    branches:
      - main
    # Same filter as `push`; keep both lists in sync when editing.
    paths:
      - '.github/actions/**'
      - '.github/workflows/ci-ruby.yaml'
      - '.cargo/config.toml'
      - 'rust-toolchain.toml'
      - 'Cargo.toml'
      - 'Cargo.lock'
      - 'crates/kreuzberg/**'
      - 'crates/kreuzberg-ffi/**'
      - 'crates/kreuzberg-tesseract/**'
      - 'packages/ruby/**'
      - 'e2e/ruby/**'
      - 'test_documents/**'
      - 'fixtures/**'
      - 'tools/e2e-generator/**'
      - 'Gemfile'
      - 'Gemfile.lock'
      - 'packages/ruby/Rakefile'
      - 'scripts/ci/ruby/**'
      - 'scripts/ci/cache/**'
      - 'scripts/ci/actions/**'
# One live run per pull request (or per ref when there is no PR number);
# a newer run in the same group cancels the one still in progress.
concurrency:
  group: ci-ruby-${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }}
  cancel-in-progress: true
# Environment shared by every job and step in this workflow.
# Values are quoted where they look numeric so they are unambiguously strings.
env:
  # Cargo / rustc settings
  CARGO_TERM_COLOR: always
  CARGO_INCREMENTAL: "0"
  CARGO_PROFILE_DEV_DEBUG: "0"
  RUST_BACKTRACE: full
  RUST_MIN_STACK: "16777216"
  # Pinned versions, passed to the PDFium and ONNX Runtime actions as inputs.
  PDFIUM_VERSION: "7578"
  ORT_VERSION: "1.23.2"
  MACOSX_DEPLOYMENT_TARGET: "14.0"
  # Logging verbosity
  CARGO_LOG: cargo::core::compiler::fingerprint=info
  RUST_LOG: debug
  BUILD_PROFILE: "ci"
jobs:
  # Builds the kreuzberg-ffi library and packages the Ruby gem on each matrix OS.
  build-ruby-gem:
    name: Ruby Gem (${{ matrix.os }})
    # Do not run for Dependabot-triggered events.
    if: ${{ github.actor != 'dependabot[bot]' }}
    runs-on: ${{ matrix.os }}
    timeout-minutes: 180
    strategy:
      fail-fast: true
      matrix:
        os:
          - ubuntu-latest
          - ubuntu-24.04-arm
          - macos-latest
          - windows-latest
    # Note: BUNDLE_GEMFILE is set per-step to avoid Windows path issues.
    # On Windows, github.workspace paths like D:/a/... get mangled to D:/d/a/... in bash shells.
    steps:
# Check out the repository first: every later step uses repo-local actions.
- uses: actions/checkout@v4

- name: Install system dependencies
  uses: ./.github/actions/install-system-deps

# Runs only on the ubuntu-* matrix entries.
- name: Free disk space before setup
  if: startsWith(matrix.os, 'ubuntu')
  uses: ./.github/actions/free-disk-space-linux
  with:
    show-initial: "true"
    show-final: "true"

- name: Setup OpenSSL
  uses: ./.github/actions/setup-openssl
# Diagnostic only: prints the OpenSSL variables and, when OPENSSL_DIR is set,
# lists up to five libssl files found beneath it.
- name: Verify OpenSSL setup
  shell: bash
  run: |
    echo "=== OpenSSL Verification ==="
    echo "Platform: $(uname -m)"
    echo "OPENSSL_DIR: ${OPENSSL_DIR:-<not set>}"
    echo "OPENSSL_LIB_DIR: ${OPENSSL_LIB_DIR:-<not set>}"
    echo "OPENSSL_INCLUDE_DIR: ${OPENSSL_INCLUDE_DIR:-<not set>}"
    if [ -n "${OPENSSL_DIR:-}" ]; then
      echo ""
      echo "Checking for libssl:"
      # find exits 0 even when nothing matches, so a `|| echo` fallback would
      # never fire in that case; decide from the captured output instead.
      # `|| true` keeps a missing/unreadable directory from failing the step.
      libssl_hits="$(find "$OPENSSL_DIR" -name "libssl*" 2>/dev/null | head -5 || true)"
      if [ -n "$libssl_hits" ]; then
        echo "$libssl_hits"
      else
        echo "No libssl found"
      fi
    fi
# On Windows the GNU toolchain/target is requested explicitly; on other
# runners both inputs are passed as empty strings.
- name: Setup Rust
  uses: ./.github/actions/setup-rust
  with:
    cache-key-prefix: ruby-${{ matrix.os }}
    toolchain: ${{ runner.os == 'Windows' && 'stable-x86_64-pc-windows-gnu' || '' }}
    target: ${{ runner.os == 'Windows' && 'x86_64-pc-windows-gnu' || '' }}

# Bundler is not installed or cached by setup-ruby here; the next step
# installs it through the project script.
- name: Setup Ruby
  uses: ruby/setup-ruby@v1
  with:
    ruby-version: "3.2"
    bundler: none
    bundler-cache: false
    working-directory: packages/ruby

- name: Install Bundler
  shell: bash
  run: scripts/ci/ruby/install-bundler.sh

# Exported through GITHUB_ENV so later steps on non-Windows runners see it.
- name: Set BUNDLE_GEMFILE (Unix)
  if: runner.os != 'Windows'
  shell: bash
  run: |
    echo "BUNDLE_GEMFILE=${{ github.workspace }}/packages/ruby/Gemfile" >> "$GITHUB_ENV"

- name: Install Ruby dependencies
  working-directory: packages/ruby
  shell: bash
  run: bundle install
# Diagnostic only: dumps Ruby, Rust and host details into the job log.
- name: Print Ruby environment (pre-setup)
  shell: bash
  run: |
    echo "=== Ruby Environment ==="
    echo "Ruby version: $(ruby --version)"
    echo "Ruby platform: $(ruby -e 'puts RUBY_PLATFORM')"
    echo "Gem home: $(gem env home)"
    echo "Gem paths: $(gem env | grep -A 10 'EXECUTABLE')"
    echo ""
    echo "=== Rust Environment ==="
    echo "Rustc version: $(rustc -vV)"
    echo "Cargo version: $(cargo --version)"
    echo ""
    echo "=== System Info ==="
    echo "OS: $(uname -s)"
    echo "Arch: $(uname -m)"
    if [ "$RUNNER_OS" = "Windows" ]; then
      echo "Windows build: x86_64-pc-windows-gnu"
    fi
- name: Configure short paths for Windows MAX_PATH mitigation
  if: runner.os == 'Windows'
  shell: pwsh
  run: pwsh -File scripts/ci/ruby/windows-max-path-mitigation.ps1

# Ruby dependency installation is split by platform: a bash script on
# non-Windows runners and a PowerShell script on Windows.
- name: Install Ruby deps
  if: runner.os != 'Windows'
  shell: bash
  run: scripts/ci/ruby/install-ruby-deps.sh

- name: Install Ruby deps (Windows)
  if: runner.os == 'Windows'
  shell: pwsh
  run: scripts/ci/ruby/install-ruby-deps.ps1

# Diagnostic only; `bundle config list` is allowed to fail.
- name: Print Bundler and Ruby deps info
  shell: bash
  run: |
    echo "=== Bundler Info ==="
    bundle --version
    bundle config list || true
    echo ""
    echo "=== Installed gems (top 20) ==="
    gem list | head -20
# PDFium: cache, download, then stage the runtime into the release directory.
# The version comes from the workflow-level PDFIUM_VERSION variable.
- name: Cache PDFium
  uses: ./.github/actions/cache-pdfium
  with:
    pdfium-version: ${{ env.PDFIUM_VERSION }}

- name: Download PDFium
  uses: ./.github/actions/download-pdfium
  with:
    pdfium-version: ${{ env.PDFIUM_VERSION }}

# Destination follows CARGO_TARGET_DIR when it is set, otherwise `target`.
- name: Stage PDFium runtime
  uses: ./.github/actions/stage-pdfium-runtime
  with:
    destination: ${{ env.CARGO_TARGET_DIR || 'target' }}/release

- name: Setup ONNX Runtime
  uses: ./.github/actions/setup-onnx-runtime
  with:
    ort-version: ${{ env.ORT_VERSION }}

- name: Install Task
  uses: ./.github/actions/install-task
# Windows only: exports three build variables to all subsequent steps by
# appending them to GITHUB_ENV in a single grouped redirect.
- name: Configure aws-lc-sys build for Windows
  if: runner.os == 'Windows'
  shell: bash
  run: |
    {
      echo "AWS_LC_SYS_CMAKE_BUILDER=1"
      echo "AWS_LC_SYS_NO_ASM=1"
      echo "CMAKE_BUILD_PARALLEL_LEVEL=1"
    } >> "$GITHUB_ENV"
# Windows only. The condition uses runner.os, like the other Windows-only
# steps in this workflow, so it keeps working if the matrix label changes
# (e.g. a pinned windows-2022 image) instead of silently skipping.
- name: Configure bindgen compatibility headers (Windows)
  if: runner.os == 'Windows'
  shell: pwsh
  run: scripts/ci/ruby/configure-bindgen-windows.ps1
# Runs on every platform via the Python vendoring script.
- name: Vendor kreuzberg core crate
  shell: bash
  run: python3 scripts/ci/ruby/vendor-kreuzberg-core.py

- name: Configure Tesseract build environment (Windows)
  if: runner.os == 'Windows'
  shell: pwsh
  run: scripts/ci/ruby/configure-tesseract-windows.ps1
# Diagnostic only: records toolchains, targets, cargo config and the
# build-related environment variables before compilation starts.
- name: Print Rust toolchain info (pre-build)
  shell: bash
  run: |
    echo "=== Rust Toolchain ==="
    rustup toolchain list
    rustup show
    echo ""
    echo "=== Rust Targets ==="
    rustup target list | grep installed
    echo ""
    echo "=== Rust compiler details ==="
    rustc -vV
    rustc --print target-list | grep windows || true
    echo ""
    echo "=== Cargo config ==="
    if [ -f .cargo/config.toml ]; then
      cat .cargo/config.toml
    fi
    echo ""
    echo "=== Environment Variables ==="
    env | grep -E "RUST|CARGO|BINDGEN|AWS_LC|CMAKE|CC|CXX|OPENSSL" | sort || true
# Diagnostic only, Windows runners: reports the compilers on PATH, the GNU
# Rust target, MSYS variables and any BINDGEN settings. Missing tools are
# reported in the log rather than failing the step.
- name: Verify Windows toolchain (Windows only)
  if: runner.os == 'Windows'
  shell: bash
  run: |
    echo "=== Windows Toolchain Verification ==="
    echo "Checking MinGW/GCC toolchain..."
    which gcc || echo "gcc not found in PATH"
    which g++ || echo "g++ not found in PATH"
    gcc --version 2>/dev/null || echo "gcc version check failed"
    g++ --version 2>/dev/null || echo "g++ version check failed"
    echo ""
    echo "Checking x86_64-pc-windows-gnu target..."
    rustup target list | grep x86_64-pc-windows-gnu
    echo ""
    echo "Checking linker..."
    rustc --print target-cpus 2>/dev/null | head -5 || true
    echo ""
    echo "PATH (first 10 entries):"
    echo "$PATH" | tr ':' '\n' | head -10
    echo ""
    echo "MSYSTEM: ${MSYSTEM:-not set}"
    echo "MSYSTEM_PREFIX: ${MSYSTEM_PREFIX:-not set}"
    echo ""
    echo "=== Bindgen Configuration ==="
    env | grep BINDGEN || echo "No BINDGEN vars set yet"
# Windows only, best effort: removes stale Cargo fingerprint directories from
# the workspace target dir and from C:\t before the build. Never fails the job.
- name: Clean cargo fingerprints to avoid Windows path conflicts
  if: runner.os == 'Windows'
  shell: pwsh
  run: |
    Write-Host "Cleaning Cargo fingerprints (Windows MAX_PATH mitigation uses different target dir)"
    # DO NOT clean packages/ruby/tmp - rb_sys needs its build state there
    # The fingerprint errors occur because rb_sys uses packages/ruby/tmp as its target dir
    # and deleting it causes Cargo to fail finding expected fingerprints

    # Clean regular target directory fingerprints.
    # Cargo writes fingerprints to target/<profile>/.fingerprint (or
    # target/<triple>/<profile>/.fingerprint), never to target/.fingerprint,
    # so guard on the target directory itself and search it recursively.
    $targetPath = "target"
    if (Test-Path $targetPath) {
      Write-Host "Cleaning target/.fingerprint..."
      Get-ChildItem -Path $targetPath -Filter ".fingerprint" -Directory -Recurse -Force -ErrorAction SilentlyContinue |
        Remove-Item -Recurse -Force -ErrorAction SilentlyContinue
    }

    # Clean C:\t directory (the actual CARGO_TARGET_DIR on Windows)
    $ctPath = "C:\t"
    if (Test-Path $ctPath) {
      Write-Host "Cleaning C:\t (CARGO_TARGET_DIR)..."
      # Remove .fingerprint directories recursively (best effort).
      Get-ChildItem -Path $ctPath -Filter ".fingerprint" -Directory -Recurse -Force -ErrorAction SilentlyContinue |
        Remove-Item -Recurse -Force -ErrorAction SilentlyContinue
      try {
        # Also try to clean the whole directory if possible. -ErrorAction Stop
        # makes a failure reach the catch block, so the success message is
        # only printed when the directory really was removed.
        Remove-Item -Path $ctPath -Recurse -Force -ErrorAction Stop
        Write-Host "Successfully cleaned C:\t"
      } catch {
        Write-Host "Warning: Could not fully clean C:\t, but fingerprints removed: $_"
      }
    } else {
      Write-Host "C:\t not found (will be created during build)"
    }
    Write-Host "Done (kept packages/ruby/tmp for rb_sys build state)"
# Compiles kreuzberg-ffi in release mode, mirroring all output into
# /tmp/cargo-build.log. On failure the handler prints the log tail (plus
# Windows-specific greps) and exits with the captured status.
- name: Build FFI library
  id: ffi
  shell: bash
  run: |
    echo "=== Building FFI Library ==="
    echo "Platform: ${{ runner.os }}"
    echo "Building kreuzberg-ffi from source (no caching)"
    echo ""
    echo "Current directory: $(pwd)"
    echo "Target directory: ${CARGO_TARGET_DIR:-target}"
    echo ""

    # Set verbose cargo output for Windows to diagnose failures
    if [ "${{ runner.os }}" = "Windows" ]; then
      export CARGO_LOG="cargo::core::compiler::fingerprint=info,cargo::core::compiler::build_context=debug"
      export RUST_BACKTRACE=1
      echo "Windows build: Using verbose cargo logging"
      echo "CARGO_LOG=$CARGO_LOG"
      echo "Target: x86_64-pc-windows-gnu"
      echo ""
    fi

    # Build with verbose output
    echo "Starting cargo build..."
    cargo build --release --package kreuzberg-ffi --verbose --message-format short 2>&1 | tee /tmp/cargo-build.log || {
      EXIT_CODE=$?
      echo ""
      echo "=== BUILD FAILED (exit code: $EXIT_CODE) ==="
      echo ""
      echo "=== Last 100 lines of build output ==="
      tail -100 /tmp/cargo-build.log || true
      echo ""
      echo "=== Checking for common Windows build issues ==="
      if [ "${{ runner.os }}" = "Windows" ]; then
        echo "Checking for linker errors..."
        grep -i "linker.*error\|ld.*error\|undefined reference" /tmp/cargo-build.log | tail -20 || echo "No linker errors found in log"
        echo ""
        echo "Checking for bindgen errors..."
        grep -i "bindgen.*error\|clang.*error" /tmp/cargo-build.log | tail -20 || echo "No bindgen errors found in log"
        echo ""
        echo "Checking GCC/MinGW availability..."
        which gcc g++ x86_64-w64-mingw32-gcc 2>/dev/null || echo "MinGW tools not in PATH"
      fi
      exit $EXIT_CODE
    }

    echo ""
    echo "=== Build completed successfully ==="
# Diagnostic only, Windows runners: lists the DLL / LIB / archive files in the
# release output directory and prints the rustup and cargo home locations.
- name: Print post-FFI build status (Windows)
  if: runner.os == 'Windows'
  shell: pwsh
  run: |
    Write-Host "=== Windows Rust Build Status ==="
    Write-Host "Checking for compiled FFI libraries..."
    # Honour CARGO_TARGET_DIR when it is set (the cleanup step treats C:\t as
    # the Windows target dir); a hard-coded target\release would otherwise
    # report a missing directory even after a successful build.
    $targetRoot = if ($env:CARGO_TARGET_DIR) { $env:CARGO_TARGET_DIR } else { "target" }
    $libPath = Join-Path $targetRoot "release"
    if (Test-Path $libPath) {
      Write-Host "Target directory exists: $libPath"
      Get-ChildItem -Path $libPath -Filter "*.dll" | ForEach-Object { Write-Host "  DLL: $($_.Name)" }
      Get-ChildItem -Path $libPath -Filter "*.lib" | ForEach-Object { Write-Host "  LIB: $($_.Name)" }
      Get-ChildItem -Path $libPath -Filter "*.a" | ForEach-Object { Write-Host "  Archive: $($_.Name)" }
    } else {
      Write-Host "ERROR: Target directory not found: $libPath"
    }
    Write-Host ""
    Write-Host "=== Rust linker info ==="
    $rustupHome = $env:RUSTUP_HOME
    Write-Host "RUSTUP_HOME: $rustupHome"
    $cargoHome = $env:CARGO_HOME
    Write-Host "CARGO_HOME: $cargoHome"
# Runs the `ruby:build:ci` task, mirroring output into /tmp/gem-build.log.
# On failure the handler prints the log tail (plus Windows diagnostics) and
# exits with the captured status.
- name: Build Ruby gem
  shell: bash
  env:
    RB_SYS_VERBOSE: "1"
    CARGO_BUILD_JOBS: "1"
    RUST_BACKTRACE: "1"
  run: |
    echo "=== Building Ruby Gem ==="
    echo "Platform: ${{ runner.os }}"
    echo ""
    echo "=== Ruby Build Environment ==="
    echo "Ruby version: $(ruby --version)"
    echo "Ruby platform: $(ruby -e 'puts RUBY_PLATFORM')"
    echo "RbConfig CONFIG: $(ruby -rrbconfig -e 'puts RbConfig::CONFIG["host"]')"
    echo ""

    if [ "${{ runner.os }}" = "Windows" ]; then
      echo "=== Windows-specific environment ==="
      echo "Checking for required libraries in target/release..."
      ls -lh target/release/*.{dll,a} 2>/dev/null || echo "No DLL/archive files found yet"
      echo ""
    fi

    task ruby:build:ci 2>&1 | tee /tmp/gem-build.log || {
      EXIT_CODE=$?
      echo ""
      echo "=== GEM BUILD FAILED (exit code: $EXIT_CODE) ==="
      echo ""
      echo "=== Last 100 lines of gem build output ==="
      tail -100 /tmp/gem-build.log || true
      echo ""
      if [ "${{ runner.os }}" = "Windows" ]; then
        echo "=== Windows gem build diagnostics ==="
        echo "Checking for native extension errors..."
        grep -i "error\|failed\|fatal" /tmp/gem-build.log | tail -30 || echo "No obvious errors found"
        echo ""
        echo "Checking mkmf.log if exists..."
        find packages/ruby -name "mkmf.log" -exec tail -50 {} \; 2>/dev/null || echo "mkmf.log not found"
      fi
      exit $EXIT_CODE
    }
# Diagnostic only; runs even after a failed build so the log shows what was
# (or was not) produced under packages/ruby.
- name: Print gem build artifacts
  if: always()
  shell: bash
  run: |
    echo "=== Gem build artifacts ==="
    cd packages/ruby
    if [ -d "pkg" ]; then
      echo "Gems built:"
      ls -lh pkg/*.gem 2>/dev/null || echo "No gem files found"
    else
      echo "ERROR: pkg directory not found"
    fi
    echo ""
    echo "=== Ruby native extension artifacts ==="
    find lib -name "*.so" -o -name "*.dll" -o -name "*.dylib" 2>/dev/null | head -10
    echo ""
    echo "=== Compilation log (if exists) ==="
    if [ -f "mkmf.log" ]; then
      tail -50 mkmf.log
    fi
# Publishes the built gem as the `gem-<os>` artifact, which the test-ruby job
# downloads by the same name.
- name: Upload gem
  uses: actions/upload-artifact@v6
  with:
    name: gem-${{ matrix.os }}
    path: packages/ruby/pkg/*.gem
    retention-days: 7
    # The action's default is `warn`, which would let a build that produced
    # no gem pass here and only fail later when test-ruby tries to download
    # the artifact. Fail at the source instead.
    if-no-files-found: error
# Diagnostic only; runs regardless of earlier failures. Missing files are
# reported in the log, never as a step failure.
- name: Verify FFI build artifacts
  if: always()
  shell: bash
  run: |
    echo ""
    echo "=== FFI Build Artifacts ==="
    echo "Platform: ${{ matrix.os }}"
    echo ""
    echo "FFI artifacts:"
    echo "  target/release/:"
    ls -lh target/release/libkreuzberg_ffi* 2>/dev/null || echo "    (none found)"
    if [[ -d "target/x86_64-pc-windows-gnu/release" ]]; then
      echo "  target/x86_64-pc-windows-gnu/release/:"
      ls -lh target/x86_64-pc-windows-gnu/release/libkreuzberg_ffi* 2>/dev/null || echo "    (none found)"
    fi
    echo "  pkg-config file:"
    ls -lh crates/kreuzberg-ffi/kreuzberg-ffi.pc 2>/dev/null || echo "    (not found)"
# Only on failure: collects the cargo / gem build logs and any mkmf or
# rb_sys logs. Absent files are ignored rather than treated as an error.
# NOTE(review): the logs are written to /tmp from bash steps; on Windows
# runners that path may not resolve to the same location for this action -
# confirm the Windows logs are actually collected.
- name: Upload build logs (on failure)
  if: failure()
  uses: actions/upload-artifact@v6
  with:
    name: build-logs-${{ matrix.os }}
    retention-days: 7
    if-no-files-found: ignore
    path: |
      /tmp/cargo-build.log
      /tmp/gem-build.log
      packages/ruby/**/mkmf.log
      packages/ruby/tmp/**/*.log

# Always runs, including after a failed or cancelled build.
- name: Cleanup Rust cache
  if: always()
  uses: ./.github/actions/cleanup-rust-cache
# Downloads the gem produced by build-ruby-gem for the same OS, installs it,
# compiles the local native extension and runs the Ruby unit and E2E tasks.
test-ruby:
  name: Ruby Tests (${{ matrix.os }})
  # Do not run for Dependabot-triggered events.
  if: ${{ github.actor != 'dependabot[bot]' }}
  needs: build-ruby-gem
  runs-on: ${{ matrix.os }}
  timeout-minutes: 180
  strategy:
    fail-fast: true
    matrix:
      os:
        - ubuntu-latest
        - ubuntu-24.04-arm
        - macos-latest
        - windows-latest
  env:
    RUSTC_WRAPPER: ""
  # Note: BUNDLE_GEMFILE is set per-step to avoid Windows path issues.
  # On Windows, github.workspace paths like D:/a/... get mangled to D:/d/a/... in bash shells.
  steps:
# Check out the repository first: every later step uses repo-local actions.
- uses: actions/checkout@v4

- name: Install system dependencies
  uses: ./.github/actions/install-system-deps

# Uses its own cache key prefix (ruby-test-*), separate from the build job.
- name: Setup Rust
  uses: ./.github/actions/setup-rust
  with:
    cache-key-prefix: ruby-test-${{ matrix.os }}
    toolchain: ${{ runner.os == 'Windows' && 'stable-x86_64-pc-windows-gnu' || '' }}
    target: ${{ runner.os == 'Windows' && 'x86_64-pc-windows-gnu' || '' }}

# Bundler is not installed or cached by setup-ruby here; the next step
# installs it through the project script.
- name: Setup Ruby
  uses: ruby/setup-ruby@v1
  with:
    ruby-version: "3.2"
    bundler: none
    bundler-cache: false
    working-directory: packages/ruby

- name: Install Bundler
  shell: bash
  run: scripts/ci/ruby/install-bundler.sh

# Exported through GITHUB_ENV so later steps on non-Windows runners see it.
- name: Set BUNDLE_GEMFILE (Unix)
  if: runner.os != 'Windows'
  shell: bash
  run: |
    echo "BUNDLE_GEMFILE=${{ github.workspace }}/packages/ruby/Gemfile" >> "$GITHUB_ENV"

- name: Install Ruby dependencies
  working-directory: packages/ruby
  shell: bash
  run: bundle install
- name: Install Ruby deps
  if: runner.os != 'Windows'
  shell: bash
  run: scripts/ci/ruby/install-ruby-deps.sh

- name: Configure short paths for Windows MAX_PATH mitigation
  if: runner.os == 'Windows'
  shell: pwsh
  run: pwsh -File scripts/ci/ruby/windows-max-path-mitigation.ps1

- name: Install Ruby deps (Windows)
  if: runner.os == 'Windows'
  shell: pwsh
  run: scripts/ci/ruby/install-ruby-deps.ps1

- name: Install rb_sys build helper
  shell: bash
  run: gem install rb_sys -v "~> 0.9"

# Fetches the `gem-<os>` artifact uploaded by build-ruby-gem for this OS.
- name: Download gem
  uses: actions/download-artifact@v7
  with:
    name: gem-${{ matrix.os }}
    path: packages/ruby/pkg/

- name: Configure bindgen compatibility headers (Windows)
  if: runner.os == 'Windows'
  shell: pwsh
  run: scripts/ci/ruby/configure-bindgen-windows.ps1
# Diagnostic only: Ruby version/platform, installed gems, and the dynamic
# library search paths as seen before the tests run.
- name: Print test Ruby environment
  shell: bash
  run: |
    echo "=== Pre-test Ruby environment ==="
    echo "Ruby version: $(ruby --version)"
    echo "Ruby platform: $(ruby -e 'puts RUBY_PLATFORM')"
    echo ""
    echo "=== Installed gems ==="
    gem list
    echo ""
    echo "=== LD_LIBRARY_PATH / DYLD_LIBRARY_PATH ==="
    echo "LD_LIBRARY_PATH: ${LD_LIBRARY_PATH:-<not set>}"
    echo "DYLD_LIBRARY_PATH: ${DYLD_LIBRARY_PATH:-<not set>}"
# The version comes from the workflow-level PDFIUM_VERSION variable.
- name: Download PDFium
  uses: ./.github/actions/download-pdfium
  with:
    pdfium-version: ${{ env.PDFIUM_VERSION }}

# Destination follows CARGO_TARGET_DIR when it is set, otherwise `target`.
- name: Stage PDFium runtime
  uses: ./.github/actions/stage-pdfium-runtime
  with:
    destination: ${{ env.CARGO_TARGET_DIR || 'target' }}/release

- name: Install gem
  shell: bash
  run: scripts/ci/ruby/install-gem.sh

- name: Configure Tesseract build environment (Windows)
  if: runner.os == 'Windows'
  shell: pwsh
  run: scripts/ci/ruby/configure-tesseract-windows.ps1

# Runs on every platform via the Python vendoring script.
- name: Vendor kreuzberg core for native extension
  shell: bash
  run: python3 scripts/ci/ruby/vendor-kreuzberg-core.py
# Diagnostic only: shows the ext/lib/tmp directories under packages/ruby, the
# top level of ext/kreuzberg_rb, and Ruby's RbConfig before compiling.
- name: Print pre-extension compilation status
  shell: bash
  run: |
    echo "=== Pre-extension compilation status ==="
    echo "packages/ruby directory contents:"
    cd packages/ruby && find . -maxdepth 1 -type d \( -name ext -o -name lib -o -name tmp \)
    echo ""
    echo "Native extension directory:"
    if [ -d "ext/kreuzberg_rb" ]; then
      # -maxdepth is a global option: state it once, before the tests, and
      # group the alternatives. Repeating it after -type makes GNU find warn
      # and lists exactly the same entries.
      find ext/kreuzberg_rb -maxdepth 1 \( -type f -o -type d \) | head -20
    fi
    echo ""
    echo "=== Ruby gem environment ==="
    ruby -e "require 'rbconfig'; puts RbConfig::CONFIG"
- name: Build local native extension
  shell: bash
  run: scripts/ci/ruby/compile-extension.sh

# Diagnostic only; runs even when compilation failed so the log shows which
# files exist under packages/ruby and the tail of mkmf.log if present.
- name: Print post-extension compilation status
  if: always()
  shell: bash
  run: |
    echo "=== Post-extension compilation status ==="
    cd packages/ruby
    echo "lib directory contents:"
    if [ -d "lib" ]; then
      find lib -type f | head -20
    else
      echo "ERROR: lib directory not found"
    fi
    echo ""
    echo "=== Looking for compiled extension ==="
    find . -name "*.so" -o -name "*.dll" -o -name "*.dylib" 2>/dev/null | head -20
    echo ""
    if [ -f "mkmf.log" ]; then
      echo "=== mkmf.log (last 100 lines) ==="
      tail -100 mkmf.log
    fi
# Task is installed here because the two test steps below invoke it.
- name: Install Task
  uses: ./.github/actions/install-task

# Built in release mode before the test tasks run.
- name: Build kreuzberg CLI binary
  shell: bash
  run: cargo build --release --package kreuzberg-cli

- name: Run Ruby tests
  shell: bash
  run: task ruby:test:ci

- name: Run E2E tests
  shell: bash
  run: task ruby:e2e:test