Skip to content

chore: updated and linted #447

chore: updated and linted

chore: updated and linted #447

Workflow file for this run

# CI for the Java bindings: declares when the workflow runs, how concurrent
# runs are de-duplicated, and the environment shared by every job.
name: CI Java

# Run on pushes to, and pull requests against, `main` -- but only when a file
# matching one of the listed path filters changed. The two path lists are
# intentionally identical; keep them in sync when editing.
on:
  push:
    branches: [main]
    paths:
      - '.github/actions/**'
      - '.github/workflows/ci-java.yaml'
      - '.cargo/config.toml'
      - 'rust-toolchain.toml'
      - 'Cargo.toml'
      - 'Cargo.lock'
      - 'crates/kreuzberg/**'
      - 'crates/kreuzberg-ffi/**'
      - 'crates/kreuzberg-tesseract/**'
      - 'packages/java/**'
      - 'e2e/java/**'
      - 'test_documents/**'
      - 'fixtures/**'
      - 'tools/e2e-generator/**'
      # Already covered by 'packages/java/**' above; kept for explicitness.
      - 'packages/java/pom.xml'
      - 'scripts/ci/java/**'
      - 'scripts/ci/cache/**'
      - 'scripts/ci/actions/**'
  pull_request:
    branches: [main]
    paths:
      - '.github/actions/**'
      - '.github/workflows/ci-java.yaml'
      - '.cargo/config.toml'
      - 'rust-toolchain.toml'
      - 'Cargo.toml'
      - 'Cargo.lock'
      - 'crates/kreuzberg/**'
      - 'crates/kreuzberg-ffi/**'
      - 'crates/kreuzberg-tesseract/**'
      - 'packages/java/**'
      - 'e2e/java/**'
      - 'test_documents/**'
      - 'fixtures/**'
      - 'tools/e2e-generator/**'
      # Already covered by 'packages/java/**' above; kept for explicitness.
      - 'packages/java/pom.xml'
      - 'scripts/ci/java/**'
      - 'scripts/ci/cache/**'
      - 'scripts/ci/actions/**'

# One active run per pull request (or per ref when there is no pull request);
# a newer run in the same group cancels the one still in progress.
concurrency:
  group: ci-java-${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }}
  cancel-in-progress: true

# Environment shared by all jobs. Values are quoted so they stay strings.
env:
  CARGO_TERM_COLOR: always
  # Disable incremental compilation for Cargo builds.
  CARGO_INCREMENTAL: "0"
  # Build the dev profile without debug info.
  CARGO_PROFILE_DEV_DEBUG: "0"
  RUST_BACKTRACE: short
  # Minimum stack size for Rust-spawned threads, in bytes (16 MiB).
  RUST_MIN_STACK: "16777216"
  # Passed to the cache-pdfium / download-pdfium actions in the jobs.
  PDFIUM_VERSION: "7578"
  # NOTE(review): the three values below are not referenced by name in this
  # workflow; presumably they are read by the local actions or build scripts.
  PDFIUM_STATIC_VERSION: "7442b"
  # Passed to the setup-onnx-runtime action in the jobs.
  ORT_VERSION: "1.23.2"
  MACOSX_DEPLOYMENT_TARGET: "14.0"
  BUILD_PROFILE: "ci"
jobs:
  # Builds the kreuzberg-ffi native library and the Java bindings on every OS
  # in the matrix, then uploads the resulting JAR (artifact
  # `java-jar-<os>`) for the test job to download.
  build-java:
    name: Java Bindings (${{ matrix.os }})
    if: ${{ github.actor != 'dependabot[bot]' }}
    timeout-minutes: 180
    strategy:
      fail-fast: true
      matrix:
        os: [ubuntu-latest, ubuntu-24.04-arm, macos-latest, windows-latest]
    runs-on: ${{ matrix.os }}
    steps:
      - uses: actions/checkout@v4
      - name: Install system dependencies
        uses: ./.github/actions/install-system-deps
      - name: Free disk space before setup
        if: runner.os == 'Linux'
        uses: ./.github/actions/free-disk-space-linux
        with:
          show-initial: "true"
          show-final: "true"
      - name: Setup OpenSSL
        uses: ./.github/actions/setup-openssl
      - name: Setup Rust
        uses: ./.github/actions/setup-rust
        with:
          cache-key-prefix: java-${{ matrix.os }}
      - name: Setup Java
        uses: actions/setup-java@v4
        id: setup-java
        with:
          distribution: 'temurin'
          java-version: '25'
      # NOTE(review): the step name says Maven 3.9.11 but the action is called
      # `setup-maven-4`; confirm which version is actually installed and make
      # the name and the action agree.
      - name: Setup Maven 3.9.11
        uses: ./.github/actions/setup-maven-4
      - name: Cache PDFium
        uses: ./.github/actions/cache-pdfium
        with:
          pdfium-version: ${{ env.PDFIUM_VERSION }}
      - name: Download PDFium
        uses: ./.github/actions/download-pdfium
        with:
          pdfium-version: ${{ env.PDFIUM_VERSION }}
      - name: Stage PDFium runtime
        uses: ./.github/actions/stage-pdfium-runtime
        with:
          destination: target/release
      - name: Setup ONNX Runtime
        uses: ./.github/actions/setup-onnx-runtime
        with:
          ort-version: ${{ env.ORT_VERSION }}
          dest-dir: target/release
      - name: Install Task
        uses: ./.github/actions/install-task
      - name: Build FFI library
        id: ffi
        shell: bash
        run: |
          echo "=== Building FFI Library ==="
          echo "Building kreuzberg-ffi from source (no caching)"
          cargo build --release --package kreuzberg-ffi
      # Persist library search paths for later steps via $GITHUB_ENV.
      # Only macOS and Linux get a dynamic-loader path; Windows is left as is.
      - name: Set library paths for JNI
        shell: bash
        run: |
          echo "PKG_CONFIG_PATH=${{ github.workspace }}/target/release:${PKG_CONFIG_PATH:-}" >> "$GITHUB_ENV"
          if [[ "${{ runner.os }}" == "macOS" ]]; then
            echo "DYLD_LIBRARY_PATH=${{ github.workspace }}/target/release:${DYLD_LIBRARY_PATH:-}" >> "$GITHUB_ENV"
          elif [[ "${{ runner.os }}" == "Linux" ]]; then
            echo "LD_LIBRARY_PATH=${{ github.workspace }}/target/release:${LD_LIBRARY_PATH:-}" >> "$GITHUB_ENV"
          fi
      # Purely informational; every probe that may fail is guarded so the
      # step cannot break the build.
      - name: Log platform information
        shell: bash
        run: |
          echo "=== Platform Information ==="
          uname -a
          echo ""
          echo "=== Java Version ==="
          java -version 2>&1
          (java -XshowSettings:properties 2>&1 || true) | head -20
          echo ""
          echo "=== Maven Version ==="
          mvn --version
          echo ""
          echo "=== Disk Space ==="
          df -h
          echo ""
          echo "=== Memory Info ==="
          if [[ "$RUNNER_OS" == "macOS" ]]; then
            vm_stat || true
          elif [[ "$RUNNER_OS" == "Linux" ]]; then
            free -h
          elif [[ "$RUNNER_OS" == "Windows" ]]; then
            # This script runs under bash, so the PowerShell cmdlets must be
            # invoked through pwsh. `CsPhyicallyInstalledSystemMemory` is the
            # real (misspelled) Get-ComputerInfo property name.
            pwsh -NoProfile -Command 'Get-ComputerInfo | Select-Object -Property CsTotalPhysicalMemory, CsPhyicallyInstalledSystemMemory' || true
          fi
      - name: Build Java bindings
        shell: bash
        run: task java:build:ci
      - name: Upload JAR
        uses: actions/upload-artifact@v6
        with:
          name: java-jar-${{ matrix.os }}
          path: packages/java/target/*.jar
          retention-days: 7
      - name: Upload build JVM logs
        if: always()
        uses: actions/upload-artifact@v6
        with:
          name: java-build-logs-${{ matrix.os }}
          path: |
            packages/java/maven-jvm.log
          if-no-files-found: ignore
          retention-days: 7
      - name: Cleanup Rust cache
        if: always()
        uses: ./.github/actions/cleanup-rust-cache

  # Rebuilds the FFI library, downloads the JAR produced by build-java for the
  # same OS, and runs the unit and E2E test suites. Reports and JVM logs are
  # uploaded even when a test step fails.
  test-java:
    name: Java Tests (${{ matrix.os }})
    if: ${{ github.actor != 'dependabot[bot]' }}
    needs: build-java
    timeout-minutes: 180
    strategy:
      fail-fast: true
      matrix:
        os: [ubuntu-latest, ubuntu-24.04-arm, macos-latest, windows-latest]
    runs-on: ${{ matrix.os }}
    env:
      KREUZBERG_FFI_DIR: ${{ github.workspace }}/target/release
    steps:
      - uses: actions/checkout@v4
      - name: Install system dependencies
        uses: ./.github/actions/install-system-deps
      - name: Setup OpenSSL
        uses: ./.github/actions/setup-openssl
      - name: Setup Rust
        uses: ./.github/actions/setup-rust
        with:
          cache-key-prefix: java-tests-${{ matrix.os }}
      - name: Cache PDFium
        uses: ./.github/actions/cache-pdfium
        with:
          pdfium-version: ${{ env.PDFIUM_VERSION }}
      - name: Download PDFium
        uses: ./.github/actions/download-pdfium
        with:
          pdfium-version: ${{ env.PDFIUM_VERSION }}
      - name: Stage PDFium runtime
        uses: ./.github/actions/stage-pdfium-runtime
        with:
          destination: target/release
      - name: Setup ONNX Runtime
        uses: ./.github/actions/setup-onnx-runtime
        with:
          ort-version: ${{ env.ORT_VERSION }}
          dest-dir: target/release
      - name: Setup Java
        uses: actions/setup-java@v4
        id: setup-java
        with:
          distribution: 'temurin'
          java-version: '25'
      # NOTE(review): the step name says Maven 3.9.11 but the action is called
      # `setup-maven-4`; confirm which version is actually installed and make
      # the name and the action agree.
      - name: Setup Maven 3.9.11
        uses: ./.github/actions/setup-maven-4
      - name: Install Task
        uses: ./.github/actions/install-task
      - name: Download JAR
        uses: actions/download-artifact@v7
        with:
          name: java-jar-${{ matrix.os }}
          path: packages/java/target/
      # Create the directory that JAVA_TOOL_OPTIONS (in "Run Java tests")
      # points the JVM error file, log file and heap dump at.
      - name: Prepare JVM crash logs
        shell: bash
        run: mkdir -p packages/java/target/jvm-crash
      - name: Build FFI library
        id: ffi
        shell: bash
        run: |
          echo "=== Building FFI Library ==="
          echo "Building kreuzberg-ffi from source (no caching)"
          cargo build --release --package kreuzberg-ffi
      # Persist library search paths for later steps via $GITHUB_ENV.
      # Only macOS and Linux get a dynamic-loader path; Windows is left as is.
      - name: Set library paths for JNI
        shell: bash
        run: |
          echo "PKG_CONFIG_PATH=${{ github.workspace }}/target/release:${PKG_CONFIG_PATH:-}" >> "$GITHUB_ENV"
          if [[ "${{ runner.os }}" == "macOS" ]]; then
            echo "DYLD_LIBRARY_PATH=${{ github.workspace }}/target/release:${DYLD_LIBRARY_PATH:-}" >> "$GITHUB_ENV"
          elif [[ "${{ runner.os }}" == "Linux" ]]; then
            echo "LD_LIBRARY_PATH=${{ github.workspace }}/target/release:${LD_LIBRARY_PATH:-}" >> "$GITHUB_ENV"
          fi
      - name: Log test environment (pre-test diagnostics)
        shell: bash
        run: |
          echo "=== Test Environment Information ==="
          echo "OS: $RUNNER_OS"
          echo "Platform: $(uname -a)"
          echo ""
          echo "=== Java Configuration ==="
          java -version 2>&1
          java -XshowSettings:properties 2>&1 | grep -E "java.version|java.vm|java.home|java.library.path|sun.arch.data.model" || true
          echo ""
          echo "=== System Resources ==="
          if [[ "$RUNNER_OS" == "macOS" ]]; then
            system_profiler SPHardwareDataType | grep Memory | sed 's/^/Memory: /'
            echo "CPU: $(sysctl -n hw.ncpu) cores"
            # vm_stat reports page counts; ask the kernel for the page size
            # instead of assuming 4096 bytes, which is wrong on Apple silicon.
            page_size="$(sysctl -n hw.pagesize)"
            echo "Available memory: $(vm_stat | grep 'Pages free' | awk -v page_size="$page_size" '{print $3 * page_size / 1024 / 1024 / 1024}') GB"
          elif [[ "$RUNNER_OS" == "Linux" ]]; then
            lscpu | head -10
            echo "Memory:"
            free -h | head -2
          fi
          echo ""
          echo "=== Library Paths ==="
          echo "DYLD_LIBRARY_PATH: ${DYLD_LIBRARY_PATH:-not set}"
          echo "LD_LIBRARY_PATH: ${LD_LIBRARY_PATH:-not set}"
          echo "KREUZBERG_FFI_DIR: ${KREUZBERG_FFI_DIR:-not set}"
          echo ""
          echo "=== FFI Library Check ==="
          if [ -d "$KREUZBERG_FFI_DIR" ]; then
            echo "FFI directory contents:"
            ls -lh "$KREUZBERG_FFI_DIR"/{*.so,*.dylib,*.dll,libkreuzberg*} 2>/dev/null || true
          fi
      # JAVA_TOOL_OPTIONS routes the JVM error file, VM log and heap dump into
      # packages/java/target/jvm-crash so they can be uploaded below.
      - name: Run Java tests
        env:
          JAVA_TOOL_OPTIONS: >-
            -XX:ErrorFile=${{ github.workspace }}/packages/java/target/jvm-crash/hs_err_pid%p.log
            -XX:+UnlockDiagnosticVMOptions
            -XX:+LogVMOutput
            -XX:LogFile=${{ github.workspace }}/packages/java/target/jvm-crash/jvm.log
            -XX:+HeapDumpOnOutOfMemoryError
            -XX:HeapDumpPath=${{ github.workspace }}/packages/java/target/jvm-crash/heapdump.hprof
            -Xlog:os=info
          RUST_BACKTRACE: full
        run: task java:test:ci
      # Best-effort diagnostics when an earlier step failed on macOS.
      # NOTE(review): the DYLD_PRINT_* exports at the end only live in this
      # step's shell; they are not written to $GITHUB_ENV, so no later step
      # sees them despite the final message. Persist them or drop them.
      - name: Collect macOS system diagnostics (on failure)
        if: failure() && runner.os == 'macOS'
        shell: bash
        continue-on-error: true
        run: |
          echo "=== macOS System Diagnostics ==="
          echo ""
          echo "=== System Memory ==="
          vm_stat
          echo ""
          echo "=== Process Memory Usage ==="
          ps aux | head -10
          echo ""
          echo "=== Disk Usage ==="
          df -h
          echo ""
          echo "=== Network Configuration ==="
          ifconfig 2>/dev/null | grep -E "inet |flags" | head -20 || true
          echo ""
          echo "=== Recent System Logs (last 100 lines) ==="
          log show --last 5m --level debug 2>/dev/null | tail -100 || true
          echo ""
          echo "=== dyld diagnostics ==="
          export DYLD_PRINT_APIS=1
          export DYLD_PRINT_SEGMENTS=1
          export DYLD_PRINT_LIBRARIES=1
          echo "DYLD settings enabled for next test run"
      # The same Task target runs on every OS; only the shell differs.
      - name: Run E2E tests (Unix)
        if: runner.os != 'Windows'
        shell: bash
        run: task -vv e2e:java:test
      - name: Run E2E tests (Windows)
        if: runner.os == 'Windows'
        shell: pwsh
        run: task -vv e2e:java:test
      # The uploads below run with always() so reports and logs survive a
      # failed test step; missing files are ignored rather than treated as
      # errors.
      - name: Upload Java unit test reports
        if: always()
        uses: actions/upload-artifact@v6
        with:
          name: java-unit-reports-${{ matrix.os }}
          path: |
            packages/java/target/surefire-reports
          if-no-files-found: ignore
          retention-days: 7
      - name: Upload JVM crash logs
        if: always()
        uses: actions/upload-artifact@v6
        with:
          name: java-crash-logs-${{ matrix.os }}
          path: |
            packages/java/target/jvm-crash
          if-no-files-found: ignore
          retention-days: 7
      - name: Upload test JVM logs
        if: always()
        uses: actions/upload-artifact@v6
        with:
          name: java-test-jvm-logs-${{ matrix.os }}
          path: |
            packages/java/test-jvm.log
            packages/java/test-jvm-os.log
          if-no-files-found: ignore
          retention-days: 7
      - name: Upload Java E2E test reports
        if: always()
        uses: actions/upload-artifact@v6
        with:
          name: java-e2e-reports-${{ matrix.os }}
          path: |
            e2e/java/target/surefire-reports
          if-no-files-found: ignore
          retention-days: 7