# Workflow file for the run "chore: updated and linted" (#430).

name: CI Python

# Triggers: pushes to main and pull requests targeting main, limited to changes
# under the listed paths. The push and pull_request path lists are currently
# identical; keep them in sync when editing either one.
on:
  push:
    branches: [main]
    paths:
      - '.github/actions/**'
      - '.github/workflows/ci-python.yaml'
      - '.cargo/config.toml'
      - 'rust-toolchain.toml'
      - 'Cargo.toml'
      - 'Cargo.lock'
      - 'crates/kreuzberg/**'
      - 'crates/kreuzberg-py/**'
      - 'crates/kreuzberg-tesseract/**'
      - 'packages/python/**'
      - 'e2e/python/**'
      - 'test_documents/**'
      - 'fixtures/**'
      - 'tools/e2e-generator/**'
      - 'uv.lock'
      - 'pyproject.toml'
      - 'scripts/ci/python/**'
      - 'scripts/ci/cache/**'
      - 'scripts/ci/actions/**'
  pull_request:
    branches: [main]
    paths:
      - '.github/actions/**'
      - '.github/workflows/ci-python.yaml'
      - '.cargo/config.toml'
      - 'rust-toolchain.toml'
      - 'Cargo.toml'
      - 'Cargo.lock'
      - 'crates/kreuzberg/**'
      - 'crates/kreuzberg-py/**'
      - 'crates/kreuzberg-tesseract/**'
      - 'packages/python/**'
      - 'e2e/python/**'
      - 'test_documents/**'
      - 'fixtures/**'
      - 'tools/e2e-generator/**'
      - 'uv.lock'
      - 'pyproject.toml'
      - 'scripts/ci/python/**'
      - 'scripts/ci/cache/**'
      - 'scripts/ci/actions/**'
# Runs are grouped by pull request number, or by ref when the event has no
# pull request; a newer run cancels an in-progress run in the same group.
concurrency:
  group: ci-python-${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }}
  cancel-in-progress: true
# Workflow-level environment, visible to every job and step below.
# Numeric-looking values are quoted so they are unambiguously strings.
env:
  CARGO_TERM_COLOR: always
  CARGO_INCREMENTAL: "0"
  CARGO_PROFILE_DEV_DEBUG: "0"
  RUST_BACKTRACE: short
  RUST_MIN_STACK: "16777216"  # 16 * 1024 * 1024 bytes
  # Versions consumed by the PDFium / ONNX Runtime setup steps in the jobs.
  PDFIUM_VERSION: "7578"
  PDFIUM_STATIC_VERSION: "7442b"
  ORT_VERSION: "1.23.2"
  MACOSX_DEPLOYMENT_TARGET: "14.0"
  BUILD_PROFILE: "ci"
jobs:
  # Builds the CLI binary, the kreuzberg-py crate and the Python wheels for
  # each matrix target, uploads the wheels as `wheels-<platform>`, then runs
  # the wheel smoke-test script.
  build-and-smoke-python:
    name: Python Build + Smoke (${{ matrix.target }})
    if: ${{ github.actor != 'dependabot[bot]' }}
    timeout-minutes: 180
    strategy:
      fail-fast: true
      matrix:
        include:
          - os: ubuntu-latest
            target: x86_64-unknown-linux-gnu
            platform: linux-x86_64
          - os: ubuntu-24.04-arm
            target: aarch64-unknown-linux-gnu
            platform: linux-arm64
          - os: macos-latest
            target: aarch64-apple-darwin
            platform: macos-arm64
          - os: windows-latest
            target: x86_64-pc-windows-msvc
            platform: windows-x86_64
    runs-on: ${{ matrix.os }}
    steps:
      - uses: actions/checkout@v4
      - name: Install system dependencies
        uses: ./.github/actions/install-system-deps
      - name: Free disk space before setup
        if: startsWith(matrix.os, 'ubuntu')
        uses: ./.github/actions/free-disk-space-linux
        with:
          show-initial: "true"
          show-final: "true"
      - name: Setup OpenSSL
        uses: ./.github/actions/setup-openssl
      - name: Setup Rust
        uses: ./.github/actions/setup-rust
        with:
          cache-key-prefix: python-${{ matrix.platform }}
          target: ${{ matrix.target }}
      - name: Setup Python
        uses: ./.github/actions/setup-python-env
        with:
          python-version: "3.13"
          cache-prefix: wheels-${{ matrix.platform }}
      - name: Cache PDFium
        uses: ./.github/actions/cache-pdfium
        with:
          pdfium-version: ${{ env.PDFIUM_VERSION }}
      - name: Download PDFium
        uses: ./.github/actions/download-pdfium
        with:
          pdfium-version: ${{ env.PDFIUM_VERSION }}
      - name: Stage PDFium runtime
        uses: ./.github/actions/stage-pdfium-runtime
        with:
          destination: target/release
      - name: Setup ONNX Runtime
        uses: ./.github/actions/setup-onnx-runtime
        with:
          ort-version: ${{ env.ORT_VERSION }}
      - name: Clean previous wheel artifacts
        shell: bash
        run: scripts/ci/python/clean-artifacts.sh
      - name: Install Task
        uses: ./.github/actions/install-task
      - name: Build CLI with features
        shell: bash
        # RUNNER_OS is the runner-provided environment variable carrying the
        # same value as the `runner.os` context; reading it in the shell avoids
        # interpolating an expression into the script text.
        run: |
          echo "=== Building CLI Binary with Features ==="
          cargo build --release --package kreuzberg-cli --features all
          mkdir -p packages/python/kreuzberg
          if [ "$RUNNER_OS" = "Windows" ]; then
            cp target/release/kreuzberg.exe packages/python/kreuzberg/
          else
            cp target/release/kreuzberg packages/python/kreuzberg/
          fi
      - name: Build FFI library
        id: ffi
        shell: bash
        run: |
          echo "=== Building FFI Library ==="
          echo "Building kreuzberg-py from source (no caching)"
          cargo build --release --package kreuzberg-py
      - name: Build Python wheels
        id: wheels
        shell: bash
        run: |
          echo "=== Building Python Wheels ==="
          echo "Building Python wheels from source (no caching)"
          task python:build:ci
      - name: Report cache statistics
        # Diagnostic only: runs even when an earlier step failed.
        if: always()
        shell: bash
        run: |
          echo "=== Python Build Cache Statistics ==="
          echo "Platform: ${{ matrix.platform }}"
          echo "Target: ${{ matrix.target }}"
          if [ -d "target/wheels" ]; then
            echo "Wheels artifacts:"
            du -sh target/wheels || echo "target/wheels: empty"
            find target/wheels -maxdepth 1 -name "*.whl" -exec ls -lh {} \; 2>/dev/null | awk '{print $9, $5}' || echo "No wheel files found"
          fi
          if [ -d "packages/python/dist" ]; then
            echo "Python dist artifacts:"
            du -sh packages/python/dist || echo "packages/python/dist: empty"
            find packages/python/dist -maxdepth 1 -name "*.whl" -exec ls -lh {} \; 2>/dev/null | awk '{print $9, $5}' || echo "No dist wheel files found"
          fi
      - name: Upload wheels
        uses: actions/upload-artifact@v6
        with:
          name: wheels-${{ matrix.platform }}
          path: target/wheels/*.whl
          # test-python downloads this artifact by name, so an empty upload
          # should fail here instead of surfacing later as a missing artifact.
          if-no-files-found: error
          retention-days: 7
      - name: Cleanup Rust cache
        if: always()
        uses: ./.github/actions/cleanup-rust-cache
      - name: Smoke test wheel
        shell: bash
        run: scripts/ci/python/smoke-test-wheel.sh

  # Builds the source distribution on a single Linux runner and uploads it as
  # the `python-sdist` artifact.
  build-python-sdist:
    name: Python Sdist
    if: ${{ github.actor != 'dependabot[bot]' }}
    timeout-minutes: 180
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v4
      - name: Setup Rust
        uses: ./.github/actions/setup-rust
        with:
          cache-key-prefix: python-sdist
      - name: Setup Python
        uses: ./.github/actions/setup-python-env
        with:
          python-version: "3.13"
          cache-prefix: sdist
      - name: Install Task
        uses: ./.github/actions/install-task
      - name: Build sdist
        shell: bash
        run: task python:build:sdist
      - name: Upload sdist
        uses: actions/upload-artifact@v6
        with:
          name: python-sdist
          path: target/wheels/*.tar.gz
          retention-days: 7

  # Installs the wheel built by build-and-smoke-python for the same platform
  # and runs the Python test and E2E tasks against it.
  test-python:
    name: Python Tests (${{ matrix.os }})
    if: ${{ github.actor != 'dependabot[bot]' }}
    needs: build-and-smoke-python
    timeout-minutes: 180
    strategy:
      fail-fast: true
      matrix:
        # `platform` must match the `platform` value of the build matrix entry
        # for the same `os`; it selects which `wheels-<platform>` artifact is
        # downloaded below.
        include:
          - os: ubuntu-latest
            platform: linux-x86_64
            coverage: true
          - os: ubuntu-24.04-arm
            platform: linux-arm64
            coverage: false
          - os: macos-latest
            platform: macos-arm64
            coverage: false
          - os: windows-latest
            platform: windows-x86_64
            coverage: false
    runs-on: ${{ matrix.os }}
    env:
      # NOTE(review): these values are set as written, including the leading
      # `~`; confirm every consumer expands it to the home directory.
      HF_HOME: ~/.cache/huggingface
      TRANSFORMERS_CACHE: ~/.cache/huggingface
      RUSTC_WRAPPER: ''
    steps:
      - uses: actions/checkout@v4
      - name: Install system dependencies
        uses: ./.github/actions/install-system-deps
      - name: Setup Python
        uses: ./.github/actions/setup-python-env
        with:
          python-version: "3.13"
          cache-prefix: test-py-3.13
      - name: Cache HuggingFace models
        uses: actions/cache@v5
        with:
          path: ~/.cache/huggingface
          key: huggingface-${{ runner.os }}-py-3.13-${{ hashFiles('packages/python/pyproject.toml') || 'fallback' }}
      - name: Download wheels
        uses: actions/download-artifact@v7
        with:
          name: wheels-${{ matrix.platform }}
          path: dist/
      - name: Install wheel
        shell: bash
        run: scripts/ci/python/install-wheel.sh
      - name: Install SpaCy model (Linux only)
        if: startsWith(matrix.os, 'ubuntu')
        run: scripts/ci/python/install-spacy-model.sh
      - name: Download PDFium
        uses: ./.github/actions/download-pdfium
        with:
          pdfium-version: ${{ env.PDFIUM_VERSION }}
      - name: Stage PDFium runtime
        uses: ./.github/actions/stage-pdfium-runtime
        with:
          destination: target/release
      - name: Setup ONNX Runtime
        uses: ./.github/actions/setup-onnx-runtime
        with:
          ort-version: ${{ env.ORT_VERSION }}
      - name: Install Task
        uses: ./.github/actions/install-task
      - name: Run Python tests
        shell: bash
        run: task python:test:ci
      - name: Upload coverage
        # Only the matrix entry with `coverage: true` uploads a report.
        if: matrix.coverage
        # NOTE(review): referenced by branch name (`master`), so the action
        # code can change between runs; consider pinning a tag or commit SHA.
        uses: deepsourcelabs/test-coverage-action@master
        with:
          key: python
          coverage-file: packages/python/coverage.lcov
          dsn: ${{ secrets.DEEPSOURCE_DSN }}
      - name: Run E2E tests
        shell: bash
        env:
          PYTEST_TIMEOUT: "300"
        run: task python:e2e:test