Skip to content

[zephyr] Fix coordinator loop crash causing silent pipeline hangs #6045

[zephyr] Fix coordinator loop crash causing silent pipeline hangs

[zephyr] Fix coordinator loop crash causing silent pipeline hangs #6045

# Unit-test workflow for the Marin package.
name: Marin - Tests

# Triggers: every push to main, and every pull request.
on:
  push:
    branches:
      - main
  pull_request:

# One active run per PR (or per ref when there is no PR number);
# a newer run in the same group cancels the one still in progress.
concurrency:
  group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }}
  cancel-in-progress: true
jobs:
  # Gate job: reports whether this change touched any path the Marin tests
  # depend on, so the test job below can be skipped otherwise.
  changes:
    runs-on: ubuntu-latest
    permissions:
      contents: read
      pull-requests: read
    outputs:
      # Exposes the result of the `relevant` filter defined in the step below.
      should_run: ${{ steps.filter.outputs.relevant }}
    steps:
      - uses: actions/checkout@v4
      - uses: dorny/paths-filter@v3
        id: filter
        with:
          filters: |
            relevant:
              - 'lib/marin/**'
              - 'tests/**'
              - 'uv.lock'
              - 'pyproject.toml'
              - '.github/workflows/marin-unit-tests.yaml'

  # Runs the Marin unit tests, only when the gate job reported a relevant change.
  marin-tests:
    needs: changes
    if: needs.changes.outputs.should_run == 'true'
    runs-on: ubuntu-latest
    timeout-minutes: 15
    # Least privilege: this job only needs to read the checked-out repository.
    permissions:
      contents: read
    steps:
      - name: Checkout code
        # Same major version as the checkout in the `changes` job.
        uses: actions/checkout@v4
      - name: Set up Python 3.12
        uses: actions/setup-python@v5
        with:
          # Quoted so the version is read as a string, not a float.
          python-version: "3.12"
      - name: Install uv
        uses: astral-sh/setup-uv@v6
        with:
          version: "0.7.20"
          enable-cache: true
      - name: Install dependencies
        run: |
          python -m pip install --upgrade pip
          uv sync --package marin --extra cpu --extra dedup --group test --frozen
      - name: Set up Node.js 20.10.0
        uses: actions/setup-node@v4
        with:
          node-version: "20.10.0"
      - name: Test with pytest
        env:
          HF_TOKEN: ${{ secrets.HF_TOKEN }}
          # Avoid Ray propagating `uv run` to workers (can trigger slow per-worker venv setup in CI).
          RAY_ENABLE_UV_RUN_RUNTIME_ENV: "0"
        run: |
          # Ensure we select the CPU torch wheels (and JAX CPU) for unit tests on GitHub runners.
          PYTHONPATH=tests:. uv run --package marin --extra cpu --frozen \
            pytest -n 2 --dist=worksteal --durations=5 --tb=short \
            -m 'not slow and not tpu_ci' -v tests/