Skip to content

Add ops-log skill and move logs out of gitignored path (#5039) #6979

Add ops-log skill and move logs out of gitignored path (#5039)

Add ops-log skill and move logs out of gitignored path (#5039) #6979

Workflow file for this run

name: Marin - Integration Test

# Starts a local Iris cluster on the runner, runs the Iris integration tests
# against it, then runs the Marin integration pipeline test.
on:
  push:
    branches:
      - main
  pull_request:
  workflow_dispatch:

jobs:
  marin-itest:
    # Run for every non-PR event (push to main, manual dispatch). For pull
    # requests, run only when the head branch lives in this repository.
    # Testing `!= 'pull_request'` rather than `== 'push'` matters: on
    # `workflow_dispatch` there is no `github.event.pull_request`, so the
    # repository comparison is false and the job would otherwise be skipped.
    # NOTE(review): fork PRs are presumably excluded because secrets such as
    # HF_TOKEN are not available to them - confirm.
    if: github.event_name != 'pull_request' || github.event.pull_request.head.repo.full_name == github.repository
    runs-on: ubuntu-latest
    timeout-minutes: 45
    concurrency:
      # One group per PR number, or per ref for non-PR events; a newer run in
      # the same group cancels the one still in progress.
      group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }}
      cancel-in-progress: true
    steps:
      - name: Checkout code
        uses: actions/checkout@v5

      - name: Set up Python 3.12
        uses: actions/setup-python@v6
        with:
          python-version: "3.12"

      - name: Set up Node.js
        uses: actions/setup-node@v4
        with:
          node-version: "22"

      - name: Install uv
        uses: astral-sh/setup-uv@v7
        with:
          enable-cache: true

      - name: Install dependencies
        # `--no-default-groups` keeps uv from also installing every workspace package's
        # dev/docs/test groups (e.g. `levanter[docs,test,dev]`), which saves multiple
        # minutes and avoids re-downloading CUDA wheels unnecessarily on cold caches.
        run: uv sync --all-packages --extra=cpu --extra=dedup --no-default-groups --group dev

      - name: Check df -h
        run: df -h

      - name: Start local Iris cluster
        run: |
          # Launch the cluster in the background; stdout and stderr both go to the log file.
          uv run iris --config lib/iris/examples/test.yaml \
            cluster start --local > /tmp/iris-cluster.log 2>&1 &
          CLUSTER_PID=$!
          # Export the PID so the "Stop cluster" step can terminate the process.
          echo "CLUSTER_PID=$CLUSTER_PID" >> "$GITHUB_ENV"
          # Wait for controller to print its URL (up to 120 polls, one second apart)
          for i in $(seq 1 120); do
            if grep -q "Controller started at" /tmp/iris-cluster.log 2>/dev/null; then
              URL=$(grep "Controller started at" /tmp/iris-cluster.log | head -n 1 | sed -n 's/.*Controller started at //p')
              # Export the URL for the test steps that follow.
              echo "IRIS_CONTROLLER_URL=$URL" >> "$GITHUB_ENV"
              echo "Cluster ready at $URL"
              break
            fi
            sleep 1
          done
          # URL is still unset/empty if the marker line never appeared: dump the log and fail.
          if [ -z "${URL:-}" ]; then
            echo "Cluster failed to start within timeout"
            cat /tmp/iris-cluster.log
            exit 1
          fi

      - name: Run iris integration tests
        # `-o "addopts="` overrides the `addopts` ini option with an empty value;
        # `-x` stops at the first failing test.
        run: |
          uv run pytest tests/integration/iris/ \
            --controller-url "$IRIS_CONTROLLER_URL" \
            -v -s --log-cli-level=INFO --tb=short --timeout=600 \
            -o "addopts=" \
            -x
        env:
          WANDB_MODE: disabled
          WANDB_API_KEY: ""
          # Quoted so YAML 1.1 parsers/linters do not read it as boolean false.
          JAX_TRACEBACK_FILTERING: "off"

      - name: Run full marin integration pipeline
        # The outer `timeout 600` bounds the whole pytest invocation, in addition
        # to the `--timeout=600` option passed to pytest itself.
        run: |
          timeout 600 uv run pytest tests/test_integration_test.py \
            -m integration -o "addopts=" --timeout=600 -v -s
        env:
          HF_TOKEN: ${{ secrets.HF_TOKEN }}
          WANDB_MODE: disabled
          WANDB_API_KEY: ""
          # Quoted so YAML 1.1 parsers/linters do not read it as boolean false.
          JAX_TRACEBACK_FILTERING: "off"

      - name: Stop cluster
        # Runs even when earlier steps failed; best-effort, so errors are ignored.
        if: always()
        run: kill "$CLUSTER_PID" 2>/dev/null || true

      - name: Show cluster logs on failure
        if: failure()
        run: cat /tmp/iris-cluster.log || true