[MISC] Migrate to native Python API for 'splashsurf'. #540
Workflow file for this run
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Production CI workflow.
#
# Runs on a self-hosted runner and dispatches the actual work to a Slurm cluster via `srun`, inside a
# container image. Unit tests only run for pull requests; benchmarks run for every trigger of this workflow.
name: Production

on:
  # Trigger the workflow on pushes to the main branch, or for any pull request
  push:
    branches:
      - main
  pull_request:

concurrency:
  # Cancel all workflows that are still running, if any, when updating branches associated with PRs,
  # BUT don't do anything for workflows that are not triggered by PRs.
  group: ${{ github.workflow }}-${{ github.head_ref || github.ref }}
  cancel-in-progress: ${{ github.event_name == 'pull_request' }}

jobs:
  linux-gpu:
    runs-on: [self-hosted, coreweave]

    env:
      # Note that secrets are not passed to workflows that are triggered by a pull request from a fork
      WANDB_API_KEY: ${{ secrets.WANDB_API_KEY }}
      HF_TOKEN: ${{ secrets.HF_TOKEN }}
      # NOTE(review): the `srun --export=...` lists below do not include HF_HUB_DOWNLOAD_TIMEOUT nor
      # MADRONA_DISABLE_CUDA_HEAP_SIZE, so they may not reach the container - confirm this is intended.
      HF_HUB_DOWNLOAD_TIMEOUT: "60"
      GENESIS_IMAGE_VER: "1_1"
      # Passed to `srun --time`
      TIMEOUT_MINUTES: "180"
      MADRONA_DISABLE_CUDA_HEAP_SIZE: "1"

    steps:
      - name: Checkout code
        uses: actions/checkout@v4
        with:
          # Checking out the full history is required because shallow cloning marks HEAD as "grafted". This
          # breaks remote tracking, thereby making it impossible to detect whether a commit is contained in
          # upstream main.
          fetch-depth: 0

      - name: Run unit tests
        if: github.event_name == 'pull_request'
        run: |
          # Unique job name, persisted for later steps so that the cleanup step can cancel this job
          SLURM_JOB_NAME="$(uuidgen)_$(date +%Y%m%d_%H%M%S)"
          echo "SLURM_JOB_NAME=${SLURM_JOB_NAME}" >> "${GITHUB_ENV}"

          mkdir -p "${HOME}/.cache"

          # Prefer idle nodes if any. Only nodes of the target partition are considered, and entries are
          # joined with commas because sinfo may print several lines, which would otherwise be split into
          # stray positional arguments of srun.
          PARTITION="hpc-mid"
          IDLE_NODES="$(sinfo -h -p "${PARTITION}" -o "%N %t" | awk '$2 == "idle" { printf "%s%s", sep, $1; sep = "," }')"
          NODELIST_ARGS=()
          if [[ -n "${IDLE_NODES}" ]]; then
            NODELIST_ARGS=(--nodelist="${IDLE_NODES}")
          fi

          # HF_TOKEN is exported by name, so that its value never appears on the command line
          srun \
            --container-image="/mnt/data/images/genesis-v${GENESIS_IMAGE_VER}.sqsh" \
            --container-mounts="${{ github.workspace }}":/root/workspace,"${HOME}/.cache":/root/.cache \
            --no-container-mount-home --container-workdir=/root/workspace \
            --export=HF_TOKEN,NVIDIA_DRIVER_CAPABILITIES=all \
            --partition="${PARTITION}" "${NODELIST_ARGS[@]}" --nodes=1 --time="${TIMEOUT_MINUTES}" \
            --job-name="${SLURM_JOB_NAME}" \
            bash -c "
              pip install -e '.[dev,render]' && \
              pytest -v --forked ./tests
            "

      - name: Run benchmarks
        run: |
          # Unique job name, persisted for later steps (cleanup, display and upload of the results)
          SLURM_JOB_NAME="$(uuidgen)_$(date +%Y%m%d_%H%M%S)"
          echo "SLURM_JOB_NAME=${SLURM_JOB_NAME}" >> "${GITHUB_ENV}"

          # WANDB_API_KEY is only forwarded for runs on the main branch of the upstream repository
          SLURM_ENV_VARS="NVIDIA_DRIVER_CAPABILITIES=all,HF_TOKEN"
          if [[ "${{ github.repository }}" == 'Genesis-Embodied-AI/Genesis' \
                && "${{ github.ref }}" == 'refs/heads/main' ]]; then
            SLURM_ENV_VARS="${SLURM_ENV_VARS},WANDB_API_KEY"
          fi

          # Remove `: #` prefixes to enable tmate interactive debugging session.
          # `${SLURM_JOB_NAME}` in the output path is expanded by this shell, before the command is sent.
          srun \
            --container-image="/mnt/data/images/genesis-v${GENESIS_IMAGE_VER}.sqsh" \
            --container-mounts=/mnt/data/artifacts:/mnt/data/artifacts,"${{ github.workspace }}":/root/workspace \
            --no-container-mount-home --container-workdir=/root/workspace \
            --export="${SLURM_ENV_VARS}" \
            --partition=hpc-mid --exclusive --nodes=1 --time="${TIMEOUT_MINUTES}" \
            --job-name="${SLURM_JOB_NAME}" \
            bash -c "
              : # sudo apt install -y tmate && \
                  tmate -S /tmp/tmate.sock new-session -d && \
                  tmate -S /tmp/tmate.sock wait tmate-ready && \
                  tmate -S /tmp/tmate.sock display -p '#{tmate_ssh}'
              pip install -e '.[dev,render]' && \
              pytest --print -x -m 'benchmarks' ./tests && \
              cat speed_test*.txt > '/mnt/data/artifacts/speed_test_${SLURM_JOB_NAME}.txt'
              : # tmate -S /tmp/tmate.sock wait tmate-exit
            "

      - name: Kill srun job systematically
        if: always()
        run: |
          # SLURM_JOB_NAME is only set once one of the srun steps above has started
          if [ -n "${SLURM_JOB_NAME}" ] ; then
            scancel --user="${USER}" --name="${SLURM_JOB_NAME}"
          fi

      - name: Display benchmark stats
        run: |
          cat "/mnt/data/artifacts/speed_test_${SLURM_JOB_NAME}.txt"

      - name: Upload benchmark stats as artifact
        uses: actions/upload-artifact@v4
        with:
          name: speed-test-results
          path: "/mnt/data/artifacts/speed_test_${{ env.SLURM_JOB_NAME }}.txt"