# Page header captured with this file: "Nightly Github Workflow", run #38.

# SPDX-FileCopyrightText: Copyright (c) 2023 - 2024 NVIDIA CORPORATION & AFFILIATES.
# SPDX-FileCopyrightText: All rights reserved.
# SPDX-License-Identifier: Apache-2.0
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# This CI runs nightly to generate the coverage report and testmon database.
# It runs ALL tests and caches the testmon database for use by PR workflows.
# The tests run here will only use UV. This is meant to be nightly functionality
# testing AND a baseline dependency graph for PRs.
# TO DO: THE COVERAGE LIMIT IS VERY LOW, BECAUSE THIS IS NOT USING GPU TESTS OR
# THE DATA-DRIVEN TESTS. RAISE THIS UP AGAIN EVENTUALLY.
name: Nightly Github UV Workflow

on:
  schedule:
    # Run nightly at 2 AM UTC
    - cron: '0 2 * * *'
  # Allow manual triggering
  workflow_dispatch:

# Shared settings. The cache key prefixes encode the container image and the
# Python version, so changing either one starts a fresh cache lineage.
env:
  PYTHON_VERSION: "3.12"
  UV_CACHE_KEY_PREFIX: uv-cache-nightly-cuda13.1.1-cudnn-devel-ubuntu24.04-py3.12
  VENV_CACHE_KEY_PREFIX: uv-env-nightly-cuda13.1.1-cudnn-devel-ubuntu24.04-py3.12

jobs:
  # Stage 1: Build and cache the environment
  build-environment:
    name: Build Environment
    runs-on: linux-amd64-cpu8
    container:
      image: nvidia/cuda:13.1.1-cudnn-devel-ubuntu24.04
    steps:
      - uses: actions/checkout@v4

      - name: Bootstrap cuDNN CI container
        uses: ./.github/actions/bootstrap-cudnn-ci
        with:
          python-version: ${{ env.PYTHON_VERSION }}

      # Both caches are keyed on the lock file, pyproject.toml and this
      # workflow file. `restore-keys` allows falling back to the most recent
      # cache with the same prefix; in that case `cache-hit` is not 'true',
      # so the install and save steps below still run.
      - name: Restore uv package cache
        id: cache-uv-package-restore
        uses: actions/cache/restore@v4
        with:
          path: ~/.cache/uv
          key: ${{ env.UV_CACHE_KEY_PREFIX }}-${{ hashFiles('uv.lock', 'pyproject.toml', '.github/workflows/github-nightly-uv.yml') }}
          restore-keys: |
            ${{ env.UV_CACHE_KEY_PREFIX }}-

      - name: Restore venv cache
        id: cache-venv-restore
        uses: actions/cache/restore@v4
        with:
          path: .venv
          key: ${{ env.VENV_CACHE_KEY_PREFIX }}-${{ hashFiles('uv.lock', 'pyproject.toml', '.github/workflows/github-nightly-uv.yml') }}
          restore-keys: |
            ${{ env.VENV_CACHE_KEY_PREFIX }}-

      # The three install steps below are skipped on an exact venv cache hit.
      # Each one repeats the previous `uv sync` with one more extra added.
      - name: Install base dependencies with uv (dev + cu13)
        if: steps.cache-venv-restore.outputs.cache-hit != 'true'
        run: |
          # Install core dependencies and development group from pyproject/lock.
          uv sync \
            --group dev \
            --extra cu13 \
            --preview-features extra-build-dependencies

      - name: Install GNN dependencies from pyproject extras
        if: steps.cache-venv-restore.outputs.cache-hit != 'true'
        run: |
          # Extend the environment to include the gnns extra.
          uv sync \
            --group dev \
            --extra cu13 \
            --extra gnns \
            --preview-features extra-build-dependencies

      - name: Install perf dependencies from pyproject extras
        if: steps.cache-venv-restore.outputs.cache-hit != 'true'
        run: |
          # Extend the environment to include performance extra deps.
          uv sync \
            --group dev \
            --extra cu13 \
            --extra gnns \
            --extra perf \
            --preview-features extra-build-dependencies

      - name: Save uv package cache
        if: steps.cache-uv-package-restore.outputs.cache-hit != 'true'
        uses: actions/cache/save@v4
        with:
          path: ~/.cache/uv
          key: ${{ env.UV_CACHE_KEY_PREFIX }}-${{ hashFiles('uv.lock', 'pyproject.toml', '.github/workflows/github-nightly-uv.yml') }}

      # The uv download cache has already been saved above, so it can be
      # dropped to make room before the venv is archived.
      - name: Free disk space before caching
        if: steps.cache-venv-restore.outputs.cache-hit != 'true'
        run: |
          rm -rf ~/.cache/uv
          df -h

      - name: Save environment to cache
        if: steps.cache-venv-restore.outputs.cache-hit != 'true'
        uses: actions/cache/save@v4
        with:
          path: .venv
          key: ${{ env.VENV_CACHE_KEY_PREFIX }}-${{ hashFiles('uv.lock', 'pyproject.toml', '.github/workflows/github-nightly-uv.yml') }}

      # upload-artifact@v4 skips hidden files/directories by default and does
      # not preserve file permissions, so uploading `.venv` directly would
      # yield an empty or non-executable environment downstream. Ship a single
      # tarball instead; the consumer jobs unpack it.
      # NOTE(review): this writes a compressed copy of the venv next to the
      # original - confirm the runner has enough free disk for it.
      - name: Archive venv for artifact upload
        run: |
          tar -czf nightly-uv-venv.tar.gz .venv

      - name: Upload venv artifact
        uses: actions/upload-artifact@v4
        with:
          name: nightly-uv-venv
          path: nightly-uv-venv.tar.gz
          # The tarball is already gzip-compressed.
          compression-level: 0
          # Fail here rather than in the downstream jobs if nothing matched.
          if-no-files-found: error
          retention-days: 2

  # Stage 2: Run testmon tests and cache the database
  testmon:
    name: Testmon
    needs: build-environment
    runs-on: linux-amd64-gpu-h100-latest-1
    container:
      image: nvidia/cuda:13.1.1-cudnn-devel-ubuntu24.04
    steps:
      - uses: actions/checkout@v4

      - name: Bootstrap cuDNN CI container
        uses: ./.github/actions/bootstrap-cudnn-ci
        with:
          python-version: ${{ env.PYTHON_VERSION }}

      - name: Download venv artifact
        uses: actions/download-artifact@v4
        with:
          name: nightly-uv-venv

      # Recreates `.venv` in the workspace from the tarball built in stage 1.
      - name: Unpack venv
        run: |
          tar -xzf nightly-uv-venv.tar.gz
          rm nightly-uv-venv.tar.gz

      - name: Validate environment
        run: |
          .venv/bin/python --version
          uv run python -c "import torch; print(torch.__version__)"

      - name: Run core tests (collect all for testmon)
        run: |
          # This populates the testmon database for PR workflows
          uv run python -m pytest --testmon --ignore-glob="*docs*" --ignore-glob="*examples*"

      # The cache key below is fixed, so the previous entry is removed first
      # and the save step can then write a fresh one under the same key.
      # NOTE(review): deleting a cache needs a token with `actions: write`;
      # confirm the repository's default GITHUB_TOKEN permissions, because a
      # failed delete is hidden by `|| true`.
      - name: Delete old testmon cache
        run: |
          gh cache delete "testmon-nightly-latest" || true
        env:
          GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}

      - name: Save testmon database to cache
        uses: actions/cache/save@v4
        with:
          path: |
            .testmondata
            .testmondata-shm
            .testmondata-wal
          key: testmon-nightly-latest

  # Stage 3: Run coverage tests and upload artifacts
  coverage:
    name: Coverage
    needs: build-environment
    runs-on: linux-amd64-gpu-h100-latest-1
    container:
      image: nvidia/cuda:13.1.1-cudnn-devel-ubuntu24.04
    steps:
      - uses: actions/checkout@v4

      - name: Bootstrap cuDNN CI container
        uses: ./.github/actions/bootstrap-cudnn-ci
        with:
          python-version: ${{ env.PYTHON_VERSION }}

      - name: Download venv artifact
        uses: actions/download-artifact@v4
        with:
          name: nightly-uv-venv

      # Recreates `.venv` in the workspace from the tarball built in stage 1.
      - name: Unpack venv
        run: |
          tar -xzf nightly-uv-venv.tar.gz
          rm nightly-uv-venv.tar.gz

      - name: Validate environment
        run: |
          .venv/bin/python --version
          uv run python -c "import torch; print(torch.__version__)"

      - name: Run core tests for coverage report
        run: |
          uv run coverage run --rcfile='test/coverage.pytest.rc' -m pytest --ignore-glob="*docs*" --ignore-glob="*examples*"

      - name: Run doc tests (testmon not supported for doctests)
        run: |
          uv run coverage run --rcfile='test/coverage.docstring.rc' -m pytest --doctest-modules physicsnemo/ --ignore-glob="*internal*" --ignore-glob="*experimental*"

      # Same fixed-key replace pattern as the testmon cache.
      # NOTE(review): deleting a cache needs a token with `actions: write`;
      # confirm the repository's default GITHUB_TOKEN permissions, because a
      # failed delete is hidden by `|| true`.
      - name: Delete old coverage cache
        run: |
          gh cache delete "coverage-nightly-latest" || true
        env:
          GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}

      # Saved before `coverage combine` runs, so the cache holds the
      # coverage data files as they are at this point.
      - name: Save coverage files to cache
        uses: actions/cache/save@v4
        with:
          path: .coverage*
          key: coverage-nightly-latest

      - name: Merge coverage reports
        run: |
          uv run coverage combine
          # `--omit` takes ONE comma-separated pattern list; when the flag is
          # repeated only the last occurrence is used.
          uv run coverage report --show-missing --omit="*test*,*internal*,*experimental*" --fail-under=45
          uv run coverage html
          # Also create an XML report for potential CI integrations
          uv run coverage xml -o coverage.xml

      - name: Upload coverage HTML report
        uses: actions/upload-artifact@v4
        with:
          name: coverage-report-nightly
          path: htmlcov/
          retention-days: 7

      - name: Upload combined coverage data
        uses: actions/upload-artifact@v4
        with:
          name: coverage-data-nightly
          path: |
            .coverage
            coverage.xml
          # `.coverage` is a dotfile; upload-artifact skips hidden files
          # unless this is enabled.
          include-hidden-files: true
          retention-days: 30