# Nightly Github Workflow #89

# SPDX-FileCopyrightText: Copyright (c) 2023 - 2024 NVIDIA CORPORATION & AFFILIATES.
# SPDX-FileCopyrightText: All rights reserved.
# SPDX-License-Identifier: Apache-2.0
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
# This CI runs nightly to generate the coverage report and the testmon database.
# It runs ALL tests and caches the testmon database for use by PR workflows.
# The tests run here use only uv. This workflow is meant to provide nightly
# functionality testing AND a baseline dependency graph for PRs.
# TODO: The coverage limit is very low because this is not using GPU tests or
# the data-driven tests. Raise it again eventually.
# Workflow identity, triggers, token permissions, and environment shared by
# every job below.
name: Nightly Github UV Workflow

on:
  # Scheduled trigger: every night at 02:00 UTC.
  schedule:
    - cron: "0 2 * * *"
  # Manual trigger, so the nightly can also be started on demand.
  workflow_dispatch:

# Scopes granted to the workflow token for all jobs.
permissions:
  contents: read
  actions: read
  checks: write

env:
  # Passed to the bootstrap action as its python-version input.
  PYTHON_VERSION: "3.12"
  # Cache key prefixes. The jobs append a hash of uv.lock and pyproject.toml
  # to each prefix to form the full cache key.
  UV_CACHE_KEY_PREFIX: uv-cache-nightly-cuda12.8.1-cudnn-devel-ubuntu24.04-py3.12
  VENV_CACHE_KEY_PREFIX: uv-env-nightly-cuda12.8.1-cudnn-devel-ubuntu24.04-py3.12
  TESTMON_CACHE_KEY_PREFIX: testmon-nightly
  COVERAGE_CACHE_KEY_PREFIX: coverage-nightly
  # Kept as a quoted string so it reaches the environment as text, not a bool.
  # NOTE(review): presumably forces PyVista to render off-screen on the
  # headless runners - confirm.
  PYVISTA_OFF_SCREEN: "true"
jobs:
  # Stage 1: Build and cache the environment.
  # Publishes the uv package cache and the .venv directory to the Actions
  # cache whenever the setup action reports they were not restored from it.
  # The later stages restore .venv with fail-on-cache-miss, so they depend on
  # this job having saved it under the same key.
  build-environment:
    name: Build Environment
    runs-on: linux-amd64-cpu8
    container:
      image: nvidia/cuda:12.8.1-cudnn-devel-ubuntu24.04
    steps:
      - uses: actions/checkout@v4

      - name: Bootstrap cuDNN CI container
        uses: ./.github/actions/bootstrap-cudnn-ci
        with:
          python-version: ${{ env.PYTHON_VERSION }}

      # Nightly build-env is fail-open on cache miss by design; it can rebuild
      # from the lockfile.
      - name: Setup uv environment from cache
        id: setup-uv-env
        uses: ./.github/actions/setup-uv-env
        with:
          uv-cache-key-prefix: ${{ env.UV_CACHE_KEY_PREFIX }}
          venv-cache-key-prefix: ${{ env.VENV_CACHE_KEY_PREFIX }}
          cache-key-suffix: ${{ hashFiles('uv.lock', 'pyproject.toml') }}

      # Log the cache-hit outputs that gate the save steps below.
      - name: Report setup action outputs
        run: |
          echo "setup-uv-env.uv_cache_hit=${{ steps.setup-uv-env.outputs.uv_cache_hit }}"
          echo "setup-uv-env.venv_cache_hit=${{ steps.setup-uv-env.outputs.venv_cache_hit }}"

      # Only save when the package cache was not restored.
      - name: Save uv package cache
        if: steps.setup-uv-env.outputs.uv_cache_hit != 'true'
        uses: actions/cache/save@v4
        with:
          path: ~/.cache/uv
          key: ${{ env.UV_CACHE_KEY_PREFIX }}-${{ hashFiles('uv.lock', 'pyproject.toml') }}

      # The uv package cache has already been saved (or restored) above, so it
      # is removed to free disk space before .venv is archived.
      - name: Free disk space before caching
        if: steps.setup-uv-env.outputs.venv_cache_hit != 'true'
        run: |
          rm -rf ~/.cache/uv
          df -h

      # Key must match the one the downstream jobs restore from.
      - name: Save environment to cache
        if: steps.setup-uv-env.outputs.venv_cache_hit != 'true'
        uses: actions/cache/save@v4
        with:
          path: .venv
          key: ${{ env.VENV_CACHE_KEY_PREFIX }}-${{ hashFiles('uv.lock', 'pyproject.toml') }}

  # Stage 2: Run testmon tests and cache the database.
  testmon:
    name: Testmon
    needs: build-environment
    runs-on: linux-amd64-gpu-h100-latest-1
    container:
      image: nvidia/cuda:12.8.1-cudnn-devel-ubuntu24.04
    steps:
      - uses: actions/checkout@v4

      - name: Bootstrap cuDNN CI container
        uses: ./.github/actions/bootstrap-cudnn-ci
        with:
          python-version: ${{ env.PYTHON_VERSION }}

      # Downstream jobs are fail-closed on env cache miss: build-environment
      # must publish .venv first.
      - name: Restore prebuilt environment cache
        uses: actions/cache/restore@v4
        with:
          path: .venv
          key: ${{ env.VENV_CACHE_KEY_PREFIX }}-${{ hashFiles('uv.lock', 'pyproject.toml') }}
          fail-on-cache-miss: true

      # Sanity-check the restored interpreter and that torch imports.
      - name: Validate environment
        run: |
          .venv/bin/python --version
          uv run python -c "import torch; print(torch.__version__)"

      - name: Run core tests (collect all for testmon)
        run: |
          # This populates the testmon database for PR workflows
          uv run python -m pytest --testmon --ignore-glob="*docs*" --ignore-glob="*examples*"

      # The -shm and -wal paths are listed next to the main database file.
      # NOTE(review): the key depends only on uv.lock / pyproject.toml. An
      # existing cache entry is not overwritten by a later save with the same
      # key, so the database may only be refreshed when those files change -
      # confirm this is intended before changing the key, since PR workflows
      # presumably restore by it.
      - name: Save testmon database to cache
        uses: actions/cache/save@v4
        with:
          path: |
            .testmondata
            .testmondata-shm
            .testmondata-wal
          key: ${{ env.TESTMON_CACHE_KEY_PREFIX }}-${{ hashFiles('uv.lock', 'pyproject.toml') }}

  # Stage 3: Run coverage tests and upload artifacts.
  coverage:
    name: Coverage
    needs: build-environment
    runs-on: linux-amd64-gpu-h100-latest-1
    container:
      image: nvidia/cuda:12.8.1-cudnn-devel-ubuntu24.04
    steps:
      - uses: actions/checkout@v4

      - name: Bootstrap cuDNN CI container
        uses: ./.github/actions/bootstrap-cudnn-ci
        with:
          python-version: ${{ env.PYTHON_VERSION }}

      # Downstream jobs are fail-closed on env cache miss: build-environment
      # must publish .venv first.
      - name: Restore prebuilt environment cache
        uses: actions/cache/restore@v4
        with:
          path: .venv
          key: ${{ env.VENV_CACHE_KEY_PREFIX }}-${{ hashFiles('uv.lock', 'pyproject.toml') }}
          fail-on-cache-miss: true

      # Sanity-check the restored interpreter and that torch imports.
      - name: Validate environment
        run: |
          .venv/bin/python --version
          uv run python -c "import torch; print(torch.__version__)"

      # Core test suite under coverage; also writes a JUnit XML report.
      - name: Run core tests for coverage report
        run: |
          uv run coverage run --rcfile='test/coverage.pytest.rc' -m pytest --ignore-glob="*docs*" --ignore-glob="*examples*" --junitxml=coverage-core-report.xml

      # Doctests under coverage with their own rc file and JUnit XML report.
      - name: Run doc tests (testmon not supported for doctests)
        run: |
          uv run coverage run --rcfile='test/coverage.docstring.rc' -m pytest --doctest-modules physicsnemo/ --ignore-glob="*internal*" --ignore-glob="*experimental*" --junitxml=coverage-doctest-report.xml

      # The JUnit uploads run even after a failed test step (unless the run
      # was cancelled) so the test-reports job still has something to render.
      - name: Upload core test JUnit XML
        if: ${{ !cancelled() }}
        uses: actions/upload-artifact@v4
        with:
          name: junit-coverage-core
          path: coverage-core-report.xml

      - name: Upload doctest JUnit XML
        if: ${{ !cancelled() }}
        uses: actions/upload-artifact@v4
        with:
          name: junit-coverage-doctest
          path: coverage-doctest-report.xml

      # Caches the raw coverage data files before they are combined.
      # NOTE(review): same key shape as the testmon cache; an existing entry
      # with this key is not replaced - confirm that is acceptable.
      - name: Save coverage files to cache
        uses: actions/cache/save@v4
        with:
          path: .coverage*
          key: ${{ env.COVERAGE_CACHE_KEY_PREFIX }}-${{ hashFiles('uv.lock', 'pyproject.toml') }}

      # coverage's --omit option takes a single comma-separated pattern list
      # (--omit=PAT1,PAT2,...). Passing the flag several times keeps only the
      # last value, so all three patterns are given in one list here.
      # NOTE(review): omitting the test and internal paths as intended can
      # change the reported total; re-check the --fail-under threshold.
      - name: Merge coverage reports
        run: |
          uv run coverage combine
          uv run coverage report --show-missing --omit="*test*,*internal*,*experimental*" --fail-under=45
          uv run coverage html
          # Also create an XML report for potential CI integrations
          uv run coverage xml -o coverage.xml

      - name: Upload coverage HTML report
        uses: actions/upload-artifact@v4
        with:
          name: coverage-report-nightly
          path: htmlcov/
          retention-days: 7

      # upload-artifact v4 skips hidden files unless include-hidden-files is
      # set; without it the .coverage data file would be left out of the
      # artifact and only coverage.xml would be uploaded.
      - name: Upload combined coverage data
        uses: actions/upload-artifact@v4
        with:
          name: coverage-data-nightly
          path: |
            .coverage
            coverage.xml
          include-hidden-files: true
          retention-days: 30

  # Stage 4: Generate browsable test reports from JUnit XML.
  # Runs whenever the workflow was not cancelled, including after a failed
  # coverage job, so failing test runs still get a report.
  test-reports:
    name: Test Reports
    needs: [coverage]
    if: ${{ !cancelled() }}
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v4

      # Fetches every artifact whose name matches junit-*. The report paths
      # below expect each artifact in a directory named after it.
      - name: Download JUnit artifacts
        uses: actions/download-artifact@v4
        with:
          pattern: junit-*

      # fail-on-error is 'false' so test failures in the XML do not fail this
      # reporting job.
      - name: Core test report
        uses: dorny/test-reporter@v2
        with:
          name: Core Test Results
          path: junit-coverage-core/coverage-core-report.xml
          reporter: java-junit
          fail-on-error: 'false'

      - name: Doctest report
        uses: dorny/test-reporter@v2
        with:
          name: Doctest Results
          path: junit-coverage-doctest/coverage-doctest-report.xml
          reporter: java-junit
          fail-on-error: 'false'