# Build and publish nightly monarch wheels #686 — workflow file for this run.

name: Build and publish nightly monarch wheels

# Four triggers: nightly schedule, manual dispatch, pushes to main that touch
# .github/, and pull requests that touch the wheel-building files.
on:
  schedule:
    # Daily at midnight UTC.
    - cron: "0 0 * * *"
  workflow_dispatch:
  push:
    branches: [main]
    # Run as soon as any changes to .github actions land.
    paths:
      - ".github/**"
  # For testing changes to this workflow.
  pull_request:
    branches: [main, "gh/**"]
    paths:
      - .github/workflows/wheels.yml
      - .github/workflows/build-dist.yml
      - .github/workflows/build-macos.yml
      - Dockerfile.nightly
# Runs on refs/heads/main get a unique group (keyed on run_number), so they
# never cancel one another. Every other ref shares one group per ref, so a
# newer run cancels the older in-progress run for that ref.
concurrency:
  cancel-in-progress: true
  group: "${{ github.workflow }}-${{ github.ref == 'refs/heads/main' && github.run_number || github.ref }}"
jobs:
  # Linux x86_64 wheels for Python 3.10-3.13, built by the reusable
  # build-dist workflow.
  build-x86_64:
    uses: ./.github/workflows/build-dist.yml
    with:
      python-versions: '["3.10", "3.11", "3.12", "3.13"]'
      # Nightly wheels ship cu132 even though PR CI tests at cu130 (see
      # set-matrix.yaml): pytorch core does the same split — publish cu132
      # binaries, GPU-test only at cu130 — because no g4dn-fleet AMI yet
      # carries an NVIDIA driver that satisfies nvidia-container-cli's
      # cuda>=13.2 check.
      #
      # Building a wheel needs the CUDA toolkit (nvcc, headers, libs) but not
      # host-GPU access. The cuda13.2 builder image supplies the toolkit; we
      # set gpu-arch-type=cpu so test-infra's linux_job_v2.yml skips the
      # `setup-nvidia` step that mounts `--gpus all` and triggers
      # nvidia-container-cli's host-driver-vs-CUDA version check. Passing an
      # already-tagged docker-image makes test-infra use it verbatim instead
      # of auto-suffixing it from gpu-arch-type/version.
      runner: linux.c7i.2xlarge
      docker-image: 'pytorch/manylinux2_28-builder:cuda13.2'
      torch-spec: '--pre torch --extra-index-url https://download.pytorch.org/whl/nightly/cu132'
      gpu-arch-type: cpu
      gpu-arch-version: ''
      # NOTE(review): presumably this label feeds the artifact name
      # (publish-to-container-registry downloads
      # monarch-py3.12-cuda13.2-x86_64) — confirm in build-dist.yml.
      arch-label: cuda13.2
      platform: x86_64

  # Linux aarch64 wheels for the same Python versions, built with the aarch64
  # cuda13.2 builder image.
  # NOTE(review): unlike build-x86_64 this keeps gpu-arch-type=cuda and passes
  # no arch-label — confirm that difference is intentional.
  build-arm64:
    uses: ./.github/workflows/build-dist.yml
    with:
      python-versions: '["3.10", "3.11", "3.12", "3.13"]'
      runner: linux.arm64.2xlarge
      docker-image: 'pytorch/manylinuxaarch64-builder:cuda13.2'
      torch-spec: '--pre torch --extra-index-url https://download.pytorch.org/whl/nightly/cu132'
      gpu-arch-type: cuda
      gpu-arch-version: '13.2'
      platform: aarch64

  # macOS wheels, built by the separate build-macos workflow against the CPU
  # nightly torch index.
  build-macos:
    uses: ./.github/workflows/build-macos.yml
    with:
      python-versions: '["3.10", "3.11", "3.12", "3.13"]'
      runner: macos-latest
      torch-spec: '--pre torch --extra-index-url https://download.pytorch.org/whl/nightly/cpu'

  # Uploads every wheel produced by the three build jobs to PyPI. Only
  # scheduled and manually dispatched runs publish; pull_request and push runs
  # stop after the builds.
  publish-to-pypi:
    name: Publish to PyPI
    needs: [build-x86_64, build-arm64, build-macos]
    runs-on: ubuntu-latest
    environment: nightly
    if: ${{ github.event_name == 'schedule' || github.event_name == 'workflow_dispatch' }}
    permissions:
      id-token: write  # Required for PyPI trusted publishing
      contents: read
    steps:
      # No `name` filter: every artifact of this run is fetched, and
      # merge-multiple flattens them all into the single dist/ directory.
      - name: Download all artifacts
        uses: actions/download-artifact@v4
        with:
          path: dist
          merge-multiple: true
      - name: Display structure of downloaded files
        run: ls -R dist/
      - name: Publish to PyPI
        uses: pypa/gh-action-pypi-publish@release/v1
        with:
          verbose: true

  # Builds Dockerfile.nightly with the freshly built x86_64 wheel and pushes
  # the image to GHCR. Same publish gate as publish-to-pypi: scheduled and
  # manually dispatched runs only.
  # NOTE(review): build-arm64 is listed in `needs` although only the x86_64
  # wheel is consumed below — presumably so the image is published only when
  # both Linux builds succeed; confirm.
  publish-to-container-registry:
    name: Publish to Github Container Registry
    needs: [build-x86_64, build-arm64]
    runs-on: ubuntu-latest
    environment: nightly
    if: ${{ github.event_name == 'schedule' || github.event_name == 'workflow_dispatch' }}
    permissions:
      contents: read
      packages: write  # Grant write permission for packages (GHCR)
    steps:
      # This step is required to avoid the "no space left on device" error from
      # the large pytorch docker image.
      - name: Free Disk Space (Ubuntu)
        uses: jlumbroso/free-disk-space@v1.3.1
      - name: Checkout repository
        uses: actions/checkout@v6
      # Download the wheel outside the repo so it doesn't interfere with the
      # Docker build context (dist/ is in .dockerignore).
      - name: Download x86_64 Python 3.12 wheel
        uses: actions/download-artifact@v4
        with:
          # Docker container only has python 3.12, so we need to use the x86_64 3.12 wheel.
          # A single named artifact is extracted directly into `path`, so
          # merge-multiple is not needed here.
          name: monarch-py3.12-cuda13.2-x86_64
          path: ${{ runner.temp }}/monarch_wheels
      - name: Log in to the Container registry
        uses: docker/login-action@v3
        with:
          registry: ghcr.io
          username: ${{ github.actor }}
          password: ${{ secrets.GITHUB_TOKEN }}
      - name: Set up Docker Buildx
        uses: docker/setup-buildx-action@v3
      # `date -u` pins the tag's date to UTC, matching the UTC cron schedule
      # regardless of the runner's timezone; "$GITHUB_ENV" is quoted so the
      # redirect target is never word-split or globbed.
      - name: Get tag for publishing
        run: |
          echo "DOCKER_TAG=0.6.0.dev$(date -u +'%Y%m%d')-cuda13.2" >> "$GITHUB_ENV"
      - name: Build and push Docker image
        uses: docker/build-push-action@v6
        with:
          context: .  # Build context is the root directory
          file: ./Dockerfile.nightly
          push: true  # Push the image to the registry
          # Push to monarch-nightly package instead of monarch.
          tags: |
            ghcr.io/${{ github.repository }}-nightly:${{ env.DOCKER_TAG }}
            ghcr.io/${{ github.repository }}-nightly:latest
          # TODO: find docker tag that gets updated automatically.
          build-args: |
            PYTORCH_TAG=2.13.0.dev20260513-cuda13.2-cudnn9-runtime
          # Exposes the downloaded wheel directory to the Dockerfile as the
          # named build context `monarch-wheels`.
          build-contexts: |
            monarch-wheels=${{ runner.temp }}/monarch_wheels