# Build Docker Images #139
# Viewer notice: This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
# Learn more about bidirectional Unicode characters
# Builds and publishes the project's Docker images.
#   - Marin cluster image: built once, smoke-tested, pushed to Google Artifact
#     Registry in six regions; a PR is then opened that bumps the image tag in
#     the cluster config.
#   - Iris (worker/controller/task), TPU CI and Levanter images: pushed to GHCR.
# Each job filters on the triggering event (and, for schedules, on the exact
# cron string), so the cron expressions under `on.schedule` and the string
# literals in the jobs' `if:` conditions must be kept in sync.
name: Build Docker Images

on:
  # Enable manual trigger.
  workflow_dispatch:
  # Scheduled builds.
  # Do not trigger too often because it can cause images that are still in use
  # to get evicted due to the "last N most recent images" retention policy.
  schedule:
    - cron: '0 2 * * 0'  # Weekly: Sundays at 2 AM UTC for cluster images and Levanter
    - cron: '0 3 * * *'  # Daily: 3 AM UTC for TPU CI images

permissions:
  contents: write
  pull-requests: write
  packages: write

jobs:
  marin-cluster-images:
    # Run on: manual trigger OR weekly schedule (02:00 UTC on Sundays)
    if: |
      github.event_name == 'workflow_dispatch' ||
      (github.event_name == 'schedule' && github.event.schedule == '0 2 * * 0')
    runs-on: ubuntu-latest
    steps:
      - name: Checkout repository
        uses: actions/checkout@v4

      - name: Set up Docker Buildx
        uses: docker/setup-buildx-action@v3

      # Exposes DATE_TAG (UTC date, YYYYMMDD) and HASH_TAG (short commit hash)
      # as step outputs for the push and config-update steps below.
      - name: Set tags
        id: set-tags
        run: |
          echo "DATE_TAG=$(date -u +%Y%m%d)" >> "$GITHUB_OUTPUT"
          echo "HASH_TAG=$(git rev-parse --short HEAD)" >> "$GITHUB_OUTPUT"

      - name: Authenticate to Google Cloud
        id: auth
        uses: google-github-actions/auth@v2
        with:
          token_format: access_token
          credentials_json: ${{ secrets.GCP_ARTIFACT_REPOSITORY_SA_KEY }}
          service_account: github-artifact-repository@hai-gcp-models.iam.gserviceaccount.com

      # One login per regional registry; all reuse the access token from `auth`.
      # Keep this list in sync with REGISTRIES in the push step.
      - name: Login to Google Artifact Registry in europe-west4
        uses: docker/login-action@v3
        with:
          registry: europe-west4-docker.pkg.dev
          username: oauth2accesstoken
          password: ${{ steps.auth.outputs.access_token }}

      - name: Login to Google Artifact Registry in us-central1
        uses: docker/login-action@v3
        with:
          registry: us-central1-docker.pkg.dev
          username: oauth2accesstoken
          password: ${{ steps.auth.outputs.access_token }}

      - name: Login to Google Artifact Registry in us-central2
        uses: docker/login-action@v3
        with:
          registry: us-central2-docker.pkg.dev
          username: oauth2accesstoken
          password: ${{ steps.auth.outputs.access_token }}

      - name: Login to Google Artifact Registry in us-east1
        uses: docker/login-action@v3
        with:
          registry: us-east1-docker.pkg.dev
          username: oauth2accesstoken
          password: ${{ steps.auth.outputs.access_token }}

      - name: Login to Google Artifact Registry in us-east5
        uses: docker/login-action@v3
        with:
          registry: us-east5-docker.pkg.dev
          username: oauth2accesstoken
          password: ${{ steps.auth.outputs.access_token }}

      - name: Login to Google Artifact Registry in us-west4
        uses: docker/login-action@v3
        with:
          registry: us-west4-docker.pkg.dev
          username: oauth2accesstoken
          password: ${{ steps.auth.outputs.access_token }}

      # Build into the local Docker daemon (--load) so the image can be
      # verified before anything is pushed.
      - name: Build Marin cluster image
        run: |
          docker buildx build --file docker/marin/Dockerfile.cluster \
            --provenance=false \
            --load \
            --tag marin_cluster:local \
            .

      - name: Verify Marin cluster image
        # TODO (rav): should we just run tests inside the container?
        run: |
          echo "Verifying critical imports work correctly..."
          docker run --rm marin_cluster:local python -c "
          import sys
          print(f'Python: {sys.version}')
          import numpy
          print(f'numpy: {numpy.__version__}')
          import pandas
          print(f'pandas: {pandas.__version__}')
          import ray
          print(f'ray: {ray.__version__}')
          import torch
          print(f'torch: {torch.__version__}')
          print('All critical imports verified successfully!')
          "

      # Tag and push the verified local image under latest/date/hash tags to
      # every regional registry.
      - name: Push Marin cluster image
        run: |
          REGISTRIES="europe-west4 us-central1 us-central2 us-east1 us-east5 us-west4"
          TAGS="latest ${{ steps.set-tags.outputs.DATE_TAG }} ${{ steps.set-tags.outputs.HASH_TAG }}"
          # REGISTRIES and TAGS are intentionally unquoted so they word-split.
          for registry in $REGISTRIES; do
            for tag in $TAGS; do
              image="${registry}-docker.pkg.dev/hai-gcp-models/marin/marin_cluster:${tag}"
              docker tag marin_cluster:local "${image}"
              docker push "${image}"
            done
          done

      - name: Install uv
        uses: astral-sh/setup-uv@v6
        with:
          version: "0.7.20"
          enable-cache: true

      # Rewrites the LATEST = "<tag>" assignment to the hash tag just pushed.
      - name: Update Docker image tag in cluster config script
        run: |
          sed -i -E 's/LATEST = "[a-z0-9]+"/LATEST = "${{ steps.set-tags.outputs.HASH_TAG }}"/' lib/marin/src/marin/cluster/config.py

      - name: Generate cluster configs
        run: uv run scripts/ray/cluster.py update-configs

      - name: Create pull request
        uses: peter-evans/create-pull-request@v6
        with:
          commit-message: Update Docker image tag
          title: Update Docker image tag
          branch: actions/update-docker-image-tag
          body: Auto-generated from GitHub Actions.

  # Iris Images - Worker, controller, and task images pushed to GHCR
  iris-images:
    # Run on: manual trigger OR weekly schedule (02:00 UTC on Sundays)
    if: |
      github.event_name == 'workflow_dispatch' ||
      (github.event_name == 'schedule' && github.event.schedule == '0 2 * * 0')
    runs-on: ubuntu-latest
    strategy:
      matrix:
        # One entry per image: its name, Dockerfile, and build context.
        include:
          - image: iris-worker
            dockerfile: lib/iris/Dockerfile.worker
            context: lib/iris
          - image: iris-controller
            dockerfile: lib/iris/Dockerfile.controller
            context: lib/iris
          - image: iris-task
            dockerfile: lib/iris/Dockerfile.task
            context: '.'
    steps:
      - name: Checkout repository
        uses: actions/checkout@v4

      - name: Set up Docker Buildx
        uses: docker/setup-buildx-action@v3

      - name: Set tags
        id: set-tags
        run: |
          echo "DATE_TAG=$(date -u +%Y%m%d)" >> "$GITHUB_OUTPUT"
          echo "HASH_TAG=$(git rev-parse --short HEAD)" >> "$GITHUB_OUTPUT"

      - name: Login to GitHub Container Registry
        uses: docker/login-action@v3
        with:
          registry: ghcr.io
          username: ${{ github.actor }}
          password: ${{ github.token }}

      - name: Build and push ${{ matrix.image }}
        run: |
          docker buildx build --file ${{ matrix.dockerfile }} \
            --provenance=false \
            --tag ghcr.io/marin-community/${{ matrix.image }}:latest \
            --tag ghcr.io/marin-community/${{ matrix.image }}:${{ steps.set-tags.outputs.DATE_TAG }} \
            --tag ghcr.io/marin-community/${{ matrix.image }}:${{ steps.set-tags.outputs.HASH_TAG }} \
            --push ${{ matrix.context }}

  # Marin TPU CI Images - For self-hosted TPU CI runners
  marin-tpu-ci-images:
    # Run on: manual trigger OR daily schedule (03:00 UTC).
    # NOTE(review): the 'push' clause below is currently unreachable because
    # this workflow's `on:` section declares no `push` trigger. Either add a
    # path-filtered `push` trigger for the Docker files or drop the clause.
    if: |
      github.event_name == 'workflow_dispatch' ||
      github.event_name == 'push' ||
      (github.event_name == 'schedule' && github.event.schedule == '0 3 * * *')
    runs-on: ubuntu-latest
    steps:
      - name: Checkout repository
        uses: actions/checkout@v4

      - name: Set up Docker Buildx
        uses: docker/setup-buildx-action@v3

      - name: Set tags
        id: set-tags
        run: |
          echo "DATE_TAG=$(date -u +%Y%m%d)" >> "$GITHUB_OUTPUT"
          echo "HASH_TAG=$(git rev-parse --short HEAD)" >> "$GITHUB_OUTPUT"

      - name: Login to GitHub Container Registry
        uses: docker/login-action@v3
        with:
          registry: ghcr.io
          username: ${{ github.actor }}
          password: ${{ github.token }}

      - name: Build and push TPU CI image
        run: |
          docker buildx build --file docker/marin/Dockerfile.tpu-ci \
            --provenance=false \
            --tag ghcr.io/marin-community/marin/tpu-ci:latest \
            --tag ghcr.io/marin-community/marin/tpu-ci:${{ steps.set-tags.outputs.DATE_TAG }} \
            --tag ghcr.io/marin-community/marin/tpu-ci:${{ steps.set-tags.outputs.HASH_TAG }} \
            --push .

  # Levanter TPU Base Image - Foundation for TPU deployments
  levanter-base-image:
    # Run on: manual trigger OR weekly schedule (02:00 UTC on Sundays)
    if: |
      github.event_name == 'workflow_dispatch' ||
      (github.event_name == 'schedule' && github.event.schedule == '0 2 * * 0')
    runs-on: ubuntu-latest
    steps:
      - name: Checkout code
        uses: actions/checkout@v4

      - name: Set up Docker Buildx
        uses: docker/setup-buildx-action@v3

      # NOTE(review): the build command below passes no --cache-from/--cache-to
      # pointing at /tmp/.buildx-cache, so this step appears to have no effect
      # on the build. Confirm whether layer caching is actually wanted here.
      - name: Cache Docker layers
        uses: actions/cache@v4
        with:
          path: /tmp/.buildx-cache
          key: ${{ runner.os }}-buildx-${{ github.sha }}
          restore-keys: |
            ${{ runner.os }}-buildx-

      # Exports DATE (UTC, YYYYMMDD) to the job environment for the image tag.
      - name: Get current date
        id: date
        run: echo "DATE=$(date -u +'%Y%m%d')" >> "$GITHUB_ENV"

      - name: Login to GitHub Container Registry
        uses: docker/login-action@v3
        with:
          registry: ghcr.io
          username: ${{ github.actor }}
          password: ${{ github.token }}

      - name: Build and push Levanter base image
        run: |
          docker buildx build --file lib/levanter/docker/tpu/Dockerfile.base \
            --tag ghcr.io/marin-community/levanter-base:latest \
            --tag ghcr.io/marin-community/levanter-base:${{ env.DATE }} \
            --push .

  # Levanter TPU Incremental Image - Optimized TPU runtime
  levanter-tpu-image:
    # Run on: manual trigger OR weekly schedule (02:00 UTC on Sundays)
    if: |
      github.event_name == 'workflow_dispatch' ||
      (github.event_name == 'schedule' && github.event.schedule == '0 2 * * 0')
    runs-on: ubuntu-latest
    # Runs only after the base image job has completed.
    needs: levanter-base-image
    steps:
      - name: Checkout code
        uses: actions/checkout@v4

      - name: Set up Docker Buildx
        uses: docker/setup-buildx-action@v3

      # NOTE(review): the build command below passes no --cache-from/--cache-to
      # pointing at /tmp/.buildx-cache, so this step appears to have no effect
      # on the build. Confirm whether layer caching is actually wanted here.
      - name: Cache Docker layers
        uses: actions/cache@v4
        with:
          path: /tmp/.buildx-cache
          key: ${{ runner.os }}-buildx-${{ github.sha }}
          restore-keys: |
            ${{ runner.os }}-buildx-

      # Exports DATE (UTC, YYYYMMDD) to the job environment for the image tag.
      - name: Get current date
        id: date
        run: echo "DATE=$(date -u +'%Y%m%d')" >> "$GITHUB_ENV"

      - name: Login to GitHub Container Registry
        uses: docker/login-action@v3
        with:
          registry: ghcr.io
          username: ${{ github.actor }}
          password: ${{ github.token }}

      - name: Build and push Levanter TPU image
        run: |
          docker buildx build --file lib/levanter/docker/tpu/Dockerfile.incremental \
            --tag ghcr.io/marin-community/levanter-tpu:latest \
            --tag ghcr.io/marin-community/levanter-tpu:${{ env.DATE }} \
            --push .

  # Levanter Cluster Image - For cluster deployments
  levanter-cluster-image:
    # Run on: manual trigger OR weekly schedule (02:00 UTC on Sundays)
    if: |
      github.event_name == 'workflow_dispatch' ||
      (github.event_name == 'schedule' && github.event.schedule == '0 2 * * 0')
    runs-on: ubuntu-latest
    # Runs only after the TPU image job has completed.
    needs: levanter-tpu-image
    steps:
      - name: Checkout code
        uses: actions/checkout@v4

      - name: Set up Docker Buildx
        uses: docker/setup-buildx-action@v3

      # NOTE(review): the build command below passes no --cache-from/--cache-to
      # pointing at /tmp/.buildx-cache, so this step appears to have no effect
      # on the build. Confirm whether layer caching is actually wanted here.
      - name: Cache Docker layers
        uses: actions/cache@v4
        with:
          path: /tmp/.buildx-cache
          key: ${{ runner.os }}-buildx-${{ github.sha }}
          restore-keys: |
            ${{ runner.os }}-buildx-

      # Exports DATE (UTC, YYYYMMDD) to the job environment for the image tag.
      - name: Get current date
        id: date
        run: echo "DATE=$(date -u +'%Y%m%d')" >> "$GITHUB_ENV"

      - name: Login to GitHub Container Registry
        uses: docker/login-action@v3
        with:
          registry: ghcr.io
          username: ${{ github.actor }}
          password: ${{ github.token }}

      # Tags include the marin-community owner namespace, matching every other
      # GHCR image in this workflow; GHCR image names require an owner segment.
      - name: Build and push Levanter cluster image
        run: |
          docker buildx build --file lib/levanter/docker/tpu/Dockerfile.cluster \
            --tag ghcr.io/marin-community/levanter-cluster:latest \
            --tag ghcr.io/marin-community/levanter-cluster:${{ env.DATE }} \
            --push .