# Build Docker Images #13
name: Build Docker Images

on:
  # Allow the workflow to be started by hand.
  workflow_dispatch:
  # Scheduled builds.
  # Keep these infrequent: under the "last N most recent images" retention
  # policy, frequent pushes can evict images that are still in use.
  # The jobs below select themselves by comparing github.event.schedule with
  # these exact cron strings, so any edit here must be mirrored in their `if`.
  schedule:
    # Weekly, Sundays at 2 AM UTC: cluster images and Levanter.
    - cron: '0 2 * * 0'
    # Daily at 3 AM UTC: TPU CI images.
    - cron: '0 3 * * *'

# NOTE(review): the jobs visible in this file only check out the repository
# and push images to GHCR; `contents: write` and `pull-requests: write` look
# broader than needed -- confirm before narrowing.
permissions:
  contents: write
  pull-requests: write
  packages: write

jobs:
# Iris images: worker, controller and task, all pushed to GHCR.
# Every image is built from one multi-stage Dockerfile (lib/iris/Dockerfile),
# with the stage selected via --target. Controller and worker share all layers
# up to the Python deps; the worker only adds the Docker CLI on top. Task is an
# independent stage that uses the marin repository root as its build context.
iris-images:
  # Runs on a manual trigger or on the weekly schedule (Sundays, 02:00 UTC).
  if: >-
    github.event_name == 'workflow_dispatch' ||
    (github.event_name == 'schedule' && github.event.schedule == '0 2 * * 0')
  runs-on: ubuntu-latest
  strategy:
    matrix:
      # One entry per image: published name, Dockerfile stage, build context.
      include:
        - image: iris-worker
          target: worker
          context: lib/iris
        - image: iris-controller
          target: controller
          context: lib/iris
        - image: iris-task
          target: task
          context: .
  steps:
    - name: Checkout repository
      uses: actions/checkout@v5
    - name: Set up Docker Buildx
      uses: docker/setup-buildx-action@v4
    - name: Set tags
      id: set-tags
      # Exposes DATE_TAG (UTC, YYYYMMDD) and HASH_TAG (short commit hash) as
      # step outputs for the build step below.
      run: |
        {
          echo "DATE_TAG=$(date -u +"%Y%m%d")"
          echo "HASH_TAG=$(git rev-parse --short HEAD)"
        } >> "$GITHUB_OUTPUT"
    - name: Login to GitHub Container Registry
      uses: docker/login-action@v4
      with:
        registry: ghcr.io
        username: ${{ github.actor }}
        password: ${{ github.token }}
    - name: Build and push ${{ matrix.image }}
      # Layer cache is kept in the registry, one cache ref per target.
      # Each image is tagged three ways: latest, build date, and commit hash.
      run: |
        docker buildx build \
          --file lib/iris/Dockerfile \
          --target ${{ matrix.target }} \
          --cache-from type=registry,ref=ghcr.io/marin-community/iris-cache:${{ matrix.target }} \
          --cache-to type=registry,ref=ghcr.io/marin-community/iris-cache:${{ matrix.target }},mode=max \
          --output type=image,compression=zstd,compression-level=3,push=true \
          --provenance=false \
          --build-arg IRIS_GIT_HASH=${{ steps.set-tags.outputs.HASH_TAG }} \
          --tag ghcr.io/marin-community/${{ matrix.image }}:latest \
          --tag ghcr.io/marin-community/${{ matrix.image }}:${{ steps.set-tags.outputs.DATE_TAG }} \
          --tag ghcr.io/marin-community/${{ matrix.image }}:${{ steps.set-tags.outputs.HASH_TAG }} \
          ${{ matrix.context }}
# Marin TPU CI images: used by the self-hosted TPU CI runners.
marin-tpu-ci-images:
  # Runs on a manual trigger or on the daily schedule (03:00 UTC).
  # The workflow declares only `workflow_dispatch` and `schedule` triggers, so
  # no `push` clause is listed here: it could never match.
  if: >-
    github.event_name == 'workflow_dispatch' ||
    (github.event_name == 'schedule' && github.event.schedule == '0 3 * * *')
  runs-on: ubuntu-latest
  steps:
    - name: Checkout repository
      uses: actions/checkout@v5
    - name: Set up Docker Buildx
      uses: docker/setup-buildx-action@v4
    - name: Set tags
      id: set-tags
      # Exposes DATE_TAG (UTC, YYYYMMDD) and HASH_TAG (short commit hash) as
      # step outputs for the build step below.
      run: |
        echo "DATE_TAG=$(date -u +"%Y%m%d")" >> "$GITHUB_OUTPUT"
        echo "HASH_TAG=$(git rev-parse --short HEAD)" >> "$GITHUB_OUTPUT"
    - name: Login to GitHub Container Registry
      uses: docker/login-action@v4
      with:
        registry: ghcr.io
        username: ${{ github.actor }}
        password: ${{ github.token }}
    - name: Build and push TPU CI image
      # Build context is the repository root; the image is tagged latest,
      # by build date, and by commit hash.
      run: |
        docker buildx build --file docker/marin/Dockerfile.tpu-ci \
          --provenance=false \
          --tag ghcr.io/marin-community/marin/tpu-ci:latest \
          --tag ghcr.io/marin-community/marin/tpu-ci:${{ steps.set-tags.outputs.DATE_TAG }} \
          --tag ghcr.io/marin-community/marin/tpu-ci:${{ steps.set-tags.outputs.HASH_TAG }} \
          --push .
# Levanter TPU base image: the foundation for TPU deployments.
levanter-base-image:
  # Runs on a manual trigger or on the weekly schedule (Sundays, 02:00 UTC).
  if: >-
    github.event_name == 'workflow_dispatch' ||
    (github.event_name == 'schedule' && github.event.schedule == '0 2 * * 0')
  runs-on: ubuntu-latest
  steps:
    - name: Checkout code
      uses: actions/checkout@v5
    - name: Set up Docker Buildx
      uses: docker/setup-buildx-action@v4
    # NOTE(review): the build command in this job passes no --cache-from or
    # --cache-to, so nothing here appears to read or write this directory --
    # confirm whether this step is still needed.
    - name: Cache Docker layers
      uses: actions/cache@v4
      with:
        path: /tmp/.buildx-cache
        key: ${{ runner.os }}-buildx-${{ github.sha }}
        restore-keys: |
          ${{ runner.os }}-buildx-
    - name: Get current date
      id: date
      # Publishes DATE (YYYYMMDD) through the job environment; the build step
      # reads it back as env.DATE.
      run: |
        today="$(date +'%Y%m%d')"
        echo "DATE=${today}" >> $GITHUB_ENV
    - name: Login to GitHub Container Registry
      uses: docker/login-action@v4
      with:
        registry: ghcr.io
        username: ${{ github.actor }}
        password: ${{ github.token }}
    - name: Build and push Levanter base image
      # Build context is the repository root; tags are latest and build date.
      run: |
        docker buildx build \
          --file lib/levanter/docker/tpu/Dockerfile.base \
          --tag ghcr.io/marin-community/levanter-base:latest \
          --tag ghcr.io/marin-community/levanter-base:${{ env.DATE }} \
          --push .
# Levanter TPU incremental image: the optimized TPU runtime.
levanter-tpu-image:
  # Runs on a manual trigger or on the weekly schedule (Sundays, 02:00 UTC),
  # and only after the base image job has finished.
  needs: levanter-base-image
  if: >-
    github.event_name == 'workflow_dispatch' ||
    (github.event_name == 'schedule' && github.event.schedule == '0 2 * * 0')
  runs-on: ubuntu-latest
  steps:
    - name: Checkout code
      uses: actions/checkout@v5
    - name: Set up Docker Buildx
      uses: docker/setup-buildx-action@v4
    # NOTE(review): the build command in this job passes no --cache-from or
    # --cache-to, so nothing here appears to read or write this directory --
    # confirm whether this step is still needed.
    - name: Cache Docker layers
      uses: actions/cache@v4
      with:
        path: /tmp/.buildx-cache
        key: ${{ runner.os }}-buildx-${{ github.sha }}
        restore-keys: |
          ${{ runner.os }}-buildx-
    - name: Get current date
      id: date
      # Publishes DATE (YYYYMMDD) through the job environment; the build step
      # reads it back as env.DATE.
      run: |
        today="$(date +'%Y%m%d')"
        echo "DATE=${today}" >> $GITHUB_ENV
    - name: Login to GitHub Container Registry
      uses: docker/login-action@v4
      with:
        registry: ghcr.io
        username: ${{ github.actor }}
        password: ${{ github.token }}
    - name: Build and push Levanter TPU image
      # Build context is the repository root; tags are latest and build date.
      run: |
        docker buildx build \
          --file lib/levanter/docker/tpu/Dockerfile.incremental \
          --tag ghcr.io/marin-community/levanter-tpu:latest \
          --tag ghcr.io/marin-community/levanter-tpu:${{ env.DATE }} \
          --push .
# Levanter cluster image: for cluster deployments.
levanter-cluster-image:
  # Runs on a manual trigger or on the weekly schedule (Sundays, 02:00 UTC),
  # and only after the TPU image job has finished.
  if: >-
    github.event_name == 'workflow_dispatch' ||
    (github.event_name == 'schedule' && github.event.schedule == '0 2 * * 0')
  runs-on: ubuntu-latest
  needs: levanter-tpu-image
  steps:
    - name: Checkout code
      uses: actions/checkout@v5
    - name: Set up Docker Buildx
      uses: docker/setup-buildx-action@v4
    # NOTE(review): the build command in this job passes no --cache-from or
    # --cache-to, so nothing here appears to read or write this directory --
    # confirm whether this step is still needed.
    - name: Cache Docker layers
      uses: actions/cache@v4
      with:
        path: /tmp/.buildx-cache
        key: ${{ runner.os }}-buildx-${{ github.sha }}
        restore-keys: |
          ${{ runner.os }}-buildx-
    - name: Get current date
      id: date
      # Publishes DATE (YYYYMMDD) through the job environment; the build step
      # reads it back as env.DATE.
      run: echo "DATE=$(date +'%Y%m%d')" >> "$GITHUB_ENV"
    - name: Login to GitHub Container Registry
      uses: docker/login-action@v4
      with:
        registry: ghcr.io
        username: ${{ github.actor }}
        password: ${{ github.token }}
    - name: Build and push Levanter cluster image
      # GHCR image names are ghcr.io/<namespace>/<image>; the tags carry the
      # marin-community namespace like every other image in this workflow.
      run: |
        docker buildx build --file lib/levanter/docker/tpu/Dockerfile.cluster \
          --tag ghcr.io/marin-community/levanter-cluster:latest \
          --tag ghcr.io/marin-community/levanter-cluster:${{ env.DATE }} \
          --push .