Skip to content

V0.9.0 merger

V0.9.0 merger #134

# Builds the base and manylinux builder Docker images and pushes them to ghcr.io.
name: Build Base Docker Images

on:
  schedule:
    # Run every Sunday at midnight UTC
    - cron: '0 0 * * 0'
  # Allow the workflow to be started manually
  workflow_dispatch:
  # Run on pull requests that touch the build script, this workflow, or the docker inputs
  pull_request:
    paths:
      - 'build/ci_build'
      - '.github/workflows/build-base-docker.yml'
      - 'docker/Dockerfile*'
      - 'docker/manylinux/*'
  # Run on pushes to master that touch the same set of files
  push:
    branches:
      - 'master'
    paths:
      - 'build/ci_build'
      - '.github/workflows/build-base-docker.yml'
      - 'docker/Dockerfile*'
      - 'docker/manylinux/*'
jobs:
# Builds the Ubuntu 24 ("ubu24") base images for each ROCm version, once with
# LLVM installed (pushed as jax-dev-ubu24.*) and once without (jax-base-ubu24.*).
build-base-images:
  runs-on: ${{ matrix.runner-label }}
  # Explicit least-privilege token scopes: checkout needs to read the repository,
  # pushing images to ghcr.io needs to write packages.
  permissions:
    contents: read
    packages: write
  strategy:
    fail-fast: false
    matrix:
      rocm-version: ["7.1.1", "7.2.0"]
      install-llvm: [true, false]
      # Attach a runner label to every combination of a given ROCm version
      include:
        - rocm-version: "7.1.1"
          runner-label: "linux-x86-64-1gpu-amd"
        - rocm-version: "7.2.0"
          runner-label: "linux-x86-64-1gpu-amd"
  steps:
    - name: Clean up old runs
      run: |
        ls -lah
        # Make sure that we own all of the files so that we have permissions to delete them
        docker run --rm -v "./:/rocm-jax" ubuntu \
          /bin/bash -c "shopt -s dotglob; chown -R $UID /rocm-jax/* || true"
        # Remove any old work directories from this machine, hidden entries included
        find . -mindepth 1 -maxdepth 1 -exec rm -rf {} + || true
        ls -lah
        # Clean up any docker stuff that's more than a week old.
        # --force skips the confirmation prompt, which cannot be answered in CI.
        docker system prune -a --force --filter "until=168h"
        # Stop any containers running for more than 12 hours. No CI job should take this long.
        # RunningFor reads like "13 hours ago" or "3 days ago": compare the hour count
        # numerically and treat any larger unit as being over the limit.
        docker ps --format="{{.RunningFor}} {{.Names}}" \
          | awk '($2 == "hours" && $1 + 0 > 12) || $2 ~ /^(days|weeks|months|years)$/ {print $NF}' \
          | xargs -r docker stop || true
    - name: Print system info
      run: |
        whoami
        printenv
        df -h
        # GPU tooling may be missing on the runner; do not fail the job over diagnostics
        rocm-smi -a || true
        rocminfo | grep gfx || true
    - uses: actions/checkout@v4
    - name: Build base docker image
      env:
        # The matrix above defines neither key, so both expand to empty strings
        # unless a matrix entry adds them.
        ROCM_BUILD_JOB: ${{ matrix.rocm-build-job }}
        ROCM_BUILD_NUM: ${{ matrix.rocm-build-num }}
      run: |
        # Only forward the optional build job/number when they are set
        BUILD_ARGS=""
        if [ -n "$ROCM_BUILD_JOB" ]; then
          BUILD_ARGS="$BUILD_ARGS --rocm-build-job=$ROCM_BUILD_JOB"
        fi
        if [ -n "$ROCM_BUILD_NUM" ]; then
          BUILD_ARGS="$BUILD_ARGS --rocm-build-num=$ROCM_BUILD_NUM"
        fi
        python3 build/ci_build \
          --rocm-version="${{ matrix.rocm-version }}" \
          $BUILD_ARGS \
          build_base_dockers \
          --filter="ubu24" \
          ${{ matrix.install-llvm && '--install-llvm --llvm-version 18' || '' }}
    # Same login action as the build-manylinux-builder-images job
    - name: Authenticate to GitHub Container Registry
      uses: docker/login-action@v3
      with:
        registry: ghcr.io
        username: ${{ github.actor }}
        password: ${{ secrets.GITHUB_TOKEN }}
    - name: Push docker images
      env:
        ROCM_VERSION: ${{ matrix.rocm-version }}
        INSTALL_LLVM: ${{ matrix.install-llvm }}
      run: |
        # Construct image tag based on matrix values
        # ROCm version tag removes dots (7.1.1 -> 711, 7.2.0 -> 720)
        rocm_tag="rocm${ROCM_VERSION//.}"
        if [ "$INSTALL_LLVM" = "true" ]; then
          image_tag="jax-dev-ubu24.${rocm_tag}"
        else
          image_tag="jax-base-ubu24.${rocm_tag}"
        fi
        # Push with commit SHA tag
        ghcr_image_sha="ghcr.io/rocm/${image_tag}:${GITHUB_SHA}"
        echo "Image name (SHA): ${ghcr_image_sha}"
        docker tag "${image_tag}" "${ghcr_image_sha}"
        docker push "${ghcr_image_sha}"
        # Push with latest tag for every trigger except pull requests
        # (schedule, workflow_dispatch and pushes to master)
        if [ "$GITHUB_EVENT_NAME" != "pull_request" ]; then
          ghcr_image_latest="ghcr.io/rocm/${image_tag}:latest"
          echo "Image name (latest): ${ghcr_image_latest}"
          docker tag "${image_tag}" "${ghcr_image_latest}"
          docker push "${ghcr_image_latest}"
        fi
# Builds the manylinux_2_28 builder image for each ROCm version and pushes it to ghcr.io.
build-manylinux-builder-images:
  runs-on: ${{ matrix.runner-label }}
  # Explicit least-privilege token scopes: checkout needs to read the repository,
  # pushing images to ghcr.io needs to write packages.
  permissions:
    contents: read
    packages: write
  strategy:
    fail-fast: false
    matrix:
      rocm-version: ["7.1.1", "7.2.0"]
      # Attach a runner label to each ROCm version
      include:
        - rocm-version: "7.1.1"
          runner-label: "linux-x86-64-1gpu-amd"
        - rocm-version: "7.2.0"
          runner-label: "linux-x86-64-1gpu-amd"
  steps:
    - name: Clean up old runs
      run: |
        ls -lah
        # Make sure that we own all of the files so that we have permissions to delete them
        docker run --rm -v "./:/rocm-jax" ubuntu \
          /bin/bash -c "shopt -s dotglob; chown -R $UID /rocm-jax/* || true"
        # Remove any old work directories from this machine, hidden entries included
        find . -mindepth 1 -maxdepth 1 -exec rm -rf {} + || true
        ls -lah
        # Clean up any docker stuff that's more than a week old.
        # --force skips the confirmation prompt, which cannot be answered in CI.
        docker system prune -a --force --filter "until=168h"
        # Stop any containers running for more than 12 hours. No CI job should take this long.
        # RunningFor reads like "13 hours ago" or "3 days ago": compare the hour count
        # numerically and treat any larger unit as being over the limit.
        docker ps --format="{{.RunningFor}} {{.Names}}" \
          | awk '($2 == "hours" && $1 + 0 > 12) || $2 ~ /^(days|weeks|months|years)$/ {print $NF}' \
          | xargs -r docker stop || true
    - uses: actions/checkout@v4
    - name: Build docker images
      env:
        # The matrix above defines neither key, so both expand to empty strings
        # unless a matrix entry adds them.
        ROCM_BUILD_JOB: ${{ matrix.rocm-build-job }}
        ROCM_BUILD_NUM: ${{ matrix.rocm-build-num }}
      run: |
        # Only forward the optional build job/number when they are set,
        # the same way the build-base-images job does.
        BUILD_ARGS=""
        if [ -n "$ROCM_BUILD_JOB" ]; then
          BUILD_ARGS="$BUILD_ARGS --rocm-build-job=$ROCM_BUILD_JOB"
        fi
        if [ -n "$ROCM_BUILD_NUM" ]; then
          BUILD_ARGS="$BUILD_ARGS --rocm-build-num=$ROCM_BUILD_NUM"
        fi
        python3 build/ci_build \
          --rocm-version="${{ matrix.rocm-version }}" \
          $BUILD_ARGS \
          build_manylinux_dockers
    - name: Login to GitHub Container Registry
      uses: docker/login-action@v3
      with:
        registry: ghcr.io
        username: ${{ github.actor }}
        password: ${{ secrets.GITHUB_TOKEN }}
    - name: Push docker images
      env:
        ROCM_VERSION: ${{ matrix.rocm-version }}
        # "-<value>" suffixes for the image name; empty when the matrix key is unset.
        # Both the test and the formatted value read from the matrix context.
        ROCM_BUILD_JOB: >-
          ${{ matrix.rocm-build-job && format('-{0}', matrix.rocm-build-job) || '' }}
        ROCM_BUILD_NUM: >-
          ${{ matrix.rocm-build-num && format('-{0}', matrix.rocm-build-num) || '' }}
      run: |
        image_tag="ghcr.io/rocm/jax-manylinux_2_28-rocm-${ROCM_VERSION}${ROCM_BUILD_JOB}${ROCM_BUILD_NUM}"
        # Push with commit SHA tag
        sha_image_tag="${image_tag}:${GITHUB_SHA}"
        echo "Image name (SHA): ${sha_image_tag}"
        docker tag "${image_tag}" "${sha_image_tag}"
        docker push "${sha_image_tag}"
        # Push with latest tag for every trigger except pull requests
        # (schedule, workflow_dispatch and pushes to master)
        if [ "$GITHUB_EVENT_NAME" != "pull_request" ]; then
          latest_image_tag="${image_tag}:latest"
          echo "Image Name (latest): ${latest_image_tag}"
          docker tag "${image_tag}" "${latest_image_tag}"
          docker push "${latest_image_tag}"
        fi