Skip to content

Commit 4a8f06a

Browse files
olupton and Steboss authored
Run NCCL tests on the JAX-specific base container (#1284)
The prior setup pre-dated #1248; now things can be simpler.
---------
Co-authored-by: Steboss <[email protected]>
Co-authored-by: Steboss <[email protected]>
1 parent ba6b549 commit 4a8f06a

File tree

6 files changed

+389
-341
lines changed

6 files changed

+389
-341
lines changed
Lines changed: 212 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,212 @@
1+
# Composite action: builds the 'mealkit' and 'final' stages of a container
# image and pushes both to ghcr.io.
name: 'Build container'

description: >-
  Builds a Docker container image for JAX-based projects using NVIDIA's
  Mealkit and uploads it to GitHub Container Registry.
4+
5+
# Inputs accepted by the action. Upper-case names configure the build itself;
# the kebab-case names at the end carry credentials and cache settings.
inputs:
  ARCHITECTURE:
    description: 'CPU architecture to build the image for, e.g. amd64, arm64'
    required: true
  BASE_IMAGE:
    description: 'Base docker image that provides JAX'
    required: false
    default: 'ghcr.io/nvidia/jax:mealkit'
  BUILD_DATE:
    description: 'Build date in YYYY-MM-DD format'
    required: false
    default: 'NOT SPECIFIED'
  ARTIFACT_NAME:
    description: 'Name of the artifact zip file, e.g. artifact-t5x-build'
    required: true
  BADGE_FILENAME:
    description: 'Name of the endpoint JSON file for shields.io badge, e.g. badge-t5x-build'
    required: true
  CONTAINER_NAME:
    description: 'Container name, e.g. upstream-t5x'
    required: true
  DOCKERFILE:
    description: 'Dockerfile to use, e.g. .github/container/Dockerfile.t5x'
    required: true
  DOCKER_CONTEXT:
    description: 'Dockerfile context to build'
    required: false
    default: '.github/container'
  # NOTE(review): no step of this action reads RUNNER_SIZE; it is kept so
  # callers that already pass it keep working.
  RUNNER_SIZE:
    description: 'Size of the runner to use'
    required: false
    default: 'small'
  EXTRA_BUILD_ARGS:
    description: 'Extra build arguments to pass to the Docker build'
    required: false
    default: ''
  # The inputs below are required, so they declare no default: a default on a
  # required input is contradictory, and an input that is not supplied still
  # expands to the empty string.
  ssh-private-key:
    description: 'SSH private key to use for building the image'
    required: true
  ssh-known-hosts:
    description: 'SSH known hosts entries to use for building the image'
    required: true
  github-token:
    description: 'GitHub token to use for authentication'
    required: true
  bazel-remote-cache-url:
    description: 'URL of the Bazel remote cache to use for building the image'
    required: true
57+
58+
# Values handed back to the calling workflow; both are re-exported from the
# step whose id is 'export'.
outputs:
  DOCKER_TAG_FINAL:
    description: 'Tags of the complete image built'
    value: ${{ steps.export.outputs.DOCKER_TAG_FINAL }}
  DOCKER_TAG_MEALKIT:
    description: "Tags of the 'mealkit' image built"
    value: ${{ steps.export.outputs.DOCKER_TAG_MEALKIT }}
65+
66+
runs:
67+
using: 'composite'
68+
steps:
69+
# Publish UPLD_IMAGE and BADGE_FILENAME_FULL through GITHUB_ENV so that later
# steps can read them from the `env` context.
- name: Set up environment variables
  id: set-env
  shell: bash
  env:
    # Inputs reach the script through the environment instead of being
    # expanded into the script text, so their contents are never parsed as
    # shell code.
    BADGE_FILENAME: ${{ inputs.BADGE_FILENAME }}
    ARCHITECTURE: ${{ inputs.ARCHITECTURE }}
  run: |
    echo 'UPLD_IMAGE=ghcr.io/nvidia/jax-toolbox-internal' >> "$GITHUB_ENV"
    echo "BADGE_FILENAME_FULL=${BADGE_FILENAME}-${ARCHITECTURE}.json" >> "$GITHUB_ENV"
75+
76+
# Hand the SSH key and known-hosts entries to the repository-local setup-ssh
# action; its `known-hosts-file` output is read by the image build steps.
- id: setup-ssh
  name: Setup SSH
  uses: ./.github/actions/setup-ssh
  with:
    ssh-known-hosts: ${{ inputs.ssh-known-hosts }}
    ssh-private-key: ${{ inputs.ssh-private-key }}
82+
83+
# Log in to ghcr.io as the repository owner, using the caller-supplied token,
# before the build steps push images there.
- name: Login to GHCR
  uses: docker/login-action@v3
  with:
    registry: 'ghcr.io'
    password: ${{ inputs.github-token }}
    username: ${{ github.repository_owner }}
89+
90+
# Set up Buildx with the BuildKit image pinned to a fixed version.
- name: Set up Docker Buildx
  uses: docker/setup-buildx-action@v3
  with:
    driver-opts: |
      image=moby/buildkit:v0.12.1
95+
96+
# MEALKIT BUILD
# Compute the tag and labels applied to the 'mealkit' stage image. The tag is
# <run id>-<container name>-<architecture>-mealkit.
- name: Set docker metadata - mealkit
  id: mealkit-metadata
  uses: docker/metadata-action@v5
  with:
    images: |
      ${{ env.UPLD_IMAGE }}
    flavor: |
      latest=false
    tags: |
      type=raw,value=${{ github.run_id }}-${{ inputs.CONTAINER_NAME }}-${{ inputs.ARCHITECTURE }}-mealkit
    # Literal block scalar, like the other list-valued inputs above: a plain
    # multi-line scalar would fold any additional label onto the same line.
    labels: |
      org.opencontainers.image.created=${{ inputs.BUILD_DATE }}
109+
110+
# Build the 'mealkit' target of the Dockerfile for the requested architecture
# and push it under the tags computed by the mealkit-metadata step.
- id: mealkit-build
  name: Build mealkit image
  uses: docker/build-push-action@v5
  with:
    file: ${{ inputs.DOCKERFILE }}
    context: ${{ inputs.DOCKER_CONTEXT }}
    target: mealkit
    platforms: linux/${{ inputs.ARCHITECTURE }}
    push: true
    tags: ${{ steps.mealkit-metadata.outputs.tags }}
    labels: ${{ steps.mealkit-metadata.outputs.labels }}
    ssh: default
    # The known-hosts file written by the setup-ssh step is passed to the
    # build as the secret SSH_KNOWN_HOSTS.
    secret-files: |
      "SSH_KNOWN_HOSTS=${{ steps.setup-ssh.outputs.known-hosts-file }}"
    # Caller-supplied extra arguments are appended after the fixed ones.
    build-args: |
      BASE_IMAGE=${{ inputs.BASE_IMAGE }}
      BAZEL_CACHE=${{ inputs.bazel-remote-cache-url }}
      BUILD_DATE=${{ inputs.BUILD_DATE }}
      ${{ inputs.EXTRA_BUILD_ARGS }}
129+
# FINAL IMAGE BUILD
# Compute the tag and labels applied to the 'final' stage image. The tag is
# <run id>-<container name>-<architecture>.
- name: Set docker metadata - final
  id: final-metadata
  uses: docker/metadata-action@v5
  with:
    images: |
      ${{ env.UPLD_IMAGE }}
    flavor: |
      latest=false
    tags: |
      type=raw,value=${{ github.run_id }}-${{ inputs.CONTAINER_NAME }}-${{ inputs.ARCHITECTURE }}
    # Literal block scalar, like the other list-valued inputs above: a plain
    # multi-line scalar would fold any additional label onto the same line.
    labels: |
      org.opencontainers.image.created=${{ inputs.BUILD_DATE }}
142+
143+
# Build the 'final' target of the Dockerfile for the requested architecture
# and push it under the tags computed by the final-metadata step. The sitrep
# step reads this step's outcome and digest.
- id: final-build
  name: Build final image
  uses: docker/build-push-action@v5
  with:
    file: ${{ inputs.DOCKERFILE }}
    context: ${{ inputs.DOCKER_CONTEXT }}
    target: final
    platforms: linux/${{ inputs.ARCHITECTURE }}
    push: true
    tags: ${{ steps.final-metadata.outputs.tags }}
    labels: ${{ steps.final-metadata.outputs.labels }}
    ssh: default
    # The known-hosts file written by the setup-ssh step is passed to the
    # build as the secret SSH_KNOWN_HOSTS.
    secret-files: |
      "SSH_KNOWN_HOSTS=${{ steps.setup-ssh.outputs.known-hosts-file }}"
    # Caller-supplied extra arguments are appended after the fixed ones.
    build-args: |
      BASE_IMAGE=${{ inputs.BASE_IMAGE }}
      BAZEL_CACHE=${{ inputs.bazel-remote-cache-url }}
      BUILD_DATE=${{ inputs.BUILD_DATE }}
      ${{ inputs.EXTRA_BUILD_ARGS }}
162+
163+
# SITREP GENERATION
# Write sitrep.json and the shields.io badge JSON describing the outcome of
# the final image build. Runs unless the job was cancelled, so a failed build
# is still reported.
- name: Generate sitrep
  if: "!cancelled()"
  shell: bash -x -e {0}
  env:
    # Inputs and step results reach the script through the environment
    # instead of being expanded into the script text, so their contents are
    # never parsed as shell code.
    CONTAINER_NAME: ${{ inputs.CONTAINER_NAME }}
    ARCHITECTURE: ${{ inputs.ARCHITECTURE }}
    FINAL_TAGS: ${{ steps.final-metadata.outputs.tags }}
    FINAL_DIGEST: ${{ steps.final-build.outputs.digest }}
    FINAL_OUTCOME: ${{ steps.final-build.outcome }}
  run: |
    # bring in utility functions
    source .github/workflows/scripts/to_json.sh

    # to_json is given variable names, so the values are staged in shell
    # variables carrying the names expected in the JSON output.
    badge_label="${CONTAINER_NAME} ${ARCHITECTURE} build"
    tags="${FINAL_TAGS}"
    digest="${FINAL_DIGEST}"
    outcome="${FINAL_OUTCOME}"

    # Any outcome other than "success" (including an empty one when the build
    # step never ran) is reported as a failure.
    if [[ "${outcome}" == "success" ]]; then
      badge_message="pass"
      badge_color=brightgreen
    else
      badge_message="fail"
      badge_color=red
    fi
    summary="${CONTAINER_NAME} build on ${ARCHITECTURE}: ${badge_message}"

    to_json \
      summary \
      badge_label tags digest outcome \
      > sitrep.json

    # BADGE_FILENAME_FULL was exported through GITHUB_ENV by an earlier step.
    schemaVersion=1 \
    label="${badge_label}" \
    message="${badge_message}" \
    color="${badge_color}" \
    to_json schemaVersion label message color \
    > "${BADGE_FILENAME_FULL}"
197+
198+
# Upload sitrep.json and the badge file as one artifact named after the
# artifact name and architecture. Runs unless the job was cancelled.
- name: Upload sitrep and badge
  uses: actions/upload-artifact@v4
  if: "!cancelled()"
  with:
    path: |
      sitrep.json
      ${{ env.BADGE_FILENAME_FULL }}
    name: ${{ inputs.ARTIFACT_NAME }}-${{ inputs.ARCHITECTURE }}
206+
207+
# Re-export the tags computed by the two metadata steps as this step's
# outputs; the action-level `outputs` section reads them from here.
- name: Export outputs
  id: export
  shell: bash
  env:
    # Step outputs reach the script through the environment instead of being
    # expanded into the script text, so their contents are never parsed as
    # shell code.
    MEALKIT_TAGS: ${{ steps.mealkit-metadata.outputs.tags }}
    FINAL_TAGS: ${{ steps.final-metadata.outputs.tags }}
  run: |
    {
      echo "DOCKER_TAG_MEALKIT=${MEALKIT_TAGS}"
      echo "DOCKER_TAG_FINAL=${FINAL_TAGS}"
    } >> "$GITHUB_OUTPUT"

0 commit comments

Comments
 (0)