Skip to content

Commit adc2b04

Browse files
committed
Run NCCL tests on the JAX-specific base container.
1 parent 8dd4054 commit adc2b04

File tree

3 files changed: 12 additions and 21 deletions.

3 files changed: 12 additions and 21 deletions.

.github/workflows/_ci.yaml

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -47,6 +47,13 @@ jobs:
4747
MANIFEST_ARTIFACT_NAME: ${{ inputs.MANIFEST_ARTIFACT_NAME }}
4848
secrets: inherit
4949

50+
test-nccl:
51+
needs: build-base
52+
uses: ./.github/workflows/_test_nccl.yaml
53+
with:
54+
CONTAINER: ${{ needs.build-base.outputs.DOCKER_TAG }}
55+
secrets: inherit
56+
5057
build-jax:
5158
needs: build-base
5259
uses: ./.github/workflows/_build.yaml

.github/workflows/_test_nccl.yaml

Lines changed: 1 addition & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -3,12 +3,9 @@ name: ~run NCCL tests
33
on:
44
workflow_call:
55
inputs:
6-
# Note that cuda-dl-base installs the NCCL tests, while the vanilla nvidia/cuda
7-
# images do not; when JAX-Toolbox moves to using cuda-dl-base this workflow ought
8-
# to be modified to test one of the JAX-Toolbox containers.
96
CONTAINER:
107
type: string
11-
description: CUDA image to use as base, e.g. nvcr.io/nvidia/cuda-dl-base:24.11-cuda12.6-devel-ubuntu24.04
8+
description: Container to test; assumed to already contain {all_gather,all_reduce,broadcast,reduce_scatter}_perf_mpi in $PATH
129
required: true
1310

1411
permissions:

.github/workflows/nccl-k8s.yaml

Lines changed: 4 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -1,25 +1,12 @@
11
name: NCCL on Kubernetes
22
on:
3-
schedule:
4-
- cron: '30 8 * * *'
5-
pull_request:
6-
types:
7-
- opened
8-
- reopened
9-
- ready_for_review
10-
- synchronize
11-
paths-ignore:
12-
- '**.md'
133
workflow_dispatch:
144
inputs:
15-
# Note that cuda-dl-base installs the NCCL tests, while the vanilla nvidia/cuda
16-
# images do not; when JAX-Toolbox moves to using cuda-dl-base this workflow ought
17-
# to be modified to test one of the JAX-Toolbox containers.
185
CONTAINER:
196
type: string
20-
description: Container to test, this is assumed to already contain the NCCL tests e.g. cuda-dl-base or derived
21-
default: ''
22-
required: false
7+
default: ghcr.io/nvidia/jax:base
8+
description: Container to test, this is assumed to already contain the NCCL tests
9+
required: true
2310

2411
concurrency:
2512
group: ${{ github.workflow }}-${{ github.head_ref || github.run_id }}
@@ -34,5 +21,5 @@ jobs:
3421
nccl-tests:
3522
uses: ./.github/workflows/_test_nccl.yaml
3623
with:
37-
CONTAINER: ${{ inputs.CONTAINER || 'nvcr.io/nvidia/cuda-dl-base:24.12-cuda12.6-devel-ubuntu24.04' }}
24+
CONTAINER: ${{ inputs.CONTAINER }}
3825
secrets: inherit

0 commit comments

Comments
 (0)