Skip to content

Commit ce335d8

Browse files
authored
Set custom xpk imagePullSecret (#1559)
Cherry-pick of changes (39baf57, 293f0dc, 4b233e6) to the xpk composite action that make the imagePullSecret referenced by the xpk workload configurable. (N.B. this seems to be necessary in `main` so that the ngc release branch can use it in a workflow dispatch event.)
1 parent 03258c8 commit ce335d8

File tree

6 files changed

+20
-4
lines changed

6 files changed

+20
-4
lines changed

.github/actions/gke-xpk/action.yml

Lines changed: 9 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -47,6 +47,11 @@ inputs:
4747
required: false
4848
default: ghcr.io/nvidia/jax:latest
4949
type: string
50+
IMAGE_PULL_SECRET_NAME:
51+
description: 'Name of k8s Secret resource for registry ImagePullSecret'
52+
required: false
53+
default: jax-toolbox-ghcr
54+
type: string
5055
COMMAND:
5156
description: 'Command to run in main container on JobSet start up'
5257
required: false
@@ -113,9 +118,10 @@ runs:
113118
- name: Apply XPK workload create patch
114119
shell: bash -x -e -u {0}
115120
run: |
116-
git apply --unsafe-paths .github/gke-workflow/xpk/tcpxo_decorator.patch --directory ${WORKLOAD_NAME}/xpk
117-
git apply --unsafe-paths .github/gke-workflow/xpk/docker_resources.patch --directory ${WORKLOAD_NAME}/xpk
118-
git apply --unsafe-paths .github/gke-workflow/xpk/workload.patch --directory ${WORKLOAD_NAME}/xpk
121+
sed -i 's/{{ IMAGE_PULL_SECRET_NAME }}/${{ inputs.IMAGE_PULL_SECRET_NAME }}/g' .github/gke-workflow/xpk/${{ inputs.XPK_VERSION}}/workload.patch
122+
git apply --unsafe-paths .github/gke-workflow/xpk/${{ inputs.XPK_VERSION}}/tcpxo_decorator.patch --directory ${WORKLOAD_NAME}/xpk
123+
git apply --unsafe-paths .github/gke-workflow/xpk/${{ inputs.XPK_VERSION}}/docker_resources.patch --directory ${WORKLOAD_NAME}/xpk
124+
git apply --unsafe-paths .github/gke-workflow/xpk/${{ inputs.XPK_VERSION}}/workload.patch --directory ${WORKLOAD_NAME}/xpk
119125
120126
- name: Set workload commands
121127
shell: bash -x -e -u {0}

.github/gke-workflow/xpk/workload.patch renamed to .github/gke-workflow/xpk/v0.8.0/workload.patch

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,7 @@ index a466a5c..8a5b99e 100644
77
- operator: "Exists"
88
key: nvidia.com/gpu
99
+ imagePullSecrets:
10-
+ - name: jax-toolbox-ghcr
10+
+ - name: {{ IMAGE_PULL_SECRET_NAME }}
1111
containers:
1212
{container}
1313
"""

.github/workflows/_test_maxtext_gke_xpk.yaml

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -24,10 +24,20 @@ jobs:
2424
NUM_NODES: 2
2525

2626
steps:
27+
- name: Login to GitHub Container Registry
28+
uses: docker/login-action@v3
29+
with:
30+
registry: ghcr.io
31+
username: ${{ github.repository_owner }}
32+
password: ${{ secrets.GITHUB_TOKEN }}
33+
- name: K8s GHCR store and delete token
34+
id: store-token
35+
uses: ./.github/actions/store-delete-k8s-ghcr
2736
- name: Run XPK workload on cluster
2837
uses: ./.github/actions/gke-xpk
2938
with:
3039
IMAGE: ${{ env.MAXTEXT_IMAGE }}
40+
IMAGE_PULL_SECRET_NAME: ${{ steps.store-token.outputs.token-name }}
3141
WORKLOAD_NAME_PREFIX: ${{ env.WORKLOAD_NAME_PREFIX }}
3242
COMMAND: |
3343
export NCCL_NET_PLUGIN=/opt/hpcx/nccl_rdma_sharp_plugin/lib/libnccl-net.so;

0 commit comments

Comments
 (0)