@@ -70,7 +70,7 @@ inputs:
7070 XPK_VERSION :
7171 description : ' XPK release tag'
7272 required : false
73- default : ' v0.8.0 '
73+ default : ' v0.10.1 '  # also names the patch directory .github/gke-workflow/xpk/<tag> and is compared against "v0.10.1" to enable version-specific flags
7474 type : string
7575 XPK_PYTHON :
7676 description : ' Python version for XPK'
@@ -119,9 +119,8 @@ runs:
119119 shell : bash -x -e -u {0}
120120 run : |
121121 sed -i 's/{{ IMAGE_PULL_SECRET_NAME }}/${{ inputs.IMAGE_PULL_SECRET_NAME }}/g' .github/gke-workflow/xpk/${{ inputs.XPK_VERSION}}/workload.patch
122- git apply --unsafe-paths .github/gke-workflow/xpk/${{ inputs.XPK_VERSION}}/tcpxo_decorator.patch --directory ${WORKLOAD_NAME}/xpk
123- git apply --unsafe-paths .github/gke-workflow/xpk/${{ inputs.XPK_VERSION}}/docker_resources.patch --directory ${WORKLOAD_NAME}/xpk
124- git apply --unsafe-paths .github/gke-workflow/xpk/${{ inputs.XPK_VERSION}}/workload.patch --directory ${WORKLOAD_NAME}/xpk
122+ PATCH_PATH=.github/gke-workflow/xpk/${{ inputs.XPK_VERSION}}  # directory holding the patches for the selected XPK version
123+ for patch in "${PATCH_PATH}"/*; do git apply --unsafe-paths "${patch}" --directory "${WORKLOAD_NAME}/xpk"; done  # glob instead of parsing ls output; with bash -e the step stops at the first patch that fails to apply
125124
126125 - name : Set workload commands
127126 shell : bash -x -e -u {0}
@@ -158,18 +157,31 @@ runs:
158157 run : |
159158 source ${WORKLOAD_NAME}/.venv/bin/activate
160159 cd ${WORKLOAD_NAME}/xpk
160+
161+ args=(  # flags passed to `xpk.py workload create` unconditionally
162+ --project=${{ inputs.GCP_PROJECT }}
163+ --cluster=${{ inputs.GKE_CLUSTER }}
164+ --zone=${{ inputs.GCP_ZONE }}
165+ --workload=${WORKLOAD_NAME}
166+ --docker-image=${{ inputs.IMAGE }}
167+ --device-type=${{ inputs.CLUSTER_DEVICE }}
168+ --num-nodes=${{ inputs.NUM_NODES }}
169+ --num-slices=${{ inputs.NUM_NODES }}
170+ --priority=high
171+ --scheduler=gke.io/topology-aware-auto
172+ )
173+
174+ if [[ "${{ inputs.XPK_VERSION }}" == "v0.10.1" ]]; then  # extra flags appended only for XPK v0.10.1
175+ args+=(
176+ --docker-image-pull-secret=${{ inputs.IMAGE_PULL_SECRET_NAME }}
177+ --env="JAX_COORDINATOR_PORT=3389"
178+ --env="JAX_COORDINATOR_ADDRESS=\$(JOBSET_NAME)-\$(REPLICATED_JOB_NAME)-0-0.\$(JOBSET_NAME):3389"
179+ )
180+ fi  # the array is expanded quoted below so each element reaches xpk.py as exactly one argument (no word splitting or globbing)
181+
161182 python xpk.py workload create \
162- --project ${{ inputs.GCP_PROJECT }} \
163- --cluster ${{ inputs.GKE_CLUSTER }} \
164- --zone ${{ inputs.GCP_ZONE }} \
165- --workload ${WORKLOAD_NAME} \
166- --docker-image ${{ inputs.IMAGE }} \
167- --device-type ${{ inputs.CLUSTER_DEVICE }} \
168- --num-nodes ${{ inputs.NUM_NODES }} \
169- --num-slices ${{ inputs.NUM_NODES }} \
170- --priority=high \
171- --scheduler=gke.io/topology-aware-auto \
172- --command "${PRELUDE} ${CMD} ${POSTLUDE}"
183+ "${args[@]}" \
184+ --command="${PRELUDE} ${CMD} ${POSTLUDE}"
173185
174186 - name : Wait for JobSet to unsuspend on cluster
175187 shell : bash -u {0}
0 commit comments