Merge pull request #2540 from AI-Hypercomputer:grpo_docker_rename

Google-ML-Automation · Google-ML-Automation · commit 72979a303c7a · 2025-10-24T14:49:49.000-07:00
PiperOrigin-RevId: 823673423
diff --git a/docker_build_dependency_image.sh b/docker_build_dependency_image.sh
@@ -27,7 +27,7 @@
 # works with any custom wheels.
 # bash docker_build_dependency_image.sh MODE=custom_wheels
 
-# bash docker_build_dependency_image.sh MODE=grpo
+# bash docker_build_dependency_image.sh MODE=post-training
 
 # Enable "exit immediately if any command fails" option
 set -e
@@ -68,17 +68,17 @@ if [[ -z ${MODE} ]]; then
   export MODE=stable
   echo "Default MODE=${MODE}"
   export CUSTOM_JAX=0
-  export INSTALL_GRPO=0
+  export INSTALL_POST_TRAINING=0
 elif [[ ${MODE} == "custom_wheels" ]] ; then
   export MODE=nightly
   export CUSTOM_JAX=1
-  export INSTALL_GRPO=0
-elif [[ ${MODE} == "grpo" || ${MODE} == "grpo-experimental" ]] ; then
-  export INSTALL_GRPO=1
+  export INSTALL_POST_TRAINING=0
+elif [[ ${MODE} == "post-training" || ${MODE} == "post-training-experimental" ]] ; then
+  export INSTALL_POST_TRAINING=1
   export CUSTOM_JAX=0
 else
   export CUSTOM_JAX=0
-  export INSTALL_GRPO=0
+  export INSTALL_POST_TRAINING=0
 fi
 
 if [[ -z ${DEVICE} ]]; then
@@ -124,8 +124,8 @@ if [[ -z ${LIBTPU_GCS_PATH+x} ]] ; then
     elif [[ ${MANTARAY} == "true" ]]; then
       echo "Building with benchmark-db"
       docker build --network host --build-arg MODE=${MODE} --build-arg JAX_VERSION=$JAX_VERSION --build-arg LIBTPU_GCS_PATH=$LIBTPU_GCS_PATH --build-arg DEVICE=$DEVICE -f ./maxtext_db_dependencies.Dockerfile -t ${LOCAL_IMAGE_NAME} .
-    elif [[ ${INSTALL_GRPO} -eq 1 && ${DEVICE} == "tpu" ]]; then
-      echo "Installing MaxText stable mode dependencies for GRPO"
+    elif [[ ${INSTALL_POST_TRAINING} -eq 1 && ${DEVICE} == "tpu" ]]; then
+      echo "Installing MaxText stable mode dependencies for Post-Training"
       docker build --network host --build-arg MODE=stable --build-arg JAX_VERSION=$JAX_VERSION --build-arg LIBTPU_GCS_PATH=$LIBTPU_GCS_PATH --build-arg DEVICE=$DEVICE -f ./maxtext_dependencies.Dockerfile -t ${LOCAL_IMAGE_NAME} .
     else
       docker build --network host --build-arg MODE=${MODE} --build-arg JAX_VERSION=$JAX_VERSION --build-arg LIBTPU_GCS_PATH=$LIBTPU_GCS_PATH --build-arg DEVICE=$DEVICE -f ./maxtext_dependencies.Dockerfile -t ${LOCAL_IMAGE_NAME} .
@@ -136,9 +136,9 @@ else
   docker build --network host --build-arg CUSTOM_LIBTPU=true -f ./maxtext_libtpu_path.Dockerfile -t ${LOCAL_IMAGE_NAME} .
 fi
 
-if [[ ${INSTALL_GRPO} -eq 1 ]] ; then
+if [[ ${INSTALL_POST_TRAINING} -eq 1 ]] ; then
   if [[ ${DEVICE} != "tpu" ]] ; then
-    echo "Error: MODE=grpo is only supported for DEVICE=tpu"
+    echo "Error: MODE=post-training is only supported for DEVICE=tpu"
     exit 1
   fi
 
@@ -158,7 +158,7 @@ if [[ ${INSTALL_GRPO} -eq 1 ]] ; then
     --network host \
     --build-arg BASEIMAGE=${LOCAL_IMAGE_NAME} \
     --build-arg MODE=${MODE} \
-    -f ./maxtext_grpo_dependencies.Dockerfile \
+    -f ./maxtext_post_training_dependencies.Dockerfile \
     -t ${LOCAL_IMAGE_NAME} .
 fi
 
diff --git a/docs/tutorials/grpo_with_pathways.md b/docs/tutorials/grpo_with_pathways.md
@@ -39,10 +39,10 @@ In addition to MaxText dependencies,
 We use the scheduler code from vLLM, and the model runner code from `tpu_commons`
 
 ```
-bash docker_build_dependency_image.sh MODE=grpo
+bash docker_build_dependency_image.sh MODE=post-training
 ```
 
-You can also use `bash docker_build_dependency_image.sh MODE=grpo-experimental` to try out new features via experimental dependencies such as improved pathwaysutils resharding API
+You can also use `bash docker_build_dependency_image.sh MODE=post-training-experimental` to try out new features via experimental dependencies, such as the improved `pathwaysutils` resharding API.
 
 
 
diff --git a/maxtext_post_training_dependencies.Dockerfile b/maxtext_post_training_dependencies.Dockerfile
@@ -18,7 +18,7 @@ ARG MODE
 
 ENV MODE=$MODE
 
-RUN echo "Installing GRPO dependencies (vLLM, tpu-common, tunix) with MODE=${MODE}"
+RUN echo "Installing Post-Training dependencies (vLLM, tpu-common, tunix) with MODE=${MODE}"
 
 
 # Uninstall existing jax to avoid conflicts
@@ -52,7 +52,7 @@ RUN pip install --no-cache-dir --pre \
     --find-links https://storage.googleapis.com/jax-releases/libtpu_releases.html \
     tpu-commons==0.1.2
 
-RUN if [ "$MODE" = "grpo-experimental" ]; then \
+RUN if [ "$MODE" = "post-training-experimental" ]; then \
     pip uninstall -y jax jaxlib libtpu && \
     pip install --pre -U jax jaxlib -i https://us-python.pkg.dev/ml-oss-artifacts-published/jax/simple/ && \
     pip install -U --pre libtpu -f https://storage.googleapis.com/jax-releases/libtpu_releases.html; \