diff --git a/.github/container/Dockerfile.mjx b/.github/container/Dockerfile.mjx
deleted file mode 100644
index 1bdd1a439..000000000
--- a/.github/container/Dockerfile.mjx
+++ /dev/null
@@ -1,54 +0,0 @@
-# syntax=docker/dockerfile:1-labs
-
-ARG BASE_IMAGE=ghcr.io/nvidia/jax-mealkit:jax
-ARG URLREF_MUJOCO=https://github.com/google-deepmind/mujoco.git#main
-ARG URLREF_MUJOCO_MPC=https://github.com/google-deepmind/mujoco_mpc.git#main
-ARG URLREF_L2R=https://github.com/google-deepmind/language_to_reward_2023.git#main
-ARG SRC_PATH_MUJOCO=/opt/mujoco
-ARG SRC_PATH_MUJOCO_MPC=/opt/mujoco-mpc
-ARG SRC_PATH_L2R=/opt/language-to-reward-2023
-
-###############################################################################
-## Download source and add auxiliary scripts
-###############################################################################
-
-FROM ${BASE_IMAGE} as mealkit
-ARG URLREF_MUJOCO
-ARG URLREF_MUJOCO_MPC
-ARG URLREF_L2R
-ARG SRC_PATH_MUJOCO
-ARG SRC_PATH_MUJOCO_MPC
-ARG SRC_PATH_L2R
-
-# Install system dependencies for Mujuco/MPC
-RUN <<"EOF" bash -ex
-apt-get update
-apt-get install -y \
-    libgl1-mesa-dev \
-    libxinerama-dev \
-    libxcursor-dev \
-    libxrandr-dev \
-    libxi-dev \
-    ninja-build
-apt-get clean
-rm -rf /var/lib/apt/lists/*
-EOF
-
-# Specify installation targets
-RUN <<"EOF" bash -ex
-git-clone.sh ${URLREF_MUJOCO} ${SRC_PATH_MUJOCO}
-git-clone.sh ${URLREF_MUJOCO_MPC} ${SRC_PATH_MUJOCO_MPC}
-git-clone.sh ${URLREF_L2R} ${SRC_PATH_L2R}
-echo "-f https://py.mujoco.org/" >> /opt/pip-tools.d/requirements-mjx.in
-echo "-e file://${SRC_PATH_MUJOCO}/mjx" >> /opt/pip-tools.d/requirements-mjx.in
-echo "-e file://${SRC_PATH_MUJOCO_MPC}/python" >> /opt/pip-tools.d/requirements-l2r.in
-echo "-e file://${SRC_PATH_L2R}" >> /opt/pip-tools.d/requirements-l2r.in
-EOF
-
-###############################################################################
-## Install accumulated packages from the base image and the previous stage
-###############################################################################
-
-FROM mealkit as final
-
-RUN pip-finalize.sh
diff --git a/.github/container/manifest.yaml b/.github/container/manifest.yaml
index ab2a2f9d7..9746e771a 100644
--- a/.github/container/manifest.yaml
+++ b/.github/container/manifest.yaml
@@ -86,21 +86,6 @@ haliax:
   tracking_ref: main
   latest_verified_commit: 2a696a0c971901ff93afdaa965959d8e3b982ba9
   mode: git-clone
-mujoco:
-  url: https://github.com/google-deepmind/mujoco.git
-  tracking_ref: main
-  latest_verified_commit: e95159b4f6d48d114b16a8dc13ad26b3e44bc3e2
-  mode: git-clone
-mujoco-mpc:
-  url: https://github.com/google-deepmind/mujoco_mpc.git
-  tracking_ref: main
-  latest_verified_commit: 4700f4a13be18398f5aaf6a33ed42e531967e3ae
-  mode: git-clone
-language-to-reward-2023:
-  url: https://github.com/google-deepmind/language_to_reward_2023.git
-  tracking_ref: main
-  latest_verified_commit: abb8e5125e4ecd0da378490b73448c05a694def5
-  mode: git-clone
 mlperf-logging:
   url: https://github.com/mlcommons/logging.git
   tracking_ref: master
diff --git a/.github/workflows/_ci.yaml b/.github/workflows/_ci.yaml
index 7d562a84b..c5bc64ac3 100644
--- a/.github/workflows/_ci.yaml
+++ b/.github/workflows/_ci.yaml
@@ -543,14 +543,6 @@ jobs:
         pytest-report-L0-unittest.jsonl
         pytest-report-L0-distributed-unittest.jsonl
         pytest-report-L1-distributed-unittest.jsonl
-  test-upstream-t5x:
-    needs: build-upstream-t5x
-    if: inputs.ARCHITECTURE == 'amd64' # arm64 runners n/a
-    uses: ./.github/workflows/_test_upstream_t5x.yaml
-    with:
-      T5X_IMAGE: ${{ needs.build-upstream-t5x.outputs.DOCKER_TAG_FINAL }}
-    secrets: inherit
-
   test-rosetta-t5x:
     needs: build-rosetta-t5x
     if: inputs.ARCHITECTURE == 'amd64' # no images for arm64
@@ -569,7 +561,7 @@ jobs:
         docker run -i --gpus all --shm-size=1g \
         ${{ needs.build-levanter.outputs.DOCKER_TAG_FINAL }} \
         bash <<"EOF" |& tee test-levanter.log
-          pip install flake8 pytest soundfile librosa
+          pip install flake8 pytest pytest-asyncio soundfile tensorboardx librosa
           PYTHONPATH=/opt/levanter/tests:$PYTHONPATH pytest /opt/levanter/tests -m "not entry and not slow and not ray"
         EOF
       STATISTICS_SCRIPT: |
diff --git a/.github/workflows/_test_maxtext.yaml b/.github/workflows/_test_maxtext.yaml
index f7a157878..5b1634f9a 100644
--- a/.github/workflows/_test_maxtext.yaml
+++ b/.github/workflows/_test_maxtext.yaml
@@ -373,24 +373,6 @@ jobs:
       ARTIFACT_NAME: ${{ inputs.ARTIFACT_NAME }}
       FW_NAME: ${{ inputs.FW_NAME }}
 
-  summary:
-    name: test-maxtext-summary
-    runs-on: ubuntu-22.04
-    needs: [single-process-multi-device, maxtext-multinode]
-    if: "!cancelled()"
-    steps:
-      - name: Generate TensorBoard query URL
-        run: |
-          (
-          cat << EOF
-
-          ## MaxText training
-
-          [view metrics](https://${{ vars.HOSTNAME_TENSORBOARD }}/#scalars&regexInput=${{ inputs.FW_NAME }}-${GITHUB_RUN_ID}&_smoothingWeight=0&tagFilter=seqs_per)
-
-          EOF
-          ) | tee $GITHUB_STEP_SUMMARY
-
   outcome:
     name: test-maxtext-outcome
     needs: sitrep
diff --git a/.github/workflows/_test_t5x_rosetta.yaml b/.github/workflows/_test_t5x_rosetta.yaml
index cc1a23179..df07e31e4 100644
--- a/.github/workflows/_test_t5x_rosetta.yaml
+++ b/.github/workflows/_test_t5x_rosetta.yaml
@@ -29,568 +29,15 @@ on:
         value: ${{ jobs.sitrep.outputs.STATUS }}
 
 env:
-  BATCH_SIZE_PER_GPU: 32
   VIT_BATCH_SIZE_PER_GPU: 256
 
 jobs:
-
-  single-process-multi-device:
-    strategy:
-      max-parallel: 1
-      matrix:
-        include:
-          # - TEST_NAME: "1P1G_te-0"
-          #   N_GPU: 1
-          #   ADDITIONAL_ARGS: "--enable-te 0"
-          #   EXTRA_GIN_ARGS: ""
-          - TEST_NAME: "1P8G_te-1"
-            N_GPU: 8
-            ADDITIONAL_ARGS: ""
-            EXTRA_GIN_ARGS: "--gin.train/utils.DatasetConfig.pack=False --gin.train_eval/utils.DatasetConfig.pack=False"
-      fail-fast: false
-    runs-on: jumpbox
-    env:
-      BADGE_FILENAME_PREFIX: badge-rosetta-t5x-single-process-multi-device
-    steps:
-      - name: Print environment variables
-        run: env
-          
-      - name: Check out the repository under ${GITHUB_WORKSPACE}
-        uses: actions/checkout@v4
-        
-      - name: Setup SSH
-        id: setup-ssh
-        uses: ./.github/actions/setup-ssh
-        with:
-          ssh-private-key: ${{ secrets.SSH_PRIVATE_KEY }}
-          ssh-known-hosts: ${{ vars.SSH_KNOWN_HOSTS }}
-
-      - name: Labels and metadata
-        id: meta
-        shell: bash -x -e {0}
-        run: |
-          IMAGE="$(echo ${{inputs.T5X_IMAGE}} | sed 's/\//#/')"
-          TEST_CASE_NAME=${{ matrix.TEST_NAME }}
-          JOB_NAME=${{ inputs.FW_NAME }}-${GITHUB_RUN_ID}-${TEST_CASE_NAME}
-          LOG_FILE=/nfs/cluster/${JOB_NAME}.log
-          MODEL_PATH=/nfs/cluster/${JOB_NAME}
-          BATCH_SIZE=$((${{ env.BATCH_SIZE_PER_GPU }} * ${{ matrix.N_GPU }}))
-          for var in IMAGE TEST_CASE_NAME JOB_NAME LOG_FILE MODEL_PATH BATCH_SIZE; do
-            echo "$var=${!var}" >> $GITHUB_OUTPUT
-          done
-
-      - name: Submit SLURM jobs over SSH
-        id: submit
-        shell: bash -O expand_aliases -x -e {0}
-        run: |
-          cd $GITHUB_WORKSPACE
-          alias sshx='ssh -o "ServerAliveInterval 7" ${{ secrets.CLUSTER_LOGIN_USER }}@${{ vars.HOSTNAME_SLURM_LOGIN }}'
-          sshx "date && hostname && sinfo"
-          sshx mkdir -p ${{ steps.meta.outputs.MODEL_PATH }}
-          JOB=$(sshx sbatch --parsable << EOF
-          #!/bin/bash
-          #SBATCH --job-name=${{ steps.meta.outputs.JOB_NAME }}
-          #SBATCH --exclusive
-          #SBATCH --nodes=1
-          #SBATCH --gpus-per-node=${{ matrix.N_GPU }}
-          #SBATCH --time=00:30:00
-          #SBATCH --output=${{ steps.meta.outputs.LOG_FILE }}
-          #SBATCH --export="ENROOT_PASSWORD=${{ secrets.GITHUB_TOKEN }}"
-
-          # preload enroot container using one task per node
-          time srun \
-            --ntasks-per-node=1 \
-            --container-name=runtime \
-            --container-image=${{ steps.meta.outputs.IMAGE }} \
-            true
-
-          # run job with tasks on each node sharing one container
-          time srun \
-            --ntasks=1 \
-            --ntasks-per-node=1 \
-            --container-name=runtime \
-            --container-mounts=${{ steps.meta.outputs.MODEL_PATH }}:/output \
-            --container-entrypoint \
-            bash -c 'wget -P /tmp/ https://raw.githubusercontent.com/NVIDIA/JAX-Toolbox/${{ github.sha }}/.github/container/test-t5x.sh && sleep 10 && bash /tmp/test-t5x.sh \
-              --output /output/${{ steps.meta.outputs.TEST_CASE_NAME }} \
-              --dtype bfloat16 \
-              --batch-size ${{ steps.meta.outputs.BATCH_SIZE }} \
-              --epochs 7 \
-              --steps-per-epoch 100 \
-              --use-contrib-configs \
-              ${{ matrix.ADDITIONAL_ARGS }} \
-              ${{ matrix.EXTRA_GIN_ARGS != '' && format('--additional-args "{0}"', matrix.EXTRA_GIN_ARGS) || '' }}'
-          EOF
-          )
-
-          echo "SLURM_JOB_ID=${JOB}" >> $GITHUB_OUTPUT
-
-          . .github/workflows/scripts/wait_for_slurm_job.sh
-
-          wait_for_slurm_job ${{ secrets.CLUSTER_LOGIN_USER }}@${{ vars.HOSTNAME_SLURM_LOGIN }} ${JOB}
-
-          # Gather job info
-          SLURM_STATE=$(sshx sacct -j $JOB --format=State --parsable2 --noheader |& head -n 1)
-          SLURM_EXITCODE=$(sshx sacct -j $JOB --format=exitcode --parsable2 --noheader | sort -r -u | head -1 | cut -f 1 -d":" | sed 's/ //g')
-          echo "SLURM Job state is ${SLURM_STATE}"
-          echo "SLURM Job exit code is ${SLURM_EXITCODE}"
-          echo "SLURM_STATE=${SLURM_STATE}" >> "$GITHUB_OUTPUT"
-          echo "SLURM_EXITCODE=${SLURM_EXITCODE}" >> "$GITHUB_OUTPUT"
-
-          set -x
-
-      - name: Remove orphaned SLURM job if the CI job is canceled
-        if: cancelled()
-        shell: bash -x -e {0}
-        run: |
-          ssh ${{ secrets.CLUSTER_LOGIN_USER }}@${{ vars.HOSTNAME_SLURM_LOGIN }} \
-            scancel ${{ steps.submit.outputs.SLURM_JOB_ID }}
-
-      - name: Retrieve training logs and upload to TensorBoard server
-        shell: bash -x -e {0}
-        run: |
-          cd $GITHUB_WORKSPACE
-          mkdir output/
-          rsync -rtz --progress \
-            ${{ secrets.CLUSTER_LOGIN_USER }}@${{ vars.HOSTNAME_SLURM_LOGIN }}:${{ steps.meta.outputs.LOG_FILE }} \
-            output/${{ steps.meta.outputs.TEST_CASE_NAME }}.log || true
-          rsync -rtz --progress \
-            ${{ secrets.CLUSTER_LOGIN_USER }}@${{ vars.HOSTNAME_SLURM_LOGIN }}:${{ steps.meta.outputs.MODEL_PATH }}/* \
-            output/ || true
-          rsync -rtz --progress \
-            output/ \
-            ${{ secrets.TENSORBOARD_UPLOAD_USER }}@${{ vars.HOSTNAME_TENSORBOARD }}:/tensorboard-logs/${{ inputs.FW_NAME }}-${GITHUB_RUN_ID}/ || true
-
-      - name: Write SLURM job status to file
-        shell: bash -x -e {0}
-        run: |
-          python << EOF
-          import json
-          with open("output/${{ steps.meta.outputs.TEST_CASE_NAME }}-status.json", "w") as f:
-              dump = {'state': "${{ steps.submit.outputs.SLURM_STATE }}", 'exitcode': "${{ steps.submit.outputs.SLURM_EXITCODE }}"}
-              json.dump(dump, f)
-          EOF
-          
-      - name: Generate sitrep
-        if: success() || failure()
-        shell: bash -x -e {0}
-        run: |
-          # bring in utility functions
-          cd $GITHUB_WORKSPACE
-          source .github/workflows/scripts/to_json.sh
-
-          EXIT_STATUSES="output/*-status.json"
-          badge_label='ROSETTA T5X SINGLE PROCESS MULTI DEVICE ${{ steps.meta.outputs.TEST_CASE_NAME }}'
-          passed_tests=$(jq -r '. | select ((.state == "COMPLETED") and (.exitcode == "0")) | .state' $EXIT_STATUSES | wc -l)
-          failed_tests=$(jq -r '. | select ((.state != "COMPLETED") or (.exitcode != "0")) | .state' $EXIT_STATUSES | wc -l)
-          total_tests=$(ls $EXIT_STATUSES | wc -l)
-          
-          if [[ ${failed_tests} > 0 ]] || [[ ${total_tests} == 0 ]]; then
-            badge_message='error'
-            badge_color=red
-            summary="ROSETTA T5X SINGLE PROCESS MULTI DEVICE ${{ steps.meta.outputs.TEST_CASE_NAME }}: $badge_message"
-          else
-            badge_message="${passed_tests}/${total_tests} passed"
-            if [[ ${failed_tests} == 0 ]]; then
-              badge_color=brightgreen
-            else
-              badge_color=yellow
-            fi
-            summary="ROSETTA T5X SINGLE PROCESS MULTI DEVICE ${{ steps.meta.outputs.TEST_CASE_NAME }}: $badge_message"
-          fi
-
-          to_json \
-            summary \
-            total_tests passed_tests failed_tests \
-            badge_label badge_color badge_message \
-          > output/sitrep.json
-
-          schemaVersion=1 \
-          label="${badge_label}" \
-          message="${badge_message}" \
-          color="${badge_color}" \
-          to_json schemaVersion label message color \
-          > output/${{ env.BADGE_FILENAME_PREFIX }}-${{ steps.meta.outputs.TEST_CASE_NAME }}.json
-
-      - name: Upload training logs as artifacts
-        uses: actions/upload-artifact@v4
-        with:
-          name: ${{ steps.meta.outputs.JOB_NAME }}
-          path: output/*
-
-  # 2-node configs temporarily disabled
-  # multi-gpu-multi-node:
-  #   strategy:
-  #     max-parallel: 1
-  #     matrix:
-  #       include:
-  #         - TEST_NAME: "2N8G-te-1"
-  #           N_GPU: 8
-  #           N_NODE: 2
-  #           ADDITIONAL_ARGS: ""
-  #           EXTRA_GIN_ARGS: "--gin.train/utils.DatasetConfig.pack=False --gin.train_eval/utils.DatasetConfig.pack=False"
-  #         - TEST_NAME: "2N2G_te-0"
-  #           N_GPU: 2
-  #           N_NODE: 2
-  #           ADDITIONAL_ARGS: "--enable-te 0"
-  #           EXTRA_GIN_ARGS: ""
-  #     fail-fast: false
-  #   runs-on: jumpbox
-  #   env:
-  #     BADGE_FILENAME_PREFIX: badge-rosetta-t5x-multi-gpu-multi-node
-  #   steps:
-  #     - name: Print environment variables
-  #       run: env
-
-  #     - name: Check out the repository under ${GITHUB_WORKSPACE}
-  #       uses: actions/checkout@v4
-
-  #     - name: Setup SSH agent
-  #       uses: webfactory/ssh-agent@v0.9.0
-  #       with:
-  #         ssh-private-key: ${{ secrets.SSH_PRIVATE_KEY }}
-
-  #     - name: Setup SSH known hosts
-  #       id: ssh-known-hosts
-  #       run: |
-  #         mkdir -p ~/.ssh
-  #         cat >> ~/.ssh/known_hosts << EOF
-  #         ${{ vars.SSH_KNOWN_HOSTS }}
-  #         EOF
-  #         chmod 600 ~/.ssh/known_hosts
-  #         echo "FILE=$(realpath ~/.ssh/known_hosts)" >> $GITHUB_OUTPUT
-
-  #     - name: Labels and metadata
-  #       id: meta
-  #       shell: bash -x -e {0}
-  #       run: |
-  #         IMAGE="$(echo ${{inputs.T5X_IMAGE}} | sed 's/\//#/')"
-  #         TEST_CASE_NAME=${{ matrix.TEST_NAME }}
-  #         TOTAL_TASKS=$((${{ matrix.N_GPU }} * ${{ matrix.N_NODE }}))
-  #         JOB_NAME=${{ inputs.FW_NAME }}-${GITHUB_RUN_ID}-${TEST_CASE_NAME}
-  #         LOG_FILE=/nfs/cluster/${JOB_NAME}.log
-  #         MODEL_PATH=/nfs/cluster/${JOB_NAME}
-  #         BATCH_SIZE=$((${{ env.BATCH_SIZE_PER_GPU }} * ${{ matrix.N_GPU }} * ${{ matrix.N_NODE }}))
-  #         for var in IMAGE TEST_CASE_NAME TOTAL_TASKS JOB_NAME LOG_FILE MODEL_PATH BATCH_SIZE; do
-  #           echo "$var=${!var}" >> $GITHUB_OUTPUT
-  #         done
-
-  #     - name: Submit SLURM jobs over SSH
-  #       id: submit
-  #       shell: bash -O expand_aliases -x -e {0}
-  #       run: |
-  #         cd $GITHUB_WORKSPACE
-  #         alias sshx='ssh -o "ServerAliveInterval 7" ${{ secrets.CLUSTER_LOGIN_USER }}@${{ vars.HOSTNAME_SLURM_LOGIN }}'
-  #         sshx "date && hostname && sinfo"
-  #         sshx mkdir -p ${{ steps.meta.outputs.MODEL_PATH }}
-  #         JOB=$(sshx sbatch --parsable << EOF
-  #         #!/bin/bash
-  #         #SBATCH --job-name=${{ steps.meta.outputs.JOB_NAME }}
-  #         #SBATCH --exclusive
-  #         #SBATCH --nodes=${{ matrix.N_NODE }}
-  #         #SBATCH --gpus-per-node=${{ matrix.N_GPU }}
-  #         #SBATCH --time=00:30:00
-  #         #SBATCH --output=${{ steps.meta.outputs.LOG_FILE }}
-  #         #SBATCH --export="ENROOT_PASSWORD=${{ secrets.GITHUB_TOKEN }}"
-
-  #         # preload enroot container using one task per node
-  #         time srun \
-  #           --ntasks-per-node=1 \
-  #           --container-name=runtime \
-  #           --container-image=${{ steps.meta.outputs.IMAGE }} \
-  #           true
-
-  #         # run job with tasks on each node sharing one container
-  #         time srun \
-  #           --ntasks=${{ steps.meta.outputs.TOTAL_TASKS }} \
-  #           --ntasks-per-node=${{ matrix.N_GPU }} \
-  #           --container-name=runtime \
-  #           --container-mounts=${{ steps.meta.outputs.MODEL_PATH }}:/output \
-  #           --container-entrypoint \
-  #           bash -c 'wget -P /tmp/ https://raw.githubusercontent.com/NVIDIA/JAX-Toolbox/${{ github.sha }}/.github/container/test-t5x.sh && sleep 10 && bash /tmp/test-t5x.sh \
-  #             --output /output/${{ steps.meta.outputs.TEST_CASE_NAME }} \
-  #             --dtype bfloat16 \
-  #             --batch-size ${{ steps.meta.outputs.BATCH_SIZE }} \
-  #             --epochs 7 \
-  #             --steps-per-epoch 100 \
-  #             --multiprocess \
-  #             --use-contrib-configs \
-  #             ${{ matrix.ADDITIONAL_ARGS }} \
-  #             ${{ matrix.EXTRA_GIN_ARGS != '' && format('--additional-args "{0}"', matrix.EXTRA_GIN_ARGS) || '' }}'
-  #         EOF
-  #         )
-
-  #         echo "SLURM_JOB_ID=${JOB}" >> $GITHUB_OUTPUT
-
-  #         . .github/workflows/scripts/wait_for_slurm_job.sh
-
-  #         wait_for_slurm_job ${{ secrets.CLUSTER_LOGIN_USER }}@${{ vars.HOSTNAME_SLURM_LOGIN }} ${JOB}
-
-  #         # Gather job info
-  #         SLURM_STATE=$(sshx sacct -j $JOB --format=State --parsable2 --noheader |& head -n 1)
-  #         SLURM_EXITCODE=$(sshx sacct -j $JOB --format=exitcode --parsable2 --noheader | sort -r -u | head -1 | cut -f 1 -d":" | sed 's/ //g')
-  #         echo "SLURM Job state is ${SLURM_STATE}"
-  #         echo "SLURM Job exit code is ${SLURM_EXITCODE}"
-  #         echo "SLURM_STATE=${SLURM_STATE}" >> "$GITHUB_OUTPUT"
-  #         echo "SLURM_EXITCODE=${SLURM_EXITCODE}" >> "$GITHUB_OUTPUT"
-
-  #         set -x
-
-  #     - name: Remove orphaned SLURM job if the CI job is canceled
-  #       if: cancelled()
-  #       shell: bash -x -e {0}
-  #       run: |
-  #         ssh ${{ secrets.CLUSTER_LOGIN_USER }}@${{ vars.HOSTNAME_SLURM_LOGIN }} \
-  #           scancel ${{ steps.submit.outputs.SLURM_JOB_ID }}
-
-  #     - name: Retrieve training logs and upload to TensorBoard server
-  #       shell: bash -x -e {0}
-  #       run: |
-  #         cd $GITHUB_WORKSPACE
-  #         mkdir output/
-  #         rsync -rtz --progress \
-  #           ${{ secrets.CLUSTER_LOGIN_USER }}@${{ vars.HOSTNAME_SLURM_LOGIN }}:${{ steps.meta.outputs.LOG_FILE }} \
-  #           output/${{ steps.meta.outputs.TEST_CASE_NAME }}.log || true
-  #         rsync -rtz --progress \
-  #           ${{ secrets.CLUSTER_LOGIN_USER }}@${{ vars.HOSTNAME_SLURM_LOGIN }}:${{ steps.meta.outputs.MODEL_PATH }}/* \
-  #           output/ || true
-  #         rsync -rtz --progress \
-  #           output/ \
-  #           ${{ secrets.TENSORBOARD_UPLOAD_USER }}@${{ vars.HOSTNAME_TENSORBOARD }}:/tensorboard-logs/${{ inputs.FW_NAME }}-${GITHUB_RUN_ID}/ || true
-
-  #     - name: Write SLURM job status to file
-  #       shell: bash -x -e {0}
-  #       run: |
-  #         python << EOF
-  #         import json
-  #         with open("output/${{ steps.meta.outputs.TEST_CASE_NAME }}-status.json", "w") as f:
-  #             dump = {'state': "${{ steps.submit.outputs.SLURM_STATE }}", 'exitcode': "${{ steps.submit.outputs.SLURM_EXITCODE }}"}
-  #             json.dump(dump, f)
-  #         EOF
-
-  #     - name: Generate sitrep
-  #       if: success() || failure()
-  #       shell: bash -x -e {0}
-  #       run: |
-  #         # bring in utility functions
-  #         cd $GITHUB_WORKSPACE
-  #         source .github/workflows/scripts/to_json.sh
-
-  #         EXIT_STATUSES="output/*-status.json"
-  #         badge_label='ROSETTA T5X MULTI GPU MULTI NODE ${{ steps.meta.outputs.TEST_CASE_NAME }}'
-  #         passed_tests=$(jq -r '. | select ((.state == "COMPLETED") and (.exitcode == "0")) | .state' $EXIT_STATUSES | wc -l)
-  #         failed_tests=$(jq -r '. | select ((.state != "COMPLETED") or (.exitcode != "0")) | .state' $EXIT_STATUSES | wc -l)
-  #         total_tests=$(ls $EXIT_STATUSES | wc -l)
-          
-  #         if [[ ${failed_tests} > 0 ]] || [[ ${total_tests} == 0 ]]; then
-  #           badge_message='error'
-  #           badge_color=red
-  #           summary="ROSETTA T5X MULTI GPU MULTI NODE ${{ steps.meta.outputs.TEST_CASE_NAME }}: $badge_message"
-  #         else
-  #           badge_message="${passed_tests}/${total_tests} passed"
-  #           if [[ ${failed_tests} == 0 ]]; then
-  #             badge_color=brightgreen
-  #           else
-  #             badge_color=yellow
-  #           fi
-  #           summary="ROSETTA T5X MULTI GPU MULTI NODE ${{ steps.meta.outputs.TEST_CASE_NAME }}: $badge_message"
-  #         fi
-
-  #         to_json \
-  #           summary \
-  #           total_tests passed_tests failed_tests \
-  #           badge_label badge_color badge_message \
-  #         > output/sitrep.json
-
-  #         schemaVersion=1 \
-  #         label="${badge_label}" \
-  #         message="${badge_message}" \
-  #         color="${badge_color}" \
-  #         to_json schemaVersion label message color \
-  #         > output/${{ env.BADGE_FILENAME_PREFIX }}-${{ steps.meta.outputs.TEST_CASE_NAME }}.json
- 
-  #     - name: Upload training logs as artifacts
-  #       uses: actions/upload-artifact@v4
-  #       with:
-  #         name: ${{ steps.meta.outputs.JOB_NAME }}
-  #         path: output/*
-
-  # vit-single-process-multi-device:
-  #   strategy:
-  #     max-parallel: 1
-  #     matrix:
-  #       N_GPU: [8]
-  #     fail-fast: false
-  #   runs-on: jumpbox
-  #   env:
-  #     BADGE_FILENAME_PREFIX: badge-rosetta-t5x-vit-single-process-multi-device
-  #   steps:
-  #     - name: Print environment variables
-  #       run: env
-
-  #     - name: Setup SSH agent
-  #       uses: webfactory/ssh-agent@v0.9.0
-  #       with:
-  #         ssh-private-key: ${{ secrets.SSH_PRIVATE_KEY }}
-
-  #     - name: Check out the repository under ${GITHUB_WORKSPACE}
-  #       uses: actions/checkout@v4
-
-  #     - name: Setup SSH known hosts
-  #       id: ssh-known-hosts
-  #       run: |
-  #         mkdir -p ~/.ssh
-  #         cat >> ~/.ssh/known_hosts << EOF
-  #         ${{ vars.SSH_KNOWN_HOSTS }}
-  #         EOF
-  #         chmod 600 ~/.ssh/known_hosts
-  #         echo "FILE=$(realpath ~/.ssh/known_hosts)" >> $GITHUB_OUTPUT
-
-  #     - name: Labels and metadata
-  #       id: meta
-  #       shell: bash -x -e {0}
-  #       run: |
-  #         IMAGE="$(echo ${{inputs.T5X_IMAGE}} | sed 's/\//#/')"
-  #         TEST_CASE_NAME=VIT1P${{ matrix.N_GPU }}G
-  #         JOB_NAME=${{ inputs.FW_NAME }}-vit-${GITHUB_RUN_ID}-${TEST_CASE_NAME}
-  #         LOG_FILE=/nfs/cluster/${JOB_NAME}.log
-  #         MODEL_PATH=/nfs/cluster/${JOB_NAME}
-  #         BATCH_SIZE=$((${{ env.VIT_BATCH_SIZE_PER_GPU }} * ${{ matrix.N_GPU }}))
-  #         for var in IMAGE TEST_CASE_NAME JOB_NAME LOG_FILE MODEL_PATH BATCH_SIZE; do
-  #           echo "$var=${!var}" >> $GITHUB_OUTPUT
-  #         done
-
-  #     - name: Submit SLURM jobs over SSH
-  #       id: submit
-  #       shell: bash -O expand_aliases -x -e {0}
-  #       run: |
-  #         cd $GITHUB_WORKSPACE
-  #         alias sshx='ssh -o "ServerAliveInterval 7" ${{ secrets.CLUSTER_LOGIN_USER }}@${{ vars.HOSTNAME_SLURM_LOGIN }}'
-  #         sshx "date && hostname && sinfo"
-  #         sshx mkdir -p ${{ steps.meta.outputs.MODEL_PATH }}
-  #         JOB=$(sshx sbatch --parsable << EOF
-  #         #!/bin/bash
-  #         #SBATCH --job-name=${{ steps.meta.outputs.JOB_NAME }}
-  #         #SBATCH --exclusive
-  #         #SBATCH --nodes=1
-  #         #SBATCH --gpus-per-node=${{ matrix.N_GPU }}
-  #         #SBATCH --time=00:30:00
-  #         #SBATCH --output=${{ steps.meta.outputs.LOG_FILE }}
-  #         #SBATCH --export="ENROOT_PASSWORD=${{ secrets.GITHUB_TOKEN }}"
-
-  #         # preload enroot container using one task per node
-  #         time srun \
-  #           --ntasks-per-node=1 \
-  #           --container-name=runtime \
-  #           --container-image=${{ steps.meta.outputs.IMAGE }} \
-  #           true
-
-  #         # run job with tasks on each node sharing one container
-  #         time srun \
-  #           --ntasks=1 \
-  #           --container-name=runtime \
-  #           --container-mounts=${{ steps.meta.outputs.MODEL_PATH }}:/output \
-  #           --container-entrypoint \
-  #           test-vit.sh \
-  #             --output /output/${{ steps.meta.outputs.TEST_CASE_NAME }} \
-  #             --dtype bfloat16 \
-  #             --batch-size ${{ steps.meta.outputs.BATCH_SIZE }}
-  #         EOF
-  #         )
-
-  #         . .github/workflows/scripts/wait_for_slurm_job.sh
-
-  #         wait_for_slurm_job ${{ secrets.CLUSTER_LOGIN_USER }}@${{ vars.HOSTNAME_SLURM_LOGIN }} ${JOB}
-
-  #         # Gather job info
-  #         SLURM_STATE=$(sshx sacct -j $JOB --format=State --parsable2 --noheader |& head -n 1)
-  #         SLURM_EXITCODE=$(sshx sacct -j $JOB --format=exitcode --parsable2 --noheader | sort -r -u | head -1 | cut -f 1 -d":" | sed 's/ //g')
-  #         echo "SLURM Job state is ${SLURM_STATE}"
-  #         echo "SLURM Job exit code is ${SLURM_EXITCODE}"
-  #         echo "SLURM_STATE=${SLURM_STATE}" >> "$GITHUB_OUTPUT"
-  #         echo "SLURM_EXITCODE=${SLURM_EXITCODE}" >> "$GITHUB_OUTPUT"
-
-  #         set -x
-
-  #     - name: Retrieve training logs and upload to TensorBoard server
-  #       shell: bash -x -e {0}
-  #       run: |
-  #         cd $GITHUB_WORKSPACE
-  #         mkdir output/
-  #         rsync -rtz --progress \
-  #           ${{ secrets.CLUSTER_LOGIN_USER }}@${{ vars.HOSTNAME_SLURM_LOGIN }}:${{ steps.meta.outputs.LOG_FILE }} \
-  #           output/${{ steps.meta.outputs.TEST_CASE_NAME }}.log || true
-  #         rsync -rtz --progress \
-  #           ${{ secrets.CLUSTER_LOGIN_USER }}@${{ vars.HOSTNAME_SLURM_LOGIN }}:${{ steps.meta.outputs.MODEL_PATH }}/* \
-  #           output/ || true
-  #         rsync -rtz --progress \
-  #           output/ \
-  #           ${{ secrets.TENSORBOARD_UPLOAD_USER }}@${{ vars.HOSTNAME_TENSORBOARD }}:/tensorboard-logs/${{ inputs.FW_NAME }}-vit-${GITHUB_RUN_ID}/ || true
-
-  #     - name: Write SLURM job status to file
-  #       shell: bash -x -e {0}
-  #       run: |
-  #         python << EOF
-  #         import json
-  #         with open("output/${{ steps.meta.outputs.TEST_CASE_NAME }}-status.json", "w") as f:
-  #             dump = {'state': "${{ steps.submit.outputs.SLURM_STATE }}", 'exitcode': "${{ steps.submit.outputs.SLURM_EXITCODE }}"}
-  #             json.dump(dump, f)
-  #         EOF
-
-  #     - name: Generate sitrep
-  #       if: success() || failure()
-  #       shell: bash -x -e {0}
-  #       run: |
-  #         # bring in utility functions
-  #         cd $GITHUB_WORKSPACE
-  #         source .github/workflows/scripts/to_json.sh
-
-  #         EXIT_STATUSES="output/*-status.json"
-  #         badge_label='ROSETTA T5X VIT SINGLE PROCESS MULTI DEVICE ${{ steps.meta.outputs.TEST_CASE_NAME }}'
-  #         passed_tests=$(jq -r '. | select ((.state == "COMPLETED") and (.exitcode == "0")) | .state' $EXIT_STATUSES | wc -l)
-  #         failed_tests=$(jq -r '. | select ((.state != "COMPLETED") or (.exitcode != "0")) | .state' $EXIT_STATUSES | wc -l)
-  #         total_tests=$(ls $EXIT_STATUSES | wc -l)
-          
-  #         if [[ ${failed_tests} > 0 ]] || [[ ${total_tests} == 0 ]]; then
-  #           badge_message='error'
-  #           badge_color=red
-  #           summary="ROSETTA T5X  VIT SINGLE PROCESS MULTI DEVICE ${{ steps.meta.outputs.TEST_CASE_NAME }}: $badge_message"
-  #         else
-  #           badge_message="${passed_tests}/${total_tests} passed"
-  #           if [[ ${failed_tests} == 0 ]]; then
-  #             badge_color=brightgreen
-  #           else
-  #             badge_color=yellow
-  #           fi
-  #           summary="ROSETTA T5X  VIT SINGLE PROCESS MULTI DEVICE ${{ steps.meta.outputs.TEST_CASE_NAME }}: $badge_message"
-  #         fi
-
-  #         to_json \
-  #           summary \
-  #           total_tests passed_tests failed_tests \
-  #           badge_label badge_color badge_message \
-  #         > output/sitrep.json
-
-  #         schemaVersion=1 \
-  #         label="${badge_label}" \
-  #         message="${badge_message}" \
-  #         color="${badge_color}" \
-  #         to_json schemaVersion label message color \
-  #         > output/${{ env.BADGE_FILENAME_PREFIX }}-${{ steps.meta.outputs.TEST_CASE_NAME }}.json
-
-  #     - name: Upload training logs as artifacts
-  #       uses: actions/upload-artifact@v4
-  #       with:
-  #         name: ${{ steps.meta.outputs.JOB_NAME }}
-  #         path: output/*
-
   vit-multi-gpu-multi-node:
     strategy:
       max-parallel: 1
       matrix:
         N_GPU: [8]
-        # 2-node configs temporarily disabled
-        N_NODE: [1] # , 2]
+        N_NODE: [1]
       fail-fast: false
     runs-on: jumpbox
     env:
@@ -760,7 +207,7 @@ jobs:
 
   metrics:
     name: test-t5x-rosetta-metrics
-    needs: [single-process-multi-device, vit-multi-gpu-multi-node] # vit-single-process-multi-device, multi-gpu-multi-node
+    needs: [vit-multi-gpu-multi-node]
     runs-on: ubuntu-22.04
 
     steps:
@@ -774,7 +221,7 @@ jobs:
         shell: bash -eux {0}
         run: |
           pip install 'numpy<2.0.0' pytest pytest-reportlog tensorboard
-          for i in ${{ inputs.FW_NAME }}-${GITHUB_RUN_ID}-* ${{ inputs.FW_NAME }}-vit-${GITHUB_RUN_ID}-*; do
+          for i in ${{ inputs.FW_NAME }}-vit-${GITHUB_RUN_ID}-*; do
             JOB_NAME=$(echo $i | awk -F "${GITHUB_RUN_ID}-" '{print $2}')
             METRIC_PATH=${JOB_NAME}_metrics.json
             python3 .github/workflows/baselines/summarize_metrics.py $i/$JOB_NAME --perf_summary_name "timing/steps_per_second" --output_json_path $METRIC_PATH
@@ -806,7 +253,7 @@ jobs:
   summary:
     name: test-t5x-rosetta-summary
     runs-on: ubuntu-22.04
-    needs: [single-process-multi-device, vit-multi-gpu-multi-node] # multi-gpu-multi-node, vit-single-process-multi-device
+    needs: [vit-multi-gpu-multi-node]
     if: "!cancelled()"
     steps:
       - name: Generate TensorBoard query URL
diff --git a/.github/workflows/_test_upstream_t5x.yaml b/.github/workflows/_test_upstream_t5x.yaml
deleted file mode 100644
index 892290327..000000000
--- a/.github/workflows/_test_upstream_t5x.yaml
+++ /dev/null
@@ -1,394 +0,0 @@
-name: ~test T5X, multi-node
-
-on:
-  workflow_call:
-    inputs:
-      T5X_IMAGE:
-        type: string
-        description: T5X image from ghcr.io/nvidia
-        default: 'ghcr.io/nvidia/upstream-t5x:latest'
-        required: false
-      BATCH_SIZE_PER_GPU:
-        type: number
-        description: Batch size per GPU
-        default: 32
-        required: false
-      BADGE_FILENAME:
-        type: string
-        description: 'Name of the endpoint JSON file for shields.io badge'
-        required: false
-        default: 'badge-upstream-t5x-mgmn-test.json'
-      ARTIFACT_NAME:
-        type: string
-        description: 'Name of the artifact zip file'
-        required: false
-        default: 'artifact-upstream-t5x-mgmn-test'
-      FW_NAME:
-        type: string
-        description: 'Name of the framework being used'
-        required: false
-        default: 'upstream-t5x'
-    outputs:
-      TEST_STATUS:
-        description: 'Summary pass/fail value indicating if results from tests are acceptable'
-        value: ${{ jobs.sitrep.outputs.STATUS }}
-
-jobs:
-
-  t5x-multi-gpu:
-    strategy:
-      max-parallel: 1
-      matrix:
-        include:
-          - TEST_NAME: "1P8G"
-            N_GPU: 8
-          # - TEST_NAME: "1P2G_fmha"
-          #   N_GPU: 2
-          #   ADDITIONAL_ARGS: "--enable-fmha 1"
-      fail-fast: false
-    runs-on: jumpbox
-    steps:
-      - name: Print environment variables
-        run: env
-
-      - name: Check out the repository under ${GITHUB_WORKSPACE}
-        uses: actions/checkout@v4
-
-      - name: Setup SSH
-        id: setup-ssh
-        uses: ./.github/actions/setup-ssh
-        with:
-          ssh-private-key: ${{ secrets.SSH_PRIVATE_KEY }}
-          ssh-known-hosts: ${{ vars.SSH_KNOWN_HOSTS }}
-
-      - name: Labels and metadata
-        id: meta
-        shell: bash -x -e {0}
-        run: |
-          IMAGE="$(echo ${{inputs.T5X_IMAGE}} | sed 's/\//#/')"
-          TEST_CASE_NAME=${{ matrix.TEST_NAME }}
-          JOB_NAME=${{ inputs.FW_NAME }}-${GITHUB_RUN_ID}-${TEST_CASE_NAME}
-          LOG_FILE=/nfs/cluster/${JOB_NAME}.log
-          MODEL_PATH=/nfs/cluster/${JOB_NAME}
-          BATCH_SIZE=$((${{ inputs.BATCH_SIZE_PER_GPU }} * ${{ matrix.N_GPU }}))
-          for var in IMAGE TEST_CASE_NAME JOB_NAME LOG_FILE MODEL_PATH BATCH_SIZE; do
-            echo "$var=${!var}" >> $GITHUB_OUTPUT
-          done
-
-      - name: Submit SLURM jobs over SSH
-        id: submit
-        shell: bash -O expand_aliases -x -e {0}
-        run: |
-          alias sshx='ssh -o "ServerAliveInterval 7" ${{ secrets.CLUSTER_LOGIN_USER }}@${{ vars.HOSTNAME_SLURM_LOGIN }}'
-          sshx "date && hostname && sinfo"
-          sshx mkdir -p ${{ steps.meta.outputs.MODEL_PATH }}
-          JOB=$(sshx sbatch --parsable << EOF
-          #!/bin/bash
-          #SBATCH --job-name=${{ steps.meta.outputs.JOB_NAME }}
-          #SBATCH --exclusive
-          #SBATCH --nodes=1
-          #SBATCH --gpus-per-node=${{ matrix.N_GPU }}
-          #SBATCH --time=00:30:00
-          #SBATCH --output=${{ steps.meta.outputs.LOG_FILE }}
-          #SBATCH --export="ENROOT_PASSWORD=${{ secrets.GITHUB_TOKEN }}"
-
-          # preload enroot container using one task per node
-          time srun \
-            --ntasks-per-node=1 \
-            --container-name=runtime \
-            --container-image=${{ steps.meta.outputs.IMAGE }} \
-            true
-
-          # run job with tasks on each node sharing one container
-          time srun \
-            --ntasks=1 \
-            --container-name=runtime \
-            --container-mounts=${{ steps.meta.outputs.MODEL_PATH }}:/output \
-            --container-entrypoint \
-            test-t5x.sh \
-              --output /output/${{ steps.meta.outputs.TEST_CASE_NAME }} \
-              --dtype bfloat16 \
-              --batch-size ${{ steps.meta.outputs.BATCH_SIZE }} \
-              --epochs 7 \
-              --steps-per-epoch 100 \
-              ${{ matrix.ADDITIONAL_ARGS }} 
-          EOF
-          )
-          
-          echo "SLURM_JOB_ID=${JOB}" >> $GITHUB_OUTPUT
-
-          . .github/workflows/scripts/wait_for_slurm_job.sh
-
-          wait_for_slurm_job ${{ secrets.CLUSTER_LOGIN_USER }}@${{ vars.HOSTNAME_SLURM_LOGIN }} ${JOB}
-
-          # Gather job info
-          SLURM_STATE=$(sshx sacct -j $JOB --format=State --parsable2 --noheader |& head -n 1)
-          SLURM_EXITCODE=$(sshx sacct -j $JOB --format=exitcode --parsable2 --noheader | sort -r -u | head -1 | cut -f 1 -d":" | sed 's/ //g')
-          echo "SLURM Job state is ${SLURM_STATE}"
-          echo "SLURM Job exit code is ${SLURM_EXITCODE}"
-          echo "SLURM_STATE=${SLURM_STATE}" >> "$GITHUB_OUTPUT"
-          echo "SLURM_EXITCODE=${SLURM_EXITCODE}" >> "$GITHUB_OUTPUT"
-
-          set -x
-
-      - name: Remove orphaned SLURM job if the CI job is canceled
-        if: cancelled()
-        shell: bash -x -e {0}
-        run: |
-          ssh ${{ secrets.CLUSTER_LOGIN_USER }}@${{ vars.HOSTNAME_SLURM_LOGIN }} \
-            scancel ${{ steps.submit.outputs.SLURM_JOB_ID }}
-
-      - name: Retrieve training logs and upload to TensorBoard server
-        shell: bash -x -e {0}
-        run: |
-          mkdir output/
-          rsync -rtz --progress \
-            ${{ secrets.CLUSTER_LOGIN_USER }}@${{ vars.HOSTNAME_SLURM_LOGIN }}:${{ steps.meta.outputs.LOG_FILE }} \
-            output/${{ steps.meta.outputs.TEST_CASE_NAME }}.log || true
-          rsync -rtz --progress \
-            ${{ secrets.CLUSTER_LOGIN_USER }}@${{ vars.HOSTNAME_SLURM_LOGIN }}:${{ steps.meta.outputs.MODEL_PATH }}/* \
-            output/ || true
-          rsync -rtz --progress \
-            output/ \
-            ${{ secrets.TENSORBOARD_UPLOAD_USER }}@${{ vars.HOSTNAME_TENSORBOARD }}:/tensorboard-logs/${{ inputs.FW_NAME }}-${GITHUB_RUN_ID}/ || true
-
-      - name: Write SLURM job status to file
-        shell: bash -x -e {0}
-        run: |
-          python << EOF
-          import json
-          with open("output/${{ steps.meta.outputs.TEST_CASE_NAME }}-status.json", "w") as f:
-              dump = {'state': "${{ steps.submit.outputs.SLURM_STATE }}", 'exitcode': "${{ steps.submit.outputs.SLURM_EXITCODE }}"}
-              json.dump(dump, f)
-          EOF
-
-      - name: Upload training logs as artifacts
-        uses: actions/upload-artifact@v4
-        with:
-          name: ${{ steps.meta.outputs.JOB_NAME }}
-          path: output/*
-
-  # 2-node tests temporarily disabled
-  # t5x-multi-node:
-  #   strategy:
-  #     max-parallel: 1
-  #     matrix:
-  #       include:
-  #         - TEST_NAME: "8G2N"
-  #           N_GPU: 8
-  #           N_NODE: 2
-  #           ADDITIONAL_ARGS: ""
-  #         - TEST_NAME: "8G2N_fmha"
-  #           N_GPU: 8
-  #           N_NODE: 2
-  #           ADDITIONAL_ARGS: "--enable-fmha 1"
-  #     fail-fast: false
-  #   runs-on: jumpbox
-  #   steps:
-  #     - name: Print environment variables
-  #       run: env
-
-  #     - name: Check out the repository under ${GITHUB_WORKSPACE}
-  #       uses: actions/checkout@v4
-
-  #     - name: Setup SSH agent
-  #       uses: webfactory/ssh-agent@v0.9.0
-  #       with:
-  #         ssh-private-key: ${{ secrets.SSH_PRIVATE_KEY }}
-
-  #     - name: Setup SSH known hosts
-  #       id: ssh-known-hosts
-  #       run: |
-  #         mkdir -p ~/.ssh
-  #         cat >> ~/.ssh/known_hosts << EOF
-  #         ${{ vars.SSH_KNOWN_HOSTS }}
-  #         EOF
-  #         chmod 600 ~/.ssh/known_hosts
-  #         echo "FILE=$(realpath ~/.ssh/known_hosts)" >> $GITHUB_OUTPUT
-
-  #     - name: Labels and metadata
-  #       id: meta
-  #       shell: bash -x -e {0}
-  #       run: |
-  #         IMAGE="$(echo ${{inputs.T5X_IMAGE}} | sed 's/\//#/')"
-  #         TEST_CASE_NAME=${{ matrix.TEST_NAME }}
-  #         TOTAL_TASKS=$((${{ matrix.N_GPU }} * ${{ matrix.N_NODE }}))
-  #         JOB_NAME=${{ inputs.FW_NAME }}-${GITHUB_RUN_ID}-${TEST_CASE_NAME};
-  #         LOG_FILE=/nfs/cluster/${JOB_NAME}.log
-  #         MODEL_PATH=/nfs/cluster/${JOB_NAME}
-  #         BATCH_SIZE=$((${{ inputs.BATCH_SIZE_PER_GPU }} * ${{ matrix.N_GPU }} * ${{ matrix.N_NODE }}))
-  #         for var in IMAGE TEST_CASE_NAME TOTAL_TASKS JOB_NAME LOG_FILE MODEL_PATH BATCH_SIZE; do
-  #           echo "$var=${!var}" >> $GITHUB_OUTPUT
-  #         done
-
-  #     - name: Submit SLURM jobs over SSH
-  #       id: submit
-  #       shell: bash -O expand_aliases -x -e {0}
-  #       run: |
-  #         alias sshx='ssh -o "ServerAliveInterval 7" ${{ secrets.CLUSTER_LOGIN_USER }}@${{ vars.HOSTNAME_SLURM_LOGIN }}'
-  #         sshx "date && hostname && sinfo"
-  #         sshx mkdir -p ${{ steps.meta.outputs.MODEL_PATH }}
-  #         JOB=$(sshx sbatch --parsable << EOF
-  #         #!/bin/bash
-  #         #SBATCH --job-name=${{ steps.meta.outputs.JOB_NAME }}
-  #         #SBATCH --exclusive
-  #         #SBATCH --nodes=${{ matrix.N_NODE }}
-  #         #SBATCH --gpus-per-node=${{ matrix.N_GPU }}
-  #         #SBATCH --time=00:30:00
-  #         #SBATCH --output=${{ steps.meta.outputs.LOG_FILE }}
-  #         #SBATCH --export="ENROOT_PASSWORD=${{ secrets.GITHUB_TOKEN }}"
-
-  #         # preload enroot container using one task per node
-  #         time srun \
-  #           --ntasks-per-node=1 \
-  #           --container-name=runtime \
-  #           --container-image=${{ steps.meta.outputs.IMAGE }} \
-  #           true
-
-  #         # run job with tasks on each node sharing one container
-  #         time srun \
-  #           --tasks=${{ steps.meta.outputs.TOTAL_TASKS }} \
-  #           --tasks-per-node=${{ matrix.N_GPU }} \
-  #           --container-name=runtime \
-  #           --container-mounts=${{ steps.meta.outputs.MODEL_PATH }}:/output \
-  #           --container-entrypoint \
-  #           test-t5x.sh \
-  #             --output /output/${{ steps.meta.outputs.TEST_CASE_NAME }} \
-  #             --dtype bfloat16 \
-  #             --batch-size ${{ steps.meta.outputs.BATCH_SIZE }} \
-  #             --epochs 7 \
-  #             --steps-per-epoch 100 \
-  #             --multiprocess \
-  #             ${{ matrix.ADDITIONAL_ARGS }}
-  #         EOF
-  #         )
-
-  #         echo "SLURM_JOB_ID=${JOB}" >> $GITHUB_OUTPUT
-
-  #         . .github/workflows/scripts/wait_for_slurm_job.sh
-
-  #         wait_for_slurm_job ${{ secrets.CLUSTER_LOGIN_USER }}@${{ vars.HOSTNAME_SLURM_LOGIN }} ${JOB}
-
-  #         # Gather job info
-  #         SLURM_STATE=$(sshx sacct -j $JOB --format=State --parsable2 --noheader |& head -n 1)
-  #         SLURM_EXITCODE=$(sshx sacct -j $JOB --format=exitcode --parsable2 --noheader | sort -r -u | head -1 | cut -f 1 -d":" | sed 's/ //g')
-  #         echo "SLURM Job state is ${SLURM_STATE}"
-  #         echo "SLURM Job exit code is ${SLURM_EXITCODE}"
-  #         echo "SLURM_STATE=${SLURM_STATE}" >> "$GITHUB_OUTPUT"
-  #         echo "SLURM_EXITCODE=${SLURM_EXITCODE}" >> "$GITHUB_OUTPUT"
-
-  #         set -x
-
-  #     - name: Remove orphaned SLURM job if the CI job is canceled
-  #       if: cancelled()
-  #       shell: bash -x -e {0}
-  #       run: |
-  #         ssh ${{ secrets.CLUSTER_LOGIN_USER }}@${{ vars.HOSTNAME_SLURM_LOGIN }} \
-  #           scancel ${{ steps.submit.outputs.SLURM_JOB_ID }}
-
-  #     - name: Retrieve training logs and upload to TensorBoard server
-  #       shell: bash -x -e {0}
-  #       run: |
-
-  #         mkdir output/
-  #         rsync -rtz --progress \
-  #           ${{ secrets.CLUSTER_LOGIN_USER }}@${{ vars.HOSTNAME_SLURM_LOGIN }}:${{ steps.meta.outputs.LOG_FILE }} \
-  #           output/${{ steps.meta.outputs.TEST_CASE_NAME }}.log || true
-  #         rsync -rtz --progress \
-  #           ${{ secrets.CLUSTER_LOGIN_USER }}@${{ vars.HOSTNAME_SLURM_LOGIN }}:${{ steps.meta.outputs.MODEL_PATH }}/* \
-  #           output/ || true
-  #         rsync -rtz --progress \
-  #           output/ \
-  #           ${{ secrets.TENSORBOARD_UPLOAD_USER }}@${{ vars.HOSTNAME_TENSORBOARD }}:/tensorboard-logs/${{ inputs.FW_NAME }}-${GITHUB_RUN_ID}/ || true
-
-  #     - name: Write SLURM job status to file
-  #       shell: bash -x -e {0}
-  #       run: |
-  #         python << EOF
-  #         import json
-  #         with open("output/${{ steps.meta.outputs.TEST_CASE_NAME }}-status.json", "w") as f:
-  #             dump = {'state': "${{ steps.submit.outputs.SLURM_STATE }}", 'exitcode': "${{ steps.submit.outputs.SLURM_EXITCODE }}"}
-  #             json.dump(dump, f)
-  #         EOF
-
-  #     - name: Upload training logs as artifacts
-  #       uses: actions/upload-artifact@v4
-  #       with:
-  #         name: ${{ steps.meta.outputs.JOB_NAME }}
-  #         path: output/*
-
-  metrics:
-    name: test-upstream-t5x-metrics
-    needs: [t5x-multi-gpu] # t5x-multi-node
-    runs-on: ubuntu-22.04
-
-    steps:
-      - name: Check out the repository under ${GITHUB_WORKSPACE}
-        uses: actions/checkout@v4
-
-      - name: Download artifacts
-        uses: actions/download-artifact@v4
-
-      - name: Run pytest
-        shell: bash -eux {0}
-        run: |
-          pip install 'numpy<2.0.0' pytest pytest-reportlog tensorboard
-          for i in ${{ inputs.FW_NAME }}-${GITHUB_RUN_ID}-*; do
-            JOB_NAME=$(echo $i | awk -F "${GITHUB_RUN_ID}-" '{print $2}')
-            METRIC_PATH=${JOB_NAME}_metrics.json
-            python3 .github/workflows/baselines/summarize_metrics.py $i/$JOB_NAME --perf_summary_name "timing/steps_per_second" --output_json_path $METRIC_PATH
-            # Test script expects the job dir and the log to be in the CWD
-            mv $i/$JOB_NAME $i/${JOB_NAME}.log .
-          done
-
-          RESULTS_DIR=$PWD BASELINES_DIR=T5X_MGMN/upstream pytest --report-log=report.jsonl .github/workflows/baselines/test_t5x_mgmn_metrics.py || true
-
-      - name: Upload metrics test json logs
-        uses: actions/upload-artifact@v4
-        with:
-          name: ${{ inputs.FW_NAME }}-metrics-test-log
-          path: |
-            report.jsonl
-            *_metrics.json
-
-
-  sitrep:
-    name: test-upstream-t5x-sitrep
-    needs: metrics
-    if: "!cancelled()"
-    uses: ./.github/workflows/_sitrep_mgmn.yaml
-    secrets: inherit
-    with:
-      BADGE_FILENAME: ${{ inputs.BADGE_FILENAME }}
-      ARTIFACT_NAME: ${{ inputs.ARTIFACT_NAME }}
-      FW_NAME: ${{ inputs.FW_NAME }}
-
-  summary:
-    name: test-upstream-t5x-summary
-    runs-on: ubuntu-22.04
-    needs: [t5x-multi-gpu] # t5x-multi-node
-    if: "!cancelled()"
-    steps:
-      - name: Generate TensorBoard query URL
-        run: |
-          (
-          cat << EOF
-
-          ## T5X MGMN training
-
-          [view metrics](https://${{ vars.HOSTNAME_TENSORBOARD }}/#scalars&regexInput=${{ inputs.FW_NAME }}-${GITHUB_RUN_ID}&_smoothingWeight=0&tagFilter=seqs_per)
-
-          EOF
-          ) | tee $GITHUB_STEP_SUMMARY
-
-  outcome:
-    name: test-upstream-t5x-outcome
-    needs: sitrep
-    runs-on: ubuntu-22.04
-    if: "!cancelled()"
-    steps:
-      - name: Sets workflow status based on test outputs
-        run: |
-          if [[ ${{ needs.sitrep.outputs.STATUS }} != 'success' ]]; then
-            exit 1
-          fi
diff --git a/.github/workflows/baselines/MAXTEXT/upstream/1DP1FSDP1TP1PP.json b/.github/workflows/baselines/MAXTEXT/upstream/1DP1FSDP1TP1PP.json
deleted file mode 100644
index 9898f40b6..000000000
--- a/.github/workflows/baselines/MAXTEXT/upstream/1DP1FSDP1TP1PP.json
+++ /dev/null
@@ -1 +0,0 @@
-{"start_step":1,"end_step":9,"step_interval":1,"loss_values":[0.19963198900222778,0,0,0,0,0,0,0,0],"step_times":[0.340298334757487,0.13511633376280466,0.13490866621335348,0.13431999584039053,0.1360036681095759,0.13434800008932749,0.13548333446184793,0.13538900017738342,0.13546699782212576],"step_time_avg":0.15792603680381068,"e2e_time_seconds":59.38633333333333,"run_urls":["https://api.github.com/repos/NVIDIA/JAX-Toolbox/actions/runs/7729826963/artifacts","https://api.github.com/repos/NVIDIA/JAX-Toolbox/actions/runs/7729993999/artifacts","https://api.github.com/repos/NVIDIA/JAX-Toolbox/actions/runs/7731914601/artifacts"],"date":"2024-01-31"}
diff --git a/.github/workflows/baselines/MAXTEXT/upstream/1DP1FSDP8TP1PP.json b/.github/workflows/baselines/MAXTEXT/upstream/1DP1FSDP8TP1PP.json
deleted file mode 100644
index 02664d450..000000000
--- a/.github/workflows/baselines/MAXTEXT/upstream/1DP1FSDP8TP1PP.json
+++ /dev/null
@@ -1 +0,0 @@
-{"start_step":1,"end_step":9,"step_interval":1,"loss_values":[0.19989807903766632,0,0,0,0,0,0,0,0],"step_times":[0.22164533535639444,0.2257696638504664,0.20632266998291016,0.20477033158143362,0.2050279974937439,0.2048743317524592,0.205293337504069,0.2059936672449112,0.2050470014413198],"step_time_avg":0.20941603735641193,"e2e_time_seconds":173.058,"run_urls":["https://api.github.com/repos/NVIDIA/JAX-Toolbox/actions/runs/7729826963/artifacts","https://api.github.com/repos/NVIDIA/JAX-Toolbox/actions/runs/7729993999/artifacts","https://api.github.com/repos/NVIDIA/JAX-Toolbox/actions/runs/7731914601/artifacts"],"date":"2024-01-31"}
diff --git a/.github/workflows/baselines/MAXTEXT/upstream/1DP2FSDP4TP1PP_single_process.json b/.github/workflows/baselines/MAXTEXT/upstream/1DP2FSDP4TP1PP_single_process.json
index 07c427871..490948d42 100644
--- a/.github/workflows/baselines/MAXTEXT/upstream/1DP2FSDP4TP1PP_single_process.json
+++ b/.github/workflows/baselines/MAXTEXT/upstream/1DP2FSDP4TP1PP_single_process.json
@@ -1 +1,33 @@
-{"start_step":1,"end_step":9,"step_interval":1,"loss_values":[0.19917848706245422,0,0,0,0,0,0,0,0],"step_times":[0.27129199107487995,0.17545133332411447,0.1774536669254303,0.18130967020988464,0.17997999986012778,0.17623033126195273,0.17701533436775208,0.17688766618569693,0.1763359953959783],"step_time_avg":0.1879951098450908,"e2e_time_seconds":89.195,"run_urls":["https://api.github.com/repos/NVIDIA/JAX-Toolbox/actions/runs/7729826963/artifacts","https://api.github.com/repos/NVIDIA/JAX-Toolbox/actions/runs/7729993999/artifacts","https://api.github.com/repos/NVIDIA/JAX-Toolbox/actions/runs/7731914601/artifacts"],"date":"2024-01-31"}
+{
+  "start_step": 1,
+  "end_step": 9,
+  "step_interval": 1,
+  "loss_values": [
+    24.950599670410156,
+    23.6308536529541,
+    22.22606086730957,
+    20.85379981994629,
+    19.61219596862793,
+    18.5745849609375,
+    17.77853012084961,
+    17.22124481201172,
+    16.864944458007812
+  ],
+  "step_times": [
+    0.34216299653053284,
+    0.20161199569702148,
+    0.20115399360656738,
+    0.19551700353622437,
+    0.19348999857902527,
+    0.19705399870872498,
+    0.19354699552059174,
+    0.1978529989719391,
+    0.19385899603366852
+  ],
+  "step_time_avg": 0.2129165530204773,
+  "e2e_time_seconds": 34.555,
+  "run_urls": [
+    "https://api.github.com/repos/NVIDIA/JAX-Toolbox/actions/runs/14054620516/artifacts"
+  ],
+  "date": "2025-03-25"
+}
diff --git a/.github/workflows/baselines/MAXTEXT/upstream/1DP4FSDP2TP1PP.json b/.github/workflows/baselines/MAXTEXT/upstream/1DP4FSDP2TP1PP.json
deleted file mode 100644
index 941b3f2a9..000000000
--- a/.github/workflows/baselines/MAXTEXT/upstream/1DP4FSDP2TP1PP.json
+++ /dev/null
@@ -1 +0,0 @@
-{"start_step":1,"end_step":9,"step_interval":1,"loss_values":[0.20010541379451752,3.576278402306343e-07,0,0,0,0,0,0,0],"step_times":[0.2389533370733261,0.16767800350983939,0.16975000500679016,0.16171966989835104,0.1687556654214859,0.1680160015821457,0.1645423322916031,0.16466433803240457,0.15995866556962332],"step_time_avg":0.17378200204284103,"e2e_time_seconds":180.69966666666667,"run_urls":["https://api.github.com/repos/NVIDIA/JAX-Toolbox/actions/runs/7729826963/artifacts","https://api.github.com/repos/NVIDIA/JAX-Toolbox/actions/runs/7729993999/artifacts","https://api.github.com/repos/NVIDIA/JAX-Toolbox/actions/runs/7731914601/artifacts"],"date":"2024-01-31"}
diff --git a/.github/workflows/baselines/MAXTEXT/upstream/1DP8FSDP1TP1PP.json b/.github/workflows/baselines/MAXTEXT/upstream/1DP8FSDP1TP1PP.json
deleted file mode 100644
index e19974954..000000000
--- a/.github/workflows/baselines/MAXTEXT/upstream/1DP8FSDP1TP1PP.json
+++ /dev/null
@@ -1 +0,0 @@
-{"start_step":1,"end_step":9,"step_interval":1,"loss_values":[0.19963198900222778,0,0,0,0,0,0,0,0],"step_times":[0.21862300237019858,0.15024366478125253,0.12854566673437753,0.12942766646544138,0.13015000025431314,0.13066466649373373,0.13325033088525137,0.12929299970467886,0.12958466509977976],"step_time_avg":0.1421980736432252,"e2e_time_seconds":166.42833333333334,"run_urls":["https://api.github.com/repos/NVIDIA/JAX-Toolbox/actions/runs/7729826963/artifacts","https://api.github.com/repos/NVIDIA/JAX-Toolbox/actions/runs/7729993999/artifacts","https://api.github.com/repos/NVIDIA/JAX-Toolbox/actions/runs/7731914601/artifacts"],"date":"2024-01-31"}
diff --git a/.github/workflows/baselines/MAXTEXT/upstream/2DP2FSDP2TP1PP.json b/.github/workflows/baselines/MAXTEXT/upstream/2DP2FSDP2TP1PP.json
index 78648bc87..e6a84022c 100644
--- a/.github/workflows/baselines/MAXTEXT/upstream/2DP2FSDP2TP1PP.json
+++ b/.github/workflows/baselines/MAXTEXT/upstream/2DP2FSDP2TP1PP.json
@@ -1 +1,33 @@
-{"start_step":1,"end_step":9,"step_interval":1,"loss_values":[0.20010541379451752,3.576278402306343e-07,0,0,0,0,0,0,0],"step_times":[0.2453316698471705,0.15532933175563812,0.15473033487796783,0.15553300082683563,0.15387233098347983,0.1558946669101715,0.15286600093046823,0.1544460008541743,0.15370899935563406],"step_time_avg":0.1646347040379489,"e2e_time_seconds":184.883,"run_urls":["https://api.github.com/repos/NVIDIA/JAX-Toolbox/actions/runs/7729826963/artifacts","https://api.github.com/repos/NVIDIA/JAX-Toolbox/actions/runs/7729993999/artifacts","https://api.github.com/repos/NVIDIA/JAX-Toolbox/actions/runs/7731914601/artifacts"],"date":"2024-01-31"}
+{
+  "start_step": 1,
+  "end_step": 9,
+  "step_interval": 1,
+  "loss_values": [
+    24.9504337310791,
+    23.63066291809082,
+    22.226167678833008,
+    20.853334426879883,
+    19.611804962158203,
+    18.574153900146484,
+    17.778240203857422,
+    17.22144317626953,
+    16.864635467529297
+  ],
+  "step_times": [
+    0.31094300746917725,
+    0.21168699860572815,
+    0.16145099699497223,
+    0.1525229960680008,
+    0.153779998421669,
+    0.15510499477386475,
+    0.1530109941959381,
+    0.1535159945487976,
+    0.15335600078105927
+  ],
+  "step_time_avg": 0.17837466465102303,
+  "e2e_time_seconds": 32.688,
+  "run_urls": [
+    "https://api.github.com/repos/NVIDIA/JAX-Toolbox/actions/runs/14054620516/artifacts"
+  ],
+  "date": "2025-03-25"
+}
diff --git a/.github/workflows/baselines/MAXTEXT/upstream/4DP2FSDP2TP1PP.json b/.github/workflows/baselines/MAXTEXT/upstream/4DP2FSDP2TP1PP.json
deleted file mode 100644
index 286efbae3..000000000
--- a/.github/workflows/baselines/MAXTEXT/upstream/4DP2FSDP2TP1PP.json
+++ /dev/null
@@ -1 +0,0 @@
-{"start_step":1,"end_step":9,"step_interval":1,"loss_values":[0.20010541379451752,3.576278402306343e-07,0,0,0,0,0,0,0],"step_times":[0.24482366939385733,0.2246866673231125,0.2056473344564438,0.21987300117810568,0.23971499999364218,0.21608999868234,0.21471566955248514,0.22115066647529602,0.22203166782855988],"step_time_avg":0.22319263054264918,"e2e_time_seconds":185.15866666666668,"run_urls":["https://api.github.com/repos/NVIDIA/JAX-Toolbox/actions/runs/7729826963/artifacts","https://api.github.com/repos/NVIDIA/JAX-Toolbox/actions/runs/7729993999/artifacts","https://api.github.com/repos/NVIDIA/JAX-Toolbox/actions/runs/7731914601/artifacts"],"date":"2024-01-31"}
diff --git a/.github/workflows/baselines/PAX_MGMN/rosetta/16DP1FSDP1TP1PP_TE.json b/.github/workflows/baselines/PAX_MGMN/rosetta/16DP1FSDP1TP1PP_TE.json
deleted file mode 100644
index c5329467e..000000000
--- a/.github/workflows/baselines/PAX_MGMN/rosetta/16DP1FSDP1TP1PP_TE.json
+++ /dev/null
@@ -1 +0,0 @@
-{"start_step": 100, "end_step": 300, "step_interval": 100, "loss_values": [0.0004531083977781236, 2.0986779418308288e-05, 1.31147601223347e-06], "step_times": [9.16663678487142, 9.166899998982748, 9.190437952677408], "step_time_avg": 9.174658245510523, "e2e_time_seconds": 286.86566666666664}
\ No newline at end of file
diff --git a/.github/workflows/baselines/PAX_MGMN/rosetta/1DP1FSDP1TP1PP_TE.json b/.github/workflows/baselines/PAX_MGMN/rosetta/1DP1FSDP1TP1PP_TE.json
deleted file mode 100644
index 62ed20b41..000000000
--- a/.github/workflows/baselines/PAX_MGMN/rosetta/1DP1FSDP1TP1PP_TE.json
+++ /dev/null
@@ -1 +0,0 @@
-{"start_step": 100, "end_step": 300, "step_interval": 100, "loss_values": [0.0004550429875962436, 2.098800177918747e-05, 1.3114761259203078e-06], "step_times": [9.69591999053955, 9.694547653198242, 9.6983060836792], "step_time_avg": 9.696257909138998, "e2e_time_seconds": 193.862}
\ No newline at end of file
diff --git a/.github/workflows/baselines/PAX_MGMN/rosetta/1DP2FSDP4TP1PP_single_process_TE.json b/.github/workflows/baselines/PAX_MGMN/rosetta/1DP2FSDP4TP1PP_single_process_TE.json
deleted file mode 100644
index a255dc62b..000000000
--- a/.github/workflows/baselines/PAX_MGMN/rosetta/1DP2FSDP4TP1PP_single_process_TE.json
+++ /dev/null
@@ -1 +0,0 @@
-{"start_step": 100, "end_step": 500, "step_interval": 100, "loss_values": [0.0004496203036978841, 2.1072781237307936e-05, 1.311417690885719e-06, 6.332992796842518e-08, 0.0], "step_times": [7.595651865005493, 7.599909543991089, 7.602108001708984, 7.595033645629883, 7.59737229347229], "step_time_avg": 7.598015069961548, "e2e_time_seconds": 234.1085}
\ No newline at end of file
diff --git a/.github/workflows/baselines/PAX_MGMN/rosetta/1DP8FSDP1TP1PP_TE.json b/.github/workflows/baselines/PAX_MGMN/rosetta/1DP8FSDP1TP1PP_TE.json
deleted file mode 100644
index ac98f8443..000000000
--- a/.github/workflows/baselines/PAX_MGMN/rosetta/1DP8FSDP1TP1PP_TE.json
+++ /dev/null
@@ -1 +0,0 @@
-{"start_step": 100, "end_step": 300, "step_interval": 100, "loss_values": [0.0004562355752568692, 2.0987012248951942e-05, 1.31147601223347e-06], "step_times": [9.896685918172201, 9.902073860168457, 9.896770795186361], "step_time_avg": 9.898510191175673, "e2e_time_seconds": 282.402}
\ No newline at end of file
diff --git a/.github/workflows/baselines/PAX_MGMN/rosetta/2DP1FSDP1TP4PP.json b/.github/workflows/baselines/PAX_MGMN/rosetta/2DP1FSDP1TP4PP.json
deleted file mode 100644
index a309a4a68..000000000
--- a/.github/workflows/baselines/PAX_MGMN/rosetta/2DP1FSDP1TP4PP.json
+++ /dev/null
@@ -1 +0,0 @@
-{"start_step": 100, "end_step": 300, "step_interval": 100, "loss_values": [0.000438039714936167, 2.219043562945444e-05, 1.4306265256891493e-06], "step_times": [2.470784823099772, 2.471130927403768, 2.471168835957845], "step_time_avg": 2.4710281954871287, "e2e_time_seconds": 316.19}
\ No newline at end of file
diff --git a/.github/workflows/baselines/PAX_MGMN/rosetta/2DP1FSDP2TP4PP.json b/.github/workflows/baselines/PAX_MGMN/rosetta/2DP1FSDP2TP4PP.json
deleted file mode 100644
index f79ed51bb..000000000
--- a/.github/workflows/baselines/PAX_MGMN/rosetta/2DP1FSDP2TP4PP.json
+++ /dev/null
@@ -1 +0,0 @@
-{"start_step": 100, "end_step": 300, "step_interval": 100, "loss_values": [0.0004382363404147327, 2.253897037007846e-05, 1.4306265256891493e-06], "step_times": [1.6266847054163616, 1.6370126803716023, 1.6351629098256428], "step_time_avg": 1.6329534318712022, "e2e_time_seconds": 393.96866666666665}
\ No newline at end of file
diff --git a/.github/workflows/baselines/PAX_MGMN/rosetta/4DP1FSDP2TP1PP.json b/.github/workflows/baselines/PAX_MGMN/rosetta/4DP1FSDP2TP1PP.json
deleted file mode 100644
index 368097a8c..000000000
--- a/.github/workflows/baselines/PAX_MGMN/rosetta/4DP1FSDP2TP1PP.json
+++ /dev/null
@@ -1 +0,0 @@
-{"start_step": 100, "end_step": 300, "step_interval": 100, "loss_values": [0.0004683640436269343, 2.1106745407450944e-05, 1.311883579546702e-06], "step_times": [6.465008894602458, 6.465569972991943, 6.463742891947429], "step_time_avg": 6.464773919847276, "e2e_time_seconds": 285.65200000000004}
\ No newline at end of file
diff --git a/.github/workflows/baselines/PAX_MGMN/rosetta/4DP1FSDP2TP1PP_TE.json b/.github/workflows/baselines/PAX_MGMN/rosetta/4DP1FSDP2TP1PP_TE.json
deleted file mode 100644
index d900af22e..000000000
--- a/.github/workflows/baselines/PAX_MGMN/rosetta/4DP1FSDP2TP1PP_TE.json
+++ /dev/null
@@ -1 +0,0 @@
-{"start_step": 100, "end_step": 300, "step_interval": 100, "loss_values": [0.0004528434365056455, 2.122193473041989e-05, 1.3954693258710904e-06], "step_times": [8.431368192036947, 8.43382708231608, 8.431408246358236], "step_time_avg": 8.432201173570421, "e2e_time_seconds": 287.23}
\ No newline at end of file
diff --git a/.github/workflows/baselines/PAX_MGMN/rosetta/8DP1FSDP1TP1PP.json b/.github/workflows/baselines/PAX_MGMN/rosetta/8DP1FSDP1TP1PP.json
deleted file mode 100644
index 8ad323abf..000000000
--- a/.github/workflows/baselines/PAX_MGMN/rosetta/8DP1FSDP1TP1PP.json
+++ /dev/null
@@ -1 +0,0 @@
-{"start_step": 100, "end_step": 300, "step_interval": 100, "loss_values": [0.0004681030404753983, 2.086862332362216e-05, 1.3121162965035182e-06], "step_times": [7.275770664215088, 7.267403920491536, 7.267686367034912], "step_time_avg": 7.270286983913845, "e2e_time_seconds": 273.26233333333334}
\ No newline at end of file
diff --git a/.github/workflows/baselines/PAX_MGMN/rosetta/8DP1FSDP1TP1PP_TE.json b/.github/workflows/baselines/PAX_MGMN/rosetta/8DP1FSDP1TP1PP_TE.json
deleted file mode 100644
index 52a386ce0..000000000
--- a/.github/workflows/baselines/PAX_MGMN/rosetta/8DP1FSDP1TP1PP_TE.json
+++ /dev/null
@@ -1 +0,0 @@
-{"start_step": 100, "end_step": 300, "step_interval": 100, "loss_values": [0.0004531083977781236, 2.0986779418308288e-05, 1.31147601223347e-06], "step_times": [9.474847793579102, 9.474331537882486, 9.4735320409139], "step_time_avg": 9.474237124125162, "e2e_time_seconds": 281.1556666666667}
\ No newline at end of file
diff --git a/.github/workflows/baselines/PAX_MGMN/rosetta/8DP_TE_dropout.json b/.github/workflows/baselines/PAX_MGMN/rosetta/8DP_TE_dropout.json
deleted file mode 100644
index 928612d1e..000000000
--- a/.github/workflows/baselines/PAX_MGMN/rosetta/8DP_TE_dropout.json
+++ /dev/null
@@ -1 +0,0 @@
-{"start_step": 100, "end_step": 300, "step_interval": 100, "loss_values": [0.0005329704144969583, 2.392743408563547e-05, 1.6679570080668782e-06], "step_times": [7.743874549865723, 7.7442946434021, 7.74159049987793], "step_time_avg": 7.743253231048584, "e2e_time_seconds": 307.527}
\ No newline at end of file
diff --git a/.github/workflows/baselines/PAX_MGMN/upstream/16DP1FSDP1TP1PP.json b/.github/workflows/baselines/PAX_MGMN/upstream/16DP1FSDP1TP1PP.json
deleted file mode 100644
index 17eaca746..000000000
--- a/.github/workflows/baselines/PAX_MGMN/upstream/16DP1FSDP1TP1PP.json
+++ /dev/null
@@ -1 +0,0 @@
-{"start_step": 100, "end_step": 500, "step_interval": 100, "loss_values": [0.0004681030404753983, 2.086862332362216e-05, 1.3121162965035182e-06, 5.8207657444020455e-11, 0.0], "step_times": [7.725894292195638, 7.695748964945476, 7.674357891082764, 7.7014509836832685, 7.720887184143066], "step_time_avg": 7.703667863210043, "e2e_time_seconds": 295.4576666666666}
\ No newline at end of file
diff --git a/.github/workflows/baselines/PAX_MGMN/upstream/1DP1FSDP1TP1PP.json b/.github/workflows/baselines/PAX_MGMN/upstream/1DP1FSDP1TP1PP.json
deleted file mode 100644
index 89aaeca73..000000000
--- a/.github/workflows/baselines/PAX_MGMN/upstream/1DP1FSDP1TP1PP.json
+++ /dev/null
@@ -1 +0,0 @@
-{"start_step": 100, "end_step": 500, "step_interval": 100, "loss_values": [0.00046809131163172424, 2.086867971229367e-05, 1.3123492408340098e-06, 5.8207657444020455e-11, 0.0], "step_times": [8.008778889973959, 8.014708836873373, 8.011429150899252, 8.013259251912435, 8.00814119974772], "step_time_avg": 8.011263465881347, "e2e_time_seconds": 204.86966666666663}
\ No newline at end of file
diff --git a/.github/workflows/baselines/PAX_MGMN/upstream/1DP2FSDP4TP1PP_single_process.json b/.github/workflows/baselines/PAX_MGMN/upstream/1DP2FSDP4TP1PP_single_process.json
deleted file mode 100644
index bd06ec03c..000000000
--- a/.github/workflows/baselines/PAX_MGMN/upstream/1DP2FSDP4TP1PP_single_process.json
+++ /dev/null
@@ -1 +0,0 @@
-{"start_step": 100, "end_step": 500, "step_interval": 100, "loss_values": [0.0004679674166254699, 2.0868097635684535e-05, 1.3118251445121132e-06, 6.335539382007482e-08, 0.0], "step_times": [6.345967451731364, 6.3443193435668945, 6.345146497090657, 6.344050407409668, 6.3422525723775225], "step_time_avg": 6.344347254435221, "e2e_time_seconds": 238.7543333333333}
\ No newline at end of file
diff --git a/.github/workflows/baselines/PAX_MGMN/upstream/1DP8FSDP1TP1PP.json b/.github/workflows/baselines/PAX_MGMN/upstream/1DP8FSDP1TP1PP.json
deleted file mode 100644
index ab5c598ac..000000000
--- a/.github/workflows/baselines/PAX_MGMN/upstream/1DP8FSDP1TP1PP.json
+++ /dev/null
@@ -1 +0,0 @@
-{"start_step": 100, "end_step": 500, "step_interval": 100, "loss_values": [0.00046810254571028054, 2.0870078515144996e-05, 1.3122327118253452e-06, 5.8207657444020455e-11, 0.0], "step_times": [8.161738077799479, 8.162349383036295, 8.15965493520101, 8.158018112182617, 8.157390912373861], "step_time_avg": 8.159830284118652, "e2e_time_seconds": 296.75933333333336}
\ No newline at end of file
diff --git a/.github/workflows/baselines/PAX_MGMN/upstream/2DP1FSDP1TP4PP.json b/.github/workflows/baselines/PAX_MGMN/upstream/2DP1FSDP1TP4PP.json
deleted file mode 100644
index 97371666e..000000000
--- a/.github/workflows/baselines/PAX_MGMN/upstream/2DP1FSDP1TP4PP.json
+++ /dev/null
@@ -1 +0,0 @@
-{"start_step": 100, "end_step": 500, "step_interval": 100, "loss_values": [0.000438039714936167, 2.219043562945444e-05, 1.4306265256891493e-06, 5.8207657444020455e-11, 0.0], "step_times": [2.5234107971191406, 2.5232578118642173, 2.5235915184020996, 2.5234344005584717, 2.5233071645100913], "step_time_avg": 2.523400338490804, "e2e_time_seconds": 388.4533333333334}
\ No newline at end of file
diff --git a/.github/workflows/baselines/PAX_MGMN/upstream/2DP1FSDP2TP4PP.json b/.github/workflows/baselines/PAX_MGMN/upstream/2DP1FSDP2TP4PP.json
deleted file mode 100644
index 85d1e5d4c..000000000
--- a/.github/workflows/baselines/PAX_MGMN/upstream/2DP1FSDP2TP4PP.json
+++ /dev/null
@@ -1 +0,0 @@
-{"start_step": 100, "end_step": 500, "step_interval": 100, "loss_values": [0.0004382363404147327, 2.253897037007846e-05, 1.4306265256891493e-06, 5.8207657444020455e-11, 0.0], "step_times": [1.6573359568913777, 1.654531757036845, 1.6514259179433186, 1.652300516764323, 1.6523643334706624], "step_time_avg": 1.653591696421305, "e2e_time_seconds": 507.07099999999997}
\ No newline at end of file
diff --git a/.github/workflows/baselines/PAX_MGMN/upstream/4DP1FSDP2TP1PP.json b/.github/workflows/baselines/PAX_MGMN/upstream/4DP1FSDP2TP1PP.json
deleted file mode 100644
index 43e34bbce..000000000
--- a/.github/workflows/baselines/PAX_MGMN/upstream/4DP1FSDP2TP1PP.json
+++ /dev/null
@@ -1 +0,0 @@
-{"start_step": 100, "end_step": 500, "step_interval": 100, "loss_values": [0.0004683640436269343, 2.1106745407450944e-05, 1.311883579546702e-06, 5.8207657444020455e-11, 0.0], "step_times": [6.97843599319458, 6.977321783701579, 6.975888093312581, 6.976069609324138, 6.976081212361653], "step_time_avg": 6.976759338378906, "e2e_time_seconds": 306.4506666666667}
\ No newline at end of file
diff --git a/.github/workflows/baselines/PAX_MGMN/upstream/8DP1FSDP1TP1PP.json b/.github/workflows/baselines/PAX_MGMN/upstream/8DP1FSDP1TP1PP.json
deleted file mode 100644
index 762a1bc9f..000000000
--- a/.github/workflows/baselines/PAX_MGMN/upstream/8DP1FSDP1TP1PP.json
+++ /dev/null
@@ -1 +0,0 @@
-{"start_step": 100, "end_step": 500, "step_interval": 100, "loss_values": [0.0004681030404753983, 2.086862332362216e-05, 1.3121162965035182e-06, 5.8207657444020455e-11, 0.0], "step_times": [7.965368588765462, 7.962141513824463, 7.961517333984375, 7.960983753204346, 7.957266171773274], "step_time_avg": 7.961455472310384, "e2e_time_seconds": 291.03866666666664}
\ No newline at end of file
diff --git a/.github/workflows/baselines/T5X_MGMN/rosetta/1N1G-te-1.json b/.github/workflows/baselines/T5X_MGMN/rosetta/1N1G-te-1.json
deleted file mode 100644
index 4d4d02e03..000000000
--- a/.github/workflows/baselines/T5X_MGMN/rosetta/1N1G-te-1.json
+++ /dev/null
@@ -1,31 +0,0 @@
-{
-  "start_step": 100,
-  "end_step": 700,
-  "step_interval": 100,
-  "loss_values": [
-    24153.919921875,
-    18200.547526041668,
-    16711.00390625,
-    15931.573567708334,
-    15485.792643229166,
-    14638.229166666666,
-    14497.9931640625
-  ],
-  "step_times": [
-    16.47681490580241,
-    17.68256441752116,
-    17.76396878560384,
-    17.746811548868816,
-    17.440324783325195,
-    17.567402521769207,
-    17.809424082438152
-  ],
-  "step_time_avg": 17.498187292189826,
-  "e2e_time_seconds": 50.457,
-  "run_urls": [
-    "https://api.github.com/repos/NVIDIA/JAX-Toolbox/actions/runs/8324237226/artifacts",
-    "https://api.github.com/repos/NVIDIA/JAX-Toolbox/actions/runs/8314453358/artifacts",
-    "https://api.github.com/repos/NVIDIA/JAX-Toolbox/actions/runs/8306837544/artifacts"
-  ],
-  "date": "2024-03-19"
-}
diff --git a/.github/workflows/baselines/T5X_MGMN/rosetta/1N8G-te-1.json b/.github/workflows/baselines/T5X_MGMN/rosetta/1N8G-te-1.json
deleted file mode 100644
index 3b23678da..000000000
--- a/.github/workflows/baselines/T5X_MGMN/rosetta/1N8G-te-1.json
+++ /dev/null
@@ -1,31 +0,0 @@
-{
-  "start_step": 100,
-  "end_step": 700,
-  "step_interval": 100,
-  "loss_values": [
-    184496.02083333334,
-    132556.78125,
-    118741.51041666667,
-    112102,
-    106474.15625,
-    102533.91666666667,
-    98940.96875
-  ],
-  "step_times": [
-    7.723111152648926,
-    10.411026954650879,
-    10.323720932006836,
-    10.33003012339274,
-    10.171146392822266,
-    10.210242907206217,
-    10.345155080159506
-  ],
-  "step_time_avg": 9.930633363269623,
-  "e2e_time_seconds": 51.413000000000004,
-  "run_urls": [
-    "https://api.github.com/repos/NVIDIA/JAX-Toolbox/actions/runs/8324237226/artifacts",
-    "https://api.github.com/repos/NVIDIA/JAX-Toolbox/actions/runs/8314453358/artifacts",
-    "https://api.github.com/repos/NVIDIA/JAX-Toolbox/actions/runs/8306837544/artifacts"
-  ],
-  "date": "2024-03-19"
-}
diff --git a/.github/workflows/baselines/T5X_MGMN/rosetta/1P1G_te-0.json b/.github/workflows/baselines/T5X_MGMN/rosetta/1P1G_te-0.json
deleted file mode 100644
index 7b5a56b18..000000000
--- a/.github/workflows/baselines/T5X_MGMN/rosetta/1P1G_te-0.json
+++ /dev/null
@@ -1,31 +0,0 @@
-{
-  "start_step": 100,
-  "end_step": 700,
-  "step_interval": 100,
-  "loss_values": [
-    24058.563802083332,
-    18159.987630208332,
-    16757.4765625,
-    16054.7275390625,
-    15671.132161458334,
-    14891.440755208334,
-    14785.358723958334
-  ],
-  "step_times": [
-    2.516990343729655,
-    20.410126368204754,
-    20.358519236246746,
-    20.480276107788086,
-    20.413113276163738,
-    20.514227549235027,
-    20.529977798461914
-  ],
-  "step_time_avg": 17.889032954261417,
-  "e2e_time_seconds": 95.94066666666667,
-  "run_urls": [
-    "https://api.github.com/repos/NVIDIA/JAX-Toolbox/actions/runs/8324237226/artifacts",
-    "https://api.github.com/repos/NVIDIA/JAX-Toolbox/actions/runs/8314453358/artifacts",
-    "https://api.github.com/repos/NVIDIA/JAX-Toolbox/actions/runs/8306837544/artifacts"
-  ],
-  "date": "2024-03-19"
-}
diff --git a/.github/workflows/baselines/T5X_MGMN/rosetta/1P1G_te-1.json b/.github/workflows/baselines/T5X_MGMN/rosetta/1P1G_te-1.json
deleted file mode 100644
index 5288e8366..000000000
--- a/.github/workflows/baselines/T5X_MGMN/rosetta/1P1G_te-1.json
+++ /dev/null
@@ -1,31 +0,0 @@
-{
-  "start_step": 100,
-  "end_step": 700,
-  "step_interval": 100,
-  "loss_values": [
-    24152.125,
-    18200.479817708332,
-    16711.490885416668,
-    15932.138671875,
-    15485.988932291666,
-    14638.2822265625,
-    14498.707356770834
-  ],
-  "step_times": [
-    16.6195125579834,
-    17.70514488220215,
-    17.761261622111004,
-    17.79443422953288,
-    17.802642186482746,
-    17.263482411702473,
-    17.815998077392578
-  ],
-  "step_time_avg": 17.53749656677246,
-  "e2e_time_seconds": 71.54266666666668,
-  "run_urls": [
-    "https://api.github.com/repos/NVIDIA/JAX-Toolbox/actions/runs/8324237226/artifacts",
-    "https://api.github.com/repos/NVIDIA/JAX-Toolbox/actions/runs/8314453358/artifacts",
-    "https://api.github.com/repos/NVIDIA/JAX-Toolbox/actions/runs/8306837544/artifacts"
-  ],
-  "date": "2024-03-19"
-}
diff --git a/.github/workflows/baselines/T5X_MGMN/rosetta/1P8G_te-1.json b/.github/workflows/baselines/T5X_MGMN/rosetta/1P8G_te-1.json
deleted file mode 100644
index 1f003af14..000000000
--- a/.github/workflows/baselines/T5X_MGMN/rosetta/1P8G_te-1.json
+++ /dev/null
@@ -1,31 +0,0 @@
-{
-  "start_step": 100,
-  "end_step": 700,
-  "step_interval": 100,
-  "loss_values": [
-    184948.375,
-    132454.71875,
-    119301.75520833333,
-    111850.2734375,
-    106656.45052083333,
-    102391.19791666667,
-    98660.5546875
-  ],
-  "step_times": [
-    7.569497108459473,
-    8.135419845581055,
-    8.108287652333578,
-    8.119354883829752,
-    8.113591512044271,
-    8.524995803833008,
-    9.187231063842773
-  ],
-  "step_time_avg": 8.251196838560558,
-  "e2e_time_seconds": 84.15966666666667,
-  "run_urls": [
-    "https://api.github.com/repos/NVIDIA/JAX-Toolbox/actions/runs/8324237226/artifacts",
-    "https://api.github.com/repos/NVIDIA/JAX-Toolbox/actions/runs/8314453358/artifacts",
-    "https://api.github.com/repos/NVIDIA/JAX-Toolbox/actions/runs/8306837544/artifacts"
-  ],
-  "date": "2024-03-19"
-}
diff --git a/.github/workflows/baselines/T5X_MGMN/rosetta/2N2G_te-0.json b/.github/workflows/baselines/T5X_MGMN/rosetta/2N2G_te-0.json
deleted file mode 100644
index bc1f34bae..000000000
--- a/.github/workflows/baselines/T5X_MGMN/rosetta/2N2G_te-0.json
+++ /dev/null
@@ -1,31 +0,0 @@
-{
-  "start_step": 100,
-  "end_step": 700,
-  "step_interval": 100,
-  "loss_values": [
-    93512.08072916667,
-    68011.20052083333,
-    62676.079427083336,
-    59229.05859375,
-    56724.227864583336,
-    55185.227864583336,
-    54274.234375
-  ],
-  "step_times": [
-    2.4313742319742837,
-    16.954547882080078,
-    16.901138305664062,
-    17.1687068939209,
-    17.038618723551433,
-    17.15284029642741,
-    16.860179265340168
-  ],
-  "step_time_avg": 14.929629371279761,
-  "e2e_time_seconds": 51.90366666666667,
-  "run_urls": [
-    "https://api.github.com/repos/NVIDIA/JAX-Toolbox/actions/runs/8324237226/artifacts",
-    "https://api.github.com/repos/NVIDIA/JAX-Toolbox/actions/runs/8314453358/artifacts",
-    "https://api.github.com/repos/NVIDIA/JAX-Toolbox/actions/runs/8306837544/artifacts"
-  ],
-  "date": "2024-03-19"
-}
diff --git a/.github/workflows/baselines/T5X_MGMN/rosetta/2N8G-te-1.json b/.github/workflows/baselines/T5X_MGMN/rosetta/2N8G-te-1.json
deleted file mode 100644
index 38c336a24..000000000
--- a/.github/workflows/baselines/T5X_MGMN/rosetta/2N8G-te-1.json
+++ /dev/null
@@ -1,31 +0,0 @@
-{
-  "start_step": 100,
-  "end_step": 700,
-  "step_interval": 100,
-  "loss_values": [
-    365773.1354166667,
-    259264.546875,
-    232126.52604166666,
-    216850.84895833334,
-    206546.31770833334,
-    197197.40104166666,
-    189904.16145833334
-  ],
-  "step_times": [
-    1.3103941679000854,
-    1.4087820053100586,
-    1.4035348892211914,
-    1.3380741675694783,
-    1.4081446329752605,
-    1.4064313570658367,
-    1.4399555921554565
-  ],
-  "step_time_avg": 1.3879024017424813,
-  "e2e_time_seconds": 52.197,
-  "run_urls": [
-    "https://api.github.com/repos/NVIDIA/JAX-Toolbox/actions/runs/8324237226/artifacts",
-    "https://api.github.com/repos/NVIDIA/JAX-Toolbox/actions/runs/8314453358/artifacts",
-    "https://api.github.com/repos/NVIDIA/JAX-Toolbox/actions/runs/8306837544/artifacts"
-  ],
-  "date": "2024-03-19"
-}
diff --git a/.github/workflows/baselines/T5X_MGMN/rosetta/VIT1G1N.json b/.github/workflows/baselines/T5X_MGMN/rosetta/VIT1G1N.json
deleted file mode 100644
index f9804cfd0..000000000
--- a/.github/workflows/baselines/T5X_MGMN/rosetta/VIT1G1N.json
+++ /dev/null
@@ -1,27 +0,0 @@
-{
-  "start_step": 100,
-  "end_step": 500,
-  "step_interval": 100,
-  "loss_values": [
-    6.5204572677612305,
-    4.362146536509196,
-    2.4585838317871094,
-    2.3129119078318277,
-    2.296177943547567
-  ],
-  "step_times": [
-    4.789742787679036,
-    4.7799530029296875,
-    4.815515836079915,
-    4.816166400909424,
-    4.81501833597819
-  ],
-  "step_time_avg": 4.80327927271525,
-  "e2e_time_seconds": 51.43133333333333,
-  "run_urls": [
-    "https://api.github.com/repos/NVIDIA/JAX-Toolbox/actions/runs/8324237226/artifacts",
-    "https://api.github.com/repos/NVIDIA/JAX-Toolbox/actions/runs/8314453358/artifacts",
-    "https://api.github.com/repos/NVIDIA/JAX-Toolbox/actions/runs/8306837544/artifacts"
-  ],
-  "date": "2024-03-19"
-}
diff --git a/.github/workflows/baselines/T5X_MGMN/rosetta/VIT1G2N.json b/.github/workflows/baselines/T5X_MGMN/rosetta/VIT1G2N.json
deleted file mode 100644
index 3d29e750c..000000000
--- a/.github/workflows/baselines/T5X_MGMN/rosetta/VIT1G2N.json
+++ /dev/null
@@ -1,27 +0,0 @@
-{
-  "start_step": 100,
-  "end_step": 500,
-  "step_interval": 100,
-  "loss_values": [
-    6.523322423299153,
-    4.372343381245931,
-    2.4592310587565103,
-    2.312021334966024,
-    2.29211433728536
-  ],
-  "step_times": [
-    4.418381532033284,
-    4.498948574066162,
-    4.580581188201904,
-    4.584135850270589,
-    4.587292830149333
-  ],
-  "step_time_avg": 4.533867994944255,
-  "e2e_time_seconds": 51.27533333333333,
-  "run_urls": [
-    "https://api.github.com/repos/NVIDIA/JAX-Toolbox/actions/runs/8324237226/artifacts",
-    "https://api.github.com/repos/NVIDIA/JAX-Toolbox/actions/runs/8314453358/artifacts",
-    "https://api.github.com/repos/NVIDIA/JAX-Toolbox/actions/runs/8306837544/artifacts"
-  ],
-  "date": "2024-03-19"
-}
diff --git a/.github/workflows/baselines/T5X_MGMN/rosetta/VIT1P8G.json b/.github/workflows/baselines/T5X_MGMN/rosetta/VIT1P8G.json
deleted file mode 100644
index 051843745..000000000
--- a/.github/workflows/baselines/T5X_MGMN/rosetta/VIT1P8G.json
+++ /dev/null
@@ -1,27 +0,0 @@
-{
-  "start_step": 100,
-  "end_step": 500,
-  "step_interval": 100,
-  "loss_values": [
-    6.519473075866699,
-    4.362549622853597,
-    2.4568604628245034,
-    2.3104422092437744,
-    2.2859185536702475
-  ],
-  "step_times": [
-    1.2238010168075562,
-    1.2661592562993367,
-    1.2517070372899373,
-    1.2438101768493652,
-    1.2496572335561116
-  ],
-  "step_time_avg": 1.2470269441604613,
-  "e2e_time_seconds": 51.833666666666666,
-  "run_urls": [
-    "https://api.github.com/repos/NVIDIA/JAX-Toolbox/actions/runs/8324237226/artifacts",
-    "https://api.github.com/repos/NVIDIA/JAX-Toolbox/actions/runs/8314453358/artifacts",
-    "https://api.github.com/repos/NVIDIA/JAX-Toolbox/actions/runs/8306837544/artifacts"
-  ],
-  "date": "2024-03-19"
-}
diff --git a/.github/workflows/baselines/T5X_MGMN/rosetta/VIT8G2N.json b/.github/workflows/baselines/T5X_MGMN/rosetta/VIT8G2N.json
deleted file mode 100644
index fd4900ff5..000000000
--- a/.github/workflows/baselines/T5X_MGMN/rosetta/VIT8G2N.json
+++ /dev/null
@@ -1,27 +0,0 @@
-{
-  "start_step": 100,
-  "end_step": 500,
-  "step_interval": 100,
-  "loss_values": [
-    6.519150098164876,
-    4.360336621602376,
-    2.457017421722412,
-    2.3106113274892173,
-    2.2869962056477866
-  ],
-  "step_times": [
-    3.559901714324951,
-    4.227313041687012,
-    4.242276032765706,
-    4.264291922251384,
-    4.272752126057942
-  ],
-  "step_time_avg": 4.1133069674174,
-  "e2e_time_seconds": 75.54066666666667,
-  "run_urls": [
-    "https://api.github.com/repos/NVIDIA/JAX-Toolbox/actions/runs/8324237226/artifacts",
-    "https://api.github.com/repos/NVIDIA/JAX-Toolbox/actions/runs/8314453358/artifacts",
-    "https://api.github.com/repos/NVIDIA/JAX-Toolbox/actions/runs/8306837544/artifacts"
-  ],
-  "date": "2024-03-19"
-}
diff --git a/.github/workflows/baselines/T5X_MGMN/upstream/1G1N.json b/.github/workflows/baselines/T5X_MGMN/upstream/1G1N.json
deleted file mode 100644
index 31c72c276..000000000
--- a/.github/workflows/baselines/T5X_MGMN/upstream/1G1N.json
+++ /dev/null
@@ -1,31 +0,0 @@
-{
-  "start_step": 100,
-  "end_step": 700,
-  "step_interval": 100,
-  "loss_values": [
-    21865.04296875,
-    16778.55078125,
-    15394.390625,
-    14834.7900390625,
-    14571.5107421875,
-    13927.41015625,
-    13905.013671875
-  ],
-  "step_times": [
-    2.473292112350464,
-    17.985205332438152,
-    18.19552739461263,
-    18.03304926554362,
-    18.031944910685223,
-    18.190677007039387,
-    18.142311096191406
-  ],
-  "step_time_avg": 15.864572445551554,
-  "e2e_time_seconds": 63.13766666666667,
-  "run_urls": [
-    "https://api.github.com/repos/NVIDIA/JAX-Toolbox/actions/runs/8324237226/artifacts",
-    "https://api.github.com/repos/NVIDIA/JAX-Toolbox/actions/runs/8314453358/artifacts",
-    "https://api.github.com/repos/NVIDIA/JAX-Toolbox/actions/runs/8306837544/artifacts"
-  ],
-  "date": "2024-03-19"
-}
diff --git a/.github/workflows/baselines/T5X_MGMN/upstream/1G2N.json b/.github/workflows/baselines/T5X_MGMN/upstream/1G2N.json
deleted file mode 100644
index 335569788..000000000
--- a/.github/workflows/baselines/T5X_MGMN/upstream/1G2N.json
+++ /dev/null
@@ -1,31 +0,0 @@
-{
-  "start_step": 100,
-  "end_step": 700,
-  "step_interval": 100,
-  "loss_values": [
-    42944.29296875,
-    32115.787109375,
-    29564.89453125,
-    28500.9140625,
-    27571.00390625,
-    26453.740234375,
-    26083.974609375
-  ],
-  "step_times": [
-    2.3437534173329673,
-    13.097546895345053,
-    13.210731824239096,
-    13.215600649515787,
-    13.055537223815918,
-    13.16451358795166,
-    13.058845202128092
-  ],
-  "step_time_avg": 11.592361257189795,
-  "e2e_time_seconds": 47.98266666666666,
-  "run_urls": [
-    "https://api.github.com/repos/NVIDIA/JAX-Toolbox/actions/runs/8324237226/artifacts",
-    "https://api.github.com/repos/NVIDIA/JAX-Toolbox/actions/runs/8314453358/artifacts",
-    "https://api.github.com/repos/NVIDIA/JAX-Toolbox/actions/runs/8306837544/artifacts"
-  ],
-  "date": "2024-03-19"
-}
diff --git a/.github/workflows/baselines/T5X_MGMN/upstream/1P1G.json b/.github/workflows/baselines/T5X_MGMN/upstream/1P1G.json
deleted file mode 100644
index 6fe10cd06..000000000
--- a/.github/workflows/baselines/T5X_MGMN/upstream/1P1G.json
+++ /dev/null
@@ -1,31 +0,0 @@
-{
-  "start_step": 100,
-  "end_step": 700,
-  "step_interval": 100,
-  "loss_values": [
-    21865.04296875,
-    16778.55078125,
-    15394.390625,
-    14834.7900390625,
-    14571.5107421875,
-    13927.41015625,
-    13905.013671875
-  ],
-  "step_times": [
-    2.47874116897583,
-    17.988933563232422,
-    18.21541976928711,
-    17.84804407755534,
-    18.093478520711262,
-    18.190120061238606,
-    18.059662501017254
-  ],
-  "step_time_avg": 15.83919995171683,
-  "e2e_time_seconds": 82.35000000000001,
-  "run_urls": [
-    "https://api.github.com/repos/NVIDIA/JAX-Toolbox/actions/runs/8324237226/artifacts",
-    "https://api.github.com/repos/NVIDIA/JAX-Toolbox/actions/runs/8314453358/artifacts",
-    "https://api.github.com/repos/NVIDIA/JAX-Toolbox/actions/runs/8306837544/artifacts"
-  ],
-  "date": "2024-03-19"
-}
diff --git a/.github/workflows/baselines/T5X_MGMN/upstream/1P1G_fmha.json b/.github/workflows/baselines/T5X_MGMN/upstream/1P1G_fmha.json
deleted file mode 100644
index 5255585b3..000000000
--- a/.github/workflows/baselines/T5X_MGMN/upstream/1P1G_fmha.json
+++ /dev/null
@@ -1,31 +0,0 @@
-{
-  "start_step": 100,
-  "end_step": 700,
-  "step_interval": 100,
-  "loss_values": [
-    21865.04296875,
-    16778.55078125,
-    15394.390625,
-    14834.7900390625,
-    14571.5107421875,
-    13927.41015625,
-    13905.013671875
-  ],
-  "step_times": [
-    2.4637417793273926,
-    18.070358276367188,
-    18.185569127400715,
-    18.15062967936198,
-    18.173222223917644,
-    18.198484420776367,
-    18.11294428507487
-  ],
-  "step_time_avg": 15.907849970318024,
-  "e2e_time_seconds": 47.749,
-  "run_urls": [
-    "https://api.github.com/repos/NVIDIA/JAX-Toolbox/actions/runs/8324237226/artifacts",
-    "https://api.github.com/repos/NVIDIA/JAX-Toolbox/actions/runs/8314453358/artifacts",
-    "https://api.github.com/repos/NVIDIA/JAX-Toolbox/actions/runs/8306837544/artifacts"
-  ],
-  "date": "2024-03-19"
-}
diff --git a/.github/workflows/baselines/T5X_MGMN/upstream/1P2G.json b/.github/workflows/baselines/T5X_MGMN/upstream/1P2G.json
deleted file mode 100644
index 1ac8c54bf..000000000
--- a/.github/workflows/baselines/T5X_MGMN/upstream/1P2G.json
+++ /dev/null
@@ -1,31 +0,0 @@
-{
-  "start_step": 100,
-  "end_step": 700,
-  "step_interval": 100,
-  "loss_values": [
-    42809.5703125,
-    31982.818359375,
-    29629.173828125,
-    28579.404296875,
-    27443.494140625,
-    26543.869140625,
-    26117.07421875
-  ],
-  "step_times": [
-    2.3347439765930176,
-    17.100131352742512,
-    17.403623580932617,
-    17.190806070963543,
-    17.344409306844074,
-    17.314453125,
-    17.230820337931316
-  ],
-  "step_time_avg": 15.131283964429583,
-  "e2e_time_seconds": 67.003,
-  "run_urls": [
-    "https://api.github.com/repos/NVIDIA/JAX-Toolbox/actions/runs/8324237226/artifacts",
-    "https://api.github.com/repos/NVIDIA/JAX-Toolbox/actions/runs/8314453358/artifacts",
-    "https://api.github.com/repos/NVIDIA/JAX-Toolbox/actions/runs/8306837544/artifacts"
-  ],
-  "date": "2024-03-19"
-}
diff --git a/.github/workflows/baselines/T5X_MGMN/upstream/1P2G_fmha.json b/.github/workflows/baselines/T5X_MGMN/upstream/1P2G_fmha.json
deleted file mode 100644
index 5234895ee..000000000
--- a/.github/workflows/baselines/T5X_MGMN/upstream/1P2G_fmha.json
+++ /dev/null
@@ -1,31 +0,0 @@
-{
-  "start_step": 100,
-  "end_step": 700,
-  "step_interval": 100,
-  "loss_values": [
-    42809.5703125,
-    31982.818359375,
-    29629.173828125,
-    28579.404296875,
-    27443.494140625,
-    26543.869140625,
-    26117.07421875
-  ],
-  "step_times": [
-    2.3423378467559814,
-    17.1409117380778,
-    17.402149836222332,
-    17.29240608215332,
-    17.291365305582683,
-    17.392317454020183,
-    17.15788968404134
-  ],
-  "step_time_avg": 15.145625420979092,
-  "e2e_time_seconds": 63.98333333333334,
-  "run_urls": [
-    "https://api.github.com/repos/NVIDIA/JAX-Toolbox/actions/runs/8324237226/artifacts",
-    "https://api.github.com/repos/NVIDIA/JAX-Toolbox/actions/runs/8314453358/artifacts",
-    "https://api.github.com/repos/NVIDIA/JAX-Toolbox/actions/runs/8306837544/artifacts"
-  ],
-  "date": "2024-03-19"
-}
diff --git a/.github/workflows/baselines/T5X_MGMN/upstream/1P4G.json b/.github/workflows/baselines/T5X_MGMN/upstream/1P4G.json
deleted file mode 100644
index 1cf7e35a2..000000000
--- a/.github/workflows/baselines/T5X_MGMN/upstream/1P4G.json
+++ /dev/null
@@ -1,31 +0,0 @@
-{
-  "start_step": 100,
-  "end_step": 700,
-  "step_interval": 100,
-  "loss_values": [
-    84190.9453125,
-    62587.01953125,
-    57130.765625,
-    54206.9921875,
-    52445.33984375,
-    50840.7734375,
-    50487.34375
-  ],
-  "step_times": [
-    2.079073746999105,
-    16.785479227701824,
-    17.152976353963215,
-    17.007422765096027,
-    17.030206044514973,
-    17.1840763092041,
-    17.014991124471027
-  ],
-  "step_time_avg": 14.893460795992896,
-  "e2e_time_seconds": 60.213,
-  "run_urls": [
-    "https://api.github.com/repos/NVIDIA/JAX-Toolbox/actions/runs/8324237226/artifacts",
-    "https://api.github.com/repos/NVIDIA/JAX-Toolbox/actions/runs/8314453358/artifacts",
-    "https://api.github.com/repos/NVIDIA/JAX-Toolbox/actions/runs/8306837544/artifacts"
-  ],
-  "date": "2024-03-19"
-}
diff --git a/.github/workflows/baselines/T5X_MGMN/upstream/1P8G.json b/.github/workflows/baselines/T5X_MGMN/upstream/1P8G.json
deleted file mode 100644
index 9f974cac5..000000000
--- a/.github/workflows/baselines/T5X_MGMN/upstream/1P8G.json
+++ /dev/null
@@ -1,31 +0,0 @@
-{
-  "start_step": 100,
-  "end_step": 700,
-  "step_interval": 100,
-  "loss_values": [
-    166564.625,
-    121425.8984375,
-    110409.046875,
-    104837.1875,
-    101569.234375,
-    100145.3046875,
-    99009.4453125
-  ],
-  "step_times": [
-    1.160822868347168,
-    16.73763910929362,
-    16.92938804626465,
-    17.105731964111328,
-    17.199840545654297,
-    16.785912195841473,
-    17.00780423482259
-  ],
-  "step_time_avg": 14.703876994905016,
-  "e2e_time_seconds": 48.071666666666665,
-  "run_urls": [
-    "https://api.github.com/repos/NVIDIA/JAX-Toolbox/actions/runs/8324237226/artifacts",
-    "https://api.github.com/repos/NVIDIA/JAX-Toolbox/actions/runs/8314453358/artifacts",
-    "https://api.github.com/repos/NVIDIA/JAX-Toolbox/actions/runs/8306837544/artifacts"
-  ],
-  "date": "2024-03-19"
-}
diff --git a/.github/workflows/baselines/T5X_MGMN/upstream/2G1N.json b/.github/workflows/baselines/T5X_MGMN/upstream/2G1N.json
deleted file mode 100644
index 90a1ddc3a..000000000
--- a/.github/workflows/baselines/T5X_MGMN/upstream/2G1N.json
+++ /dev/null
@@ -1,31 +0,0 @@
-{
-  "start_step": 100,
-  "end_step": 700,
-  "step_interval": 100,
-  "loss_values": [
-    42944.29296875,
-    32115.787109375,
-    29564.89453125,
-    28500.9140625,
-    27571.00390625,
-    26453.740234375,
-    26083.974609375
-  ],
-  "step_times": [
-    2.391838232676188,
-    16.899702707926433,
-    17.412066141764324,
-    17.324010848999023,
-    17.375378290812176,
-    17.41155942281087,
-    17.307125727335613
-  ],
-  "step_time_avg": 15.160240196046376,
-  "e2e_time_seconds": 47.81833333333333,
-  "run_urls": [
-    "https://api.github.com/repos/NVIDIA/JAX-Toolbox/actions/runs/8324237226/artifacts",
-    "https://api.github.com/repos/NVIDIA/JAX-Toolbox/actions/runs/8314453358/artifacts",
-    "https://api.github.com/repos/NVIDIA/JAX-Toolbox/actions/runs/8306837544/artifacts"
-  ],
-  "date": "2024-03-19"
-}
diff --git a/.github/workflows/baselines/T5X_MGMN/upstream/2G2N.json b/.github/workflows/baselines/T5X_MGMN/upstream/2G2N.json
deleted file mode 100644
index c8942fc31..000000000
--- a/.github/workflows/baselines/T5X_MGMN/upstream/2G2N.json
+++ /dev/null
@@ -1,31 +0,0 @@
-{
-  "start_step": 100,
-  "end_step": 700,
-  "step_interval": 100,
-  "loss_values": [
-    84881.8203125,
-    62609.671875,
-    57477.609375,
-    54356.4453125,
-    52232.98046875,
-    51183.7890625,
-    50615.93359375
-  ],
-  "step_times": [
-    2.3189565340677896,
-    14.426481564839682,
-    15.366949081420898,
-    14.962293942769369,
-    14.883244832356771,
-    15.368088722229004,
-    14.990182876586914
-  ],
-  "step_time_avg": 13.188028222038632,
-  "e2e_time_seconds": 47.80100000000001,
-  "run_urls": [
-    "https://api.github.com/repos/NVIDIA/JAX-Toolbox/actions/runs/8324237226/artifacts",
-    "https://api.github.com/repos/NVIDIA/JAX-Toolbox/actions/runs/8314453358/artifacts",
-    "https://api.github.com/repos/NVIDIA/JAX-Toolbox/actions/runs/8306837544/artifacts"
-  ],
-  "date": "2024-03-19"
-}
diff --git a/.github/workflows/baselines/T5X_MGMN/upstream/2G2N_fmha.json b/.github/workflows/baselines/T5X_MGMN/upstream/2G2N_fmha.json
deleted file mode 100644
index 0d67b192a..000000000
--- a/.github/workflows/baselines/T5X_MGMN/upstream/2G2N_fmha.json
+++ /dev/null
@@ -1,31 +0,0 @@
-{
-  "start_step": 100,
-  "end_step": 700,
-  "step_interval": 100,
-  "loss_values": [
-    84881.8203125,
-    62609.671875,
-    57477.609375,
-    54356.4453125,
-    52232.98046875,
-    51183.7890625,
-    50615.93359375
-  ],
-  "step_times": [
-    2.3478333155314126,
-    14.952726046244303,
-    15.38317584991455,
-    14.957984606424967,
-    14.925978342692057,
-    15.261824289957682,
-    15.095227241516113
-  ],
-  "step_time_avg": 13.274964241754441,
-  "e2e_time_seconds": 47.87633333333333,
-  "run_urls": [
-    "https://api.github.com/repos/NVIDIA/JAX-Toolbox/actions/runs/8324237226/artifacts",
-    "https://api.github.com/repos/NVIDIA/JAX-Toolbox/actions/runs/8314453358/artifacts",
-    "https://api.github.com/repos/NVIDIA/JAX-Toolbox/actions/runs/8306837544/artifacts"
-  ],
-  "date": "2024-03-19"
-}
diff --git a/.github/workflows/baselines/T5X_MGMN/upstream/4G1N.json b/.github/workflows/baselines/T5X_MGMN/upstream/4G1N.json
deleted file mode 100644
index 78a194565..000000000
--- a/.github/workflows/baselines/T5X_MGMN/upstream/4G1N.json
+++ /dev/null
@@ -1,31 +0,0 @@
-{
-  "start_step": 100,
-  "end_step": 700,
-  "step_interval": 100,
-  "loss_values": [
-    85074.609375,
-    62618.43359375,
-    57475.515625,
-    54443.35546875,
-    52452.90234375,
-    51453.50390625,
-    50904.84375
-  ],
-  "step_times": [
-    2.3716205755869546,
-    16.59337552388509,
-    17.15019162495931,
-    16.815487543741863,
-    16.90180206298828,
-    16.74636459350586,
-    16.827538172403973
-  ],
-  "step_time_avg": 14.772340013867334,
-  "e2e_time_seconds": 47.494,
-  "run_urls": [
-    "https://api.github.com/repos/NVIDIA/JAX-Toolbox/actions/runs/8324237226/artifacts",
-    "https://api.github.com/repos/NVIDIA/JAX-Toolbox/actions/runs/8314453358/artifacts",
-    "https://api.github.com/repos/NVIDIA/JAX-Toolbox/actions/runs/8306837544/artifacts"
-  ],
-  "date": "2024-03-19"
-}
diff --git a/.github/workflows/baselines/T5X_MGMN/upstream/4G2N.json b/.github/workflows/baselines/T5X_MGMN/upstream/4G2N.json
deleted file mode 100644
index 491b6e0e9..000000000
--- a/.github/workflows/baselines/T5X_MGMN/upstream/4G2N.json
+++ /dev/null
@@ -1,31 +0,0 @@
-{
-  "start_step": 100,
-  "end_step": 700,
-  "step_interval": 100,
-  "loss_values": [
-    166386.734375,
-    121687.125,
-    110239.375,
-    105225.1171875,
-    101571.890625,
-    100226.734375,
-    99198.7890625
-  ],
-  "step_times": [
-    2.3323241074879966,
-    15.210881868998209,
-    16.050008455912273,
-    15.354175249735514,
-    15.846360206604004,
-    15.817280133565268,
-    15.525897343953451
-  ],
-  "step_time_avg": 13.733846766608103,
-  "e2e_time_seconds": 48.040666666666674,
-  "run_urls": [
-    "https://api.github.com/repos/NVIDIA/JAX-Toolbox/actions/runs/8324237226/artifacts",
-    "https://api.github.com/repos/NVIDIA/JAX-Toolbox/actions/runs/8314453358/artifacts",
-    "https://api.github.com/repos/NVIDIA/JAX-Toolbox/actions/runs/8306837544/artifacts"
-  ],
-  "date": "2024-03-19"
-}
diff --git a/.github/workflows/baselines/T5X_MGMN/upstream/8G1N.json b/.github/workflows/baselines/T5X_MGMN/upstream/8G1N.json
deleted file mode 100644
index 79995a1dc..000000000
--- a/.github/workflows/baselines/T5X_MGMN/upstream/8G1N.json
+++ /dev/null
@@ -1,31 +0,0 @@
-{
-  "start_step": 100,
-  "end_step": 700,
-  "step_interval": 100,
-  "loss_values": [
-    166136.0625,
-    121262.1171875,
-    109982.3359375,
-    105095.59375,
-    101807.7890625,
-    100457.359375,
-    100047.9453125
-  ],
-  "step_times": [
-    2.2884463469187417,
-    16.788105010986328,
-    17.029067357381184,
-    17.20726267496745,
-    17.159779230753582,
-    17.193702697753906,
-    17.069965362548828
-  ],
-  "step_time_avg": 14.962332668758576,
-  "e2e_time_seconds": 48.57266666666667,
-  "run_urls": [
-    "https://api.github.com/repos/NVIDIA/JAX-Toolbox/actions/runs/8324237226/artifacts",
-    "https://api.github.com/repos/NVIDIA/JAX-Toolbox/actions/runs/8314453358/artifacts",
-    "https://api.github.com/repos/NVIDIA/JAX-Toolbox/actions/runs/8306837544/artifacts"
-  ],
-  "date": "2024-03-19"
-}
diff --git a/.github/workflows/baselines/T5X_MGMN/upstream/8G2N.json b/.github/workflows/baselines/T5X_MGMN/upstream/8G2N.json
deleted file mode 100644
index d3f19ab9e..000000000
--- a/.github/workflows/baselines/T5X_MGMN/upstream/8G2N.json
+++ /dev/null
@@ -1,31 +0,0 @@
-{
-  "start_step": 100,
-  "end_step": 700,
-  "step_interval": 100,
-  "loss_values": [
-    329745.75,
-    238854.71875,
-    216490,
-    206838.921875,
-    202744.78125,
-    200420.703125,
-    199783.03125
-  ],
-  "step_times": [
-    2.2293623288472495,
-    15.984151522318522,
-    16.54537582397461,
-    16.404622395833332,
-    16.47221310933431,
-    16.532596588134766,
-    16.46229362487793
-  ],
-  "step_time_avg": 14.375802199045816,
-  "e2e_time_seconds": 50.278999999999996,
-  "run_urls": [
-    "https://api.github.com/repos/NVIDIA/JAX-Toolbox/actions/runs/8324237226/artifacts",
-    "https://api.github.com/repos/NVIDIA/JAX-Toolbox/actions/runs/8314453358/artifacts",
-    "https://api.github.com/repos/NVIDIA/JAX-Toolbox/actions/runs/8306837544/artifacts"
-  ],
-  "date": "2024-03-19"
-}
diff --git a/.github/workflows/baselines/T5X_MGMN/upstream/8G2N_fmha.json b/.github/workflows/baselines/T5X_MGMN/upstream/8G2N_fmha.json
deleted file mode 100644
index 6a1785a0a..000000000
--- a/.github/workflows/baselines/T5X_MGMN/upstream/8G2N_fmha.json
+++ /dev/null
@@ -1,31 +0,0 @@
-{
-  "start_step": 100,
-  "end_step": 700,
-  "step_interval": 100,
-  "loss_values": [
-    329745.75,
-    238854.71875,
-    216490,
-    206838.921875,
-    202744.78125,
-    200420.703125,
-    199783.03125
-  ],
-  "step_times": [
-    2.2479323546091714,
-    15.95144526163737,
-    16.547892252604168,
-    16.44203758239746,
-    16.48525047302246,
-    16.44901402791341,
-    16.404547373453777
-  ],
-  "step_time_avg": 14.361159903662546,
-  "e2e_time_seconds": 48.32,
-  "run_urls": [
-    "https://api.github.com/repos/NVIDIA/JAX-Toolbox/actions/runs/8324237226/artifacts",
-    "https://api.github.com/repos/NVIDIA/JAX-Toolbox/actions/runs/8314453358/artifacts",
-    "https://api.github.com/repos/NVIDIA/JAX-Toolbox/actions/runs/8306837544/artifacts"
-  ],
-  "date": "2024-03-19"
-}
diff --git a/.github/workflows/baselines/create_baselines.sh b/.github/workflows/baselines/create_baselines.sh
index 10d1d9cb6..086d7efa6 100755
--- a/.github/workflows/baselines/create_baselines.sh
+++ b/.github/workflows/baselines/create_baselines.sh
@@ -46,7 +46,10 @@ elif [[ "$TYPE" == "rosetta-t5x" ]]; then
     )
     OUTPUT_DIR=T5X_MGMN/rosetta
 elif [[ "$TYPE" == "upstream-maxtext" ]]; then
-    CONFIGS=("1DP1FSDP1TP1PP" "1DP1FSDP8TP1PP" "1DP2FSDP4TP1PP_single_process" "1DP4FSDP2TP1PP" "1DP8FSDP1TP1PP" "2DP2FSDP2TP1PP" "4DP2FSDP2TP1PP")
+    CONFIGS=(
+        "1DP2FSDP4TP1PP_single_process"
+        "2DP2FSDP2TP1PP"
+    )
     OUTPUT_DIR=MAXTEXT/upstream
 else
     usage
@@ -60,8 +63,10 @@ bash ${UTIL_DIR}/download_artifacts.sh ${ALL_WF_RUNS[@]}
 URLS=()
 for WORKFLOW_RUN in ${ALL_WF_RUNS[@]}; do
   for CFG in ${CONFIGS[@]}; do
-    if [[ $(find . -mindepth 1 -maxdepth 2 -type d -name $CFG | wc -l) -ne 1 ]]; then
-      echo "Expected one artifact to have a '$CFG' dir under '$PWD', but found $(find . -mindepth 1 -maxdepth 2 -type d -name $CFG)"
+    CFG=$TYPE-$WORKFLOW_RUN-$CFG
+    ARTS=$(find . -mindepth 1 -maxdepth 2 -type d -name "$CFG")
+    if (( $(grep -c . <<< "${ARTS}") != 1 )); then  # count non-empty lines; an unquoted `echo | wc -l` always yields 1
+      echo "Expected one artifact to have a '$CFG' dir under '$PWD', but found ${ARTS}"
       exit 1
     fi
   done
diff --git a/.github/workflows/baselines/summarize_metrics.py b/.github/workflows/baselines/summarize_metrics.py
index 1e1e13009..5fc52e1a0 100644
--- a/.github/workflows/baselines/summarize_metrics.py
+++ b/.github/workflows/baselines/summarize_metrics.py
@@ -40,7 +40,7 @@ def main():
         if not os.path.exists(searchpath):
             searchpath = os.path.join(args.test_config, "summaries/train")
         if not os.path.exists(searchpath):
-            searchpath = os.path.join(args.test_config, "logdir/tensorboard")
-        assert os.path.exists(searchpath), f"Neither {args.test_config}/train nor {args.test_config}/summaries/train nor {args.test_config}/logdir/tensorboard dirs exist"
+            searchpath = os.path.join(args.test_config, "logdir/tensorboard/logdir")
+        assert os.path.exists(searchpath), f"Neither {args.test_config}/train nor {args.test_config}/summaries/train nor {args.test_config}/logdir/tensorboard/logdir dirs exist"
         event_files = glob.glob(os.path.join(searchpath, "events*"))
         assert len(event_files) > 0, f"{searchpath} did not contain a tensorboard events file"
diff --git a/.github/workflows/baselines/test_maxtext_metrics.py b/.github/workflows/baselines/test_maxtext_metrics.py
index a130c86c6..6626f0a25 100644
--- a/.github/workflows/baselines/test_maxtext_metrics.py
+++ b/.github/workflows/baselines/test_maxtext_metrics.py
@@ -2,10 +2,11 @@
 import os
 import json
 import glob
-import sys
+from numpy.testing import assert_allclose
 import test_utils
 from statistics import mean
 
+LOSS_RTOL = 0.10
 STEP_TIME_MULT = 0.95
 E2E_TIME_MULT = 0.95
 test_dir = os.path.dirname(os.path.abspath(__file__))
@@ -22,9 +23,22 @@ def test_loss(baseline_filename):
     event_file = os.path.join(results_dir, test_config, "logdir/tensorboard/logdir/events*")
     event_file = glob.glob(event_file)[0]
     with open(baseline_filepath, "r") as baseline_file:
-        end_step = json.load(baseline_file)["end_step"]
+        # Rebuild the {step: loss} mapping that the baseline JSON encodes implicitly:
+        # loss_values[i] belongs to step start_step + i * step_interval.
+        baseline = json.load(baseline_file)
+        baseline_losses = baseline["loss_values"]
+        first_step, last_step = baseline["start_step"], baseline["end_step"]
+        baseline_steps = range(first_step, last_step + 1, baseline["step_interval"])
+        loss_expected = {step: baseline_losses[i] for i, step in enumerate(baseline_steps)}
         loss_actual = test_utils.read_maxtext_tb_tag(event_file, loss_summary_name)
-        assert 0 <= loss_actual[end_step] < 1.8e-3, f"Loss at final step: {loss_actual[end_step]}, Expected 0 <= loss < 1.8e-3"
+        loss_actual.pop(0, None)  # drop the very first step (step 0); tolerate it being absent
+        assert loss_expected.keys() == loss_actual.keys(), (
+            "Steps at which loss was emitted for run do not match baseline. "
+            f"Actual steps: {loss_actual.keys()}, Baseline steps: {loss_expected.keys()}")
+        # Look up run losses by baseline step so the comparison does not depend on dict ordering.
+        assert_allclose([loss_actual[step] for step in loss_expected], list(loss_expected.values()),
+                        rtol=LOSS_RTOL,
+                        err_msg=f"Run loss values: {loss_actual.values()}, Baseline loss values: {loss_expected.values()}")
 
 
 @pytest.mark.parametrize("baseline_filename", os.listdir(baselines_dir))
diff --git a/.github/workflows/baselines/test_pax_mgmn_metrics.py b/.github/workflows/baselines/test_pax_mgmn_metrics.py
deleted file mode 100644
index e52b8d686..000000000
--- a/.github/workflows/baselines/test_pax_mgmn_metrics.py
+++ /dev/null
@@ -1,56 +0,0 @@
-import pytest
-import os
-import json
-import glob
-import sys
-import test_utils
-from statistics import mean
-
-STEP_TIME_MULT = 0.95
-E2E_TIME_MULT = 0.95
-test_dir = os.path.dirname(os.path.abspath(__file__))
-baselines_dir = os.path.join(test_dir, os.environ.get("BASELINES_DIR"))
-results_dir = os.environ.get("RESULTS_DIR")
-loss_summary_name = "loss"
-step_time_summary_name = "Steps/sec"
-
-
-@pytest.mark.parametrize("baseline_filename", os.listdir(baselines_dir))
-def test_loss(baseline_filename):
-    baseline_filepath = os.path.join(baselines_dir, baseline_filename)
-    test_config = baseline_filename.split(".")[0]
-    event_file = os.path.join(results_dir, test_config, "summaries/train/events*")
-    event_file = glob.glob(event_file)[0]
-    with open(baseline_filepath, "r") as baseline_file:
-        end_step = json.load(baseline_file)["end_step"]
-        loss_actual = test_utils.read_tb_tag(event_file, loss_summary_name)
-        assert 0 <= loss_actual[end_step] < 1.8e-6, f"Loss at final step: {loss_actual[end_step]}, Expected 0 <= loss < 1.8e-6"
-
-
-@pytest.mark.parametrize("baseline_filename", os.listdir(baselines_dir))
-def test_step_time(baseline_filename):
-    baseline_filepath = os.path.join(baselines_dir, baseline_filename)
-    test_config = baseline_filename.split(".")[0]
-    event_file = os.path.join(results_dir, test_config, "summaries/train/events*")
-    event_file = glob.glob(event_file)[0]
-    with open(baseline_filepath, "r") as baseline_file:
-        step_time_avg_expected = json.load(baseline_file)["step_time_avg"]
-        step_time_dict = test_utils.read_tb_tag(event_file, step_time_summary_name)
-        step_time_values = [step_time_dict[i] for i in sorted(step_time_dict.keys())]
-        ## exclude the first steps/sec value from the average 
-        ## because it includes compilation time
-        step_time_avg_actual = mean(step_time_values[1:])
-        assert step_time_avg_actual > step_time_avg_expected * \
-            STEP_TIME_MULT, f"Step time values: {step_time_values} (Avg: {step_time_avg_actual}), Expected avg: {step_time_avg_expected}"
-
-
-@pytest.mark.parametrize("baseline_filename", os.listdir(baselines_dir))
-def test_e2e_time(baseline_filename):
-    baseline_filepath = os.path.join(baselines_dir, baseline_filename)
-    test_config = baseline_filename.split(".")[0]
-    run_log = os.path.join(results_dir, test_config + ".log")
-    with open(baseline_filepath, "r") as baseline_file:
-        e2e_time_expected = json.load(baseline_file)["e2e_time_seconds"]
-        e2e_time_actual = test_utils.read_e2e_time(run_log)
-        assert e2e_time_actual < e2e_time_expected / \
-            E2E_TIME_MULT, f"Run E2E time: {e2e_time_actual}, Expected E2E time: {e2e_time_expected}"
diff --git a/.github/workflows/mjx-build-test.yaml b/.github/workflows/mjx-build-test.yaml
deleted file mode 100644
index d91bf8850..000000000
--- a/.github/workflows/mjx-build-test.yaml
+++ /dev/null
@@ -1,204 +0,0 @@
-name: MJX build
-run-name: MJX build (${{ github.event_name == 'workflow_run' && format('nightly {0}', github.event.workflow_run.created_at) || github.event_name }})
-
-on:
-  schedule:
-    - cron: '30 9 * * *'  # Pacific Time 01:30 AM in UTC
-  workflow_dispatch:
-    inputs:
-      BASE_IMAGE_AMD64:
-        type: string
-        description: 'JAX mealkit AMD64 image built by NVIDIA/JAX-Toolbox'
-        default: ''
-        required: false
-      BASE_IMAGE_ARM64:
-        type: string
-        description: 'JAX mealkit AMD64 image built by NVIDIA/JAX-Toolbox'
-        default: ''
-        required: false
-      PUBLISH:
-        type: boolean
-        description: Publish dated images and update the 'latest' tag?
-        default: false
-        required: false
-
-
-env:
-  DOCKER_REGISTRY: ghcr.io/nvidia
-  DEFAULT_BASE_IMAGE: ghcr.io/nvidia/jax-mealkit:jax
-
-
-permissions:
-  contents: read  # to fetch code
-  actions:  write # to cancel previous workflows
-  packages: write # to upload container
-
-jobs:
-
-  metadata:
-    runs-on: ubuntu-22.04
-    outputs:
-      PUBLISH: ${{ steps.if-publish.outputs.PUBLISH }}
-      BASE_IMAGE_AMD64: ${{ steps.base-image.outputs.BASE_IMAGE_AMD64 }}
-      BASE_IMAGE_ARM64: ${{ steps.base-image.outputs.BASE_IMAGE_ARM64 }}
-      BUILD_DATE: ${{ steps.date.outputs.BUILD_DATE }}
-      
-    steps:
-
-      - name: Cancel workflow if upstream workflow did not success
-        if: ${{ steps.if-upstream-failed.outputs.UPSTREAM_FAILED == 'true' }}
-        run: |
-          echo "Upstream workflow failed, cancelling this workflow"
-          curl -X POST -H "Authorization: token ${{ github.token }}" \
-              -H "Accept: application/vnd.github.v3+json" \
-              "https://api.github.com/repos/${{ github.repository }}/actions/runs/${{ github.run_id }}/cancel"
-          cat  # blocks execution in case workflow cancellation takes time
-
-      - name: Determine if the resulting container should be 'published'
-        id: if-publish
-        shell: bash -x -e {0}
-        run:
-          # A container should be published if:
-          # 1) the workflow is triggered by workflow_dispatch and the PUBLISH input is true, or
-          # 2) the workflow is triggered by workflow_run (i.e., a nightly build)
-          echo "PUBLISH=${{ github.event_name == 'workflow_run' || (github.event_name == 'workflow_dispatch' && inputs.PUBLISH) }}" >> $GITHUB_OUTPUT
-
-      - name: Set build date
-        id: date
-        shell: bash -x -e {0}
-        run: |
-          BUILD_DATE=$(TZ='US/Los_Angeles' date '+%Y-%m-%d')
-          echo "BUILD_DATE=${BUILD_DATE}" >> $GITHUB_OUTPUT
-
-      - name: Set base image
-        id: base-image
-        shell: bash -x -e {0}
-        run: |
-          if [[ -z "${{ inputs.BASE_IMAGE }}" ]]; then
-            BASE_IMAGE_AMD64=${{ env.DEFAULT_BASE_IMAGE }}
-            BASE_IMAGE_ARM64=${{ env.DEFAULT_BASE_IMAGE }}
-          else
-            BASE_IMAGE_AMD64=${{ inputs.BASE_IMAGE_AMD64 }}
-            BASE_IMAGE_ARM64=${{ inputs.BASE_IMAGE_ARM64 }}
-          fi
-          echo "BASE_IMAGE_AMD64=${BASE_IMAGE_AMD64}" >> $GITHUB_OUTPUT
-          echo "BASE_IMAGE_ARM64=${BASE_IMAGE_ARM64}" >> $GITHUB_OUTPUT
-      
-  amd64:
-    needs: metadata
-    uses: ./.github/workflows/_build.yaml
-    with:
-      ARCHITECTURE: amd64
-      ARTIFACT_NAME: artifact-mjx-build
-      BADGE_FILENAME: badge-mjx-build
-      BASE_IMAGE: ${{ needs.metadata.outputs.BASE_IMAGE_AMD64 }}
-      BUILD_DATE: ${{ needs.metadata.outputs.BUILD_DATE }}
-      CONTAINER_NAME: mjx
-      DOCKERFILE: .github/container/Dockerfile.mjx
-    secrets: inherit
-
-  arm64:
-    needs: metadata
-    uses: ./.github/workflows/_build.yaml
-    with:
-      ARCHITECTURE: arm64
-      ARTIFACT_NAME: artifact-mjx-build
-      BADGE_FILENAME: badge-mjx-build
-      BASE_IMAGE: ${{ needs.metadata.outputs.BASE_IMAGE_ARM64 }}
-      BUILD_DATE: ${{ needs.metadata.outputs.BUILD_DATE }}
-      CONTAINER_NAME: mjx
-      DOCKERFILE: .github/container/Dockerfile.mjx
-    secrets: inherit
-
-  publish-mealkit:
-    needs: [metadata, amd64, arm64]
-    if: false
-    #if: needs.metadata.outputs.PUBLISH == 'true'
-    uses: ./.github/workflows/_publish_container.yaml
-    with:
-      ARTIFACT_NAME: mealkit-mjx
-      ARTIFACT_TAG: mjx-mealkit-${{ needs.metadata.outputs.BUILD_DATE }}
-      SOURCE_IMAGE: |
-        ${{ needs.amd64.outputs.DOCKER_TAG_MEALKIT }}
-        ${{ needs.arm64.outputs.DOCKER_TAG_MEALKIT }}
-      TARGET_IMAGE: jax
-      TARGET_TAGS: |
-        type=raw,value=mjx-mealkit,priority=500
-        type=raw,value=mjx-mealkit-${{ needs.metadata.outputs.BUILD_DATE }},priority=500
-
-  publish-final:
-    needs: [metadata, amd64, arm64]
-    if: false
-    #if: needs.metadata.outputs.PUBLISH == 'true'
-    uses: ./.github/workflows/_publish_container.yaml
-    with:
-      ARTIFACT_NAME: final-mjx
-      ARTIFACT_TAG: mjx-nightly-${{ needs.metadata.outputs.BUILD_DATE }}
-      SOURCE_IMAGE: |
-        ${{ needs.amd64.outputs.DOCKER_TAG_FINAL }}
-        ${{ needs.arm64.outputs.DOCKER_TAG_FINAL }}
-      TARGET_IMAGE: jax
-      TARGET_TAGS: |
-        type=raw,value=mjx-latest,priority=1000
-        type=raw,value=mjx-nightly-${{ needs.metadata.outputs.BUILD_DATE }},priority=900
-
-  # disabled because the build is failing and this workflow needs reworking not to block the slurm cluster 
-  # small perf tests
-  # runner:
-  #   uses: ./.github/workflows/_runner_ondemand_slurm.yaml
-  #   with:
-  #     NAME: "A100-${{ github.run_id }}"
-  #     LABELS: "A100:${{ github.run_id }}"
-  #     TIME: "01:00:00"
-  #   secrets: inherit
-
-  # mjx-unit-test:
-  #   needs: amd64
-  #   strategy:
-  #     fail-fast: false
-  #     matrix:
-  #       GPU_ARCH: [A100]
-  #   # ensures A100 job lands on dedicated runner for this particular job
-  #   runs-on: [self-hosted, "${{ matrix.GPU_ARCH == 'A100' && format('{0}:{1}', matrix.GPU_ARCH, github.run_id) || matrix.GPU_ARCH }}"]
-  #   steps:
-  #     - name: Print environment variables
-  #       run: env
-
-  #     - name: Print GPU information
-  #       run: nvidia-smi  
-
-  #     - name: Check out repository
-  #       uses: actions/checkout@v4
-
-  #     - name: Login to GitHub Container Registry
-  #       uses: docker/login-action@v3
-  #       with:
-  #         registry: ghcr.io
-  #         username: ${{ github.repository_owner }}
-  #         password: ${{ secrets.GITHUB_TOKEN }}
-
-  #     - name: Pull MJX image
-  #       shell: bash -x -e {0}
-  #       run: |
-  #         docker pull ${{ needs.amd64.outputs.DOCKER_TAG_FINAL }}
-
-  #     - name: MJX speed test
-  #       shell: bash -x -e {0}
-  #       continue-on-error: true
-  #       run: |
-  #         docker run --gpus=all --shm-size=1g ${{ needs.amd64.outputs.DOCKER_TAG_FINAL }} bash -ec "mjx-testspeed --mjcf=humanoid/humanoid.xml --batch_size=8192 --unroll=4 --output=tsv" | tee -a test-mjx.log
-      
-  #     - name: Save perf to summary
-  #       shell: bash -x -e {0}
-  #       continue-on-error: true
-  #       run: |
-  #         SUMMARY_PATTERN="^mjx-testspeed"
-  #         SUMMARY=$(cat test-mjx.log | grep "$SUMMARY_PATTERN")
-  #         echo "${SUMMARY}" | tee -a $GITHUB_STEP_SUMMARY
-
-  #     - name: Upload artifacts
-  #       uses: actions/upload-artifact@v4
-  #       with:
-  #         name: ${{ env.DEFAULT_ARTIFACT_NAME }}-${{ matrix.GPU_ARCH }}
-  #         path: |
-  #           test-mjx.log
diff --git a/README.md b/README.md
index d50469c9b..6e6570b9d 100644
--- a/README.md
+++ b/README.md
@@ -154,9 +154,7 @@ We support and test the following JAX frameworks and model architectures. More d
         </a>
       </td>
       <td>
-        <a href="https://gist.github.com/nvjax/913c2af68649fe568e9711c2dabb23ae#file-badge-upstream-t5x-mgmn-test-json">
-          <img style="height:1em;" src="https://img.shields.io/endpoint?url=https%3A%2F%2Fgist.githubusercontent.com%2Fnvjax%2F913c2af68649fe568e9711c2dabb23ae%2Fraw%2Fbadge-upstream-t5x-mgmn-test.json&logo=nvidia&label=A100%20distributed">
-        </a>
+        [tests disabled]
       </td>
     </tr>
     <tr>
@@ -280,6 +278,9 @@ For a list of previously used XLA flags that are no longer needed, please also r
 
 | First nightly with new base container | Base container |
 | ------------------------------------- | -------------- |
+| 2025-03-04 | nvcr.io/nvidia/cuda-dl-base:25.02-cuda12.8-devel-ubuntu24.04 |
+| 2025-01-31 | nvcr.io/nvidia/cuda-dl-base:25.01-cuda12.8-devel-ubuntu24.04 |
+| 2025-01-28 | nvcr.io/nvidia/cuda-dl-base:24.11-cuda12.6-devel-ubuntu24.04 |
 | 2024-12-07 | nvidia/cuda:12.6.3-devel-ubuntu22.04 |
 | 2024-11-06 | nvidia/cuda:12.6.2-devel-ubuntu22.04 |
 | 2024-09-25 | nvidia/cuda:12.6.1-devel-ubuntu22.04 |