File tree Expand file tree Collapse file tree 4 files changed +14
-11
lines changed Expand file tree Collapse file tree 4 files changed +14
-11
lines changed Original file line number Diff line number Diff line change 8585 fi
8686 - name : Build and Push Docker Image
8787 run : |
88- echo "> Login to Docker Hub"
89- docker login -u tasimage -p ${{ secrets.PRIMUS_DOCKER_HUB_TOKEN }}
88+ echo "> Login to ROCm Docker Hub"
89+ echo "${{ secrets.ROCM_DOCKER_HUB_TOKEN }}" | docker login -u rocmshared --password-stdin # token is read from stdin, not passed as a docker CLI argument
9090
9191 echo "> Build Docker Image with tag: ${{ env.IMAGE_TAG }}"
9292 start_time=$(date +%s)
@@ -100,6 +100,9 @@ jobs:
100100 echo "> Docker tag image for Docker Hub"
101101 docker tag tasimage/primus:${{env.IMAGE_TAG}} docker.io/tasimage/primus:${{env.IMAGE_TAG}}
102102
103+ echo "> Login to Primus Docker Hub"
104+ echo "${{ secrets.PRIMUS_DOCKER_HUB_TOKEN }}" | docker login -u tasimage --password-stdin # token is read from stdin, not passed as a docker CLI argument
105+
103106 echo "> Docker push to Docker Hub"
104107 start_time=$(date +%s)
105108 docker push docker.io/tasimage/primus:${{env.IMAGE_TAG}}
Original file line number Diff line number Diff line change 11# Base image
2- FROM docker.io/rocm/megatron-lm:v25.9_gfx942
2+ # FROM docker.io/rocm/megatron-lm:v25.9_gfx942
3+ FROM docker.io/rocm/pyt-megatron-lm-jax-nightly-private:pytorch_rocm7.0_20251024
34
45# Specify the commit of Primus-Turbo when building: docker build --build-arg PRIMUS_TURBO_COMMIT=xxx .)
56ARG PRIMUS_TURBO_COMMIT
67
78# Install basic dependencies
89RUN apt-get update
910
10- RUN pip3 install fbgemm-gpu && \
11- pip3 install torch==2.10.0.dev20251019+rocm7.0 \
12- torchao==0.15.0.dev20251015+rocm7.0 \
13- torchvision==0.25.0.dev20251020+rocm7.0 \
14- --index-url https://download.pytorch.org/whl/nightly/rocm7.0 \
15- --force-reinstall
16-
1711# Clone and install the Primus-Turbo
1812WORKDIR /opt
1913RUN mkdir -p /opt && cd /opt && \
@@ -24,7 +18,6 @@ RUN mkdir -p /opt && cd /opt && \
2418 pip3 install -r requirements.txt && \
2519 GPU_ARCHS="gfx942;gfx950" pip3 install --no-build-isolation .
2620
27-
2821# Set the default working directory
2922WORKDIR /opt
3023
Original file line number Diff line number Diff line change @@ -73,6 +73,9 @@ done < <(env | grep "^PRIMUS_")
7373while IFS=' =' read -r name _; do
7474 ENV_ARGS+=(" --env" " $name " )
7575done < <( env | grep " ^NCCL_" )
76+ while IFS=' =' read -r name _; do
77+ ENV_ARGS+=(" --env" " $name " )
78+ done < <( env | grep " ^PRIMUS_TURBO_" )
7679ENV_ARGS+=(" --env" " EXP" )
7780ENV_ARGS+=(" --env" " HF_TOKEN" )
7881
Original file line number Diff line number Diff line change @@ -224,6 +224,9 @@ export NVTE_USE_OPTIMIZED_HIPIFIED_CAST_TRANSPOSE=0
224224# Note: Disable v3 due to accuracy issues. Will fix after TE version 2.1.
225225export NVTE_CK_USES_BWD_V3=${NVTE_CK_USES_BWD_V3:- 0}
226226
227+ # Note: Disable fp32 atomic if you find any accuracy issue. Defaults to 0 when unset or empty.
228+ export PRIMUS_TURBO_ATTN_V3_ATOMIC_FP32=${PRIMUS_TURBO_ATTN_V3_ATOMIC_FP32:-0}
229+
227230# nvte debug envs
228231export NVTE_DEBUG=0 # 0, 1
229232export NVTE_DEBUG_LEVEL=0 # 0, 1, 2
@@ -241,6 +244,7 @@ LOG_INFO_RANK0 "NCCL_P2P_NET_CHUNKSIZE: $NCCL_P2P_NET_CHUNKSIZE"
241244LOG_INFO_RANK0 " NVTE_CK_USES_BWD_V3: $NVTE_CK_USES_BWD_V3 "
242245LOG_INFO_RANK0 " NVTE_USE_CAST_TRANSPOSE_TRITON: $NVTE_USE_CAST_TRANSPOSE_TRITON "
243246LOG_INFO_RANK0 " NVTE_USE_OPTIMIZED_HIPIFIED_CAST_TRANSPOSE: $NVTE_USE_OPTIMIZED_HIPIFIED_CAST_TRANSPOSE "
247+ LOG_INFO_RANK0 " PRIMUS_TURBO_ATTN_V3_ATOMIC_FP32: $PRIMUS_TURBO_ATTN_V3_ATOMIC_FP32 "
244248if [[ " $PATCH_TE_FLASH_ATTN " == " 1" ]]; then
245249 LOG_INFO_RANK0 ' Patching _flash_attn_max_version in attention.py...'
246250 sed -i ' s/_flash_attn_max_version = PkgVersion(\".*\")/_flash_attn_max_version = PkgVersion(\"3.0.0.post1\")/' \
You can’t perform that action at this time.
0 commit comments