File tree Expand file tree Collapse file tree 2 files changed +14
-2
lines changed
Expand file tree Collapse file tree 2 files changed +14
-2
lines changed Original file line number Diff line number Diff line change @@ -7,6 +7,7 @@ FROM ${BASE_IMAGE}
77# - Ubuntu: 22.04
88# - Python: 3.10
99# - ROCm: 6.3.4
10+ # - openmpi: 4.0.7rc2
1011# - torch: 2.8.0a0+git7d205b2
1112# - rccl: 2.21.5.60304-76
1213# - hipblaslt: 0.15.0-8c69191d
@@ -17,9 +18,8 @@ FROM ${BASE_IMAGE}
1718# - amd-smi: 25.1.0+8dc45db
1819# Added by this Dockerfile:
1920# - Docker Client: 27.5.1
20- # - openmpi: pre-installed at /opt/ompi
2121# - mlc: v3.12
22- # - OFED: 24.10-1.1.4.0 LTS (if not present)
22+ # - OFED: 24.10-1.1.4.0 LTS
2323
2424# Fix base image botocore/urllib3 incompatibility:
2525# Base image ships botocore 1.22.12 (expects urllib3 1.x) with urllib3 2.6.3,
@@ -104,6 +104,9 @@ RUN if ! command -v ofed_info >/dev/null 2>&1; then \
104104
105105ENV ROCM_PATH=/opt/rocm
106106
107+ # Target GPU architectures for ROCm builds (space-separated)
108+ ENV AMDGPU_TARGETS="gfx908 gfx90a gfx942"
109+
107110# Use pre-installed OpenMPI from base image at /opt/ompi
108111ENV MPI_HOME=/opt/ompi
109112
Original file line number Diff line number Diff line change @@ -134,9 +134,17 @@ ifneq (,$(wildcard fio/Makefile))
134134endif
135135
# Build rccl-tests from commit 46375b1 of default branch.
#
# AMDGPU_TARGETS (optional, usually taken from the environment) is a
# space-separated list of GPU architectures, e.g. "gfx908 gfx90a gfx942".
#   - When it is set, one --offload-arch=<arch> flag per listed target is
#     passed to the rccl-tests build through HIPCUFLAGS, together with explicit
#     ROCm and MPI include paths.
#   - When it is unset or empty, the plain build command is used, which relies
#     on hipcc auto-detecting the GPU architecture.
#
# Keep the variable references below free of inner whitespace so that every
# target expands to exactly one "--offload-arch=<arch>" word.
ROCM_OFFLOAD_ARCH_FLAGS := $(foreach arch,$(AMDGPU_TARGETS),--offload-arch=$(arch))

rocm_rccl_tests: sb_micro_path
# Only build when the rccl-tests sources (submodule) are actually present.
ifneq (,$(wildcard rccl-tests/Makefile))
ifdef AMDGPU_TARGETS
	cd ./rccl-tests && make MPI=1 MPI_HOME=$(MPI_HOME) HIPCUFLAGS="-std=c++14 -O3 $(ROCM_OFFLOAD_ARCH_FLAGS) -I$(ROCM_PATH)/include -I$(ROCM_PATH)/include/rccl -I$(ROCM_PATH)/include/hip -DMPI_SUPPORT -I$(MPI_HOME)/include -I$(MPI_HOME)/include/mpi" -j
else
	cd ./rccl-tests && make MPI=1 MPI_HOME=$(MPI_HOME) -j
endif
	# Install every built test binary next to the other micro-benchmarks.
	cp -v -r ./rccl-tests/build/* $(SB_MICRO_PATH)/bin/
endif
142150
@@ -168,6 +176,7 @@ rocm_hipblaslt: sb_micro_path
168176# Build hipBusBandwidth.
169177# HIP is released with rocm, like rocm-4.2.0 and so on.
170178# The version we use is the released tag which is consistent with the rocm version in the environment or docker.
179+
171180rocm_bandwidthTest : sb_micro_path
172181 git clone -b ${ROCM_VER} https://github.com/ROCm-Developer-Tools/HIP.git
173182 cd ./HIP/samples/1_Utils/hipBusBandwidth/ && mkdir -p build && cd build && cmake .. && make
You can’t perform that action at this time.
0 commit comments