Skip to content

Commit 08589bf

Browse files
Use native GPU architecture when NVIDIA GPU is detected; otherwise fall back to multi-arch build. (#732)
This change makes MSCCL++ automatically select CUDA architectures based on the build environment. If an NVIDIA GPU is detected, the build targets the native GPU architecture for optimal performance; otherwise, it falls back to building for multiple architectures for portability. When building for the native architecture, FP8 support is automatically enabled for “a-series” GPUs (e.g., sm_100a), allowing the appropriate optimized code paths to be picked up.
1 parent: cc797ab · commit: 08589bf

1 file changed

Lines changed: 16 additions & 12 deletions

File tree

CMakeLists.txt

Lines changed: 16 additions & 12 deletions
Original file line number | Diff line number | Diff line change
@@ -106,18 +106,22 @@ if(MSCCLPP_GPU_ARCHS)
106106
message(FATAL_ERROR "MSCCLPP_GPU_ARCHS is empty. Specify GPU architectures or leave unset.")
107107
endif()
108108
elseif(MSCCLPP_USE_CUDA)
109-
if(CUDAToolkit_VERSION VERSION_LESS "11.8")
110-
message(FATAL_ERROR "CUDA 11.8 or higher required, found ${CUDAToolkit_VERSION}")
111-
endif()
112-
set(MSCCLPP_GPU_ARCHS 80)
113-
if(CUDAToolkit_VERSION VERSION_GREATER_EQUAL "12.0")
114-
list(APPEND MSCCLPP_GPU_ARCHS 90)
115-
endif()
116-
if(CUDAToolkit_VERSION VERSION_GREATER_EQUAL "12.8")
117-
list(APPEND MSCCLPP_GPU_ARCHS 100)
118-
endif()
119-
if(CUDAToolkit_VERSION VERSION_GREATER_EQUAL "12.9")
120-
list(APPEND MSCCLPP_GPU_ARCHS 120)
109+
if(NVIDIA_FOUND)
110+
set(MSCCLPP_GPU_ARCHS "native")
111+
else()
112+
if(CUDAToolkit_VERSION VERSION_LESS "11.8")
113+
message(FATAL_ERROR "CUDA 11.8 or higher required, found ${CUDAToolkit_VERSION}")
114+
endif()
115+
set(MSCCLPP_GPU_ARCHS 80)
116+
if(CUDAToolkit_VERSION VERSION_GREATER_EQUAL "12.0")
117+
list(APPEND MSCCLPP_GPU_ARCHS 90)
118+
endif()
119+
if(CUDAToolkit_VERSION VERSION_GREATER_EQUAL "12.8")
120+
list(APPEND MSCCLPP_GPU_ARCHS 100)
121+
endif()
122+
if(CUDAToolkit_VERSION VERSION_GREATER_EQUAL "12.9")
123+
list(APPEND MSCCLPP_GPU_ARCHS 120)
124+
endif()
121125
endif()
122126
elseif(MSCCLPP_USE_ROCM)
123127
set(MSCCLPP_GPU_ARCHS gfx90a gfx941 gfx942)

0 commit comments

Comments
 (0)