Skip to content

Commit 6625856

Browse files
authored
Add support for CUDA architecture family codes (#27278)
This change extends CUDA architecture handling to support family-specific codes (suffix 'f') introduced in CUDA 12.9, aligning with updates made to the Triton Inference Server repositories (backend and onnxruntime_backend).

Changes:

1. Added CUDAARCHS environment variable support (standard CMake environment variable)
   - Allows users to supply the architecture list via the environment
   - Used only when CMAKE_CUDA_ARCHITECTURES is not set
2. Extended regex patterns to recognize the family code suffix 'f'
   - Supports codes like 100f, 110f, 120f for the CC 10.x, 11.x, 12.x families
   - Preserves the 'f' suffix on bare codes (e.g. 100f) during the parsing phase
3. Updated normalization logic to handle family codes
   - Family codes (ending with 'f') are preserved without adding a -real suffix
   - Traditional codes continue to receive the -real or a-real suffix
   - Architecture-specific codes (with 'a') remain unchanged
4. Extended architecture support lists
   - Added SM 110 to ARCHITECTURES_WITH_KERNELS
   - Added SM 110 to ARCHITECTURES_WITH_ACCEL

Family-specific codes (introduced in CUDA 12.9/Blackwell) enable forward compatibility within a GPU family. For example, 100f runs on CC 10.0, 10.3, and future 10.x devices, using features common across the family.

Usage examples:

- CUDAARCHS="75;80;90;100f;110f;120f" cmake ..
- cmake -DCMAKE_CUDA_ARCHITECTURES="75-real;80-real;90-real;100f;120f" ..
- python build.py --cmake_extra_defines CMAKE_CUDA_ARCHITECTURES="100f;110f"

The implementation supports mixed formats in the same list:

- Traditional: 75-real, 80-real, 90-real
- Architecture-specific: 90a-real (CC 9.0 only)
- Family-specific: 100f, 110f, 120f (entire family)

Note: Current defaults still use bare numbers (75;80;90;100;120), which normalize to -real targets, with the architecture-specific 'a' suffix added for architectures listed in ARCHITECTURES_WITH_ACCEL. Users who want family-specific behavior should explicitly use the 'f' suffix via the CUDAARCHS environment variable or CMAKE_CUDA_ARCHITECTURES.
References:

- NVIDIA Blackwell and CUDA 12.9 Family-Specific Architecture Features: https://developer.nvidia.com/blog/nvidia-blackwell-and-nvidia-cuda-12-9-introduce-family-specific-architecture-features/
- Triton Inference Server backend updates (commit f5e901f)

### Description

<!-- Describe your changes. -->

### Motivation and Context

<!-- - Why is this change required? What problem does it solve? - If it fixes an open issue, please link to the issue here. -->
1 parent 5645b77 commit 6625856

File tree

1 file changed

+15
-7
lines changed

1 file changed

+15
-7
lines changed

cmake/external/cuda_configuration.cmake

Lines changed: 15 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -85,6 +85,11 @@ macro(setup_cuda_architectures)
8585
# * Always use accelerated (`-a` suffix) target for supported real architectures.
8686
# cmake-format: on
8787

88+
# Fall back to the CUDAARCHS environment variable (CMake's standard environment variable for CUDA architectures) when CMAKE_CUDA_ARCHITECTURES is not set or evaluates to false.
89+
if(NOT CMAKE_CUDA_ARCHITECTURES AND DEFINED ENV{CUDAARCHS})
90+
set(CMAKE_CUDA_ARCHITECTURES "$ENV{CUDAARCHS}")
91+
endif()
92+
8893
if(CMAKE_CUDA_ARCHITECTURES STREQUAL "native")
8994
# Detect highest available compute capability
9095
set(OUTPUTFILE ${PROJECT_BINARY_DIR}/detect_cuda_arch)
@@ -142,12 +147,12 @@ macro(setup_cuda_architectures)
142147
continue()
143148
endif()
144149

145-
if(CUDA_ARCH MATCHES "^([1-9])([0-9])+a?-virtual$")
150+
if(CUDA_ARCH MATCHES "^([1-9])([0-9])+[af]?-virtual$")
146151
set(CMAKE_CUDA_ARCHITECTURES_LAST_VIRTUAL ${CUDA_ARCH})
147-
elseif(CUDA_ARCH MATCHES "^(([1-9])([0-9])+)a?-real$")
148-
list(APPEND CMAKE_CUDA_ARCHITECTURES_CLEAN ${CMAKE_MATCH_1})
149-
elseif(CUDA_ARCH MATCHES "^(([1-9])([0-9])+)a?$")
152+
elseif(CUDA_ARCH MATCHES "^(([1-9])([0-9])+)[af]?-real$")
150153
list(APPEND CMAKE_CUDA_ARCHITECTURES_CLEAN ${CMAKE_MATCH_1})
154+
elseif(CUDA_ARCH MATCHES "^(([1-9])([0-9])+)([af]?)$")
155+
list(APPEND CMAKE_CUDA_ARCHITECTURES_CLEAN ${CMAKE_MATCH_1}${CMAKE_MATCH_4})
151156
else()
152157
message(FATAL_ERROR "Unrecognized CUDA architecture: ${CUDA_ARCH}")
153158
endif()
@@ -159,7 +164,7 @@ macro(setup_cuda_architectures)
159164
set(CMAKE_CUDA_ARCHITECTURES_ORIG "${CMAKE_CUDA_ARCHITECTURES}")
160165
message(STATUS "GPU architectures: ${CMAKE_CUDA_ARCHITECTURES_ORIG}")
161166

162-
set(ARCHITECTURES_WITH_KERNELS "80" "86" "89" "90" "100" "120")
167+
set(ARCHITECTURES_WITH_KERNELS "80" "86" "89" "90" "100" "110" "120")
163168
foreach(CUDA_ARCH IN LISTS ARCHITECTURES_WITH_KERNELS)
164169
if(NOT "${CUDA_ARCH}" IN_LIST CMAKE_CUDA_ARCHITECTURES_ORIG)
165170
add_definitions("-DEXCLUDE_SM_${CUDA_ARCH}")
@@ -168,10 +173,13 @@ macro(setup_cuda_architectures)
168173
endforeach()
169174

170175
# Enable accelerated features (like WGMMA, TMA and setmaxnreg) for SM >= 90.
171-
set(ARCHITECTURES_WITH_ACCEL "90" "100" "101" "120")
176+
set(ARCHITECTURES_WITH_ACCEL "90" "100" "101" "110" "120")
172177
unset(CMAKE_CUDA_ARCHITECTURES_NORMALIZED)
173178
foreach(CUDA_ARCH IN LISTS CMAKE_CUDA_ARCHITECTURES)
174-
if("${CUDA_ARCH}" IN_LIST ARCHITECTURES_WITH_ACCEL)
179+
if(CUDA_ARCH MATCHES "^([0-9]+)f$")
180+
# Family code, no -real suffix
181+
list(APPEND CMAKE_CUDA_ARCHITECTURES_NORMALIZED "${CUDA_ARCH}")
182+
elseif("${CUDA_ARCH}" IN_LIST ARCHITECTURES_WITH_ACCEL)
175183
list(APPEND CMAKE_CUDA_ARCHITECTURES_NORMALIZED "${CUDA_ARCH}a-real")
176184
else()
177185
list(APPEND CMAKE_CUDA_ARCHITECTURES_NORMALIZED "${CUDA_ARCH}-real")

0 commit comments

Comments
 (0)