Skip to content

Commit f040aac

Browse files
mc-nvtianleiwu
authored and committed
Add support for CUDA architecture family codes (#27278)
This change extends CUDA architecture handling to support family-specific codes (suffix 'f') introduced in CUDA 12.9, aligning with updates made to the Triton Inference Server repositories (backend and onnxruntime_backend).

Changes:

1. Added CUDAARCHS environment variable support (standard CMake variable)
   - Allows users to override the architecture list via an environment variable
   - Used only when CMAKE_CUDA_ARCHITECTURES is not set; an explicitly set CMAKE_CUDA_ARCHITECTURES takes precedence
2. Extended regex patterns to recognize the family code suffix 'f'
   - Supports codes like 100f, 110f, 120f for the CC 10.x, 11.x, 12.x families
   - Preserves the 'f' suffix during the parsing phase
3. Updated normalization logic to handle family codes
   - Family codes (ending with 'f') are preserved without adding a -real suffix
   - Traditional codes continue to receive -real or -a-real suffixes
   - Architecture-specific codes (with 'a') remain unchanged
4. Extended architecture support lists
   - Added SM 110 to ARCHITECTURES_WITH_KERNELS
   - Added SM 110 to ARCHITECTURES_WITH_ACCEL

Family-specific codes (introduced in CUDA 12.9/Blackwell) enable forward compatibility within a GPU family. For example, 100f runs on CC 10.0, 10.3, and future 10.x devices, using features common across the family.

Usage examples:
- CUDAARCHS="75;80;90;100f;110f;120f" cmake ..
- cmake -DCMAKE_CUDA_ARCHITECTURES="75-real;80-real;90-real;100f;120f" ..
- python build.py --cmake_extra_defines CMAKE_CUDA_ARCHITECTURES="100f;110f"

The implementation supports mixed formats in the same list:
- Traditional: 75-real, 80-real, 90-real
- Architecture-specific: 90a-real (CC 9.0 only)
- Family-specific: 100f, 110f, 120f (entire family)

Note: Current defaults still use bare numbers (75;80;90;100;120). Bare numbers normalize to -real targets; those listed in ARCHITECTURES_WITH_ACCEL (90, 100, 120 among the defaults) additionally receive the architecture-specific 'a' suffix (e.g. 90a-real), while the others become plain -real (e.g. 75-real, 80-real). Users who want family-specific behavior should explicitly use the 'f' suffix via the CUDAARCHS environment variable or CMAKE_CUDA_ARCHITECTURES.
References:
- NVIDIA Blackwell and CUDA 12.9 Family-Specific Architecture Features: https://developer.nvidia.com/blog/nvidia-blackwell-and-nvidia-cuda-12-9-introduce-family-specific-architecture-features/
- Triton Inference Server backend updates (commit f5e901f)

### Description
<!-- Describe your changes. -->

### Motivation and Context
<!-- - Why is this change required? What problem does it solve? - If it fixes an open issue, please link to the issue here. -->
1 parent a21298f commit f040aac

File tree

1 file changed

+15
-7
lines changed

1 file changed

+15
-7
lines changed

cmake/external/cuda_configuration.cmake

Lines changed: 15 additions & 7 deletions
Original file line number | Diff line number | Diff line change
@@ -85,6 +85,11 @@ macro(setup_cuda_architectures)
8585
# * Always use accelerated (`-a` suffix) target for supported real architectures.
8686
# cmake-format: on
8787

88+
# Allow override via CUDAARCHS environment variable (standard CMake variable)
89+
if(NOT CMAKE_CUDA_ARCHITECTURES AND DEFINED ENV{CUDAARCHS})
90+
set(CMAKE_CUDA_ARCHITECTURES "$ENV{CUDAARCHS}")
91+
endif()
92+
8893
if(CMAKE_CUDA_ARCHITECTURES STREQUAL "native")
8994
# Detect highest available compute capability
9095
set(OUTPUTFILE ${PROJECT_BINARY_DIR}/detect_cuda_arch)
@@ -139,12 +144,12 @@ macro(setup_cuda_architectures)
139144
continue()
140145
endif()
141146

142-
if(CUDA_ARCH MATCHES "^([1-9])([0-9])+a?-virtual$")
147+
if(CUDA_ARCH MATCHES "^([1-9])([0-9])+[af]?-virtual$")
143148
set(CMAKE_CUDA_ARCHITECTURES_LAST_VIRTUAL ${CUDA_ARCH})
144-
elseif(CUDA_ARCH MATCHES "^(([1-9])([0-9])+)a?-real$")
145-
list(APPEND CMAKE_CUDA_ARCHITECTURES_CLEAN ${CMAKE_MATCH_1})
146-
elseif(CUDA_ARCH MATCHES "^(([1-9])([0-9])+)a?$")
149+
elseif(CUDA_ARCH MATCHES "^(([1-9])([0-9])+)[af]?-real$")
147150
list(APPEND CMAKE_CUDA_ARCHITECTURES_CLEAN ${CMAKE_MATCH_1})
151+
elseif(CUDA_ARCH MATCHES "^(([1-9])([0-9])+)([af]?)$")
152+
list(APPEND CMAKE_CUDA_ARCHITECTURES_CLEAN ${CMAKE_MATCH_1}${CMAKE_MATCH_4})
148153
else()
149154
message(FATAL_ERROR "Unrecognized CUDA architecture: ${CUDA_ARCH}")
150155
endif()
@@ -156,7 +161,7 @@ macro(setup_cuda_architectures)
156161
set(CMAKE_CUDA_ARCHITECTURES_ORIG "${CMAKE_CUDA_ARCHITECTURES}")
157162
message(STATUS "GPU architectures: ${CMAKE_CUDA_ARCHITECTURES_ORIG}")
158163

159-
set(ARCHITECTURES_WITH_KERNELS "80" "86" "89" "90" "100" "120")
164+
set(ARCHITECTURES_WITH_KERNELS "80" "86" "89" "90" "100" "110" "120")
160165
foreach(CUDA_ARCH IN LISTS ARCHITECTURES_WITH_KERNELS)
161166
if(NOT "${CUDA_ARCH}" IN_LIST CMAKE_CUDA_ARCHITECTURES_ORIG)
162167
add_definitions("-DEXCLUDE_SM_${CUDA_ARCH}")
@@ -165,10 +170,13 @@ macro(setup_cuda_architectures)
165170
endforeach()
166171

167172
# Enable accelerated features (like WGMMA, TMA and setmaxnreg) for SM >= 90.
168-
set(ARCHITECTURES_WITH_ACCEL "90" "100" "101" "120")
173+
set(ARCHITECTURES_WITH_ACCEL "90" "100" "101" "110" "120")
169174
unset(CMAKE_CUDA_ARCHITECTURES_NORMALIZED)
170175
foreach(CUDA_ARCH IN LISTS CMAKE_CUDA_ARCHITECTURES)
171-
if("${CUDA_ARCH}" IN_LIST ARCHITECTURES_WITH_ACCEL)
176+
if(CUDA_ARCH MATCHES "^([0-9]+)f$")
177+
# Family code, no -real suffix
178+
list(APPEND CMAKE_CUDA_ARCHITECTURES_NORMALIZED "${CUDA_ARCH}")
179+
elseif("${CUDA_ARCH}" IN_LIST ARCHITECTURES_WITH_ACCEL)
172180
list(APPEND CMAKE_CUDA_ARCHITECTURES_NORMALIZED "${CUDA_ARCH}a-real")
173181
else()
174182
list(APPEND CMAKE_CUDA_ARCHITECTURES_NORMALIZED "${CUDA_ARCH}-real")

0 commit comments

Comments
 (0)