Skip to content

Commit 50e3362

Browse files
authored
Fix some issues when building with the latest CUDA and cuDNN versions (#27049)
### Description <!-- Describe your changes. --> Fix some issues when building with the latest CUDA and cuDNN versions on Windows. * The latest cuDNN install has the CUDA toolkit version in the path. * Adjust cmake files to support that. * CUDA 13.x drops support for compute capabilities 6.0 and 7.0. * Remove them from CMAKE_CUDA_ARCHITECTURES. * Remove a LINK_LANGUAGE:CUDA flag for CETCOMPAT * The syntax doesn't seem to be supported with MSVC. The build is successful without this (CUDA 13.1, cuDNN 9.17). * `LINK : warning LNK4044: unrecognized option '/Xlinker=/CETCOMPAT'; ignored [D:\src\github\ort.cuda\build\Windows.CUDA\Debug\onnxruntime_providers_cuda_ut.vcxproj]` * Memory leak checker fixes * onnxruntime_providers_cuda_ut was incorrectly linking against ORT common, causing a duplicate symbol when the debug leak checker is enabled (multiple overrides of `new` and `delete`). * As the CUDA EP is built as a separate library it shouldn't need to link against `common`. * Use the debug alloc/free for the provider bridge when the leak checker is enabled * Ignore EtwEventWriteNoRegistration in leak checker output as we don't control that. ### Motivation and Context <!-- - Why is this change required? What problem does it solve? - If it fixes an open issue, please link to the issue here. -->
1 parent 6d34aba commit 50e3362

File tree

7 files changed

+38
-11
lines changed

7 files changed

+38
-11
lines changed

cmake/CMakeLists.txt

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1207,7 +1207,7 @@ function(onnxruntime_configure_target target_name)
12071207

12081208
# Keep BinSkim happy
12091209
if(MSVC AND NOT onnxruntime_target_platform MATCHES "ARM")
1210-
target_link_options(${target_name} PRIVATE "$<$<LINK_LANGUAGE:CXX,C>:/CETCOMPAT>" "$<$<LINK_LANGUAGE:CUDA>:-Xlinker=/CETCOMPAT>")
1210+
target_link_options(${target_name} PRIVATE "$<$<LINK_LANGUAGE:CXX,C>:/CETCOMPAT>")
12111211
endif()
12121212

12131213
endfunction()

cmake/external/cuDNN.cmake

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,7 @@ add_library(CUDNN::cudnn_all INTERFACE IMPORTED)
33
find_path(
44
CUDNN_INCLUDE_DIR cudnn.h
55
HINTS $ENV{CUDNN_PATH} ${CUDNN_PATH} ${Python_SITEARCH}/nvidia/cudnn ${CUDAToolkit_INCLUDE_DIRS}
6-
PATH_SUFFIXES include
6+
PATH_SUFFIXES include include/${onnxruntime_CUDA_VERSION}
77
REQUIRED
88
)
99

@@ -15,7 +15,7 @@ function(find_cudnn_library NAME)
1515
find_library(
1616
${NAME}_LIBRARY ${NAME} "lib${NAME}.so.${CUDNN_MAJOR_VERSION}"
1717
HINTS $ENV{CUDNN_PATH} ${CUDNN_PATH} ${Python_SITEARCH}/nvidia/cudnn ${CUDAToolkit_LIBRARY_DIR}
18-
PATH_SUFFIXES lib64 lib/x64 lib
18+
PATH_SUFFIXES lib64 lib/x64 lib lib/${onnxruntime_CUDA_VERSION}/x64
1919
REQUIRED
2020
)
2121

cmake/external/cuda_configuration.cmake

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -126,6 +126,9 @@ macro(setup_cuda_architectures)
126126
set(CMAKE_CUDA_ARCHITECTURES "37;50;52;60;70;75;80;86;89")
127127
elseif(CMAKE_CUDA_COMPILER_VERSION VERSION_LESS 12.8)
128128
set(CMAKE_CUDA_ARCHITECTURES "52;60;70;75;80;86;89;90")
129+
elseif(CMAKE_CUDA_COMPILER_VERSION VERSION_GREATER_EQUAL 13.0)
130+
# 13.x drops support for 60 and 70
131+
set(CMAKE_CUDA_ARCHITECTURES "75;80;86;89;90;100;120")
129132
else()
130133
set(CMAKE_CUDA_ARCHITECTURES "60;70;75;80;86;89;90;100;120")
131134
endif()

cmake/onnxruntime_python.cmake

Lines changed: 16 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -297,8 +297,22 @@ if (WIN32)
297297
if (onnxruntime_USE_CUDA)
298298
file(WRITE "${VERSION_INFO_FILE}" "use_cuda = True\n")
299299
if(onnxruntime_CUDNN_HOME)
300-
file(GLOB CUDNN_DLL_PATH "${onnxruntime_CUDNN_HOME}/bin/cudnn64_*.dll")
301-
if (NOT CUDNN_DLL_PATH)
300+
# may have x64 in the path
301+
# may have a path with CUDA toolkit version if multiple installed on the machine
302+
set(CUDNN_SEARCH_PATHS
303+
"${onnxruntime_CUDNN_HOME}/bin/cudnn64_*.dll"
304+
"${onnxruntime_CUDNN_HOME}/bin/x64/cudnn64_*.dll"
305+
"${onnxruntime_CUDNN_HOME}/bin/${onnxruntime_CUDA_VERSION}/cudnn64_*.dll"
306+
"${onnxruntime_CUDNN_HOME}/bin/${onnxruntime_CUDA_VERSION}/x64/cudnn64_*.dll"
307+
)
308+
set(CUDNN_DLL_PATH "")
309+
foreach(search_path ${CUDNN_SEARCH_PATHS})
310+
file(GLOB CUDNN_DLL_PATH "${search_path}")
311+
if(CUDNN_DLL_PATH)
312+
break()
313+
endif()
314+
endforeach()
315+
if(NOT CUDNN_DLL_PATH)
302316
message(FATAL_ERROR "cuDNN not found in ${onnxruntime_CUDNN_HOME}")
303317
endif()
304318
else()

cmake/onnxruntime_unittests.cmake

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -949,9 +949,9 @@ if (onnxruntime_ENABLE_CUDA_EP_INTERNAL_TESTS)
949949
onnxruntime_add_shared_library_module(onnxruntime_providers_cuda_ut ${onnxruntime_test_providers_cuda_ut_src} $<TARGET_OBJECTS:onnxruntime_providers_cuda_obj>)
950950
config_cuda_provider_shared_module(onnxruntime_providers_cuda_ut)
951951
onnxruntime_add_include_to_target(onnxruntime_providers_cuda_ut GTest::gtest GTest::gmock)
952-
add_dependencies(onnxruntime_providers_cuda_ut onnxruntime_test_utils onnxruntime_common)
952+
add_dependencies(onnxruntime_providers_cuda_ut onnxruntime_test_utils)
953953
target_include_directories(onnxruntime_providers_cuda_ut PRIVATE ${ONNXRUNTIME_ROOT}/core/mickey)
954-
target_link_libraries(onnxruntime_providers_cuda_ut PRIVATE GTest::gtest GTest::gmock ${ONNXRUNTIME_MLAS_LIBS} onnxruntime_test_utils onnxruntime_common)
954+
target_link_libraries(onnxruntime_providers_cuda_ut PRIVATE GTest::gtest GTest::gmock ${ONNXRUNTIME_MLAS_LIBS} onnxruntime_test_utils)
955955
if (MSVC)
956956
# Cutlass code has an issue with the following:
957957
# warning C4100: 'magic': unreferenced formal parameter
@@ -1233,15 +1233,15 @@ block()
12331233
DEPENDS ${onnxruntime_provider_test_deps}
12341234
)
12351235

1236-
# Expose QNN SDK headers to unit tests via an interface target
1236+
# Expose QNN SDK headers to unit tests via an interface target
12371237
if(onnxruntime_USE_QNN)
12381238
add_library(qnn_sdk_headers_include INTERFACE)
12391239
target_include_directories(qnn_sdk_headers_include INTERFACE
12401240
${onnxruntime_QNN_HOME}/include
12411241
${onnxruntime_QNN_HOME}/include/QNN)
12421242
target_link_libraries(onnxruntime_provider_test PRIVATE qnn_sdk_headers_include)
12431243
endif()
1244-
1244+
12451245
if (UNIX AND (onnxruntime_USE_TENSORRT OR onnxruntime_USE_NV))
12461246
# The test_main.cc includes NvInfer.h where it has many deprecated declarations
12471247
# simply ignore them for TensorRT EP build

onnxruntime/core/platform/windows/debug_alloc.cc

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -254,7 +254,8 @@ Memory_LeakCheck::~Memory_LeakCheck() {
254254
string.find("testing::internal::ThreadLocalRegistryImpl::GetThreadLocalsMapLocked") == std::string::npos &&
255255
string.find("testing::internal::ThreadLocalRegistryImpl::GetValueOnCurrentThread") == std::string::npos &&
256256
string.find("PyInit_onnxruntime_pybind11_state") == std::string::npos &&
257-
string.find("google::protobuf::internal::InitProtobufDefaultsSlow") == std::string::npos) {
257+
string.find("google::protobuf::internal::InitProtobufDefaultsSlow") == std::string::npos &&
258+
string.find("EtwEventWriteNoRegistration") == std::string::npos) {
258259
if (leaked_bytes == 0)
259260
DebugPrint("\n-----Starting Heap Trace-----\n\n");
260261

onnxruntime/core/session/provider_bridge_ort.cc

Lines changed: 10 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -92,9 +92,9 @@ using EtwRegistrationManager_EtwInternalCallback = EtwRegistrationManager::EtwIn
9292

9393
#include "core/common/cpuid_info.h"
9494
#include "core/common/logging/logging.h"
95+
9596
#include "core/providers/shared_library/provider_interfaces.h"
9697
#include "core/providers/partitioning_utils.h"
97-
9898
#include "core/providers/cuda/cuda_provider_factory_creator.h"
9999
#include "core/providers/cann/cann_provider_factory_creator.h"
100100
#include "core/providers/dnnl/dnnl_provider_factory_creator.h"
@@ -118,6 +118,10 @@ using EtwRegistrationManager_EtwInternalCallback = EtwRegistrationManager::EtwIn
118118
#include "core/providers/nv_tensorrt_rtx/nv_provider_factory.h"
119119
#include "core/providers/nv_tensorrt_rtx/nv_provider_options.h"
120120

121+
#if defined(_WIN32) && !defined(NDEBUG) && defined(ONNXRUNTIME_ENABLE_MEMLEAK_CHECK)
122+
#include "core/platform/windows/debug_alloc.h"
123+
#endif
124+
121125
#if !defined(ORT_MINIMAL_BUILD) && \
122126
(defined(USE_TENSORRT) || defined(USE_TENSORRT_PROVIDER_INTERFACE) || \
123127
defined(USE_NV) || defined(USE_NV_PROVIDER_INTERFACE))
@@ -279,8 +283,13 @@ struct ProviderHostImpl : ProviderHost {
279283
return Status::OK();
280284
};
281285

286+
#if defined(_WIN32) && !defined(NDEBUG) && defined(ONNXRUNTIME_ENABLE_MEMLEAK_CHECK)
287+
void* HeapAllocate(size_t size) override { return DebugHeapAlloc(size, 1); }
288+
void HeapFree(void* p) override { DebugHeapFree(p); }
289+
#else
282290
void* HeapAllocate(size_t size) override { return new uint8_t[size]; }
283291
void HeapFree(void* p) override { delete[] reinterpret_cast<uint8_t*>(p); }
292+
#endif
284293

285294
logging::Logger* LoggingManager_GetDefaultLogger() override {
286295
return const_cast<logging::Logger*>(&logging::LoggingManager::DefaultLogger());

0 commit comments

Comments
 (0)