Skip to content

Commit c531a61

Browse files
ankitm3k, vraspar, adrianlizarraga, minfhong-quic, minfhong-quic
authored
Cherry-picks into rel-1.22.0 (microsoft#24580) (#680)
### Description Cherry pick the following into [rel-1.22.0](https://github.com/microsoft/onnxruntime/tree/rel-1.22.0) - (microsoft#24487) - (microsoft#24466) - (microsoft#24493) - (microsoft#24484) - (microsoft#24494) - (microsoft#24489) - (microsoft#24504) - (microsoft#24510) - (microsoft#24456) - (microsoft#24537) - (microsoft#24501) - (microsoft#24519) - (microsoft#24513) - (microsoft#24539) - (microsoft#24514) - (microsoft#24542) - (microsoft#24585) Not added: Planning to cherry pick Cuda Matmulnbits PRs once the fix for failing cuda pipeline is ready - (microsoft#24491) - (microsoft#24509) - (microsoft#24564) --------- Co-authored-by: vraspar <[email protected]> Co-authored-by: Adrian Lizarraga <[email protected]> Co-authored-by: minfhong-quic <[email protected]> Co-authored-by: minfhong-quic <[email protected]> Co-authored-by: Justin Chu <[email protected]> Co-authored-by: Prathik Rao <[email protected]> Co-authored-by: Edward Chen <[email protected]> Co-authored-by: Ankan Banerjee <[email protected]> Co-authored-by: Maximilian Müller <[email protected]> Co-authored-by: Gaurav Garg <[email protected]> Co-authored-by: iraut <[email protected]> Co-authored-by: Hrishikesh Manohar <[email protected]> Co-authored-by: Maximilian Müller <[email protected]> Co-authored-by: Scott McKay <[email protected]> Co-authored-by: Jiajia Qin <[email protected]> Co-authored-by: kunal-vaishnavi <[email protected]> Co-authored-by: xhcao <[email protected]>
1 parent c405425 commit c531a61

File tree

77 files changed

+7610
-133
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

77 files changed

+7610
-133
lines changed

cmake/CMakeLists.txt

+11
Original file line numberDiff line numberDiff line change
@@ -107,6 +107,7 @@ option(onnxruntime_ENABLE_MICROSOFT_INTERNAL "Use this option to enable/disable
107107
option(onnxruntime_USE_VITISAI "Build with Vitis-AI" OFF)
108108
option(onnxruntime_USE_TENSORRT "Build with TensorRT support" OFF)
109109
option(onnxruntime_USE_TENSORRT_BUILTIN_PARSER "Use TensorRT builtin parser" OFF)
110+
option(onnxruntime_USE_NV "Build with NV TensorRT RTX support" OFF)
110111
option(onnxruntime_ENABLE_LTO "Enable link time optimization" OFF)
111112
option(onnxruntime_CROSS_COMPILING "Cross compiling onnx runtime" OFF)
112113
option(onnxruntime_GCOV_COVERAGE "Compile with options necessary to run code coverage" OFF)
@@ -250,6 +251,7 @@ option(onnxruntime_USE_LOCK_FREE_QUEUE "Build with lock-free task queue for thre
250251
option(onnxruntime_FORCE_GENERIC_ALGORITHMS "Disable optimized arch-specific algorithms. Use only for testing and debugging generic algorithms." OFF)
251252

252253
option(onnxruntime_USE_TENSORRT_INTERFACE "Build ONNXRuntime shared lib which is compatible with TensorRT EP interface" OFF)
254+
option(onnxruntime_USE_NV_INTERFACE "Build ONNXRuntime shared lib which is compatible with NV EP interface" OFF)
253255
option(onnxruntime_USE_CUDA_INTERFACE "Build ONNXRuntime shared lib which is compatible with Cuda EP interface" OFF)
254256
option(onnxruntime_USE_OPENVINO_INTERFACE "Build ONNXRuntime shared lib which is compatible with OpenVINO EP interface" OFF)
255257
option(onnxruntime_USE_VITISAI_INTERFACE "Build ONNXRuntime shared lib which is compatible with Vitis-AI EP interface" OFF)
@@ -946,6 +948,15 @@ if (onnxruntime_USE_TENSORRT_INTERFACE AND (NOT onnxruntime_USE_TENSORRT))
946948
list(APPEND ORT_INTERFACE_FLAGS -DUSE_TENSORRT=1)
947949
endif()
948950

951+
if (onnxruntime_USE_NV)
952+
list(APPEND ORT_PROVIDER_FLAGS -DUSE_NV=1)
953+
list(APPEND ONNXRUNTIME_PROVIDER_NAMES nv_tensorrt_rtx)
954+
endif()
955+
956+
if (onnxruntime_USE_NV_INTERFACE AND (NOT onnxruntime_USE_NV))
957+
list(APPEND ORT_INTERFACE_FLAGS -DUSE_NV=1)
958+
endif()
959+
949960
if (onnxruntime_USE_RKNPU)
950961
list(APPEND ORT_PROVIDER_FLAGS -DUSE_RKNPU=1)
951962
list(APPEND ONNXRUNTIME_PROVIDER_NAMES rknpu)

cmake/onnxruntime_framework.cmake

+1-1
Original file line numberDiff line numberDiff line change
@@ -63,7 +63,7 @@ endif()
6363
if(onnxruntime_ENABLE_INSTRUMENT)
6464
target_compile_definitions(onnxruntime_framework PRIVATE ONNXRUNTIME_ENABLE_INSTRUMENT)
6565
endif()
66-
if(onnxruntime_USE_TENSORRT OR onnxruntime_USE_NCCL)
66+
if(onnxruntime_USE_TENSORRT OR onnxruntime_USE_NCCL OR onnxruntime_USE_NV)
6767
# TODO: for now, core framework depends on CUDA. It should be moved to TensorRT EP
6868
# TODO: provider_bridge_ort.cc should not include nccl.h
6969
target_include_directories(onnxruntime_framework PRIVATE ${ONNXRUNTIME_ROOT} PUBLIC ${CMAKE_CURRENT_BINARY_DIR} ${CMAKE_CUDA_TOOLKIT_INCLUDE_DIRECTORIES})

cmake/onnxruntime_providers.cmake

+4
Original file line numberDiff line numberDiff line change
@@ -132,6 +132,10 @@ if (onnxruntime_USE_TENSORRT)
132132
include(onnxruntime_providers_tensorrt.cmake)
133133
endif()
134134

135+
if (onnxruntime_USE_NV)
136+
include(onnxruntime_providers_nv.cmake)
137+
endif()
138+
135139
if (onnxruntime_USE_VITISAI)
136140
include(onnxruntime_providers_vitisai.cmake)
137141
endif()

cmake/onnxruntime_providers_nv.cmake

+202
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,202 @@
1+
# Copyright (c) Microsoft Corporation. All rights reserved.
2+
# Licensed under the MIT License.
3+
find_package(CUDAToolkit 12.8 REQUIRED)
4+
enable_language(CUDA)
5+
if(onnxruntime_DISABLE_CONTRIB_OPS)
6+
message( FATAL_ERROR "To compile the NV TensorRT RTX execution provider, contrib ops have to be enabled to dump an engine using the com.microsoft:EPContext node." )
7+
endif()
8+
add_definitions(-DUSE_NV=1)
9+
if (onnxruntime_NV_PLACEHOLDER_BUILDER)
10+
add_definitions(-DORT_NV_PLACEHOLDER_BUILDER)
11+
endif()
12+
set(BUILD_LIBRARY_ONLY 1)
13+
add_definitions("-DONNX_ML=1")
14+
add_definitions("-DONNX_NAMESPACE=onnx")
15+
set(CUDA_INCLUDE_DIRS ${CUDAToolkit_INCLUDE_DIRS})
16+
set(TENSORRT_ROOT ${onnxruntime_TENSORRT_HOME})
17+
set(OLD_CMAKE_CXX_FLAGS ${CMAKE_CXX_FLAGS})
18+
set(PROTOBUF_LIBRARY ${PROTOBUF_LIB})
19+
if (WIN32)
20+
set(OLD_CMAKE_CUDA_FLAGS ${CMAKE_CUDA_FLAGS})
21+
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} /wd4099 /wd4551 /wd4505 /wd4515 /wd4706 /wd4456 /wd4324 /wd4701 /wd4804 /wd4702 /wd4458 /wd4703")
22+
if (CMAKE_BUILD_TYPE STREQUAL "Debug")
23+
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} /wd4805")
24+
endif()
25+
set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -include algorithm")
26+
set(DISABLED_WARNINGS_FOR_TRT /wd4456)
27+
endif()
28+
if ( CMAKE_COMPILER_IS_GNUCC )
29+
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wno-unused-parameter -Wno-missing-field-initializers")
30+
endif()
31+
set(CXX_VERSION_DEFINED TRUE)
32+
33+
find_path(TENSORRT_INCLUDE_DIR NvInfer.h
34+
HINTS ${TENSORRT_ROOT}
35+
PATH_SUFFIXES include)
36+
37+
38+
file(READ ${TENSORRT_INCLUDE_DIR}/NvInferVersion.h NVINFER_VER_CONTENT)
39+
string(REGEX MATCH "define NV_TENSORRT_MAJOR * +([0-9]+)" NV_TENSORRT_MAJOR "${NVINFER_VER_CONTENT}")
40+
string(REGEX REPLACE "define NV_TENSORRT_MAJOR * +([0-9]+)" "\\1" NV_TENSORRT_MAJOR "${NV_TENSORRT_MAJOR}")
41+
string(REGEX MATCH "define NV_TENSORRT_MINOR * +([0-9]+)" NV_TENSORRT_MINOR "${NVINFER_VER_CONTENT}")
42+
string(REGEX REPLACE "define NV_TENSORRT_MINOR * +([0-9]+)" "\\1" NV_TENSORRT_MINOR "${NV_TENSORRT_MINOR}")
43+
string(REGEX MATCH "define NV_TENSORRT_PATCH * +([0-9]+)" NV_TENSORRT_PATCH "${NVINFER_VER_CONTENT}")
44+
string(REGEX REPLACE "define NV_TENSORRT_PATCH * +([0-9]+)" "\\1" NV_TENSORRT_PATCH "${NV_TENSORRT_PATCH}")
45+
math(EXPR NV_TENSORRT_MAJOR_INT "${NV_TENSORRT_MAJOR}")
46+
math(EXPR NV_TENSORRT_MINOR_INT "${NV_TENSORRT_MINOR}")
47+
math(EXPR NV_TENSORRT_PATCH_INT "${NV_TENSORRT_PATCH}")
48+
49+
if (NV_TENSORRT_MAJOR)
50+
MESSAGE(STATUS "NV_TENSORRT_MAJOR is ${NV_TENSORRT_MAJOR}")
51+
else()
52+
MESSAGE(STATUS "Can't find NV_TENSORRT_MAJOR macro")
53+
endif()
54+
55+
# Check TRT version >= 10.0.1.6
56+
if ((NV_TENSORRT_MAJOR_INT GREATER 10) OR
57+
(NV_TENSORRT_MAJOR_INT EQUAL 10 AND NV_TENSORRT_MINOR_INT GREATER 0) OR
58+
(NV_TENSORRT_MAJOR_INT EQUAL 10 AND NV_TENSORRT_PATCH_INT GREATER 0))
59+
set(TRT_GREATER_OR_EQUAL_TRT_10_GA ON)
60+
else()
61+
message( FATAL_ERROR "Only TensorRT 10.x or higher is supported." )
62+
endif()
63+
64+
# TensorRT 10 GA onwards, the TensorRT libraries will have major version appended to the end on Windows,
65+
# for example, nvinfer_10.dll, nvonnxparser_10.dll ...
66+
if (WIN32 AND TRT_GREATER_OR_EQUAL_TRT_10_GA)
67+
set(NVINFER_LIB "nvinfer_${NV_TENSORRT_MAJOR}")
68+
set(PARSER_LIB "nvonnxparser_${NV_TENSORRT_MAJOR}")
69+
endif()
70+
71+
if (NOT NVINFER_LIB)
72+
set(NVINFER_LIB "nvinfer")
73+
endif()
74+
75+
if (NOT PARSER_LIB)
76+
set(PARSER_LIB "nvonnxparser")
77+
endif()
78+
79+
MESSAGE(STATUS "Looking for ${NVINFER_LIB}")
80+
81+
find_library(TENSORRT_LIBRARY_INFER ${NVINFER_LIB}
82+
HINTS ${TENSORRT_ROOT}
83+
PATH_SUFFIXES lib lib64 lib/x64)
84+
85+
if (NOT TENSORRT_LIBRARY_INFER)
86+
MESSAGE(STATUS "Can't find ${NVINFER_LIB}")
87+
endif()
88+
89+
if (onnxruntime_USE_TENSORRT_BUILTIN_PARSER)
90+
MESSAGE(STATUS "Looking for ${PARSER_LIB}")
91+
92+
find_library(TENSORRT_LIBRARY_NVONNXPARSER ${PARSER_LIB}
93+
HINTS ${TENSORRT_ROOT}
94+
PATH_SUFFIXES lib lib64 lib/x64)
95+
96+
if (NOT TENSORRT_LIBRARY_NVONNXPARSER)
97+
MESSAGE(STATUS "Can't find ${PARSER_LIB}")
98+
endif()
99+
100+
set(TENSORRT_LIBRARY ${TENSORRT_LIBRARY_INFER} ${TENSORRT_LIBRARY_NVONNXPARSER})
101+
MESSAGE(STATUS "Find TensorRT libs at ${TENSORRT_LIBRARY}")
102+
else()
103+
if (TRT_GREATER_OR_EQUAL_TRT_10_GA)
104+
set(ONNX_USE_LITE_PROTO ON)
105+
endif()
106+
onnxruntime_fetchcontent_declare(
107+
onnx_tensorrt
108+
URL ${DEP_URL_onnx_tensorrt}
109+
URL_HASH SHA1=${DEP_SHA1_onnx_tensorrt}
110+
EXCLUDE_FROM_ALL
111+
)
112+
if (NOT CUDA_INCLUDE_DIR)
113+
set(CUDA_INCLUDE_DIR ${CUDAToolkit_INCLUDE_DIRS}) # onnx-tensorrt repo needs this variable to build
114+
endif()
115+
# The onnx_tensorrt repo contains a test program, getSupportedAPITest, which doesn't support Windows. It uses
116+
# unistd.h. So we must exclude it from our build. onnxruntime_fetchcontent_makeavailable is for the purpose.
117+
onnxruntime_fetchcontent_makeavailable(onnx_tensorrt)
118+
include_directories(${onnx_tensorrt_SOURCE_DIR})
119+
set(CMAKE_CXX_FLAGS ${OLD_CMAKE_CXX_FLAGS})
120+
if ( CMAKE_COMPILER_IS_GNUCC )
121+
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wno-unused-parameter")
122+
endif()
123+
if (WIN32)
124+
set(CMAKE_CUDA_FLAGS ${OLD_CMAKE_CUDA_FLAGS})
125+
unset(PROTOBUF_LIBRARY)
126+
unset(OLD_CMAKE_CXX_FLAGS)
127+
unset(OLD_CMAKE_CUDA_FLAGS)
128+
set_target_properties(${PARSER_LIB} PROPERTIES LINK_FLAGS "/ignore:4199")
129+
target_compile_options(nvonnxparser_static PRIVATE /FIio.h /wd4100)
130+
target_compile_options(${PARSER_LIB} PRIVATE /FIio.h /wd4100)
131+
endif()
132+
# Static libraries are just nvonnxparser_static on all platforms
133+
set(onnxparser_link_libs nvonnxparser_static)
134+
set(TENSORRT_LIBRARY ${TENSORRT_LIBRARY_INFER})
135+
MESSAGE(STATUS "Find TensorRT libs at ${TENSORRT_LIBRARY}")
136+
endif()
137+
138+
include_directories(${TENSORRT_INCLUDE_DIR})
139+
# ${TENSORRT_LIBRARY} is empty if we link nvonnxparser_static.
140+
# nvonnxparser_static is linked against tensorrt libraries in onnx-tensorrt
141+
# See https://github.com/onnx/onnx-tensorrt/blob/8af13d1b106f58df1e98945a5e7c851ddb5f0791/CMakeLists.txt#L121
142+
# However, starting from TRT 10 GA, nvonnxparser_static doesn't link against tensorrt libraries.
143+
# Therefore, the above code finds ${TENSORRT_LIBRARY_INFER}
144+
set(trt_link_libs ${CMAKE_DL_LIBS} ${TENSORRT_LIBRARY})
145+
file(GLOB_RECURSE onnxruntime_providers_nv_tensorrt_rtx_cc_srcs CONFIGURE_DEPENDS
146+
"${ONNXRUNTIME_ROOT}/core/providers/nv_tensorrt_rtx/*.h"
147+
"${ONNXRUNTIME_ROOT}/core/providers/nv_tensorrt_rtx/*.cc"
148+
"${ONNXRUNTIME_ROOT}/core/providers/shared_library/*.h"
149+
"${ONNXRUNTIME_ROOT}/core/providers/shared_library/*.cc"
150+
"${ONNXRUNTIME_ROOT}/core/providers/cuda/cuda_stream_handle.h"
151+
"${ONNXRUNTIME_ROOT}/core/providers/cuda/cuda_stream_handle.cc"
152+
"${ONNXRUNTIME_ROOT}/core/providers/cuda/cuda_graph.h"
153+
"${ONNXRUNTIME_ROOT}/core/providers/cuda/cuda_graph.cc"
154+
)
155+
156+
source_group(TREE ${ONNXRUNTIME_ROOT}/core FILES ${onnxruntime_providers_nv_tensorrt_rtx_cc_srcs})
157+
onnxruntime_add_shared_library_module(onnxruntime_providers_nv_tensorrt_rtx ${onnxruntime_providers_nv_tensorrt_rtx_cc_srcs})
158+
onnxruntime_add_include_to_target(onnxruntime_providers_nv_tensorrt_rtx onnxruntime_common)
159+
target_link_libraries(onnxruntime_providers_nv_tensorrt_rtx PRIVATE Eigen3::Eigen onnx flatbuffers::flatbuffers Boost::mp11 safeint_interface)
160+
add_dependencies(onnxruntime_providers_nv_tensorrt_rtx onnxruntime_providers_shared ${onnxruntime_EXTERNAL_DEPENDENCIES})
161+
if (onnxruntime_USE_TENSORRT_BUILTIN_PARSER)
162+
target_link_libraries(onnxruntime_providers_nv_tensorrt_rtx PRIVATE ${trt_link_libs} ${ONNXRUNTIME_PROVIDERS_SHARED} ${PROTOBUF_LIB} flatbuffers::flatbuffers Boost::mp11 safeint_interface ${ABSEIL_LIBS} PUBLIC CUDA::cudart)
163+
else()
164+
target_link_libraries(onnxruntime_providers_nv_tensorrt_rtx PRIVATE ${onnxparser_link_libs} ${trt_link_libs} ${ONNXRUNTIME_PROVIDERS_SHARED} ${PROTOBUF_LIB} flatbuffers::flatbuffers ${ABSEIL_LIBS} PUBLIC CUDA::cudart)
165+
endif()
166+
target_include_directories(onnxruntime_providers_nv_tensorrt_rtx PRIVATE ${ONNXRUNTIME_ROOT} ${CMAKE_CURRENT_BINARY_DIR}
167+
PUBLIC ${CUDAToolkit_INCLUDE_DIRS})
168+
169+
# ${CMAKE_CURRENT_BINARY_DIR} is so that #include "onnxruntime_config.h" inside tensor_shape.h is found
170+
set_target_properties(onnxruntime_providers_nv_tensorrt_rtx PROPERTIES LINKER_LANGUAGE CUDA)
171+
set_target_properties(onnxruntime_providers_nv_tensorrt_rtx PROPERTIES FOLDER "ONNXRuntime")
172+
target_compile_definitions(onnxruntime_providers_nv_tensorrt_rtx PRIVATE ONNXIFI_BUILD_LIBRARY=1)
173+
target_compile_options(onnxruntime_providers_nv_tensorrt_rtx PRIVATE ${DISABLED_WARNINGS_FOR_TRT})
174+
if (WIN32)
175+
target_compile_options(onnxruntime_providers_nv_tensorrt_rtx INTERFACE /wd4456)
176+
endif()
177+
# set CUDA_MINIMAL as default for NV provider since we do not have fallback to CUDA
178+
target_compile_definitions(onnxruntime_providers_nv_tensorrt_rtx PRIVATE USE_CUDA_MINIMAL=1)
179+
180+
# Needed for the provider interface, as it includes training headers when training is enabled
181+
if (onnxruntime_ENABLE_TRAINING_OPS)
182+
target_include_directories(onnxruntime_providers_nv_tensorrt_rtx PRIVATE ${ORTTRAINING_ROOT})
183+
if (onnxruntime_ENABLE_TRAINING_TORCH_INTEROP)
184+
onnxruntime_add_include_to_target(onnxruntime_providers_nv_tensorrt_rtx Python::Module)
185+
endif()
186+
endif()
187+
188+
if(APPLE)
189+
set_property(TARGET onnxruntime_providers_nv_tensorrt_rtx APPEND_STRING PROPERTY LINK_FLAGS "-Xlinker -exported_symbols_list ${ONNXRUNTIME_ROOT}/core/providers/nv_tensorrt_rtx/exported_symbols.lst")
190+
elseif(UNIX)
191+
set_property(TARGET onnxruntime_providers_nv_tensorrt_rtx APPEND_STRING PROPERTY COMPILE_FLAGS "-Wno-deprecated-declarations")
192+
set_property(TARGET onnxruntime_providers_nv_tensorrt_rtx APPEND_STRING PROPERTY LINK_FLAGS "-Xlinker --version-script=${ONNXRUNTIME_ROOT}/core/providers/nv_tensorrt_rtx/version_script.lds -Xlinker --gc-sections")
193+
elseif(WIN32)
194+
set_property(TARGET onnxruntime_providers_nv_tensorrt_rtx APPEND_STRING PROPERTY LINK_FLAGS "-DEF:${ONNXRUNTIME_ROOT}/core/providers/nv_tensorrt_rtx/symbols.def")
195+
else()
196+
message(FATAL_ERROR "onnxruntime_providers_nv_tensorrt_rtx unknown platform, need to specify shared library exports for it")
197+
endif()
198+
199+
install(TARGETS onnxruntime_providers_nv_tensorrt_rtx
200+
ARCHIVE DESTINATION ${CMAKE_INSTALL_LIBDIR}
201+
LIBRARY DESTINATION ${CMAKE_INSTALL_LIBDIR}
202+
RUNTIME DESTINATION ${CMAKE_INSTALL_BINDIR})

cmake/onnxruntime_python.cmake

+10
Original file line numberDiff line numberDiff line change
@@ -928,6 +928,16 @@ if (onnxruntime_USE_TENSORRT)
928928
)
929929
endif()
930930

931+
if (onnxruntime_USE_NV)
932+
add_custom_command(
933+
TARGET onnxruntime_pybind11_state POST_BUILD
934+
COMMAND ${CMAKE_COMMAND} -E copy
935+
$<TARGET_FILE:onnxruntime_providers_nv_tensorrt_rtx>
936+
$<TARGET_FILE:onnxruntime_providers_shared>
937+
$<TARGET_FILE_DIR:${build_output_target}>/onnxruntime/capi/
938+
)
939+
endif()
940+
931941
if (onnxruntime_USE_MIGRAPHX)
932942
add_custom_command(
933943
TARGET onnxruntime_pybind11_state POST_BUILD

0 commit comments

Comments
 (0)