From 780e23f2e37b1d134d0147ff5df5fe8147e8e8e2 Mon Sep 17 00:00:00 2001 From: Lei Cao Date: Fri, 13 Dec 2024 15:36:32 +0000 Subject: [PATCH 01/13] add enable_tensorrt_interface and build ort.so only --- cmake/onnxruntime_providers.cmake | 4 ++-- tools/ci_build/build.py | 9 +++++++++ 2 files changed, 11 insertions(+), 2 deletions(-) diff --git a/cmake/onnxruntime_providers.cmake b/cmake/onnxruntime_providers.cmake index 582491de9503d..a3f7f71c16d12 100644 --- a/cmake/onnxruntime_providers.cmake +++ b/cmake/onnxruntime_providers.cmake @@ -123,7 +123,7 @@ if(onnxruntime_USE_SNPE) endif() include(onnxruntime_providers_cpu.cmake) -if (onnxruntime_USE_CUDA) +if (onnxruntime_USE_CUDA AND (NOT onnxruntime_ENABLE_TRT_INTERFACE)) include(onnxruntime_providers_cuda.cmake) endif() @@ -131,7 +131,7 @@ if (onnxruntime_USE_DNNL) include(onnxruntime_providers_dnnl.cmake) endif() -if (onnxruntime_USE_TENSORRT) +if (onnxruntime_USE_TENSORRT AND (NOT onnxruntime_ENABLE_TRT_INTERFACE)) include(onnxruntime_providers_tensorrt.cmake) endif() diff --git a/tools/ci_build/build.py b/tools/ci_build/build.py index 3527a89ca7a7b..1df13c710a093 100644 --- a/tools/ci_build/build.py +++ b/tools/ci_build/build.py @@ -764,6 +764,8 @@ def convert_arg_line_to_args(self, arg_line): parser.add_argument("--use_triton_kernel", action="store_true", help="Use triton compiled kernels") parser.add_argument("--use_lock_free_queue", action="store_true", help="Use lock-free task queue for threadpool.") + parser.add_argument("--enable_tensorrt_interface", action="store_true", help="build ORT shared library and compatible bridge with tensorrt, but not TRT EP nor tests") + if not is_windows(): parser.add_argument( "--allow_running_as_root", @@ -1024,6 +1026,7 @@ def generate_build_tree( "-Donnxruntime_USE_TENSORRT=" + ("ON" if args.use_tensorrt else "OFF"), "-Donnxruntime_USE_TENSORRT_BUILTIN_PARSER=" + ("ON" if args.use_tensorrt_builtin_parser and not args.use_tensorrt_oss_parser else "OFF"), + 
"-Donnxruntime_ENABLE_TRT_INTERFACE=" + ("ON" if args.enable_tensorrt_interface else "OFF"), # set vars for migraphx "-Donnxruntime_USE_MIGRAPHX=" + ("ON" if args.use_migraphx else "OFF"), "-Donnxruntime_DISABLE_CONTRIB_OPS=" + ("ON" if args.disable_contrib_ops else "OFF"), @@ -1454,6 +1457,9 @@ def generate_build_tree( "-Donnxruntime_USE_FULL_PROTOBUF=ON", ] + if args.enable_tensorrt_interface: + cmake_args += ["-Donnxruntime_BUILD_UNIT_TESTS=OFF"] + if args.enable_lazy_tensor: import torch @@ -2567,6 +2573,9 @@ def main(): # Disable ONNX Runtime's builtin memory checker args.disable_memleak_checker = True + if args.enable_tensorrt_interface: + args.use_tensorrt, args.test = True, False + # If there was no explicit argument saying what to do, default # to update, build and test (for native builds). if not (args.update or args.clean or args.build or args.test or args.gen_doc): From 9de9b4ca55aed04baa04d61256d487820971ff0c Mon Sep 17 00:00:00 2001 From: jslhcl Date: Mon, 16 Dec 2024 07:23:29 -0800 Subject: [PATCH 02/13] add enable_openvino_interface parameter in build option and e2e test, now a runtime error 'LoadLibrary failed with error 1114' when loading onnxruntime_providers_openvino.dll --- cmake/onnxruntime_providers.cmake | 2 +- samples/GenericInterface/CMakeLists.txt | 13 +++++ samples/GenericInterface/test.cpp | 72 +++++++++++++++++++++++++ tools/ci_build/build.py | 6 ++- 4 files changed, 91 insertions(+), 2 deletions(-) create mode 100644 samples/GenericInterface/CMakeLists.txt create mode 100644 samples/GenericInterface/test.cpp diff --git a/cmake/onnxruntime_providers.cmake b/cmake/onnxruntime_providers.cmake index a3f7f71c16d12..df6e0f073bfd9 100644 --- a/cmake/onnxruntime_providers.cmake +++ b/cmake/onnxruntime_providers.cmake @@ -139,7 +139,7 @@ if (onnxruntime_USE_VITISAI) include(onnxruntime_providers_vitisai.cmake) endif() -if (onnxruntime_USE_OPENVINO) +if (onnxruntime_USE_OPENVINO AND (NOT onnxruntime_ENABLE_OPENVINO_INTERFACE)) 
include(onnxruntime_providers_openvino.cmake) endif() diff --git a/samples/GenericInterface/CMakeLists.txt b/samples/GenericInterface/CMakeLists.txt new file mode 100644 index 0000000000000..29c141adbbab5 --- /dev/null +++ b/samples/GenericInterface/CMakeLists.txt @@ -0,0 +1,13 @@ +# usage: +# cd build/ +# cmake -S ../ -B ./ -DCMAKE_BUILD_TYPE=Debug +# cmake --build ./ +# NOTE: For Windows, copy onnxruntime.dll and onnxruntime.pdb into the same folder of TestOutTreeEp.exe, otherwise, during runtime, +# it will search the default system path (C:\Windows\System32) for onnxruntime.dll +cmake_minimum_required(VERSION 3.26) +project(GenericOrtEpInterface) +add_executable(GenericOrtEpInterface test.cpp) + +target_include_directories(GenericOrtEpInterface PUBLIC "../../include/onnxruntime") +target_link_libraries(GenericOrtEpInterface PUBLIC "C:/Users/leca/source/onnxruntime3/samples/GenericInterface/build/Debug/onnxruntime.lib") + diff --git a/samples/GenericInterface/test.cpp b/samples/GenericInterface/test.cpp new file mode 100644 index 0000000000000..64cb95e2b8388 --- /dev/null +++ b/samples/GenericInterface/test.cpp @@ -0,0 +1,72 @@ +#include +#include +#include +#include "core/session/onnxruntime_c_api.h" + +const OrtApi* g_ort = OrtGetApiBase()->GetApi(ORT_API_VERSION); + +inline void THROW_ON_ERROR(OrtStatus* status) { + if (status != nullptr) { + std::cout<<"ErrorMessage:"<GetErrorMessage(status)<<"\n"; + abort(); + } +} + +void RunRelu(const OrtApi* g_ort, OrtEnv* p_env, OrtSessionOptions* so) { + OrtSession* session = nullptr; +#ifdef _WIN32 + THROW_ON_ERROR(g_ort->CreateSession(p_env, L"C:/share/models/relu/Relu.onnx", so, &session)); +#else + THROW_ON_ERROR(g_ort->CreateSession(p_env, "/home/leca/code/onnxruntime/samples/c_test/Relu.onnx", so, &session)); +#endif + + OrtMemoryInfo* memory_info = nullptr; + THROW_ON_ERROR(g_ort->CreateCpuMemoryInfo(OrtArenaAllocator, OrtMemTypeDefault, &memory_info)); + float input_data[] = {-3.0f, 5.0f, -2.0f, 4.0f}; + const 
size_t input_len = 4 * sizeof(float); + const int64_t input_shape[] = {4}; + const size_t shape_len = sizeof(input_shape)/sizeof(input_shape[0]); + + OrtValue* input_tensor = nullptr; + THROW_ON_ERROR(g_ort->CreateTensorWithDataAsOrtValue(memory_info, input_data, input_len, input_shape, shape_len, ONNX_TENSOR_ELEMENT_DATA_TYPE_FLOAT, &input_tensor)); + + const char* input_names[] = {"x"}; + const char* output_names[] = {"graphOut"}; + OrtValue* output_tensor = nullptr; + THROW_ON_ERROR(g_ort->Run(session, nullptr, input_names, (const OrtValue* const*)&input_tensor, 1, output_names, 1, &output_tensor)); + + float* output_tensor_data = nullptr; + THROW_ON_ERROR(g_ort->GetTensorMutableData(output_tensor, (void**)&output_tensor_data)); + std::cout<<"Result:\n"; + for (size_t i = 0; i < 4; i++) std::cout<>a; + + OrtEnv* p_env = nullptr; + OrtLoggingLevel log_level = OrtLoggingLevel::ORT_LOGGING_LEVEL_ERROR;//OrtLoggingLevel::ORT_LOGGING_LEVEL_INFO; + THROW_ON_ERROR(g_ort->CreateEnv(log_level, "", &p_env)); + OrtSessionOptions* so = nullptr; + THROW_ON_ERROR(g_ort->CreateSessionOptions(&so)); + + OrtTensorRTProviderOptionsV2* tensorrt_options = nullptr; + THROW_ON_ERROR(g_ort->CreateTensorRTProviderOptions(&tensorrt_options)); + THROW_ON_ERROR(g_ort->SessionOptionsAppendExecutionProvider_TensorRT_V2(so, tensorrt_options)); + + std::unordered_map ov_options; + ov_options["device_type"] = "CPU"; + ov_options["precision"] = "FP32"; + std::vector keys, values; + for (const auto& entry : ov_options) { + keys.push_back(entry.first.c_str()); + values.push_back(entry.second.c_str()); + } + THROW_ON_ERROR(g_ort->SessionOptionsAppendExecutionProvider_OpenVINO_V2(so, keys.data(), values.data(), keys.size())); + + RunRelu(g_ort, p_env, so); + + return 0; +} diff --git a/tools/ci_build/build.py b/tools/ci_build/build.py index 1df13c710a093..5a258725541ba 100644 --- a/tools/ci_build/build.py +++ b/tools/ci_build/build.py @@ -765,6 +765,7 @@ def convert_arg_line_to_args(self, 
arg_line): parser.add_argument("--use_lock_free_queue", action="store_true", help="Use lock-free task queue for threadpool.") parser.add_argument("--enable_tensorrt_interface", action="store_true", help="build ORT shared library and compatible bridge with tensorrt, but not TRT EP nor tests") + parser.add_argument("--enable_openvino_interface", action="store_true", help="build ORT shared library and compatible bridge with OpenVINO, but not OpenVINO EP nor tests") if not is_windows(): parser.add_argument( @@ -1027,6 +1028,7 @@ def generate_build_tree( "-Donnxruntime_USE_TENSORRT_BUILTIN_PARSER=" + ("ON" if args.use_tensorrt_builtin_parser and not args.use_tensorrt_oss_parser else "OFF"), "-Donnxruntime_ENABLE_TRT_INTERFACE=" + ("ON" if args.enable_tensorrt_interface else "OFF"), + "-Donnxruntime_ENABLE_OPENVINO_INTERFACE=" + ("ON" if args.enable_openvino_interface else "OFF"), # set vars for migraphx "-Donnxruntime_USE_MIGRAPHX=" + ("ON" if args.use_migraphx else "OFF"), "-Donnxruntime_DISABLE_CONTRIB_OPS=" + ("ON" if args.disable_contrib_ops else "OFF"), @@ -1457,7 +1459,7 @@ def generate_build_tree( "-Donnxruntime_USE_FULL_PROTOBUF=ON", ] - if args.enable_tensorrt_interface: + if args.enable_tensorrt_interface or args.enable_openvino_interface: cmake_args += ["-Donnxruntime_BUILD_UNIT_TESTS=OFF"] if args.enable_lazy_tensor: @@ -2575,6 +2577,8 @@ def main(): if args.enable_tensorrt_interface: args.use_tensorrt, args.test = True, False + if args.enable_openvino_interface: + args.use_openvino, args.test = "CPU", False # If there was no explicit argument saying what to do, default # to update, build and test (for native builds). 
From 0273bf596f2f918064445a391477922f0725c199 Mon Sep 17 00:00:00 2001 From: jslhcl Date: Tue, 17 Dec 2024 07:39:21 -0800 Subject: [PATCH 03/13] fix the runtime error and now trt and openvino can run together --- .../core/providers/shared_library/provider_interfaces.h | 8 ++++---- onnxruntime/core/session/provider_bridge_ort.cc | 8 ++++---- 2 files changed, 8 insertions(+), 8 deletions(-) diff --git a/onnxruntime/core/providers/shared_library/provider_interfaces.h b/onnxruntime/core/providers/shared_library/provider_interfaces.h index 7ab93d56cfe26..a5bdaa58f6474 100644 --- a/onnxruntime/core/providers/shared_library/provider_interfaces.h +++ b/onnxruntime/core/providers/shared_library/provider_interfaces.h @@ -167,7 +167,7 @@ struct ProviderHost { virtual std::string demangle(const char* name) = 0; virtual std::string demangle(const std::string& name) = 0; -#ifdef USE_CUDA +//#ifdef USE_CUDA virtual std::unique_ptr CreateCUDAAllocator(int16_t device_id, const char* name) = 0; virtual std::unique_ptr CreateCUDAPinnedAllocator(const char* name) = 0; virtual std::unique_ptr CreateGPUDataTransfer() = 0; @@ -179,7 +179,7 @@ struct ProviderHost { virtual Status CudaCall_false(int retCode, const char* exprString, const char* libName, int successCode, const char* msg, const char* file, const int line) = 0; virtual void CudaCall_true(int retCode, const char* exprString, const char* libName, int successCode, const char* msg, const char* file, const int line) = 0; -#endif +//#endif #ifdef USE_MIGRAPHX virtual std::unique_ptr CreateMIGraphXAllocator(int16_t device_id, const char* name) = 0; @@ -1177,9 +1177,9 @@ struct ProviderHost { virtual training::DistributedRunContext& GetDistributedRunContextInstance() = 0; #endif -#if defined(USE_CUDA) || defined(USE_ROCM) +//#if defined(USE_CUDA) || defined(USE_ROCM) virtual PhiloxGenerator& PhiloxGenerator__Default() = 0; -#endif +//#endif #ifdef ENABLE_TRAINING_TORCH_INTEROP virtual void 
contrib__PythonOpBase__Init(contrib::PythonOpBase* p, const OpKernelInfo& info) = 0; diff --git a/onnxruntime/core/session/provider_bridge_ort.cc b/onnxruntime/core/session/provider_bridge_ort.cc index e0c479dbc7637..8d4892879f711 100644 --- a/onnxruntime/core/session/provider_bridge_ort.cc +++ b/onnxruntime/core/session/provider_bridge_ort.cc @@ -234,7 +234,7 @@ struct ProviderHostImpl : ProviderHost { void* CPUAllocator__Alloc(CPUAllocator* p, size_t size) override { return p->CPUAllocator::Alloc(size); } void CPUAllocator__Free(CPUAllocator* p, void* allocation) override { return p->CPUAllocator::Free(allocation); } -#ifdef USE_CUDA +//#ifdef USE_CUDA std::unique_ptr CreateCUDAAllocator(int16_t device_id, const char* name) override { return GetProviderInfo_CUDA().CreateCUDAAllocator(device_id, name); } std::unique_ptr CreateCUDAPinnedAllocator(const char* name) override { return GetProviderInfo_CUDA().CreateCUDAPinnedAllocator(name); } std::unique_ptr CreateGPUDataTransfer() override { return GetProviderInfo_CUDA().CreateGPUDataTransfer(); } @@ -247,7 +247,7 @@ struct ProviderHostImpl : ProviderHost { Status CudaCall_false(int retCode, const char* exprString, const char* libName, int successCode, const char* msg, const char* file, const int line) override { return GetProviderInfo_CUDA().CudaCall_false(retCode, exprString, libName, successCode, msg, file, line); } void CudaCall_true(int retCode, const char* exprString, const char* libName, int successCode, const char* msg, const char* file, const int line) override { GetProviderInfo_CUDA().CudaCall_true(retCode, exprString, libName, successCode, msg, file, line); } -#endif +//#endif #ifdef USE_MIGRAPHX std::unique_ptr CreateMIGraphXAllocator(int16_t device_id, const char* name) override { return GetProviderInfo_MIGraphX().CreateMIGraphXAllocator(device_id, name); } @@ -1419,9 +1419,9 @@ struct ProviderHostImpl : ProviderHost { training::DistributedRunContext& GetDistributedRunContextInstance() override { return 
training::DistributedRunContext::GetInstance(); } #endif -#if defined(USE_CUDA) || defined(USE_ROCM) +//#if defined(USE_CUDA) || defined(USE_ROCM) PhiloxGenerator& PhiloxGenerator__Default() override { return PhiloxGenerator::Default(); } -#endif +//#endif #ifdef ENABLE_TRAINING_TORCH_INTEROP void contrib__PythonOpBase__Init(contrib::PythonOpBase* p, const OpKernelInfo& info) override { p->PythonOpBase::Init(info); } From e5bb3da3ee4dfd29fff1c1875bb6cd524cf6d733 Mon Sep 17 00:00:00 2001 From: Karim Vadsariya Date: Fri, 10 Jan 2025 09:41:25 -0800 Subject: [PATCH 04/13] Experimental changes to remove the IHV SDK dependencies when building onnxruntime.dll onnxruntime_shared.dll --- cmake/CMakeLists.txt | 33 ++++++------ .../external/onnxruntime_external_deps.cmake | 5 +- cmake/onnxruntime.cmake | 8 +-- cmake/onnxruntime_providers.cmake | 15 +++--- cmake/onnxruntime_python.cmake | 8 +-- tools/ci_build/build.py | 50 ++++++++++++------- 6 files changed, 68 insertions(+), 51 deletions(-) diff --git a/cmake/CMakeLists.txt b/cmake/CMakeLists.txt index 7710ab2f4cac7..c60bafd9be0d6 100644 --- a/cmake/CMakeLists.txt +++ b/cmake/CMakeLists.txt @@ -634,7 +634,7 @@ if (WIN32) # structure was padded due to __declspec(align()) list(APPEND ORT_WARNING_FLAGS "/wd4324") # warning C4800: Implicit conversion from 'X' to bool. Possible information loss - if (onnxruntime_USE_OPENVINO) + if (onnxruntime_USE_OPENVINO) #TODO[Karim] applys to all projects... 
list(APPEND ORT_WARNING_FLAGS "/wd4800") endif() # operator 'operator-name': deprecated between enumerations of different types @@ -758,7 +758,7 @@ set(ONNXRUNTIME_PROVIDER_NAMES cpu) set(ORT_PROVIDER_FLAGS) set(ORT_PROVIDER_CMAKE_FLAGS) -if (onnxruntime_USE_CUDA) +if (onnxruntime_USE_CUDA AND (NOT onnxruntime_ENABLE_GENERIC_INTERFACE)) enable_language(CUDA) message( STATUS "CMAKE_CUDA_COMPILER_VERSION: ${CMAKE_CUDA_COMPILER_VERSION}") @@ -793,7 +793,7 @@ endif() if (onnxruntime_USE_CUDA) list(APPEND ORT_PROVIDER_FLAGS -DUSE_CUDA=1) - list(APPEND ORT_PROVIDER_CMAKE_FLAGS -Donnxruntime_USE_CUDA=1) + list(APPEND ORT_PROVIDER_CMAKE_FLAGS -Donnxruntime_USE_CUDA=1) #TODO[Karim] Not used anywhere list(APPEND ONNXRUNTIME_PROVIDER_NAMES cuda) if (onnxruntime_USE_FLASH_ATTENTION) @@ -856,7 +856,7 @@ if (onnxruntime_USE_JSEP) list(APPEND ORT_PROVIDER_CMAKE_FLAGS -Donnxruntime_USE_JSEP=1) list(APPEND ONNXRUNTIME_PROVIDER_NAMES js) endif() -if (onnxruntime_USE_QNN) +if (onnxruntime_USE_QNN) #TODO[Karim] list(APPEND ORT_PROVIDER_FLAGS -DUSE_QNN=1) list(APPEND ORT_PROVIDER_CMAKE_FLAGS -Donnxruntime_USE_QNN=1) list(APPEND ONNXRUNTIME_PROVIDER_NAMES qnn) @@ -1032,14 +1032,15 @@ function(onnxruntime_set_compile_flags target_name) endif() set_target_properties(${target_name} PROPERTIES COMPILE_WARNING_AS_ERROR ON) - if (onnxruntime_USE_CUDA) + + if (onnxruntime_USE_CUDA AND (NOT onnxruntime_ENABLE_GENERIC_INTERFACE)) # Suppress a "conversion_function_not_usable" warning in gsl/span target_compile_options(${target_name} PRIVATE "$<$:SHELL:-Xcudafe \"--diag_suppress=conversion_function_not_usable\">") target_compile_definitions(${target_name} PRIVATE -DDISABLE_CUSPARSE_DEPRECATED) endif() if (MSVC) foreach(CMAKE_CUDA_TOOLKIT_INCLUDE_DIRECTORY ${CMAKE_CUDA_TOOLKIT_INCLUDE_DIRECTORIES}) - target_compile_options(${target_name} PRIVATE "$<$:/external:I${CMAKE_CUDA_TOOLKIT_INCLUDE_DIRECTORY}>") + #target_compile_options(${target_name} PRIVATE 
"$<$:/external:I${CMAKE_CUDA_TOOLKIT_INCLUDE_DIRECTORY}>") endforeach() foreach(onnxruntime_external_lib IN LISTS onnxruntime_EXTERNAL_LIBRARIES) @@ -1097,7 +1098,7 @@ function(onnxruntime_set_compile_flags target_name) target_compile_options(${target_name} PRIVATE "-Wno-unused-function") endif() endif() - foreach(ORT_FLAG ${ORT_PROVIDER_FLAGS}) + foreach(ORT_FLAG ${ORT_PROVIDER_FLAGS}) #TODO[Karim] applies to all projects... target_compile_definitions(${target_name} PRIVATE ${ORT_FLAG}) endforeach() if (HAS_DEPRECATED_COPY) @@ -1110,7 +1111,7 @@ function(onnxruntime_set_compile_flags target_name) if("${CMAKE_C_COMPILER_ID}" STREQUAL "GNU" AND CMAKE_C_COMPILER_VERSION VERSION_LESS 13 AND CMAKE_C_COMPILER_VERSION VERSION_GREATER_EQUAL 12) target_compile_options(${target_name} PRIVATE "$<$:-Wno-maybe-uninitialized>") endif() - if (onnxruntime_USE_CUDA) + if (onnxruntime_USE_CUDA AND (NOT onnxruntime_ENABLE_GENERIC_INTERFACE)) foreach(FLAG ${ORT_WARNING_FLAGS}) target_compile_options(${target_name} PRIVATE "$<$:SHELL:--compiler-options ${FLAG}>") endforeach() @@ -1332,7 +1333,7 @@ if (onnxruntime_ENABLE_TRAINING_APIS) ) endif() -if (onnxruntime_USE_OPENVINO) +if (onnxruntime_USE_OPENVINO) #TODO[Karim] applies to both onnxruntime.dll and onnxruntime_shared.dll add_definitions(-DUSE_OPENVINO=1) @@ -1389,7 +1390,7 @@ if (onnxruntime_USE_OPENVINO) endif() -if (onnxruntime_USE_VITISAI) +if (onnxruntime_USE_VITISAI) #TODO[Karim] set(CMAKE_MODULE_PATH "${CMAKE_MODULE_PATH};${CMAKE_CURRENT_LIST_DIR}") endif() @@ -1411,7 +1412,7 @@ string(APPEND ORT_BUILD_INFO ", cmake cxx flags: ${CMAKE_CXX_FLAGS}") configure_file(onnxruntime_config.h.in ${CMAKE_CURRENT_BINARY_DIR}/onnxruntime_config.h) get_property(onnxruntime_GENERATOR_IS_MULTI_CONFIG GLOBAL PROPERTY GENERATOR_IS_MULTI_CONFIG) -if (onnxruntime_USE_CUDA) +if (onnxruntime_USE_CUDA AND (NOT onnxruntime_ENABLE_GENERIC_INTERFACE)) set(CMAKE_CUDA_RUNTIME_LIBRARY Shared) set(CMAKE_CUDA_STANDARD 17) if(onnxruntime_CUDA_HOME) @@ 
-1685,17 +1686,17 @@ if (onnxruntime_BUILD_SHARED_LIB OR onnxruntime_BUILD_APPLE_FRAMEWORK) list(APPEND ONNXRUNTIME_CMAKE_FILES onnxruntime) endif() -if (onnxruntime_BUILD_JAVA) +if (onnxruntime_BUILD_JAVA) #TODO[Karim] *cmake files included below uses onnxruntime_USE_CUDA, USE_QNN etc variables, message(STATUS "Java Build is enabled") list(APPEND ONNXRUNTIME_CMAKE_FILES onnxruntime_java) endif() -if (onnxruntime_BUILD_NODEJS) +if (onnxruntime_BUILD_NODEJS) #TODO[Karim] message(STATUS "Node.js Build is enabled") list(APPEND ONNXRUNTIME_CMAKE_FILES onnxruntime_nodejs) endif() -if (onnxruntime_ENABLE_PYTHON) +if (onnxruntime_ENABLE_PYTHON) #TODO[Karim] message(STATUS "Python Build is enabled") list(APPEND ONNXRUNTIME_CMAKE_FILES onnxruntime_python) endif() @@ -1705,7 +1706,7 @@ if (onnxruntime_BUILD_OBJC) list(APPEND ONNXRUNTIME_CMAKE_FILES onnxruntime_objectivec) endif() -if (onnxruntime_BUILD_UNIT_TESTS) +if (onnxruntime_BUILD_UNIT_TESTS) #TODO[Karim] list(APPEND ONNXRUNTIME_CMAKE_FILES onnxruntime_unittests) endif() @@ -1753,7 +1754,7 @@ if (WIN32 AND NOT GDK_PLATFORM AND NOT CMAKE_CROSSCOMPILING) endif() foreach(target_name ${ONNXRUNTIME_CMAKE_FILES}) - include(${target_name}.cmake) + include(${target_name}.cmake) #TODO[Karim] endforeach() if (UNIX) option(BUILD_PKGCONFIG_FILES "Build and install pkg-config files" ON) diff --git a/cmake/external/onnxruntime_external_deps.cmake b/cmake/external/onnxruntime_external_deps.cmake index ee7abcbad025c..308f0997d28e1 100644 --- a/cmake/external/onnxruntime_external_deps.cmake +++ b/cmake/external/onnxruntime_external_deps.cmake @@ -400,7 +400,7 @@ if ((CPUINFO_SUPPORTED OR onnxruntime_USE_XNNPACK) AND NOT ANDROID) endif() endif() -if(onnxruntime_USE_CUDA) +if(onnxruntime_USE_CUDA) #TODO[Karim] FetchContent_Declare( GSL URL ${DEP_URL_microsoft_gsl} @@ -682,7 +682,8 @@ if (onnxruntime_USE_WEBGPU) endif() set(onnxruntime_LINK_DIRS) -if (onnxruntime_USE_CUDA) + +if (onnxruntime_USE_CUDA AND (NOT 
onnxruntime_ENABLE_GENERIC_INTERFACE)) find_package(CUDAToolkit REQUIRED) if(onnxruntime_CUDNN_HOME) diff --git a/cmake/onnxruntime.cmake b/cmake/onnxruntime.cmake index 732c0511d400f..0ea2000e347d8 100644 --- a/cmake/onnxruntime.cmake +++ b/cmake/onnxruntime.cmake @@ -37,7 +37,7 @@ function(get_c_cxx_api_headers HEADERS_VAR) endif() # need to add header files for enabled EPs - foreach(f ${ONNXRUNTIME_PROVIDER_NAMES}) + foreach(f ${ONNXRUNTIME_PROVIDER_NAMES}) #TODO[Karim] # The header files in include/onnxruntime/core/providers/cuda directory cannot be flattened to the same directory # with onnxruntime_c_api.h . Most other EPs probably also do not work in this way. if((NOT f STREQUAL cuda) AND (NOT f STREQUAL rocm)) @@ -66,12 +66,12 @@ if(NOT ${CMAKE_SYSTEM_NAME} MATCHES "AIX") add_custom_command(OUTPUT ${SYMBOL_FILE} ${CMAKE_CURRENT_BINARY_DIR}/generated_source.c COMMAND ${Python_EXECUTABLE} "${REPO_ROOT}/tools/ci_build/gen_def.py" --version_file "${ONNXRUNTIME_ROOT}/../VERSION_NUMBER" --src_root "${ONNXRUNTIME_ROOT}" - --config ${ONNXRUNTIME_PROVIDER_NAMES} --style=${OUTPUT_STYLE} --output ${SYMBOL_FILE} + --config ${ONNXRUNTIME_PROVIDER_NAMES} --style=${OUTPUT_STYLE} --output ${SYMBOL_FILE} #TODO[Karim] --output_source ${CMAKE_CURRENT_BINARY_DIR}/generated_source.c DEPENDS ${SYMBOL_FILES} WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}) -add_custom_target(onnxruntime_generate_def ALL DEPENDS ${SYMBOL_FILE} ${CMAKE_CURRENT_BINARY_DIR}/generated_source.c) +add_custom_target(onnxruntime_generate_def ALL DEPENDS ${SYMBOL_FILE} ${CMAKE_CURRENT_BINARY_DIR}/generated_source.c) #TODO[Karim] endif() if(WIN32) onnxruntime_add_shared_library(onnxruntime @@ -208,7 +208,7 @@ set(onnxruntime_INTERNAL_LIBRARIES ${PROVIDERS_COREML} ${PROVIDERS_DML} ${PROVIDERS_NNAPI} - ${PROVIDERS_QNN} + ${PROVIDERS_QNN} #TODO[Karim] ?? 
${PROVIDERS_SNPE} ${PROVIDERS_RKNPU} ${PROVIDERS_VSINPU} diff --git a/cmake/onnxruntime_providers.cmake b/cmake/onnxruntime_providers.cmake index df6e0f073bfd9..f866c7b366f33 100644 --- a/cmake/onnxruntime_providers.cmake +++ b/cmake/onnxruntime_providers.cmake @@ -59,10 +59,10 @@ function(add_op_reduction_include_dirs target) endfunction() -if(onnxruntime_USE_VITISAI) +if(onnxruntime_USE_VITISAI) #TODO[Karim] seems like not used?? set(PROVIDERS_VITISAI onnxruntime_providers_vitisai) endif() -if(onnxruntime_USE_CUDA) +if(onnxruntime_USE_CUDA) #TODO[Karim] set(PROVIDERS_CUDA onnxruntime_providers_cuda) endif() if(onnxruntime_USE_COREML) @@ -74,7 +74,7 @@ endif() if(onnxruntime_USE_JSEP) set(PROVIDERS_JS onnxruntime_providers_js) endif() -if(onnxruntime_USE_QNN) +if(onnxruntime_USE_QNN) #TODO[Karim] set(PROVIDERS_QNN onnxruntime_providers_qnn) endif() if(onnxruntime_USE_RKNPU) @@ -123,7 +123,7 @@ if(onnxruntime_USE_SNPE) endif() include(onnxruntime_providers_cpu.cmake) -if (onnxruntime_USE_CUDA AND (NOT onnxruntime_ENABLE_TRT_INTERFACE)) +if (onnxruntime_USE_CUDA AND (NOT onnxruntime_ENABLE_GENERIC_INTERFACE)) include(onnxruntime_providers_cuda.cmake) endif() @@ -131,15 +131,15 @@ if (onnxruntime_USE_DNNL) include(onnxruntime_providers_dnnl.cmake) endif() -if (onnxruntime_USE_TENSORRT AND (NOT onnxruntime_ENABLE_TRT_INTERFACE)) +if (onnxruntime_USE_TENSORRT AND (NOT onnxruntime_ENABLE_GENERIC_INTERFACE)) include(onnxruntime_providers_tensorrt.cmake) endif() -if (onnxruntime_USE_VITISAI) +if (onnxruntime_USE_VITISAI AND (NOT onnxruntime_ENABLE_GENERIC_INTERFACE)) include(onnxruntime_providers_vitisai.cmake) endif() -if (onnxruntime_USE_OPENVINO AND (NOT onnxruntime_ENABLE_OPENVINO_INTERFACE)) +if (onnxruntime_USE_OPENVINO AND (NOT onnxruntime_ENABLE_GENERIC_INTERFACE)) include(onnxruntime_providers_openvino.cmake) endif() @@ -164,6 +164,7 @@ if (onnxruntime_USE_JSEP) endif() if (onnxruntime_USE_QNN) + message("Hitting qnn check") 
include(onnxruntime_providers_qnn.cmake) endif() diff --git a/cmake/onnxruntime_python.cmake b/cmake/onnxruntime_python.cmake index 5a87252b08573..10633abac1799 100644 --- a/cmake/onnxruntime_python.cmake +++ b/cmake/onnxruntime_python.cmake @@ -881,7 +881,7 @@ if (onnxruntime_USE_DNNL) ) endif() -if (onnxruntime_USE_VITISAI) +if (onnxruntime_USE_VITISAI AND (NOT onnxruntime_ENABLE_GENERIC_INTERFACE)) add_custom_command( TARGET onnxruntime_pybind11_state POST_BUILD COMMAND ${CMAKE_COMMAND} -E copy @@ -891,7 +891,7 @@ if (onnxruntime_USE_VITISAI) ) endif() -if (onnxruntime_USE_TENSORRT) +if (onnxruntime_USE_TENSORRT AND (NOT onnxruntime_ENABLE_GENERIC_INTERFACE)) add_custom_command( TARGET onnxruntime_pybind11_state POST_BUILD COMMAND ${CMAKE_COMMAND} -E copy @@ -911,7 +911,7 @@ if (onnxruntime_USE_MIGRAPHX) ) endif() -if (onnxruntime_USE_OPENVINO) +if (onnxruntime_USE_OPENVINO AND (NOT onnxruntime_ENABLE_GENERIC_INTERFACE)) add_custom_command( TARGET onnxruntime_pybind11_state POST_BUILD COMMAND ${CMAKE_COMMAND} -E copy @@ -934,7 +934,7 @@ if (DEFINED ENV{OPENVINO_MANYLINUX}) ) endif() -if (onnxruntime_USE_CUDA) +if (onnxruntime_USE_CUDA AND (NOT onnxruntime_ENABLE_GENERIC_INTERFACE)) add_custom_command( TARGET onnxruntime_pybind11_state POST_BUILD COMMAND ${CMAKE_COMMAND} -E copy diff --git a/tools/ci_build/build.py b/tools/ci_build/build.py index 5a258725541ba..cbfe8cced02bf 100644 --- a/tools/ci_build/build.py +++ b/tools/ci_build/build.py @@ -764,8 +764,7 @@ def convert_arg_line_to_args(self, arg_line): parser.add_argument("--use_triton_kernel", action="store_true", help="Use triton compiled kernels") parser.add_argument("--use_lock_free_queue", action="store_true", help="Use lock-free task queue for threadpool.") - parser.add_argument("--enable_tensorrt_interface", action="store_true", help="build ORT shared library and compatible bridge with tensorrt, but not TRT EP nor tests") - parser.add_argument("--enable_openvino_interface", action="store_true", 
help="build ORT shared library and compatible bridge with OpenVINO, but not OpenVINO EP nor tests") + parser.add_argument("--enable_generic_interface", action="store_true", help="build ORT shared library and compatible bridge with primary EPs(tensorRT, OpenVino, Qnn, vitisai) but not tests") if not is_windows(): parser.add_argument( @@ -1027,8 +1026,7 @@ def generate_build_tree( "-Donnxruntime_USE_TENSORRT=" + ("ON" if args.use_tensorrt else "OFF"), "-Donnxruntime_USE_TENSORRT_BUILTIN_PARSER=" + ("ON" if args.use_tensorrt_builtin_parser and not args.use_tensorrt_oss_parser else "OFF"), - "-Donnxruntime_ENABLE_TRT_INTERFACE=" + ("ON" if args.enable_tensorrt_interface else "OFF"), - "-Donnxruntime_ENABLE_OPENVINO_INTERFACE=" + ("ON" if args.enable_openvino_interface else "OFF"), + "-Donnxruntime_ENABLE_GENERIC_INTERFACE=" + ("ON" if args.enable_generic_interface else "OFF"), # set vars for migraphx "-Donnxruntime_USE_MIGRAPHX=" + ("ON" if args.use_migraphx else "OFF"), "-Donnxruntime_DISABLE_CONTRIB_OPS=" + ("ON" if args.disable_contrib_ops else "OFF"), @@ -1145,6 +1143,8 @@ def generate_build_tree( cmake_args.append("-Donnxruntime_ENABLE_WEBASSEMBLY_SIMD=" + ("ON" if args.enable_wasm_simd else "OFF")) if args.use_migraphx: cmake_args.append("-Donnxruntime_MIGRAPHX_HOME=" + migraphx_home) + + ''' if args.use_cuda: nvcc_threads = number_of_nvcc_threads(args) cmake_args.append("-Donnxruntime_NVCC_THREADS=" + str(nvcc_threads)) @@ -1155,11 +1155,14 @@ def generate_build_tree( f"Add '--disable_types float8' to your command line. See option disable_types." 
) cmake_args.append(f"-DCMAKE_CUDA_COMPILER={cuda_home}/bin/nvcc") + ''' + if args.use_rocm: cmake_args.append("-Donnxruntime_ROCM_HOME=" + rocm_home) cmake_args.append("-Donnxruntime_ROCM_VERSION=" + args.rocm_version) - if args.use_tensorrt: - cmake_args.append("-Donnxruntime_TENSORRT_HOME=" + tensorrt_home) + + #if args.use_tensorrt: + # cmake_args.append("-Donnxruntime_TENSORRT_HOME=" + tensorrt_home) if args.use_cuda: add_default_definition(cmake_extra_defines, "onnxruntime_USE_CUDA", "ON") @@ -1211,8 +1214,8 @@ def generate_build_tree( if nccl_home and os.path.exists(nccl_home): cmake_args += ["-Donnxruntime_NCCL_HOME=" + nccl_home] - if qnn_home and os.path.exists(qnn_home): - cmake_args += ["-Donnxruntime_QNN_HOME=" + qnn_home] + #if qnn_home and os.path.exists(qnn_home): + # cmake_args += ["-Donnxruntime_QNN_HOME=" + qnn_home] if snpe_root and os.path.exists(snpe_root): cmake_args += ["-DSNPE_ROOT=" + snpe_root] @@ -1302,8 +1305,8 @@ def generate_build_tree( cmake_args += ["-DCMAKE_XCODE_ATTRIBUTE_DEVELOPMENT_TEAM=" + args.xcode_code_signing_team_id] if args.use_qnn: - if args.qnn_home is None or os.path.exists(args.qnn_home) is False: - raise BuildError("qnn_home=" + qnn_home + " not valid." + " qnn_home paths must be specified and valid.") + #if args.qnn_home is None or os.path.exists(args.qnn_home) is False: + # raise BuildError("qnn_home=" + qnn_home + " not valid." 
+ " qnn_home paths must be specified and valid.") cmake_args += ["-Donnxruntime_USE_QNN=ON"] if args.use_coreml: @@ -1459,7 +1462,7 @@ def generate_build_tree( "-Donnxruntime_USE_FULL_PROTOBUF=ON", ] - if args.enable_tensorrt_interface or args.enable_openvino_interface: + if args.enable_generic_interface: cmake_args += ["-Donnxruntime_BUILD_UNIT_TESTS=OFF"] if args.enable_lazy_tensor: @@ -2575,10 +2578,12 @@ def main(): # Disable ONNX Runtime's builtin memory checker args.disable_memleak_checker = True - if args.enable_tensorrt_interface: - args.use_tensorrt, args.test = True, False - if args.enable_openvino_interface: - args.use_openvino, args.test = "CPU", False + if args.enable_generic_interface: + args.test = False + args.use_tensorrt = True + args.use_openvino = "CPU" + args.use_vitisai = True + #args.use_qnn = True #defaults should be set based on arm64 vs x64 builds... # If there was no explicit argument saying what to do, default # to update, build and test (for native builds). @@ -2683,7 +2688,9 @@ def main(): source_dir = os.path.normpath(os.path.join(script_dir, "..", "..")) # if using cuda, setup cuda paths and env vars - cuda_home, cudnn_home = setup_cuda_vars(args) + #cuda_home, cudnn_home = setup_cuda_vars(args) + cuda_home = "" + cudnn_home = "" mpi_home = args.mpi_home nccl_home = args.nccl_home @@ -2697,9 +2704,11 @@ def main(): armnn_libs = args.armnn_libs qnn_home = args.qnn_home + qnn_home = "" # if using tensorrt, setup tensorrt paths - tensorrt_home = setup_tensorrt_vars(args) + #tensorrt_home = setup_tensorrt_vars(args) + tensorrt_home = "" # if using migraphx, setup migraphx paths migraphx_home = setup_migraphx_vars(args) @@ -2790,7 +2799,12 @@ def main(): toolset += ",cuda=" + args.cuda_home if args.windows_sdk_version: target_arch += ",version=" + args.windows_sdk_version - cmake_extra_args = ["-A", target_arch, "-T", toolset, "-G", args.cmake_generator] + + cmake_extra_args = ["-A", target_arch, "-G", args.cmake_generator] + + if 
(args.use_cuda and (not args.enable_generic_interface)): + cmake_extra_args += ["-T", toolset] + if args.enable_wcos: cmake_extra_defines.append("CMAKE_USER_MAKE_RULES_OVERRIDE=wcos_rules_override.cmake") From a6e35b756c2c12644d0df51026dcb5b4b7cece52 Mon Sep 17 00:00:00 2001 From: Karim Vadsariya Date: Fri, 10 Jan 2025 14:52:37 -0800 Subject: [PATCH 05/13] Incorporate code review comments and simply the changes by introducing EP specific Interface flags --- cmake/CMakeLists.txt | 30 ++++++++-------- cmake/adjust_global_compile_flags.cmake | 2 +- .../external/onnxruntime_external_deps.cmake | 4 +-- cmake/onnxruntime.cmake | 2 +- cmake/onnxruntime_providers.cmake | 19 ++++------- cmake/onnxruntime_python.cmake | 8 ++--- tools/ci_build/build.py | 34 +++++++++---------- 7 files changed, 46 insertions(+), 53 deletions(-) diff --git a/cmake/CMakeLists.txt b/cmake/CMakeLists.txt index c60bafd9be0d6..c1510026ece6c 100644 --- a/cmake/CMakeLists.txt +++ b/cmake/CMakeLists.txt @@ -634,7 +634,7 @@ if (WIN32) # structure was padded due to __declspec(align()) list(APPEND ORT_WARNING_FLAGS "/wd4324") # warning C4800: Implicit conversion from 'X' to bool. Possible information loss - if (onnxruntime_USE_OPENVINO) #TODO[Karim] applys to all projects... + if (onnxruntime_USE_OPENVINO OR onnxruntime_USE_OPENVINO_INTERFACE) list(APPEND ORT_WARNING_FLAGS "/wd4800") endif() # operator 'operator-name': deprecated between enumerations of different types @@ -657,7 +657,7 @@ if (WIN32) set(CMAKE_${type}_LINKER_FLAGS_MINSIZEREL "${CMAKE_${type}_LINKER_FLAGS_MINSIZEREL} /INCREMENTAL:NO") endif() endif() - if (onnxruntime_ENABLE_LTO AND NOT onnxruntime_USE_CUDA) + if (onnxruntime_ENABLE_LTO AND NOT onnxruntime_USE_CUDA) #TODO[Low] Any changes ? 
set(CMAKE_${type}_LINKER_FLAGS_RELEASE "${CMAKE_${type}_LINKER_FLAGS_RELEASE} /LTCG") set(CMAKE_${type}_LINKER_FLAGS_RELWITHDEBINFO "${CMAKE_${type}_LINKER_FLAGS_RELWITHDEBINFO} /LTCG") set(CMAKE_${type}_LINKER_FLAGS_MINSIZEREL "${CMAKE_${type}_LINKER_FLAGS_MINSIZEREL} /LTCG") @@ -758,7 +758,7 @@ set(ONNXRUNTIME_PROVIDER_NAMES cpu) set(ORT_PROVIDER_FLAGS) set(ORT_PROVIDER_CMAKE_FLAGS) -if (onnxruntime_USE_CUDA AND (NOT onnxruntime_ENABLE_GENERIC_INTERFACE)) +if (onnxruntime_USE_CUDA) enable_language(CUDA) message( STATUS "CMAKE_CUDA_COMPILER_VERSION: ${CMAKE_CUDA_COMPILER_VERSION}") @@ -791,7 +791,7 @@ else() set(onnxruntime_USE_MEMORY_EFFICIENT_ATTENTION OFF) endif() -if (onnxruntime_USE_CUDA) +if (onnxruntime_USE_CUDA OR onnxruntime_USE_CUDA_INTERFACE) list(APPEND ORT_PROVIDER_FLAGS -DUSE_CUDA=1) list(APPEND ORT_PROVIDER_CMAKE_FLAGS -Donnxruntime_USE_CUDA=1) #TODO[Karim] Not used anywhere list(APPEND ONNXRUNTIME_PROVIDER_NAMES cuda) @@ -815,7 +815,7 @@ if (onnxruntime_USE_CUDA) endif() endif() -if (onnxruntime_USE_VITISAI) +if (onnxruntime_USE_VITISAI OR onnxruntime_USE_VITISAI_INTERFACE) list(APPEND ORT_PROVIDER_FLAGS -DUSE_VITISAI=1) list(APPEND ORT_PROVIDER_CMAKE_FLAGS -Donnxruntime_USE_VITISAI=1) list(APPEND ONNXRUNTIME_PROVIDER_NAMES vitisai) @@ -825,12 +825,12 @@ if (onnxruntime_USE_DNNL) list(APPEND ONNXRUNTIME_PROVIDER_NAMES dnnl) list(APPEND ORT_PROVIDER_CMAKE_FLAGS -Donnxruntime_USE_DNNL=1) endif() -if (onnxruntime_USE_OPENVINO) +if (onnxruntime_USE_OPENVINO OR onnxruntime_USE_OPENVINO_INTERFACE) list(APPEND ORT_PROVIDER_FLAGS -DUSE_OPENVINO=1) list(APPEND ONNXRUNTIME_PROVIDER_NAMES openvino) list(APPEND ORT_PROVIDER_CMAKE_FLAGS -Donnxruntime_USE_OPENVINO=1) endif() -if (onnxruntime_USE_TENSORRT) +if (onnxruntime_USE_TENSORRT OR onnxruntime_USE_TENSORRT_INTERFACE) list(APPEND ORT_PROVIDER_FLAGS -DUSE_TENSORRT=1) #TODO: remove the following line and change the test code in onnxruntime_shared_lib_test to use the new EP API. 
list(APPEND ONNXRUNTIME_PROVIDER_NAMES tensorrt) @@ -856,7 +856,7 @@ if (onnxruntime_USE_JSEP) list(APPEND ORT_PROVIDER_CMAKE_FLAGS -Donnxruntime_USE_JSEP=1) list(APPEND ONNXRUNTIME_PROVIDER_NAMES js) endif() -if (onnxruntime_USE_QNN) #TODO[Karim] +if (onnxruntime_USE_QNN OR onnxruntime_USE_QNN_INTERFACE) list(APPEND ORT_PROVIDER_FLAGS -DUSE_QNN=1) list(APPEND ORT_PROVIDER_CMAKE_FLAGS -Donnxruntime_USE_QNN=1) list(APPEND ONNXRUNTIME_PROVIDER_NAMES qnn) @@ -884,7 +884,7 @@ if (onnxruntime_USE_QNN) #TODO[Karim] endif() endif() - if (MSVC OR ${CMAKE_SYSTEM_NAME} STREQUAL "Linux") + if ((NOT onnxruntime_USE_QNN_INTERFACE) AND (MSVC OR ${CMAKE_SYSTEM_NAME} STREQUAL "Linux")) file(GLOB QNN_LIB_FILES LIST_DIRECTORIES false "${onnxruntime_QNN_HOME}/lib/${QNN_ARCH_ABI}/libQnn*.so" "${onnxruntime_QNN_HOME}/lib/${QNN_ARCH_ABI}/Qnn*.dll" "${onnxruntime_QNN_HOME}/lib/${QNN_ARCH_ABI}/libHtpPrepare.so" @@ -1033,7 +1033,7 @@ function(onnxruntime_set_compile_flags target_name) set_target_properties(${target_name} PROPERTIES COMPILE_WARNING_AS_ERROR ON) - if (onnxruntime_USE_CUDA AND (NOT onnxruntime_ENABLE_GENERIC_INTERFACE)) + if (onnxruntime_USE_CUDA) # Suppress a "conversion_function_not_usable" warning in gsl/span target_compile_options(${target_name} PRIVATE "$<$:SHELL:-Xcudafe \"--diag_suppress=conversion_function_not_usable\">") target_compile_definitions(${target_name} PRIVATE -DDISABLE_CUSPARSE_DEPRECATED) @@ -1111,7 +1111,7 @@ function(onnxruntime_set_compile_flags target_name) if("${CMAKE_C_COMPILER_ID}" STREQUAL "GNU" AND CMAKE_C_COMPILER_VERSION VERSION_LESS 13 AND CMAKE_C_COMPILER_VERSION VERSION_GREATER_EQUAL 12) target_compile_options(${target_name} PRIVATE "$<$:-Wno-maybe-uninitialized>") endif() - if (onnxruntime_USE_CUDA AND (NOT onnxruntime_ENABLE_GENERIC_INTERFACE)) + if (onnxruntime_USE_CUDA) foreach(FLAG ${ORT_WARNING_FLAGS}) target_compile_options(${target_name} PRIVATE "$<$:SHELL:--compiler-options ${FLAG}>") endforeach() @@ -1333,7 +1333,7 @@ if 
(onnxruntime_ENABLE_TRAINING_APIS) ) endif() -if (onnxruntime_USE_OPENVINO) #TODO[Karim] applies to both onnxruntime.dll and onnxruntime_shared.dll +if (onnxruntime_USE_OPENVINO OR onnxruntime_USE_OPENVINO_INTERFACE) add_definitions(-DUSE_OPENVINO=1) @@ -1346,7 +1346,7 @@ if (onnxruntime_USE_OPENVINO) #TODO[Karim] applies to both onnxruntime.dll and o add_definitions(-DOPENVINO_CONFIG_GPU=1) endif() - if (onnxruntime_USE_OPENVINO_CPU) + if (onnxruntime_USE_OPENVINO_CPU OR onnxruntime_USE_OPENVINO_INTERFACE) # OpenVino CPU interface is default built. add_definitions(-DOPENVINO_CONFIG_CPU=1) endif() @@ -1412,7 +1412,7 @@ string(APPEND ORT_BUILD_INFO ", cmake cxx flags: ${CMAKE_CXX_FLAGS}") configure_file(onnxruntime_config.h.in ${CMAKE_CURRENT_BINARY_DIR}/onnxruntime_config.h) get_property(onnxruntime_GENERATOR_IS_MULTI_CONFIG GLOBAL PROPERTY GENERATOR_IS_MULTI_CONFIG) -if (onnxruntime_USE_CUDA AND (NOT onnxruntime_ENABLE_GENERIC_INTERFACE)) +if (onnxruntime_USE_CUDA) set(CMAKE_CUDA_RUNTIME_LIBRARY Shared) set(CMAKE_CUDA_STANDARD 17) if(onnxruntime_CUDA_HOME) @@ -1686,7 +1686,7 @@ if (onnxruntime_BUILD_SHARED_LIB OR onnxruntime_BUILD_APPLE_FRAMEWORK) list(APPEND ONNXRUNTIME_CMAKE_FILES onnxruntime) endif() -if (onnxruntime_BUILD_JAVA) #TODO[Karim] *cmake files included below uses onnxruntime_USE_CUDA, USE_QNN etc variables, +if (onnxruntime_BUILD_JAVA) message(STATUS "Java Build is enabled") list(APPEND ONNXRUNTIME_CMAKE_FILES onnxruntime_java) endif() diff --git a/cmake/adjust_global_compile_flags.cmake b/cmake/adjust_global_compile_flags.cmake index dbbf685346532..eeb8d4fb2b99c 100644 --- a/cmake/adjust_global_compile_flags.cmake +++ b/cmake/adjust_global_compile_flags.cmake @@ -283,7 +283,7 @@ if (MSVC) string(APPEND CMAKE_C_FLAGS " /arch:AVX512") endif() - if (onnxruntime_ENABLE_LTO AND NOT onnxruntime_USE_CUDA) + if (onnxruntime_ENABLE_LTO AND NOT onnxruntime_USE_CUDA) #TODO[Low] Any changes ? 
set(CMAKE_CXX_FLAGS_RELEASE "${CMAKE_CXX_FLAGS_RELEASE} /Gw /GL") set(CMAKE_CXX_FLAGS_RELWITHDEBINFO "${CMAKE_CXX_FLAGS_RELWITHDEBINFO} /Gw /GL") set(CMAKE_CXX_FLAGS_MINSIZEREL "${CMAKE_CXX_FLAGS_MINSIZEREL} /Gw /GL") diff --git a/cmake/external/onnxruntime_external_deps.cmake b/cmake/external/onnxruntime_external_deps.cmake index 308f0997d28e1..ffd7e5f4243cb 100644 --- a/cmake/external/onnxruntime_external_deps.cmake +++ b/cmake/external/onnxruntime_external_deps.cmake @@ -400,7 +400,7 @@ if ((CPUINFO_SUPPORTED OR onnxruntime_USE_XNNPACK) AND NOT ANDROID) endif() endif() -if(onnxruntime_USE_CUDA) #TODO[Karim] +if(onnxruntime_USE_CUDA) #TODO[Low] Any changes? FetchContent_Declare( GSL URL ${DEP_URL_microsoft_gsl} @@ -683,7 +683,7 @@ endif() set(onnxruntime_LINK_DIRS) -if (onnxruntime_USE_CUDA AND (NOT onnxruntime_ENABLE_GENERIC_INTERFACE)) +if (onnxruntime_USE_CUDA) find_package(CUDAToolkit REQUIRED) if(onnxruntime_CUDNN_HOME) diff --git a/cmake/onnxruntime.cmake b/cmake/onnxruntime.cmake index 0ea2000e347d8..293429973670f 100644 --- a/cmake/onnxruntime.cmake +++ b/cmake/onnxruntime.cmake @@ -208,7 +208,7 @@ set(onnxruntime_INTERNAL_LIBRARIES ${PROVIDERS_COREML} ${PROVIDERS_DML} ${PROVIDERS_NNAPI} - ${PROVIDERS_QNN} #TODO[Karim] ?? + ${PROVIDERS_QNN} ${PROVIDERS_SNPE} ${PROVIDERS_RKNPU} ${PROVIDERS_VSINPU} diff --git a/cmake/onnxruntime_providers.cmake b/cmake/onnxruntime_providers.cmake index f866c7b366f33..dad3a5f93fd65 100644 --- a/cmake/onnxruntime_providers.cmake +++ b/cmake/onnxruntime_providers.cmake @@ -59,12 +59,6 @@ function(add_op_reduction_include_dirs target) endfunction() -if(onnxruntime_USE_VITISAI) #TODO[Karim] seems like not used?? 
- set(PROVIDERS_VITISAI onnxruntime_providers_vitisai) -endif() -if(onnxruntime_USE_CUDA) #TODO[Karim] - set(PROVIDERS_CUDA onnxruntime_providers_cuda) -endif() if(onnxruntime_USE_COREML) set(PROVIDERS_COREML onnxruntime_providers_coreml coreml_proto) endif() @@ -74,7 +68,7 @@ endif() if(onnxruntime_USE_JSEP) set(PROVIDERS_JS onnxruntime_providers_js) endif() -if(onnxruntime_USE_QNN) #TODO[Karim] +if(onnxruntime_USE_QNN OR onnxruntime_USE_QNN_INTERFACE) #TODO[Low] Revisit when qnn EP becomes dynamic lib set(PROVIDERS_QNN onnxruntime_providers_qnn) endif() if(onnxruntime_USE_RKNPU) @@ -123,7 +117,7 @@ if(onnxruntime_USE_SNPE) endif() include(onnxruntime_providers_cpu.cmake) -if (onnxruntime_USE_CUDA AND (NOT onnxruntime_ENABLE_GENERIC_INTERFACE)) +if (onnxruntime_USE_CUDA) include(onnxruntime_providers_cuda.cmake) endif() @@ -131,15 +125,15 @@ if (onnxruntime_USE_DNNL) include(onnxruntime_providers_dnnl.cmake) endif() -if (onnxruntime_USE_TENSORRT AND (NOT onnxruntime_ENABLE_GENERIC_INTERFACE)) +if (onnxruntime_USE_TENSORRT) include(onnxruntime_providers_tensorrt.cmake) endif() -if (onnxruntime_USE_VITISAI AND (NOT onnxruntime_ENABLE_GENERIC_INTERFACE)) +if (onnxruntime_USE_VITISAI) include(onnxruntime_providers_vitisai.cmake) endif() -if (onnxruntime_USE_OPENVINO AND (NOT onnxruntime_ENABLE_GENERIC_INTERFACE)) +if (onnxruntime_USE_OPENVINO) include(onnxruntime_providers_openvino.cmake) endif() @@ -163,8 +157,7 @@ if (onnxruntime_USE_JSEP) include(onnxruntime_providers_js.cmake) endif() -if (onnxruntime_USE_QNN) - message("Hitting qnn check") +if (onnxruntime_USE_QNN OR onnxruntime_USE_QNN_INTERFACE) #TODO[Low] Revisit when QNN EP becomes dynamic lib. 
include(onnxruntime_providers_qnn.cmake) endif() diff --git a/cmake/onnxruntime_python.cmake b/cmake/onnxruntime_python.cmake index 10633abac1799..5a87252b08573 100644 --- a/cmake/onnxruntime_python.cmake +++ b/cmake/onnxruntime_python.cmake @@ -881,7 +881,7 @@ if (onnxruntime_USE_DNNL) ) endif() -if (onnxruntime_USE_VITISAI AND (NOT onnxruntime_ENABLE_GENERIC_INTERFACE)) +if (onnxruntime_USE_VITISAI) add_custom_command( TARGET onnxruntime_pybind11_state POST_BUILD COMMAND ${CMAKE_COMMAND} -E copy @@ -891,7 +891,7 @@ if (onnxruntime_USE_VITISAI AND (NOT onnxruntime_ENABLE_GENERIC_INTERFACE)) ) endif() -if (onnxruntime_USE_TENSORRT AND (NOT onnxruntime_ENABLE_GENERIC_INTERFACE)) +if (onnxruntime_USE_TENSORRT) add_custom_command( TARGET onnxruntime_pybind11_state POST_BUILD COMMAND ${CMAKE_COMMAND} -E copy @@ -911,7 +911,7 @@ if (onnxruntime_USE_MIGRAPHX) ) endif() -if (onnxruntime_USE_OPENVINO AND (NOT onnxruntime_ENABLE_GENERIC_INTERFACE)) +if (onnxruntime_USE_OPENVINO) add_custom_command( TARGET onnxruntime_pybind11_state POST_BUILD COMMAND ${CMAKE_COMMAND} -E copy @@ -934,7 +934,7 @@ if (DEFINED ENV{OPENVINO_MANYLINUX}) ) endif() -if (onnxruntime_USE_CUDA AND (NOT onnxruntime_ENABLE_GENERIC_INTERFACE)) +if (onnxruntime_USE_CUDA) add_custom_command( TARGET onnxruntime_pybind11_state POST_BUILD COMMAND ${CMAKE_COMMAND} -E copy diff --git a/tools/ci_build/build.py b/tools/ci_build/build.py index cbfe8cced02bf..618e8980370d8 100644 --- a/tools/ci_build/build.py +++ b/tools/ci_build/build.py @@ -999,6 +999,8 @@ def generate_build_tree( disable_optional_type = "optional" in types_to_disable disable_sparse_tensors = "sparsetensor" in types_to_disable + enable_qnn_interface = True if((args.arm64 or args.arm or args.arm64ec) and (args.enable_generic_interface)) else False + cmake_args += [ "-Donnxruntime_RUN_ONNX_TESTS=" + ("ON" if args.enable_onnx_tests else "OFF"), "-Donnxruntime_GENERATE_TEST_REPORTS=ON", @@ -1026,7 +1028,11 @@ def generate_build_tree( 
"-Donnxruntime_USE_TENSORRT=" + ("ON" if args.use_tensorrt else "OFF"), "-Donnxruntime_USE_TENSORRT_BUILTIN_PARSER=" + ("ON" if args.use_tensorrt_builtin_parser and not args.use_tensorrt_oss_parser else "OFF"), - "-Donnxruntime_ENABLE_GENERIC_INTERFACE=" + ("ON" if args.enable_generic_interface else "OFF"), + # interface variables are used only for building onnxruntime/onnxruntime_shared.dll but not EPs + "-Donnxruntime_USE_TENSORRT_INTERFACE=" + ("ON" if (args.enable_generic_interface and not enable_qnn_interface) else "OFF"), + "-Donnxruntime_USE_OPENVINO_INTERFACE=" + ("ON" if (args.enable_generic_interface and not enable_qnn_interface) else "OFF"), + "-Donnxruntime_USE_VITISAI_INTERFACE=" + ("ON" if (args.enable_generic_interface and not enable_qnn_interface) else "OFF"), + "-Donnxruntime_USE_QNN_INTERFACE=" + ("ON" if (args.enable_generic_interface and enable_qnn_interface) else "OFF"), # set vars for migraphx "-Donnxruntime_USE_MIGRAPHX=" + ("ON" if args.use_migraphx else "OFF"), "-Donnxruntime_DISABLE_CONTRIB_OPS=" + ("ON" if args.disable_contrib_ops else "OFF"), @@ -1143,8 +1149,6 @@ def generate_build_tree( cmake_args.append("-Donnxruntime_ENABLE_WEBASSEMBLY_SIMD=" + ("ON" if args.enable_wasm_simd else "OFF")) if args.use_migraphx: cmake_args.append("-Donnxruntime_MIGRAPHX_HOME=" + migraphx_home) - - ''' if args.use_cuda: nvcc_threads = number_of_nvcc_threads(args) cmake_args.append("-Donnxruntime_NVCC_THREADS=" + str(nvcc_threads)) @@ -1155,14 +1159,11 @@ def generate_build_tree( f"Add '--disable_types float8' to your command line. See option disable_types." 
) cmake_args.append(f"-DCMAKE_CUDA_COMPILER={cuda_home}/bin/nvcc") - ''' - if args.use_rocm: cmake_args.append("-Donnxruntime_ROCM_HOME=" + rocm_home) cmake_args.append("-Donnxruntime_ROCM_VERSION=" + args.rocm_version) - - #if args.use_tensorrt: - # cmake_args.append("-Donnxruntime_TENSORRT_HOME=" + tensorrt_home) + if args.use_tensorrt: + cmake_args.append("-Donnxruntime_TENSORRT_HOME=" + tensorrt_home) if args.use_cuda: add_default_definition(cmake_extra_defines, "onnxruntime_USE_CUDA", "ON") @@ -1214,8 +1215,8 @@ def generate_build_tree( if nccl_home and os.path.exists(nccl_home): cmake_args += ["-Donnxruntime_NCCL_HOME=" + nccl_home] - #if qnn_home and os.path.exists(qnn_home): - # cmake_args += ["-Donnxruntime_QNN_HOME=" + qnn_home] + if qnn_home and os.path.exists(qnn_home): + cmake_args += ["-Donnxruntime_QNN_HOME=" + qnn_home] if snpe_root and os.path.exists(snpe_root): cmake_args += ["-DSNPE_ROOT=" + snpe_root] @@ -1305,8 +1306,8 @@ def generate_build_tree( cmake_args += ["-DCMAKE_XCODE_ATTRIBUTE_DEVELOPMENT_TEAM=" + args.xcode_code_signing_team_id] if args.use_qnn: - #if args.qnn_home is None or os.path.exists(args.qnn_home) is False: - # raise BuildError("qnn_home=" + qnn_home + " not valid." + " qnn_home paths must be specified and valid.") + if args.qnn_home is None or os.path.exists(args.qnn_home) is False: + raise BuildError("qnn_home=" + qnn_home + " not valid." + " qnn_home paths must be specified and valid.") cmake_args += ["-Donnxruntime_USE_QNN=ON"] if args.use_coreml: @@ -2580,10 +2581,6 @@ def main(): if args.enable_generic_interface: args.test = False - args.use_tensorrt = True - args.use_openvino = "CPU" - args.use_vitisai = True - #args.use_qnn = True #defaults should be set based on arm64 vs x64 builds... # If there was no explicit argument saying what to do, default # to update, build and test (for native builds). 
@@ -2691,6 +2688,8 @@ def main(): #cuda_home, cudnn_home = setup_cuda_vars(args) cuda_home = "" cudnn_home = "" + if args.use_cuda: + cuda_home, cudnn_home = setup_cuda_vars(args) mpi_home = args.mpi_home nccl_home = args.nccl_home @@ -2707,8 +2706,9 @@ def main(): qnn_home = "" # if using tensorrt, setup tensorrt paths - #tensorrt_home = setup_tensorrt_vars(args) tensorrt_home = "" + if args.use_tensorrt: + tensorrt_home = setup_tensorrt_vars(args) # if using migraphx, setup migraphx paths migraphx_home = setup_migraphx_vars(args) From eff95c6ca075f0d876f02553b90f05dd2435eade Mon Sep 17 00:00:00 2001 From: Karim Vadsariya Date: Fri, 10 Jan 2025 15:04:42 -0800 Subject: [PATCH 06/13] Remove unncessary comments --- cmake/CMakeLists.txt | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/cmake/CMakeLists.txt b/cmake/CMakeLists.txt index c1510026ece6c..aea860616d665 100644 --- a/cmake/CMakeLists.txt +++ b/cmake/CMakeLists.txt @@ -793,7 +793,7 @@ endif() if (onnxruntime_USE_CUDA OR onnxruntime_USE_CUDA_INTERFACE) list(APPEND ORT_PROVIDER_FLAGS -DUSE_CUDA=1) - list(APPEND ORT_PROVIDER_CMAKE_FLAGS -Donnxruntime_USE_CUDA=1) #TODO[Karim] Not used anywhere + list(APPEND ORT_PROVIDER_CMAKE_FLAGS -Donnxruntime_USE_CUDA=1) list(APPEND ONNXRUNTIME_PROVIDER_NAMES cuda) if (onnxruntime_USE_FLASH_ATTENTION) @@ -1098,7 +1098,7 @@ function(onnxruntime_set_compile_flags target_name) target_compile_options(${target_name} PRIVATE "-Wno-unused-function") endif() endif() - foreach(ORT_FLAG ${ORT_PROVIDER_FLAGS}) #TODO[Karim] applies to all projects... 
+ foreach(ORT_FLAG ${ORT_PROVIDER_FLAGS}) target_compile_definitions(${target_name} PRIVATE ${ORT_FLAG}) endforeach() if (HAS_DEPRECATED_COPY) @@ -1691,12 +1691,12 @@ if (onnxruntime_BUILD_JAVA) list(APPEND ONNXRUNTIME_CMAKE_FILES onnxruntime_java) endif() -if (onnxruntime_BUILD_NODEJS) #TODO[Karim] +if (onnxruntime_BUILD_NODEJS) message(STATUS "Node.js Build is enabled") list(APPEND ONNXRUNTIME_CMAKE_FILES onnxruntime_nodejs) endif() -if (onnxruntime_ENABLE_PYTHON) #TODO[Karim] +if (onnxruntime_ENABLE_PYTHON) message(STATUS "Python Build is enabled") list(APPEND ONNXRUNTIME_CMAKE_FILES onnxruntime_python) endif() @@ -1706,7 +1706,7 @@ if (onnxruntime_BUILD_OBJC) list(APPEND ONNXRUNTIME_CMAKE_FILES onnxruntime_objectivec) endif() -if (onnxruntime_BUILD_UNIT_TESTS) #TODO[Karim] +if (onnxruntime_BUILD_UNIT_TESTS) list(APPEND ONNXRUNTIME_CMAKE_FILES onnxruntime_unittests) endif() @@ -1754,7 +1754,7 @@ if (WIN32 AND NOT GDK_PLATFORM AND NOT CMAKE_CROSSCOMPILING) endif() foreach(target_name ${ONNXRUNTIME_CMAKE_FILES}) - include(${target_name}.cmake) #TODO[Karim] + include(${target_name}.cmake) endforeach() if (UNIX) option(BUILD_PKGCONFIG_FILES "Build and install pkg-config files" ON) From 2e59f380a3764ea247606ff813d78e18efb1a63c Mon Sep 17 00:00:00 2001 From: Karim Vadsariya Date: Sat, 11 Jan 2025 14:13:13 -0800 Subject: [PATCH 07/13] Fix cmake toolset settings --- tools/ci_build/build.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/ci_build/build.py b/tools/ci_build/build.py index 618e8980370d8..e756857115d92 100644 --- a/tools/ci_build/build.py +++ b/tools/ci_build/build.py @@ -2802,7 +2802,7 @@ def main(): make_extra_args = ["-A", target_arch, "-G", args.cmake_generator] - if (args.use_cuda and (not args.enable_generic_interface)): + if args.use_cuda: cmake_extra_args += ["-T", toolset] if args.enable_wcos: From 4d531308be7e6e0ea5c3676d999f4980cc84d88f Mon Sep 17 00:00:00 2001 From: Karim Vadsariya Date: Mon, 13 Jan 2025 13:14:09 -0800 
Subject: [PATCH 08/13] Fix toolset settings in CMake Remove TODO[low] that are no longer applicable --- cmake/CMakeLists.txt | 2 +- cmake/adjust_global_compile_flags.cmake | 2 +- cmake/external/onnxruntime_external_deps.cmake | 2 +- tools/ci_build/build.py | 11 +++-------- 4 files changed, 6 insertions(+), 11 deletions(-) diff --git a/cmake/CMakeLists.txt b/cmake/CMakeLists.txt index aea860616d665..c299e1b6dc4b0 100644 --- a/cmake/CMakeLists.txt +++ b/cmake/CMakeLists.txt @@ -657,7 +657,7 @@ if (WIN32) set(CMAKE_${type}_LINKER_FLAGS_MINSIZEREL "${CMAKE_${type}_LINKER_FLAGS_MINSIZEREL} /INCREMENTAL:NO") endif() endif() - if (onnxruntime_ENABLE_LTO AND NOT onnxruntime_USE_CUDA) #TODO[Low] Any changes ? + if (onnxruntime_ENABLE_LTO AND NOT onnxruntime_USE_CUDA) set(CMAKE_${type}_LINKER_FLAGS_RELEASE "${CMAKE_${type}_LINKER_FLAGS_RELEASE} /LTCG") set(CMAKE_${type}_LINKER_FLAGS_RELWITHDEBINFO "${CMAKE_${type}_LINKER_FLAGS_RELWITHDEBINFO} /LTCG") set(CMAKE_${type}_LINKER_FLAGS_MINSIZEREL "${CMAKE_${type}_LINKER_FLAGS_MINSIZEREL} /LTCG") diff --git a/cmake/adjust_global_compile_flags.cmake b/cmake/adjust_global_compile_flags.cmake index eeb8d4fb2b99c..dbbf685346532 100644 --- a/cmake/adjust_global_compile_flags.cmake +++ b/cmake/adjust_global_compile_flags.cmake @@ -283,7 +283,7 @@ if (MSVC) string(APPEND CMAKE_C_FLAGS " /arch:AVX512") endif() - if (onnxruntime_ENABLE_LTO AND NOT onnxruntime_USE_CUDA) #TODO[Low] Any changes ? 
+ if (onnxruntime_ENABLE_LTO AND NOT onnxruntime_USE_CUDA) set(CMAKE_CXX_FLAGS_RELEASE "${CMAKE_CXX_FLAGS_RELEASE} /Gw /GL") set(CMAKE_CXX_FLAGS_RELWITHDEBINFO "${CMAKE_CXX_FLAGS_RELWITHDEBINFO} /Gw /GL") set(CMAKE_CXX_FLAGS_MINSIZEREL "${CMAKE_CXX_FLAGS_MINSIZEREL} /Gw /GL") diff --git a/cmake/external/onnxruntime_external_deps.cmake b/cmake/external/onnxruntime_external_deps.cmake index ffd7e5f4243cb..6b34317c5cb42 100644 --- a/cmake/external/onnxruntime_external_deps.cmake +++ b/cmake/external/onnxruntime_external_deps.cmake @@ -400,7 +400,7 @@ if ((CPUINFO_SUPPORTED OR onnxruntime_USE_XNNPACK) AND NOT ANDROID) endif() endif() -if(onnxruntime_USE_CUDA) #TODO[Low] Any changes? +if(onnxruntime_USE_CUDA) FetchContent_Declare( GSL URL ${DEP_URL_microsoft_gsl} diff --git a/tools/ci_build/build.py b/tools/ci_build/build.py index e756857115d92..dee478bf4f575 100644 --- a/tools/ci_build/build.py +++ b/tools/ci_build/build.py @@ -2793,18 +2793,13 @@ def main(): toolset = "host=" + host_arch + ",version=" + args.msvc_toolset else: toolset = "host=" + host_arch - if args.cuda_version: + if args.use_cuda and args.cuda_version: toolset += ",cuda=" + args.cuda_version - elif args.cuda_home: + elif args.use_cuda and args.cuda_home: toolset += ",cuda=" + args.cuda_home if args.windows_sdk_version: target_arch += ",version=" + args.windows_sdk_version - - make_extra_args = ["-A", target_arch, "-G", args.cmake_generator] - - if args.use_cuda: - cmake_extra_args += ["-T", toolset] - + cmake_extra_args = ["-A", target_arch, "-T", toolset, "-G", args.cmake_generator] if args.enable_wcos: cmake_extra_defines.append("CMAKE_USER_MAKE_RULES_OVERRIDE=wcos_rules_override.cmake") From 5388d0c71d8e38c8d581427abaa887b17c051eaa Mon Sep 17 00:00:00 2001 From: jslhcl Date: Tue, 14 Jan 2025 10:51:35 -0800 Subject: [PATCH 09/13] fix comments --- cmake/CMakeLists.txt | 11 ++++++++--- samples/GenericInterface/test.cpp | 19 ++++++++----------- tools/ci_build/build.py | 5 ++++- 3 files changed, 20 
insertions(+), 15 deletions(-) diff --git a/cmake/CMakeLists.txt b/cmake/CMakeLists.txt index c299e1b6dc4b0..769531ec5209e 100644 --- a/cmake/CMakeLists.txt +++ b/cmake/CMakeLists.txt @@ -255,6 +255,11 @@ option(onnxruntime_USE_AZURE "Build with azure inferencing support" OFF) option(onnxruntime_USE_LOCK_FREE_QUEUE "Build with lock-free task queue for threadpool." OFF) option(onnxruntime_FORCE_GENERIC_ALGORITHMS "Disable optimized arch-specific algorithms. Use only for testing and debugging generic algorithms." OFF) +option(onnxruntime_USE_TENSORRT_INTERFACE "Build ONNXRuntime shared lib which is compatible with TensorRT EP interface" OFF) +option(onnxruntime_USE_OPENVINO_INTERFACE "Build ONNXRuntime shared lib which is compatible with OpenVINO EP interface" OFF) +option(onnxruntime_USE_VITISAI_INTERFACE "Build ONNXRuntime shared lib which is compatible with Vitis-AI EP interface" OFF) +option(onnxruntime_USE_QNN_INTERFACE "Build ONNXRuntime shared lib which is compatible with QNN EP interface" OFF) + # ENABLE_TRAINING includes all training functionality # The following 2 entry points # 1. 
ORTModule @@ -1032,7 +1037,7 @@ function(onnxruntime_set_compile_flags target_name) endif() set_target_properties(${target_name} PROPERTIES COMPILE_WARNING_AS_ERROR ON) - + if (onnxruntime_USE_CUDA) # Suppress a "conversion_function_not_usable" warning in gsl/span target_compile_options(${target_name} PRIVATE "$<$:SHELL:-Xcudafe \"--diag_suppress=conversion_function_not_usable\">") @@ -1333,7 +1338,7 @@ if (onnxruntime_ENABLE_TRAINING_APIS) ) endif() -if (onnxruntime_USE_OPENVINO OR onnxruntime_USE_OPENVINO_INTERFACE) +if (onnxruntime_USE_OPENVINO OR onnxruntime_USE_OPENVINO_INTERFACE) add_definitions(-DUSE_OPENVINO=1) @@ -1686,7 +1691,7 @@ if (onnxruntime_BUILD_SHARED_LIB OR onnxruntime_BUILD_APPLE_FRAMEWORK) list(APPEND ONNXRUNTIME_CMAKE_FILES onnxruntime) endif() -if (onnxruntime_BUILD_JAVA) +if (onnxruntime_BUILD_JAVA) message(STATUS "Java Build is enabled") list(APPEND ONNXRUNTIME_CMAKE_FILES onnxruntime_java) endif() diff --git a/samples/GenericInterface/test.cpp b/samples/GenericInterface/test.cpp index 64cb95e2b8388..d29d00013cb73 100644 --- a/samples/GenericInterface/test.cpp +++ b/samples/GenericInterface/test.cpp @@ -14,31 +14,28 @@ inline void THROW_ON_ERROR(OrtStatus* status) { void RunRelu(const OrtApi* g_ort, OrtEnv* p_env, OrtSessionOptions* so) { OrtSession* session = nullptr; -#ifdef _WIN32 - THROW_ON_ERROR(g_ort->CreateSession(p_env, L"C:/share/models/relu/Relu.onnx", so, &session)); -#else - THROW_ON_ERROR(g_ort->CreateSession(p_env, "/home/leca/code/onnxruntime/samples/c_test/Relu.onnx", so, &session)); -#endif + // Copy relu.onnx model from winml\test\collateral\models to the same path as the executable + THROW_ON_ERROR(g_ort->CreateSession(p_env, L"relu.onnx", so, &session)); OrtMemoryInfo* memory_info = nullptr; THROW_ON_ERROR(g_ort->CreateCpuMemoryInfo(OrtArenaAllocator, OrtMemTypeDefault, &memory_info)); - float input_data[] = {-3.0f, 5.0f, -2.0f, 4.0f}; - const size_t input_len = 4 * sizeof(float); - const int64_t input_shape[] = {4}; + 
float input_data[] = {-3.0f, 5.0f, -2.0f, 4.0f, 0.0f}; + const size_t input_len = 5 * sizeof(float); + const int64_t input_shape[] = {5}; const size_t shape_len = sizeof(input_shape)/sizeof(input_shape[0]); OrtValue* input_tensor = nullptr; THROW_ON_ERROR(g_ort->CreateTensorWithDataAsOrtValue(memory_info, input_data, input_len, input_shape, shape_len, ONNX_TENSOR_ELEMENT_DATA_TYPE_FLOAT, &input_tensor)); - const char* input_names[] = {"x"}; - const char* output_names[] = {"graphOut"}; + const char* input_names[] = {"X"}; + const char* output_names[] = {"Y"}; OrtValue* output_tensor = nullptr; THROW_ON_ERROR(g_ort->Run(session, nullptr, input_names, (const OrtValue* const*)&input_tensor, 1, output_names, 1, &output_tensor)); float* output_tensor_data = nullptr; THROW_ON_ERROR(g_ort->GetTensorMutableData(output_tensor, (void**)&output_tensor_data)); std::cout<<"Result:\n"; - for (size_t i = 0; i < 4; i++) std::cout< Date: Tue, 14 Jan 2025 14:07:35 -0800 Subject: [PATCH 10/13] Fixing lintrunner warnings ( lintrunner -a) --- .../shared_library/provider_interfaces.h | 8 +- .../core/session/provider_bridge_ort.cc | 8 +- samples/GenericInterface/test.cpp | 90 +++++++++---------- tools/ci_build/build.py | 24 +++-- 4 files changed, 70 insertions(+), 60 deletions(-) diff --git a/onnxruntime/core/providers/shared_library/provider_interfaces.h b/onnxruntime/core/providers/shared_library/provider_interfaces.h index a5bdaa58f6474..f99ced51c9cbb 100644 --- a/onnxruntime/core/providers/shared_library/provider_interfaces.h +++ b/onnxruntime/core/providers/shared_library/provider_interfaces.h @@ -167,7 +167,7 @@ struct ProviderHost { virtual std::string demangle(const char* name) = 0; virtual std::string demangle(const std::string& name) = 0; -//#ifdef USE_CUDA + // #ifdef USE_CUDA virtual std::unique_ptr CreateCUDAAllocator(int16_t device_id, const char* name) = 0; virtual std::unique_ptr CreateCUDAPinnedAllocator(const char* name) = 0; virtual std::unique_ptr 
CreateGPUDataTransfer() = 0; @@ -179,7 +179,7 @@ struct ProviderHost { virtual Status CudaCall_false(int retCode, const char* exprString, const char* libName, int successCode, const char* msg, const char* file, const int line) = 0; virtual void CudaCall_true(int retCode, const char* exprString, const char* libName, int successCode, const char* msg, const char* file, const int line) = 0; -//#endif + // #endif #ifdef USE_MIGRAPHX virtual std::unique_ptr CreateMIGraphXAllocator(int16_t device_id, const char* name) = 0; @@ -1177,9 +1177,9 @@ struct ProviderHost { virtual training::DistributedRunContext& GetDistributedRunContextInstance() = 0; #endif -//#if defined(USE_CUDA) || defined(USE_ROCM) + // #if defined(USE_CUDA) || defined(USE_ROCM) virtual PhiloxGenerator& PhiloxGenerator__Default() = 0; -//#endif + // #endif #ifdef ENABLE_TRAINING_TORCH_INTEROP virtual void contrib__PythonOpBase__Init(contrib::PythonOpBase* p, const OpKernelInfo& info) = 0; diff --git a/onnxruntime/core/session/provider_bridge_ort.cc b/onnxruntime/core/session/provider_bridge_ort.cc index 8d4892879f711..f1a56b43e9a53 100644 --- a/onnxruntime/core/session/provider_bridge_ort.cc +++ b/onnxruntime/core/session/provider_bridge_ort.cc @@ -234,7 +234,7 @@ struct ProviderHostImpl : ProviderHost { void* CPUAllocator__Alloc(CPUAllocator* p, size_t size) override { return p->CPUAllocator::Alloc(size); } void CPUAllocator__Free(CPUAllocator* p, void* allocation) override { return p->CPUAllocator::Free(allocation); } -//#ifdef USE_CUDA + // #ifdef USE_CUDA std::unique_ptr CreateCUDAAllocator(int16_t device_id, const char* name) override { return GetProviderInfo_CUDA().CreateCUDAAllocator(device_id, name); } std::unique_ptr CreateCUDAPinnedAllocator(const char* name) override { return GetProviderInfo_CUDA().CreateCUDAPinnedAllocator(name); } std::unique_ptr CreateGPUDataTransfer() override { return GetProviderInfo_CUDA().CreateGPUDataTransfer(); } @@ -247,7 +247,7 @@ struct ProviderHostImpl : 
ProviderHost { Status CudaCall_false(int retCode, const char* exprString, const char* libName, int successCode, const char* msg, const char* file, const int line) override { return GetProviderInfo_CUDA().CudaCall_false(retCode, exprString, libName, successCode, msg, file, line); } void CudaCall_true(int retCode, const char* exprString, const char* libName, int successCode, const char* msg, const char* file, const int line) override { GetProviderInfo_CUDA().CudaCall_true(retCode, exprString, libName, successCode, msg, file, line); } -//#endif + // #endif #ifdef USE_MIGRAPHX std::unique_ptr CreateMIGraphXAllocator(int16_t device_id, const char* name) override { return GetProviderInfo_MIGraphX().CreateMIGraphXAllocator(device_id, name); } @@ -1419,9 +1419,9 @@ struct ProviderHostImpl : ProviderHost { training::DistributedRunContext& GetDistributedRunContextInstance() override { return training::DistributedRunContext::GetInstance(); } #endif -//#if defined(USE_CUDA) || defined(USE_ROCM) + // #if defined(USE_CUDA) || defined(USE_ROCM) PhiloxGenerator& PhiloxGenerator__Default() override { return PhiloxGenerator::Default(); } -//#endif + // #endif #ifdef ENABLE_TRAINING_TORCH_INTEROP void contrib__PythonOpBase__Init(contrib::PythonOpBase* p, const OpKernelInfo& info) override { p->PythonOpBase::Init(info); } diff --git a/samples/GenericInterface/test.cpp b/samples/GenericInterface/test.cpp index d29d00013cb73..ddfc30a1fedf6 100644 --- a/samples/GenericInterface/test.cpp +++ b/samples/GenericInterface/test.cpp @@ -6,64 +6,64 @@ const OrtApi* g_ort = OrtGetApiBase()->GetApi(ORT_API_VERSION); inline void THROW_ON_ERROR(OrtStatus* status) { - if (status != nullptr) { - std::cout<<"ErrorMessage:"<GetErrorMessage(status)<<"\n"; - abort(); - } + if (status != nullptr) { + std::cout << "ErrorMessage:" << g_ort->GetErrorMessage(status) << "\n"; + abort(); + } } void RunRelu(const OrtApi* g_ort, OrtEnv* p_env, OrtSessionOptions* so) { - OrtSession* session = nullptr; - // Copy 
relu.onnx model from winml\test\collateral\models to the same path as the executable - THROW_ON_ERROR(g_ort->CreateSession(p_env, L"relu.onnx", so, &session)); + OrtSession* session = nullptr; + // Copy relu.onnx model from winml\test\collateral\models to the same path as the executable + THROW_ON_ERROR(g_ort->CreateSession(p_env, L"relu.onnx", so, &session)); - OrtMemoryInfo* memory_info = nullptr; - THROW_ON_ERROR(g_ort->CreateCpuMemoryInfo(OrtArenaAllocator, OrtMemTypeDefault, &memory_info)); - float input_data[] = {-3.0f, 5.0f, -2.0f, 4.0f, 0.0f}; - const size_t input_len = 5 * sizeof(float); - const int64_t input_shape[] = {5}; - const size_t shape_len = sizeof(input_shape)/sizeof(input_shape[0]); + OrtMemoryInfo* memory_info = nullptr; + THROW_ON_ERROR(g_ort->CreateCpuMemoryInfo(OrtArenaAllocator, OrtMemTypeDefault, &memory_info)); + float input_data[] = {-3.0f, 5.0f, -2.0f, 4.0f, 0.0f}; + const size_t input_len = 5 * sizeof(float); + const int64_t input_shape[] = {5}; + const size_t shape_len = sizeof(input_shape) / sizeof(input_shape[0]); - OrtValue* input_tensor = nullptr; - THROW_ON_ERROR(g_ort->CreateTensorWithDataAsOrtValue(memory_info, input_data, input_len, input_shape, shape_len, ONNX_TENSOR_ELEMENT_DATA_TYPE_FLOAT, &input_tensor)); + OrtValue* input_tensor = nullptr; + THROW_ON_ERROR(g_ort->CreateTensorWithDataAsOrtValue(memory_info, input_data, input_len, input_shape, shape_len, ONNX_TENSOR_ELEMENT_DATA_TYPE_FLOAT, &input_tensor)); - const char* input_names[] = {"X"}; - const char* output_names[] = {"Y"}; - OrtValue* output_tensor = nullptr; - THROW_ON_ERROR(g_ort->Run(session, nullptr, input_names, (const OrtValue* const*)&input_tensor, 1, output_names, 1, &output_tensor)); + const char* input_names[] = {"X"}; + const char* output_names[] = {"Y"}; + OrtValue* output_tensor = nullptr; + THROW_ON_ERROR(g_ort->Run(session, nullptr, input_names, (const OrtValue* const*)&input_tensor, 1, output_names, 1, &output_tensor)); - float* output_tensor_data = 
nullptr; - THROW_ON_ERROR(g_ort->GetTensorMutableData(output_tensor, (void**)&output_tensor_data)); - std::cout<<"Result:\n"; - for (size_t i = 0; i < 5; i++) std::cout<GetTensorMutableData(output_tensor, (void**)&output_tensor_data)); + std::cout << "Result:\n"; + for (size_t i = 0; i < 5; i++) std::cout << output_tensor_data[i] << " \n"; } int main() { - int a; - std::cout<<"prepare to attach:"; - std::cin>>a; + int a; + std::cout << "prepare to attach:"; + std::cin >> a; - OrtEnv* p_env = nullptr; - OrtLoggingLevel log_level = OrtLoggingLevel::ORT_LOGGING_LEVEL_ERROR;//OrtLoggingLevel::ORT_LOGGING_LEVEL_INFO; - THROW_ON_ERROR(g_ort->CreateEnv(log_level, "", &p_env)); - OrtSessionOptions* so = nullptr; - THROW_ON_ERROR(g_ort->CreateSessionOptions(&so)); + OrtEnv* p_env = nullptr; + OrtLoggingLevel log_level = OrtLoggingLevel::ORT_LOGGING_LEVEL_ERROR; // OrtLoggingLevel::ORT_LOGGING_LEVEL_INFO; + THROW_ON_ERROR(g_ort->CreateEnv(log_level, "", &p_env)); + OrtSessionOptions* so = nullptr; + THROW_ON_ERROR(g_ort->CreateSessionOptions(&so)); - OrtTensorRTProviderOptionsV2* tensorrt_options = nullptr; - THROW_ON_ERROR(g_ort->CreateTensorRTProviderOptions(&tensorrt_options)); - THROW_ON_ERROR(g_ort->SessionOptionsAppendExecutionProvider_TensorRT_V2(so, tensorrt_options)); + OrtTensorRTProviderOptionsV2* tensorrt_options = nullptr; + THROW_ON_ERROR(g_ort->CreateTensorRTProviderOptions(&tensorrt_options)); + THROW_ON_ERROR(g_ort->SessionOptionsAppendExecutionProvider_TensorRT_V2(so, tensorrt_options)); - std::unordered_map ov_options; - ov_options["device_type"] = "CPU"; - ov_options["precision"] = "FP32"; - std::vector keys, values; - for (const auto& entry : ov_options) { - keys.push_back(entry.first.c_str()); - values.push_back(entry.second.c_str()); - } - THROW_ON_ERROR(g_ort->SessionOptionsAppendExecutionProvider_OpenVINO_V2(so, keys.data(), values.data(), keys.size())); + std::unordered_map ov_options; + ov_options["device_type"] = "CPU"; + ov_options["precision"] = 
"FP32"; + std::vector keys, values; + for (const auto& entry : ov_options) { + keys.push_back(entry.first.c_str()); + values.push_back(entry.second.c_str()); + } + THROW_ON_ERROR(g_ort->SessionOptionsAppendExecutionProvider_OpenVINO_V2(so, keys.data(), values.data(), keys.size())); - RunRelu(g_ort, p_env, so); + RunRelu(g_ort, p_env, so); - return 0; + return 0; } diff --git a/tools/ci_build/build.py b/tools/ci_build/build.py index 0234a9772bd9a..456d51ab41305 100644 --- a/tools/ci_build/build.py +++ b/tools/ci_build/build.py @@ -764,7 +764,11 @@ def convert_arg_line_to_args(self, arg_line): parser.add_argument("--use_triton_kernel", action="store_true", help="Use triton compiled kernels") parser.add_argument("--use_lock_free_queue", action="store_true", help="Use lock-free task queue for threadpool.") - parser.add_argument("--enable_generic_interface", action="store_true", help="build ORT shared library and compatible bridge with primary EPs(tensorRT, OpenVino, Qnn, vitisai) but not tests") + parser.add_argument( + "--enable_generic_interface", + action="store_true", + help="build ORT shared library and compatible bridge with primary EPs(tensorRT, OpenVino, Qnn, vitisai) but not tests", + ) if not is_windows(): parser.add_argument( @@ -999,7 +1003,9 @@ def generate_build_tree( disable_optional_type = "optional" in types_to_disable disable_sparse_tensors = "sparsetensor" in types_to_disable - enable_qnn_interface = True if((args.arm64 or args.arm or args.arm64ec) and (args.enable_generic_interface)) else False + enable_qnn_interface = ( + True if ((args.arm64 or args.arm or args.arm64ec) and (args.enable_generic_interface)) else False + ) cmake_args += [ "-Donnxruntime_RUN_ONNX_TESTS=" + ("ON" if args.enable_onnx_tests else "OFF"), @@ -1029,10 +1035,14 @@ def generate_build_tree( "-Donnxruntime_USE_TENSORRT_BUILTIN_PARSER=" + ("ON" if args.use_tensorrt_builtin_parser and not args.use_tensorrt_oss_parser else "OFF"), # interface variables are used only for building 
onnxruntime/onnxruntime_shared.dll but not EPs - "-Donnxruntime_USE_TENSORRT_INTERFACE=" + ("ON" if (args.enable_generic_interface and not enable_qnn_interface) else "OFF"), - "-Donnxruntime_USE_OPENVINO_INTERFACE=" + ("ON" if (args.enable_generic_interface and not enable_qnn_interface) else "OFF"), - "-Donnxruntime_USE_VITISAI_INTERFACE=" + ("ON" if (args.enable_generic_interface and not enable_qnn_interface) else "OFF"), - "-Donnxruntime_USE_QNN_INTERFACE=" + ("ON" if (args.enable_generic_interface and enable_qnn_interface) else "OFF"), + "-Donnxruntime_USE_TENSORRT_INTERFACE=" + + ("ON" if (args.enable_generic_interface and not enable_qnn_interface) else "OFF"), + "-Donnxruntime_USE_OPENVINO_INTERFACE=" + + ("ON" if (args.enable_generic_interface and not enable_qnn_interface) else "OFF"), + "-Donnxruntime_USE_VITISAI_INTERFACE=" + + ("ON" if (args.enable_generic_interface and not enable_qnn_interface) else "OFF"), + "-Donnxruntime_USE_QNN_INTERFACE=" + + ("ON" if (args.enable_generic_interface and enable_qnn_interface) else "OFF"), # set vars for migraphx "-Donnxruntime_USE_MIGRAPHX=" + ("ON" if args.use_migraphx else "OFF"), "-Donnxruntime_DISABLE_CONTRIB_OPS=" + ("ON" if args.disable_contrib_ops else "OFF"), @@ -2688,7 +2698,7 @@ def main(): source_dir = os.path.normpath(os.path.join(script_dir, "..", "..")) # if using cuda, setup cuda paths and env vars - #cuda_home, cudnn_home = setup_cuda_vars(args) + # cuda_home, cudnn_home = setup_cuda_vars(args) cuda_home = "" cudnn_home = "" if args.use_cuda: From 52e041d273007a3d31bee4fe363b12ab90d82e73 Mon Sep 17 00:00:00 2001 From: Karim Vadsariya Date: Tue, 14 Jan 2025 16:26:55 -0800 Subject: [PATCH 11/13] Few more issues --- cmake/CMakeLists.txt | 2 +- cmake/onnxruntime.cmake | 6 +++--- onnxruntime/core/session/provider_bridge_ort.cc | 2 -- tools/ci_build/build.py | 4 +--- 4 files changed, 5 insertions(+), 9 deletions(-) diff --git a/cmake/CMakeLists.txt b/cmake/CMakeLists.txt index 769531ec5209e..7c48dca5cd895 
100644 --- a/cmake/CMakeLists.txt +++ b/cmake/CMakeLists.txt @@ -1395,7 +1395,7 @@ if (onnxruntime_USE_OPENVINO OR onnxruntime_USE_OPENVINO_INTERFACE) endif() -if (onnxruntime_USE_VITISAI) #TODO[Karim] +if (onnxruntime_USE_VITISAI) set(CMAKE_MODULE_PATH "${CMAKE_MODULE_PATH};${CMAKE_CURRENT_LIST_DIR}") endif() diff --git a/cmake/onnxruntime.cmake b/cmake/onnxruntime.cmake index 293429973670f..732c0511d400f 100644 --- a/cmake/onnxruntime.cmake +++ b/cmake/onnxruntime.cmake @@ -37,7 +37,7 @@ function(get_c_cxx_api_headers HEADERS_VAR) endif() # need to add header files for enabled EPs - foreach(f ${ONNXRUNTIME_PROVIDER_NAMES}) #TODO[Karim] + foreach(f ${ONNXRUNTIME_PROVIDER_NAMES}) # The header files in include/onnxruntime/core/providers/cuda directory cannot be flattened to the same directory # with onnxruntime_c_api.h . Most other EPs probably also do not work in this way. if((NOT f STREQUAL cuda) AND (NOT f STREQUAL rocm)) @@ -66,12 +66,12 @@ if(NOT ${CMAKE_SYSTEM_NAME} MATCHES "AIX") add_custom_command(OUTPUT ${SYMBOL_FILE} ${CMAKE_CURRENT_BINARY_DIR}/generated_source.c COMMAND ${Python_EXECUTABLE} "${REPO_ROOT}/tools/ci_build/gen_def.py" --version_file "${ONNXRUNTIME_ROOT}/../VERSION_NUMBER" --src_root "${ONNXRUNTIME_ROOT}" - --config ${ONNXRUNTIME_PROVIDER_NAMES} --style=${OUTPUT_STYLE} --output ${SYMBOL_FILE} #TODO[Karim] + --config ${ONNXRUNTIME_PROVIDER_NAMES} --style=${OUTPUT_STYLE} --output ${SYMBOL_FILE} --output_source ${CMAKE_CURRENT_BINARY_DIR}/generated_source.c DEPENDS ${SYMBOL_FILES} WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}) -add_custom_target(onnxruntime_generate_def ALL DEPENDS ${SYMBOL_FILE} ${CMAKE_CURRENT_BINARY_DIR}/generated_source.c) #TODO[Karim] +add_custom_target(onnxruntime_generate_def ALL DEPENDS ${SYMBOL_FILE} ${CMAKE_CURRENT_BINARY_DIR}/generated_source.c) endif() if(WIN32) onnxruntime_add_shared_library(onnxruntime diff --git a/onnxruntime/core/session/provider_bridge_ort.cc b/onnxruntime/core/session/provider_bridge_ort.cc 
index f1a56b43e9a53..1f3968ca40d07 100644 --- a/onnxruntime/core/session/provider_bridge_ort.cc +++ b/onnxruntime/core/session/provider_bridge_ort.cc @@ -234,7 +234,6 @@ struct ProviderHostImpl : ProviderHost { void* CPUAllocator__Alloc(CPUAllocator* p, size_t size) override { return p->CPUAllocator::Alloc(size); } void CPUAllocator__Free(CPUAllocator* p, void* allocation) override { return p->CPUAllocator::Free(allocation); } - // #ifdef USE_CUDA std::unique_ptr CreateCUDAAllocator(int16_t device_id, const char* name) override { return GetProviderInfo_CUDA().CreateCUDAAllocator(device_id, name); } std::unique_ptr CreateCUDAPinnedAllocator(const char* name) override { return GetProviderInfo_CUDA().CreateCUDAPinnedAllocator(name); } std::unique_ptr CreateGPUDataTransfer() override { return GetProviderInfo_CUDA().CreateGPUDataTransfer(); } @@ -247,7 +246,6 @@ struct ProviderHostImpl : ProviderHost { Status CudaCall_false(int retCode, const char* exprString, const char* libName, int successCode, const char* msg, const char* file, const int line) override { return GetProviderInfo_CUDA().CudaCall_false(retCode, exprString, libName, successCode, msg, file, line); } void CudaCall_true(int retCode, const char* exprString, const char* libName, int successCode, const char* msg, const char* file, const int line) override { GetProviderInfo_CUDA().CudaCall_true(retCode, exprString, libName, successCode, msg, file, line); } - // #endif #ifdef USE_MIGRAPHX std::unique_ptr CreateMIGraphXAllocator(int16_t device_id, const char* name) override { return GetProviderInfo_MIGraphX().CreateMIGraphXAllocator(device_id, name); } diff --git a/tools/ci_build/build.py b/tools/ci_build/build.py index 456d51ab41305..e01e85fca083e 100644 --- a/tools/ci_build/build.py +++ b/tools/ci_build/build.py @@ -1003,9 +1003,7 @@ def generate_build_tree( disable_optional_type = "optional" in types_to_disable disable_sparse_tensors = "sparsetensor" in types_to_disable - enable_qnn_interface = ( - True if 
((args.arm64 or args.arm or args.arm64ec) and (args.enable_generic_interface)) else False - ) + enable_qnn_interface = bool((args.arm64 or args.arm or args.arm64ec) and (args.enable_generic_interface)) cmake_args += [ "-Donnxruntime_RUN_ONNX_TESTS=" + ("ON" if args.enable_onnx_tests else "OFF"), From 1c58fee656aed5086ee381dbe9f5c7ea699f43c6 Mon Sep 17 00:00:00 2001 From: Karim Vadsariya Date: Wed, 15 Jan 2025 15:23:20 -0800 Subject: [PATCH 12/13] Fix Pipeline issues with QNN --- tools/ci_build/build.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/tools/ci_build/build.py b/tools/ci_build/build.py index e01e85fca083e..74b8cfe270069 100644 --- a/tools/ci_build/build.py +++ b/tools/ci_build/build.py @@ -1314,6 +1314,7 @@ def generate_build_tree( cmake_args += ["-DCMAKE_XCODE_ATTRIBUTE_DEVELOPMENT_TEAM=" + args.xcode_code_signing_team_id] if args.use_qnn: + if args.qnn_home is None or os.path.exists(args.qnn_home) is False: raise BuildError("qnn_home=" + qnn_home + " not valid." 
+ " qnn_home paths must be specified and valid.") cmake_args += ["-Donnxruntime_USE_QNN=ON"] @@ -2713,8 +2714,9 @@ def main(): armnn_home = args.armnn_home armnn_libs = args.armnn_libs - qnn_home = args.qnn_home qnn_home = "" + if args.use_qnn: + qnn_home = args.qnn_home # if using tensorrt, setup tensorrt paths tensorrt_home = "" From 0f8267499824b192b4a467fa2c7513c2a0c4a5a4 Mon Sep 17 00:00:00 2001 From: Karim Vadsariya Date: Sat, 18 Jan 2025 10:25:01 -0800 Subject: [PATCH 13/13] Incorporate code review comments --- cmake/CMakeLists.txt | 4 ++-- cmake/external/onnxruntime_external_deps.cmake | 1 - cmake/onnxruntime_providers.cmake | 6 ++++++ .../core/providers/shared_library/provider_interfaces.h | 1 - tools/ci_build/build.py | 2 ++ 5 files changed, 10 insertions(+), 4 deletions(-) diff --git a/cmake/CMakeLists.txt b/cmake/CMakeLists.txt index 7c48dca5cd895..87cc6f0fce6f6 100644 --- a/cmake/CMakeLists.txt +++ b/cmake/CMakeLists.txt @@ -256,6 +256,7 @@ option(onnxruntime_USE_LOCK_FREE_QUEUE "Build with lock-free task queue for thre option(onnxruntime_FORCE_GENERIC_ALGORITHMS "Disable optimized arch-specific algorithms. Use only for testing and debugging generic algorithms." 
OFF) option(onnxruntime_USE_TENSORRT_INTERFACE "Build ONNXRuntime shared lib which is compatible with TensorRT EP interface" OFF) +option(onnxruntime_USE_CUDA_INTERFACE "Build ONNXRuntime shared lib which is compatible with Cuda EP interface" OFF) option(onnxruntime_USE_OPENVINO_INTERFACE "Build ONNXRuntime shared lib which is compatible with OpenVINO EP interface" OFF) option(onnxruntime_USE_VITISAI_INTERFACE "Build ONNXRuntime shared lib which is compatible with Vitis-AI EP interface" OFF) option(onnxruntime_USE_QNN_INTERFACE "Build ONNXRuntime shared lib which is compatible with QNN EP interface" OFF) @@ -1037,7 +1038,6 @@ function(onnxruntime_set_compile_flags target_name) endif() set_target_properties(${target_name} PROPERTIES COMPILE_WARNING_AS_ERROR ON) - if (onnxruntime_USE_CUDA) # Suppress a "conversion_function_not_usable" warning in gsl/span target_compile_options(${target_name} PRIVATE "$<$:SHELL:-Xcudafe \"--diag_suppress=conversion_function_not_usable\">") @@ -1045,7 +1045,7 @@ function(onnxruntime_set_compile_flags target_name) endif() if (MSVC) foreach(CMAKE_CUDA_TOOLKIT_INCLUDE_DIRECTORY ${CMAKE_CUDA_TOOLKIT_INCLUDE_DIRECTORIES}) - #target_compile_options(${target_name} PRIVATE "$<$:/external:I${CMAKE_CUDA_TOOLKIT_INCLUDE_DIRECTORY}>") + target_compile_options(${target_name} PRIVATE "$<$:/external:I${CMAKE_CUDA_TOOLKIT_INCLUDE_DIRECTORY}>") endforeach() foreach(onnxruntime_external_lib IN LISTS onnxruntime_EXTERNAL_LIBRARIES) diff --git a/cmake/external/onnxruntime_external_deps.cmake b/cmake/external/onnxruntime_external_deps.cmake index 6b34317c5cb42..ee7abcbad025c 100644 --- a/cmake/external/onnxruntime_external_deps.cmake +++ b/cmake/external/onnxruntime_external_deps.cmake @@ -682,7 +682,6 @@ if (onnxruntime_USE_WEBGPU) endif() set(onnxruntime_LINK_DIRS) - if (onnxruntime_USE_CUDA) find_package(CUDAToolkit REQUIRED) diff --git a/cmake/onnxruntime_providers.cmake b/cmake/onnxruntime_providers.cmake index dad3a5f93fd65..e60f7568a18df 100644 --- 
a/cmake/onnxruntime_providers.cmake +++ b/cmake/onnxruntime_providers.cmake @@ -59,6 +59,12 @@ function(add_op_reduction_include_dirs target) endfunction() +if(onnxruntime_USE_VITISAI) + set(PROVIDERS_VITISAI onnxruntime_providers_vitisai) +endif() +if(onnxruntime_USE_CUDA) + set(PROVIDERS_CUDA onnxruntime_providers_cuda) +endif() if(onnxruntime_USE_COREML) set(PROVIDERS_COREML onnxruntime_providers_coreml coreml_proto) endif() diff --git a/onnxruntime/core/providers/shared_library/provider_interfaces.h b/onnxruntime/core/providers/shared_library/provider_interfaces.h index f99ced51c9cbb..c1af17fb583c7 100644 --- a/onnxruntime/core/providers/shared_library/provider_interfaces.h +++ b/onnxruntime/core/providers/shared_library/provider_interfaces.h @@ -189,7 +189,6 @@ struct ProviderHost { #ifdef USE_ROCM virtual std::unique_ptr CreateROCMAllocator(int16_t device_id, const char* name) = 0; virtual std::unique_ptr CreateROCMPinnedAllocator(const char* name) = 0; - virtual std::unique_ptr CreateGPUDataTransfer() = 0; virtual void rocm__Impl_Cast(void* stream, const int64_t* input_data, int32_t* output_data, size_t count) = 0; virtual void rocm__Impl_Cast(void* stream, const int32_t* input_data, int64_t* output_data, size_t count) = 0; diff --git a/tools/ci_build/build.py b/tools/ci_build/build.py index 74b8cfe270069..6b6e707008c63 100644 --- a/tools/ci_build/build.py +++ b/tools/ci_build/build.py @@ -1035,6 +1035,8 @@ def generate_build_tree( # interface variables are used only for building onnxruntime/onnxruntime_shared.dll but not EPs "-Donnxruntime_USE_TENSORRT_INTERFACE=" + ("ON" if (args.enable_generic_interface and not enable_qnn_interface) else "OFF"), + "-Donnxruntime_USE_CUDA_INTERFACE=" + + ("ON" if (args.enable_generic_interface and not enable_qnn_interface) else "OFF"), "-Donnxruntime_USE_OPENVINO_INTERFACE=" + ("ON" if (args.enable_generic_interface and not enable_qnn_interface) else "OFF"), "-Donnxruntime_USE_VITISAI_INTERFACE="