[onnxruntime/build] Add new flag enable_generic_interface to build primary EPs by default #23342
base: main
CMakeLists.txt (new file)

Review comment: Sample code is better placed in https://github.com/microsoft/onnxruntime-inference-examples/ so that it is exempt from compliance requirements (because we do not ship that code).

@@ -0,0 +1,13 @@
# usage:
# cd build/
# cmake -S ../ -B ./ -DCMAKE_BUILD_TYPE=Debug
# cmake --build ./
# NOTE: For Windows, copy onnxruntime.dll and onnxruntime.pdb into the same folder as GenericOrtEpInterface.exe; otherwise, at runtime,
# the loader will search the default system path (C:\Windows\System32) for onnxruntime.dll
cmake_minimum_required(VERSION 3.26)
project(GenericOrtEpInterface)
add_executable(GenericOrtEpInterface test.cpp)

target_include_directories(GenericOrtEpInterface PUBLIC "../../include/onnxruntime")
target_link_libraries(GenericOrtEpInterface PUBLIC "C:/Users/leca/source/onnxruntime3/samples/GenericInterface/build/Debug/onnxruntime.lib")
test.cpp (new file)

@@ -0,0 +1,69 @@
#include <iostream>
#include <string>
#include <unordered_map>
#include <vector>  // std::vector is used below; this include was missing in the diff
#include "core/session/onnxruntime_c_api.h"

const OrtApi* g_ort = OrtGetApiBase()->GetApi(ORT_API_VERSION);

Review comment: Please check whether the returned value is NULL.
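A minimal sketch of the check the reviewer is asking for (GetOrtApiChecked is a hypothetical helper, not part of the PR; it additionally needs <cstdlib>, and relies on the documented behavior that GetApi returns NULL when the requested ORT_API_VERSION is not supported by the loaded runtime):

const OrtApi* GetOrtApiChecked() {
  const OrtApi* api = OrtGetApiBase()->GetApi(ORT_API_VERSION);
  if (api == nullptr) {
    // NULL means the onnxruntime binary is older than the requested API version.
    std::cerr << "GetApi(" << ORT_API_VERSION << ") returned NULL\n";
    std::abort();
  }
  return api;
}
const OrtApi* g_ort = GetOrtApiChecked();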
inline void THROW_ON_ERROR(OrtStatus* status) {
  if (status != nullptr) {
    std::cout << "ErrorMessage:" << g_ort->GetErrorMessage(status) << "\n";
    abort();
  }
}

void RunRelu(const OrtApi* g_ort, OrtEnv* p_env, OrtSessionOptions* so) {
  OrtSession* session = nullptr;
  // Copy relu.onnx from winml\test\collateral\models into the same folder as the executable
  THROW_ON_ERROR(g_ort->CreateSession(p_env, L"relu.onnx", so, &session));

  OrtMemoryInfo* memory_info = nullptr;
  THROW_ON_ERROR(g_ort->CreateCpuMemoryInfo(OrtArenaAllocator, OrtMemTypeDefault, &memory_info));
  float input_data[] = {-3.0f, 5.0f, -2.0f, 4.0f, 0.0f};
  const size_t input_len = 5 * sizeof(float);
  const int64_t input_shape[] = {5};
  const size_t shape_len = sizeof(input_shape) / sizeof(input_shape[0]);

  OrtValue* input_tensor = nullptr;
  THROW_ON_ERROR(g_ort->CreateTensorWithDataAsOrtValue(memory_info, input_data, input_len, input_shape, shape_len, ONNX_TENSOR_ELEMENT_DATA_TYPE_FLOAT, &input_tensor));

  const char* input_names[] = {"X"};
  const char* output_names[] = {"Y"};
  OrtValue* output_tensor = nullptr;
  THROW_ON_ERROR(g_ort->Run(session, nullptr, input_names, (const OrtValue* const*)&input_tensor, 1, output_names, 1, &output_tensor));

  float* output_tensor_data = nullptr;
  THROW_ON_ERROR(g_ort->GetTensorMutableData(output_tensor, (void**)&output_tensor_data));
  std::cout << "Result:\n";
  for (size_t i = 0; i < 5; i++) std::cout << output_tensor_data[i] << " \n";
}
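Since the model is ReLU (negatives clamp to zero), the expected output for the input above is {0, 5, 0, 4, 0}. A hedged verification sketch that could be appended to RunRelu (not part of the PR):

  const float expected[] = {0.0f, 5.0f, 0.0f, 4.0f, 0.0f};
  for (size_t i = 0; i < 5; i++) {
    // Exact comparison is safe here: ReLU either copies the input or produces exactly 0.
    if (output_tensor_data[i] != expected[i]) {
      std::cout << "Mismatch at index " << i << "\n";
      abort();
    }
  }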
int main() {
  // Pause so a debugger can be attached before the interesting work starts.
  int a;
  std::cout << "prepare to attach:";
  std::cin >> a;

  OrtEnv* p_env = nullptr;
  OrtLoggingLevel log_level = OrtLoggingLevel::ORT_LOGGING_LEVEL_ERROR;  // or ORT_LOGGING_LEVEL_INFO for more detail
  THROW_ON_ERROR(g_ort->CreateEnv(log_level, "", &p_env));
  OrtSessionOptions* so = nullptr;
  THROW_ON_ERROR(g_ort->CreateSessionOptions(&so));

  OrtTensorRTProviderOptionsV2* tensorrt_options = nullptr;
  THROW_ON_ERROR(g_ort->CreateTensorRTProviderOptions(&tensorrt_options));
  THROW_ON_ERROR(g_ort->SessionOptionsAppendExecutionProvider_TensorRT_V2(so, tensorrt_options));

  std::unordered_map<std::string, std::string> ov_options;
  ov_options["device_type"] = "CPU";
  ov_options["precision"] = "FP32";
  std::vector<const char*> keys, values;
  for (const auto& entry : ov_options) {
    keys.push_back(entry.first.c_str());
    values.push_back(entry.second.c_str());
  }
  THROW_ON_ERROR(g_ort->SessionOptionsAppendExecutionProvider_OpenVINO_V2(so, keys.data(), values.data(), keys.size()));

  RunRelu(g_ort, p_env, so);

  return 0;
}
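Worth noting alongside the review comments: the sample never releases the objects it creates. A minimal teardown sketch using release functions from the same OrtApi (the session, memory info, and tensors are local to RunRelu, so those calls belong at the end of that function; the rest at the end of main):

  // at the end of RunRelu:
  g_ort->ReleaseValue(output_tensor);
  g_ort->ReleaseValue(input_tensor);
  g_ort->ReleaseMemoryInfo(memory_info);
  g_ort->ReleaseSession(session);
  // at the end of main, before return:
  g_ort->ReleaseTensorRTProviderOptions(tensorrt_options);
  g_ort->ReleaseSessionOptions(so);
  g_ort->ReleaseEnv(p_env);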
tools/ci_build/build.py
@@ -764,6 +764,12 @@ def convert_arg_line_to_args(self, arg_line):
    parser.add_argument("--use_triton_kernel", action="store_true", help="Use triton compiled kernels")
    parser.add_argument("--use_lock_free_queue", action="store_true", help="Use lock-free task queue for threadpool.")

+    parser.add_argument(
+        "--enable_generic_interface",
+        action="store_true",
+        help="Build the ORT shared library and compatible bridges with the primary EPs (TensorRT, OpenVINO, QNN, VitisAI), but not the tests",
+    )
+
    if not is_windows():
        parser.add_argument(
            "--allow_running_as_root",
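For context, once this lands, a generic-interface build would be invoked roughly as `python tools/ci_build/build.py --enable_generic_interface` plus the usual platform flags (a hypothetical invocation; only the new flag itself comes from this diff).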
@@ -997,6 +1003,8 @@ def generate_build_tree(
    disable_optional_type = "optional" in types_to_disable
    disable_sparse_tensors = "sparsetensor" in types_to_disable

+    enable_qnn_interface = bool((args.arm64 or args.arm or args.arm64ec) and (args.enable_generic_interface))
+
Review comment: --arm64, --arm, and --arm64ec are Windows-only and for cross-compiling only.
Reply: The intention here is that when building onnxruntime for an ARM64 target platform, only the QNN EP is applicable for now.

    cmake_args += [
        "-Donnxruntime_RUN_ONNX_TESTS=" + ("ON" if args.enable_onnx_tests else "OFF"),
        "-Donnxruntime_GENERATE_TEST_REPORTS=ON",
@@ -1024,6 +1032,17 @@ def generate_build_tree(
        "-Donnxruntime_USE_TENSORRT=" + ("ON" if args.use_tensorrt else "OFF"),
        "-Donnxruntime_USE_TENSORRT_BUILTIN_PARSER="
        + ("ON" if args.use_tensorrt_builtin_parser and not args.use_tensorrt_oss_parser else "OFF"),
+        # interface variables are used only for building onnxruntime.dll / onnxruntime_shared.dll, not the EPs

Review comment: Misses onnxruntime_USE_CUDA_INTERFACE.

+        "-Donnxruntime_USE_TENSORRT_INTERFACE="
+        + ("ON" if (args.enable_generic_interface and not enable_qnn_interface) else "OFF"),
+        "-Donnxruntime_USE_CUDA_INTERFACE="
+        + ("ON" if (args.enable_generic_interface and not enable_qnn_interface) else "OFF"),
+        "-Donnxruntime_USE_OPENVINO_INTERFACE="
+        + ("ON" if (args.enable_generic_interface and not enable_qnn_interface) else "OFF"),
+        "-Donnxruntime_USE_VITISAI_INTERFACE="
+        + ("ON" if (args.enable_generic_interface and not enable_qnn_interface) else "OFF"),
+        "-Donnxruntime_USE_QNN_INTERFACE="
+        + ("ON" if (args.enable_generic_interface and enable_qnn_interface) else "OFF"),
        # set vars for migraphx
        "-Donnxruntime_USE_MIGRAPHX=" + ("ON" if args.use_migraphx else "OFF"),
        "-Donnxruntime_DISABLE_CONTRIB_OPS=" + ("ON" if args.disable_contrib_ops else "OFF"),
@@ -1297,6 +1316,7 @@ def generate_build_tree(
        cmake_args += ["-DCMAKE_XCODE_ATTRIBUTE_DEVELOPMENT_TEAM=" + args.xcode_code_signing_team_id]

    if args.use_qnn:
        if args.qnn_home is None or os.path.exists(args.qnn_home) is False:
            raise BuildError("qnn_home=" + qnn_home + " not valid." + " qnn_home paths must be specified and valid.")
        cmake_args += ["-Donnxruntime_USE_QNN=ON"]
@@ -1454,6 +1474,12 @@ def generate_build_tree(
            "-Donnxruntime_USE_FULL_PROTOBUF=ON",
        ]

+    # When this flag is enabled, we only build the ONNXRuntime shared library, expecting a compatible EP
+    # shared library to be built in a separate process, so we skip the tests for now; the ONNXRuntime
+    # shared library built under this flag is not expected to work alone.
+    if args.enable_generic_interface:
+        cmake_args += ["-Donnxruntime_BUILD_UNIT_TESTS=OFF"]
+
    if args.enable_lazy_tensor:
        import torch
@@ -2567,6 +2593,9 @@ def main():
    # Disable ONNX Runtime's builtin memory checker
    args.disable_memleak_checker = True

+    if args.enable_generic_interface:
+        args.test = False
+
    # If there was no explicit argument saying what to do, default
    # to update, build and test (for native builds).
    if not (args.update or args.clean or args.build or args.test or args.gen_doc):
@@ -2670,7 +2699,11 @@ def main():
    source_dir = os.path.normpath(os.path.join(script_dir, "..", ".."))

    # if using cuda, setup cuda paths and env vars
-    cuda_home, cudnn_home = setup_cuda_vars(args)
+    # cuda_home, cudnn_home = setup_cuda_vars(args)
+    cuda_home = ""
+    cudnn_home = ""
+    if args.use_cuda:
+        cuda_home, cudnn_home = setup_cuda_vars(args)

    mpi_home = args.mpi_home
    nccl_home = args.nccl_home
@@ -2683,10 +2716,14 @@ def main():
    armnn_home = args.armnn_home
    armnn_libs = args.armnn_libs

-    qnn_home = args.qnn_home
+    qnn_home = ""
+    if args.use_qnn:
+        qnn_home = args.qnn_home

    # if using tensorrt, setup tensorrt paths
-    tensorrt_home = setup_tensorrt_vars(args)
+    tensorrt_home = ""
+    if args.use_tensorrt:
+        tensorrt_home = setup_tensorrt_vars(args)

    # if using migraphx, setup migraphx paths
    migraphx_home = setup_migraphx_vars(args)
@@ -2771,9 +2808,9 @@ def main():
            toolset = "host=" + host_arch + ",version=" + args.msvc_toolset
        else:
            toolset = "host=" + host_arch
-        if args.cuda_version:
+        if args.use_cuda and args.cuda_version:
            toolset += ",cuda=" + args.cuda_version
-        elif args.cuda_home:
+        elif args.use_cuda and args.cuda_home:
            toolset += ",cuda=" + args.cuda_home
        if args.windows_sdk_version:
            target_arch += ",version=" + args.windows_sdk_version
Review comment: Are onnxruntime_USE_TENSORRT_INTERFACE and onnxruntime_USE_TENSORRT exclusive? Could both of them be set?