
Add an option in OpenVINOProviderOptions to support the queue-based overload for creating ClContext #19699


Open: wants to merge 2 commits into main
4 changes: 3 additions & 1 deletion include/onnxruntime/core/session/onnxruntime_c_api.h
@@ -623,7 +623,8 @@ typedef struct OrtOpenVINOProviderOptions {
cache_dir{},
context{},
enable_opencl_throttling{},
enable_dynamic_shapes{} {}
enable_dynamic_shapes{},
queue{} {}
Contributor comment:
The struct is frozen and it's our legacy API. Kindly upgrade to the ORT ProviderOptions Map structure.

#endif
/** \brief Device type string
*
@@ -637,6 +638,7 @@ typedef struct OrtOpenVINOProviderOptions {
void* context;
unsigned char enable_opencl_throttling; ///< 0 = disabled, nonzero = enabled
unsigned char enable_dynamic_shapes; ///< 0 = disabled, nonzero = enabled
void* queue;
} OrtOpenVINOProviderOptions;

struct OrtApi;
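For orientation, a minimal caller-side sketch of the new field through the legacy struct-based C API (not part of this diff). It assumes an ONNX Runtime build with IO_BUFFER_ENABLED, an already-created cl_command_queue named app_queue, and an illustrative device_type value; the helper name is hypothetical and error handling is omitted.

```cpp
#include <CL/cl.h>
#include <onnxruntime_c_api.h>

// Hypothetical helper: append the OpenVINO EP with queue-based sharing.
void AppendOpenVINOWithQueueLegacy(OrtSessionOptions* session_options,
                                   cl_command_queue app_queue) {
  const OrtApi* ort = OrtGetApiBase()->GetApi(ORT_API_VERSION);

  OrtOpenVINOProviderOptions ov_options;              // the C++ constructor above zero-initializes every field
  ov_options.device_type = "GPU_FP32";                // illustrative device/precision string
  ov_options.queue = static_cast<void*>(app_queue);   // new field; leave `context` null,
                                                      // the two are mutually exclusive
  // Status check omitted for brevity.
  ort->SessionOptionsAppendExecutionProvider_OpenVINO(session_options, &ov_options);
}
```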
16 changes: 12 additions & 4 deletions onnxruntime/core/providers/openvino/backends/basic_backend.cc
@@ -55,10 +55,17 @@ BasicBackend::BasicBackend(const ONNX_NAMESPACE::ModelProto& model_proto,
if (global_context.is_wholly_supported_graph) {
#if defined(IO_BUFFER_ENABLED)
if ((global_context.device_type.find("GPU") != std::string::npos) &&
(global_context_.context != nullptr)) {
((global_context_.context != nullptr) || (global_context_.queue != nullptr))) {
LOGS_DEFAULT(INFO) << log_tag << "IO Buffering Enabled";
cl_context ctx = static_cast<cl_context>(global_context_.context);
remote_context_ = new ov::intel_gpu::ocl::ClContext(global_context_.ie_core.Get(), ctx);
if (global_context_.context != nullptr) {
LOGS_DEFAULT(VERBOSE) << log_tag << "Using OpenCL Context sharing";
cl_context ctx = static_cast<cl_context>(global_context_.context);
remote_context_ = new ov::intel_gpu::ocl::ClContext(global_context_.ie_core.Get(), ctx);
} else if (global_context.queue != nullptr) {
LOGS_DEFAULT(VERBOSE) << log_tag << "Using OpenCL Command Queue sharing";
cl_command_queue queue = static_cast<cl_command_queue>(global_context_.queue);
remote_context_ = new ov::intel_gpu::ocl::ClContext(global_context_.ie_core.Get(), queue);
}
ie_cnn_network_ = CreateOVModel(model_proto, global_context_, subgraph_context_, const_outputs_map_);
exe_network_ = global_context_.ie_core.LoadNetwork(
ie_cnn_network_, remote_context_, subgraph_context_.subgraph_name);
@@ -457,7 +464,8 @@ void BasicBackend::Infer(OrtKernelContext* ctx) {

#ifdef IO_BUFFER_ENABLED
if ((global_context_.device_type.find("GPU") != std::string::npos) &&
(global_context_.context != nullptr) && global_context_.is_wholly_supported_graph) {
((global_context_.context != nullptr) || (global_context_.queue != nullptr)) &&
global_context_.is_wholly_supported_graph) {
try {
StartRemoteAsyncInference(context, infer_request);
} catch (std::string const& msg) {
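For reference, the two branches above correspond to the two ov::intel_gpu::ocl::ClContext constructor overloads in OpenVINO's GPU remote-context API. A sketch under the assumption that app_ctx and app_queue are application-owned OpenCL handles and that one of them is set:

```cpp
#include <CL/cl.h>
#include <openvino/runtime/core.hpp>
#include <openvino/runtime/intel_gpu/ocl/ocl.hpp>

ov::intel_gpu::ocl::ClContext MakeSharedContext(ov::Core& core,
                                                cl_context app_ctx,
                                                cl_command_queue app_queue) {
  if (app_ctx != nullptr) {
    // Context sharing: the pre-existing path, wrapping the application's cl_context.
    return ov::intel_gpu::ocl::ClContext(core, app_ctx);
  }
  // Queue sharing: the overload this PR starts using; the plugin derives the
  // cl_context from the queue and submits inference work to that same queue.
  return ov::intel_gpu::ocl::ClContext(core, app_queue);
}
```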
1 change: 1 addition & 0 deletions onnxruntime/core/providers/openvino/contexts.h
@@ -31,6 +31,7 @@ struct GlobalContext {
int onnx_opset_version;
void* context = 0;
bool use_api_2;
void* queue = 0;
};

// Holds context specific to subgraph.
onnxruntime/core/providers/openvino/openvino_execution_provider.cc
@@ -25,6 +25,7 @@ OpenVINOExecutionProvider::OpenVINOExecutionProvider(const OpenVINOExecutionProv
global_context_->enable_opencl_throttling = info.enable_opencl_throttling_;
global_context_->disable_dynamic_shapes = info.disable_dynamic_shapes_;
global_context_->num_of_threads = info.num_of_threads_;
global_context_->queue = info.queue_;

// to check if target device is available
// using ie_core capability GetAvailableDevices to fetch list of devices plugged in
onnxruntime/core/providers/openvino/openvino_execution_provider.h
@@ -70,19 +70,21 @@ struct OpenVINOExecutionProviderInfo {
void* context_;
bool enable_opencl_throttling_;
bool disable_dynamic_shapes_;
void* queue_;

explicit OpenVINOExecutionProviderInfo(std::string dev_type, bool enable_npu_fast_compile, std::string dev_id,
size_t num_of_threads, std::string cache_dir, int num_streams,
void* context, bool enable_opencl_throttling,
bool disable_dynamic_shapes)
bool disable_dynamic_shapes, void* queue)
: enable_npu_fast_compile_(enable_npu_fast_compile),
device_id_(dev_id),
num_of_threads_(num_of_threads),
cache_dir_(cache_dir),
num_streams_(num_streams),
context_(context),
enable_opencl_throttling_(enable_opencl_throttling),
disable_dynamic_shapes_(disable_dynamic_shapes) {
disable_dynamic_shapes_(disable_dynamic_shapes),
queue_(queue) {
if (dev_type == "") {
LOGS_DEFAULT(INFO) << "[OpenVINO-EP]"
<< "No runtime device selection option provided.";
@@ -166,7 +168,7 @@ struct OpenVINOExecutionProviderInfo {
<< "Choosing Device: " << device_type_ << " , Precision: " << precision_;
}
OpenVINOExecutionProviderInfo() {
OpenVINOExecutionProviderInfo("", false, "", 0, "", 1, NULL, false, false);
OpenVINOExecutionProviderInfo("", false, "", 0, "", 1, NULL, false, false, 0);
}
};

23 changes: 18 additions & 5 deletions onnxruntime/core/providers/openvino/openvino_provider_factory.cc
@@ -11,13 +11,15 @@ struct OpenVINOProviderFactory : IExecutionProviderFactory {
OpenVINOProviderFactory(const char* device_type, bool enable_npu_fast_compile,
const char* device_id, size_t num_of_threads,
const char* cache_dir, int num_streams, void* context,
bool enable_opencl_throttling, bool disable_dynamic_shapes)
bool enable_opencl_throttling, bool disable_dynamic_shapes,
void* queue)
: enable_npu_fast_compile_(enable_npu_fast_compile),
num_of_threads_(num_of_threads),
num_streams_(num_streams),
context_(context),
enable_opencl_throttling_(enable_opencl_throttling),
disable_dynamic_shapes_(disable_dynamic_shapes) {
disable_dynamic_shapes_(disable_dynamic_shapes),
queue_(queue) {
device_type_ = (device_type == nullptr) ? "" : device_type;
device_id_ = (device_id == nullptr) ? "" : device_id;
cache_dir_ = (cache_dir == nullptr) ? "" : cache_dir;
@@ -37,12 +39,13 @@ struct OpenVINOProviderFactory : IExecutionProviderFactory {
void* context_;
bool enable_opencl_throttling_;
bool disable_dynamic_shapes_;
void* queue_;
};

std::unique_ptr<IExecutionProvider> OpenVINOProviderFactory::CreateProvider() {
OpenVINOExecutionProviderInfo info(device_type_, enable_npu_fast_compile_, device_id_, num_of_threads_,
cache_dir_, num_streams_, context_, enable_opencl_throttling_,
disable_dynamic_shapes_);
disable_dynamic_shapes_, queue_);
return std::make_unique<OpenVINOExecutionProvider>(info);
}

@@ -79,7 +82,9 @@ struct OpenVINO_Provider : Provider {
bool enable_opencl_throttling = false; // [enable_opencl_throttling]: Enables OpenCL queue throttling for GPU
// device (Reduces CPU Utilization when using GPU)
bool disable_dynamic_shapes = false; // [disable_dynamic_shapes]: Execute model with default static shape for optimal performance.
void* context = nullptr;
void* context = nullptr; // [context]: Pointer to OpenCL context (cl_context) to use for IO buffering.
void* queue = nullptr; // [queue]: Pointer to OpenCL command queue (cl_command_queue) to use for IO buffering.
// Note that this is only possible in latency-optimized mode and is mutually exclusive with `context`.

if (provider_options_map.find("device_type") != provider_options_map.end()) {
device_type = provider_options_map.at("device_type").c_str();
@@ -154,6 +159,13 @@ struct OpenVINO_Provider : Provider {
else if (bool_flag == "false" || bool_flag == "False")
disable_dynamic_shapes = false;
}

if (provider_options_map.find("queue") != provider_options_map.end()) {
std::string str = provider_options_map.at("queue");
uint64_t number = std::strtoull(str.c_str(), nullptr, 16);
queue = reinterpret_cast<void*>(number);
}

return std::make_shared<OpenVINOProviderFactory>(const_cast<char*>(device_type.c_str()),
enable_npu_fast_compile,
device_id,
Expand All @@ -162,7 +174,8 @@ struct OpenVINO_Provider : Provider {
num_streams,
context,
enable_opencl_throttling,
disable_dynamic_shapes);
disable_dynamic_shapes,
queue);
}

void Initialize() override {
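On the caller side, the "queue" entry parsed above must be the command-queue handle rendered as a hexadecimal string. A sketch of producing it from an application-owned cl_command_queue; the AppendExecutionProvider entry point, the helper name, and the key spellings other than "queue" are assumptions and may differ across ONNX Runtime versions.

```cpp
#include <CL/cl.h>
#include <onnxruntime_cxx_api.h>

#include <cstdint>
#include <sstream>
#include <string>
#include <unordered_map>

// Hypothetical helper: pass an existing cl_command_queue through the ProviderOptions map.
void AppendOpenVINOWithQueueV2(Ort::SessionOptions& session_options,
                               cl_command_queue app_queue) {
  std::stringstream hex_queue;
  hex_queue << std::hex << reinterpret_cast<std::uintptr_t>(app_queue);  // e.g. "7f3c2a1b4000"

  std::unordered_map<std::string, std::string> ov_options{
      {"device_type", "GPU"},       // illustrative
      {"queue", hex_queue.str()},   // read back via std::strtoull(..., nullptr, 16) above
  };
  session_options.AppendExecutionProvider("OpenVINO", ov_options);  // assumed entry point
}
```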
7 changes: 7 additions & 0 deletions onnxruntime/core/session/provider_bridge_ort.cc
@@ -1713,6 +1713,13 @@ ProviderOptions OrtOpenVINOProviderOptionsToOrtOpenVINOProviderOptionsV2(const O

// Add new provider option below
ov_options_converted_map["num_streams"] = "1";

if (legacy_ov_options->queue != nullptr) {
std::stringstream queue_string;
queue_string << legacy_ov_options->queue;
ov_options_converted_map["queue"] = queue_string.str();
}

return ov_options_converted_map;
}

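Finally, a small self-contained check of the pointer round trip implied by the conversion above together with the factory parsing earlier: operator<< on a void* prints an implementation-defined hexadecimal form (e.g. "0x7f..." on glibc, zero-padded uppercase on MSVC), and std::strtoull with base 16 accepts either, so the original handle value is recovered. The literal address below is arbitrary.

```cpp
#include <cassert>
#include <cstdint>
#include <cstdlib>
#include <sstream>

int main() {
  void* queue = reinterpret_cast<void*>(0x7f3c2a1b4000ULL);  // stand-in for a cl_command_queue

  std::stringstream queue_string;
  queue_string << queue;  // what OrtOpenVINOProviderOptionsToOrtOpenVINOProviderOptionsV2 does

  std::uint64_t number = std::strtoull(queue_string.str().c_str(), nullptr, 16);  // what the factory does
  assert(reinterpret_cast<void*>(number) == queue);  // original handle recovered
  return 0;
}
```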