
Add an option in OpenVINOProviderOptions to support the queue-based overload for creating ClContext #19699


Open: wants to merge 2 commits into main
4 changes: 3 additions & 1 deletion include/onnxruntime/core/session/onnxruntime_c_api.h
@@ -623,7 +623,8 @@ typedef struct OrtOpenVINOProviderOptions {
cache_dir{},
context{},
enable_opencl_throttling{},
enable_dynamic_shapes{} {}
enable_dynamic_shapes{},
queue{} {}
Contributor comment:
The struct is frozen and it's our legacy API. Kindly upgrade to the ORT ProviderOptions Map structure.

#endif
/** \brief Device type string
*
@@ -637,6 +638,7 @@ typedef struct OrtOpenVINOProviderOptions {
void* context;
unsigned char enable_opencl_throttling; ///< 0 = disabled, nonzero = enabled
unsigned char enable_dynamic_shapes; ///< 0 = disabled, nonzero = enabled
void* queue;
} OrtOpenVINOProviderOptions;

struct OrtApi;
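For orientation, a minimal caller-side sketch of the new field through the legacy struct-based C API (not part of this diff). It assumes an ONNX Runtime build with IO_BUFFER_ENABLED, an already-created cl_command_queue named app_queue, and an illustrative device_type value; the helper name is hypothetical and error handling is omitted.

```cpp
#include <CL/cl.h>
#include <onnxruntime_c_api.h>

// Hypothetical helper: append the OpenVINO EP with queue-based sharing.
void AppendOpenVINOWithQueueLegacy(OrtSessionOptions* session_options,
                                   cl_command_queue app_queue) {
  const OrtApi* ort = OrtGetApiBase()->GetApi(ORT_API_VERSION);

  OrtOpenVINOProviderOptions ov_options;              // the C++ constructor above zero-initializes every field
  ov_options.device_type = "GPU_FP32";                // illustrative device/precision string
  ov_options.queue = static_cast<void*>(app_queue);   // new field; leave `context` null,
                                                      // the two are mutually exclusive
  // Status check omitted for brevity.
  ort->SessionOptionsAppendExecutionProvider_OpenVINO(session_options, &ov_options);
}
```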
16 changes: 12 additions & 4 deletions onnxruntime/core/providers/openvino/backends/basic_backend.cc
@@ -55,10 +55,17 @@ BasicBackend::BasicBackend(const ONNX_NAMESPACE::ModelProto& model_proto,
if (global_context.is_wholly_supported_graph) {
#if defined(IO_BUFFER_ENABLED)
if ((global_context.device_type.find("GPU") != std::string::npos) &&
(global_context_.context != nullptr)) {
((global_context_.context != nullptr) || (global_context_.queue != nullptr))) {
LOGS_DEFAULT(INFO) << log_tag << "IO Buffering Enabled";
cl_context ctx = static_cast<cl_context>(global_context_.context);
remote_context_ = new ov::intel_gpu::ocl::ClContext(global_context_.ie_core.Get(), ctx);
if (global_context_.context != nullptr) {
LOGS_DEFAULT(VERBOSE) << log_tag << "Using OpenCL Context sharing";
cl_context ctx = static_cast<cl_context>(global_context_.context);
remote_context_ = new ov::intel_gpu::ocl::ClContext(global_context_.ie_core.Get(), ctx);
} else if (global_context.queue != nullptr) {
LOGS_DEFAULT(VERBOSE) << log_tag << "Using OpenCL Command Queue sharing";
cl_command_queue queue = static_cast<cl_command_queue>(global_context_.queue);
remote_context_ = new ov::intel_gpu::ocl::ClContext(global_context_.ie_core.Get(), queue);
}
ie_cnn_network_ = CreateOVModel(model_proto, global_context_, subgraph_context_, const_outputs_map_);
exe_network_ = global_context_.ie_core.LoadNetwork(
ie_cnn_network_, remote_context_, subgraph_context_.subgraph_name);
@@ -457,7 +464,8 @@ void BasicBackend::Infer(OrtKernelContext* ctx) {

#ifdef IO_BUFFER_ENABLED
if ((global_context_.device_type.find("GPU") != std::string::npos) &&
(global_context_.context != nullptr) && global_context_.is_wholly_supported_graph) {
((global_context_.context != nullptr) || (global_context_.queue != nullptr)) &&
global_context_.is_wholly_supported_graph) {
try {
StartRemoteAsyncInference(context, infer_request);
} catch (std::string const& msg) {
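For reference, the two branches above correspond to the two ov::intel_gpu::ocl::ClContext constructor overloads in OpenVINO's GPU remote-context API. A sketch under the assumption that app_ctx and app_queue are application-owned OpenCL handles and that one of them is set:

```cpp
#include <CL/cl.h>
#include <openvino/runtime/core.hpp>
#include <openvino/runtime/intel_gpu/ocl/ocl.hpp>

ov::intel_gpu::ocl::ClContext MakeSharedContext(ov::Core& core,
                                                cl_context app_ctx,
                                                cl_command_queue app_queue) {
  if (app_ctx != nullptr) {
    // Context sharing: the pre-existing path, wrapping the application's cl_context.
    return ov::intel_gpu::ocl::ClContext(core, app_ctx);
  }
  // Queue sharing: the overload this PR starts using; the plugin derives the
  // cl_context from the queue and submits inference work to that same queue.
  return ov::intel_gpu::ocl::ClContext(core, app_queue);
}
```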
1 change: 1 addition & 0 deletions onnxruntime/core/providers/openvino/contexts.h
@@ -31,6 +31,7 @@ struct GlobalContext {
int onnx_opset_version;
void* context = 0;
bool use_api_2;
void* queue = 0;
};

// Holds context specific to subgraph.
onnxruntime/core/providers/openvino/openvino_execution_provider.cc
@@ -25,6 +25,7 @@ OpenVINOExecutionProvider::OpenVINOExecutionProvider(const OpenVINOExecutionProv
global_context_->enable_opencl_throttling = info.enable_opencl_throttling_;
global_context_->disable_dynamic_shapes = info.disable_dynamic_shapes_;
global_context_->num_of_threads = info.num_of_threads_;
global_context_->queue = info.queue_;

// to check if target device is available
// using ie_core capability GetAvailableDevices to fetch list of devices plugged in
onnxruntime/core/providers/openvino/openvino_execution_provider.h
@@ -70,19 +70,21 @@ struct OpenVINOExecutionProviderInfo {
void* context_;
bool enable_opencl_throttling_;
bool disable_dynamic_shapes_;
void* queue_;

explicit OpenVINOExecutionProviderInfo(std::string dev_type, bool enable_npu_fast_compile, std::string dev_id,
size_t num_of_threads, std::string cache_dir, int num_streams,
void* context, bool enable_opencl_throttling,
bool disable_dynamic_shapes)
bool disable_dynamic_shapes, void* queue)
: enable_npu_fast_compile_(enable_npu_fast_compile),
device_id_(dev_id),
num_of_threads_(num_of_threads),
cache_dir_(cache_dir),
num_streams_(num_streams),
context_(context),
enable_opencl_throttling_(enable_opencl_throttling),
disable_dynamic_shapes_(disable_dynamic_shapes) {
disable_dynamic_shapes_(disable_dynamic_shapes),
queue_(queue) {
if (dev_type == "") {
LOGS_DEFAULT(INFO) << "[OpenVINO-EP]"
<< "No runtime device selection option provided.";
@@ -166,7 +168,7 @@ struct OpenVINOExecutionProviderInfo {
<< "Choosing Device: " << device_type_ << " , Precision: " << precision_;
}
OpenVINOExecutionProviderInfo() {
OpenVINOExecutionProviderInfo("", false, "", 0, "", 1, NULL, false, false);
OpenVINOExecutionProviderInfo("", false, "", 0, "", 1, NULL, false, false, 0);
}
};

23 changes: 18 additions & 5 deletions onnxruntime/core/providers/openvino/openvino_provider_factory.cc
@@ -11,13 +11,15 @@ struct OpenVINOProviderFactory : IExecutionProviderFactory {
OpenVINOProviderFactory(const char* device_type, bool enable_npu_fast_compile,
const char* device_id, size_t num_of_threads,
const char* cache_dir, int num_streams, void* context,
bool enable_opencl_throttling, bool disable_dynamic_shapes)
bool enable_opencl_throttling, bool disable_dynamic_shapes,
void* queue)
: enable_npu_fast_compile_(enable_npu_fast_compile),
num_of_threads_(num_of_threads),
num_streams_(num_streams),
context_(context),
enable_opencl_throttling_(enable_opencl_throttling),
disable_dynamic_shapes_(disable_dynamic_shapes) {
disable_dynamic_shapes_(disable_dynamic_shapes),
queue_(queue) {
device_type_ = (device_type == nullptr) ? "" : device_type;
device_id_ = (device_id == nullptr) ? "" : device_id;
cache_dir_ = (cache_dir == nullptr) ? "" : cache_dir;
@@ -37,12 +39,13 @@ struct OpenVINOProviderFactory : IExecutionProviderFactory {
void* context_;
bool enable_opencl_throttling_;
bool disable_dynamic_shapes_;
void* queue_;
};

std::unique_ptr<IExecutionProvider> OpenVINOProviderFactory::CreateProvider() {
OpenVINOExecutionProviderInfo info(device_type_, enable_npu_fast_compile_, device_id_, num_of_threads_,
cache_dir_, num_streams_, context_, enable_opencl_throttling_,
disable_dynamic_shapes_);
disable_dynamic_shapes_, queue_);
return std::make_unique<OpenVINOExecutionProvider>(info);
}

@@ -79,7 +82,9 @@ struct OpenVINO_Provider : Provider {
bool enable_opencl_throttling = false; // [enable_opencl_throttling]: Enables OpenCL queue throttling for GPU
// device (Reduces CPU Utilization when using GPU)
bool disable_dynamic_shapes = false; // [disable_dynamic_shapes]: Execute model with default static shape for optimal performance.
void* context = nullptr;
void* context = nullptr; // [context]: Pointer to OpenCL context (cl_context) to use for IO buffering.
void* queue = nullptr; // [queue]: Pointer to OpenCL command queue (cl_command_queue) to use for IO buffering.
// Note that this is only possible in latency-optimized mode and is mutually exclusive with `context`.

if (provider_options_map.find("device_type") != provider_options_map.end()) {
device_type = provider_options_map.at("device_type").c_str();
@@ -154,6 +159,13 @@ struct OpenVINO_Provider : Provider {
else if (bool_flag == "false" || bool_flag == "False")
disable_dynamic_shapes = false;
}

if (provider_options_map.find("queue") != provider_options_map.end()) {
std::string str = provider_options_map.at("queue");
uint64_t number = std::strtoull(str.c_str(), nullptr, 16);
queue = reinterpret_cast<void*>(number);
}

return std::make_shared<OpenVINOProviderFactory>(const_cast<char*>(device_type.c_str()),
enable_npu_fast_compile,
device_id,
Expand All @@ -162,7 +174,8 @@ struct OpenVINO_Provider : Provider {
num_streams,
context,
enable_opencl_throttling,
disable_dynamic_shapes);
disable_dynamic_shapes,
queue);
}

void Initialize() override {
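On the caller side, the "queue" entry parsed above must be the command-queue handle rendered as a hexadecimal string. A sketch of producing it from an application-owned cl_command_queue; the AppendExecutionProvider entry point, the helper name, and the key spellings other than "queue" are assumptions and may differ across ONNX Runtime versions.

```cpp
#include <CL/cl.h>
#include <onnxruntime_cxx_api.h>

#include <cstdint>
#include <sstream>
#include <string>
#include <unordered_map>

// Hypothetical helper: pass an existing cl_command_queue through the ProviderOptions map.
void AppendOpenVINOWithQueueV2(Ort::SessionOptions& session_options,
                               cl_command_queue app_queue) {
  std::stringstream hex_queue;
  hex_queue << std::hex << reinterpret_cast<std::uintptr_t>(app_queue);  // e.g. "7f3c2a1b4000"

  std::unordered_map<std::string, std::string> ov_options{
      {"device_type", "GPU"},       // illustrative
      {"queue", hex_queue.str()},   // read back via std::strtoull(..., nullptr, 16) above
  };
  session_options.AppendExecutionProvider("OpenVINO", ov_options);  // assumed entry point
}
```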
7 changes: 7 additions & 0 deletions onnxruntime/core/session/provider_bridge_ort.cc
@@ -1713,6 +1713,13 @@ ProviderOptions OrtOpenVINOProviderOptionsToOrtOpenVINOProviderOptionsV2(const O

// Add new provider option below
ov_options_converted_map["num_streams"] = "1";

if (legacy_ov_options->queue != nullptr) {
std::stringstream queue_string;
queue_string << legacy_ov_options->queue;
ov_options_converted_map["queue"] = queue_string.str();
}

return ov_options_converted_map;
}

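Finally, a small self-contained check of the pointer round trip implied by the conversion above together with the factory parsing earlier: operator<< on a void* prints an implementation-defined hexadecimal form (e.g. "0x7f..." on glibc, zero-padded uppercase on MSVC), and std::strtoull with base 16 accepts either, so the original handle value is recovered. The literal address below is arbitrary.

```cpp
#include <cassert>
#include <cstdint>
#include <cstdlib>
#include <sstream>

int main() {
  void* queue = reinterpret_cast<void*>(0x7f3c2a1b4000ULL);  // stand-in for a cl_command_queue

  std::stringstream queue_string;
  queue_string << queue;  // what OrtOpenVINOProviderOptionsToOrtOpenVINOProviderOptionsV2 does

  std::uint64_t number = std::strtoull(queue_string.str().c_str(), nullptr, 16);  // what the factory does
  assert(reinterpret_cast<void*>(number) == queue);  // original handle recovered
  return 0;
}
```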