Description
Describe the issue
Hi folks -
Does run_async really offer a non-blocking interface to integrated accelerators?
I'm trying to keep a number of requests outstanding against a single accelerator; the intent is to hide submission latency by pipelining the submissions.
However, I cannot get this to work. Whether I submit with the blocking session.run or with the non-blocking session.run_async, the submission rate to the accelerator looks the same (a minimal check that times only the submission call is included after the repro below).
Kevin
To reproduce
import onnxruntime as ort
import numpy as np
import os
import threading
import time

_OUTSTANDING_REQ_ = 4

model_path = 'mobilenetv2_035_96.onnx'

# Create session options
session_options = ort.SessionOptions()
session = ort.InferenceSession(model_path,
                               providers=[('OpenVINOExecutionProvider', {'device_type': 'CPU'})],
                               sess_options=session_options)
print("Providers:", session.get_providers())

# Get model input information
input_name = session.get_inputs()[0].name
input_shape = session.get_inputs()[0].shape
input_type = session.get_inputs()[0].type

# Prepare input data - replace this with real input data matching the model's input shape and type
dummy_input = np.random.randn(*input_shape).astype(np.float32)

_SEC_OFFSET_ = 86400

# Per-request completion state: the run_async callback fills it, the submitter waits on it.
class run_async_inf:
    def __init__(self):
        self.__event = threading.Event()
        self.__outputs = None
        self.__err = ''

    def fill_outputs(self, outputs, err):
        self.__outputs = outputs
        self.__err = err
        self.__event.set()

    def get_outputs(self):
        if self.__err != '':
            raise Exception(self.__err)
        return self.__outputs

    def wait(self, sec):
        self.__event.wait(sec)
        self.__event.clear()

    def reset(self):
        self.__event = threading.Event()
        self.__outputs = None
        self.__err = ''

def _callback_(outputs: np.ndarray, state: run_async_inf, err: str) -> None:
    state.fill_outputs(outputs, err)

infer_requests = [run_async_inf() for _ in range(_OUTSTANDING_REQ_)]

# Run inference
start_t_s = time.time() % _SEC_OFFSET_
print("> starting asynchronous submissions")

# Prime the pipeline with _OUTSTANDING_REQ_ requests in flight.
for idx, _infer_request_ in enumerate(infer_requests):
    print("spawning request asynchronously.....", idx)
    session.run_async(None, {input_name: dummy_input}, _callback_, _infer_request_)

# As each request completes, immediately resubmit it to keep the accelerator busy.
for x in range(0, int(40000 / _OUTSTANDING_REQ_)):
    for idx, _infer_request_ in enumerate(infer_requests):
        _infer_request_.wait(10)
        _infer_request_.reset()
        session.run_async(None, {input_name: dummy_input}, _callback_, _infer_request_)

end_t_s = time.time() % _SEC_OFFSET_
duration_in_sec = end_t_s - start_t_s
duration_in_sec = duration_in_sec - duration_in_sec % 1
print("> duration (sec) =", duration_in_sec)
exit(0)
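As an additional sanity check, here is a minimal sketch (reusing session, input_name, and dummy_input from the repro above) that times only the submission call. If run_async were truly non-blocking, the submit time should be far smaller than the end-to-end completion time and far smaller than a blocking session.run call:

import threading
import time

submit_done = threading.Event()

def _timing_cb(outputs, state, err):
    # state is the Event passed as user_data below
    state.set()

# Time only the submission call of run_async, then wait for completion.
submit_done.clear()
t0 = time.perf_counter()
session.run_async(None, {input_name: dummy_input}, _timing_cb, submit_done)
t1 = time.perf_counter()
submit_done.wait(10)
t2 = time.perf_counter()
print("run_async submit: %.3f ms, completion: %.3f ms" % ((t1 - t0) * 1e3, (t2 - t0) * 1e3))

# Compare against the fully blocking call.
t0 = time.perf_counter()
session.run(None, {input_name: dummy_input})
t1 = time.perf_counter()
print("run (blocking):   %.3f ms" % ((t1 - t0) * 1e3))

If the run_async submit time comes out close to the blocking run time, the request is effectively being executed synchronously inside run_async, which would match what I see with the pipelined loop above.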
Urgency
No response
Platform
Windows
OS Version
Windows 11
ONNX Runtime Installation
Released Package
ONNX Runtime Version or Commit ID
1.19.0
ONNX Runtime API
Python
Architecture
X64
Execution Provider
OpenVINO
Execution Provider Library Version
1.18.0