Skip to content

Commit ade008e

Browse files
[QNN EP] Enable automatic selection of QNN EP for PREFER_NPU policy (#24629)
### Description
- Enables automatic selection of QNN EP for the `PREFER_NPU` policy.
- Fixes the cpuid vendor id for Qualcomm to be `'Q' | ('C' << 8) | ('O' << 16) | ('M' << 24);`

Sample code from unit test:
```c++
// Tests autoEP feature to automatically select an EP that supports the NPU.
// Currently only works on Windows.
TEST_F(QnnHTPBackendTests, AutoEp_PreferNpu) {
  ASSERT_ORTSTATUS_OK(Ort::GetApi().RegisterExecutionProviderLibrary(*ort_env, kQnnExecutionProvider,
                                                                     ORT_TSTR("onnxruntime_providers_qnn.dll")));

  Ort::SessionOptions so;
  so.SetEpSelectionPolicy(OrtExecutionProviderDevicePolicy_PREFER_NPU);

  const ORTCHAR_T* ort_model_path = ORT_MODEL_FOLDER "nhwc_resize_sizes_opset18.quant.onnx";
  Ort::Session session(*ort_env, ort_model_path, so);
  EXPECT_TRUE(SessionHasEp(session, kQnnExecutionProvider));

  ASSERT_ORTSTATUS_OK(Ort::GetApi().UnregisterExecutionProviderLibrary(*ort_env, kQnnExecutionProvider));
}
```

### Motivation and Context
A recent feature allows ORT to automatically select an EP according to policies set by the user (e.g., prefer NPU or prefer GPU). This PR allows QNN EP to be potentially selected when the user sets the `PREFER_NPU` policy.
1 parent 6fa8ba1 commit ade008e

File tree

4 files changed

+181
-20
lines changed

4 files changed

+181
-20
lines changed

onnxruntime/core/common/cpuid_info.cc

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -155,7 +155,7 @@ std::string CPUIDInfo::GetX86Vendor(int32_t* data) {
155155
// Maps a CPU vendor string to a numeric vendor id.
//
// Returns 0x8086 for exactly "GenuineIntel", 0x1022 for exactly "GenuineAMD", 0x10DE for any string that
// starts with "NV", and the characters 'Q','C','O','M' packed with 'Q' in the lowest byte (0x4D4F4351) for
// any string that starts with "Qualcomm". Every other string maps to 0.
uint32_t CPUIDInfo::GetVendorId(const std::string& vendor) {
  // rfind(prefix, 0) can only match at index 0, so a non-matching vendor string is rejected without
  // scanning the rest of it (find(prefix) == 0 would search the whole string first).
  const auto starts_with = [&vendor](const char* prefix) { return vendor.rfind(prefix, 0) == 0; };

  if (vendor == "GenuineIntel") return 0x8086;
  if (vendor == "GenuineAMD") return 0x1022;
  // 'Q' occupies the least-significant byte and 'M' the most-significant one.
  if (starts_with("Qualcomm")) return 'Q' | ('C' << 8) | ('O' << 16) | ('M' << 24);
  if (starts_with("NV")) return 0x10DE;
  return 0;
}

onnxruntime/core/providers/qnn/qnn_provider_factory.cc

Lines changed: 138 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -79,6 +79,27 @@ struct QNN_Provider : Provider {
7979
return std::make_shared<onnxruntime::QNNProviderFactory>(*provider_options, config_options);
8080
}
8181

82+
// Builds a QNN execution provider from the supplied provider options and session options.
//
// devices, ep_metadata: not inspected by this implementation.
// num_devices:          must be exactly 1; any other value produces an ORT_EP_FAIL status.
// provider_options:     forwarded to CreateExecutionProviderFactory.
// session_options:      supplies the ConfigOptions forwarded to the factory and is passed to CreateProvider.
// logger:               passed to CreateProvider.
// ep:                   receives the created execution provider.
//
// Returns Status::OK() once `ep` has been assigned.
Status CreateIExecutionProvider(const OrtHardwareDevice* const* /*devices*/,
                                const OrtKeyValuePairs* const* /*ep_metadata*/,
                                size_t num_devices,
                                ProviderOptions& provider_options,
                                const OrtSessionOptions& session_options,
                                const OrtLogger& logger,
                                std::unique_ptr<IExecutionProvider>& ep) override {
  const bool single_device = (num_devices == 1);
  if (!single_device) {
    return Status(common::ONNXRUNTIME, ORT_EP_FAIL, "QNN EP only supports one device.");
  }

  // CreateExecutionProviderFactory takes a single opaque pointer, so the two inputs are packed as
  // {provider options, config options}.
  // NOTE(review): presumably the factory unpacks exactly this two-element layout — its body is not
  // fully visible here, confirm before changing the order.
  const ConfigOptions* session_config = &session_options.GetConfigOptions();
  std::array<const void*, 2> factory_args = {&provider_options, session_config};

  auto factory = CreateExecutionProviderFactory(static_cast<const void*>(&factory_args));
  ep = factory->CreateProvider(session_options, logger);

  return Status::OK();
}
102+
82103
void Initialize() override {}
83104
void Shutdown() override {}
84105
} g_provider;
@@ -93,4 +114,121 @@ ORT_API(onnxruntime::Provider*, GetProvider) {
93114
return &onnxruntime::g_provider;
94115
}
95116
}
117+
118+
#include "core/framework/error_code_helper.h"
119+
120+
// OrtEpApi infrastructure to be able to use the QNN EP as an OrtEpFactory for auto EP selection.
121+
struct QnnEpFactory : OrtEpFactory {
122+
QnnEpFactory(const OrtApi& ort_api_in,
123+
const char* ep_name,
124+
OrtHardwareDeviceType hw_type,
125+
const char* qnn_backend_type)
126+
: ort_api{ort_api_in}, ep_name{ep_name}, ort_hw_device_type{hw_type}, qnn_backend_type{qnn_backend_type} {
127+
GetName = GetNameImpl;
128+
GetVendor = GetVendorImpl;
129+
GetSupportedDevices = GetSupportedDevicesImpl;
130+
CreateEp = CreateEpImpl;
131+
ReleaseEp = ReleaseEpImpl;
132+
}
133+
134+
// Returns the name for the EP. Each unique factory configuration must have a unique name.
135+
// Ex: a factory that supports NPU should have a different than a factory that supports GPU.
136+
static const char* GetNameImpl(const OrtEpFactory* this_ptr) {
137+
const auto* factory = static_cast<const QnnEpFactory*>(this_ptr);
138+
return factory->ep_name.c_str();
139+
}
140+
141+
static const char* GetVendorImpl(const OrtEpFactory* this_ptr) {
142+
const auto* factory = static_cast<const QnnEpFactory*>(this_ptr);
143+
return factory->vendor.c_str();
144+
}
145+
146+
// Creates and returns OrtEpDevice instances for all OrtHardwareDevices that this factory supports.
147+
// An EP created with this factory is expected to be able to execute a model with *all* supported
148+
// hardware devices at once. A single instance of QNN EP is not currently setup to partition a model among
149+
// multiple different QNN backends at once (e.g, npu, cpu, gpu), so this factory instance is set to only
150+
// support one backend: npu. To support a different backend, like gpu, create a different factory instance
151+
// that only supports GPU.
152+
static OrtStatus* GetSupportedDevicesImpl(OrtEpFactory* this_ptr,
153+
const OrtHardwareDevice* const* devices,
154+
size_t num_devices,
155+
OrtEpDevice** ep_devices,
156+
size_t max_ep_devices,
157+
size_t* p_num_ep_devices) {
158+
size_t& num_ep_devices = *p_num_ep_devices;
159+
auto* factory = static_cast<QnnEpFactory*>(this_ptr);
160+
161+
for (size_t i = 0; i < num_devices && num_ep_devices < max_ep_devices; ++i) {
162+
const OrtHardwareDevice& device = *devices[i];
163+
if (factory->ort_api.HardwareDevice_Type(&device) == factory->ort_hw_device_type &&
164+
factory->ort_api.HardwareDevice_VendorId(&device) == factory->vendor_id) {
165+
OrtKeyValuePairs* ep_options = nullptr;
166+
factory->ort_api.CreateKeyValuePairs(&ep_options);
167+
factory->ort_api.AddKeyValuePair(ep_options, "backend_type", factory->qnn_backend_type.c_str());
168+
ORT_API_RETURN_IF_ERROR(
169+
factory->ort_api.GetEpApi()->CreateEpDevice(factory, &device, nullptr, ep_options,
170+
&ep_devices[num_ep_devices++]));
171+
}
172+
}
173+
174+
return nullptr;
175+
}
176+
177+
static OrtStatus* CreateEpImpl(OrtEpFactory* /*this_ptr*/,
178+
_In_reads_(num_devices) const OrtHardwareDevice* const* /*devices*/,
179+
_In_reads_(num_devices) const OrtKeyValuePairs* const* /*ep_metadata*/,
180+
_In_ size_t /*num_devices*/,
181+
_In_ const OrtSessionOptions* /*session_options*/,
182+
_In_ const OrtLogger* /*logger*/,
183+
_Out_ OrtEp** /*ep*/) {
184+
return onnxruntime::CreateStatus(ORT_INVALID_ARGUMENT, "QNN EP factory does not support this method.");
185+
}
186+
187+
static void ReleaseEpImpl(OrtEpFactory* /*this_ptr*/, OrtEp* /*ep*/) {
188+
// no-op as we never create an EP here.
189+
}
190+
191+
const OrtApi& ort_api;
192+
const std::string ep_name; // EP name
193+
const std::string vendor{"Microsoft"}; // EP vendor name
194+
195+
// Qualcomm vendor ID. Refer to the ACPI ID registry (search Qualcomm): https://uefi.org/ACPI_ID_List
196+
const uint32_t vendor_id{'Q' | ('C' << 8) | ('O' << 16) | ('M' << 24)};
197+
const OrtHardwareDeviceType ort_hw_device_type; // Supported OrtHardwareDevice
198+
const std::string qnn_backend_type; // QNN backend type for OrtHardwareDevice
199+
};
200+
201+
extern "C" {
202+
//
203+
// Public symbols
204+
//
205+
// Exported entry point that creates the EP factories provided by this library.
//
// registration_name: unused; the factory uses its own EP name.
// ort_api_base:      used to obtain the OrtApi for ORT_API_VERSION.
// factories:         output array that receives the created factory pointers.
// max_factories:     capacity of `factories`; must be at least 1.
// num_factories:     receives the number of factories written (1 on success).
//
// Returns nullptr on success, or an ORT_INVALID_ARGUMENT status when `factories` has no room.
// A factory returned here is destroyed by passing it to ReleaseEpFactory.
//
// NOTE(review): the result of GetApi is dereferenced without a null check — confirm GetApi cannot
// return null for ORT_API_VERSION in the hosts that load this library.
OrtStatus* CreateEpFactories(const char* /*registration_name*/, const OrtApiBase* ort_api_base,
                             OrtEpFactory** factories, size_t max_factories, size_t* num_factories) {
  const OrtApi* ort_api = ort_api_base->GetApi(ORT_API_VERSION);

  // Validate the output capacity before allocating anything, so the error path does no throwaway work.
  if (max_factories < 1) {
    return ort_api->CreateStatus(ORT_INVALID_ARGUMENT,
                                 "Not enough space to return EP factory. Need at least one.");
  }

  // Factory could use registration_name or define its own EP name.
  auto factory_npu = std::make_unique<QnnEpFactory>(*ort_api,
                                                    onnxruntime::kQnnExecutionProvider,
                                                    OrtHardwareDeviceType_NPU, "htp");

  // If want to support GPU, create a new factory instance because QNN EP is not currently setup to partition a single model
  // among heterogeneous devices.
  // std::unique_ptr<OrtEpFactory> factory_gpu = std::make_unique<QnnEpFactory>(*ort_api, "QNNExecutionProvider_GPU", OrtHardwareDeviceType_GPU, "gpu");

  // Ownership of the factory passes to the caller.
  factories[0] = factory_npu.release();
  *num_factories = 1;

  return nullptr;
}
228+
229+
// Exported counterpart to CreateEpFactories: destroys a factory that was created there.
// Always returns nullptr (no error status is ever produced).
OrtStatus* ReleaseEpFactory(OrtEpFactory* factory) {
  // Delete through the concrete type, which is the type the factory was allocated as.
  auto* qnn_factory = static_cast<QnnEpFactory*>(factory);
  delete qnn_factory;
  return nullptr;
}
233+
}
96234
#endif // !BUILD_QNN_EP_STATIC_LIB
Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,2 +1,4 @@
11
EXPORTS
22
GetProvider
3+
CreateEpFactories
4+
ReleaseEpFactory

onnxruntime/test/providers/qnn/qnn_basic_test.cc

Lines changed: 40 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,7 @@
1616
#include "core/session/onnxruntime_run_options_config_keys.h"
1717

1818
#include "test/providers/qnn/qnn_test_utils.h"
19+
#include "test/util/include/api_asserts.h"
1920

2021
#include "gtest/gtest.h"
2122
#include "gmock/gmock.h"
@@ -37,24 +38,24 @@ namespace test {
3738
// TODO: When we need QNN in a minimal build we should add an ORT format version of the model
3839
#if !defined(ORT_MINIMAL_BUILD)
3940

41+
// Returns true when `ep_name` is one of the provider types registered with `session`, false otherwise.
static bool SessionHasEp(Ort::Session& session, const char* ep_name) {
  // Access the underlying InferenceSession: the Ort::Session yields its OrtSession handle, which this
  // test reinterprets as the InferenceSession.
  const OrtSession* session_handle = session;
  const auto* inference_session = reinterpret_cast<const InferenceSession*>(session_handle);

  for (const auto& registered_type : inference_session->GetRegisteredProviderTypes()) {
    if (registered_type == ep_name) {
      return true;
    }
  }

  return false;
}
55+
4056
// Tests that the QNN EP is registered when added via the public C++ API.
4157
// Loads a simple ONNX model that adds floats.
4258
TEST_F(QnnHTPBackendTests, TestAddEpUsingPublicApi) {
43-
auto session_has_qnn_ep = [](Ort::Session& session) -> bool {
44-
// Access the underlying InferenceSession.
45-
const OrtSession* ort_session = session;
46-
const InferenceSession* s = reinterpret_cast<const InferenceSession*>(ort_session);
47-
bool have_qnn_ep = false;
48-
49-
for (const auto& provider : s->GetRegisteredProviderTypes()) {
50-
if (provider == kQnnExecutionProvider) {
51-
have_qnn_ep = true;
52-
break;
53-
}
54-
}
55-
return have_qnn_ep;
56-
};
57-
5859
onnxruntime::ProviderOptions options;
5960
#if defined(_WIN32)
6061
options["backend_path"] = "QnnHtp.dll";
@@ -77,8 +78,9 @@ TEST_F(QnnHTPBackendTests, TestAddEpUsingPublicApi) {
7778
so.AppendExecutionProvider("QNN", options);
7879

7980
Ort::Session session(*ort_env, ort_model_path, so);
80-
ASSERT_TRUE(session_has_qnn_ep(session)) << "QNN EP was not found in registered providers for session "
81-
<< "when added to session with name 'QNN'";
81+
ASSERT_TRUE(SessionHasEp(session, kQnnExecutionProvider))
82+
<< "QNN EP was not found in registered providers for session "
83+
<< "providers for session when added to session with name 'QNN'";
8284
}
8385

8486
{
@@ -92,8 +94,9 @@ TEST_F(QnnHTPBackendTests, TestAddEpUsingPublicApi) {
9294
so.AppendExecutionProvider(kQnnExecutionProvider, options);
9395

9496
Ort::Session session(*ort_env, ort_model_path, so);
95-
ASSERT_TRUE(session_has_qnn_ep(session)) << "QNN EP was not found in registered providers for session "
96-
<< "when added to session with name '" << kQnnExecutionProvider << "'";
97+
ASSERT_TRUE(SessionHasEp(session, kQnnExecutionProvider))
98+
<< "QNN EP was not found in registered providers for session "
99+
<< "when added to session with name '" << kQnnExecutionProvider << "'";
97100
}
98101
}
99102

@@ -1265,6 +1268,24 @@ TEST_F(QnnHTPBackendTests, LoadingAndUnloadingOfQnnLibrary_FixSegFault) {
12651268
}
12661269
#endif // !BUILD_QNN_EP_STATIC_LIB
12671270

1271+
#if defined(_WIN32) && !BUILD_QNN_EP_STATIC_LIB
// Tests autoEP feature to automatically select an EP that supports the NPU.
// Currently only works on Windows.
TEST_F(QnnHTPBackendTests, AutoEp_PreferNpu) {
  ASSERT_ORTSTATUS_OK(Ort::GetApi().RegisterExecutionProviderLibrary(*ort_env, kQnnExecutionProvider,
                                                                     ORT_TSTR("onnxruntime_providers_qnn.dll")));

  {
    // Keep the session options and session in their own scope so both are destroyed before the EP
    // library is unregistered below; no session may still be using an EP from the library at that point.
    Ort::SessionOptions so;
    so.SetEpSelectionPolicy(OrtExecutionProviderDevicePolicy_PREFER_NPU);

    const ORTCHAR_T* ort_model_path = ORT_MODEL_FOLDER "nhwc_resize_sizes_opset18.quant.onnx";
    Ort::Session session(*ort_env, ort_model_path, so);
    EXPECT_TRUE(SessionHasEp(session, kQnnExecutionProvider));
  }

  ASSERT_ORTSTATUS_OK(Ort::GetApi().UnregisterExecutionProviderLibrary(*ort_env, kQnnExecutionProvider));
}
#endif  // defined(_WIN32) && !BUILD_QNN_EP_STATIC_LIB
1288+
12681289
#endif // defined(__aarch64__) || defined(_M_ARM64) || defined(__linux__)
12691290
#endif // !defined(ORT_MINIMAL_BUILD)
12701291

0 commit comments

Comments
 (0)