[QNN EP] Enable automatic selection of QNN EP for PREFER_NPU policy (#24629)

adrianlizarraga · web-flow · commit ade008ed4c02 · 2025-05-04T00:57:40.000-07:00
### Description
- Enables automatic selection of QNN EP for PREFER_NPU policy
- Fixes cpuid vendor id for Qualcomm to be `'Q' | ('C' &lt;&lt; 8) | ('O' &lt;&lt;
16) | ('M' &lt;&lt; 24);`

Sample code from unit test:
```c++
// Tests autoEP feature to automatically select an EP that supports the NPU.
// Currently only works on Windows.
TEST_F(QnnHTPBackendTests, AutoEp_PreferNpu) {
  ASSERT_ORTSTATUS_OK(Ort::GetApi().RegisterExecutionProviderLibrary(*ort_env, kQnnExecutionProvider,
                                                                     ORT_TSTR("onnxruntime_providers_qnn.dll")));

  Ort::SessionOptions so;
  so.SetEpSelectionPolicy(OrtExecutionProviderDevicePolicy_PREFER_NPU);

  const ORTCHAR_T* ort_model_path = ORT_MODEL_FOLDER "nhwc_resize_sizes_opset18.quant.onnx";
  Ort::Session session(*ort_env, ort_model_path, so);
  EXPECT_TRUE(SessionHasEp(session, kQnnExecutionProvider));

  ASSERT_ORTSTATUS_OK(Ort::GetApi().UnregisterExecutionProviderLibrary(*ort_env, kQnnExecutionProvider));
}
```

### Motivation and Context
A recent feature allows ORT to automatically select an EP according to
policies set by the user (e.g., prefer npu or prefer gpu). This PR
allows QNN EP to be potentially selected when the user sets the
`PREFER_NPU` policy.
diff --git a/onnxruntime/core/common/cpuid_info.cc b/onnxruntime/core/common/cpuid_info.cc
@@ -155,7 +155,7 @@ std::string CPUIDInfo::GetX86Vendor(int32_t* data) {
 uint32_t CPUIDInfo::GetVendorId(const std::string& vendor) {
   if (vendor == "GenuineIntel") return 0x8086;
   if (vendor == "GenuineAMD") return 0x1022;
-  if (vendor.find("Qualcomm") == 0) return 'Q' << 24 | 'C' << 16 | 'O' << 8 | 'M';
+  if (vendor.find("Qualcomm") == 0) return 'Q' | ('C' << 8) | ('O' << 16) | ('M' << 24);
   if (vendor.find("NV") == 0) return 0x10DE;
   return 0;
 }
diff --git a/onnxruntime/core/providers/qnn/qnn_provider_factory.cc b/onnxruntime/core/providers/qnn/qnn_provider_factory.cc
@@ -79,6 +79,27 @@ struct QNN_Provider : Provider {
     return std::make_shared<onnxruntime::QNNProviderFactory>(*provider_options, config_options);
   }
 
+  Status CreateIExecutionProvider(const OrtHardwareDevice* const* /*devices*/,
+                                  const OrtKeyValuePairs* const* /*ep_metadata*/,
+                                  size_t num_devices,
+                                  ProviderOptions& provider_options,
+                                  const OrtSessionOptions& session_options,
+                                  const OrtLogger& logger,
+                                  std::unique_ptr<IExecutionProvider>& ep) override {
+    if (num_devices != 1) {
+      return Status(common::ONNXRUNTIME, ORT_EP_FAIL, "QNN EP only supports one device.");
+    }
+
+    const ConfigOptions* config_options = &session_options.GetConfigOptions();
+
+    std::array<const void*, 2> configs_array = {&provider_options, config_options};
+    const void* arg = reinterpret_cast<const void*>(&configs_array);
+    auto ep_factory = CreateExecutionProviderFactory(arg);
+    ep = ep_factory->CreateProvider(session_options, logger);
+
+    return Status::OK();
+  }
+
   void Initialize() override {}
   void Shutdown() override {}
 } g_provider;
@@ -93,4 +114,121 @@ ORT_API(onnxruntime::Provider*, GetProvider) {
   return &onnxruntime::g_provider;
 }
 }
+
+#include "core/framework/error_code_helper.h"
+
+// OrtEpApi infrastructure to be able to use the QNN EP as an OrtEpFactory for auto EP selection.
+struct QnnEpFactory : OrtEpFactory {
+  QnnEpFactory(const OrtApi& ort_api_in,
+               const char* ep_name,
+               OrtHardwareDeviceType hw_type,
+               const char* qnn_backend_type)
+      : ort_api{ort_api_in}, ep_name{ep_name}, ort_hw_device_type{hw_type}, qnn_backend_type{qnn_backend_type} {
+    GetName = GetNameImpl;
+    GetVendor = GetVendorImpl;
+    GetSupportedDevices = GetSupportedDevicesImpl;
+    CreateEp = CreateEpImpl;
+    ReleaseEp = ReleaseEpImpl;
+  }
+
+  // Returns the name for the EP. Each unique factory configuration must have a unique name.
+  // Ex: a factory that supports NPU should have a different than a factory that supports GPU.
+  static const char* GetNameImpl(const OrtEpFactory* this_ptr) {
+    const auto* factory = static_cast<const QnnEpFactory*>(this_ptr);
+    return factory->ep_name.c_str();
+  }
+
+  static const char* GetVendorImpl(const OrtEpFactory* this_ptr) {
+    const auto* factory = static_cast<const QnnEpFactory*>(this_ptr);
+    return factory->vendor.c_str();
+  }
+
+  // Creates and returns OrtEpDevice instances for all OrtHardwareDevices that this factory supports.
+  // An EP created with this factory is expected to be able to execute a model with *all* supported
+  // hardware devices at once. A single instance of QNN EP is not currently setup to partition a model among
+  // multiple different QNN backends at once (e.g, npu, cpu, gpu), so this factory instance is set to only
+  // support one backend: npu. To support a different backend, like gpu, create a different factory instance
+  // that only supports GPU.
+  static OrtStatus* GetSupportedDevicesImpl(OrtEpFactory* this_ptr,
+                                            const OrtHardwareDevice* const* devices,
+                                            size_t num_devices,
+                                            OrtEpDevice** ep_devices,
+                                            size_t max_ep_devices,
+                                            size_t* p_num_ep_devices) {
+    size_t& num_ep_devices = *p_num_ep_devices;
+    auto* factory = static_cast<QnnEpFactory*>(this_ptr);
+
+    for (size_t i = 0; i < num_devices && num_ep_devices < max_ep_devices; ++i) {
+      const OrtHardwareDevice& device = *devices[i];
+      if (factory->ort_api.HardwareDevice_Type(&device) == factory->ort_hw_device_type &&
+          factory->ort_api.HardwareDevice_VendorId(&device) == factory->vendor_id) {
+        OrtKeyValuePairs* ep_options = nullptr;
+        factory->ort_api.CreateKeyValuePairs(&ep_options);
+        factory->ort_api.AddKeyValuePair(ep_options, "backend_type", factory->qnn_backend_type.c_str());
+        ORT_API_RETURN_IF_ERROR(
+            factory->ort_api.GetEpApi()->CreateEpDevice(factory, &device, nullptr, ep_options,
+                                                        &ep_devices[num_ep_devices++]));
+      }
+    }
+
+    return nullptr;
+  }
+
+  static OrtStatus* CreateEpImpl(OrtEpFactory* /*this_ptr*/,
+                                 _In_reads_(num_devices) const OrtHardwareDevice* const* /*devices*/,
+                                 _In_reads_(num_devices) const OrtKeyValuePairs* const* /*ep_metadata*/,
+                                 _In_ size_t /*num_devices*/,
+                                 _In_ const OrtSessionOptions* /*session_options*/,
+                                 _In_ const OrtLogger* /*logger*/,
+                                 _Out_ OrtEp** /*ep*/) {
+    return onnxruntime::CreateStatus(ORT_INVALID_ARGUMENT, "QNN EP factory does not support this method.");
+  }
+
+  static void ReleaseEpImpl(OrtEpFactory* /*this_ptr*/, OrtEp* /*ep*/) {
+    // no-op as we never create an EP here.
+  }
+
+  const OrtApi& ort_api;
+  const std::string ep_name;              // EP name
+  const std::string vendor{"Microsoft"};  // EP vendor name
+
+  // Qualcomm vendor ID. Refer to the ACPI ID registry (search Qualcomm): https://uefi.org/ACPI_ID_List
+  const uint32_t vendor_id{'Q' | ('C' << 8) | ('O' << 16) | ('M' << 24)};
+  const OrtHardwareDeviceType ort_hw_device_type;  // Supported OrtHardwareDevice
+  const std::string qnn_backend_type;              // QNN backend type for OrtHardwareDevice
+};
+
+extern "C" {
+//
+// Public symbols
+//
+OrtStatus* CreateEpFactories(const char* /*registration_name*/, const OrtApiBase* ort_api_base,
+                             OrtEpFactory** factories, size_t max_factories, size_t* num_factories) {
+  const OrtApi* ort_api = ort_api_base->GetApi(ORT_API_VERSION);
+
+  // Factory could use registration_name or define its own EP name.
+  auto factory_npu = std::make_unique<QnnEpFactory>(*ort_api,
+                                                    onnxruntime::kQnnExecutionProvider,
+                                                    OrtHardwareDeviceType_NPU, "htp");
+
+  // If want to support GPU, create a new factory instance because QNN EP is not currently setup to partition a single model
+  // among heterogeneous devices.
+  // std::unique_ptr<OrtEpFactory> factory_gpu = std::make_unique<QnnEpFactory>(*ort_api, "QNNExecutionProvider_GPU", OrtHardwareDeviceType_GPU, "gpu");
+
+  if (max_factories < 1) {
+    return ort_api->CreateStatus(ORT_INVALID_ARGUMENT,
+                                 "Not enough space to return EP factory. Need at least one.");
+  }
+
+  factories[0] = factory_npu.release();
+  *num_factories = 1;
+
+  return nullptr;
+}
+
+OrtStatus* ReleaseEpFactory(OrtEpFactory* factory) {
+  delete static_cast<QnnEpFactory*>(factory);
+  return nullptr;
+}
+}
 #endif  // !BUILD_QNN_EP_STATIC_LIB
diff --git a/onnxruntime/core/providers/qnn/symbols.def b/onnxruntime/core/providers/qnn/symbols.def
@@ -1,2 +1,4 @@
 EXPORTS
    GetProvider
+   CreateEpFactories
+   ReleaseEpFactory
diff --git a/onnxruntime/test/providers/qnn/qnn_basic_test.cc b/onnxruntime/test/providers/qnn/qnn_basic_test.cc
@@ -16,6 +16,7 @@
 #include "core/session/onnxruntime_run_options_config_keys.h"
 
 #include "test/providers/qnn/qnn_test_utils.h"
+#include "test/util/include/api_asserts.h"
 
 #include "gtest/gtest.h"
 #include "gmock/gmock.h"
@@ -37,24 +38,24 @@ namespace test {
 // TODO: When we need QNN in a minimal build we should add an ORT format version of the model
 #if !defined(ORT_MINIMAL_BUILD)
 
+static bool SessionHasEp(Ort::Session& session, const char* ep_name) {
+  // Access the underlying InferenceSession.
+  const OrtSession* ort_session = session;
+  const InferenceSession* s = reinterpret_cast<const InferenceSession*>(ort_session);
+  bool has_ep = false;
+
+  for (const auto& provider : s->GetRegisteredProviderTypes()) {
+    if (provider == ep_name) {
+      has_ep = true;
+      break;
+    }
+  }
+  return has_ep;
+}
+
 // Tests that the QNN EP is registered when added via the public C++ API.
 // Loads a simple ONNX model that adds floats.
 TEST_F(QnnHTPBackendTests, TestAddEpUsingPublicApi) {
-  auto session_has_qnn_ep = [](Ort::Session& session) -> bool {
-    // Access the underlying InferenceSession.
-    const OrtSession* ort_session = session;
-    const InferenceSession* s = reinterpret_cast<const InferenceSession*>(ort_session);
-    bool have_qnn_ep = false;
-
-    for (const auto& provider : s->GetRegisteredProviderTypes()) {
-      if (provider == kQnnExecutionProvider) {
-        have_qnn_ep = true;
-        break;
-      }
-    }
-    return have_qnn_ep;
-  };
-
   onnxruntime::ProviderOptions options;
 #if defined(_WIN32)
   options["backend_path"] = "QnnHtp.dll";
@@ -77,8 +78,9 @@ TEST_F(QnnHTPBackendTests, TestAddEpUsingPublicApi) {
     so.AppendExecutionProvider("QNN", options);
 
     Ort::Session session(*ort_env, ort_model_path, so);
-    ASSERT_TRUE(session_has_qnn_ep(session)) << "QNN EP was not found in registered providers for session "
-                                             << "when added to session with name 'QNN'";
+    ASSERT_TRUE(SessionHasEp(session, kQnnExecutionProvider))
+        << "QNN EP was not found in registered providers for session "
+        << "providers for session when added to session with name 'QNN'";
   }
 
   {
@@ -92,8 +94,9 @@ TEST_F(QnnHTPBackendTests, TestAddEpUsingPublicApi) {
     so.AppendExecutionProvider(kQnnExecutionProvider, options);
 
     Ort::Session session(*ort_env, ort_model_path, so);
-    ASSERT_TRUE(session_has_qnn_ep(session)) << "QNN EP was not found in registered providers for session "
-                                             << "when added to session with name '" << kQnnExecutionProvider << "'";
+    ASSERT_TRUE(SessionHasEp(session, kQnnExecutionProvider))
+        << "QNN EP was not found in registered providers for session "
+        << "when added to session with name '" << kQnnExecutionProvider << "'";
   }
 }
 
@@ -1265,6 +1268,24 @@ TEST_F(QnnHTPBackendTests, LoadingAndUnloadingOfQnnLibrary_FixSegFault) {
 }
 #endif  // !BUILD_QNN_EP_STATIC_LIB
 
+#if defined(WIN32) && !BUILD_QNN_EP_STATIC_LIB
+// Tests autoEP feature to automatically select an EP that supports the NPU.
+// Currently only works on Windows.
+TEST_F(QnnHTPBackendTests, AutoEp_PreferNpu) {
+  ASSERT_ORTSTATUS_OK(Ort::GetApi().RegisterExecutionProviderLibrary(*ort_env, kQnnExecutionProvider,
+                                                                     ORT_TSTR("onnxruntime_providers_qnn.dll")));
+
+  Ort::SessionOptions so;
+  so.SetEpSelectionPolicy(OrtExecutionProviderDevicePolicy_PREFER_NPU);
+
+  const ORTCHAR_T* ort_model_path = ORT_MODEL_FOLDER "nhwc_resize_sizes_opset18.quant.onnx";
+  Ort::Session session(*ort_env, ort_model_path, so);
+  EXPECT_TRUE(SessionHasEp(session, kQnnExecutionProvider));
+
+  ASSERT_ORTSTATUS_OK(Ort::GetApi().UnregisterExecutionProviderLibrary(*ort_env, kQnnExecutionProvider));
+}
+#endif  // defined(WIN32) && !BUILD_QNN_EP_STATIC_LIB
+
 #endif  // defined(__aarch64__) || defined(_M_ARM64) || defined(__linux__)
 #endif  // !defined(ORT_MINIMAL_BUILD)
 

Original file line number	Diff line number	Diff line change
`@@ -155,7 +155,7 @@ std::string CPUIDInfo::GetX86Vendor(int32_t* data) {`
`155`	`155`	`uint32_t CPUIDInfo::GetVendorId(const std::string& vendor) {`
`156`	`156`	`if (vendor == "GenuineIntel") return 0x8086;`
`157`	`157`	`if (vendor == "GenuineAMD") return 0x1022;`
`158`		`- if (vendor.find("Qualcomm") == 0) return 'Q' << 24 \| 'C' << 16 \| 'O' << 8 \| 'M';`
	`158`	`+ if (vendor.find("Qualcomm") == 0) return 'Q' \| ('C' << 8) \| ('O' << 16) \| ('M' << 24);`
`159`	`159`	`if (vendor.find("NV") == 0) return 0x10DE;`
`160`	`160`	`return 0;`
`161`	`161`	`}`