[QNN-EP] Enable verbose and artifacts saving in onnxruntime_provider_test.exe (#26396)

qti-hungjuiw · web-flow · commit f02a6407687e · 2025-11-26T09:23:04.000-08:00
### Description
<!-- Describe your changes. -->
- The change allows users to better debug unit tests by adding the
following environment variables:
    - `QNN_DUMP_ONNX`: Dump input onnx model
    - `QNN_DUMP_JSON`: Dump json qnn graph with provider_option
    `dump_json_qnn_graph`
    - `QNN_DUMP_DLC`: Dump dlc with provider_option `qnn_ir_backend_path`
    - `QNN_VERBOSE`: Use the log level `ORT_LOGGING_LEVEL_VERBOSE`
- Developers can use the environment variables above to save the
artifacts of QNN-EP testcases to a directory named with
`<TestSuite>_<TestName>`
    ```
        .
        ├── QnnCPUBackendTests_BatchNorm2D_fp32         # RunQnnModelTest
        │   ├── dumped_f32_model.onnx                   # float32 ONNX model
        │   ├── QNNExecutionProvider_QNN_XXXX_X_X.dlc
        │   └── QNNExecutionProvider_QNN_XXXX_X_X.json
        ├── QnnHTPBackendTests_BatchNorm_FP16           # TestFp16ModelAccuracy
        │   ├── dumped_f16_model.onnx                   # float16 ONNX model
        │   ├── dumped_f32_model.onnx                   # float32 ONNX model
        │   ├── QNNExecutionProvider_QNN_XXXX_X_X.dlc
        │   └── QNNExecutionProvider_QNN_XXXX_X_X.json
        └── QnnHTPBackendTests_BatchNorm2D_U8U8S32      # TestQDQModelAccuracy
            ├── dumped_f32_model.onnx                   # float32 ONNX model
            ├── dumped_qdq_model.onnx                   # QDQ ONNX model
            ├── QNNExecutionProvider_QNN_XXXX_X_X.dlc
            └── QNNExecutionProvider_QNN_XXXX_X_X.json

        # All artifact files are placed under the current working directory from
        # which the test binary is invoked.
    ```

### Motivation and Context
<!-- - Why is this change required? What problem does it solve?
- If it fixes an open issue, please link to the issue here. -->
- The JSON QNN graph and the DLC help backend developers debug
performance/accuracy issues.
- By comparing the ONNX model with the JSON QNN graph/DLC, we can locate
issues introduced by graph manipulation.
diff --git a/onnxruntime/core/providers/qnn/builder/opbuilder/base_op_builder.cc b/onnxruntime/core/providers/qnn/builder/opbuilder/base_op_builder.cc
@@ -76,6 +76,9 @@ Status BaseOpBuilder::ProcessDataTypes(QnnModelWrapper& qnn_model_wrapper,
     return CheckHtpDataTypes(input_qnn_dtypes, output_qnn_dtypes);
   } else if (IsGpuBackend(qnn_model_wrapper.GetQnnBackendType())) {
     return CheckGpuDataTypes(input_qnn_dtypes, output_qnn_dtypes);
+  } else if (IsIrBackend(qnn_model_wrapper.GetQnnBackendType())) {
+    // TODO: CheckIrDataTypes
+    return Status::OK();
   }
   return ORT_MAKE_STATUS(ONNXRUNTIME, FAIL, "Only support backend: CPU, HTP and GPU");
 }
diff --git a/onnxruntime/core/providers/qnn/builder/qnn_def.cc b/onnxruntime/core/providers/qnn/builder/qnn_def.cc
@@ -574,6 +574,10 @@ bool QnnOpConfigWrapper::CreateQnnGraphOp(const QNN_INTERFACE_VER_TYPE& qnn_inte
   return true;
 }
 
+bool IsIrBackend(QnnBackendType backend_type) {
+  return backend_type == QnnBackendType::SERIALIZER;
+}
+
 bool IsNpuBackend(QnnBackendType backend_type) {
   return backend_type == QnnBackendType::HTP || backend_type == QnnBackendType::DSP;
 }
diff --git a/onnxruntime/core/providers/qnn/builder/qnn_def.h b/onnxruntime/core/providers/qnn/builder/qnn_def.h
@@ -96,6 +96,8 @@ enum class QnnBackendType : uint8_t {
   SERIALIZER,
 };
 
+bool IsIrBackend(QnnBackendType backend_type);
+
 bool IsCpuBackend(QnnBackendType backend_type);
 
 bool IsNpuBackend(QnnBackendType backend_type);
diff --git a/onnxruntime/test/providers/qnn/README.md b/onnxruntime/test/providers/qnn/README.md
@@ -0,0 +1,70 @@
+# ONNX Runtime QNN Execution Provider Tests
+## Overview
+1. The `onnxruntime/test/providers/qnn` directory contains integration tests for the Qualcomm Neural Network (QNN) execution provider.
+2. Most testcases run an ONNX model through the QNN-EP, then verify the inference results against those produced by the CPU-EP.
+
+## Building the Tests
+The tests are built as part of the regular ONNX Runtime build. After a successful build you will have an executable named
+- onnxruntime_provider_test.exe   (Windows)
+- onnxruntime_provider_test      (Linux/macOS)
+
+## Running the Tests
+1. QNN supports several backends. You can use the standard Google‑Test syntax for filtering:
+    - `onnxruntime_provider_test.exe --gtest_filter=QnnCPUBackendTests.*`
+    - `onnxruntime_provider_test.exe --gtest_filter=QnnHTPBackendTests.*`
+    - `onnxruntime_provider_test.exe --gtest_filter=QnnGPUBackendTests.*`
+    - `onnxruntime_provider_test.exe --gtest_filter=QnnIRBackendTests.*`
+2. Saving Test Artifacts
+    - For debugging it is often helpful to keep the intermediate files that the tests generate. The following environment
+    variables are recognized by the test binary:
+        - `QNN_DUMP_ONNX`: Saves the input ONNX model used for the test
+        - `QNN_DUMP_JSON`: Saves the JSON QNN graph by enabling the provider_option `dump_json_qnn_graph`
+        - `QNN_DUMP_DLC`: Saves the compiled QNN DLC file by setting the provider_option `qnn_ir_backend_path` to `QnnIr.dll` (`libQnnIr.so` on non-Windows platforms)
+    - The artifacts will be saved to a directory named with `<TestSuite>_<TestName>`
+        ```
+        .
+        ├── QnnCPUBackendTests_BatchNorm2D_fp32         # RunQnnModelTest
+        │   ├── dumped_f32_model.onnx                   # float32 ONNX model
+        │   ├── QNNExecutionProvider_QNN_XXXX_X_X.dlc
+        │   └── QNNExecutionProvider_QNN_XXXX_X_X.json
+        ├── QnnHTPBackendTests_BatchNorm_FP16           # TestFp16ModelAccuracy
+        │   ├── dumped_f16_model.onnx                   # float16 ONNX model
+        │   ├── dumped_f32_model.onnx                   # float32 ONNX model
+        │   ├── QNNExecutionProvider_QNN_XXXX_X_X.dlc
+        │   └── QNNExecutionProvider_QNN_XXXX_X_X.json
+        └── QnnHTPBackendTests_BatchNorm2D_U8U8S32      # TestQDQModelAccuracy
+            ├── dumped_f32_model.onnx                   # float32 ONNX model
+            ├── dumped_qdq_model.onnx                   # QDQ ONNX model
+            ├── QNNExecutionProvider_QNN_XXXX_X_X.dlc
+            └── QNNExecutionProvider_QNN_XXXX_X_X.json
+
+        # All artifact files are placed under the current working directory from which the test binary is invoked.
+        ```
+3. Verbose
+    - `QNN_VERBOSE`: Sets the ONNX Runtime log level to `ORT_LOGGING_LEVEL_VERBOSE`
+
+4. You can enable any combination of these environment variables, for example:
+    - On Linux/macOS
+        ```bash
+        export QNN_DUMP_ONNX=1
+        export QNN_DUMP_JSON=1
+        export QNN_DUMP_DLC=1
+        export QNN_VERBOSE=1
+        ```
+    - On Windows
+        ```cmd
+        set QNN_DUMP_ONNX=1
+        set QNN_DUMP_JSON=1
+        set QNN_DUMP_DLC=1
+        set QNN_VERBOSE=1
+        ```
+        ```ps1
+        $Env:QNN_DUMP_ONNX = "1"
+        $Env:QNN_DUMP_JSON = "1"
+        $Env:QNN_DUMP_DLC = "1"
+        $Env:QNN_VERBOSE = "1"
+        ```
+
+## Note
+- A failure in the QNN backend can prevent the test artifacts from being saved.
+- The `onnxruntime_provider_test.exe` does not automatically delete the artifact directories, so you may want to prune them after a debugging session.
diff --git a/onnxruntime/test/providers/qnn/qnn_test_utils.cc b/onnxruntime/test/providers/qnn/qnn_test_utils.cc
@@ -101,6 +101,12 @@ void RunQnnModelTest(const GetTestModelFn& build_test_case, ProviderOptions prov
                      int opset_version, ExpectedEPNodeAssignment expected_ep_assignment,
                      float fp32_abs_err, logging::Severity log_severity, bool verify_outputs,
                      std::function<void(const Graph&)>* ep_graph_checker) {
+  std::filesystem::path output_dir;
+  if (QNNTestEnvironment::GetInstance().dump_onnx() ||
+      QNNTestEnvironment::GetInstance().dump_json() ||
+      QNNTestEnvironment::GetInstance().dump_dlc()) {
+    output_dir = QNNTestEnvironment::GetInstance().CreateTestcaseDirs();
+  }
   EPVerificationParams verification_params;
   verification_params.ep_node_assignment = expected_ep_assignment;
   verification_params.fp32_abs_err = fp32_abs_err;
@@ -110,6 +116,10 @@ void RunQnnModelTest(const GetTestModelFn& build_test_case, ProviderOptions prov
 
   auto& logging_manager = DefaultLoggingManager();
   logging_manager.SetDefaultLoggerSeverity(log_severity);
+  if (QNNTestEnvironment::GetInstance().verbose()) {
+    logging_manager.RemoveSink(logging::SinkType::EtwSink);
+    logging_manager.SetDefaultLoggerSeverity(logging::Severity::kVERBOSE);
+  }
 
   onnxruntime::Model model("QNN_EP_TestModel", false, ModelMetaData(), PathString(),
                            IOnnxRuntimeOpSchemaRegistryList(), domain_to_version, {},
@@ -123,7 +133,27 @@ void RunQnnModelTest(const GetTestModelFn& build_test_case, ProviderOptions prov
   // Serialize the model to a string.
   std::string model_data;
   model.ToProto().SerializeToString(&model_data);
+
+  if (QNNTestEnvironment::GetInstance().dump_onnx()) {
+    auto dump_path = output_dir / ToPathString("dumped_f32_model.onnx");
+    LOGS(logging_manager.DefaultLogger(), VERBOSE) << "Save onnx model at: " << dump_path;
+    ASSERT_STATUS_OK(onnxruntime::Model::Save(model, dump_path));
+  }
+
   TryEnableQNNSaver(provider_options);
+  if (QNNTestEnvironment::GetInstance().dump_dlc()) {
+    provider_options["dump_qnn_ir_dlc"] = "1";
+    provider_options["dump_qnn_ir_dlc_dir"] = output_dir.string();
+#if defined(_WIN32)
+    provider_options["qnn_ir_backend_path"] = "QnnIr.dll";
+#else
+    provider_options["qnn_ir_backend_path"] = "libQnnIr.so";
+#endif  // defined(_WIN32)
+  }
+  if (QNNTestEnvironment::GetInstance().dump_json()) {
+    provider_options["dump_json_qnn_graph"] = "1";
+    provider_options["json_qnn_graph_dir"] = output_dir.string();
+  }
   RunAndVerifyOutputsWithEP(AsByteSpan(model_data.data(), model_data.size()), "QNN_EP_TestLogID",
                             QnnExecutionProviderWithOptions(provider_options),
                             helper.feeds_, verification_params,
@@ -134,11 +164,21 @@ void RunQnnModelTestHTPNoVerify(const GetTestModelFn& build_test_case, ProviderO
                                 int opset_version, ExpectedEPNodeAssignment expected_ep_assignment,
                                 logging::Severity log_severity,
                                 std::function<void(const Graph&)>* ep_graph_checker) {
+  std::filesystem::path output_dir;
+  if (QNNTestEnvironment::GetInstance().dump_onnx() ||
+      QNNTestEnvironment::GetInstance().dump_dlc() ||
+      QNNTestEnvironment::GetInstance().dump_json()) {
+    output_dir = QNNTestEnvironment::GetInstance().CreateTestcaseDirs();
+  }
   // Add kMSDomain to cover contrib op like Gelu
   const std::unordered_map<std::string, int> domain_to_version = {{"", opset_version}, {kMSDomain, 1}};
 
   auto& logging_manager = DefaultLoggingManager();
   logging_manager.SetDefaultLoggerSeverity(log_severity);
+  if (QNNTestEnvironment::GetInstance().verbose()) {
+    logging_manager.RemoveSink(logging::SinkType::EtwSink);
+    logging_manager.SetDefaultLoggerSeverity(logging::Severity::kVERBOSE);
+  }
 
   onnxruntime::Model model("QNN_EP_TestModel", false, ModelMetaData(), PathString(),
                            IOnnxRuntimeOpSchemaRegistryList(), domain_to_version, {},
@@ -152,7 +192,27 @@ void RunQnnModelTestHTPNoVerify(const GetTestModelFn& build_test_case, ProviderO
   // Serialize the model to a string.
   std::string model_data;
   model.ToProto().SerializeToString(&model_data);
+
+  if (QNNTestEnvironment::GetInstance().dump_onnx()) {
+    auto dump_path = output_dir / ToPathString("dumped_f32_model.onnx");
+    LOGS(logging_manager.DefaultLogger(), VERBOSE) << "Save onnx model at: " << dump_path;
+    ASSERT_STATUS_OK(onnxruntime::Model::Save(model, dump_path));
+  }
+
   TryEnableQNNSaver(provider_options);
+  if (QNNTestEnvironment::GetInstance().dump_dlc()) {
+    provider_options["dump_qnn_ir_dlc"] = "1";
+    provider_options["dump_qnn_ir_dlc_dir"] = output_dir.string();
+#if defined(_WIN32)
+    provider_options["qnn_ir_backend_path"] = "QnnIr.dll";
+#else
+    provider_options["qnn_ir_backend_path"] = "libQnnIr.so";
+#endif  // defined(_WIN32)
+  }
+  if (QNNTestEnvironment::GetInstance().dump_json()) {
+    provider_options["dump_json_qnn_graph"] = "1";
+    provider_options["json_qnn_graph_dir"] = output_dir.string();
+  }
 
   SessionOptions so;
   so.session_logid = "QNN_EP_TestLogID";
diff --git a/onnxruntime/test/providers/qnn/qnn_test_utils.h b/onnxruntime/test/providers/qnn/qnn_test_utils.h

Original file line number	Diff line number	Diff line change
`@@ -76,6 +76,9 @@ Status BaseOpBuilder::ProcessDataTypes(QnnModelWrapper& qnn_model_wrapper,`
`76`	`76`	`return CheckHtpDataTypes(input_qnn_dtypes, output_qnn_dtypes);`
`77`	`77`	`} else if (IsGpuBackend(qnn_model_wrapper.GetQnnBackendType())) {`
`78`	`78`	`return CheckGpuDataTypes(input_qnn_dtypes, output_qnn_dtypes);`
	`79`	`+ } else if (IsIrBackend(qnn_model_wrapper.GetQnnBackendType())) {`
	`80`	`+ // TODO: CheckIrDataTypes`
	`81`	`+ return Status::OK();`
`79`	`82`	`}`
`80`	`83`	`return ORT_MAKE_STATUS(ONNXRUNTIME, FAIL, "Only support backend: CPU, HTP and GPU");`
`81`	`84`	`}`
Original file line number	Diff line number	Diff line change
`@@ -574,6 +574,10 @@ bool QnnOpConfigWrapper::CreateQnnGraphOp(const QNN_INTERFACE_VER_TYPE& qnn_inte`
`574`	`574`	`return true;`
`575`	`575`	`}`
`576`	`576`
	`577`	`+bool IsIrBackend(QnnBackendType backend_type) {`
	`578`	`+ return backend_type == QnnBackendType::SERIALIZER;`
	`579`	`+}`
	`580`	`+`
`577`	`581`	`bool IsNpuBackend(QnnBackendType backend_type) {`
`578`	`582`	`return backend_type == QnnBackendType::HTP \|\| backend_type == QnnBackendType::DSP;`
`579`	`583`	`}`