From 2ab51c70131d2c86686269ffd35d6bdb8bc48116 Mon Sep 17 00:00:00 2001 From: Adrian Lizarraga Date: Thu, 16 Jan 2025 16:20:33 -0800 Subject: [PATCH] [QNN EP] Fix segfault when unregistering HTP shared memory handles (#23402) ### Description - Fixes segfault when the function that cleans up HTP memory handles uses an invalid Logger. - Fixes unit test that compares output from QNN EP with exact float values. QNN HTP runs float32 models with float16 precision, so need to use a tolerance in the comparison. ### Motivation and Context Fixes issues with using QNN HTP memory sharing on Windows ARM64. This is also needed to test HTP shared memory with https://github.com/microsoft/onnxruntime/pull/23120. --- .../builder/qnn_context_mem_handle_manager.cc | 16 ++++++++++------ onnxruntime/test/providers/qnn/qnn_basic_test.cc | 6 ++++++ onnxruntime/test/shared_lib/test_inference.cc | 15 ++++++++++++--- 3 files changed, 28 insertions(+), 9 deletions(-) diff --git a/onnxruntime/core/providers/qnn/builder/qnn_context_mem_handle_manager.cc b/onnxruntime/core/providers/qnn/builder/qnn_context_mem_handle_manager.cc index 7ce539c21ef94..22bbc2d48e8e4 100644 --- a/onnxruntime/core/providers/qnn/builder/qnn_context_mem_handle_manager.cc +++ b/onnxruntime/core/providers/qnn/builder/qnn_context_mem_handle_manager.cc @@ -86,13 +86,17 @@ Status QnnContextMemHandleManager::GetOrRegister(void* shared_memory_address, co LOGS(logger_, VERBOSE) << "Registered QNN mem handle. mem_handle: " << raw_mem_handle; - const auto unregister_mem_handle = [this](Qnn_MemHandle_t raw_mem_handle) { - LOGS(logger_, VERBOSE) << "Unregistering QNN mem handle. mem_handle: " << raw_mem_handle; - - const auto unregister_result = qnn_interface_.memDeRegister(&raw_mem_handle, 1); + // NOTE: Must use the default ORT logger inside this lambda. Don't capture this->logger_ because it may be deleted + // by the time we need to unregister all memory handles. This happens when this->logger_ is a session logger: + // ~InferenceSession() -> ~Logger() -> ~QnnExecutionProvider() -> ~QnnBackendManager() -> + // ~QnnContextMemHandleManager() -> unregister_mem_handle() segfault + const auto unregister_mem_handle = [&qnn_interface = this->qnn_interface_](Qnn_MemHandle_t raw_mem_handle) { + LOGS_DEFAULT(VERBOSE) << "Unregistering QNN mem handle. mem_handle: " << raw_mem_handle; + + const auto unregister_result = qnn_interface.memDeRegister(&raw_mem_handle, 1); if (unregister_result != QNN_SUCCESS) { - LOGS(logger_, ERROR) << "qnn_interface.memDeRegister() failed: " - << utils::GetVerboseQnnErrorMessage(qnn_interface_, unregister_result); + LOGS_DEFAULT(ERROR) << "qnn_interface.memDeRegister() failed: " + << utils::GetVerboseQnnErrorMessage(qnn_interface, unregister_result); } }; diff --git a/onnxruntime/test/providers/qnn/qnn_basic_test.cc b/onnxruntime/test/providers/qnn/qnn_basic_test.cc index 8dfae0fd5b0a4..92ec4ba3b0d28 100644 --- a/onnxruntime/test/providers/qnn/qnn_basic_test.cc +++ b/onnxruntime/test/providers/qnn/qnn_basic_test.cc @@ -1113,6 +1113,12 @@ TEST_F(QnnHTPBackendTests, UseHtpSharedMemoryAllocatorForInputs) { qnn_ep = QnnExecutionProviderWithOptions(provider_options); } catch (const OnnxRuntimeException& e) { // handle particular exception that indicates that the libcdsprpc.so / dll can't be loaded + // NOTE: To run this on a local Windows ARM64 device, you need to copy libcdsprpc.dll to the build directory: + // - Open File Explorer + // - Go to C:/Windows/System32/DriverStore/FileRepository/ + // - Search for a folder that begins with qcnspmcdm8380.inf_arm64_ and open it + // - Copy the libcdsprpc.dll into the build/[PATH CONTAINING onnxruntime.dll] directory of the application. + // TODO(adrianlizarraga): Update CMake build for unittests to automatically copy libcdsprpc.dll into build directory #if defined(_WIN32) constexpr const char* expected_error_message = "Failed to load libcdsprpc.dll"; #else diff --git a/onnxruntime/test/shared_lib/test_inference.cc b/onnxruntime/test/shared_lib/test_inference.cc index dab73d3824d3b..2ea99151c2bfd 100644 --- a/onnxruntime/test/shared_lib/test_inference.cc +++ b/onnxruntime/test/shared_lib/test_inference.cc @@ -1960,6 +1960,12 @@ static bool CreateSessionWithQnnEpAndQnnHtpSharedMemoryAllocator(PATH_TYPE model return true; } catch (const Ort::Exception& e) { // handle particular exception that indicates that the libcdsprpc.so / dll can't be loaded + // NOTE: To run this on a local Windows ARM64 device, you need to copy libcdsprpc.dll to the build directory: + // - Open File Explorer + // - Go to C:/Windows/System32/DriverStore/FileRepository/ + // - Search for a folder that begins with qcnspmcdm8380.inf_arm64_ and open it + // - Copy the libcdsprpc.dll into the build/[PATH CONTAINING onnxruntime.dll] directory of the application. + // TODO(adrianlizarraga): Update CMake build for unittests to automatically copy libcdsprpc.dll into build directory std::string_view error_message = e.what(); #if defined(_WIN32) @@ -2275,8 +2281,11 @@ TEST(CApiTest, io_binding_qnn_htp_shared) { Ort::Value bound_x = Ort::Value::CreateTensor(info_qnn_htp_shared, reinterpret_cast(input_data.get()), x_values.size(), x_shape.data(), x_shape.size()); + // Setup expected output (y) from model. Note that QNN EP runs float32 operators as float16, + // so the output will not be exactly equal. const std::array expected_y_shape = {3, 2}; const std::array expected_y = {1.0f, 4.0f, 9.0f, 16.0f, 25.0f, 36.0f}; + constexpr float y_max_abs_err = 1e-5f; auto output_data = qnn_htp_shared_allocator.GetAllocation(expected_y.size() * sizeof(float)); ASSERT_NE(output_data.get(), nullptr); @@ -2293,7 +2302,7 @@ TEST(CApiTest, io_binding_qnn_htp_shared) { // Check the values against the bound raw memory { gsl::span y{reinterpret_cast(output_data.get()), expected_y.size()}; - ASSERT_TRUE(std::equal(std::begin(y), std::end(y), std::begin(expected_y))); + EXPECT_THAT(expected_y, ::testing::Pointwise(::testing::FloatNear(y_max_abs_err), y)); } // Now compare values via GetOutputValues @@ -2308,7 +2317,7 @@ TEST(CApiTest, io_binding_qnn_htp_shared) { ASSERT_EQ(expected_y.size(), count); gsl::span y{Y_value.GetTensorData(), count}; - ASSERT_TRUE(std::equal(std::begin(y), std::end(y), std::begin(expected_y))); + EXPECT_THAT(expected_y, ::testing::Pointwise(::testing::FloatNear(y_max_abs_err), y)); } { @@ -2336,7 +2345,7 @@ TEST(CApiTest, io_binding_qnn_htp_shared) { ASSERT_EQ(expected_y.size(), count); gsl::span y{Y_value.GetTensorData(), count}; - ASSERT_TRUE(std::equal(std::begin(y), std::end(y), std::begin(expected_y))); + EXPECT_THAT(expected_y, ::testing::Pointwise(::testing::FloatNear(y_max_abs_err), y)); } // Clean up