From e31608abcde64aee494fba3bda944f5e0c14d683 Mon Sep 17 00:00:00 2001 From: vipandya Date: Wed, 28 Jan 2026 12:22:11 +0000 Subject: [PATCH 1/4] avoid repetitive creation fp4 native-custom-op domains --- .../nv_execution_provider_custom_ops.cc | 47 ++++++++++--------- 1 file changed, 25 insertions(+), 22 deletions(-) diff --git a/onnxruntime/core/providers/nv_tensorrt_rtx/nv_execution_provider_custom_ops.cc b/onnxruntime/core/providers/nv_tensorrt_rtx/nv_execution_provider_custom_ops.cc index 90e488a1eda18..059ac81b9bc08 100644 --- a/onnxruntime/core/providers/nv_tensorrt_rtx/nv_execution_provider_custom_ops.cc +++ b/onnxruntime/core/providers/nv_tensorrt_rtx/nv_execution_provider_custom_ops.cc @@ -32,13 +32,17 @@ common::Status CreateTensorRTCustomOpDomainList(std::vector& static std::vector> created_custom_op_list; static std::unique_ptr native_custom_op_domain = std::make_unique(); static std::vector> native_custom_op_list; + static bool native_custom_ops_initialized = false; static std::mutex mutex; std::lock_guard lock(mutex); + + // Add already-initialized native ops to domain list + if (native_custom_ops_initialized) { + domain_list.push_back(native_custom_op_domain.get()); + } + if (custom_op_domain->domain_ != "" && custom_op_domain->custom_ops_.size() > 0) { domain_list.push_back(custom_op_domain.get()); - if (native_custom_op_domain->domain_ != "" && native_custom_op_domain->custom_ops_.size() > 0) { - domain_list.push_back(native_custom_op_domain.get()); - } return Status::OK(); } @@ -132,35 +136,34 @@ common::Status CreateTensorRTCustomOpDomainList(std::vector& } // Register native custom ops (register these independent of TRT plugin library availability) - const char* native_custom_ops_names[] = {"TRT_FP4DynamicQuantize", "TRT_FP8QuantizeLinear", "TRT_FP8DequantizeLinear"}; - int num_native_custom_ops = std::size(native_custom_ops_names); + if (!native_custom_ops_initialized) { + const char* native_custom_ops_names[] = {"TRT_FP4DynamicQuantize", 
"TRT_FP8QuantizeLinear", "TRT_FP8DequantizeLinear"}; + int num_native_custom_ops = std::size(native_custom_ops_names); + + for (int i = 0; i < num_native_custom_ops; i++) { + native_custom_op_list.push_back(std::make_unique(onnxruntime::kNvTensorRTRTXExecutionProvider, nullptr)); + native_custom_op_list.back()->SetName(native_custom_ops_names[i]); + native_custom_op_domain->custom_ops_.push_back(native_custom_op_list.back().get()); + } - for (int i = 0; i < num_native_custom_ops; i++) { - native_custom_op_list.push_back(std::make_unique(onnxruntime::kNvTensorRTRTXExecutionProvider, nullptr)); - native_custom_op_list.back()->SetName(native_custom_ops_names[i]); - native_custom_op_domain->custom_ops_.push_back(native_custom_op_list.back().get()); + native_custom_op_domain->domain_ = "trt"; + domain_list.push_back(native_custom_op_domain.get()); + native_custom_ops_initialized = true; } - native_custom_op_domain->domain_ = "trt"; - domain_list.push_back(native_custom_op_domain.get()); return Status::OK(); } void ReleaseTensorRTCustomOpDomain(OrtCustomOpDomain* domain) { - if (domain != nullptr) { - for (auto ptr : domain->custom_ops_) { - if (ptr != nullptr) { - delete ptr; - } - } - delete domain; - } + (void)domain; // Suppress unused parameter warning + // The custom ops (TensorRTCustomOp) and domain (OrtCustomOpDomain) are marked as static + // with unique_ptr at the time of creation in CreateTensorRTCustomOpDomainList() function. + // Deleting them here can cause double-delete. } void ReleaseTensorRTCustomOpDomainList(std::vector& custom_op_domain_list) { - for (auto ptr : custom_op_domain_list) { - ReleaseTensorRTCustomOpDomain(ptr); - } + // Only clear the reference vector, don't delete the static domain objects. 
+ custom_op_domain_list.clear(); } } // namespace onnxruntime From d68be0e1f67e1962434362492014781cd09ba2c4 Mon Sep 17 00:00:00 2001 From: vipandya Date: Wed, 4 Feb 2026 08:30:31 +0000 Subject: [PATCH 2/4] nit --- .../nv_tensorrt_rtx/nv_execution_provider_custom_ops.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/onnxruntime/core/providers/nv_tensorrt_rtx/nv_execution_provider_custom_ops.cc b/onnxruntime/core/providers/nv_tensorrt_rtx/nv_execution_provider_custom_ops.cc index 059ac81b9bc08..3951fd23ea4c4 100644 --- a/onnxruntime/core/providers/nv_tensorrt_rtx/nv_execution_provider_custom_ops.cc +++ b/onnxruntime/core/providers/nv_tensorrt_rtx/nv_execution_provider_custom_ops.cc @@ -158,7 +158,7 @@ void ReleaseTensorRTCustomOpDomain(OrtCustomOpDomain* domain) { (void)domain; // Suppress unused parameter warning // The custom ops (TensorRTCustomOp) and domain (OrtCustomOpDomain) are marked as static // with unique_ptr at the time of creation in CreateTensorRTCustomOpDomainList() function. - // Deleting them here can cause double-delete. + // Deleting them here can risk double-delete. 
} void ReleaseTensorRTCustomOpDomainList(std::vector& custom_op_domain_list) { From 872f539eb2c934652960af26846a4a1c36d61835 Mon Sep 17 00:00:00 2001 From: vipandya Date: Tue, 10 Feb 2026 06:20:31 +0000 Subject: [PATCH 3/4] double delete comment update and static objs comment add --- .../nv_execution_provider_custom_ops.cc | 26 +++++++++++++++---- 1 file changed, 21 insertions(+), 5 deletions(-) diff --git a/onnxruntime/core/providers/nv_tensorrt_rtx/nv_execution_provider_custom_ops.cc b/onnxruntime/core/providers/nv_tensorrt_rtx/nv_execution_provider_custom_ops.cc index 3951fd23ea4c4..bda4ba02a893d 100644 --- a/onnxruntime/core/providers/nv_tensorrt_rtx/nv_execution_provider_custom_ops.cc +++ b/onnxruntime/core/providers/nv_tensorrt_rtx/nv_execution_provider_custom_ops.cc @@ -28,11 +28,24 @@ extern TensorrtLogger& GetTensorrtLogger(bool verbose); * So, TensorRTCustomOp uses variadic inputs/outputs to pass ONNX graph validation. */ common::Status CreateTensorRTCustomOpDomainList(std::vector& domain_list, const std::string extra_plugin_lib_paths) { + // Domain for TRT plugin custom ops (domain name: "trt.plugins"). Owns the OrtCustomOpDomain object. + // Raw pointers from .get() are handed out to callers via domain_list and may be held by InferenceSession. static std::unique_ptr custom_op_domain = std::make_unique(); + + // Owns the TensorRTCustomOp objects for TRT plugins. Raw pointers are stored in custom_op_domain->custom_ops_. static std::vector> created_custom_op_list; + + // Domain for native custom ops (domain name: "trt"). Owns the OrtCustomOpDomain object. + // Raw pointers from .get() are handed out to callers via domain_list and may be held by InferenceSession. static std::unique_ptr native_custom_op_domain = std::make_unique(); + + // Owns the TensorRTCustomOp objects for native custom ops. Raw pointers are stored in native_custom_op_domain->custom_ops_. 
static std::vector> native_custom_op_list; + + // Tracks whether native custom ops have been registered to avoid re-registration on subsequent calls. static bool native_custom_ops_initialized = false; + + // Protects concurrent access to all the above static members. static std::mutex mutex; std::lock_guard lock(mutex); @@ -138,9 +151,9 @@ common::Status CreateTensorRTCustomOpDomainList(std::vector& // Register native custom ops (register these independent of TRT plugin library availability) if (!native_custom_ops_initialized) { const char* native_custom_ops_names[] = {"TRT_FP4DynamicQuantize", "TRT_FP8QuantizeLinear", "TRT_FP8DequantizeLinear"}; - int num_native_custom_ops = std::size(native_custom_ops_names); + size_t num_native_custom_ops = std::size(native_custom_ops_names); - for (int i = 0; i < num_native_custom_ops; i++) { + for (size_t i = 0; i < num_native_custom_ops; i++) { native_custom_op_list.push_back(std::make_unique(onnxruntime::kNvTensorRTRTXExecutionProvider, nullptr)); native_custom_op_list.back()->SetName(native_custom_ops_names[i]); native_custom_op_domain->custom_ops_.push_back(native_custom_op_list.back().get()); @@ -156,9 +169,12 @@ common::Status CreateTensorRTCustomOpDomainList(std::vector& void ReleaseTensorRTCustomOpDomain(OrtCustomOpDomain* domain) { (void)domain; // Suppress unused parameter warning - // The custom ops (TensorRTCustomOp) and domain (OrtCustomOpDomain) are marked as static - // with unique_ptr at the time of creation in CreateTensorRTCustomOpDomainList() function. - // Deleting them here can risk double-delete. + // The domain and its custom ops are owned by static unique_ptrs in CreateTensorRTCustomOpDomainList(). + // Callers receive raw pointers via .get(). + // 1. Manually deleting them would cause a double-free when the static unique_ptrs are destroyed at program exit. + // 2. 
Resetting the static unique_ptrs is also unsafe because other EP instances or InferenceSession objects + // may still hold raw pointers to these same objects (handed out via domain_list). + // The static objects are shared across EP instances and persist for the program lifetime. } void ReleaseTensorRTCustomOpDomainList(std::vector& custom_op_domain_list) { From 3506183fb4eb8fcba517a515043cb3696164bceb Mon Sep 17 00:00:00 2001 From: vipandya Date: Wed, 11 Feb 2026 05:08:32 +0000 Subject: [PATCH 4/4] avoid new variable for native-ops check --- .../nv_tensorrt_rtx/nv_execution_provider_custom_ops.cc | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) diff --git a/onnxruntime/core/providers/nv_tensorrt_rtx/nv_execution_provider_custom_ops.cc b/onnxruntime/core/providers/nv_tensorrt_rtx/nv_execution_provider_custom_ops.cc index bda4ba02a893d..a7c37cd481894 100644 --- a/onnxruntime/core/providers/nv_tensorrt_rtx/nv_execution_provider_custom_ops.cc +++ b/onnxruntime/core/providers/nv_tensorrt_rtx/nv_execution_provider_custom_ops.cc @@ -40,17 +40,15 @@ common::Status CreateTensorRTCustomOpDomainList(std::vector& static std::unique_ptr native_custom_op_domain = std::make_unique(); // Owns the TensorRTCustomOp objects for native custom ops. Raw pointers are stored in native_custom_op_domain->custom_ops_. + // Non-empty list indicates native custom ops have been registered (used to avoid re-registration on subsequent calls). static std::vector> native_custom_op_list; - // Tracks whether native custom ops have been registered to avoid re-registration on subsequent calls. - static bool native_custom_ops_initialized = false; - // Protects concurrent access to all the above static members. 
static std::mutex mutex; std::lock_guard lock(mutex); // Add already-initialized native ops to domain list - if (native_custom_ops_initialized) { + if (!native_custom_op_list.empty()) { domain_list.push_back(native_custom_op_domain.get()); } @@ -149,7 +147,7 @@ common::Status CreateTensorRTCustomOpDomainList(std::vector& } // Register native custom ops (register these independent of TRT plugin library availability) - if (!native_custom_ops_initialized) { + if (native_custom_op_list.empty()) { const char* native_custom_ops_names[] = {"TRT_FP4DynamicQuantize", "TRT_FP8QuantizeLinear", "TRT_FP8DequantizeLinear"}; size_t num_native_custom_ops = std::size(native_custom_ops_names); @@ -161,7 +159,6 @@ common::Status CreateTensorRTCustomOpDomainList(std::vector& native_custom_op_domain->domain_ = "trt"; domain_list.push_back(native_custom_op_domain.get()); - native_custom_ops_initialized = true; } return Status::OK();