
#include "deep_ort_backend_plugin/ort_backend_executor.hpp"

+ #include <onnxruntime_cxx_api.h>
+
+ #include <cstring>
#include <memory>
#include <stdexcept>
#include <string>
@@ -29,9 +32,19 @@ OrtBackendExecutor::OrtBackendExecutor()
{
  env_ = std::make_unique<Ort::Env>(ORT_LOGGING_LEVEL_WARNING, "deep_ort_backend");
  memory_info_ = Ort::MemoryInfo::CreateCpu(OrtAllocatorType::OrtArenaAllocator, OrtMemType::OrtMemTypeDefault);
+
+   // Register our custom allocator with the environment
+   auto custom_allocator_shared = get_ort_cpu_allocator();
+   auto * custom_allocator = static_cast<OrtCpuMemoryAllocator *>(custom_allocator_shared.get());
+   OrtStatus * status =
+     OrtGetApiBase()->GetApi(ORT_API_VERSION)->RegisterAllocator(*env_, custom_allocator->get_ort_allocator());
+   if (status != nullptr) {
+     OrtGetApiBase()->GetApi(ORT_API_VERSION)->ReleaseStatus(status);
+     // Log warning but don't fail - we can still work with default allocator
+   }
}

- bool OrtBackendExecutor::load_model(const std::filesystem::path & model_path)
+ bool OrtBackendExecutor::load_model_impl(const std::filesystem::path & model_path)
{
  if (!std::filesystem::exists(model_path)) {
    return false;
@@ -42,35 +55,35 @@ bool OrtBackendExecutor::load_model(const std::filesystem::path & model_path)
    session_options.SetIntraOpNumThreads(1);
    session_options.SetGraphOptimizationLevel(GraphOptimizationLevel::ORT_ENABLE_EXTENDED);

+     // Configure session to use environment allocators (our custom allocator)
+     session_options.AddConfigEntry("session.use_env_allocators", "1");
+
    session_ = std::make_unique<Ort::Session>(*env_, model_path.c_str(), session_options);

    model_path_ = model_path;
-     model_loaded_ = true;
    return true;
  } catch (const std::exception & e) {
-     model_loaded_ = false;
    return false;
  }
}

- deep_ros::Tensor OrtBackendExecutor::run_inference(deep_ros::Tensor input)
+ deep_ros::Tensor OrtBackendExecutor::run_inference_impl(deep_ros::Tensor & input)
{
-   if (!model_loaded_) {
-     throw std::runtime_error("No model loaded for inference");
-   }
-
  if (!session_) {
    throw std::runtime_error("No ONNX session available");
  }

  try {
    // Convert deep_ros::DataType to ONNX tensor element type
    ONNXTensorElementDataType onnx_type = convert_to_onnx_type(input.dtype());
-
-     // Create input OrtValue that wraps the input tensor's memory (zero-copy!)
-     size_t input_size_bytes = input.size() * get_element_size(input.dtype());
    std::vector<int64_t> input_shape_int64(input.shape().begin(), input.shape().end());

+     // Get our custom allocator for output binding
+     auto custom_allocator_shared = get_ort_cpu_allocator();
+     auto * custom_allocator = static_cast<OrtCpuMemoryAllocator *>(custom_allocator_shared.get());
+
+     // Create input tensor that wraps existing input memory (zero-copy!)
+     size_t input_size_bytes = input.size() * get_element_size(input.dtype());
    Ort::Value ort_input = Ort::Value::CreateTensor(
      memory_info_, input.data(), input_size_bytes, input_shape_int64.data(), input_shape_int64.size(), onnx_type);

@@ -79,42 +92,38 @@ deep_ros::Tensor OrtBackendExecutor::run_inference(deep_ros::Tensor input)
    auto input_name = session_->GetInputNameAllocated(0, allocator);
    auto output_name = session_->GetOutputNameAllocated(0, allocator);

-     // Get output shape (assuming we know it or can infer it)
-     auto output_shape = get_output_shape(input.shape());
-
-     // Allocate output tensor using our custom allocator
-     auto tensor_allocator = get_ort_cpu_allocator();
-     deep_ros::Tensor output(output_shape, input.dtype(), tensor_allocator);
-
-     // Create output OrtValue that wraps the output tensor's memory (zero-copy!)
-     size_t output_size_bytes = output.size() * get_element_size(output.dtype());
-     std::vector<int64_t> output_shape_int64(output.shape().begin(), output.shape().end());
-
-     Ort::Value ort_output = Ort::Value::CreateTensor(
-       memory_info_, output.data(), output_size_bytes, output_shape_int64.data(), output_shape_int64.size(), onnx_type);
-
    // Create IO binding for zero-copy inference
    Ort::IoBinding binding(*session_);
    binding.BindInput(input_name.get(), ort_input);
-     binding.BindOutput(output_name.get(), ort_output);

-     // Run inference with IO binding (zero-copy!)
+     // Bind output to use our custom allocator - ONNX Runtime will allocate using our allocator
+     binding.BindOutput(output_name.get(), custom_allocator->get_ort_memory_info());
+
+     // Run inference with IO binding (zero-copy for both input and output!)
    Ort::RunOptions run_options;
    session_->Run(run_options, binding);

+     // Get output values allocated by ONNX Runtime using our custom allocator
+     Ort::AllocatorWithDefaultOptions default_allocator;
+     std::vector<Ort::Value> output_tensors = binding.GetOutputValues(default_allocator);
+
+     // Get output shape and create our tensor wrapping the ONNX-allocated memory
+     auto output_shape = get_output_shape(input.shape());
+     void * output_data = output_tensors[0].GetTensorMutableData<void>();
+
+     // Create deep_ros tensor that wraps the ONNX-allocated memory (zero-copy!)
+     deep_ros::Tensor output(output_data, output_shape, input.dtype());
+
    return output;
  } catch (const std::exception & e) {
    throw std::runtime_error("ONNX Runtime inference failed: " + std::string(e.what()));
  }
}

- void OrtBackendExecutor::unload_model()
+ void OrtBackendExecutor::unload_model_impl()
{
-   if (model_loaded_) {
-     session_.reset();
-     model_loaded_ = false;
-     model_path_.clear();
-   }
+   session_.reset();
+   model_path_.clear();
}

std::vector<std::string> OrtBackendExecutor::supported_model_formats() const