Merged (changes from 4 commits)
12 changes: 7 additions & 5 deletions .devcontainer/Dockerfile
@@ -1,7 +1,6 @@
# Multi-stage ROS2 development container (CPU/GPU compatible)
ARG ROS_DISTRO=humble
-ARG CUDA_VERSION=12.4.0
-ARG UBUNTU_VERSION=22.04
+ARG NVIDIA_CONTAINER_TAG=12.4.1-cudnn-runtime-ubuntu22.04

# ===============================================
# CPU Base - Standard ROS2 image
@@ -11,7 +10,7 @@ FROM ros:${ROS_DISTRO}-ros-base AS cpu-base
# ===============================================
# GPU Base - CUDA with manual ROS2 install
# ===============================================
-FROM nvidia/cuda:${CUDA_VERSION}-runtime-ubuntu${UBUNTU_VERSION} AS gpu-base
+FROM nvidia/cuda:${NVIDIA_CONTAINER_TAG} AS gpu-base
ARG ROS_DISTRO
ARG TENSORRT_RUNTIME_VERSION
ARG TENSORRT_CUDA_VERSION
@@ -43,12 +42,13 @@ RUN curl -fsSL -o cuda-keyring_1.1-1_all.deb https://developer.download.nvidia.c
&& rm cuda-keyring_1.1-1_all.deb

# ===============================================
-# Install Common Development Tools from Either Base
+# Install Common Development Tools in Either Base
# ===============================================
FROM ${TARGETARCH:-cpu}-base AS dev-tools

# Install development tools not in base image
RUN apt-get update && apt-get install -y --no-install-recommends \
+build-essential \
python3-pip \
python3-colcon-common-extensions \
python3-rosdep \
@@ -70,7 +70,9 @@ FROM dev-tools AS source

SHELL ["/bin/bash", "-o", "pipefail", "-c"]
RUN --mount=type=bind,source=.,target=/tmp/src \
-apt-get -qq update && rosdep update && \
+apt-get -qq update && \
+rosdep init || true && \
+rosdep update && \
rosdep install --from-paths /tmp/src --ignore-src -r -s \
| (grep 'apt-get install' || true) \
| awk '{print $3}' \
11 changes: 4 additions & 7 deletions .devcontainer/generate_devcontainer.sh
@@ -15,14 +15,13 @@
# limitations under the License.
# generate_devcontainer.sh
# Usage:
-# ./generate_devcontainer.sh <ros_distro> <container_type> [cuda_version] [ubuntu_version]
+# ./generate_devcontainer.sh <ros_distro> <container_type> [nvidia_container_tag]

set -e

ROS_DISTRO=${1:-humble}
CONTAINER_TYPE=${2:-cpu}
-CUDA_VERSION=${3:-12.4.0}
-UBUNTU_VERSION=${4:-22.04}
+NVIDIA_CONTAINER_TAG=${3:-12.4.1-cudnn-runtime-ubuntu22.04}
USERNAME=${USER:-vscode}

# TensorRT configuration
@@ -35,8 +34,7 @@ echo "ROS Distribution: $ROS_DISTRO"
echo "Container Type: $CONTAINER_TYPE"

if [ "$CONTAINER_TYPE" = "gpu" ]; then
echo "CUDA Version: $CUDA_VERSION"
echo "Ubuntu Version: $UBUNTU_VERSION"
echo "NVIDIA Container Tag: $NVIDIA_CONTAINER_TAG"
echo "TensorRT Runtime Version: $TENSORRT_RUNTIME_VERSION"
echo "TensorRT CUDA Version: $TENSORRT_CUDA_VERSION"
fi
@@ -47,8 +45,7 @@ if [ "$CONTAINER_TYPE" = "gpu" ]; then
BUILD_ARGS='"ROS_DISTRO": "'$ROS_DISTRO'",
"USERNAME": "'$USERNAME'",
"TARGETARCH": "gpu",
"CUDA_VERSION": "'$CUDA_VERSION'",
"UBUNTU_VERSION": "'$UBUNTU_VERSION'",
"NVIDIA_CONTAINER_TAG": "'$NVIDIA_CONTAINER_TAG'",
"TENSORRT_RUNTIME_VERSION": "'$TENSORRT_RUNTIME_VERSION'",
"TENSORRT_CUDA_VERSION": "'$TENSORRT_CUDA_VERSION'",
"USER_UID": "'$(id -u)'",
58 changes: 24 additions & 34 deletions .vscode/tasks.json
@@ -27,8 +27,7 @@
"args": [
"${input:rosDistro}",
"gpu",
"${input:cudaVersion}",
"${input:ubuntuVersion}"
"${input:nvidiaContainer}"
],
"problemMatcher": [],
"group": "build",
@@ -72,41 +71,32 @@
"default": "cpu"
},
{
"id": "cudaVersion",
"id": "nvidiaContainer",
"type": "pickString",
"description": "Select CUDA version",
"description": "Select NVIDIA CUDA container (runtime + cuDNN, Ubuntu 20/22/24)",
"options": [
"12.8.0",
"12.6.2",
"12.5.1",
"12.4.1",
"12.4.0",
"12.3.2",
"12.2.2",
"12.1.1",
"12.0.1"
"12.8.0-cudnn-runtime-ubuntu24.04",
"12.8.0-cudnn-runtime-ubuntu22.04",
"12.8.0-cudnn-runtime-ubuntu20.04",
"12.6.2-cudnn-runtime-ubuntu24.04",
"12.6.2-cudnn-runtime-ubuntu22.04",
"12.6.2-cudnn-runtime-ubuntu20.04",
"12.5.1-cudnn-runtime-ubuntu24.04",
"12.5.1-cudnn-runtime-ubuntu22.04",
"12.5.1-cudnn-runtime-ubuntu20.04",
"12.4.1-cudnn-runtime-ubuntu24.04",
"12.4.1-cudnn-runtime-ubuntu22.04",
"12.4.1-cudnn-runtime-ubuntu20.04",
"12.3.2-cudnn-runtime-ubuntu22.04",
"12.3.2-cudnn-runtime-ubuntu20.04",
"12.2.2-cudnn8-runtime-ubuntu22.04",
"12.2.2-cudnn8-runtime-ubuntu20.04",
"12.1.1-cudnn8-runtime-ubuntu22.04",
"12.1.1-cudnn8-runtime-ubuntu20.04",
"12.0.1-cudnn8-runtime-ubuntu22.04",
"12.0.1-cudnn8-runtime-ubuntu20.04"
],
"default": "12.4.0"
},
{
"id": "ubuntuVersion",
"type": "pickString",
"description": "Select Ubuntu version",
"options": [
{
"label": "Ubuntu 24.04 (Noble)",
"value": "24.04"
},
{
"label": "Ubuntu 22.04 (Jammy)",
"value": "22.04"
},
{
"label": "Ubuntu 20.04 (Focal)",
"value": "20.04"
}
],
"default": "22.04"
"default": "12.4.1-cudnn-runtime-ubuntu22.04"
}
]
}
98 changes: 98 additions & 0 deletions deep_ort_gpu_backend_plugin/CMakeLists.txt
@@ -0,0 +1,98 @@
# Copyright (c) 2025-present WATonomous. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

cmake_minimum_required(VERSION 3.22)
project(deep_ort_gpu_backend_plugin)

if(NOT CMAKE_CXX_STANDARD)
set(CMAKE_CXX_STANDARD 17)
endif()

if(CMAKE_COMPILER_IS_GNUCXX OR CMAKE_CXX_COMPILER_ID MATCHES "Clang")
add_compile_options(-Wall -Wextra -Wpedantic)
add_link_options(-Wl,-no-undefined)
endif()

# Find required packages
find_package(ament_cmake REQUIRED)
find_package(deep_core REQUIRED)
find_package(onnxruntime_gpu_vendor REQUIRED)
find_package(pluginlib REQUIRED)
find_package(rclcpp REQUIRED)
find_package(rclcpp_lifecycle REQUIRED)

set(include_dir ${CMAKE_CURRENT_SOURCE_DIR}/include)

# deep_ort_gpu_backend_plugin library
set(DEEP_ORT_LIB ${PROJECT_NAME}_lib)
add_library(${DEEP_ORT_LIB} SHARED
src/ort_gpu_memory_allocator.cpp
src/ort_gpu_backend_executor.cpp
src/ort_gpu_backend_plugin.cpp
)

target_include_directories(${DEEP_ORT_LIB}
PUBLIC
$<BUILD_INTERFACE:${include_dir}>
$<INSTALL_INTERFACE:include>
)

# Link libraries
target_link_libraries(${DEEP_ORT_LIB}
PUBLIC
pluginlib::pluginlib
rclcpp::rclcpp
PRIVATE
deep_core::deep_core_lib
onnxruntime_gpu_vendor::onnxruntime_gpu_lib
)

install(TARGETS
${DEEP_ORT_LIB}
EXPORT ${PROJECT_NAME}Targets
ARCHIVE DESTINATION lib
LIBRARY DESTINATION lib
RUNTIME DESTINATION bin
)

install(EXPORT ${PROJECT_NAME}Targets
NAMESPACE ${PROJECT_NAME}::
DESTINATION share/${PROJECT_NAME}/cmake
)

install(DIRECTORY include/
DESTINATION include
)

install(FILES plugins.xml
DESTINATION share/${PROJECT_NAME}
)

# Export plugin description file to ament index
pluginlib_export_plugin_description_file(deep_core plugins.xml)

if(BUILD_TESTING)
find_package(deep_test REQUIRED)

add_deep_test(test_ort_gpu_backend test/test_ort_gpu_backend.cpp
LIBRARIES
${DEEP_ORT_LIB}
deep_core::deep_core_lib
onnxruntime_gpu_vendor::onnxruntime_gpu_lib
)
endif()

ament_export_targets(${PROJECT_NAME}Targets HAS_LIBRARY_TARGET)
ament_export_libraries(${DEEP_ORT_LIB})
ament_package()
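Note: `pluginlib_export_plugin_description_file(deep_core plugins.xml)` only advertises the plugin to the ament index; the shared library must also register its class with pluginlib. A minimal sketch of what that registration typically looks like, with the class and base-class names assumed rather than taken from this PR:

```cpp
// Sketch only: the class names below are assumptions, not code from this PR.
#include <pluginlib/class_list_macros.hpp>

#include "deep_ort_gpu_backend_plugin/ort_gpu_backend_plugin.hpp"

// Registers the concrete backend so the "onnxruntime_gpu" entry in
// plugins.xml can be resolved by pluginlib at runtime.
PLUGINLIB_EXPORT_CLASS(
  deep_ort_gpu_backend_plugin::OrtGpuBackendPlugin,
  deep_core::BackendPlugin)
```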
47 changes: 47 additions & 0 deletions deep_ort_gpu_backend_plugin/README.md
@@ -0,0 +1,47 @@
# deep_ort_gpu_backend_plugin

ONNX Runtime GPU backend plugin for deep_core.

## Overview

Provides:
- GPU inference executor using ONNX Runtime, with a choice of the CUDA or TensorRT (untested) execution provider (sketched below)
- Device context management for multi-GPU systems
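
As a rough illustration of the first point, creating an ONNX Runtime session that prefers the CUDA execution provider typically looks like the sketch below (the TensorRT provider is configured analogously); the function and variable names are illustrative, not the plugin's actual code:

```cpp
#include <onnxruntime_cxx_api.h>

// Sketch: build a session that runs on the CUDA execution provider.
// Ops the provider cannot handle fall back to CPU kernels automatically.
Ort::Session make_gpu_session(Ort::Env& env, const char* model_path) {
  Ort::SessionOptions opts;
  OrtCUDAProviderOptions cuda_opts{};
  cuda_opts.device_id = 0;  // a multi-GPU setup would pick this per context
  opts.AppendExecutionProvider_CUDA(cuda_opts);
  return Ort::Session(env, model_path, opts);
}
```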

## Plugin Name

`onnxruntime_gpu`

## Supported Formats

ONNX models (.onnx files)

## Usage

Add to your `package.xml`:

```xml
<exec_depend>deep_ort_gpu_backend_plugin</exec_depend>
```

Configure your inference nodes to use this plugin:

```yaml
inference_node:
ros__parameters:
Backend.plugin: "onnxruntime_gpu"
model_path: "/path/to/model.onnx"
```
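
At runtime, a `Backend.plugin` value such as `"onnxruntime_gpu"` is typically resolved through `pluginlib::ClassLoader`; a hedged sketch, assuming the base class is `deep_core::BackendPlugin` (not confirmed by this PR):

```cpp
#include <pluginlib/class_loader.hpp>

#include <memory>
#include <string>

// Sketch: map the Backend.plugin parameter value to a backend instance.
// The loader is static because it must outlive the instances it creates.
std::shared_ptr<deep_core::BackendPlugin> load_backend(const std::string& name) {
  static pluginlib::ClassLoader<deep_core::BackendPlugin> loader(
      "deep_core", "deep_core::BackendPlugin");
  return loader.createSharedInstance(name);  // e.g. "onnxruntime_gpu"
}
```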

## Dependencies

- deep_core
- onnxruntime_gpu_vendor

## Current Problems

1. No proper IO binding - Despite the documentation claiming "zero-copy," the code doesn't use `Ort::IoBinding`. The CPU backend does this correctly, but the GPU backend doesn't (a sketch of a fix follows this list).
2. Thread-local caching bug (`ort_gpu_backend_executor.cpp:104-108`) - Input/output names are cached as `static thread_local` variables, which breaks when models are reloaded or multiple executor instances coexist.
3. Hardcoded float types (`ort_gpu_backend_executor.cpp:114,139`) - Input/output tensors are hardcoded to `<float>`, ignoring the model's actual element type, so non-float models will fail.
4. Stub implementations - Methods like `verify_gpu_availability()` and `set_device()` are empty or always return true.
5. Unused member - `persistent_cuda_ptr_` is declared but never used, suggesting incomplete GPU memory management.
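
For problem 1, a minimal sketch of what `Ort::IoBinding` usage could look like, assuming `session` is an initialized `Ort::Session`; the tensor names and the helper are illustrative:

```cpp
#include <onnxruntime_cxx_api.h>

#include <vector>

// Sketch: run inference with device-resident input/output via IoBinding,
// avoiding host staging copies on every call.
std::vector<Ort::Value> run_bound(Ort::Session& session, Ort::Value& input) {
  // Describe GPU memory so ORT can allocate the outputs on-device.
  Ort::MemoryInfo cuda_info("Cuda", OrtDeviceAllocator, /*device_id=*/0,
                            OrtMemTypeDefault);
  Ort::IoBinding binding(session);
  binding.BindInput("input", input);        // pre-allocated GPU tensor
  binding.BindOutput("output", cuda_info);  // ORT allocates output on GPU
  session.Run(Ort::RunOptions{}, binding);
  return binding.GetOutputValues();
}
```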