Merged (changes from 4 commits)
12 changes: 7 additions & 5 deletions .devcontainer/Dockerfile
@@ -1,7 +1,6 @@
# Multi-stage ROS2 development container (CPU/GPU compatible)
ARG ROS_DISTRO=humble
-ARG CUDA_VERSION=12.4.0
-ARG UBUNTU_VERSION=22.04
+ARG NVIDIA_CONTAINER_TAG=12.4.1-cudnn-runtime-ubuntu22.04

# ===============================================
# CPU Base - Standard ROS2 image
@@ -11,7 +10,7 @@ FROM ros:${ROS_DISTRO}-ros-base AS cpu-base
# ===============================================
# GPU Base - CUDA with manual ROS2 install
# ===============================================
-FROM nvidia/cuda:${CUDA_VERSION}-runtime-ubuntu${UBUNTU_VERSION} AS gpu-base
+FROM nvidia/cuda:${NVIDIA_CONTAINER_TAG} AS gpu-base
ARG ROS_DISTRO
ARG TENSORRT_RUNTIME_VERSION
ARG TENSORRT_CUDA_VERSION
@@ -43,12 +42,13 @@ RUN curl -fsSL -o cuda-keyring_1.1-1_all.deb https://developer.download.nvidia.c
&& rm cuda-keyring_1.1-1_all.deb

# ===============================================
-# Install Common Development Tools from Either Base
+# Install Common Development Tools in Either Base
# ===============================================
FROM ${TARGETARCH:-cpu}-base AS dev-tools

# Install development tools not in base image
RUN apt-get update && apt-get install -y --no-install-recommends \
+build-essential \
python3-pip \
python3-colcon-common-extensions \
python3-rosdep \
@@ -70,7 +70,9 @@ FROM dev-tools AS source

SHELL ["/bin/bash", "-o", "pipefail", "-c"]
RUN --mount=type=bind,source=.,target=/tmp/src \
-apt-get -qq update && rosdep update && \
+apt-get -qq update && \
+rosdep init || true && \
+rosdep update && \
rosdep install --from-paths /tmp/src --ignore-src -r -s \
| (grep 'apt-get install' || true) \
| awk '{print $3}' \
11 changes: 4 additions & 7 deletions .devcontainer/generate_devcontainer.sh
@@ -15,14 +15,13 @@
# limitations under the License.
# generate_devcontainer.sh
# Usage:
-# ./generate_devcontainer.sh <ros_distro> <container_type> [cuda_version] [ubuntu_version]
+# ./generate_devcontainer.sh <ros_distro> <container_type> [nvidia_container_tag]

set -e

ROS_DISTRO=${1:-humble}
CONTAINER_TYPE=${2:-cpu}
-CUDA_VERSION=${3:-12.4.0}
-UBUNTU_VERSION=${4:-22.04}
+NVIDIA_CONTAINER_TAG=${3:-12.4.1-cudnn-runtime-ubuntu22.04}
USERNAME=${USER:-vscode}

# TensorRT configuration
@@ -35,8 +34,7 @@ echo "ROS Distribution: $ROS_DISTRO"
echo "Container Type: $CONTAINER_TYPE"

if [ "$CONTAINER_TYPE" = "gpu" ]; then
echo "CUDA Version: $CUDA_VERSION"
echo "Ubuntu Version: $UBUNTU_VERSION"
echo "NVIDIA Container Tag: $NVIDIA_CONTAINER_TAG"
echo "TensorRT Runtime Version: $TENSORRT_RUNTIME_VERSION"
echo "TensorRT CUDA Version: $TENSORRT_CUDA_VERSION"
fi
@@ -47,8 +45,7 @@ if [ "$CONTAINER_TYPE" = "gpu" ]; then
BUILD_ARGS='"ROS_DISTRO": "'$ROS_DISTRO'",
"USERNAME": "'$USERNAME'",
"TARGETARCH": "gpu",
"CUDA_VERSION": "'$CUDA_VERSION'",
"UBUNTU_VERSION": "'$UBUNTU_VERSION'",
"NVIDIA_CONTAINER_TAG": "'$NVIDIA_CONTAINER_TAG'",
"TENSORRT_RUNTIME_VERSION": "'$TENSORRT_RUNTIME_VERSION'",
"TENSORRT_CUDA_VERSION": "'$TENSORRT_CUDA_VERSION'",
"USER_UID": "'$(id -u)'",
58 changes: 24 additions & 34 deletions .vscode/tasks.json
@@ -27,8 +27,7 @@
"args": [
"${input:rosDistro}",
"gpu",
"${input:cudaVersion}",
"${input:ubuntuVersion}"
"${input:nvidiaContainer}"
],
"problemMatcher": [],
"group": "build",
@@ -72,41 +71,32 @@
"default": "cpu"
},
{
"id": "cudaVersion",
"id": "nvidiaContainer",
"type": "pickString",
"description": "Select CUDA version",
"description": "Select NVIDIA CUDA container (runtime + cuDNN, Ubuntu 20/22/24)",
"options": [
"12.8.0",
"12.6.2",
"12.5.1",
"12.4.1",
"12.4.0",
"12.3.2",
"12.2.2",
"12.1.1",
"12.0.1"
"12.8.0-cudnn-runtime-ubuntu24.04",
"12.8.0-cudnn-runtime-ubuntu22.04",
"12.8.0-cudnn-runtime-ubuntu20.04",
"12.6.2-cudnn-runtime-ubuntu24.04",
"12.6.2-cudnn-runtime-ubuntu22.04",
"12.6.2-cudnn-runtime-ubuntu20.04",
"12.5.1-cudnn-runtime-ubuntu24.04",
"12.5.1-cudnn-runtime-ubuntu22.04",
"12.5.1-cudnn-runtime-ubuntu20.04",
"12.4.1-cudnn-runtime-ubuntu24.04",
"12.4.1-cudnn-runtime-ubuntu22.04",
"12.4.1-cudnn-runtime-ubuntu20.04",
"12.3.2-cudnn-runtime-ubuntu22.04",
"12.3.2-cudnn-runtime-ubuntu20.04",
"12.2.2-cudnn8-runtime-ubuntu22.04",
"12.2.2-cudnn8-runtime-ubuntu20.04",
"12.1.1-cudnn8-runtime-ubuntu22.04",
"12.1.1-cudnn8-runtime-ubuntu20.04",
"12.0.1-cudnn8-runtime-ubuntu22.04",
"12.0.1-cudnn8-runtime-ubuntu20.04"
],
"default": "12.4.0"
},
{
"id": "ubuntuVersion",
"type": "pickString",
"description": "Select Ubuntu version",
"options": [
{
"label": "Ubuntu 24.04 (Noble)",
"value": "24.04"
},
{
"label": "Ubuntu 22.04 (Jammy)",
"value": "22.04"
},
{
"label": "Ubuntu 20.04 (Focal)",
"value": "20.04"
}
],
"default": "22.04"
"default": "12.4.1-cudnn-runtime-ubuntu22.04"
}
]
}
98 changes: 98 additions & 0 deletions deep_ort_gpu_backend_plugin/CMakeLists.txt
@@ -0,0 +1,98 @@
# Copyright (c) 2025-present WATonomous. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

cmake_minimum_required(VERSION 3.22)
project(deep_ort_gpu_backend_plugin)

if(NOT CMAKE_CXX_STANDARD)
set(CMAKE_CXX_STANDARD 17)
endif()

if(CMAKE_COMPILER_IS_GNUCXX OR CMAKE_CXX_COMPILER_ID MATCHES "Clang")
add_compile_options(-Wall -Wextra -Wpedantic)
add_link_options(-Wl,-no-undefined)
endif()

# Find required packages
find_package(ament_cmake REQUIRED)
find_package(deep_core REQUIRED)
find_package(onnxruntime_gpu_vendor REQUIRED)
find_package(pluginlib REQUIRED)
find_package(rclcpp REQUIRED)
find_package(rclcpp_lifecycle REQUIRED)

set(include_dir ${CMAKE_CURRENT_SOURCE_DIR}/include)

# deep_ort_gpu_backend_plugin library
set(DEEP_ORT_LIB ${PROJECT_NAME}_lib)
add_library(${DEEP_ORT_LIB} SHARED
src/ort_gpu_memory_allocator.cpp
src/ort_gpu_backend_executor.cpp
src/ort_gpu_backend_plugin.cpp
)

target_include_directories(${DEEP_ORT_LIB}
PUBLIC
$<BUILD_INTERFACE:${include_dir}>
$<INSTALL_INTERFACE:include>
)

# Link libraries
target_link_libraries(${DEEP_ORT_LIB}
PUBLIC
pluginlib::pluginlib
rclcpp::rclcpp
PRIVATE
deep_core::deep_core_lib
onnxruntime_gpu_vendor::onnxruntime_gpu_lib
)

install(TARGETS
${DEEP_ORT_LIB}
EXPORT ${PROJECT_NAME}Targets
ARCHIVE DESTINATION lib
LIBRARY DESTINATION lib
RUNTIME DESTINATION bin
)

install(EXPORT ${PROJECT_NAME}Targets
NAMESPACE ${PROJECT_NAME}::
DESTINATION share/${PROJECT_NAME}/cmake
)

install(DIRECTORY include/
DESTINATION include
)

install(FILES plugins.xml
DESTINATION share/${PROJECT_NAME}
)

# Export plugin description file to ament index
pluginlib_export_plugin_description_file(deep_core plugins.xml)

if(BUILD_TESTING)
find_package(deep_test REQUIRED)

add_deep_test(test_ort_gpu_backend test/test_ort_gpu_backend.cpp
LIBRARIES
${DEEP_ORT_LIB}
deep_core::deep_core_lib
onnxruntime_gpu_vendor::onnxruntime_gpu_lib
)
endif()

ament_export_targets(${PROJECT_NAME}Targets HAS_LIBRARY_TARGET)
ament_export_libraries(${DEEP_ORT_LIB})
ament_package()
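Note: `pluginlib_export_plugin_description_file(deep_core plugins.xml)` only advertises the plugin to the ament index; the shared library must also register its class with pluginlib. A minimal sketch of what that registration typically looks like, with the class and base-class names assumed rather than taken from this PR:

```cpp
// Sketch only: the class names below are assumptions, not code from this PR.
#include <pluginlib/class_list_macros.hpp>

#include "deep_ort_gpu_backend_plugin/ort_gpu_backend_plugin.hpp"

// Registers the concrete backend so the "onnxruntime_gpu" entry in
// plugins.xml can be resolved by pluginlib at runtime.
PLUGINLIB_EXPORT_CLASS(
  deep_ort_gpu_backend_plugin::OrtGpuBackendPlugin,
  deep_core::BackendPlugin)
```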
47 changes: 47 additions & 0 deletions deep_ort_gpu_backend_plugin/README.md
@@ -0,0 +1,47 @@
# deep_ort_gpu_backend_plugin

ONNX Runtime GPU backend plugin for deep_core.

## Overview

Provides:
- GPU inference executor using ONNX Runtime, with a choice of the CUDA or TensorRT (untested) execution provider (sketched below)
- Device context management for multi-GPU systems
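
As a rough illustration of the first point, creating an ONNX Runtime session that prefers the CUDA execution provider typically looks like the sketch below (the TensorRT provider is configured analogously); the function and variable names are illustrative, not the plugin's actual code:

```cpp
#include <onnxruntime_cxx_api.h>

// Sketch: build a session that runs on the CUDA execution provider.
// Ops the provider cannot handle fall back to CPU kernels automatically.
Ort::Session make_gpu_session(Ort::Env& env, const char* model_path) {
  Ort::SessionOptions opts;
  OrtCUDAProviderOptions cuda_opts{};
  cuda_opts.device_id = 0;  // a multi-GPU setup would pick this per context
  opts.AppendExecutionProvider_CUDA(cuda_opts);
  return Ort::Session(env, model_path, opts);
}
```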

## Plugin Name

`onnxruntime_gpu`

## Supported Formats

ONNX models (.onnx files)

## Usage

Add to your `package.xml`:

```xml
<exec_depend>deep_ort_gpu_backend_plugin</exec_depend>
```

Configure your inference nodes to use this plugin:

```yaml
inference_node:
ros__parameters:
Backend.plugin: "onnxruntime_gpu"
model_path: "/path/to/model.onnx"
```
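
At runtime, a `Backend.plugin` value such as `"onnxruntime_gpu"` is typically resolved through `pluginlib::ClassLoader`; a hedged sketch, assuming the base class is `deep_core::BackendPlugin` (not confirmed by this PR):

```cpp
#include <pluginlib/class_loader.hpp>

#include <memory>
#include <string>

// Sketch: map the Backend.plugin parameter value to a backend instance.
// The loader is static because it must outlive the instances it creates.
std::shared_ptr<deep_core::BackendPlugin> load_backend(const std::string& name) {
  static pluginlib::ClassLoader<deep_core::BackendPlugin> loader(
      "deep_core", "deep_core::BackendPlugin");
  return loader.createSharedInstance(name);  // e.g. "onnxruntime_gpu"
}
```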

## Dependencies

- deep_core
- onnxruntime_gpu_vendor

## Current Problems

1. No proper IO binding - Despite the documentation claiming "zero-copy," the code doesn't use `Ort::IoBinding`. The CPU backend does this correctly, but the GPU backend doesn't (a sketch of a fix follows this list).
2. Thread-local caching bug (`ort_gpu_backend_executor.cpp:104-108`) - Input/output names are cached as `static thread_local` variables, which breaks when models are reloaded or multiple executor instances coexist.
3. Hardcoded float types (`ort_gpu_backend_executor.cpp:114,139`) - Input/output tensors are hardcoded to `<float>`, ignoring the model's actual element type, so non-float models will fail.
4. Stub implementations - Methods like `verify_gpu_availability()` and `set_device()` are empty or always return true.
5. Unused member - `persistent_cuda_ptr_` is declared but never used, suggesting incomplete GPU memory management.
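
For problem 1, a minimal sketch of what `Ort::IoBinding` usage could look like, assuming `session` is an initialized `Ort::Session`; the tensor names and the helper are illustrative:

```cpp
#include <onnxruntime_cxx_api.h>

#include <vector>

// Sketch: run inference with device-resident input/output via IoBinding,
// avoiding host staging copies on every call.
std::vector<Ort::Value> run_bound(Ort::Session& session, Ort::Value& input) {
  // Describe GPU memory so ORT can allocate the outputs on-device.
  Ort::MemoryInfo cuda_info("Cuda", OrtDeviceAllocator, /*device_id=*/0,
                            OrtMemTypeDefault);
  Ort::IoBinding binding(session);
  binding.BindInput("input", input);        // pre-allocated GPU tensor
  binding.BindOutput("output", cuda_info);  // ORT allocates output on GPU
  session.Run(Ort::RunOptions{}, binding);
  return binding.GetOutputValues();
}
```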