16 changes: 10 additions & 6 deletions .devcontainer/Dockerfile
@@ -1,7 +1,6 @@
 # Multi-stage ROS2 development container (CPU/GPU compatible)
 ARG ROS_DISTRO=humble
-ARG CUDA_VERSION=12.4.0
-ARG UBUNTU_VERSION=22.04
+ARG NVIDIA_CONTAINER_TAG=12.4.1-cudnn-runtime-ubuntu22.04
 
 # ===============================================
 # CPU Base - Standard ROS2 image
@@ -11,7 +10,7 @@ FROM ros:${ROS_DISTRO}-ros-base AS cpu-base
 # ===============================================
 # GPU Base - CUDA with manual ROS2 install
 # ===============================================
-FROM nvidia/cuda:${CUDA_VERSION}-runtime-ubuntu${UBUNTU_VERSION} AS gpu-base
+FROM nvidia/cuda:${NVIDIA_CONTAINER_TAG} AS gpu-base
 ARG ROS_DISTRO
 ARG TENSORRT_RUNTIME_VERSION
 ARG TENSORRT_CUDA_VERSION
@@ -39,16 +38,19 @@ ENV DEBIAN_FRONTEND=interactive
 RUN curl -fsSL -o cuda-keyring_1.1-1_all.deb https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2204/x86_64/cuda-keyring_1.1-1_all.deb \
   && dpkg -i cuda-keyring_1.1-1_all.deb \
   && apt-get update && apt-get install -y --no-install-recommends \
-  libnvinfer-lean10=${TENSORRT_RUNTIME_VERSION}-1+cuda${TENSORRT_CUDA_VERSION} \
+  libnvinfer10=${TENSORRT_RUNTIME_VERSION}-1+cuda${TENSORRT_CUDA_VERSION} \
+  libnvinfer-plugin10=${TENSORRT_RUNTIME_VERSION}-1+cuda${TENSORRT_CUDA_VERSION} \
+  libnvonnxparsers10=${TENSORRT_RUNTIME_VERSION}-1+cuda${TENSORRT_CUDA_VERSION} \
   && rm cuda-keyring_1.1-1_all.deb
 
 # ===============================================
-# Install Common Development Tools from Either Base
+# Install Common Development Tools in Either Base
 # ===============================================
 FROM ${TARGETARCH:-cpu}-base AS dev-tools
 
 # Install development tools not in base image
 RUN apt-get update && apt-get install -y --no-install-recommends \
+  build-essential \
   python3-pip \
   python3-colcon-common-extensions \
   python3-rosdep \
@@ -70,7 +72,9 @@ FROM dev-tools AS source
 
 SHELL ["/bin/bash", "-o", "pipefail", "-c"]
 RUN --mount=type=bind,source=.,target=/tmp/src \
-  apt-get -qq update && rosdep update && \
+  apt-get -qq update && \
+  rosdep init || true && \
+  rosdep update && \
   rosdep install --from-paths /tmp/src --ignore-src -r -s \
   | (grep 'apt-get install' || true) \
   | awk '{print $3}' \
11 changes: 4 additions & 7 deletions .devcontainer/generate_devcontainer.sh
@@ -15,14 +15,13 @@
 # limitations under the License.
 # generate_devcontainer.sh
 # Usage:
-#   ./generate_devcontainer.sh <ros_distro> <container_type> [cuda_version] [ubuntu_version]
+#   ./generate_devcontainer.sh <ros_distro> <container_type> [nvidia_container_tag]
 
 set -e
 
 ROS_DISTRO=${1:-humble}
 CONTAINER_TYPE=${2:-cpu}
-CUDA_VERSION=${3:-12.4.0}
-UBUNTU_VERSION=${4:-22.04}
+NVIDIA_CONTAINER_TAG=${3:-12.4.1-cudnn-runtime-ubuntu22.04}
 USERNAME=${USER:-vscode}
 
 # TensorRT configuration
@@ -35,8 +34,7 @@ echo "ROS Distribution: $ROS_DISTRO"
 echo "Container Type: $CONTAINER_TYPE"
 
 if [ "$CONTAINER_TYPE" = "gpu" ]; then
-  echo "CUDA Version: $CUDA_VERSION"
-  echo "Ubuntu Version: $UBUNTU_VERSION"
+  echo "NVIDIA Container Tag: $NVIDIA_CONTAINER_TAG"
   echo "TensorRT Runtime Version: $TENSORRT_RUNTIME_VERSION"
   echo "TensorRT CUDA Version: $TENSORRT_CUDA_VERSION"
 fi
@@ -47,8 +45,7 @@ if [ "$CONTAINER_TYPE" = "gpu" ]; then
   BUILD_ARGS='"ROS_DISTRO": "'$ROS_DISTRO'",
     "USERNAME": "'$USERNAME'",
     "TARGETARCH": "gpu",
-    "CUDA_VERSION": "'$CUDA_VERSION'",
-    "UBUNTU_VERSION": "'$UBUNTU_VERSION'",
+    "NVIDIA_CONTAINER_TAG": "'$NVIDIA_CONTAINER_TAG'",
     "TENSORRT_RUNTIME_VERSION": "'$TENSORRT_RUNTIME_VERSION'",
    "TENSORRT_CUDA_VERSION": "'$TENSORRT_CUDA_VERSION'",
     "USER_UID": "'$(id -u)'",
4 changes: 2 additions & 2 deletions .github/actions/build-and-test/action.yml
@@ -29,13 +29,13 @@ runs:
       shell: bash
       run: |
         source /opt/ros/${{ inputs.ros-distro }}/setup.bash
-        colcon build --merge-install
+        IS_CI=1 colcon build --merge-install
     - name: ✅ Run tests
       shell: bash
       run: |
         source /opt/ros/${{ inputs.ros-distro }}/setup.bash
-        colcon test --merge-install --event-handlers console_cohesion+
+        IS_CI=1 colcon test --merge-install --event-handlers console_cohesion+
         colcon test-result --verbose
     - name: 🗃️ Upload test logs as artifacts
3 changes: 3 additions & 0 deletions .gitignore
@@ -7,6 +7,9 @@ log/
 .devcontainer/devcontainer.json
 .devcontainer/.env
 
+# Launch test cache
+__pycache__/
+
 # Claude helpers
 .claude/
 CLAUDE.md
58 changes: 24 additions & 34 deletions .vscode/tasks.json
@@ -27,8 +27,7 @@
       "args": [
         "${input:rosDistro}",
         "gpu",
-        "${input:cudaVersion}",
-        "${input:ubuntuVersion}"
+        "${input:nvidiaContainer}"
       ],
       "problemMatcher": [],
       "group": "build",
@@ -72,41 +71,32 @@
       "default": "cpu"
     },
     {
-      "id": "cudaVersion",
+      "id": "nvidiaContainer",
       "type": "pickString",
-      "description": "Select CUDA version",
+      "description": "Select NVIDIA CUDA container (runtime + cuDNN, Ubuntu 20/22/24)",
       "options": [
-        "12.8.0",
-        "12.6.2",
-        "12.5.1",
-        "12.4.1",
-        "12.4.0",
-        "12.3.2",
-        "12.2.2",
-        "12.1.1",
-        "12.0.1"
+        "12.8.0-cudnn-runtime-ubuntu24.04",
+        "12.8.0-cudnn-runtime-ubuntu22.04",
+        "12.8.0-cudnn-runtime-ubuntu20.04",
+        "12.6.2-cudnn-runtime-ubuntu24.04",
+        "12.6.2-cudnn-runtime-ubuntu22.04",
+        "12.6.2-cudnn-runtime-ubuntu20.04",
+        "12.5.1-cudnn-runtime-ubuntu24.04",
+        "12.5.1-cudnn-runtime-ubuntu22.04",
+        "12.5.1-cudnn-runtime-ubuntu20.04",
+        "12.4.1-cudnn-runtime-ubuntu24.04",
+        "12.4.1-cudnn-runtime-ubuntu22.04",
+        "12.4.1-cudnn-runtime-ubuntu20.04",
+        "12.3.2-cudnn-runtime-ubuntu22.04",
+        "12.3.2-cudnn-runtime-ubuntu20.04",
+        "12.2.2-cudnn8-runtime-ubuntu22.04",
+        "12.2.2-cudnn8-runtime-ubuntu20.04",
+        "12.1.1-cudnn8-runtime-ubuntu22.04",
+        "12.1.1-cudnn8-runtime-ubuntu20.04",
+        "12.0.1-cudnn8-runtime-ubuntu22.04",
+        "12.0.1-cudnn8-runtime-ubuntu20.04"
       ],
-      "default": "12.4.0"
-    },
-    {
-      "id": "ubuntuVersion",
-      "type": "pickString",
-      "description": "Select Ubuntu version",
-      "options": [
-        {
-          "label": "Ubuntu 24.04 (Noble)",
-          "value": "24.04"
-        },
-        {
-          "label": "Ubuntu 22.04 (Jammy)",
-          "value": "22.04"
-        },
-        {
-          "label": "Ubuntu 20.04 (Focal)",
-          "value": "20.04"
-        }
-      ],
-      "default": "22.04"
+      "default": "12.4.1-cudnn-runtime-ubuntu22.04"
     }
   ]
 }
34 changes: 34 additions & 0 deletions DEVELOPING.md
@@ -29,3 +29,37 @@ This project includes VS Code dev container configurations for easy ROS2 development.
 ### Common Commands
 
 Inside the container, you can do ros2 commands, colcon commands, rosdep, etc.
+
+## Testing
+
+### CI Testing (CPU Only)
+
+```bash
+export IS_CI=1
+colcon build
+source install/setup.bash && colcon test
+colcon test-result --verbose
+```
+
+GPU backends are automatically skipped when `IS_CI=1`.
+
+### Local GPU Testing
+
+**Requires**: an NVIDIA container with GPU access (runtime libraries and cuDNN).
+Testing with GPU can be done using one of the supported GPU devcontainers.
+
+```bash
+export IS_CI=0  # or unset IS_CI
+colcon build
+source install/setup.bash && colcon test
+colcon test-result --verbose
+```
+
+### Testing Philosophy
+
+Given that access to GPU runners is limited, we settled on a procedure for testing GPU-related software locally. The codebase is designed such that **if a backend works with `deep_sample`, it works everywhere.**
+
+- CPU backends: Tested in CI automatically
+- GPU backends: Must be tested locally with GPU before release
+- `deep_sample` validates all backend plugins end-to-end
+- Other packages are tested with CPU only in CI
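The diff above sets `IS_CI=1` in CI and states that GPU backends are skipped, but does not show how a test consumes the switch. Below is a minimal sketch of the gating pattern, assuming a Catch2 v3 test binary like the one in `deep_ort_backend_plugin/test`; the `running_in_ci` helper and the `[gpu]` tag are illustrative, not part of this PR.

```cpp
#include <cstdlib>
#include <string>

#include <catch2/catch_all.hpp>  // assumes Catch2 v3; v2 uses <catch2/catch.hpp>

// Hypothetical helper: true when the environment exports IS_CI=1,
// as the CI action in this PR now does for colcon build/test.
static bool running_in_ci()
{
  const char * is_ci = std::getenv("IS_CI");
  return is_ci != nullptr && std::string(is_ci) == "1";
}

TEST_CASE("GPU backend end-to-end inference", "[gpu]")
{
  if (running_in_ci()) {
    WARN("IS_CI=1 set; skipping GPU backend test");
    return;  // pass without exercising GPU-only code paths
  }
  // ... load a model and run inference on the GPU backend here ...
}
```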
2 changes: 1 addition & 1 deletion deep_core/CMakeLists.txt
@@ -47,9 +47,9 @@ target_include_directories(${DEEP_CORE_LIB} PUBLIC
 target_link_libraries(${DEEP_CORE_LIB}
   PUBLIC
     pluginlib::pluginlib
-  PRIVATE
     rclcpp::rclcpp
     rclcpp_lifecycle::rclcpp_lifecycle
+  PRIVATE
     bondcpp::bondcpp
 )
 
@@ -19,6 +19,8 @@
 #include <string>
 #include <vector>
 
+#include <rclcpp/rclcpp.hpp>
+
 #include "deep_core/types/tensor.hpp"
 
 namespace deep_ros
@@ -18,6 +18,8 @@
 #include <string>
 #include <vector>
 
+#include <rclcpp/rclcpp.hpp>
+
 namespace deep_ros
 {
 
@@ -17,6 +17,9 @@
 #include <memory>
 #include <string>
 
+#include <rclcpp/rclcpp.hpp>
+#include <rclcpp_lifecycle/lifecycle_node.hpp>
+
 #include "deep_core/plugin_interfaces/backend_inference_executor.hpp"
 #include "deep_core/plugin_interfaces/backend_memory_allocator.hpp"
 
@@ -34,6 +37,12 @@ class DeepBackendPlugin
 public:
   virtual ~DeepBackendPlugin() = default;
 
+  /**
+   * @brief Initialize plugin with node instance to enable parameter loading
+   * @param node Shared pointer to the lifecycle node instance
+   */
+  virtual void initialize(rclcpp_lifecycle::LifecycleNode::SharedPtr node) = 0;
+
   /**
    * @brief Get the backend name
    * @return Backend name (e.g., "onnxruntime", "tensorrt")
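For context on how a backend is expected to use the new `initialize()` hook above, here is a minimal sketch of a plugin caching the node and loading a parameter through it. The `TrtBackendPlugin` class, its header path, and the `model_precision` parameter are assumptions for illustration; only the `initialize` signature comes from this diff.

```cpp
#include <string>

#include <rclcpp/rclcpp.hpp>
#include <rclcpp_lifecycle/lifecycle_node.hpp>

#include "trt_backend_plugin.hpp"  // hypothetical GPU backend plugin header

namespace deep_ros
{

// Sketch: the plugin stores the node handle and uses it for parameter
// loading, which is the stated purpose of initialize().
void TrtBackendPlugin::initialize(rclcpp_lifecycle::LifecycleNode::SharedPtr node)
{
  node_ = node;
  // "model_precision" is an illustrative parameter name, not from the PR.
  precision_ = node_->declare_parameter<std::string>("model_precision", "fp16");
  RCLCPP_INFO(node_->get_logger(), "Configured precision: %s", precision_.c_str());
}

}  // namespace deep_ros
```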
2 changes: 2 additions & 0 deletions deep_core/src/deep_node_base.cpp
@@ -141,6 +141,8 @@ bool DeepNodeBase::load_plugin(const std::string & plugin_name)
   try {
     RCLCPP_INFO(get_logger(), "Loading plugin: %s", plugin_name.c_str());
     plugin_ = plugin_loader_->createUniqueInstance(plugin_name);
+    plugin_->initialize(shared_from_this());
+
     current_plugin_name_ = plugin_name;
     RCLCPP_INFO(get_logger(), "Successfully loaded plugin: %s", plugin_name.c_str());
     return true;
@@ -43,6 +43,12 @@ class OrtBackendPlugin : public deep_ros::DeepBackendPlugin
    */
   ~OrtBackendPlugin() override = default;
 
+  /**
+   * @brief Initialize plugin with node instance (no-op for CPU backend)
+   * @param node Lifecycle node instance
+   */
+  void initialize(rclcpp_lifecycle::LifecycleNode::SharedPtr node) override;
+
   /**
    * @brief Get backend name
    * @return "onnxruntime"
@@ -62,6 +68,7 @@
   std::shared_ptr<deep_ros::BackendInferenceExecutor> get_inference_executor() const override;
 
 private:
+  rclcpp_lifecycle::LifecycleNode::SharedPtr node_;
   std::shared_ptr<deep_ros::BackendMemoryAllocator> allocator_;
   std::shared_ptr<deep_ros::BackendInferenceExecutor> executor_;
 };
4 changes: 2 additions & 2 deletions deep_ort_backend_plugin/src/ort_backend_executor.cpp
@@ -47,7 +47,7 @@ OrtBackendExecutor::OrtBackendExecutor()
 bool OrtBackendExecutor::load_model_impl(const std::filesystem::path & model_path)
 {
   if (!std::filesystem::exists(model_path)) {
-    return false;
+    throw std::runtime_error("Model file not found: " + model_path.string());
   }
 
   try {
@@ -63,7 +63,7 @@ bool OrtBackendExecutor::load_model_impl(const std::filesystem::path & model_path)
     model_path_ = model_path;
     return true;
   } catch (const std::exception & e) {
-    return false;
+    throw std::runtime_error("Failed to load ONNX model: " + std::string(e.what()));
   }
 }
 
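Because `load_model_impl` now throws instead of returning `false`, call sites that relied on the boolean result need a `try`/`catch`; the test change further down confirms the exception propagates through the public `load_model`. A minimal caller-side sketch, assuming only that `load_model` forwards the exception (the `try_load` helper and header name are illustrative, not part of the PR):

```cpp
#include <filesystem>
#include <iostream>
#include <stdexcept>

#include "ort_backend_executor.hpp"  // assumed header for OrtBackendExecutor

// Hypothetical wrapper restoring the pre-PR boolean contract at one call site.
bool try_load(OrtBackendExecutor & executor, const std::filesystem::path & model_path)
{
  try {
    return executor.load_model(model_path);  // now throws std::runtime_error on failure
  } catch (const std::runtime_error & e) {
    std::cerr << "Model load failed: " << e.what() << '\n';
    return false;
  }
}
```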
5 changes: 5 additions & 0 deletions deep_ort_backend_plugin/src/ort_backend_plugin.cpp
@@ -30,6 +30,11 @@ OrtBackendPlugin::OrtBackendPlugin()
 , executor_(std::make_shared<OrtBackendExecutor>())
 {}
 
+void OrtBackendPlugin::initialize(rclcpp_lifecycle::LifecycleNode::SharedPtr node)
+{
+  node_ = node;
+}
+
 std::string OrtBackendPlugin::backend_name() const
 {
   return "onnxruntime_cpu";
5 changes: 2 additions & 3 deletions deep_ort_backend_plugin/test/test_ort_backend.cpp
@@ -109,10 +109,9 @@ TEST_CASE("OrtBackendExecutor basic functionality", "[executor]")
     REQUIRE(formats[0] == "onnx");
   }
 
-  SECTION("Load nonexistent model fails")
+  SECTION("Load nonexistent model throws")
   {
-    bool result = executor.load_model("/nonexistent/model.onnx");
-    REQUIRE_FALSE(result);
+    REQUIRE_THROWS_AS(executor.load_model("/nonexistent/model.onnx"), std::runtime_error);
   }
 
   SECTION("Inference without model throws")