Skip to content
Open
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
45 changes: 45 additions & 0 deletions src/agnocast_cuda/CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,45 @@
cmake_minimum_required(VERSION 3.14)
project(agnocast_cuda LANGUAGES CXX CUDA)

if(NOT CMAKE_CXX_STANDARD)
  set(CMAKE_CXX_STANDARD 17)
endif()

# Strict warnings on GCC/Clang only; other compilers keep their defaults.
if(CMAKE_COMPILER_IS_GNUCXX OR CMAKE_CXX_COMPILER_ID MATCHES "Clang")
  add_compile_options(-Wall -Wextra -Wpedantic)
endif()

find_package(ament_cmake REQUIRED)
find_package(agnocastlib REQUIRED)
find_package(sensor_msgs REQUIRED)
find_package(CUDAToolkit REQUIRED)

# Single shared library containing every backend implementation; the
# appropriate one is selected at runtime (see src/get_backend.cpp).
add_library(agnocast_cuda SHARED
  src/cuda_ipc_backend.cpp
  src/vmm_backend.cpp
  src/nvscibuf_backend.cpp
  src/unified_memory_backend.cpp
  src/get_backend.cpp)

target_include_directories(agnocast_cuda PUBLIC
  $<BUILD_INTERFACE:${CMAKE_CURRENT_SOURCE_DIR}/include>
  $<INSTALL_INTERFACE:include>)

ament_target_dependencies(agnocast_cuda agnocastlib sensor_msgs)
target_link_libraries(agnocast_cuda CUDA::cudart)

install(TARGETS agnocast_cuda
  EXPORT export_${PROJECT_NAME}
  LIBRARY DESTINATION lib
  ARCHIVE DESTINATION lib
  RUNTIME DESTINATION bin
  INCLUDES DESTINATION include)

install(
  DIRECTORY include/
  DESTINATION include)

ament_export_targets(export_${PROJECT_NAME} HAS_LIBRARY_TARGET)
ament_export_include_directories(include)
ament_export_dependencies(agnocastlib sensor_msgs)
ament_package()
42 changes: 42 additions & 0 deletions src/agnocast_cuda/include/agnocast/cuda/types.hpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,42 @@
#pragma once

#include "agnocast/cuda_message_tag.hpp"

#include <sensor_msgs/msg/image.hpp>
#include <sensor_msgs/msg/point_cloud2.hpp>

#include <cstddef>
#include <cstdint>

namespace agnocast::cuda
{

// GPU-resident variant of sensor_msgs::msg::PointCloud2, tagged with
// agnocast::cuda_message_tag so the transport layer can recognize it.
// NOTE(review): `data` deliberately shadows the base class's
// `std::vector<uint8_t> data`; the (empty) CPU-side vector still exists in
// the base subobject and is still reachable through a base-class
// reference/pointer — confirm that no generic code (e.g. serialization)
// ever accesses the base member on these types.
struct PointCloud2 : public sensor_msgs::msg::PointCloud2, public agnocast::cuda_message_tag
{
uint8_t * data = nullptr; // GPU device pointer (shadows base class std::vector<uint8_t> data)
};

// GPU-resident variant of sensor_msgs::msg::Image; same shadowing caveat
// as PointCloud2 above applies to `data`.
struct Image : public sensor_msgs::msg::Image, public agnocast::cuda_message_tag
{
uint8_t * data = nullptr; // GPU device pointer (shadows base class std::vector<uint8_t> data)
};

} // namespace agnocast::cuda

namespace agnocast
{

// Byte size of the GPU payload backing a cuda::PointCloud2, derived from
// the message metadata: height * width * point_step.
// NOTE(review): sensor_msgs also carries `row_step`, which may include
// per-row padding; this assumes row_step == width * point_step — confirm
// publishers always satisfy that.
template <>
inline size_t get_cuda_gpu_data_size(const agnocast::cuda::PointCloud2 & msg)
{
  const auto rows = static_cast<size_t>(msg.height);
  const auto points_per_row = static_cast<size_t>(msg.width);
  const auto bytes_per_point = static_cast<size_t>(msg.point_step);
  return rows * points_per_row * bytes_per_point;
}

// Byte size of the GPU payload backing a cuda::Image: height * step
// (step is the full row length in bytes).
template <>
inline size_t get_cuda_gpu_data_size(const agnocast::cuda::Image & msg)
{
  const auto rows = static_cast<size_t>(msg.height);
  return rows * msg.step;
}

}  // namespace agnocast
20 changes: 20 additions & 0 deletions src/agnocast_cuda/package.xml
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
<?xml version="1.0"?>
<?xml-model href="http://download.ros.org/schema/package_format3.xsd" schematypens="http://www.w3.org/2001/XMLSchema"?>
<package format="3">
<name>agnocast_cuda</name>
<version>0.1.0</version>
<description>
CUDA IPC support for Agnocast. Provides GPU message types and cross-process GPU buffer sharing.
</description>
<maintainer email="sykwer@gmail.com">Takahiro Ishikawa-Aso</maintainer>
<license>Apache License 2.0</license>

<buildtool_depend>ament_cmake</buildtool_depend>

<!-- NOTE(review): the CUDA toolkit is located via CMake's find_package(CUDAToolkit)
     rather than declared here; confirm whether a rosdep key for CUDA should be added. -->
<depend>agnocastlib</depend>
<depend>sensor_msgs</depend>

<export>
<build_type>ament_cmake</build_type>
</export>
</package>
57 changes: 57 additions & 0 deletions src/agnocast_cuda/src/cuda_ipc_backend.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,57 @@
#include "cuda_ipc_backend.hpp"

#include <cuda_runtime.h>

#include <cstdio>
#include <cstdlib>
#include <cstring>

namespace
{

// Print a diagnostic and abort if a CUDA runtime call failed.
// Failures here are treated as unrecoverable for this process: there is no
// meaningful way to continue sharing GPU buffers after an IPC-layer error.
void check_cuda_error(cudaError_t err, const char * operation)
{
  if (err == cudaSuccess) {
    return;
  }
  std::fprintf(
    stderr, "[agnocast_cuda] FATAL: %s failed: %s\n", operation, cudaGetErrorString(err));
  std::abort();
}

}  // namespace

namespace agnocast::cuda
{

// Export a device allocation as a POD handle that can cross process
// boundaries. `size` is unused: cudaIpcGetMemHandle identifies the whole
// allocation that `device_ptr` belongs to.
// NOTE(review): per CUDA docs this requires memory obtained via cudaMalloc —
// confirm all callers allocate that way.
GpuHandle CudaIpcBackend::export_handle(void * device_ptr, size_t /*size*/)
{
GpuHandle h{};
// Compile-time guarantee that the CUDA handle fits in the opaque byte array.
static_assert(sizeof(cudaIpcMemHandle_t) <= sizeof(h.opaque));
cudaIpcMemHandle_t ipc_handle;
check_cuda_error(cudaIpcGetMemHandle(&ipc_handle, device_ptr), "cudaIpcGetMemHandle");
std::memcpy(h.opaque, &ipc_handle, sizeof(ipc_handle));
return h;
}

// Owner-side release of a device allocation previously exported.
void CudaIpcBackend::free_device_memory(void * device_ptr)
{
check_cuda_error(cudaFree(device_ptr), "cudaFree");
}

// Importer-side: reconstruct the CUDA IPC handle from the opaque bytes and
// map the remote allocation into this process. `size` is unused; the handle
// maps the whole allocation.
// NOTE(review): CUDA allows a handle to be opened only once per device
// context per process — confirm callers cache the returned pointer.
void * CudaIpcBackend::import_handle(const GpuHandle & handle, size_t /*size*/)
{
cudaIpcMemHandle_t ipc_handle;
std::memcpy(&ipc_handle, handle.opaque, sizeof(ipc_handle));
void * ptr = nullptr;
check_cuda_error(
cudaIpcOpenMemHandle(&ptr, ipc_handle, cudaIpcMemLazyEnablePeerAccess),
"cudaIpcOpenMemHandle");
return ptr;
}

// Importer-side unmap of a pointer obtained from import_handle. Must not be
// called on owner-side pointers (those go through free_device_memory).
void CudaIpcBackend::release_handle(void * local_ptr)
{
check_cuda_error(cudaIpcCloseMemHandle(local_ptr), "cudaIpcCloseMemHandle");
}

} // namespace agnocast::cuda
19 changes: 19 additions & 0 deletions src/agnocast_cuda/src/cuda_ipc_backend.hpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
// Internal header — kept in src/ so it is NOT installed or visible to downstream packages.
// Only get_backend.cpp includes this to instantiate the singleton.
#pragma once

#include "agnocast/gpu_transfer_backend.hpp"

namespace agnocast::cuda
{

/// GPU buffer sharing backend built on CUDA IPC
/// (cudaIpcGetMemHandle / cudaIpcOpenMemHandle).
/// Chosen at runtime for discrete GPUs; see get_backend.cpp.
class CudaIpcBackend : public GpuTransferBackend
{
public:
/// Pack the allocation containing `device_ptr` into a process-portable handle.
GpuHandle export_handle(void * device_ptr, size_t size) override;
/// Owner-side free of an exported allocation (cudaFree).
void free_device_memory(void * device_ptr) override;
/// Map a handle exported by another process into this one; returns the local device pointer.
void * import_handle(const GpuHandle & handle, size_t size) override;
/// Unmap a pointer previously returned by import_handle (importer side only).
void release_handle(void * local_ptr) override;
};

} // namespace agnocast::cuda
64 changes: 64 additions & 0 deletions src/agnocast_cuda/src/get_backend.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,64 @@
#include "agnocast/gpu_transfer_backend.hpp"
#include "cuda_ipc_backend.hpp"
#include "nvscibuf_backend.hpp"
#include "unified_memory_backend.hpp"
#include "vmm_backend.hpp"

#include <cuda_runtime.h>

#include <cstdio>
#include <cstdlib>
#include <memory>

namespace agnocast::cuda
{

namespace
{

// Decide which GPU transfer backend fits the device this process runs on.
// Discrete GPUs support CUDA IPC; integrated (Tegra-family) GPUs do not,
// and no alternative backend is implemented yet, so we fail fast there.
// CUDA runtime failures are fatal: without a working backend the library
// cannot provide GPU buffer sharing at all.
std::unique_ptr<GpuTransferBackend> select_backend()
{
  int device = 0;
  cudaError_t err = cudaGetDevice(&device);
  if (err != cudaSuccess) {
    std::fprintf(
      stderr, "[agnocast_cuda] FATAL: cudaGetDevice failed: %s\n", cudaGetErrorString(err));
    std::abort();
  }

  // cudaDevAttrIntegrated is 1 on GPUs that share physical memory with the
  // CPU (Jetson / DRIVE), 0 on discrete boards.
  int is_integrated = 0;
  err = cudaDeviceGetAttribute(&is_integrated, cudaDevAttrIntegrated, device);
  if (err != cudaSuccess) {
    std::fprintf(
      stderr, "[agnocast_cuda] FATAL: cudaDeviceGetAttribute failed: %s\n",
      cudaGetErrorString(err));
    std::abort();
  }

  if (!is_integrated) {
    // Discrete GPU (GeForce, Quadro, Tesla, A/H series) — CUDA IPC is supported.
    std::fprintf(stderr, "[agnocast_cuda] Discrete GPU detected, using CudaIpcBackend.\n");
    return std::make_unique<CudaIpcBackend>();
  }

  // Integrated GPU (Jetson Xavier/Orin/Thor, DRIVE).
  // TODO(agnocast): Implement and select the appropriate backend.
  // - Jetson Thor (CUDA 13.0+): CudaIpcBackend may work via OpenRM.
  // - Jetson Xavier/Orin: NvSciBufBackend or UnifiedMemoryBackend.
  // - DRIVE: NvSciBufBackend.
  std::fprintf(
    stderr,
    "[agnocast_cuda] FATAL: Integrated GPU detected (Jetson/DRIVE). "
    "No backend is implemented yet for this platform.\n");
  std::abort();
}

}  // namespace

GpuTransferBackend & get_backend()
{
  // Function-local static: the backend is selected exactly once, on first
  // use, with thread-safe initialization (C++11 magic statics), and lives
  // for the remainder of the process.
  static std::unique_ptr<GpuTransferBackend> backend = select_backend();
  return *backend;
}

} // namespace agnocast::cuda
30 changes: 30 additions & 0 deletions src/agnocast_cuda/src/nvscibuf_backend.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
#include "nvscibuf_backend.hpp"

#include <stdexcept>

namespace agnocast::cuda
{

namespace
{
// Shared throw site for the stub entry points below; keeps the message in
// one place until the real implementation lands.
[[noreturn]] void fail_not_implemented()
{
  throw std::runtime_error("[agnocast_cuda] NvSciBufBackend is not yet implemented.");
}
}  // namespace

// Every entry point is a stub: this backend is a placeholder for NvSciBuf
// support. export_handle carries the platform hint in its message.
GpuHandle NvSciBufBackend::export_handle(void * /*device_ptr*/, size_t /*size*/)
{
  throw std::runtime_error(
    "[agnocast_cuda] NvSciBufBackend is not yet implemented. "
    "Requires NvSciBuf (Jetson Xavier/Orin, NVIDIA DRIVE).");
}

void NvSciBufBackend::free_device_memory(void * /*device_ptr*/) { fail_not_implemented(); }

void * NvSciBufBackend::import_handle(const GpuHandle & /*handle*/, size_t /*size*/)
{
  fail_not_implemented();
}

void NvSciBufBackend::release_handle(void * /*local_ptr*/) { fail_not_implemented(); }

}  // namespace agnocast::cuda
21 changes: 21 additions & 0 deletions src/agnocast_cuda/src/nvscibuf_backend.hpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
// Internal header — kept in src/ so it is NOT installed or visible to downstream packages.
// Only get_backend.cpp includes this to instantiate the singleton.
#pragma once

#include "agnocast/gpu_transfer_backend.hpp"

namespace agnocast::cuda
{

// Placeholder backend using NvSciBuf / NvSciSync.
// Target platforms: Jetson Xavier/Orin, NVIDIA DRIVE.
// All methods currently throw std::runtime_error (see nvscibuf_backend.cpp);
// the class exists so the runtime selection in get_backend.cpp can be wired
// up ahead of the implementation.
class NvSciBufBackend : public GpuTransferBackend
{
public:
GpuHandle export_handle(void * device_ptr, size_t size) override;
void free_device_memory(void * device_ptr) override;
void * import_handle(const GpuHandle & handle, size_t size) override;
void release_handle(void * local_ptr) override;
};

} // namespace agnocast::cuda
30 changes: 30 additions & 0 deletions src/agnocast_cuda/src/unified_memory_backend.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
#include "unified_memory_backend.hpp"

#include <stdexcept>

namespace agnocast::cuda
{

namespace
{
// Shared throw site for the stubs below; single place to update once the
// unified-memory implementation exists.
[[noreturn]] void fail_not_implemented()
{
  throw std::runtime_error("[agnocast_cuda] UnifiedMemoryBackend is not yet implemented.");
}
}  // namespace

// Every entry point is a stub: this backend is a placeholder for the
// shm + cudaHostRegister scheme. export_handle carries the hint in its message.
GpuHandle UnifiedMemoryBackend::export_handle(void * /*device_ptr*/, size_t /*size*/)
{
  throw std::runtime_error(
    "[agnocast_cuda] UnifiedMemoryBackend is not yet implemented. "
    "Requires POSIX shm + cudaHostRegister (Jetson unified memory).");
}

void UnifiedMemoryBackend::free_device_memory(void * /*device_ptr*/) { fail_not_implemented(); }

void * UnifiedMemoryBackend::import_handle(const GpuHandle & /*handle*/, size_t /*size*/)
{
  fail_not_implemented();
}

void UnifiedMemoryBackend::release_handle(void * /*local_ptr*/) { fail_not_implemented(); }

}  // namespace agnocast::cuda
21 changes: 21 additions & 0 deletions src/agnocast_cuda/src/unified_memory_backend.hpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
// Internal header — kept in src/ so it is NOT installed or visible to downstream packages.
// Only get_backend.cpp includes this to instantiate the singleton.
#pragma once

#include "agnocast/gpu_transfer_backend.hpp"

namespace agnocast::cuda
{

// Placeholder backend using POSIX shared memory + cudaHostRegister for Jetson platforms
// where CPU and GPU share the same physical memory (unified memory architecture).
// All methods currently throw std::runtime_error (see unified_memory_backend.cpp);
// the class exists so get_backend.cpp can reference it before the
// implementation lands.
class UnifiedMemoryBackend : public GpuTransferBackend
{
public:
GpuHandle export_handle(void * device_ptr, size_t size) override;
void free_device_memory(void * device_ptr) override;
void * import_handle(const GpuHandle & handle, size_t size) override;
void release_handle(void * local_ptr) override;
};

} // namespace agnocast::cuda
30 changes: 30 additions & 0 deletions src/agnocast_cuda/src/vmm_backend.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
#include "vmm_backend.hpp"

#include <stdexcept>

namespace agnocast::cuda
{

namespace
{
// Shared throw site for the stubs below; single place to update once the
// CUDA VMM implementation exists.
[[noreturn]] void fail_not_implemented()
{
  throw std::runtime_error("[agnocast_cuda] VmmBackend is not yet implemented.");
}
}  // namespace

// Every entry point is a stub: this backend is a placeholder for the CUDA
// virtual memory management path. export_handle carries the API hint.
GpuHandle VmmBackend::export_handle(void * /*device_ptr*/, size_t /*size*/)
{
  throw std::runtime_error(
    "[agnocast_cuda] VmmBackend is not yet implemented. "
    "Requires cuMemExportToShareableHandle (CUDA Driver API).");
}

void VmmBackend::free_device_memory(void * /*device_ptr*/) { fail_not_implemented(); }

void * VmmBackend::import_handle(const GpuHandle & /*handle*/, size_t /*size*/)
{
  fail_not_implemented();
}

void VmmBackend::release_handle(void * /*local_ptr*/) { fail_not_implemented(); }

}  // namespace agnocast::cuda
Loading
Loading