-
Notifications
You must be signed in to change notification settings - Fork 16
feat: Agnocast CUDA IPC #1179
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
feat: Agnocast CUDA IPC #1179
Changes from 1 commit
ceaeec2
9dd28ed
8e69df3
3c4e4e5
0188770
d9d438e
e336b33
1c1ea2a
1f5400b
8933d20
ec3226b
34f93f7
66f4619
ef83ec7
31a6fb8
a653148
fc4f87a
ce5f5f0
2336546
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,45 @@ | ||
cmake_minimum_required(VERSION 3.14)
# All sources are plain C++ that reach the CUDA runtime through the
# CUDAToolkit imported targets; no .cu files are compiled, so the CUDA
# language (and therefore nvcc) does not need to be enabled here.
project(agnocast_cuda LANGUAGES CXX)

if(NOT CMAKE_CXX_STANDARD)
  set(CMAKE_CXX_STANDARD 17)
endif()
# Fail configuration instead of silently decaying to an older standard.
set(CMAKE_CXX_STANDARD_REQUIRED ON)

if(CMAKE_COMPILER_IS_GNUCXX OR CMAKE_CXX_COMPILER_ID MATCHES "Clang")
  add_compile_options(-Wall -Wextra -Wpedantic)
endif()

find_package(ament_cmake REQUIRED)
find_package(agnocastlib REQUIRED)
find_package(sensor_msgs REQUIRED)
find_package(CUDAToolkit REQUIRED)

add_library(agnocast_cuda SHARED
  src/cuda_ipc_backend.cpp
  src/vmm_backend.cpp
  src/nvscibuf_backend.cpp
  src/unified_memory_backend.cpp
  src/get_backend.cpp)

target_include_directories(agnocast_cuda PUBLIC
  $<BUILD_INTERFACE:${CMAKE_CURRENT_SOURCE_DIR}/include>
  $<INSTALL_INTERFACE:include>)

ament_target_dependencies(agnocast_cuda agnocastlib sensor_msgs)
# Plain (non-keyword) signature puts CUDA::cudart in the exported link
# interface, so downstream users of export_${PROJECT_NAME} must be able to
# resolve that imported target — hence CUDAToolkit is exported below.
target_link_libraries(agnocast_cuda CUDA::cudart)

install(TARGETS agnocast_cuda
  EXPORT export_${PROJECT_NAME}
  LIBRARY DESTINATION lib
  ARCHIVE DESTINATION lib
  RUNTIME DESTINATION bin
  INCLUDES DESTINATION include)

install(
  DIRECTORY include/
  DESTINATION include)

ament_export_targets(export_${PROJECT_NAME} HAS_LIBRARY_TARGET)
ament_export_include_directories(include)
# CUDAToolkit added so the generated package config re-finds CUDA::cudart
# for consumers linking against the exported target.
ament_export_dependencies(agnocastlib sensor_msgs CUDAToolkit)
ament_package()
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,42 @@ | ||
| #pragma once | ||
|
|
||
| #include "agnocast/cuda_message_tag.hpp" | ||
|
|
||
| #include <sensor_msgs/msg/image.hpp> | ||
| #include <sensor_msgs/msg/point_cloud2.hpp> | ||
|
|
||
| #include <cstddef> | ||
| #include <cstdint> | ||
|
|
||
| namespace agnocast::cuda | ||
| { | ||
|
|
||
| struct PointCloud2 : public sensor_msgs::msg::PointCloud2, public agnocast::cuda_message_tag | ||
| { | ||
| uint8_t * data = nullptr; // GPU device pointer (shadows base class std::vector<uint8_t> data) | ||
| }; | ||
|
|
||
| struct Image : public sensor_msgs::msg::Image, public agnocast::cuda_message_tag | ||
| { | ||
| uint8_t * data = nullptr; // GPU device pointer (shadows base class std::vector<uint8_t> data) | ||
| }; | ||
|
|
||
| } // namespace agnocast::cuda | ||
|
|
||
| namespace agnocast | ||
| { | ||
|
|
||
| template <> | ||
| inline size_t get_cuda_gpu_data_size(const agnocast::cuda::PointCloud2 & msg) | ||
| { | ||
| return static_cast<size_t>(msg.height) * static_cast<size_t>(msg.width) * | ||
| static_cast<size_t>(msg.point_step); | ||
| } | ||
|
|
||
| template <> | ||
| inline size_t get_cuda_gpu_data_size(const agnocast::cuda::Image & msg) | ||
| { | ||
| return static_cast<size_t>(msg.height) * msg.step; | ||
| } | ||
|
|
||
| } // namespace agnocast |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,20 @@ | ||
<?xml version="1.0"?>
<?xml-model href="http://download.ros.org/schema/package_format3.xsd" schematypens="http://www.w3.org/2001/XMLSchema"?>
<package format="3">
  <name>agnocast_cuda</name>
  <version>0.1.0</version>
  <description>
    CUDA IPC support for Agnocast. Provides GPU message types and cross-process GPU buffer sharing.
  </description>
  <maintainer email="sykwer@gmail.com">Takahiro Ishikawa-Aso</maintainer>
  <!-- SPDX identifier form, as expected by ROS tooling (ament_copyright). -->
  <license>Apache-2.0</license>

  <buildtool_depend>ament_cmake</buildtool_depend>

  <depend>agnocastlib</depend>
  <depend>sensor_msgs</depend>

  <export>
    <build_type>ament_cmake</build_type>
  </export>
</package>
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,57 @@ | ||
| #include "cuda_ipc_backend.hpp" | ||
|
|
||
| #include <cuda_runtime.h> | ||
|
|
||
| #include <cstdio> | ||
| #include <cstdlib> | ||
| #include <cstring> | ||
|
|
||
| namespace | ||
| { | ||
|
|
||
| void check_cuda_error(cudaError_t err, const char * operation) | ||
| { | ||
| if (err != cudaSuccess) { | ||
| std::fprintf( | ||
| stderr, "[agnocast_cuda] FATAL: %s failed: %s\n", operation, cudaGetErrorString(err)); | ||
| std::abort(); | ||
| } | ||
| } | ||
|
|
||
| } // namespace | ||
|
|
||
| namespace agnocast::cuda | ||
| { | ||
|
|
||
| GpuHandle CudaIpcBackend::export_handle(void * device_ptr, size_t /*size*/) | ||
| { | ||
| GpuHandle h{}; | ||
| static_assert(sizeof(cudaIpcMemHandle_t) <= sizeof(h.opaque)); | ||
| cudaIpcMemHandle_t ipc_handle; | ||
| check_cuda_error(cudaIpcGetMemHandle(&ipc_handle, device_ptr), "cudaIpcGetMemHandle"); | ||
| std::memcpy(h.opaque, &ipc_handle, sizeof(ipc_handle)); | ||
| return h; | ||
| } | ||
|
|
||
| void CudaIpcBackend::free_device_memory(void * device_ptr) | ||
| { | ||
| check_cuda_error(cudaFree(device_ptr), "cudaFree"); | ||
| } | ||
|
|
||
| void * CudaIpcBackend::import_handle(const GpuHandle & handle, size_t /*size*/) | ||
| { | ||
| cudaIpcMemHandle_t ipc_handle; | ||
| std::memcpy(&ipc_handle, handle.opaque, sizeof(ipc_handle)); | ||
| void * ptr = nullptr; | ||
| check_cuda_error( | ||
| cudaIpcOpenMemHandle(&ptr, ipc_handle, cudaIpcMemLazyEnablePeerAccess), | ||
| "cudaIpcOpenMemHandle"); | ||
| return ptr; | ||
| } | ||
|
|
||
| void CudaIpcBackend::release_handle(void * local_ptr) | ||
| { | ||
| check_cuda_error(cudaIpcCloseMemHandle(local_ptr), "cudaIpcCloseMemHandle"); | ||
| } | ||
|
|
||
| } // namespace agnocast::cuda |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,19 @@ | ||
| // Internal header — kept in src/ so it is NOT installed or visible to downstream packages. | ||
| // Only get_backend.cpp includes this to instantiate the singleton. | ||
| #pragma once | ||
|
|
||
| #include "agnocast/gpu_transfer_backend.hpp" | ||
|
|
||
| namespace agnocast::cuda | ||
| { | ||
|
|
||
| class CudaIpcBackend : public GpuTransferBackend | ||
| { | ||
| public: | ||
| GpuHandle export_handle(void * device_ptr, size_t size) override; | ||
| void free_device_memory(void * device_ptr) override; | ||
| void * import_handle(const GpuHandle & handle, size_t size) override; | ||
| void release_handle(void * local_ptr) override; | ||
| }; | ||
|
|
||
| } // namespace agnocast::cuda |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,64 @@ | ||
| #include "agnocast/gpu_transfer_backend.hpp" | ||
| #include "cuda_ipc_backend.hpp" | ||
| #include "nvscibuf_backend.hpp" | ||
| #include "unified_memory_backend.hpp" | ||
| #include "vmm_backend.hpp" | ||
|
|
||
| #include <cuda_runtime.h> | ||
|
|
||
| #include <cstdio> | ||
| #include <cstdlib> | ||
| #include <memory> | ||
|
|
||
| namespace agnocast::cuda | ||
| { | ||
|
|
||
| namespace | ||
| { | ||
|
|
||
| std::unique_ptr<GpuTransferBackend> select_backend() | ||
| { | ||
| int device = 0; | ||
| cudaError_t err = cudaGetDevice(&device); | ||
| if (err != cudaSuccess) { | ||
| std::fprintf( | ||
| stderr, "[agnocast_cuda] FATAL: cudaGetDevice failed: %s\n", cudaGetErrorString(err)); | ||
| std::abort(); | ||
| } | ||
|
|
||
| int is_integrated = 0; | ||
| err = cudaDeviceGetAttribute(&is_integrated, cudaDevAttrIntegrated, device); | ||
| if (err != cudaSuccess) { | ||
| std::fprintf( | ||
| stderr, "[agnocast_cuda] FATAL: cudaDeviceGetAttribute failed: %s\n", | ||
| cudaGetErrorString(err)); | ||
| std::abort(); | ||
| } | ||
|
|
||
| if (!is_integrated) { | ||
| // Discrete GPU (GeForce, Quadro, Tesla, A/H series) — CUDA IPC is supported. | ||
| std::fprintf(stderr, "[agnocast_cuda] Discrete GPU detected, using CudaIpcBackend.\n"); | ||
| return std::make_unique<CudaIpcBackend>(); | ||
| } | ||
|
|
||
| // Integrated GPU (Jetson Xavier/Orin/Thor, DRIVE). | ||
| // TODO(agnocast): Implement and select the appropriate backend. | ||
| // - Jetson Thor (CUDA 13.0+): CudaIpcBackend may work via OpenRM. | ||
| // - Jetson Xavier/Orin: NvSciBufBackend or UnifiedMemoryBackend. | ||
| // - DRIVE: NvSciBufBackend. | ||
|
Comment on lines
+45
to
+49
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. After a long discussion with Gemini, it seems that CUDA VMM API is the best solution for implementing Agnocast on Tegra-family GPUs where CUDA IPC is not usable. NvSciBuf has the following pros and cons. Pros:
Cons (big one):
Note A: |
||
| std::fprintf( | ||
| stderr, | ||
| "[agnocast_cuda] FATAL: Integrated GPU detected (Jetson/DRIVE). " | ||
| "No backend is implemented yet for this platform.\n"); | ||
| std::abort(); | ||
| } | ||
|
|
||
| } // namespace | ||
|
|
||
| GpuTransferBackend & get_backend() | ||
| { | ||
| static auto instance = select_backend(); | ||
| return *instance; | ||
| } | ||
|
|
||
| } // namespace agnocast::cuda | ||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,30 @@ | ||
| #include "nvscibuf_backend.hpp" | ||
|
|
||
| #include <stdexcept> | ||
|
|
||
| namespace agnocast::cuda | ||
| { | ||
|
|
||
| GpuHandle NvSciBufBackend::export_handle(void * /*device_ptr*/, size_t /*size*/) | ||
| { | ||
| throw std::runtime_error( | ||
| "[agnocast_cuda] NvSciBufBackend is not yet implemented. " | ||
| "Requires NvSciBuf (Jetson Xavier/Orin, NVIDIA DRIVE)."); | ||
| } | ||
|
|
||
| void NvSciBufBackend::free_device_memory(void * /*device_ptr*/) | ||
| { | ||
| throw std::runtime_error("[agnocast_cuda] NvSciBufBackend is not yet implemented."); | ||
| } | ||
|
|
||
| void * NvSciBufBackend::import_handle(const GpuHandle & /*handle*/, size_t /*size*/) | ||
| { | ||
| throw std::runtime_error("[agnocast_cuda] NvSciBufBackend is not yet implemented."); | ||
| } | ||
|
|
||
| void NvSciBufBackend::release_handle(void * /*local_ptr*/) | ||
| { | ||
| throw std::runtime_error("[agnocast_cuda] NvSciBufBackend is not yet implemented."); | ||
| } | ||
|
|
||
| } // namespace agnocast::cuda |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,21 @@ | ||
| // Internal header — kept in src/ so it is NOT installed or visible to downstream packages. | ||
| // Only get_backend.cpp includes this to instantiate the singleton. | ||
| #pragma once | ||
|
|
||
| #include "agnocast/gpu_transfer_backend.hpp" | ||
|
|
||
| namespace agnocast::cuda | ||
| { | ||
|
|
||
| // Placeholder backend using NvSciBuf / NvSciSync. | ||
| // Target platforms: Jetson Xavier/Orin, NVIDIA DRIVE. | ||
| class NvSciBufBackend : public GpuTransferBackend | ||
| { | ||
| public: | ||
| GpuHandle export_handle(void * device_ptr, size_t size) override; | ||
| void free_device_memory(void * device_ptr) override; | ||
| void * import_handle(const GpuHandle & handle, size_t size) override; | ||
| void release_handle(void * local_ptr) override; | ||
| }; | ||
|
|
||
| } // namespace agnocast::cuda |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,30 @@ | ||
| #include "unified_memory_backend.hpp" | ||
|
|
||
| #include <stdexcept> | ||
|
|
||
| namespace agnocast::cuda | ||
| { | ||
|
|
||
| GpuHandle UnifiedMemoryBackend::export_handle(void * /*device_ptr*/, size_t /*size*/) | ||
| { | ||
| throw std::runtime_error( | ||
| "[agnocast_cuda] UnifiedMemoryBackend is not yet implemented. " | ||
| "Requires POSIX shm + cudaHostRegister (Jetson unified memory)."); | ||
| } | ||
|
|
||
| void UnifiedMemoryBackend::free_device_memory(void * /*device_ptr*/) | ||
| { | ||
| throw std::runtime_error("[agnocast_cuda] UnifiedMemoryBackend is not yet implemented."); | ||
| } | ||
|
|
||
| void * UnifiedMemoryBackend::import_handle(const GpuHandle & /*handle*/, size_t /*size*/) | ||
| { | ||
| throw std::runtime_error("[agnocast_cuda] UnifiedMemoryBackend is not yet implemented."); | ||
| } | ||
|
|
||
| void UnifiedMemoryBackend::release_handle(void * /*local_ptr*/) | ||
| { | ||
| throw std::runtime_error("[agnocast_cuda] UnifiedMemoryBackend is not yet implemented."); | ||
| } | ||
|
|
||
| } // namespace agnocast::cuda |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,21 @@ | ||
| // Internal header — kept in src/ so it is NOT installed or visible to downstream packages. | ||
| // Only get_backend.cpp includes this to instantiate the singleton. | ||
| #pragma once | ||
|
|
||
| #include "agnocast/gpu_transfer_backend.hpp" | ||
|
|
||
| namespace agnocast::cuda | ||
| { | ||
|
|
||
| // Placeholder backend using POSIX shared memory + cudaHostRegister for Jetson platforms | ||
| // where CPU and GPU share the same physical memory (unified memory architecture). | ||
| class UnifiedMemoryBackend : public GpuTransferBackend | ||
| { | ||
| public: | ||
| GpuHandle export_handle(void * device_ptr, size_t size) override; | ||
| void free_device_memory(void * device_ptr) override; | ||
| void * import_handle(const GpuHandle & handle, size_t size) override; | ||
| void release_handle(void * local_ptr) override; | ||
| }; | ||
|
|
||
| } // namespace agnocast::cuda |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,30 @@ | ||
| #include "vmm_backend.hpp" | ||
|
|
||
| #include <stdexcept> | ||
|
|
||
| namespace agnocast::cuda | ||
| { | ||
|
|
||
| GpuHandle VmmBackend::export_handle(void * /*device_ptr*/, size_t /*size*/) | ||
| { | ||
| throw std::runtime_error( | ||
| "[agnocast_cuda] VmmBackend is not yet implemented. " | ||
| "Requires cuMemExportToShareableHandle (CUDA Driver API)."); | ||
| } | ||
|
|
||
| void VmmBackend::free_device_memory(void * /*device_ptr*/) | ||
| { | ||
| throw std::runtime_error("[agnocast_cuda] VmmBackend is not yet implemented."); | ||
| } | ||
|
|
||
| void * VmmBackend::import_handle(const GpuHandle & /*handle*/, size_t /*size*/) | ||
| { | ||
| throw std::runtime_error("[agnocast_cuda] VmmBackend is not yet implemented."); | ||
| } | ||
|
|
||
| void VmmBackend::release_handle(void * /*local_ptr*/) | ||
| { | ||
| throw std::runtime_error("[agnocast_cuda] VmmBackend is not yet implemented."); | ||
| } | ||
|
|
||
| } // namespace agnocast::cuda |
Uh oh!
There was an error while loading. Please reload this page.