diff --git a/VisionPilot/Iceoryx/CMakeLists.txt b/VisionPilot/Iceoryx/CMakeLists.txt new file mode 100644 index 000000000..0bcdce7fa --- /dev/null +++ b/VisionPilot/Iceoryx/CMakeLists.txt @@ -0,0 +1,80 @@ +cmake_minimum_required(VERSION 3.16) +project(iceoryx_demo) + +set(CMAKE_CXX_STANDARD 17) +set(CMAKE_CXX_STANDARD_REQUIRED ON) + +# Find dependencies +find_package(iceoryx_posh REQUIRED) +find_package(iceoryx_binding_c REQUIRED) +find_package(OpenCV REQUIRED COMPONENTS core highgui imgproc) +find_package(CUDA REQUIRED) + +# Add our common libraries from the VisionPilot project +# Adjust the relative paths if necessary +set(COMMON_DIR ${CMAKE_CURRENT_SOURCE_DIR}/../common) + +# GStreamer Engine Library +add_library(gstreamer_engine STATIC + ${COMMON_DIR}/sensors/gstreamer_engine.cpp +) +target_include_directories(gstreamer_engine PUBLIC + ${COMMON_DIR}/include + ${OpenCV_INCLUDE_DIRS} +) +# GStreamer requires pkg-config to find its libraries +find_package(PkgConfig REQUIRED) +pkg_check_modules(GSTREAMER REQUIRED gstreamer-1.0 gstreamer-app-1.0) +target_include_directories(gstreamer_engine PUBLIC ${GSTREAMER_INCLUDE_DIRS}) +target_link_libraries(gstreamer_engine PUBLIC + ${GSTREAMER_LIBRARIES} + ${OpenCV_LIBS} +) +target_compile_definitions(gstreamer_engine PUBLIC LOG_TYPE=0) # Use printf for logging + +# AutoSpeed TensorRT Backend Library +add_library(autospeed_backend STATIC + ${COMMON_DIR}/backends/autospeed/tensorrt_engine.cpp +) +target_include_directories(autospeed_backend PUBLIC + ${COMMON_DIR}/include + ${CUDA_INCLUDE_DIRS} + /usr/include/x86_64-linux-gnu/ # For TensorRT headers +) +target_link_libraries(autospeed_backend PUBLIC + gstreamer_engine + ${CUDA_LIBRARIES} + nvinfer + nvonnxparser +) +target_compile_definitions(autospeed_backend PUBLIC LOG_TYPE=0) # Use printf for logging + +# Set include directory for the topic header +include_directories(include) + +# Publisher Executable +add_executable(publisher src/publisher_node.cpp) +target_link_libraries(publisher + PRIVATE + autospeed_backend + gstreamer_engine + iceoryx_posh::iceoryx_posh + iceoryx_binding_c::iceoryx_binding_c + ${OpenCV_LIBS} +) +target_compile_options(publisher PRIVATE "-Wno-unused-parameter") + +# Subscriber Executable +add_executable(subscriber src/subscriber_node.cpp) +target_link_libraries(subscriber + PRIVATE + iceoryx_posh::iceoryx_posh + iceoryx_binding_c::iceoryx_binding_c + ${OpenCV_LIBS} +) +target_compile_options(subscriber PRIVATE "-Wno-unused-parameter") + +# Install executables +install(TARGETS publisher subscriber + RUNTIME DESTINATION bin +) diff --git a/VisionPilot/Iceoryx/include/iceoryx_topic.hpp b/VisionPilot/Iceoryx/include/iceoryx_topic.hpp new file mode 100644 index 000000000..16e31c1aa --- /dev/null +++ b/VisionPilot/Iceoryx/include/iceoryx_topic.hpp @@ -0,0 +1,47 @@ + +#include + +// Define a POD (Plain Old Data) struct for a single detection. +// This is necessary for sending data through Iceoryx, which works with raw memory. +struct DetectionPOD { + float x1, y1, x2, y2; + float score; + int32_t class_id; +}; + +// The main data structure (topic) to be sent over Iceoryx. +// This struct lives in shared memory - both publisher and subscriber work +// directly with this memory (zero-copy IPC). +// +// Publisher workflow: +// 1. Loan this struct from Iceoryx shared memory +// 2. Write frame data directly into frame_data[] buffer +// 3. Run inference on cv::Mat wrapper pointing to frame_data[] +// 4. Publish (no copy - just memory ownership transfer) +// +// Subscriber workflow: +// 1. Receive reference to this struct in shared memory +// 2. Create cv::Mat wrapper around frame_data[] (no copy) +// 3. Process/visualize directly from shared memory +struct FrameDetectionsTopic { + // Timestamps for latency measurement (e.g., from std::chrono::steady_clock) + uint64_t capture_timestamp_ns; // When the frame was grabbed from the source + uint64_t publish_timestamp_ns; // When the packet is sent by the publisher + + // Frame metadata + uint32_t frame_width; + uint32_t frame_height; + uint32_t frame_channels; + uint64_t frame_data_size; // Actual size of data in the buffer + + // Detection metadata + uint32_t num_detections; + + // Fixed-size buffers for performance. + // Ensure these are large enough for your use case. + static constexpr uint32_t MAX_DETECTIONS = 100; + static constexpr uint64_t MAX_FRAME_SIZE = 3840 * 2160 * 3; // 4K UHD RGB (24,883,200 bytes) + + DetectionPOD detections[MAX_DETECTIONS]; + uint8_t frame_data[MAX_FRAME_SIZE]; +}; diff --git a/VisionPilot/Iceoryx/roudi_config.toml b/VisionPilot/Iceoryx/roudi_config.toml new file mode 100644 index 000000000..e1731c74a --- /dev/null +++ b/VisionPilot/Iceoryx/roudi_config.toml @@ -0,0 +1,43 @@ +[general] +version = 1 + +[[segment]] + +[[segment.mempool]] +size = 128 +count = 10000 + +[[segment.mempool]] +size = 1024 +count = 5000 + +[[segment.mempool]] +size = 16384 +count = 1000 + +[[segment.mempool]] +size = 131072 +count = 200 + +[[segment.mempool]] +size = 524288 +count = 50 + +[[segment.mempool]] +size = 1048576 +count = 30 + +[[segment.mempool]] +size = 4194304 +count = 10 + +# Custom large memory pools for vision data +[[segment.mempool]] +size = 8388608 +count = 10 + +# Extra-large pool for 4K video frames (32 MB chunks) +[[segment.mempool]] +size = 33554432 +count = 10 + diff --git a/VisionPilot/Iceoryx/run_iceoryx_demo.sh b/VisionPilot/Iceoryx/run_iceoryx_demo.sh new file mode 100644 index 000000000..ff6ba92b9 --- /dev/null +++ b/VisionPilot/Iceoryx/run_iceoryx_demo.sh @@ -0,0 +1,91 @@ +#!/bin/bash +# +# Script to build and run the full Iceoryx publisher/subscriber demo pipeline. +# +# It performs the following steps: +# 1. Builds the publisher and subscriber executables using CMake. +# 2. Starts the Iceoryx router (iox-roudi) in the background. +# 3. Starts the subscriber application in the background. +# 4. Starts the publisher application in the foreground. +# 5. Cleans up all background processes on exit (e.g., via Ctrl+C). +# + +# --- Configuration --- +VIDEO_PATH="/home/pranavdoma/Downloads/autoware.privately-owned-vehicles/VisionPilot/ROS2/data/Road_Driving_Scenes_Normal.mp4" +MODEL_PATH="/home/pranavdoma/Downloads/autoware.privately-owned-vehicles/VisionPilot/ROS2/data/models/AutoSpeed_n.onnx" +PRECISION="fp16" +# --------------------- + +# Exit immediately if a command exits with a non-zero status. +set -e + +# Function to clean up background processes +cleanup() { + echo -e "\n\nShutting down Iceoryx demo..." + # Kill all background jobs of this script + # The negative PID kills the entire process group + if [ -n "$ROUDI_PID" ]; then + kill $ROUDI_PID 2>/dev/null && echo "Stopped iox-roudi (PID $ROUDI_PID)." + fi + if [ -n "$SUBSCRIBER_PID" ]; then + kill $SUBSCRIBER_PID 2>/dev/null && echo "Stopped subscriber (PID $SUBSCRIBER_PID)." + fi + echo "Cleanup complete." +} + +# Trap the EXIT signal to ensure cleanup runs, even on Ctrl+C +trap cleanup EXIT + +# --- Main Script --- + +# Check if required files exist +if [ ! -f "$VIDEO_PATH" ]; then + echo "Error: Video file not found: $VIDEO_PATH" + exit 1 +fi +if [ ! -f "$MODEL_PATH" ]; then + echo "Error: Model file not found: $MODEL_PATH" + exit 1 +fi + +# Get the directory of this script +SCRIPT_DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" &> /dev/null && pwd )" +cd "$SCRIPT_DIR" + +# 1. Start Iceoryx Router (roudi) with custom configuration +echo "--- Starting Iceoryx router (iox-roudi) in the background ---" +# Check if roudi is already running to avoid errors +if ! pgrep -x "iox-roudi" > /dev/null +then + iox-roudi -c "$SCRIPT_DIR/roudi_config.toml" > /dev/null 2>&1 & + ROUDI_PID=$! + # Give roudi a moment to initialize + sleep 1 + echo "iox-roudi started with PID $ROUDI_PID (using custom config for large frames)." +else + echo "iox-roudi is already running." +fi +echo "" + +# 2. Start the Subscriber in the background +echo "--- Starting subscriber in the background ---" +./build/subscriber & +SUBSCRIBER_PID=$! +echo "Subscriber started with PID $SUBSCRIBER_PID." +echo "An OpenCV window from the subscriber should appear shortly." +echo "Subscriber metrics will be printed to this terminal." +echo "" + +# 3. Start the Publisher in the foreground +echo "--- Starting publisher in the foreground ---" +echo "Video: $VIDEO_PATH" +echo "Model: $MODEL_PATH" +echo "Precision: $PRECISION" +echo "--------------------------------------------" +echo "Press Ctrl+C to stop the publisher and clean up all processes." +echo "" + +# The script will wait here until the publisher finishes or is interrupted +./build/publisher "$VIDEO_PATH" "$MODEL_PATH" "$PRECISION" + +echo "Publisher finished." diff --git a/VisionPilot/Iceoryx/src/publisher_node.cpp b/VisionPilot/Iceoryx/src/publisher_node.cpp new file mode 100644 index 000000000..1b1fa00ce --- /dev/null +++ b/VisionPilot/Iceoryx/src/publisher_node.cpp @@ -0,0 +1,151 @@ + +#include "iceoryx_posh/popo/publisher.hpp" +#include "iceoryx_posh/runtime/posh_runtime.hpp" +#include "iceoryx_topic.hpp" + +#include "../../common/include/gstreamer_engine.hpp" +#include "../../common/backends/autospeed/tensorrt_engine.hpp" + +#include +#include +#include + +using namespace autoware_pov::vision; +using namespace autoware_pov::vision::autospeed; +using iox::runtime::PoshRuntime; + +// Helper to get current time in nanoseconds +uint64_t now_ns() { + return std::chrono::duration_cast( + std::chrono::steady_clock::now().time_since_epoch() + ).count(); +} + +int main(int argc, char** argv) +{ + if (argc < 4) { + std::cerr << "Usage: " << argv[0] << " \n"; + std::cerr << " stream_source: RTSP URL, /dev/videoX, or video file\n"; + std::cerr << " model_path: .pt or .onnx model file\n"; + std::cerr << " precision: fp32 or fp16\n"; + return 1; + } + + std::string stream_source = argv[1]; + std::string model_path = argv[2]; + std::string precision = argv[3]; + float conf_thresh = 0.6f; + float iou_thresh = 0.45f; + int gpu_id = 0; + + // 1. Initialize GStreamer + GStreamerEngine gstreamer(stream_source, 0, 0, false); // false = benchmark mode + if (!gstreamer.initialize() || !gstreamer.start()) { + std::cerr << "Failed to initialize GStreamer" << std::endl; + return 1; + } + + // 2. Initialize TensorRT backend + AutoSpeedTensorRTEngine backend(model_path, precision, gpu_id); + + // 3. Initialize Iceoryx Runtime and Publisher + constexpr char APP_NAME[] = "vision-publisher"; + PoshRuntime::initRuntime(APP_NAME); + + iox::popo::PublisherOptions publisherOptions; + publisherOptions.historyCapacity = 5; // Keep a few samples in history + iox::popo::Publisher publisher({"Vision", "AutoSpeed", "Frame"}, publisherOptions); + + std::cout << "Publisher running... " << std::endl; + std::cout << "Capturing from: " << stream_source << std::endl; + std::cout << "Publishing to Iceoryx topic 'Vision/AutoSpeed/Frame'" << std::endl; + + // Performance tracking + int frame_count = 0; + uint32_t last_detection_count = 0; + long total_inference_us = 0; + long total_publish_us = 0; + + while (gstreamer.isActive()) { + auto capture_start_time = now_ns(); + + // Loan Iceoryx memory FIRST - work directly with shared memory (zero-copy) + publisher.loan() + .and_then([&](auto& sample) { + // A. Capture frame from GStreamer + cv::Mat gstreamer_frame = gstreamer.getFrame(); + if (gstreamer_frame.empty()) { + std::cerr << "Failed to capture frame, skipping." << std::endl; + return; + } + + // B. Create cv::Mat wrapper around loaned shared memory (no copy!) + sample->frame_width = gstreamer_frame.cols; + sample->frame_height = gstreamer_frame.rows; + sample->frame_channels = gstreamer_frame.channels(); + sample->frame_data_size = gstreamer_frame.total() * gstreamer_frame.elemSize(); + + if (sample->frame_data_size > FrameDetectionsTopic::MAX_FRAME_SIZE) { + std::cerr << "Error: Frame size exceeds buffer, skipping." << std::endl; + return; + } + + // Create Mat wrapper pointing to shared memory buffer + cv::Mat shared_frame(gstreamer_frame.rows, gstreamer_frame.cols, + CV_8UC3, sample->frame_data); + + // Copy from GStreamer to shared memory (unavoidable, but only ONE copy) + gstreamer_frame.copyTo(shared_frame); + + // C. Run inference directly on shared memory frame (no extra copy!) + auto infer_start = now_ns(); + std::vector detections = backend.inference(shared_frame, conf_thresh, iou_thresh); + auto infer_end = now_ns(); + long inference_us = (infer_end - infer_start) / 1000; + total_inference_us += inference_us; + + // D. Store detections and metadata directly in loaned sample + sample->num_detections = std::min((uint32_t)detections.size(), + FrameDetectionsTopic::MAX_DETECTIONS); + last_detection_count = sample->num_detections; + for (uint32_t i = 0; i < sample->num_detections; ++i) { + sample->detections[i].x1 = detections[i].x1; + sample->detections[i].y1 = detections[i].y1; + sample->detections[i].x2 = detections[i].x2; + sample->detections[i].y2 = detections[i].y2; + sample->detections[i].score = detections[i].confidence; + sample->detections[i].class_id = detections[i].class_id; + } + + // E. Set timestamps and publish + sample->capture_timestamp_ns = capture_start_time; + sample->publish_timestamp_ns = now_ns(); + sample.publish(); + + // Track publish overhead (timestamp setting is negligible) + long publish_us = (now_ns() - infer_end) / 1000; + total_publish_us += publish_us; + frame_count++; + }) + .or_else([](auto& error) { + std::cerr << "Failed to loan sample, error: " << error << std::endl; + }); + + // Print metrics every 30 frames + if (frame_count % 30 == 0) { + std::cout << "\n========================================\n"; + std::cout << "PUBLISHER METRICS - Frame " << frame_count << "\n"; + std::cout << "========================================\n"; + std::cout << "Avg Inference Time: " << std::fixed << std::setprecision(3) + << (total_inference_us / (double)frame_count / 1000.0) << " ms\n"; + std::cout << "Avg Publish Time: " << (total_publish_us / (double)frame_count / 1000.0) << " ms\n"; + std::cout << "Detections: " << last_detection_count << "\n"; + std::cout << "========================================\n\n"; + } + } + + gstreamer.stop(); + std::cout << "\nPublisher stopped." << std::endl; + std::cout << "Total frames processed: " << frame_count << std::endl; + return 0; +} diff --git a/VisionPilot/Iceoryx/src/subscriber_node.cpp b/VisionPilot/Iceoryx/src/subscriber_node.cpp new file mode 100644 index 000000000..4a3026319 --- /dev/null +++ b/VisionPilot/Iceoryx/src/subscriber_node.cpp @@ -0,0 +1,130 @@ + +#include "iceoryx_posh/popo/subscriber.hpp" +#include "iceoryx_posh/runtime/posh_runtime.hpp" +#include "iceoryx_topic.hpp" + +#include +#include +#include +#include +#include + +using iox::runtime::PoshRuntime; + +// Helper to get current time in nanoseconds +uint64_t now_ns() { + return std::chrono::duration_cast( + std::chrono::steady_clock::now().time_since_epoch() + ).count(); +} + +// Visualization function (from infer_stream.cpp) +void drawDetections(cv::Mat& frame, const std::vector& detections) +{ + auto getColor = [](int class_id) -> cv::Scalar { + switch(class_id) { + case 1: return cv::Scalar(0, 0, 255); // Red + case 2: return cv::Scalar(0, 255, 255); // Yellow + case 3: return cv::Scalar(255, 255, 0); // Cyan + default: return cv::Scalar(255, 255, 255); // White + } + }; + + for (const auto& det : detections) { + cv::Scalar color = getColor(det.class_id); + cv::rectangle(frame, + cv::Point(static_cast(det.x1), static_cast(det.y1)), + cv::Point(static_cast(det.x2), static_cast(det.y2)), + color, 2); + } +} + + +int main(int argc, char** argv) +{ + // 1. Initialize Iceoryx Runtime and Subscriber + constexpr char APP_NAME[] = "vision-subscriber"; + PoshRuntime::initRuntime(APP_NAME); + + iox::popo::SubscriberOptions subscriberOptions; + subscriberOptions.queueCapacity = 5; + subscriberOptions.historyRequest = 0; // Don't care about history + iox::popo::Subscriber subscriber({"Vision", "AutoSpeed", "Frame"}, subscriberOptions); + + std::cout << "Subscriber running..." << std::endl; + std::cout << "Subscribed to Iceoryx topic 'Vision/AutoSpeed/Frame'" << std::endl; + std::cout << "Press 'q' in the window to quit." << std::endl; + + cv::namedWindow("Iceoryx Subscriber", cv::WINDOW_NORMAL); + cv::resizeWindow("Iceoryx Subscriber", 960, 540); + + // For metrics + std::atomic total_ipc_us{0}; // IPC overhead: publish → receive + std::atomic total_e2e_us{0}; // End-to-end: capture → receive + std::atomic total_visualization_us{0}; // Time to draw and display + std::atomic frame_count{0}; + std::atomic running{true}; + + while (running.load()) + { + subscriber.take() + .and_then([&](const auto& sample) { + auto reception_time_ns = now_ns(); + + // A. Calculate latencies + long ipc_us = (reception_time_ns - sample->publish_timestamp_ns) / 1000; + long e2e_us = (reception_time_ns - sample->capture_timestamp_ns) / 1000; + total_ipc_us.fetch_add(ipc_us); + total_e2e_us.fetch_add(e2e_us); + int count = frame_count.fetch_add(1) + 1; + + // B. Create cv::Mat wrapper around shared memory (zero-copy!) + auto viz_start = now_ns(); + cv::Mat frame(sample->frame_height, sample->frame_width, CV_8UC3, (void*)sample->frame_data); + + // Wrap detections array (no copy until visualization needs it) + std::vector detections; + detections.assign(sample->detections, sample->detections + sample->num_detections); + + // C. Visualize directly on shared memory frame + drawDetections(frame, detections); + cv::imshow("Iceoryx Subscriber", frame); + + if (cv::waitKey(1) == 'q') { + running.store(false); + } + + auto viz_end = now_ns(); + long viz_us = (viz_end - viz_start) / 1000; + total_visualization_us.fetch_add(viz_us); + + // D. Print metrics periodically + if (count % 30 == 0) { + long avg_ipc = total_ipc_us.load() / count; + long avg_e2e = total_e2e_us.load() / count; + long avg_viz = total_visualization_us.load() / count; + + std::cout << "\n========================================\n"; + std::cout << "ICEORYX BENCHMARK - Frame " << count << "\n"; + std::cout << "========================================\n"; + std::cout << "IPC Latency (publish → receive): " << std::fixed << std::setprecision(3) + << (avg_ipc / 1000.0) << " ms\n"; + std::cout << "End-to-End (capture → receive): " << (avg_e2e / 1000.0) << " ms\n"; + std::cout << "Visualization (draw + display): " << (avg_viz / 1000.0) << " ms\n"; + std::cout << "Detections in frame: " << sample->num_detections << "\n"; + std::cout << "========================================\n\n"; + } + }) + .or_else([&](auto& result) { + if (result != iox::popo::ChunkReceiveResult::NO_CHUNK_AVAILABLE) { + std::cerr << "Error receiving chunk: " << result << std::endl; + } + // To prevent a busy-wait loop, sleep briefly if no chunk is available + std::this_thread::sleep_for(std::chrono::milliseconds(2)); + }); + } + + cv::destroyAllWindows(); + std::cout << "Subscriber stopped." << std::endl; + return 0; +}