Added YOLO bounding-box logging feature for testing

IasonTheodorou · IasonTheodorou · commit dfe4a3c5e029 · 2026-01-27T21:15:32.000+01:00
diff --git a/ed_sensor_integration/include/ed/kinect/segmenter.h b/ed_sensor_integration/include/ed/kinect/segmenter.h
@@ -12,6 +12,7 @@
 #include <ed/types.h>
 
 #include <vector>
+#include <utility>
 
 namespace cv
 {
@@ -57,9 +58,9 @@ class Segmenter
      * @param sensor_pose
      * @param clusters
      * @param rgb_image
-     * @return std::vector<cv::Mat> masks // 3D pointcloud masks of all the segmented objects
+     * @return std::pair<std::vector<cv::Mat>, std::vector<cv::Rect>> // 3D pointcloud masks and bounding boxes of all the segmented objects
      */
-    std::vector<cv::Mat> cluster(const cv::Mat& depth_image, const geo::DepthCamera& cam_model,
+    std::pair<std::vector<cv::Mat>, std::vector<cv::Rect>> cluster(const cv::Mat& depth_image, const geo::DepthCamera& cam_model,
                  const geo::Pose3D& sensor_pose, std::vector<EntityUpdate>& clusters, const cv::Mat& rgb_image, bool logging=false);
 
 private:
diff --git a/ed_sensor_integration/include/ed/kinect/updater.h b/ed_sensor_integration/include/ed/kinect/updater.h
@@ -69,6 +69,7 @@ class Updater
     //For displaying SAM MASK
     ros::Publisher mask_pub_;
     ros::Publisher cloud_pub_;
+    ros::Publisher box_pub_;
     bool logging;
 
 };
diff --git a/ed_sensor_integration/include/ed_sensor_integration/kinect/segmodules/sam_seg_module.h b/ed_sensor_integration/include/ed_sensor_integration/kinect/segmodules/sam_seg_module.h
@@ -8,15 +8,16 @@
 
 #include "ed/kinect/entity_update.h"
 
+#include <utility>
 #include <vector>
 
 /**
  * @brief Segmentation pipeline that processes the input image and generates segmentation masks.
  *
  * @param img The input RGB image to segment.
- * @return std::vector<cv::Mat> The generated segmentation masks.
+ * @return std::pair<std::vector<cv::Mat>, std::vector<cv::Rect>> The generated segmentation masks and bounding boxes.
  */
-std::vector<cv::Mat> SegmentationPipeline(const cv::Mat& img, tue::Configuration& config);
+std::pair<std::vector<cv::Mat>, std::vector<cv::Rect>> SegmentationPipeline(const cv::Mat& img, tue::Configuration& config);
 
 /**
  * @brief Overlay segmentation masks on the RGB image for visualization purposes.
@@ -33,10 +34,12 @@ void overlayMasksOnImage_(cv::Mat& rgb, const std::vector<cv::Mat>& masks);
  * @param rgb The RGB image to publish.
  * @param sensor_pose The pose of the sensor.
  * @param clustered_images The clustered segmentation masks.
+ * @param boxes The bounding boxes to visualize.
  * @param mask_pub_ The ROS publisher for the mask images.
  * @param cloud_pub_ The ROS publisher for the point cloud data.
  * @param res_updates The entity updates to publish.
  */
 void publishSegmentationResults(const cv::Mat& filtered_depth_image, const cv::Mat& rgb,
                                 const geo::Pose3D& sensor_pose, std::vector<cv::Mat>& clustered_images,
-                                ros::Publisher& mask_pub_, ros::Publisher& cloud_pub_, std::vector<EntityUpdate>& res_updates);
+                                const std::vector<cv::Rect>& boxes,
+                                ros::Publisher& box_pub_, ros::Publisher& mask_pub_, ros::Publisher& cloud_pub_, std::vector<EntityUpdate>& res_updates);
diff --git a/ed_sensor_integration/src/kinect/sam_seg_module.cpp b/ed_sensor_integration/src/kinect/sam_seg_module.cpp
@@ -2,6 +2,7 @@
 
 #include <cv_bridge/cv_bridge.h>
 #include <filesystem>
+#include <utility>
 #include <pcl/point_cloud.h>
 #include <pcl/point_types.h>
 #include <pcl_conversions/pcl_conversions.h>
@@ -11,7 +12,7 @@
 #include <yolo_onnx_ros/detection.hpp>
 
 
-std::vector<cv::Mat> SegmentationPipeline(const cv::Mat& img, tue::Configuration& config)
+std::pair<std::vector<cv::Mat>, std::vector<cv::Rect>> SegmentationPipeline(const cv::Mat& img, tue::Configuration& config)
 {
     ////////////////////////// YOLO //////////////////////////////////////
     std::unique_ptr<YOLO_V8> yoloDetector;
@@ -52,7 +53,7 @@ std::vector<cv::Mat> SegmentationPipeline(const cv::Mat& img, tue::Configuration
 
     SegmentAnything(samSegmentors, params_encoder, params_decoder, img, resSam, res);
 
-    return std::move(res.masks);
+    return std::make_pair(std::move(res.masks), std::move(res.boxes));
 }
 
 
@@ -111,11 +112,23 @@ void overlayMasksOnImage_(cv::Mat& rgb, const std::vector<cv::Mat>& masks)
 
 void publishSegmentationResults(const cv::Mat& filtered_depth_image, const cv::Mat& rgb,
                                 const geo::Pose3D& sensor_pose, std::vector<cv::Mat>& clustered_images,
-                                ros::Publisher& mask_pub_, ros::Publisher& cloud_pub_, std::vector<EntityUpdate>& res_updates)
+                                const std::vector<cv::Rect>& boxes,
+                                ros::Publisher& box_pub_, ros::Publisher& mask_pub_, ros::Publisher& cloud_pub_, std::vector<EntityUpdate>& res_updates)
 {
     // Overlay masks on the RGB image
     cv::Mat visualization = rgb.clone();
 
+    // Box visualization
+    cv::Mat box_visualization = rgb.clone();
+    for(const auto& box : boxes)
+    {
+        cv::rectangle(box_visualization, box, cv::Scalar(0, 255, 0), 2);
+    }
+    
+    sensor_msgs::ImagePtr box_msg = cv_bridge::CvImage(std_msgs::Header(), "bgr8", box_visualization).toImageMsg();
+    box_msg->header.stamp = ros::Time::now();
+    box_pub_.publish(box_msg);
+
     // Create a path to save the image using platform-independent temp directory
     std::filesystem::path temp_dir = std::filesystem::temp_directory_path();
     cv::imwrite((temp_dir / "visualization.png").string(), visualization);
diff --git a/ed_sensor_integration/src/kinect/segmenter.cpp b/ed_sensor_integration/src/kinect/segmenter.cpp
@@ -199,14 +199,16 @@ cv::Mat Segmenter::preprocessRGBForSegmentation(const cv::Mat& rgb_image,
 }
 // ----------------------------------------------------------------------------------------------------
 
-std::vector<cv::Mat> Segmenter::cluster(const cv::Mat& depth_image, const geo::DepthCamera& cam_model,
+std::pair<std::vector<cv::Mat>, std::vector<cv::Rect>> Segmenter::cluster(const cv::Mat& depth_image, const geo::DepthCamera& cam_model,
                         const geo::Pose3D& sensor_pose, std::vector<EntityUpdate>& clusters, const cv::Mat& rgb_image, bool logging)
 {
     int width = depth_image.cols;
     int height = depth_image.rows;
     ROS_DEBUG("Cluster with depth image of size %i, %i", width, height);
 
-    std::vector<cv::Mat> masks = SegmentationPipeline(rgb_image.clone(), config_);
+    std::pair<std::vector<cv::Mat>, std::vector<cv::Rect>> seg_result = SegmentationPipeline(rgb_image.clone(), config_);
+    std::vector<cv::Mat>& masks = seg_result.first;
+
     ROS_DEBUG("Creating clusters");
 
     // Pre-allocate temporary storage (one per mask, avoid push_back races)
@@ -325,5 +327,5 @@ std::vector<cv::Mat> Segmenter::cluster(const cv::Mat& depth_image, const geo::D
         }
     }
 
-    return masks;
+    return seg_result;
 }
diff --git a/ed_sensor_integration/src/kinect/updater.cpp b/ed_sensor_integration/src/kinect/updater.cpp
@@ -222,7 +222,8 @@ Updater::Updater(tue::Configuration config) : logging(false)
     {
         ros::NodeHandle nh("~");
         mask_pub_ = nh.advertise<sensor_msgs::Image>("segmentation_masks", 1);
-        cloud_pub_ = nh.advertise<sensor_msgs::PointCloud2>("point_cloud_ooo", 1);
+        cloud_pub_ = nh.advertise<sensor_msgs::PointCloud2>("point_cloud_sam", 1);
+        box_pub_ = nh.advertise<sensor_msgs::Image>("bounding_boxes_yolo", 1);
     }
 }
 
@@ -426,10 +427,13 @@ bool Updater::update(const ed::WorldModel& world, const rgbd::ImageConstPtr& ima
     // - - - - - - - - - - - - - - - - - - - - - - - -
     // Cluster
     filtered_rgb_image = segmenter_->preprocessRGBForSegmentation(rgb, filtered_depth_image);
-    std::vector<cv::Mat> clustered_images = segmenter_->cluster(filtered_depth_image, cam_model, sensor_pose, res.entity_updates, filtered_rgb_image, logging);
+    std::pair<std::vector<cv::Mat>, std::vector<cv::Rect>> cluster_result = segmenter_->cluster(filtered_depth_image, cam_model, sensor_pose, res.entity_updates, filtered_rgb_image, logging);
+    
+    std::vector<cv::Mat>& clustered_images = cluster_result.first;
+
     if (logging)
     {
-        publishSegmentationResults(filtered_depth_image, filtered_rgb_image, sensor_pose, clustered_images, mask_pub_, cloud_pub_,  res.entity_updates);
+        publishSegmentationResults(filtered_depth_image, filtered_rgb_image, sensor_pose, clustered_images, cluster_result.second, box_pub_, mask_pub_, cloud_pub_,  res.entity_updates);
     }
     // - - - - - - - - - - - - - - - - - - - - - - - -
     // Merge the detected clusters if they overlap in XY or Z

Original file line number	Diff line number	Diff line change
`@@ -199,14 +199,16 @@ cv::Mat Segmenter::preprocessRGBForSegmentation(const cv::Mat& rgb_image,`
`199`	`199`	`}`
`200`	`200`	`// ----------------------------------------------------------------------------------------------------`
`201`	`201`
`202`		`-std::vector<cv::Mat> Segmenter::cluster(const cv::Mat& depth_image, const geo::DepthCamera& cam_model,`
	`202`	`+std::pair<std::vector<cv::Mat>, std::vector<cv::Rect>> Segmenter::cluster(const cv::Mat& depth_image, const geo::DepthCamera& cam_model,`
`203`	`203`	`const geo::Pose3D& sensor_pose, std::vector<EntityUpdate>& clusters, const cv::Mat& rgb_image, bool logging)`
`204`	`204`	`{`
`205`	`205`	`int width = depth_image.cols;`
`206`	`206`	`int height = depth_image.rows;`
`207`	`207`	`ROS_DEBUG("Cluster with depth image of size %i, %i", width, height);`
`208`	`208`
`209`		`- std::vector<cv::Mat> masks = SegmentationPipeline(rgb_image.clone(), config_);`
	`209`	`+ std::pair<std::vector<cv::Mat>, std::vector<cv::Rect>> seg_result = SegmentationPipeline(rgb_image.clone(), config_);`
	`210`	`+ std::vector<cv::Mat>& masks = seg_result.first;`
	`211`	`+`
`210`	`212`	`ROS_DEBUG("Creating clusters");`
`211`	`213`
`212`	`214`	`// Pre-allocate temporary storage (one per mask, avoid push_back races)`
`@@ -325,5 +327,5 @@ std::vector<cv::Mat> Segmenter::cluster(const cv::Mat& depth_image, const geo::D`
`325`	`327`	`}`
`326`	`328`	`}`
`327`	`329`
`328`		`- return masks;`
	`330`	`+ return seg_result;`
`329`	`331`	`}`
Original file line number	Diff line number	Diff line change
`@@ -222,7 +222,8 @@ Updater::Updater(tue::Configuration config) : logging(false)`
`222`	`222`	`{`
`223`	`223`	`ros::NodeHandle nh("~");`
`224`	`224`	`mask_pub_ = nh.advertise<sensor_msgs::Image>("segmentation_masks", 1);`
`225`		`- cloud_pub_ = nh.advertise<sensor_msgs::PointCloud2>("point_cloud_ooo", 1);`
	`225`	`+ cloud_pub_ = nh.advertise<sensor_msgs::PointCloud2>("point_cloud_sam", 1);`
	`226`	`+ box_pub_ = nh.advertise<sensor_msgs::Image>("bounding_boxes_yolo", 1);`
`226`	`227`	`}`
`227`	`228`	`}`
`228`	`229`
`@@ -426,10 +427,13 @@ bool Updater::update(const ed::WorldModel& world, const rgbd::ImageConstPtr& ima`
`426`	`427`	`// - - - - - - - - - - - - - - - - - - - - - - - -`
`427`	`428`	`// Cluster`
`428`	`429`	`filtered_rgb_image = segmenter_->preprocessRGBForSegmentation(rgb, filtered_depth_image);`
`429`		`- std::vector<cv::Mat> clustered_images = segmenter_->cluster(filtered_depth_image, cam_model, sensor_pose, res.entity_updates, filtered_rgb_image, logging);`
	`430`	`+ std::pair<std::vector<cv::Mat>, std::vector<cv::Rect>> cluster_result = segmenter_->cluster(filtered_depth_image, cam_model, sensor_pose, res.entity_updates, filtered_rgb_image, logging);`
	`431`	`+`
	`432`	`+ std::vector<cv::Mat>& clustered_images = cluster_result.first;`
	`433`	`+`
`430`	`434`	`if (logging)`
`431`	`435`	`{`
`432`		`- publishSegmentationResults(filtered_depth_image, filtered_rgb_image, sensor_pose, clustered_images, mask_pub_, cloud_pub_, res.entity_updates);`
	`436`	`+ publishSegmentationResults(filtered_depth_image, filtered_rgb_image, sensor_pose, clustered_images, cluster_result.second, box_pub_, mask_pub_, cloud_pub_, res.entity_updates);`
`433`	`437`	`}`
`434`	`438`	`// - - - - - - - - - - - - - - - - - - - - - - - -`
`435`	`439`	`// Merge the detected clusters if they overlap in XY or Z`