Skip to content

Commit 85405da

Browse files
authored
Merge pull request #30 from AGH-CEAI/feature/add_spatial_detection
Add spatial detection
2 parents 811591b + f31bb70 commit 85405da

File tree

9 files changed

+213
-33
lines changed

9 files changed

+213
-33
lines changed

.pre-commit-config.yaml

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -17,10 +17,11 @@ repos:
1717
- id: end-of-file-fixer
1818
- id: fix-byte-order-marker
1919
- id: mixed-line-ending
20-
- id: pretty-format-json
2120
- id: trailing-whitespace
2221
- id: check-yaml
2322
exclude: joint_limits.yaml # uses custom macro for deg<->rad transformation
23+
- id: pretty-format-json
24+
args: ['--autofix', '--no-sort-keys', '--indent', '2']
2425

2526
- repo: https://github.com/codespell-project/codespell
2627
rev: v2.4.1

aegis_bringup/docs/launch_diagram.plantuml

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -40,7 +40,8 @@ package aegis_control {
4040
class ur_drivers << (Y,#ffffc9) YAML >> {}
4141
}
4242
package cameras {
43-
class cameras << (Y,#ffffc9) YAML >> {}
43+
class depthai_cameras << (Y,#ffffc9) YAML >> {}
44+
class yolo << (J,#ffffc9) JSON >> {}
4445
}
4546
}
4647
}
@@ -116,4 +117,5 @@ hide << YAML >> members
116117
hide << urdf.xacro >> members
117118
hide << SRDF >> members
118119
hide << rviz >> members
120+
hide << JSON >> members
119121
@enduml

aegis_control/CHANGELOG.md

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -9,8 +9,9 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
99

1010
### Added
1111

12+
* [PR-30](https://github.com/AGH-CEAI/aegis_ros/pull/30) - Implemented YOLOv5 model for spatial detection.
13+
* [PR-24](https://github.com/AGH-CEAI/aegis_ros/pull/24) - Implemented point cloud support.
1214
* [PR-21](https://github.com/AGH-CEAI/aegis_ros/pull/21) - Initial version of the DepthAI driver with support for the OAK-D Pro camera.
13-
* [PR-24](https://github.com/AGH-CEAI/aegis_ros/pull/24) - Added a point cloud node.
1415
* [PR-9](https://github.com/AGH-CEAI/aegis_ros/pull/9) - Initial version of the `aegis_control` package.
1516

1617
### Changed

aegis_control/README.md

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -30,6 +30,9 @@ aegis_control/
3030
| [start_drivers.launch.py](./launch/start_drivers.launch.py) | The main launch file to run the entire Aegis' `ros2_control` stack. |
3131
| [ur_driver.launch.py](./launch/ur_driver.launch.py) | Launches nodes from the [ur_robot_driver](https://github.com/UniversalRobots/Universal_Robots_ROS2_Driver) to control the UR5e robot. |
3232

33+
## Neural network
34+
For details on training and deploying the YOLOv5 model, see the [tutorial](./docs/yolov5_tutorial.md).
35+
3336
## Development notes
3437

3538
- ROS 2 Humble ships with the older structure of the [Universal_Robots_ROS2_Driver](https://github.com/UniversalRobots/Universal_Robots_ROS2_Driver/tree/humble). Due to the complexity of our project (where we need to merge several other controllers into a single `control_manager` node configuration), the `ur_driver.launch.py` file is based on version `2.5.1` of UR's `ur_robot_driver` [launch file](https://github.com/UniversalRobots/Universal_Robots_ROS2_Driver/blob/humble/ur_robot_driver/launch/ur_control.launch.py). The main difference is the removal of all unused parameters, which are set by default by the driver itself.

aegis_control/config/cameras/depthai_cameras.yaml

Lines changed: 5 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -5,12 +5,13 @@
55
i_mx_id: 184430108157970F00
66
i_nn_type: spatial
77
i_pipeline_type: RGBD
8-
rgb:
9-
i_enable_preview: true
10-
i_keep_preview_aspect_ratio: false
118
nn:
12-
i_enable_passthrough: true
139
i_disable_resize: true
10+
i_enable_passthrough: true
11+
rgb:
12+
i_enable_preview: true
13+
i_keep_preview_aspect_ratio: true
14+
i_preview_size: 416
1415
stereo:
1516
i_subpixel: true
1617
spatial_bb_node:
Lines changed: 61 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,61 @@
1+
{
2+
"model": {
3+
"zoo": "path"
4+
},
5+
"nn_config": {
6+
"output_format": "detection",
7+
"NN_family": "YOLO",
8+
"input_size": "416x416",
9+
"NN_specific_metadata": {
10+
"classes": 4,
11+
"coordinates": 4,
12+
"anchors": [
13+
10,
14+
13,
15+
16,
16+
30,
17+
33,
18+
23,
19+
30,
20+
61,
21+
62,
22+
45,
23+
59,
24+
119,
25+
116,
26+
90,
27+
156,
28+
198,
29+
373,
30+
326
31+
],
32+
"anchor_masks": {
33+
"side52": [
34+
0,
35+
1,
36+
2
37+
],
38+
"side26": [
39+
3,
40+
4,
41+
5
42+
],
43+
"side13": [
44+
6,
45+
7,
46+
8
47+
]
48+
},
49+
"iou_threshold": 0.5,
50+
"confidence_threshold": 0.5
51+
}
52+
},
53+
"mappings": {
54+
"labels": [
55+
"tetragon",
56+
"hexagon",
57+
"octagon",
58+
"dodecagon"
59+
]
60+
}
61+
}
Lines changed: 78 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,78 @@
1+
# YOLOv5 tutorial
2+
This guide explains how to set up, train, convert and deploy a YOLOv5 model for this project.
3+
4+
## Directory structure
5+
After training, your directory structure should be organized as follows:
6+
7+
```
8+
datasets
9+
└── polygons
10+
├── test
11+
│ ├── images
12+
│ └── labels
13+
├── train
14+
│ ├── images
15+
│ └── labels
16+
└── val
17+
├── images
18+
└── labels
19+
yolov5
20+
├── data
21+
│ └── polygons.yaml
22+
├── runs
23+
│ └── train
24+
│ └── exp
25+
│ └── weights
26+
│ └── best.pt
27+
└── train.py
28+
```
29+
30+
## Dataset
31+
Download the `polygons` dataset from [this repository](https://github.com/Patrycj2a/praca_inzynierska/tree/main/datasets/polygons/) and place it in the `datasets` folder.
32+
33+
## Training configuration file
34+
Create the `polygons.yaml` file inside the `yolov5/data` directory with the following content:
35+
36+
```yaml
37+
names:
38+
0: tetragon
39+
1: hexagon
40+
2: octagon
41+
3: dodecagon
42+
nc: 4
43+
path: ../datasets/polygons
44+
train: train/images
45+
val: val/images
46+
test: test/images
47+
```
48+
49+
## YOLOv5 setup
50+
Set up the YOLOv5 repository by running the following commands:
51+
52+
```bash
53+
git clone https://github.com/ultralytics/yolov5.git
54+
cd yolov5
55+
git pull
56+
pip install -U -r requirements.txt
57+
```
58+
59+
## Model training
60+
Train the YOLOv5 model using the following command:
61+
62+
```bash
63+
python3 train.py --img 416 --batch 16 --epochs 1000 --data polygons.yaml --weights yolov5s.pt --cos-lr
64+
```
65+
66+
The trained model weights will be saved as `best.pt` in the `yolov5/runs/train/exp/weights` directory.
67+
68+
## Model conversion
69+
1. Go to [Luxonis Tools](https://tools.luxonis.com/).
70+
2. Set `Yolo Version` to `YOLOv5`.
71+
3. Click `File` and upload the obtained model weights (`best.pt`).
72+
4. Set `Input image shape` to `416`.
73+
5. In advanced options, set `Shaves` to `5`.
74+
6. Click `Submit` to start the conversion.
75+
7. Extract the downloaded ZIP file and locate the model named `best_openvino_2022.1_5shave.blob`.
76+
77+
## Model placement
78+
Move the `best_openvino_2022.1_5shave.blob` file to the `ceai_models` directory in your home folder and rename it to `yolo.blob`.

aegis_control/launch/depthai_cameras_driver.launch.py

Lines changed: 58 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -1,27 +1,25 @@
1-
from launch import LaunchDescription
1+
import json
2+
import yaml
3+
import tempfile
4+
from pathlib import Path
5+
from launch import LaunchDescription, LaunchContext
26
from launch.actions import OpaqueFunction
37
from launch.conditions import UnlessCondition
4-
from launch.substitutions import LaunchConfiguration, PathJoinSubstitution
5-
from launch_ros.actions import Node, ComposableNodeContainer, LoadComposableNodes
8+
from launch.substitutions import LaunchConfiguration
9+
from launch_ros.actions import ComposableNodeContainer, LoadComposableNodes, Node
610
from launch_ros.descriptions import ComposableNode
7-
from launch_ros.substitutions import FindPackageShare
11+
from ament_index_python.packages import get_package_share_directory
812

913

1014
class DepthAIConfig:
1115
def __init__(self):
16+
self._modify_config()
17+
1218
self.name_pro_scene = LaunchConfiguration(
1319
"name_pro_scene", default="oak_d_pro_scene"
1420
)
15-
self.params_file = PathJoinSubstitution(
16-
[
17-
FindPackageShare("aegis_control"),
18-
"config",
19-
"cameras",
20-
"depthai_cameras.yaml",
21-
]
22-
)
23-
# TODO(issue#26) create proper mock for the luxonis cameras
24-
self.mock_hardware = LaunchConfiguration("mock_hardware", default="true")
21+
# TODO(issue#26) Introduce a mock for the DepthAI cameras
22+
self.mock_hardware = LaunchConfiguration("mock_hardware", default="false")
2523

2624
self.cam_model_pro_scene = LaunchConfiguration(
2725
"camera_model_pro_scene", default="OAK-D-S2"
@@ -49,25 +47,59 @@ def __init__(self):
4947
)
5048
self.cam_yaw_pro_scene = LaunchConfiguration("cam_yaw_pro_scene", default="0")
5149

50+
def _modify_config(self) -> None:
51+
# TODO(issue#31) Fix YOLO configuration not being applied correctly
52+
package_share_path = Path(get_package_share_directory("aegis_control"))
53+
model_path = Path.home() / "ceai_models" / "yolo.blob"
54+
yolo_src_cfg_path = package_share_path / "config" / "cameras" / "yolo.json"
55+
cam_src_params_path = (
56+
package_share_path / "config" / "cameras" / "depthai_cameras.yaml"
57+
)
58+
59+
self.yolo_cfg_path = Path(
60+
tempfile.NamedTemporaryFile(suffix=".json", delete=False).name
61+
)
62+
self.cam_params_path = Path(
63+
tempfile.NamedTemporaryFile(suffix=".yaml", delete=False).name
64+
)
65+
66+
with open(yolo_src_cfg_path, "r") as file:
67+
yolo_cfg = json.load(file)
68+
69+
yolo_cfg["model"]["model_name"] = str(model_path)
70+
71+
with open(self.yolo_cfg_path, "w") as file:
72+
json.dump(yolo_cfg, file, indent=2)
73+
74+
with open(cam_src_params_path, "r") as file:
75+
cam_params = yaml.safe_load(file)
76+
77+
cam_params["/oak_d_pro_scene"]["ros__parameters"]["nn"]["i_nn_config_path"] = (
78+
str(self.yolo_cfg_path)
79+
)
80+
81+
with open(self.cam_params_path, "w") as file:
82+
yaml.safe_dump(cam_params, file)
83+
5284

5385
def generate_launch_description() -> LaunchDescription:
5486
return LaunchDescription([OpaqueFunction(function=launch_setup)])
5587

5688

57-
def launch_setup(context) -> list[Node]:
89+
def launch_setup(context: LaunchContext) -> list[Node]:
5890
# TODO(issue#22): Setup global log level configuration
5991
log_level = "info"
6092
if context.environment.get("DEPTHAI_DEBUG") == "1":
6193
log_level = "debug"
6294

6395
cfg = DepthAIConfig()
64-
name_pro_scene_str = cfg.name_pro_scene.perform(context)
96+
name_pro_scene = cfg.name_pro_scene.perform(context)
6597

6698
# TODO(issue#23): Investigate the necessity of tf parameters
6799
tf_params_pro_scene = {
68100
"camera": {
69101
"i_publish_tf_from_calibration": False,
70-
"i_tf_tf_prefix": name_pro_scene_str,
102+
"i_tf_tf_prefix": name_pro_scene,
71103
"i_tf_camera_model": cfg.cam_model_pro_scene,
72104
"i_tf_parent_frame": cfg.parent_frame_pro_scene.perform(context),
73105
"i_tf_base_frame": cfg.base_frame_pro_scene.perform(context),
@@ -83,22 +115,22 @@ def launch_setup(context) -> list[Node]:
83115
return [
84116
create_camera_node(
85117
cfg.mock_hardware,
86-
name_pro_scene_str,
118+
name_pro_scene,
87119
tf_params_pro_scene,
88-
cfg.params_file,
120+
cfg.cam_params_path,
89121
log_level,
90122
),
91-
create_rectify_node(cfg.mock_hardware, name_pro_scene_str),
92-
create_spatial_bb_node(cfg.mock_hardware, name_pro_scene_str, cfg.params_file),
93-
create_point_cloud_node(cfg.mock_hardware, name_pro_scene_str),
123+
create_rectify_node(cfg.mock_hardware, name_pro_scene),
124+
create_spatial_bb_node(cfg.mock_hardware, name_pro_scene, cfg.cam_params_path),
125+
create_point_cloud_node(cfg.mock_hardware, name_pro_scene),
94126
]
95127

96128

97129
def create_camera_node(
98130
mock_hardware: LaunchConfiguration,
99131
name: str,
100132
tf_params: dict,
101-
params_file: LaunchConfiguration,
133+
cam_params_path: LaunchConfiguration,
102134
log_level: str,
103135
) -> LoadComposableNodes:
104136
return ComposableNodeContainer(
@@ -112,7 +144,7 @@ def create_camera_node(
112144
package="depthai_ros_driver",
113145
plugin="depthai_ros_driver::Camera",
114146
name=name,
115-
parameters=[params_file, tf_params],
147+
parameters=[cam_params_path, tf_params],
116148
)
117149
],
118150
arguments=["--ros-args", "--log-level", log_level],
@@ -150,7 +182,7 @@ def create_rectify_node(
150182
def create_spatial_bb_node(
151183
mock_hardware: LaunchConfiguration,
152184
name: str,
153-
params_file: LaunchConfiguration,
185+
cam_params_path: LaunchConfiguration,
154186
) -> LoadComposableNodes:
155187
return LoadComposableNodes(
156188
condition=UnlessCondition(mock_hardware),
@@ -167,7 +199,7 @@ def create_spatial_bb_node(
167199
("overlay", name + "/overlay"),
168200
("spatial_bb", name + "/spatial_bb"),
169201
],
170-
parameters=[params_file],
202+
parameters=[cam_params_path],
171203
),
172204
],
173205
)

aegis_control/launch/start_drivers.launch.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -54,6 +54,7 @@ def generate_launch_description() -> LaunchDescription:
5454
]
5555
)
5656
),
57+
launch_arguments=launch_args.items(),
5758
)
5859

5960
return LaunchDescription(

0 commit comments

Comments
 (0)