diff --git a/Dockerfile b/Dockerfile index 1e59121..9a7dc7e 100644 --- a/Dockerfile +++ b/Dockerfile @@ -28,6 +28,7 @@ RUN apt-get update \ ros-"$ROS_DISTRO"-mola-lidar-odometry \ python3-pip \ python3-vcstool \ + wget \ && pip install --no-cache-dir mcap pandas colorama \ segments-ai awscli boto3 scipy watchdog colorlog \ && pip install --no-cache-dir --upgrade setuptools pip \ @@ -69,7 +70,7 @@ RUN groupadd -g $GROUP_ID $USERNAME && \ usermod -aG sudo $USERNAME && \ echo "$USERNAME ALL=(ALL) NOPASSWD: ALL" >> /etc/sudoers -# Setup ros2_bag_exporter +# Setup tartan_rosbag_exporter RUN apt-get update \ && DEBIAN_FRONTEND=noninteractive \ apt-get -y --quiet --no-install-recommends install \ @@ -87,11 +88,12 @@ RUN apt-get update \ && pip install --no-cache-dir mcap colorama \ && rm -rf /var/lib/apt/lists/* -ENV EXPORTER=$ROS_WS/src/tartan_rosbag_exporter -RUN git clone -b 1.1.0 https://github.com/ipab-rad/tartan_rosbag_exporter.git $EXPORTER \ +RUN mkdir -p "$ROS_WS"/src \ + && wget -q -O /tmp/exporter.tar.gz "https://github.com/ipab-rad/tartan_rosbag_exporter/archive/refs/tags/2.0.0.tar.gz" \ + && tar -xzf /tmp/exporter.tar.gz -C "$ROS_WS"/src \ && . /opt/ros/"$ROS_DISTRO"/setup.sh \ && colcon build --cmake-args -DCMAKE_BUILD_TYPE=Release \ - && rm -rf $ROS_WS/build $EXPORTER + && rm -rf "$ROS_WS"/build /tmp/exporter.tar.gz "$ROS_WS"/src # Give read/write permissions to the user on the ROS_WS directory RUN chown -R $USERNAME:$USERNAME $ROS_WS && \ diff --git a/README.md b/README.md index bd64159..9a3f09b 100644 --- a/README.md +++ b/README.md @@ -1,25 +1,51 @@ # Tartan Forklift -Collection of tools to manage ROSbag recordings data from AV vehicle. +## Overview -## Labelling preproc +This repository contains all the modules required for the vehicle data uploading pipeline to the EIDF-VM, along with tools to convert uploaded recordings into Segments.ai datasets. 
-This package contains different modules to read and parse exported ROS sensor data to create and prepare a dataset sample for the [Segments.ai](https://segments.ai/) platform for labelling. +![Alt text](./docs/data_pipeline_diagram.jpg) -Currently, the modules assume the following: +The workflow begins with the **Rosbag Uploader**, which compresses and transfers ROS bag recordings from the vehicle’s storage to the EIDF-VM. - 1. A ROS bag was exported using [ipab-rad/tartan_rosbag_exporter](https://github.com/ipab-rad/tartan_rosbag_exporter). - 2. The user is familiar with Segments.ai platform and its sample formats, and has created a dataset with [multi-sensor sequence](https://docs.segments.ai/reference/sample-types#multi-sensor-sequence) support. - 3. The user has access to both EIDF S3 and Segments.ai. +Once uploaded, the **New Rosbag Watchdog** monitors the EIDF-VM storage and detects the arrival of new recordings. When a new ROS bag is found, the **ROS2 Bag Extractor** processes it, exporting its contents into system files such as images, point clouds, transformations, and calibration files. -### Usage guide +With the exported data ready, the **DatasetCreator** builds a Segments.ai dataset using both the processed files and the original ROS bag metadata. This stage includes: +- **Trajectory Generator** – creates a trajectory for each point cloud in the recording and stores the trajectory points in a `.tum` file. +- **Data Uploader** – transfers the exported files to an S3 bucket for Segments.ai to access. +- **S3 Backup Agent** – stores a backup of all newly processed ROS bag recordings in an S3 bucket. -To use the `labelling_preproc`'s modules to upload and add a **multi-sensor sequence** to segments.ai, you will need access key tokens. +This pipeline ensures that data collected from the vehicle is uploaded, processed, backed up, and prepared for use within the Segments.ai platform. 
-Create a file named `dataset_keys.env` inside a `keys` directory in the parent directory of this repository: +## Usage guide - Data Pipeline + +Requirements: + + 1. The user has access to EIDF virtual machine (EIDF-VM) SSD storage `/mnt/vdb/*` and S3 + 2. The user has access to Segments.ai platform + 3. The user has defined an SSH alias for the EIDF-VM on vehicle's server + - Create an SSH alias in `~/.ssh/config` to access the EIDF-VM, if you haven't. + Add these two aliases to the `config` file and make sure the `User` field points to your EIDF-VM username + ```bash + Host eidf-gateway + HostName eidf-gateway.epcc.ed.ac.uk + User + IdentityFile ~/.ssh/id_ed25519 + + Host eidf-vm + HostName + User + IdentityFile ~/.ssh/id_ed25519 + ProxyJump eidf-gateway + ``` + - See EIDF's [guide](https://docs.eidf.ac.uk/access/ssh/) for further reference + +Set up this repository by defining your access key tokens in a `dataset_keys.env` file. Create the file inside a `keys` directory in the parent directory of this repository: ```bash -mkdir -p keys && touch keys/dataset_keys.env +cd /tartan_forklift + +mkdir -p keys && touch ./keys/dataset_keys.env ``` Add the following environment variables to `dataset_keys.env`: @@ -29,23 +55,24 @@ Add the following environment variables to `dataset_keys.env`: AWS_ACCESS_KEY_ID=my_access_key_id AWS_SECRET_ACCESS_KEY=my_secret_access_key AWS_ENDPOINT_URL=my_s3_organisation_url -BUCKET_NAME=my_bucket_name +AWS_BUCKET_NAME=my_bucket_name +AWS_ROSBAG_BACKUP_BUCKET_NAME=my_backup_bucket_name EIDF_PROJECT_NAME=my_projectxyz # Segments.ai key SEGMENTS_API_KEY=my_segment_ai_api_key ``` -The `dev.sh` script will attempt to locate the `dataset_keys.env` file. If the file is missing or incorrectly named, the script will throw an error. File and path names are case-sensitive. +Both `dev.sh` and `runtime.sh` scripts will attempt to locate the `dataset_keys.env` file. If the file is missing or incorrectly named, the script will throw an error. 
File and path names are case-sensitive. For access credentials, please contact [Hector Cruz](@hect95) or [Alejandro Bordallo](@GreatAlexander). -#### Build and run the Docker container +### Build and run the Docker container To build and run the Docker container interactively, use: ```bash -./dev.sh -l -p -o +./runtime.sh -l -p -o ``` where: @@ -55,17 +82,130 @@ where: The input directories will be mounted in `/opt/ros_ws/rosbags` and `/opt/ros_ws/exported_data` in the container respectively. -After running the Docker container, install the Python modules: +If running in dev mode `dev.sh`, install the Python modules after running the docker: ```bash +./dev.sh -l -p -o + pip install -e ./scripts ``` +### Run the Data pipeline + +#### 1. BEFORE YOU DO ANYTHING: Start the `data_manager` + +- Log in to the **EIDF-VM**. +- Ensure you have set up the `tartan_forklift` keys as described in the [Usage guide](https://github.com/ipab-rad/tartan_forklift/tree/new-sensors-configuration?tab=readme-ov-file#usage-guide). +- Use the **admin** S3 account keys and secret from the EIDF Project [dashboard](https://portal.eidf.ac.uk/project/view/eidf150). +- Set `AWS_BUCKET_NAME` and `AWS_ROSBAG_BACKUP_BUCKET_NAME` environment variables in your `dataset_keys.env` as described [here](https://github.com/ipab-rad/tartan_carpet/wiki/How-to-upload-rosbags-after-data-collection#1-before-you-do-anything-start-the-data_manager) + +Start the `data_manager`: +```bash +cd /your_path/tartan_forklift +./runtime.sh data_manager +``` + +#### 2. Upload the rosbags + +- Log in to the vehicle's server. +- Default upload sources: + - `/mnt/sata_ssd_raid/edinburgh` (vehicle server) + - `/mnt/vdb/data` (EIDF-VM) + +If you need a different location, update `upload_rosbags/upload_config.yaml`. 
+ +Activate the Python environment: +```bash +source ~/python_default/bin/activate +``` + +Start the upload: +```bash +cd ~/tartan_forklift +python3 -m upload_rosbags.upload_rosbags --config ./upload_rosbags/upload_config.yaml --lftp-password +``` + +#### 3. What happens next + +When a rosbag recording is fully uploaded, the `data_manager` (on the EIDF-VM) detects it and automatically: +- Creates a dataset in Segments.ai. +- Backs up the rosbag to S3. + + +### Run each module manually +Each module was designed to run as a standalone script. Please pass the `--help` flag to each one of them to check its usage/arguments. + +#### Upload rosbags + +This script automates the process of uploading rosbags from the IPAB-RAD autonomous vehicle server to a cloud instance within the [EIDF](https://edinburgh-international-data-facility.ed.ac.uk/) (Edinburgh International Data Facility) infrastructure. It streamlines data collection and transfer by first compressing the rosbags using the [MCAP CLI](https://mcap.dev/guides/cli), and then uploading the compressed files. This ensures efficient handling and storage of large datasets generated by vehicle sensors. 
+ +**Dependencies** + +- **Vehicle machine (host)** + + - Install Python dependencies: + + ```bash + pip install colorlog paramiko paramiko_jump + ``` + + - Install MCAP CLI `v0.0.47` for rosbag compression: + + ```bash + wget -O $HOME/mcap https://github.com/foxglove/mcap/releases/download/releases%2Fmcap-cli%2Fv0.0.47/mcap-linux-amd64 + chmod +x $HOME/mcap + ``` + + - Set up an FTP server by following this [guide](https://documentation.ubuntu.com/server/how-to/networking/ftp/index.html) + +- ***EIDF (cloud)*** + + - Install `lftp`: + ```bash + sudo apt update && sudo apt install lftp + ``` + +**Usage** + +To execute the script from the host machine, run: + +```bash +python3 -m upload_rosbags.upload_rosbags \ + --config ./upload_rosbags/upload_config.yaml \ + --lftp-password \ + +# Add --debug flag to set DEBUG log level +``` + +**YAML Parameters** + +The configuration file accepts the following parameters: + +- `local_host_user` (str): Username for the host machine. +- `local_hostname` (str): IP address or hostname of the host machine (interface connected to the internet). +- `local_rosbags_directory` (str): Path to the directory on the host machine containing the rosbags. +- `cloud_ssh_alias` (str): SSH alias for the cloud server defined in `~/.ssh/config`. If unset, `cloud_user` and `cloud_hostname` must be provided. +- `cloud_user` (str): Username for the cloud target machine. Ignored if `cloud_ssh_alias` is defined and valid. +- `cloud_hostname` (str): Hostname or IP of the cloud target machine. Ignored if `cloud_ssh_alias` is defined and valid. +- `cloud_upload_directory` (str): Destination directory on the cloud server for uploading compressed files. +- `mcap_bin_path` (str): Full path to the `mcap` CLI binary. +- `mcap_compression_chunk_size` (int): Chunk size in bytes used during MCAP compression. +- `compression_parallel_workers` (int): Number of parallel worker threads for compression. 
+- `compression_queue_max_size` (int): Maximum number of compressed rosbags allowed in the queue at any time. + +See [upload\_config.yaml](./upload_rosbags/upload_config.yaml) for a sample configuration. + +**Logging** + +The script logs its actions to a file named `_rosbag_upload.log`. -#### Export your ROS bags -As mentioned above, the `labelling_preproc` modules expect exported data before creating a sample. You can export your rosbags with the following command: +#### Export ROS bags + +You can export your rosbags with the following command: ```bash +./runtime.sh bash + cd /opt/ros_ws ros2 run ros2_bag_exporter bag_exporter --ros-args \ @@ -84,7 +224,18 @@ The exporter will create a directory inside `exported_data/`. This directory wil We'll refer to this directory as ``. -#### Add a multi-sensor sequence sample +#### Create a Dataset +Export the ROSbag recording first and then run: + +```bash +./runtime.sh bash + +dataset_creator --export_directory ./exported_data/ \ + --recording_directory ./rosbags/ \ + --dataset_attributes_file ./config/dataset_attributes.json +``` + +`dataset_creator` runs the following sub-modules in the background, but if for some reason you need to run each sub-module individually here is what you need to do: Create a new dataset on the Segments.ai platform if you haven't already. For consistency, name the dataset exactly the same as your exported `` directory. On Segments.ai, datasets follow the format `organisation_name/dataset_name`. Therefore, your full `dataset_name` should be `UniversityOfEdinburgh/_name`, where `UniversityOfEdinburgh` is the organisation name currently in use. This naming convention helps keep your exported data and Segments.ai datasets aligned. @@ -127,128 +278,14 @@ Create a new dataset on the Segments.ai platform if you haven't already. For con If successful, you will see your new sequence listed in the _Samples_ tab on your dataset page. 
-## Metadata Generator Usage - -This script generates metadata for ROSbag MCAP files. The metadata is compiled into a `resources.json` file that complies with the EIDF requirements - -### Features -- Reads MCAP files and extracts metadata such as: - - Duration of the log - - Topics and message counts - - File size - - File hash (MD5) -- Generates a JSON file (`resources.json`) with metadata for all MCAP files in a given directory. -- Metadata includes: - - File name - - Identifier - - Description - - Format - - License - - Size - - Hash - - Issued date - - Modified date - - Duration - - Topics and message counts - -### Usage - -#### 1. Setup - -Ensure all dependencies are installed. You can use the following command to install required packages: - -```bash -pip install mcap -``` - -#### 2. Running the Script - -To generate the metadata JSON file, follow these steps: - -- Place all your MCAP files in a directory. -- The default directory is `/recorded_datasets/edinburgh` -- Run the script: - - ```bash - python metadata_generator.py - ``` - -If you want to generate metadata for files in a specified path, run the script: - -```bash -python metadata_generator.py -p path/to/file -``` - -#### 3. Output - -The script will generate a `resources.json` file in the specified directory. This JSON file will contain metadata for each MCAP file in the directory. - -## Upload rosbags - -This script automates the process of uploading rosbags from the IPAB-RAD autonomous vehicle server to a cloud instance within the [EIDF](https://edinburgh-international-data-facility.ed.ac.uk/) (Edinburgh International Data Facility) infrastructure. It streamlines data collection and transfer by first compressing the rosbags using the [MCAP CLI](https://mcap.dev/guides/cli), and then uploading the compressed files. This ensures efficient handling and storage of large datasets generated by vehicle sensors. - -### 1. 
Dependencies - -#### Vehicle machine (host) - -- Install Python dependencies: - - ```bash - pip install colorlog paramiko paramiko_jump - ``` - -- Install MCAP CLI `v0.0.47` for rosbag compression: - - ```bash - wget -O $HOME/mcap https://github.com/foxglove/mcap/releases/download/releases%2Fmcap-cli%2Fv0.0.47/mcap-linux-amd64 - chmod +x $HOME/mcap - ``` - -- Set up an FTP server by following this [guide](https://documentation.ubuntu.com/server/how-to/networking/ftp/index.html) - -#### EIDF (cloud) - -- Install `lftp`: - ```bash - sudo apt update && sudo apt install lftp - ``` - -### 2. Usage - -To execute the script from the host machine, run: +#### Backup Rosbags ```bash -python3 -m upload_rosbags.upload_rosbags \ - --config ./upload_rosbags/upload_config.yaml \ - --lftp-password \ +./runtime.sh bash -# Add --debug flag to set DEBUG log level +s3_backup_agent --recordings_list /opt/ros_ws/rosbags/2025_08_12-12_22_49_meadows ``` -### 3. YAML Parameters - -The configuration file accepts the following parameters: - -- `local_host_user` (str): Username for the host machine. -- `local_hostname` (str): IP address or hostname of the host machine (interface connected to the internet). -- `local_rosbags_directory` (str): Path to the directory on the host machine containing the rosbags. -- `cloud_ssh_alias` (str): SSH alias for the cloud server defined in `~/.ssh/config`. If unset, `cloud_user` and `cloud_hostname` must be provided. -- `cloud_user` (str): Username for the cloud target machine. Ignored if `cloud_ssh_alias` is defined and valid. -- `cloud_hostname` (str): Hostname or IP of the cloud target machine. Ignored if `cloud_ssh_alias` is defined and valid. -- `cloud_upload_directory` (str): Destination directory on the cloud server for uploading compressed files. -- `mcap_bin_path` (str): Full path to the `mcap` CLI binary. -- `mcap_compression_chunk_size` (int): Chunk size in bytes used during MCAP compression. 
-- `compression_parallel_workers` (int): Number of parallel worker threads for compression. -- `compression_queue_max_size` (int): Maximum number of compressed rosbags allowed in the queue at any time. - -See [upload\_config.yaml](./upload_rosbags/upload_config.yaml) for a sample configuration. - -### 4. Logging - -The script logs its actions to a file named `_rosbag_upload.log`. - - - ## ROS2 Bag Merging Script This script automates the merging of ROS2 bag files using the `ros2 bag convert` command. diff --git a/config/av_sensor_export_config.yaml b/config/av_sensor_export_config.yaml index 2092caf..2451d44 100644 --- a/config/av_sensor_export_config.yaml +++ b/config/av_sensor_export_config.yaml @@ -5,15 +5,16 @@ storage_id: "mcap" # List of topics to extract from the ROS2 bag topics: # Configuration for Pointclouds, defined first to use their timestamp for sensor sync - - name: "/sensor/lidar/top/points" # 10Hz + - name: "/sensor/lidar/top/points" type: "PointCloud2" - sample_interval: 1 # 10Hz + sample_interval: 1 topic_dir: "lidar/top" # Configuration for Compressed Images - - name: "/sensor/camera/fsp_l/image_rect_color/compressed" # 20Hz + - name: "/sensor/camera/fsp_l/image_rect_color/compressed" type: "CompressedImage" - sample_interval: 1 # 20Hz + sample_interval: 1 + timestamp_offset_ms: 70 topic_dir: "camera/fsp_l" - name: "/sensor/camera/fsp_l/camera_info" @@ -21,9 +22,21 @@ topics: sample_interval: 0 topic_dir: "camera/fsp_l" - - name: "/sensor/camera/rsp_l/image_rect_color/compressed" # 20Hz + - name: "/sensor/camera/fsp_r/image_rect_color/compressed" + type: "CompressedImage" + sample_interval: 1 + timestamp_offset_ms: 70 + topic_dir: "camera/fsp_r" + + - name: "/sensor/camera/fsp_r/camera_info" + type: "CameraInfo" + sample_interval: 0 + topic_dir: "camera/fsp_r" + + - name: "/sensor/camera/rsp_l/image_rect_color/compressed" type: "CompressedImage" - sample_interval: 1 # 20Hz + sample_interval: 1 + timestamp_offset_ms: 20 topic_dir: "camera/rsp_l" 
- name: "/sensor/camera/rsp_l/camera_info" @@ -31,9 +44,21 @@ topics: sample_interval: 0 topic_dir: "camera/rsp_l" - - name: "/sensor/camera/lspf_r/image_rect_color/compressed" # 20Hz + - name: "/sensor/camera/rsp_r/image_rect_color/compressed" + type: "CompressedImage" + sample_interval: 1 + timestamp_offset_ms: 20 + topic_dir: "camera/rsp_r" + + - name: "/sensor/camera/rsp_r/camera_info" + type: "CameraInfo" + sample_interval: 0 + topic_dir: "camera/rsp_r" + + - name: "/sensor/camera/lspf_r/image_rect_color/compressed" type: "CompressedImage" - sample_interval: 1 # 20Hz + sample_interval: 1 + timestamp_offset_ms: 51 topic_dir: "camera/lspf_r" - name: "/sensor/camera/lspf_r/camera_info" @@ -41,9 +66,10 @@ topics: sample_interval: 0 topic_dir: "camera/lspf_r" - - name: "/sensor/camera/lspr_l/image_rect_color/compressed" # 20Hz + - name: "/sensor/camera/lspr_l/image_rect_color/compressed" type: "CompressedImage" - sample_interval: 1 # 20Hz + sample_interval: 1 + timestamp_offset_ms: 38 topic_dir: "camera/lspr_l" - name: "/sensor/camera/lspr_l/camera_info" @@ -51,9 +77,10 @@ topics: sample_interval: 0 topic_dir: "camera/lspr_l" - - name: "/sensor/camera/rspf_l/image_rect_color/compressed" # 20Hz + - name: "/sensor/camera/rspf_l/image_rect_color/compressed" type: "CompressedImage" - sample_interval: 1 # 10Hz + sample_interval: 1 + timestamp_offset_ms: 88 topic_dir: "camera/rspf_l" - name: "/sensor/camera/rspf_l/camera_info" @@ -61,9 +88,10 @@ topics: sample_interval: 0 topic_dir: "camera/rspf_l" - - name: "/sensor/camera/rspr_r/image_rect_color/compressed" # 20Hz + - name: "/sensor/camera/rspr_r/image_rect_color/compressed" type: "CompressedImage" - sample_interval: 1 # 20Hz + sample_interval: 1 + timestamp_offset_ms: 101 topic_dir: "camera/rspr_r" - name: "/sensor/camera/rspr_r/camera_info" @@ -75,15 +103,3 @@ topics: type: "TF" sample_interval: 1 topic_dir: "extrinsics" - -# # Configuration for IMU -# - name: "/sensor/imu/front/data" # 200Hz -# type: "IMU" -# 
sample_interval: 1 # 200Hz -# topic_dir: "imu" - -# # Configuration for GPS -# - name: "/sensor/gps/nav_sat_fix" # 20Hz -# type: "GPS" -# sample_interval: 1 # 20Hz -# topic_dir: "gps" diff --git a/config/dataset_attributes.json b/config/dataset_attributes.json index 4b8caa9..2df33a4 100644 --- a/config/dataset_attributes.json +++ b/config/dataset_attributes.json @@ -2,30 +2,30 @@ "format_version": "0.1", "categories": [ { - "name": "object", + "name": "vehicle_bus", "id": 1, "color": [ - 0, - 113, - 188 + 255, + 5, + 80 ], "attributes": [ { "name": "position", - "input_type": "vector3", "is_mandatory": true, + "input_type": "vector3", "is_track_level": false }, { "name": "dimensions", - "input_type": "vector3", "is_mandatory": true, + "input_type": "vector3", "is_track_level": false }, { "name": "rotation", - "input_type": "quaternion", "is_mandatory": true, + "input_type": "quaternion", "is_track_level": false } ], @@ -35,189 +35,190 @@ "name": "vehicle_car", "id": 2, "color": [ - 216, - 82, - 24 + 0, + 255, + 157 ], "attributes": [ { "name": "position", - "input_type": "vector3", "is_mandatory": true, + "input_type": "vector3", "is_track_level": false }, { "name": "dimensions", - "input_type": "vector3", "is_mandatory": true, + "input_type": "vector3", "is_track_level": false }, { "name": "rotation", - "input_type": "quaternion", "is_mandatory": true, + "input_type": "quaternion", "is_track_level": false } ], "link_attributes": [] }, { - "name": "vehicle_truck", + "name": "vehicle_lorry", "id": 3, "color": [ - 236, - 176, - 31 + 125, + 46, + 141 ], "attributes": [ { "name": "position", - "input_type": "vector3", "is_mandatory": true, + "input_type": "vector3", "is_track_level": false }, { "name": "dimensions", - "input_type": "vector3", "is_mandatory": true, + "input_type": "vector3", "is_track_level": false }, { "name": "rotation", - "input_type": "quaternion", "is_mandatory": true, + "input_type": "quaternion", "is_track_level": false } ], "link_attributes": [] 
}, { - "name": "vehicle_lorry", + "name": "vehicle_taxi", "id": 4, "color": [ - 125, - 46, - 141 + 0, + 0, + 255 ], "attributes": [ { "name": "position", - "input_type": "vector3", "is_mandatory": true, + "input_type": "vector3", "is_track_level": false }, { "name": "dimensions", - "input_type": "vector3", "is_mandatory": true, + "input_type": "vector3", "is_track_level": false }, { "name": "rotation", - "input_type": "quaternion", "is_mandatory": true, + "input_type": "quaternion", "is_track_level": false } ], "link_attributes": [] }, { - "name": "vehicle_van", + "name": "vehicle_tram", "id": 5, "color": [ - 118, - 171, - 47 + 190, + 190, + 0 ], "attributes": [ { "name": "position", - "input_type": "vector3", "is_mandatory": true, + "input_type": "vector3", "is_track_level": false }, { "name": "dimensions", - "input_type": "vector3", "is_mandatory": true, + "input_type": "vector3", "is_track_level": false }, { "name": "rotation", - "input_type": "quaternion", "is_mandatory": true, + "input_type": "quaternion", "is_track_level": false } ], "link_attributes": [] }, { - "name": "vehicle_bus", + "name": "vehicle_truck", "id": 6, "color": [ - 161, - 19, - 46 + 236, + 176, + 31 ], "attributes": [ { "name": "position", - "input_type": "vector3", "is_mandatory": true, + "input_type": "vector3", "is_track_level": false }, { "name": "dimensions", - "input_type": "vector3", "is_mandatory": true, + "input_type": "vector3", "is_track_level": false }, { "name": "rotation", - "input_type": "quaternion", "is_mandatory": true, + "input_type": "quaternion", "is_track_level": false } ], "link_attributes": [] }, + { - "name": "vehicle_bicycle", + "name": "vehicle_van", "id": 7, "color": [ - 255, - 0, - 0 + 118, + 171, + 47 ], "attributes": [ { "name": "position", - "input_type": "vector3", "is_mandatory": true, + "input_type": "vector3", "is_track_level": false }, { "name": "dimensions", - "input_type": "vector3", "is_mandatory": true, + "input_type": "vector3", "is_track_level": 
false }, { "name": "rotation", - "input_type": "quaternion", "is_mandatory": true, + "input_type": "quaternion", "is_track_level": false } ], "link_attributes": [] }, { - "name": "pedestrian", + "name": "vru_animal", "id": 8, "color": [ - 255, - 127, - 0 + 250, + 254, + 1 ], "attributes": [ { @@ -242,73 +243,103 @@ "link_attributes": [] }, { - "name": "vehicle_tram", + "name": "vru_bicycle", "id": 9, "color": [ - 190, - 190, + 255, + 0, 0 ], "attributes": [ { "name": "position", - "input_type": "vector3", "is_mandatory": true, + "input_type": "vector3", "is_track_level": false }, { "name": "dimensions", - "input_type": "vector3", "is_mandatory": true, + "input_type": "vector3", "is_track_level": false }, { "name": "rotation", - "input_type": "quaternion", "is_mandatory": true, + "input_type": "quaternion", "is_track_level": false } ], "link_attributes": [] }, { - "name": "vehicle_taxi", + "name": "vru_pedestrian", "id": 10, "color": [ 0, - 0, - 255 + 255, + 238 ], "attributes": [ { "name": "position", - "input_type": "vector3", "is_mandatory": true, + "input_type": "vector3", "is_track_level": false }, { "name": "dimensions", - "input_type": "vector3", "is_mandatory": true, + "input_type": "vector3", "is_track_level": false }, { "name": "rotation", - "input_type": "quaternion", "is_mandatory": true, + "input_type": "quaternion", "is_track_level": false } ], "link_attributes": [] }, { + "name": "vru_pram", "id": 11, - "name": "pram", "color": [ 170, 0, 255 ], + "attributes": [ + { + "name": "position", + "is_mandatory": true, + "input_type": "vector3", + "is_track_level": false + }, + { + "name": "dimensions", + "is_mandatory": true, + "input_type": "vector3", + "is_track_level": false + }, + { + "name": "rotation", + "is_mandatory": true, + "input_type": "quaternion", + "is_track_level": false + } + ], + "link_attributes": [] + }, + { + "name": "vru_wheelchair", + "id": 12, + "color": [ + 242, + 7, + 223 + ], "attributes": [ { "name": "position", diff --git 
a/dev.sh b/dev.sh index b53369f..6a61bf0 100755 --- a/dev.sh +++ b/dev.sh @@ -105,11 +105,14 @@ if [ ! -f "$KEYS_FILE" ]; then exit 1 fi +mkdir -p $SCRIPT_DIR/logs + # Run docker image with local code volumes for development docker run -it --rm --net host \ --user "$(id -u):$(id -g)" \ $CYCLONE_VOL \ -v $KEYS_FILE:/keys/dataset_keys.env \ + -v $SCRIPT_DIR/logs:/opt/ros_ws/logs_tartan_forklift \ -v $EXPORTS_OUTPUT_DIR:/opt/ros_ws/exported_data \ -v $ROSBAGS_DIR:/opt/ros_ws/rosbags \ -v $SCRIPT_DIR/config:/opt/ros_ws/config \ diff --git a/docs/data_pipeline_diagram.jpg b/docs/data_pipeline_diagram.jpg new file mode 100644 index 0000000..044e412 Binary files /dev/null and b/docs/data_pipeline_diagram.jpg differ diff --git a/runtime.sh b/runtime.sh index 095713c..4835147 100755 --- a/runtime.sh +++ b/runtime.sh @@ -120,11 +120,14 @@ if [ ! -f "$KEYS_FILE" ]; then exit 1 fi +mkdir -p $SCRIPT_DIR/logs + # Run docker image docker run -it --rm --net host \ --user "$(id -u):$(id -g)" \ $CYCLONE_VOL \ -v $KEYS_FILE:/keys/dataset_keys.env \ + -v $SCRIPT_DIR/logs:/opt/ros_ws/logs_tartan_forklift \ -v $EXPORTS_OUTPUT_DIR:/opt/ros_ws/exported_data \ -v $ROSBAGS_DIR:/opt/ros_ws/rosbags \ -v /etc/localtime:/etc/localtime:ro \ diff --git a/scripts/data_manager/data_manager.py b/scripts/data_manager/data_manager.py index 97980e2..a3e702c 100644 --- a/scripts/data_manager/data_manager.py +++ b/scripts/data_manager/data_manager.py @@ -41,10 +41,11 @@ def __init__( exporter_config_file: str, dataset_attributes_file: str, s3_organisation: str, + logs_directory: str, debug_mode: bool, ) -> None: """Initialise the DataManager and configure logging.""" - self.logger = self.setup_logging(debug_mode=debug_mode) + self.logger = self.setup_logging(logs_directory, debug_mode) # Polling interval in seconds to check for new recordings. 
self.POLLING_INTERVAL_SEC = 1 self.output_directory = output_directory @@ -56,7 +57,9 @@ def __init__( ) self.s3_backup_agent = S3RosbagBackupAgent(self.logger) - def setup_logging(self, debug_mode: bool) -> logging.Logger: + def setup_logging( + self, logs_directory: str, debug_mode: bool + ) -> logging.Logger: """ Configure logging with colour support and rotating file log. @@ -68,6 +71,7 @@ def setup_logging(self, debug_mode: bool) -> logging.Logger: """ timestamp = datetime.now().strftime('%Y_%m_%d-%H_%M_%S') log_filename = f'{timestamp}_data_manager.log' + log_path = Path(logs_directory) / log_filename logger = logging.getLogger('data_manager') logger.setLevel(logging.DEBUG) @@ -76,7 +80,7 @@ def setup_logging(self, debug_mode: bool) -> logging.Logger: console_handler = logging.StreamHandler() console_handler.setLevel(logging.DEBUG if debug_mode else logging.INFO) - file_handler = logging.FileHandler(log_filename) + file_handler = logging.FileHandler(log_path) file_handler.setLevel(logging.DEBUG) color_formatter = colorlog.ColoredFormatter( @@ -243,28 +247,28 @@ def main() -> None: parser.add_argument( '--rosbags_directory', type=str, - required=True, + default='/opt/ros_ws/rosbags', help='Path to the directory to monitor for new rosbags.', ) parser.add_argument( '--output_directory', type=str, - required=True, + default='/opt/ros_ws/exported_data', help='Parent directory to save exported data.', ) parser.add_argument( '--export_config_file', type=str, - required=True, + default='/opt/ros_ws/config/av_sensor_export_config.yaml', help='Configuration file for the bag exporter', ) parser.add_argument( '--dataset_attributes_file', type=str, - required=True, + default='/opt/ros_ws/config/dataset_attributes.json', help='Absolute path to the dataset attributes file.', ) parser.add_argument( @@ -278,6 +282,12 @@ def main() -> None: '(Optional, default: eidf)' ), ) + parser.add_argument( + '--logs_directory', + type=str, + default='/opt/ros_ws/logs_tartan_forklift', + 
help='Directory to save log files.', + ) parser.add_argument( '--debug', action='store_true', @@ -290,6 +300,7 @@ def main() -> None: exporter_config_file = args.export_config_file dataset_attributes_file = args.dataset_attributes_file s3_organisation = args.s3_org + logs_directory = args.logs_directory debug_mode = args.debug data_manager = DataManager( @@ -297,6 +308,7 @@ def main() -> None: exporter_config_file, dataset_attributes_file, s3_organisation, + logs_directory, debug_mode, ) diff --git a/scripts/labelling_preproc/add_segmentsai_sample.py b/scripts/labelling_preproc/add_segmentsai_sample.py index dc7b715..a4843ee 100755 --- a/scripts/labelling_preproc/add_segmentsai_sample.py +++ b/scripts/labelling_preproc/add_segmentsai_sample.py @@ -6,7 +6,10 @@ from pathlib import Path from labelling_preproc.common.ego_setup import EgoPoses -from labelling_preproc.common.response import PreprocessingResponse +from labelling_preproc.common.response import ( + PreprocessingError, + PreprocessingResponse, +) from labelling_preproc.common.sample_formats import ( camera_ids_list, sensor_sequence_struct, @@ -115,11 +118,16 @@ def add( [ok, msg] = ego_poses.validatePoseCount(len(sync_key_frames)) if not ok: - raise ValueError( - 'The number of poses is not equal to the number of key frames.' 
+ msg = ( + 'The number of poses is not equal to the number of key frames' f'\n{msg}\n' ) - + response = PreprocessingResponse( + ok=False, + error=PreprocessingError.InvalidPoseCountError, + error_message=msg, + ) + return response # Initialise sensors' frames lists pointcloud_frames = [] cameras_frames = {} diff --git a/scripts/labelling_preproc/common/response.py b/scripts/labelling_preproc/common/response.py index 22f1727..94fec4b 100644 --- a/scripts/labelling_preproc/common/response.py +++ b/scripts/labelling_preproc/common/response.py @@ -15,6 +15,7 @@ class PreprocessingError(Enum): SegmentsAlreadyExistsError = 'SegmentsAlreadyExistsError' SegmentsTimeoutError = 'SegmentsTimeoutError' SegmentsUnknownError = 'SegmentsUnknownError' + InvalidPoseCountError = 'InvalidPoseCountError' @dataclass diff --git a/scripts/labelling_preproc/common/sample_formats.py b/scripts/labelling_preproc/common/sample_formats.py index 9a695a7..8de6586 100644 --- a/scripts/labelling_preproc/common/sample_formats.py +++ b/scripts/labelling_preproc/common/sample_formats.py @@ -10,16 +10,27 @@ # Cameras' id list # Do not modify unless you know what you are doing! 
-camera_ids_list = ['fsp_l', 'rsp_l', 'lspf_r', 'lspr_l', 'rspf_l', 'rspr_r'] +camera_ids_list = [ + 'fsp_l', + 'fsp_r', + 'rsp_l', + 'rsp_r', + 'lspf_r', + 'lspr_l', + 'rspf_l', + 'rspr_r', +] # Cameras' position in Segments.ai grid camera_grid_positions = { 'fsp_l': {'row': 0, 'col': 1}, + 'fsp_r': {'row': 0, 'col': 2}, 'rsp_l': {'row': 1, 'col': 1}, + 'rsp_r': {'row': 1, 'col': 2}, 'lspf_r': {'row': 0, 'col': 0}, 'lspr_l': {'row': 1, 'col': 0}, - 'rspf_l': {'row': 0, 'col': 2}, - 'rspr_r': {'row': 1, 'col': 2}, + 'rspf_l': {'row': 0, 'col': 3}, + 'rspr_r': {'row': 1, 'col': 3}, } # Image sample dictionary structure diff --git a/scripts/labelling_preproc/common/sensor_frame_creator.py b/scripts/labelling_preproc/common/sensor_frame_creator.py index 2293ebe..ff6b8e3 100644 --- a/scripts/labelling_preproc/common/sensor_frame_creator.py +++ b/scripts/labelling_preproc/common/sensor_frame_creator.py @@ -57,7 +57,7 @@ def __init__(self, data_directory: Path, cameras_info: list): self.camera_calibration_parser = CameraCalibrationParser() self.cameras_data = {} - self.LIDAR_FRAME_ID = 'lidar_ouster_top' + self.LIDAR_FRAME_ID = 'lidar_top' self.get_cameras_calibration(cameras_info) diff --git a/scripts/labelling_preproc/dataset_creator.py b/scripts/labelling_preproc/dataset_creator.py index f4275fd..f080736 100644 --- a/scripts/labelling_preproc/dataset_creator.py +++ b/scripts/labelling_preproc/dataset_creator.py @@ -264,6 +264,9 @@ def create(self, export_directory: Path, recording_directory: Path) -> str: export_sub_directory_list = self.sort_sub_directories(export_directory) + total_sequences = len(export_sub_directory_list) + sequences_created = 0 + skipped_sequences = [] # Iterate through sub-directories in the export directory # and create a Segments.ai sample for each for export_sub_directory in export_sub_directory_list: @@ -284,8 +287,25 @@ def create(self, export_directory: Path, recording_directory: Path) -> str: 
self.asset_uploader.run(export_sub_directory.resolve()) # Create Segments.ai sample - self.create_sample(dataset_full_name, export_sub_directory) + if self.create_sample(dataset_full_name, export_sub_directory): + sequences_created += 1 + else: + skipped_sequences.append(export_sub_directory) + if sequences_created == total_sequences: + self.logger.info( + f'[DatasetCreator] All {sequences_created} sequences created ' + f'successfully for dataset "{dataset_full_name}"' + ) + else: + # Convert skipped sequences to a string list + skipped_list_str = '\n'.join(str(p) for p in skipped_sequences) + self.logger.warning( + f'[DatasetCreator] Created ' + f'{sequences_created}/{total_sequences}' + f' sequences for dataset "{dataset_full_name}"\n' + f'Skipped sequences:\n{skipped_list_str}' + ) return dataset_full_name @@ -315,7 +335,7 @@ def main(): parser.add_argument( '--dataset_attributes_file', type=str, - required=True, + default='/opt/ros_ws/config/dataset_attributes.json', help='Absolute path to the dataset attributes file.', ) diff --git a/scripts/labelling_preproc/generate_ego_trajectory.py b/scripts/labelling_preproc/generate_ego_trajectory.py index 901bceb..b364e9d 100755 --- a/scripts/labelling_preproc/generate_ego_trajectory.py +++ b/scripts/labelling_preproc/generate_ego_trajectory.py @@ -59,8 +59,12 @@ def run_mola_lidar_odometry( cmd = ( # Avoid relying on the lidar -> base_link transform 'export MOLA_USE_FIXED_LIDAR_POSE=true && ' + 'export MOLA_IMU_NAME=/sensor/imu/front/data && ' # Ensure a pose is generated for every lidar frame 'export MOLA_MIN_XYZ_BETWEEN_MAP_UPDATES=0.0001 && ' + 'export MOLA_MIN_ROT_BETWEEN_MAP_UPDATES=0.0001 && ' + 'export MOLA_MINIMUM_ICP_QUALITY=0.1 && ' + 'export MOLA_MAP_CLOUD_DECIMATION=0.005 && ' 'mola-lidar-odometry-cli ' f'-c {mola_config_path} ' f'--input-rosbag2 {rosbag_path} ' diff --git a/upload_rosbags/upload_config.yaml b/upload_rosbags/upload_config.yaml index 5ee7f2d..2f60780 100644 --- 
a/upload_rosbags/upload_config.yaml +++ b/upload_rosbags/upload_config.yaml @@ -1,11 +1,11 @@ local_host_user: "lxo" local_hostname: "129.215.117.145" -local_rosbags_directory: "/recorded_datasets/edinburgh" +local_rosbags_directory: "/mnt/sata_ssd_raid/edinburgh" cloud_ssh_alias: "eidf-vm" cloud_user : "" cloud_hostname : "" -cloud_upload_directory: "/mnt/vdb/test_bags" +cloud_upload_directory: "/mnt/vdb/data" upload_attempts: 3 mcap_bin_path: "/home/lxo/mcap" diff --git a/upload_rosbags/upload_rosbags.py b/upload_rosbags/upload_rosbags.py index e7734de..dec1b7b 100644 --- a/upload_rosbags/upload_rosbags.py +++ b/upload_rosbags/upload_rosbags.py @@ -123,11 +123,20 @@ def order_based_on_sufix(path: Path): (file for file in base_path.rglob('*.mcap') if file.is_file()), key=order_based_on_sufix, ) + + # Filter mcap files that are in the temporary directory + # to avoid uploading already compressed files + filtered_mcap_files = [ + mcap + for mcap in mcap_files + if self.temp_directory_name not in mcap.parts + ] + return [ Rosbag( absolute_path=file.resolve(), size_bytes=file.stat().st_size ) - for file in mcap_files + for file in filtered_mcap_files ] def resolve_remote_destination_path(