|
| 1 | +""" |
| 2 | +Minimal example script for converting a dataset to LeRobot format. |
| 3 | +
|
Originally written for the Libero dataset (stored in RLDS); this copy has been
adapted to convert a custom UR3 dataset stored as per-episode directories of
PNG images and .npy joint states. It can be modified for any other custom format.
| 6 | +
|
| 7 | +Usage: |
| 8 | +uv run examples/libero/convert_libero_data_to_lerobot.py --data_dir /path/to/your/data |
| 9 | +
|
| 10 | +If you want to push your dataset to the Hugging Face Hub, you can use the following command: |
| 11 | +uv run examples/libero/convert_libero_data_to_lerobot.py --data_dir /path/to/your/data --push_to_hub |
| 12 | +
|
Note: the original RLDS conversion path (now commented out in main) requires tensorflow_datasets:
`uv pip install tensorflow tensorflow_datasets`
| 15 | +
|
| 16 | +You can download the raw Libero datasets from https://huggingface.co/datasets/openvla/modified_libero_rlds |
| 17 | +The resulting dataset will get saved to the $HF_LEROBOT_HOME directory. |
| 18 | +Running this conversion script will take approximately 30 minutes. |
| 19 | +""" |
| 20 | +import os |
| 21 | +import shutil |
| 22 | +from pathlib import Path |
| 23 | +from PIL import Image |
| 24 | +import numpy as np |
| 25 | + |
| 26 | +from lerobot.common.datasets.lerobot_dataset import HF_LEROBOT_HOME |
| 27 | +from lerobot.common.datasets.lerobot_dataset import LeRobotDataset |
| 28 | +import tensorflow_datasets as tfds |
| 29 | +import tyro |
| 30 | + |
| 31 | +REPO_NAME = "ur3" # Name of the output dataset, also used for the Hugging Face Hub |
| 32 | +# RAW_DATASET_NAMES = [ |
| 33 | +# "libero_10_no_noops", |
| 34 | +# "libero_goal_no_noops", |
| 35 | +# "libero_object_no_noops", |
| 36 | +# "libero_spatial_no_noops", |
| 37 | +# ] # For simplicity we will combine multiple Libero datasets into one training dataset |
| 38 | + |
| 39 | +RAW_DATASET_NAMES = [ |
| 40 | + "libero_goal_no_noops", |
| 41 | +] |
| 42 | + |
def main(data_dir: str, *, push_to_hub: bool = False, output_dir: "str | None" = None):
    """Convert a custom per-episode UR3 dataset into LeRobot format.

    Expected layout under ``data_dir``: one purely numeric subdirectory per
    episode (``"0"``, ``"1"``, ...), each containing ``joint_states/<j>.npy``
    plus matching ``images/front_images/<j>.png`` and
    ``images/ee_images/<j>.png`` frames.

    Args:
        data_dir: Root directory holding the numeric episode subdirectories.
        push_to_hub: If True, push the finished dataset to the Hugging Face Hub.
        output_dir: Directory wiped before conversion. Defaults to the original
            hard-coded path for backward compatibility.
    """
    # Clean up any existing dataset in the output directory.
    # NOTE(review): LeRobotDataset.create() below is not given a `root`, so it
    # presumably writes under HF_LEROBOT_HOME / REPO_NAME rather than this
    # path — confirm HF_LEROBOT_HOME points here, or pass `root=output_path`.
    output_path = Path(output_dir) if output_dir is not None else Path("/map-vepfs/haoxiao/yijingkun/ur3")
    if output_path.exists():
        shutil.rmtree(output_path)

    # Create LeRobot dataset, define features to store.
    # OpenPi assumes that proprio is stored in `state` and actions in `action`;
    # LeRobot assumes that dtype of image data is `image`.
    dataset = LeRobotDataset.create(
        repo_id=REPO_NAME,
        robot_type="panda",  # NOTE(review): data appears to be UR3 — confirm robot_type
        fps=10,
        features={
            "image": {
                "dtype": "image",
                "shape": (224, 224, 3),
                "names": ["height", "width", "channel"],
            },
            "wrist_image": {
                "dtype": "image",
                "shape": (224, 224, 3),
                "names": ["height", "width", "channel"],
            },
            "state": {
                "dtype": "float32",
                "shape": (8,),
                "names": ["state"],
            },
            "actions": {
                "dtype": "float32",
                "shape": (8,),
                "names": ["actions"],
            },
        },
        image_writer_threads=10,
        image_writer_processes=5,
    )

    # Episode directories are purely numeric; sort numerically, not lexically.
    episode_dirs = [d for d in os.listdir(data_dir) if d.isdigit() and os.path.isdir(os.path.join(data_dir, d))]
    episode_dirs.sort(key=int)

    for episode_dir in episode_dirs:
        i = int(episode_dir)
        episode_path = os.path.join(data_dir, episode_dir)
        joint_states_path = os.path.join(episode_path, 'joint_states')

        # Skip episodes missing their joint_states directory entirely.
        if not os.path.exists(joint_states_path):
            print(f"Skipping episode {i} - joint_states directory not found")
            continue

        # Frame indices are assumed contiguous 0..len_frame-1; individual
        # missing files are still skipped per-frame below.
        len_frame = len(os.listdir(joint_states_path))
        episode_frame_count = 0

        for j in range(len_frame):
            try:
                front_img_path = f'{episode_path}/images/front_images/{j}.png'
                ee_img_path = f'{episode_path}/images/ee_images/{j}.png'
                joint_state_path = f'{episode_path}/joint_states/{j}.npy'

                # Check that all three files for this frame exist.
                if not all(os.path.exists(path) for path in (front_img_path, ee_img_path, joint_state_path)):
                    print(f"Skipping episode {i}, frame {j} - missing files")
                    continue

                # Use context managers so image file handles are closed
                # promptly instead of leaking until GC.
                with Image.open(front_img_path) as img:
                    front_image = np.array(img)
                with Image.open(ee_img_path) as img:
                    ee_image = np.array(img)

                # Load the joint vector once (the original loaded the same file
                # twice) and insert a 0.0 placeholder at index 6 to produce the
                # 8-dim state/actions declared above.
                # NOTE(review): assumes the .npy holds a 7-element vector — confirm.
                joint = np.insert(np.load(joint_state_path).astype(np.float32), 6, 0.0)
                state = joint
                actions = joint.copy()  # distinct array, matching the original's two loads

                dataset.add_frame(
                    {
                        "image": front_image,
                        "wrist_image": ee_image,
                        "state": state,
                        "actions": actions,
                        "task": "pick up the red cube and place on the top of blue cube",
                    }
                )
                episode_frame_count += 1
            except (OSError, IOError) as e:
                print(f"Skipping corrupted image at episode {i}, frame {j}: {e}")
                continue

        # Only save the episode if at least one valid frame was added.
        if episode_frame_count > 0:
            dataset.save_episode()
            print(f"Saved episode {i} with {episode_frame_count} frames")
        else:
            print(f"Skipping episode {i} - no valid frames")

    # Optionally push to the Hugging Face Hub.
    if push_to_hub:
        dataset.push_to_hub(
            tags=["libero", "panda", "rlds"],
            private=False,
            push_videos=True,
            license="apache-2.0",
        )
| 168 | + |
| 169 | + |
| 170 | +if __name__ == "__main__": |
| 171 | + tyro.cli(main) |
0 commit comments