Maze upload typos #15

Open · wants to merge 5 commits into base: main

4 changes: 2 additions & 2 deletions README.md
@@ -20,10 +20,10 @@ The point maze datasets have been regenerated using the same `q_iteration` exper

You can run the script used to regenerate the datasets with:
```
python scripts/pointmaze/create_pointmaze_dataset --env "PointMaze_UMaze-v3" --dataset_name="pointmaze-umaze-v0" --maze-solver="QIteration"
python scripts/pointmaze/create_pointmaze_dataset.py
```

This will generate a local Minari dataset named `pointmaze-umaze-v0` for the `PointMaze_UMaze-v3` environment, using `q_iteration` as the expert policy, Depth First Search can also be used as the algorithm to generate a path to the goal by passing "DFS" instead of "QIteration".
This will generate a set of local Minari datasets named e.g. `pointmaze/umaze-v0`, using `q_iteration` as the expert policy. Depth First Search can also be used as the algorithm to generate a path to the goal by passing "DFS" instead of "QIteration".

### Adroit Hand

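As context for the README change above, a regenerated dataset can be inspected with the public Minari API. A minimal sketch, assuming the regeneration script has already produced a local dataset under the new `pointmaze/umaze-v0` ID:

```python
import minari

# Assumes the dataset was generated locally by the script above; the ID follows
# the renamed "pointmaze/umaze-v0" scheme described in the README change.
dataset = minari.load_dataset("pointmaze/umaze-v0")

print("Episodes:", dataset.total_episodes)
print("Steps:", dataset.total_steps)

# Look at one episode to confirm trajectories were recorded.
episode = next(dataset.iterate_episodes())
print(episode.actions.shape)
```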
14 changes: 7 additions & 7 deletions checks/check_maze_dataset.py
@@ -30,8 +30,8 @@ def print_maze_stats(dataset):
coords = ep.observations["achieved_goal"]
velocity_sum += np.linalg.norm(coords[1:] - coords[:-1], axis=1).sum()

print(" | Success rate:", successes / dataset.total_episodes)
print(" | Avg velocity:", velocity_sum / dataset.total_steps)
print(" | Success rate:", successes / len(dataset))


def check_maze_reset_nonterminal(dataset, reset_threshold=0.5):
@@ -52,7 +52,7 @@ def check_qpos_qvel_identical_values(dataset):
qpos = check_dataset.get_infos(dataset, "qpos")
qvel = check_dataset.get_infos(dataset, "qvel")

for i in range(dataset.total_episodes):
for i in range(len(dataset)):
for values in [qpos[i], qvel[i]]:
if len(values) < 3:
continue
@@ -90,16 +90,16 @@ def check_qpos_qvel_shapes(dataset):
qvel = check_dataset.get_infos(dataset, "qvel")

qpos_message = (
f"Expected infos/qpos to have length {dataset.total_episodes}, got {len(qpos)}"
f"Expected infos/qpos to have length {len(dataset)}, got {len(qpos)}"
)
qvel_message = (
f"Expected infos/qvel to have length {dataset.total_episodes}, got {len(qvel)}"
f"Expected infos/qvel to have length {len(dataset)}, got {len(qvel)}"
)
assert len(qpos) == dataset.total_episodes, qpos_message
assert len(qvel) == dataset.total_episodes, qvel_message
assert len(qpos) == len(dataset), qpos_message
assert len(qvel) == len(dataset), qvel_message

for i, ep in enumerate(dataset):
num_steps = ep.total_timesteps + 1 # Same number of steps as observation
num_steps = len(ep) + 1 # Same number of steps as observation
qpos_shape_message = (
f"Expected infos/qpos (episode {i}) to have shape "
f"{(num_steps, num_q)}, got {qpos[i].shape}"
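The check changes above move from explicit counters (`dataset.total_episodes`, `ep.total_timesteps`) to `len(dataset)` and `len(ep)`. A minimal sketch of the relationship the shape check relies on, assuming a locally generated maze dataset (the ID here is only illustrative):

```python
import minari

# Illustrative dataset ID; any locally generated maze dataset would do.
dataset = minari.load_dataset("pointmaze/umaze-v0")
print("Episodes in dataset:", len(dataset))

for episode in dataset.iterate_episodes():
    # Each episode also stores the reset observation, so there is one more
    # observation than there are steps (hence the `len(ep) + 1` in the check).
    assert len(episode.observations["achieved_goal"]) == len(episode) + 1
```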
54 changes: 32 additions & 22 deletions scripts/antmaze/create_antmaze_dataset.py
@@ -8,17 +8,20 @@
See --help for full list of options.
"""

import sys
import argparse
import os
import random
import sys
from copy import deepcopy

import gymnasium as gym
import minari
import numpy as np
import torch
from minari import DataCollector, StepDataCallback
from stable_baselines3 import SAC
from tqdm import tqdm
from copy import deepcopy
import numpy as np
import argparse

from stable_baselines3 import SAC
from controller import WaypointController

sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), "../../checks")))
@@ -28,13 +31,19 @@
G = "g"
INFO_KEYS = ["success"]

def seed_everything(seed):
random.seed(seed)
np.random.seed(seed)
torch.manual_seed(seed)
torch.cuda.manual_seed(seed)
torch.backends.cudnn.deterministic = True

class AntMazeStepDataCallback(StepDataCallback):
"""Add environment state information to 'infos'.
"""Add environment state information to 'info'.

Also, since the environment generates a new target every time it reaches a goal, the
environment is never terminated or truncated. This callback overrides the truncation
value to True when the step returns a True 'success' key in 'infos'. This way we can
value to True when the step returns a True 'success' key in 'info'. This way we can
divide the Minari dataset into different trajectories.
"""

@@ -44,14 +53,14 @@ def __call__(
step_data = super().__call__(env, obs, info, action, rew, terminated, truncated)

# Filter out info keys that we don't want to store
step_data["infos"] = {k: step_data["infos"][k] for k in INFO_KEYS}
step_data["info"] = {k: step_data["info"][k] for k in INFO_KEYS}

# To restore the MuJoCo simulation state, we need to store qpos and qvel
step_data["infos"]["qpos"] = np.concatenate(
step_data["info"]["qpos"] = np.concatenate(
[obs["achieved_goal"], obs["observation"][:13]]
)
step_data["infos"]["qvel"] = obs["observation"][13:]
step_data["infos"]["goal"] = obs["desired_goal"]
step_data["info"]["qvel"] = obs["observation"][13:]
step_data["info"]["goal"] = obs["desired_goal"]

return step_data

@@ -99,12 +108,12 @@ def init_dataset(collector_env, dataset_id, eval_env_spec, expert_policy, args):
[1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1]]
}

DATASET_ID_TO_ENV_ID = {"antmaze-umaze-v1": "AntMaze_UMaze-v4",
"antmaze-umaze-diverse-v1": "AntMaze_UMaze-v4",
"antmaze-medium-play-v1": "AntMaze_Medium-v4",
"antmaze-medium-diverse-v1": "AntMaze_Medium_Diverse_GR-v4",
"antmaze-large-diverse-v1": "AntMaze_Large_Diverse_GR-v4",
"antmaze-large-play-v1": "AntMaze_Large-v4"}
DATASET_ID_TO_ENV_ID = {"D4RL/antmaze/umaze-v2": "AntMaze_UMaze-v4",
"D4RL/antmaze/umaze-diverse-v2": "AntMaze_UMaze-v4",
"D4RL/antmaze/medium-play-v2": "AntMaze_Medium-v4",
"D4RL/antmaze/medium-diverse-v2": "AntMaze_Medium_Diverse_GR-v4",
"D4RL/antmaze/large-diverse-v2": "AntMaze_Large_Diverse_GR-v4",
"D4RL/antmaze/large-play-v2": "AntMaze_Large-v4"}

if __name__ == "__main__":
parser = argparse.ArgumentParser()
@@ -141,8 +150,8 @@ def init_dataset(collector_env, dataset_id, eval_env_spec, expert_policy, args):
# is also not reset when it is reached, leading to reward accumulation.
# We set the maximum episode steps to the desired size of our Minari
# dataset (evade truncation due to time limit)
split_dataset_id = dataset_id.split('-')
if split_dataset_id[1] == "umaze" and split_dataset_id[2] != "diverse":
split_dataset_id = dataset_id.split('/')[-1].split('-')
if split_dataset_id[0] == "umaze" and split_dataset_id[1] != "diverse":
maze_map = [[1, 1, 1, 1, 1],
[1, G, 0, 0, 1],
[1, 1, 1, 0, 1],
@@ -156,15 +165,15 @@ def init_dataset(collector_env, dataset_id, eval_env_spec, expert_policy, args):
env_id, continuing_task=True, reset_target=False,
)
# Data collector wrapper to save temporary data while stepping. Characteristics:
# * Custom StepDataCallback to add extra state information to 'infos' and divide dataset in
# * Custom StepDataCallback to add extra state information to 'info' and divide dataset in
# different episodes by overriding truncation value to True when target is reached
# * Record the 'info' value of every step
collector_env = DataCollector(
env, step_data_callback=AntMazeStepDataCallback, record_infos=True
)

seed = args.seed
np.random.seed(seed)
seed_everything(seed)

model = SAC.load(args.policy_file)

@@ -189,7 +198,7 @@ def action_callback(obs, waypoint_xy):

if dataset is None:
eval_env_spec = deepcopy(env.spec)
eval_env_spec.kwargs['maze_map'] = EVAL_ENV_MAPS[split_dataset_id[1]]
eval_env_spec.kwargs['maze_map'] = EVAL_ENV_MAPS[split_dataset_id[0]]
eval_env = gym.make(eval_env_spec)
eval_waypoint_controller = WaypointController(eval_env.unwrapped.maze, action_callback)
dataset = init_dataset(collector_env, dataset_id, eval_env_spec, eval_waypoint_controller.compute_action, args)
@@ -200,6 +209,7 @@ def action_callback(obs, waypoint_xy):
# Reset the environment, either due to timeout or checkpointing.
if truncated:
seed += 1 # Increment the seed to prevent repeating old episodes
seed_everything(seed)
obs, info = collector_env.reset(seed=seed)

print(f"Checking {dataset_id}:")
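The script above now receives dataset IDs in the new `D4RL/antmaze/...` form, so the maze variant used to select maps moves from index 1 to index 0 once the namespace is stripped. A small worked example of that parsing, mirroring the `split_dataset_id` logic in the diff:

```python
# Mirrors the updated parsing in create_antmaze_dataset.py.
dataset_id = "D4RL/antmaze/medium-diverse-v2"

split_dataset_id = dataset_id.split("/")[-1].split("-")
print(split_dataset_id)  # ['medium', 'diverse', 'v2']

# The maze variant now sits at index 0 (with the old "antmaze-medium-diverse-v1"
# IDs it was at index 1), which is why EVAL_ENV_MAPS is indexed with
# split_dataset_id[0] above.
maze_variant = split_dataset_id[0]  # "medium"
```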
4 changes: 3 additions & 1 deletion scripts/antmaze/requirements.txt
@@ -2,6 +2,8 @@ numpy==1.26.4
scipy==1.12.0
packaging==24.0
gymnasium-robotics==1.2.4
minari==0.4.3
minari[create]==0.5.1
stable_baselines3@git+https://github.com/DLR-RM/stable-baselines3.git@f56ddeda10b1e3669a77a1c28c56944036286833
tqdm==4.66.2
minigrid==2.3.1
mujoco==2.3.7