Maze upload typos #15

Open · wants to merge 5 commits into base: main

4 changes: 2 additions & 2 deletions README.md
@@ -20,10 +20,10 @@ The point maze datasets have been regenerated using the same `q_iteration` exper

You can run the script used to regenerate the datasets with:
```
python scripts/pointmaze/create_pointmaze_dataset --env "PointMaze_UMaze-v3" --dataset_name="pointmaze-umaze-v0" --maze-solver="QIteration"
python scripts/pointmaze/create_pointmaze_dataset.py
```

This will generate a local Minari dataset named `pointmaze-umaze-v0` for the `PointMaze_UMaze-v3` environment, using `q_iteration` as the expert policy, Depth First Search can also be used as the algorithm to generate a path to the goal by passing "DFS" instead of "QIteration".
This will generate a set of local Minari datasets named e.g. `pointmaze/umaze-v0`, using `q_iteration` as the expert policy. Depth First Search can also be used as the algorithm to generate a path to the goal by passing "DFS" instead of "QIteration".

### Adroit Hand

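As context for the README change above, a regenerated dataset can be inspected with the public Minari API. A minimal sketch, assuming the regeneration script has already produced a local dataset under the new `pointmaze/umaze-v0` ID:

```python
import minari

# Assumes the dataset was generated locally by the script above; the ID follows
# the renamed "pointmaze/umaze-v0" scheme described in the README change.
dataset = minari.load_dataset("pointmaze/umaze-v0")

print("Episodes:", dataset.total_episodes)
print("Steps:", dataset.total_steps)

# Look at one episode to confirm trajectories were recorded.
episode = next(dataset.iterate_episodes())
print(episode.actions.shape)
```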
14 changes: 7 additions & 7 deletions checks/check_maze_dataset.py
@@ -30,8 +30,8 @@ def print_maze_stats(dataset):
coords = ep.observations["achieved_goal"]
velocity_sum += np.linalg.norm(coords[1:] - coords[:-1], axis=1).sum()

print(" | Success rate:", successes / dataset.total_episodes)
print(" | Avg velocity:", velocity_sum / dataset.total_steps)
print(" | Success rate:", successes / len(dataset))


def check_maze_reset_nonterminal(dataset, reset_threshold=0.5):
@@ -52,7 +52,7 @@ def check_qpos_qvel_identical_values(dataset):
qpos = check_dataset.get_infos(dataset, "qpos")
qvel = check_dataset.get_infos(dataset, "qvel")

for i in range(dataset.total_episodes):
for i in range(len(dataset)):
for values in [qpos[i], qvel[i]]:
if len(values) < 3:
continue
@@ -90,16 +90,16 @@ def check_qpos_qvel_shapes(dataset):
qvel = check_dataset.get_infos(dataset, "qvel")

qpos_message = (
f"Expected infos/qpos to have length {dataset.total_episodes}, got {len(qpos)}"
f"Expected infos/qpos to have length {len(dataset)}, got {len(qpos)}"
)
qvel_message = (
f"Expected infos/qvel to have length {dataset.total_episodes}, got {len(qvel)}"
f"Expected infos/qvel to have length {len(dataset)}, got {len(qvel)}"
)
assert len(qpos) == dataset.total_episodes, qpos_message
assert len(qvel) == dataset.total_episodes, qvel_message
assert len(qpos) == len(dataset), qpos_message
assert len(qvel) == len(dataset), qvel_message

for i, ep in enumerate(dataset):
num_steps = ep.total_timesteps + 1 # Same number of steps as observation
num_steps = len(ep) + 1 # Same number of steps as observation
qpos_shape_message = (
f"Expected infos/qpos (episode {i}) to have shape "
f"{(num_steps, num_q)}, got {qpos[i].shape}"
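The check changes above move from explicit counters (`dataset.total_episodes`, `ep.total_timesteps`) to `len(dataset)` and `len(ep)`. A minimal sketch of the relationship the shape check relies on, assuming a locally generated maze dataset (the ID here is only illustrative):

```python
import minari

# Illustrative dataset ID; any locally generated maze dataset would do.
dataset = minari.load_dataset("pointmaze/umaze-v0")
print("Episodes in dataset:", len(dataset))

for episode in dataset.iterate_episodes():
    # Each episode also stores the reset observation, so there is one more
    # observation than there are steps (hence the `len(ep) + 1` in the check).
    assert len(episode.observations["achieved_goal"]) == len(episode) + 1
```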
54 changes: 32 additions & 22 deletions scripts/antmaze/create_antmaze_dataset.py
@@ -8,17 +8,20 @@
See --help for full list of options.
"""

import sys
import argparse
import os
import random
import sys
from copy import deepcopy

import gymnasium as gym
import minari
import numpy as np
import torch
from minari import DataCollector, StepDataCallback
from stable_baselines3 import SAC
from tqdm import tqdm
from copy import deepcopy
import numpy as np
import argparse

from stable_baselines3 import SAC
from controller import WaypointController

sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), "../../checks")))
@@ -28,13 +31,19 @@
G = "g"
INFO_KEYS = ["success"]

def seed_everything(seed):
random.seed(seed)
np.random.seed(seed)
torch.manual_seed(seed)
torch.cuda.manual_seed(seed)
torch.backends.cudnn.deterministic = True

class AntMazeStepDataCallback(StepDataCallback):
"""Add environment state information to 'infos'.
"""Add environment state information to 'info'.

Also, since the environment generates a new target every time it reaches a goal, the
environment is never terminated or truncated. This callback overrides the truncation
value to True when the step returns a True 'success' key in 'infos'. This way we can
value to True when the step returns a True 'success' key in 'info'. This way we can
divide the Minari dataset into different trajectories.
"""

@@ -44,14 +53,14 @@ def __call__(
step_data = super().__call__(env, obs, info, action, rew, terminated, truncated)

# Filter out info keys that we don't want to store
step_data["infos"] = {k: step_data["infos"][k] for k in INFO_KEYS}
step_data["info"] = {k: step_data["info"][k] for k in INFO_KEYS}

# To restore the MuJoCo simulation state, we need to store qpos and qvel
step_data["infos"]["qpos"] = np.concatenate(
step_data["info"]["qpos"] = np.concatenate(
[obs["achieved_goal"], obs["observation"][:13]]
)
step_data["infos"]["qvel"] = obs["observation"][13:]
step_data["infos"]["goal"] = obs["desired_goal"]
step_data["info"]["qvel"] = obs["observation"][13:]
step_data["info"]["goal"] = obs["desired_goal"]

return step_data

@@ -99,12 +108,12 @@ def init_dataset(collector_env, dataset_id, eval_env_spec, expert_policy, args):
[1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1]]
}

DATASET_ID_TO_ENV_ID = {"antmaze-umaze-v1": "AntMaze_UMaze-v4",
"antmaze-umaze-diverse-v1": "AntMaze_UMaze-v4",
"antmaze-medium-play-v1": "AntMaze_Medium-v4",
"antmaze-medium-diverse-v1": "AntMaze_Medium_Diverse_GR-v4",
"antmaze-large-diverse-v1": "AntMaze_Large_Diverse_GR-v4",
"antmaze-large-play-v1": "AntMaze_Large-v4"}
DATASET_ID_TO_ENV_ID = {"D4RL/antmaze/umaze-v2": "AntMaze_UMaze-v4",
"D4RL/antmaze/umaze-diverse-v2": "AntMaze_UMaze-v4",
"D4RL/antmaze/medium-play-v2": "AntMaze_Medium-v4",
"D4RL/antmaze/medium-diverse-v2": "AntMaze_Medium_Diverse_GR-v4",
"D4RL/antmaze/large-diverse-v2": "AntMaze_Large_Diverse_GR-v4",
"D4RL/antmaze/large-play-v2": "AntMaze_Large-v4"}

if __name__ == "__main__":
parser = argparse.ArgumentParser()
@@ -141,8 +150,8 @@ def init_dataset(collector_env, dataset_id, eval_env_spec, expert_policy, args):
# is also not reset when it is reached, leading to reward accumulation.
# We set the maximum episode steps to the desired size of our Minari
# dataset (evade truncation due to time limit)
split_dataset_id = dataset_id.split('-')
if split_dataset_id[1] == "umaze" and split_dataset_id[2] != "diverse":
split_dataset_id = dataset_id.split('/')[-1].split('-')
if split_dataset_id[0] == "umaze" and split_dataset_id[1] != "diverse":
maze_map = [[1, 1, 1, 1, 1],
[1, G, 0, 0, 1],
[1, 1, 1, 0, 1],
@@ -156,15 +165,15 @@ def init_dataset(collector_env, dataset_id, eval_env_spec, expert_policy, args):
env_id, continuing_task=True, reset_target=False,
)
# Data collector wrapper to save temporary data while stepping. Characteristics:
# * Custom StepDataCallback to add extra state information to 'infos' and divide dataset in
# * Custom StepDataCallback to add extra state information to 'info' and divide dataset in
# different episodes by overriding truncation value to True when target is reached
# * Record the 'info' value of every step
collector_env = DataCollector(
env, step_data_callback=AntMazeStepDataCallback, record_infos=True
)

seed = args.seed
np.random.seed(seed)
seed_everything(seed)

model = SAC.load(args.policy_file)

@@ -189,7 +198,7 @@ def action_callback(obs, waypoint_xy):

if dataset is None:
eval_env_spec = deepcopy(env.spec)
eval_env_spec.kwargs['maze_map'] = EVAL_ENV_MAPS[split_dataset_id[1]]
eval_env_spec.kwargs['maze_map'] = EVAL_ENV_MAPS[split_dataset_id[0]]
eval_env = gym.make(eval_env_spec)
eval_waypoint_controller = WaypointController(eval_env.unwrapped.maze, action_callback)
dataset = init_dataset(collector_env, dataset_id, eval_env_spec, eval_waypoint_controller.compute_action, args)
@@ -200,6 +209,7 @@ def action_callback(obs, waypoint_xy):
# Reset the environment, either due to timeout or checkpointing.
if truncated:
seed += 1 # Increment the seed to prevent repeating old episodes
seed_everything(seed)
obs, info = collector_env.reset(seed=seed)

print(f"Checking {dataset_id}:")
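The script above now receives dataset IDs in the new `D4RL/antmaze/...` form, so the maze variant used to select maps moves from index 1 to index 0 once the namespace is stripped. A small worked example of that parsing, mirroring the `split_dataset_id` logic in the diff:

```python
# Mirrors the updated parsing in create_antmaze_dataset.py.
dataset_id = "D4RL/antmaze/medium-diverse-v2"

split_dataset_id = dataset_id.split("/")[-1].split("-")
print(split_dataset_id)  # ['medium', 'diverse', 'v2']

# The maze variant now sits at index 0 (with the old "antmaze-medium-diverse-v1"
# IDs it was at index 1), which is why EVAL_ENV_MAPS is indexed with
# split_dataset_id[0] above.
maze_variant = split_dataset_id[0]  # "medium"
```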
4 changes: 3 additions & 1 deletion scripts/antmaze/requirements.txt
@@ -2,6 +2,8 @@ numpy==1.26.4
scipy==1.12.0
packaging==24.0
gymnasium-robotics==1.2.4
minari==0.4.3
minari[create]==0.5.1
stable_baselines3@git+https://github.com/DLR-RM/stable-baselines3.git@f56ddeda10b1e3669a77a1c28c56944036286833
tqdm==4.66.2
minigrid==2.3.1
mujoco==2.3.7