diff --git a/.gitignore b/.gitignore
index c28fc87..7063d26 100644
--- a/.gitignore
+++ b/.gitignore
@@ -1 +1,4 @@
-./data/temp_obs
+
+factored_ssl
+__pycache__
+temp*
\ No newline at end of file
diff --git a/configs/data_generator/config.yaml b/configs/data_generator/config.yaml
index e9fb687..5c2050b 100644
--- a/configs/data_generator/config.yaml
+++ b/configs/data_generator/config.yaml
@@ -23,8 +23,8 @@ state_attributes: [agent_pos, agent_dir, goal_pos, key_pos, door_pos, holding_ke
 state_attribute_types: {'agent_pos': ['coordinate_width', 'coordinate_height'], 'agent_dir':['agent_dir'], 'goal_pos':['coordinate_width', 'coordinate_height'], 'key_pos':['coordinate_width', 'coordinate_height'], 'door_pos':['coordinate_width', 'coordinate_height'], 'holding_key': ['boolean'], 'door_locked':['boolean'], 'door_open':['boolean']}
 
 #options: [default, random, custom]
-reset_type: default
-
+#reset_type: default
+reset_type: custom
 
 #options: [agent_pos: [x,y], agent_dir: [int], goal_pos: [x,y], key_pos: [x,y], door_pos: [x,y], holding_key: true/false, door_locked: true/false, door_open: true/false]
 #agent_dir: 0 - right, 1 - down, 2 - left, 3 - up
diff --git a/data/__pycache__/data_augmentor.cpython-39.pyc b/data/__pycache__/data_augmentor.cpython-39.pyc
index 2219c2b..e0a4ea0 100644
Binary files a/data/__pycache__/data_augmentor.cpython-39.pyc and b/data/__pycache__/data_augmentor.cpython-39.pyc differ
diff --git a/data/__pycache__/data_generator.cpython-39.pyc b/data/__pycache__/data_generator.cpython-39.pyc
index b73f59e..0aa5831 100644
Binary files a/data/__pycache__/data_generator.cpython-39.pyc and b/data/__pycache__/data_generator.cpython-39.pyc differ
diff --git a/data/data_generator.py b/data/data_generator.py
index 7b323e0..7fd119e 100644
--- a/data/data_generator.py
+++ b/data/data_generator.py
@@ -31,6 +31,14 @@
 [DONE] TODO: implement (multiple) controlled environment factors at once
 - Random bug sometimes does not allow for factored expert state to be retrieved
 '''
+
+
+'''
+[CH 10/23] Add global action dict
+'''
+action_list = ["left","right","forward","pickup","drop","activate","done"]
+
+
 class StochasticActionWrapper(gym.ActionWrapper):
     """
     Add stochasticity to the actions
@@ -59,16 +67,17 @@ def load_config(self, file_path):
             config = yaml.safe_load(file)
         return config
 
-    def __init__(self):
+    def __init__(self, config_path):
         super(DataGenerator, self).__init__()
 
         # Parse yaml file parameters for data generator
-        configs = self.load_config(os.path.join(os.path.dirname(__file__), '../configs/data_generator/config.yaml'))
+        configs = self.load_config(os.path.join(config_path))
 
         # Configs from the yaml file
         self.observation_type = configs['observation_space']
         self.state_attributes = configs['state_attributes']
+        self.state_attribute_types = configs['state_attribute_types']
         self.reset_type = configs['reset_type']
 
         # Create the environment
@@ -86,7 +95,7 @@ def __init__(self):
 
         #Store the controlled factors array
         self.controlled_factors = configs['controlled_factors']
-
+
         #storing the custom reset function if needed
         self.custom_resetter = CustomEnvReset(configs['environment_name'])
@@ -98,10 +107,15 @@ def __init__(self):
 
         #creating the observation space and actions space
         self.action_space = self.env.action_space
+
+        self.gym_space_params = {'boolean': (0, 1, int),
+                                 'coordinate_width': (0, self.env.grid.width, int),
+                                 'coordinate_height': (0, self.env.grid.height, int),
+                                 'agent_dir': (0, 3, int)
+                                 }
         self.observation_space = self._create_observation_space(configs['state_attribute_types'])
-
         #creating other gym environment attributes
         self.spec = self.env.spec
         self.metadata = self.env.metadata
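Note: the `gym_space_params` table, now stored on `self`, drives `_create_observation_space` for the `expert` observation type: each attribute in `state_attribute_types` contributes one `(low, high)` bound per primitive type, concatenated into a flat `gym.spaces.Box`. A minimal sketch of that construction (a fixed grid size of 10 stands in for `self.env.grid.width`/`height`; the attribute subset is illustrative):

```python
import numpy as np
import gym

# Per-type bounds, mirroring self.gym_space_params; 10 is a placeholder grid size.
gym_space_params = {'boolean': (0, 1, int),
                    'coordinate_width': (0, 10, int),
                    'coordinate_height': (0, 10, int),
                    'agent_dir': (0, 3, int)}

# Illustrative subset of the config's state_attribute_types.
state_attribute_types = {'agent_pos': ['coordinate_width', 'coordinate_height'],
                         'holding_key': ['boolean']}

# Concatenate per-type bounds in attribute order, as _create_observation_space does.
min_values = np.array([]); max_values = np.array([])
for attr, types in state_attribute_types.items():
    for t in types:
        low, high, _ = gym_space_params[t]
        min_values = np.append(min_values, low)
        max_values = np.append(max_values, high)

# Flat bounds for (agent_x, agent_y, holding_key).
observation_space = gym.spaces.Box(low=min_values, high=max_values, dtype=int)
```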
@@ -116,9 +130,7 @@ def _create_observation_space(self, state_attribute_types):
             return gym.spaces.Box(low=0, high=255, shape=frame.shape, dtype=np.uint8)
 
         elif self.observation_type == 'expert':
-
-            gym_space_params = {'boolean': (0, 1, int), 'coordinate_width': (0, self.env.grid.width, int), 'coordinate_height': (0, self.env.grid.height, int), 'agent_dir': (0, 3, int)}
-
+
             relevant_state_variables = list(self._construct_state().keys())
 
             min_values = np.array([]); max_values = np.array([])
@@ -128,7 +140,7 @@ def _create_observation_space(self, state_attribute_types):
 
             for t in types:
 
-                space_param = gym_space_params[t]
+                space_param = self.gym_space_params[t]
 
                 min_values = np.append(min_values, space_param[0])
                 max_values = np.append(max_values, space_param[1])
@@ -137,8 +149,6 @@ def _create_observation_space(self, state_attribute_types):
 
             return gym.spaces.Box(low=min_values, high=max_values, dtype=int)
 
-
-
         elif self.observation_type == 'factored':
             raise NotImplementedError('ERROR: to be implemented after factored representation encoder')
@@ -209,7 +219,6 @@ def reset(self, seed=None):
         elif self.reset_type == 'random':
             self._randomize_reset()
 
-
         frame = self.env.render()
 
         #add the visual observation before augmentation for debugging
         info['original_obs'] = frame
@@ -229,21 +238,11 @@ def reset(self, seed=None):
 
         info['state_dict'] = state
         state = [item for sublist in state.values() for item in (sublist if isinstance(sublist, tuple) else [sublist])]
-
-
         observation = self._get_obs(image = frame, state = state, factored = factored)
-
-
         return observation, info
-
-
-
-
-
-
     def _factorize_obs(self, observation):
         #TODO: implement inference time call to factored representation model
         return None
@@ -254,7 +253,6 @@ def _construct_state(self):
 
         #extract the types of all tiles in the grid: useful for goal, key and door position
         types = np.array([x.type if x is not None else None for x in self.env.unwrapped.grid.grid])
-
         for attr in self.state_attributes:
 
             if hasattr(self.env.unwrapped, attr):
@@ -267,7 +265,6 @@ def _construct_state(self):
                 state[attr] = self.env.unwrapped.grid.grid[np.where(types=='key')[0][0]].cur_pos
             elif ('door' in types) and (attr == 'door_pos'):
                 state[attr] = self.env.unwrapped.grid.grid[np.where(types=='door')[0][0]].cur_pos
-
             #other attributes like opening, holding, locked etc...
             elif ('key' in types) and (attr == 'holding_key'):
@@ -298,10 +295,8 @@ def _get_obs(self, image=None, state=None, factored=None):
 
 if __name__ == '__main__':
 
-    pdb.set_trace()
-
-    data_generator = DataGenerator()
-
+    # pdb.set_trace()
+    data_generator = DataGenerator(config_path='../configs/data_generator/config.yaml')
     MAX_STEPS = 5
 
     temp_dir = os.path.relpath('./temp_obs')
@@ -311,29 +306,19 @@ def _get_obs(self, image=None, state=None, factored=None):
         # pdb.set_trace()
         obs, info = data_generator.reset(seed=j)
         img = Image.fromarray(info['obs'])
-        img.save(os.path.join(temp_dir, 'reset_test.jpeg'))
+        img.save(os.path.join(temp_dir, f'run_{j}_reset.jpeg'))
 
         for i in range(MAX_STEPS):
-            rand_action = 6
-            while (rand_action == 6):
             rand_action = data_generator.env.action_space.sample()
+            observation, reward, terminated, truncated, info = data_generator.step(rand_action)
 
-            observation, reward, terminated, truncated, info = data_generator.step(rand_action)
-
-            print('Current State :', observation)
+            # print('Current State :', observation)
+            print('Action: ', action_list[rand_action])
             print('Info: ', info['state_dict'])
             print('Reward: ', reward)
 
             img = Image.fromarray(info['obs'])
-
-            img.save(os.path.join(temp_dir, '{}_modified.jpeg'.format(i)))
-
-            # img = Image.fromarray(info['original_obs'])
-
-            # img.save(os.path.join(temp_dir, '{}_original.jpeg'.format(i)))
-
-
+            img.save(os.path.join(temp_dir, f'run_{j}_step_{i}_action_{rand_action}.jpeg'))
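Note: with the constructor now parameterized by `config_path`, the `__main__` block above doubles as the usage pattern. A condensed, illustrative sketch (assumes the repo root is on `PYTHONPATH`; the import path, config path, and filenames are assumptions, not part of the patch's own demo, which runs from inside `data/`):

```python
from PIL import Image
from data.data_generator import DataGenerator, action_list  # assumed import path

data_generator = DataGenerator(config_path='configs/data_generator/config.yaml')

obs, info = data_generator.reset(seed=0)
for i in range(5):
    rand_action = data_generator.env.action_space.sample()
    observation, reward, terminated, truncated, info = data_generator.step(rand_action)
    print('Action:', action_list[rand_action], '| Reward:', reward)
    Image.fromarray(info['obs']).save(f'step_{i}_action_{rand_action}.jpeg')
```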
diff --git a/data/temp_obs/0.jpeg b/data/temp_obs/0.jpeg
deleted file mode 100644
index 73c0eaf..0000000
Binary files a/data/temp_obs/0.jpeg and /dev/null differ
diff --git a/data/temp_obs/0_modified.jpeg b/data/temp_obs/0_modified.jpeg
deleted file mode 100644
index 4561814..0000000
Binary files a/data/temp_obs/0_modified.jpeg and /dev/null differ
diff --git a/data/temp_obs/0_original.jpeg b/data/temp_obs/0_original.jpeg
deleted file mode 100644
index 027e70d..0000000
Binary files a/data/temp_obs/0_original.jpeg and /dev/null differ
diff --git a/data/temp_obs/1.jpeg b/data/temp_obs/1.jpeg
deleted file mode 100644
index e524b57..0000000
Binary files a/data/temp_obs/1.jpeg and /dev/null differ
diff --git a/data/temp_obs/2.jpeg b/data/temp_obs/2.jpeg
deleted file mode 100644
index 8981fb5..0000000
Binary files a/data/temp_obs/2.jpeg and /dev/null differ
diff --git a/data/temp_obs/3.jpeg b/data/temp_obs/3.jpeg
deleted file mode 100644
index 8981fb5..0000000
Binary files a/data/temp_obs/3.jpeg and /dev/null differ
diff --git a/data/temp_obs/4.jpeg b/data/temp_obs/4.jpeg
deleted file mode 100644
index be0864d..0000000
Binary files a/data/temp_obs/4.jpeg and /dev/null differ
diff --git a/data/temp_obs/mod_img.jpeg b/data/temp_obs/mod_img.jpeg
deleted file mode 100644
index 71df741..0000000
Binary files a/data/temp_obs/mod_img.jpeg and /dev/null differ
diff --git a/data/utils/controlled_reset.py b/data/utils/controlled_reset.py
index 6fa970d..02f4b2a 100644
--- a/data/utils/controlled_reset.py
+++ b/data/utils/controlled_reset.py
@@ -12,13 +12,11 @@ class CustomEnvReset:
     def __init__(self, env_name):
 
         custom_reset = {'DoorKey': self._custom_reset_doorkey, 'LavaCrossing': self._custom_reset_lavacrossing, 'FourRooms': self._custom_reset_fourrooms}
-
         for k in custom_reset.keys():
             if k in env_name:
                 self.factored_reset = custom_reset[k]
 
     def _custom_reset_doorkey(self, env, width, height, controlled_factors):
-
         #change the random seed locally
         curr_rng = env.unwrapped.np_random
         local_rng = np.random.default_rng(int(100*random.random()))
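Note: the `curr_rng`/`local_rng` pair above is a swap-and-restore pattern: the controlled reset draws from a throwaway generator so the environment's seeded stream stays reproducible. The same pattern as a standalone helper (`with_local_rng` is a hypothetical name; the diff inlines this logic):

```python
import random
import numpy as np

def with_local_rng(env, reset_fn):
    """Run reset_fn under a throwaway RNG, then restore the seeded one."""
    curr_rng = env.unwrapped.np_random                  # stash the seeded generator
    env.unwrapped.np_random = np.random.default_rng(int(100 * random.random()))
    try:
        reset_fn(env)                                   # controlled reset runs here
    finally:
        env.unwrapped.np_random = curr_rng              # restore for reproducibility
    return env
```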
@@ -34,11 +32,11 @@ def _custom_reset_doorkey(self, env, width, height, controlled_factors):
         env.unwrapped.grid.wall_rect(0, 0, width, height)
 
         # factor 1: control goal position
-        goal_pos = (controlled_factors['goal_pos'][0], controlled_factors['goal_pos'][1]) if 'goal_pos' in controlled_factors else (env.unwrapped._rand_int(1, width - 1), env.unwrapped._rand_int(1, height - 1))
-        if goal_pos not in used_locations:
-            env.unwrapped.put_obj(Goal(), goal_pos[0], goal_pos[1])
-            used_locations.add(goal_pos)
-
+        goal_pos = (controlled_factors['goal_pos'][0], controlled_factors['goal_pos'][1]) if 'goal_pos' in controlled_factors else None
+        while goal_pos is None or ('door_pos' in controlled_factors and goal_pos[0]==controlled_factors['door_pos'][0]):
+            goal_pos = (env.unwrapped._rand_int(1, width - 1), env.unwrapped._rand_int(1, height - 1))
+        env.unwrapped.put_obj(Goal(), goal_pos[0], goal_pos[1])
+        used_locations.add(goal_pos)
 
         # factor 2: control door position
         if 'door_pos' in controlled_factors:
@@ -46,40 +44,71 @@ def _custom_reset_doorkey(self, env, width, height, controlled_factors):
             doorIdx = controlled_factors['door_pos'][1]
         else:
             splitIdx = None; doorIdx = None
-            while (splitIdx, doorIdx) in used_locations or (splitIdx is None or doorIdx is None):
+            while (splitIdx, doorIdx) in used_locations or (splitIdx is None or doorIdx is None) or (goal_pos[0]==splitIdx):
                 splitIdx = env.unwrapped._rand_int(2, width - 2)
                 doorIdx = env.unwrapped._rand_int(1, height - 2)
 
         # factor 3: control door locked / unlocked
         # factor 4: control door open/closed
         env.unwrapped.grid.vert_wall(splitIdx, 0)
-        door_locked = controlled_factors['door_locked'] if 'door_locked' in controlled_factors else True
-        door_open = controlled_factors['door_open'] if 'door_open' in controlled_factors else False
+        door_locked = controlled_factors['door_locked'] if 'door_locked' in controlled_factors else np.random.randint(0, 2)
+        if 'door_open' in controlled_factors:
+            door_open = controlled_factors['door_open']
+            if door_open:
+                door_locked = False
+        elif door_locked:
+            door_open = False
+        else:
+            door_open = np.random.randint(0, 2)
         env.unwrapped.put_obj(Door("yellow", is_locked=door_locked, is_open=door_open), splitIdx, doorIdx)
         used_locations.add((splitIdx, doorIdx))
+
         # factor 5: control key position
         # factor 6: control holding key
         # pdb.set_trace()
-        if not (('door_locked' in controlled_factors and controlled_factors['door_locked'] is False) or ('holding_key' in controlled_factors and controlled_factors['holding_key'] is True)):
-
-            if 'key_pos' in controlled_factors:
-                key_top = controlled_factors['key_pos']
-                key_size = (1,1)
-            else:
-                key_top = (0,0)
-                key_size = (splitIdx, height)
-
-            env.unwrapped.place_obj(obj=Key("yellow"), top= key_top, size= key_size)
+        #if not (('door_locked' in controlled_factors and controlled_factors['door_locked'] is False) or ('holding_key' in controlled_factors and controlled_factors['holding_key']==1)):
+        if 'key_pos' in controlled_factors:
+            holding_key = 0
+            key_top = controlled_factors['key_pos']
+            key_size = (1,1)
+            env.unwrapped.place_obj(obj=Key("yellow"), top= key_top, size= key_size,max_tries=10)
         else:
-            #need to set the agent property as holding key
-            env.unwrapped.carrying = Key("yellow")
-
+            if 'holding_key' in controlled_factors and controlled_factors['holding_key'] == 1:
+                #need to set the agent property as holding key
+                holding_key = 1
+                env.unwrapped.carrying = Key("yellow")
+            else:
+                if 'holding_key' in controlled_factors and controlled_factors['holding_key'] == 0:
+                    holding_key = 0
+                    if 'key_pos' in controlled_factors:
+                        key_top = controlled_factors['key_pos']
+                        key_size = (1,1)
+                        env.unwrapped.place_obj(obj=Key("yellow"), top= key_top, size= key_size,max_tries=10)
+                    else:
+                        key_top = (0,0)
+                        key_size = (splitIdx, height)
+                        env.unwrapped.place_obj(obj=Key("yellow"), top= key_top, size= key_size,max_tries=10)
+                else:
+                    # randomly decide hold key or not
+                    holding_key = np.random.randint(0,2)
+                    if holding_key:
+                        env.unwrapped.carrying = Key("yellow")
+                    else:
+                        if 'key_pos' in controlled_factors:
+                            key_top = controlled_factors['key_pos']
+                            key_size = (1,1)
+                            env.unwrapped.place_obj(obj=Key("yellow"), top= key_top, size= key_size,max_tries=10)
+                        else:
+                            key_top = (0,0)
+                            key_size = (splitIdx, height)
+                            env.unwrapped.place_obj(obj=Key("yellow"), top= key_top, size= key_size,max_tries=10)
+
         # factor 7: control agent position
         agent_top = tuple(controlled_factors['agent_pos']) if 'agent_pos' in controlled_factors else (0,0)
         agent_size = (1,1) if 'agent_pos' in controlled_factors else (splitIdx, height)
 
-        env.unwrapped.place_agent(top=agent_top, size=agent_size)
-
+        env.unwrapped.place_agent(top=agent_top, size=agent_size, max_tries=10)
+
         #factor 8: control agent direction
         if 'agent_dir' in controlled_factors:
             env.unwrapped.agent_dir = controlled_factors['agent_dir']
@@ -89,7 +118,6 @@ def _custom_reset_doorkey(self, env, width, height, controlled_factors):
         env.unwrapped.mission = "use the key to open the door and then get to the goal"
 
         #reset the original rng after resetting env
         env.unwrapped.np_random = curr_rng
-
         return env
 
@@ -162,7 +190,7 @@ def _custom_reset_fourrooms(self, env, width, height, controlled_factors):
             env.unwrapped.put_obj(goal, *tuple(controlled_factors['goal_pos']))
             goal.init_pos, goal.cur_pos = tuple(controlled_factors['goal_pos']), tuple(controlled_factors['goal_pos'])
         else:
-            env.unwrapped.place_obj(Goal())
+            env.unwrapped.place_obj(Goal(),max_tries=10)
 
         env.unwrapped.mission = "reach the goal"
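Note: the rewritten door logic keeps the two factors mutually consistent: a controlled `door_open=True` forces the door unlocked, a locked door is forced closed, and any unspecified factor is coin-flipped. The same resolution rule extracted as a standalone function (`resolve_door_state` is a hypothetical name; the diff inlines this):

```python
import numpy as np

def resolve_door_state(controlled_factors):
    # door_locked defaults to a coin flip when not controlled
    door_locked = controlled_factors.get('door_locked', np.random.randint(0, 2))
    if 'door_open' in controlled_factors:
        door_open = controlled_factors['door_open']
        if door_open:
            door_locked = False       # an open door cannot remain locked
    elif door_locked:
        door_open = False             # a locked door must be closed
    else:
        door_open = np.random.randint(0, 2)
    return bool(door_locked), bool(door_open)
```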
diff --git a/frame.jpeg b/frame.jpeg
new file mode 100644
index 0000000..4165a88
Binary files /dev/null and b/frame.jpeg differ
diff --git a/script_gen_data.py b/script_gen_data.py
new file mode 100644
index 0000000..6fd3c80
--- /dev/null
+++ b/script_gen_data.py
@@ -0,0 +1,320 @@
+import os
+from PIL import Image
+import torch
+
+from tqdm import tqdm
+import numpy as np
+import copy
+
+action_list = ["left","right","forward","pickup","drop","activate","done"]
+
+class GenerateDataset:
+    def __init__(
+        self,
+        data_generator,
+        dataset_root_path,
+        episode_max_length
+    ):
+        self.data_generator = data_generator
+        self.dataset_root_path = dataset_root_path
+        self.episode_max_length = episode_max_length
+
+        if not os.path.exists(dataset_root_path):
+            os.makedirs(dataset_root_path)
+
+        self._save_func = {"episode_obs": self._save_episode_obs,
+                           "episode_info": self._save_episode_info,
+                           "episode_action": self._save_episode_action,
+                           "episode_reward": self._save_episode_reward
+                           }
+
+    # episode (o0, a0, o1, a1,..., on)
+    def sample_episodes(self, policy_net=None, optimizer=None, num_episodes=1000, save_features=["obs","action","info"]):
+        print(" Sampling episodes...")
+        self._set_data_generator_reset_type("default")
+
+        do_random_policy = True if policy_net == None else False
+
+        samples = []
+        for episode in tqdm(range(num_episodes), desc="Running episodes"):
+
+            episode_dict = {"obs":[], "info":[], "action":[], "reward":[]}
+            done = False
+            steps = 0
+            obs, info = self._get_reset_data_generator()
+            episode_dict["obs"].append(obs) # obs: image, expert, or factored
episode_dict["info"].append(info) # info: everything, regard as learning target for obs + + while not done and steps