Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 4 additions & 1 deletion .gitignore
Original file line number Diff line number Diff line change
@@ -1 +1,4 @@
./data/temp_obs

factored_ssl
__pycache__
temp*
4 changes: 2 additions & 2 deletions configs/data_generator/config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -23,8 +23,8 @@ state_attributes: [agent_pos, agent_dir, goal_pos, key_pos, door_pos, holding_ke
state_attribute_types: {'agent_pos': ['coordinate_width', 'coordinate_height'], 'agent_dir':['agent_dir'], 'goal_pos':['coordinate_width', 'coordinate_height'], 'key_pos':['coordinate_width', 'coordinate_height'], 'door_pos':['coordinate_width', 'coordinate_height'], 'holding_key': ['boolean'], 'door_locked':['boolean'], 'door_open':['boolean']}

#options: [default, random, custom]
reset_type: default

#reset_type: default
reset_type: custom

#options: [agent_pos: [x,y], agent_dir: [int], goal_pos: [x,y], key_pos: [x,y], door_pos: [x,y], holding_key: true/false, door_locked: true/false, door_open: true/false]
#agent_dir: 0 - right, 1 - down, 2 - left, 3 - up
Expand Down
Binary file modified data/__pycache__/data_augmentor.cpython-39.pyc
Binary file not shown.
Binary file modified data/__pycache__/data_generator.cpython-39.pyc
Binary file not shown.
69 changes: 27 additions & 42 deletions data/data_generator.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,14 @@
[DONE] TODO: implement (multiple) controlled environment factors at once
- Random bug sometimes does not allow for factored expert state to be retrieved
'''


'''
[CH 10/23] Add global list of action names (indexed by integer action id)
'''
# Display names for the discrete actions. The __main__ demo prints
# action_list[rand_action], where rand_action is the integer sampled from
# env.action_space, so the order here must match the env's action ids.
action_list = ["left","right","forward","pickup","drop","activate","done"]


class StochasticActionWrapper(gym.ActionWrapper):
"""
Add stochasticity to the actions
Expand Down Expand Up @@ -59,16 +67,17 @@ def load_config(self, file_path):
config = yaml.safe_load(file)
return config

def __init__(self):
def __init__(self, config_path):

super(DataGenerator, self).__init__()

# Parse yaml file parameters for data generator
configs = self.load_config(os.path.join(os.path.dirname(__file__), '../configs/data_generator/config.yaml'))
configs = self.load_config(os.path.join(config_path))

# Configs from the yaml file
self.observation_type = configs['observation_space']
self.state_attributes = configs['state_attributes']
self.state_attribute_types = configs['state_attribute_types']
self.reset_type = configs['reset_type']

# Create the environment
Expand All @@ -86,7 +95,7 @@ def __init__(self):

#Store the controlled factors array
self.controlled_factors = configs['controlled_factors']

#storing the custom reset function if needed
self.custom_resetter = CustomEnvReset(configs['environment_name'])

Expand All @@ -98,10 +107,15 @@ def __init__(self):

#creating the observation space and actions space
self.action_space = self.env.action_space

self.gym_space_params = {'boolean': (0, 1, int),
'coordinate_width': (0, self.env.grid.width, int),
'coordinate_height': (0, self.env.grid.height, int),
'agent_dir': (0, 3, int)
}

self.observation_space = self._create_observation_space(configs['state_attribute_types'])


#creating other gym environment attributes
self.spec = self.env.spec
self.metadata = self.env.metadata
Expand All @@ -116,9 +130,7 @@ def _create_observation_space(self, state_attribute_types):
return gym.spaces.Box(low=0, high=255, shape=frame.shape, dtype=np.uint8)

elif self.observation_type == 'expert':

gym_space_params = {'boolean': (0, 1, int), 'coordinate_width': (0, self.env.grid.width, int), 'coordinate_height': (0, self.env.grid.height, int), 'agent_dir': (0, 3, int)}


relevant_state_variables = list(self._construct_state().keys())

min_values = np.array([]); max_values = np.array([])
Expand All @@ -128,7 +140,7 @@ def _create_observation_space(self, state_attribute_types):

for t in types:

space_param = gym_space_params[t]
space_param = self.gym_space_params[t]

min_values = np.append(min_values, space_param[0])
max_values = np.append(max_values, space_param[1])
Expand All @@ -137,8 +149,6 @@ def _create_observation_space(self, state_attribute_types):
return gym.spaces.Box(low=min_values, high=max_values, dtype=int)




elif self.observation_type == 'factored':
raise NotImplementedError('ERROR: to be implemented after factored representation encoder')

Expand Down Expand Up @@ -209,7 +219,6 @@ def reset(self, seed=None):
elif self.reset_type == 'random':
self._randomize_reset()


frame = self.env.render()
#add the visual observation before augmentation for debugging
info['original_obs'] = frame
Expand All @@ -229,21 +238,11 @@ def reset(self, seed=None):
info['state_dict'] = state

state = [item for sublist in state.values() for item in (sublist if isinstance(sublist, tuple) else [sublist])]


observation = self._get_obs(image = frame, state = state, factored = factored)



return observation, info








def _factorize_obs(self, observation):
#TODO: implement inference time call to factored representation model
return None
Expand All @@ -254,7 +253,6 @@ def _construct_state(self):

#extract the types of all tiles in the grid: useful for goal, key and door position
types = np.array([x.type if x is not None else None for x in self.env.unwrapped.grid.grid])

for attr in self.state_attributes:

if hasattr(self.env.unwrapped, attr):
Expand All @@ -267,7 +265,6 @@ def _construct_state(self):
state[attr] = self.env.unwrapped.grid.grid[np.where(types=='key')[0][0]].cur_pos
elif ('door' in types) and (attr == 'door_pos'):
state[attr] = self.env.unwrapped.grid.grid[np.where(types=='door')[0][0]].cur_pos


#other attributes like opening, holding, locked etc...
elif ('key' in types) and (attr == 'holding_key'):
Expand Down Expand Up @@ -298,10 +295,8 @@ def _get_obs(self, image=None, state=None, factored=None):

if __name__ == '__main__':

pdb.set_trace()

data_generator = DataGenerator()

# pdb.set_trace()
data_generator = DataGenerator(config_path='../configs/data_generator/config.yaml')
MAX_STEPS = 5

temp_dir = os.path.relpath('./temp_obs')
Expand All @@ -311,29 +306,19 @@ def _get_obs(self, image=None, state=None, factored=None):
# pdb.set_trace()
obs, info = data_generator.reset(seed=j)
img = Image.fromarray(info['obs'])
img.save(os.path.join(temp_dir, 'reset_test.jpeg'))
img.save(os.path.join(temp_dir, f'run_{j}_reset.jpeg'))

for i in range(MAX_STEPS):

rand_action = 6

while (rand_action == 6):

rand_action = data_generator.env.action_space.sample()
observation, reward, terminated, truncated, info = data_generator.step(rand_action)

observation, reward, terminated, truncated, info = data_generator.step(rand_action)

print('Current State :', observation)
# print('Current State :', observation)
print('Action: ', action_list[rand_action])
print('Info: ', info['state_dict'])
print('Reward: ', reward)

img = Image.fromarray(info['obs'])

img.save(os.path.join(temp_dir, '{}_modified.jpeg'.format(i)))

# img = Image.fromarray(info['original_obs'])

# img.save(os.path.join(temp_dir, '{}_original.jpeg'.format(i)))


img.save(os.path.join(temp_dir, f'run_{j}_step_{i}_action_{rand_action}.jpeg'))

Binary file removed data/temp_obs/0.jpeg
Binary file not shown.
Binary file removed data/temp_obs/0_modified.jpeg
Binary file not shown.
Binary file removed data/temp_obs/0_original.jpeg
Binary file not shown.
Binary file removed data/temp_obs/1.jpeg
Binary file not shown.
Binary file removed data/temp_obs/2.jpeg
Binary file not shown.
Binary file removed data/temp_obs/3.jpeg
Binary file not shown.
Binary file removed data/temp_obs/4.jpeg
Binary file not shown.
Binary file removed data/temp_obs/mod_img.jpeg
Binary file not shown.
82 changes: 55 additions & 27 deletions data/utils/controlled_reset.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,13 +12,11 @@ class CustomEnvReset:
def __init__(self, env_name):

custom_reset = {'DoorKey': self._custom_reset_doorkey, 'LavaCrossing': self._custom_reset_lavacrossing, 'FourRooms': self._custom_reset_fourrooms}

for k in custom_reset.keys():
if k in env_name:
self.factored_reset = custom_reset[k]

def _custom_reset_doorkey(self, env, width, height, controlled_factors):

#change the random seed locally
curr_rng = env.unwrapped.np_random
local_rng = np.random.default_rng(int(100*random.random()))
Expand All @@ -34,52 +32,83 @@ def _custom_reset_doorkey(self, env, width, height, controlled_factors):
env.unwrapped.grid.wall_rect(0, 0, width, height)

# factor 1: control goal position
goal_pos = (controlled_factors['goal_pos'][0], controlled_factors['goal_pos'][1]) if 'goal_pos' in controlled_factors else (env.unwrapped._rand_int(1, width - 1), env.unwrapped._rand_int(1, height - 1))
if goal_pos not in used_locations:
env.unwrapped.put_obj(Goal(), goal_pos[0], goal_pos[1])
used_locations.add(goal_pos)

goal_pos = (controlled_factors['goal_pos'][0], controlled_factors['goal_pos'][1]) if 'goal_pos' in controlled_factors else None
while goal_pos is None or ('door_pos' in controlled_factors and goal_pos[0]==controlled_factors['door_pos'][0]):
goal_pos = (env.unwrapped._rand_int(1, width - 1), env.unwrapped._rand_int(1, height - 1))
env.unwrapped.put_obj(Goal(), goal_pos[0], goal_pos[1])
used_locations.add(goal_pos)

# factor 2: control door position
if 'door_pos' in controlled_factors:
splitIdx = controlled_factors['door_pos'][0]
doorIdx = controlled_factors['door_pos'][1]
else:
splitIdx = None; doorIdx = None
while (splitIdx, doorIdx) in used_locations or (splitIdx is None or doorIdx is None):
while (splitIdx, doorIdx) in used_locations or (splitIdx is None or doorIdx is None) or (goal_pos[0]==splitIdx):
splitIdx = env.unwrapped._rand_int(2, width - 2)
doorIdx = env.unwrapped._rand_int(1, height - 2)

# factor 3: control door locked / unlocked
# factor 4: control door open/closed
env.unwrapped.grid.vert_wall(splitIdx, 0)
door_locked = controlled_factors['door_locked'] if 'door_locked' in controlled_factors else True
door_open = controlled_factors['door_open'] if 'door_open' in controlled_factors else False
door_locked = controlled_factors['door_locked'] if 'door_locked' in controlled_factors else np.random.randint(0, 2)
if 'door_open' in controlled_factors:
door_open = controlled_factors['door_open']
if door_open:
door_locked = False
elif door_locked:
door_open = False
else:
door_open = np.random.randint(0, 2)
env.unwrapped.put_obj(Door("yellow", is_locked=door_locked, is_open=door_open), splitIdx, doorIdx)
used_locations.add((splitIdx, doorIdx))


# factor 5: control key position
# factor 6: control holding key
# pdb.set_trace()
if not (('door_locked' in controlled_factors and controlled_factors['door_locked'] is False) or ('holding_key' in controlled_factors and controlled_factors['holding_key'] is True)):

if 'key_pos' in controlled_factors:
key_top = controlled_factors['key_pos']
key_size = (1,1)
else:
key_top = (0,0)
key_size = (splitIdx, height)

env.unwrapped.place_obj(obj=Key("yellow"), top= key_top, size= key_size)
#if not (('door_locked' in controlled_factors and controlled_factors['door_locked'] is False) or ('holding_key' in controlled_factors and controlled_factors['holding_key']==1)):
if 'key_pos' in controlled_factors:
holding_key = 0
key_top = controlled_factors['key_pos']
key_size = (1,1)
env.unwrapped.place_obj(obj=Key("yellow"), top= key_top, size= key_size,max_tries=10)
else:
#need to set the agent property as holding key
env.unwrapped.carrying = Key("yellow")

if 'holding_key' in controlled_factors and controlled_factors['holding_key'] == 1:
#need to set the agent property as holding key
holding_key = 1
env.unwrapped.carrying = Key("yellow")
else:
if 'holding_key' in controlled_factors and controlled_factors['holding_key'] == 0:
holding_key = 0
if 'key_pos' in controlled_factors:
key_top = controlled_factors['key_pos']
key_size = (1,1)
env.unwrapped.place_obj(obj=Key("yellow"), top= key_top, size= key_size,max_tries=10)
else:
key_top = (0,0)
key_size = (splitIdx, height)
env.unwrapped.place_obj(obj=Key("yellow"), top= key_top, size= key_size,max_tries=10)
else:
# randomly decide whether the agent starts out holding the key
holding_key = np.random.randint(0,2)
if holding_key:
env.unwrapped.carrying = Key("yellow")
else:
if 'key_pos' in controlled_factors:
key_top = controlled_factors['key_pos']
key_size = (1,1)
env.unwrapped.place_obj(obj=Key("yellow"), top= key_top, size= key_size,max_tries=10)
else:
key_top = (0,0)
key_size = (splitIdx, height)
env.unwrapped.place_obj(obj=Key("yellow"), top= key_top, size= key_size,max_tries=10)

# factor 7: control agent position
agent_top = tuple(controlled_factors['agent_pos']) if 'agent_pos' in controlled_factors else (0,0)
agent_size = (1,1) if 'agent_pos' in controlled_factors else (splitIdx, height)
env.unwrapped.place_agent(top=agent_top, size=agent_size)
env.unwrapped.place_agent(top=agent_top, size=agent_size, max_tries=10)

#factor 8: control agent direction
if 'agent_dir' in controlled_factors:
env.unwrapped.agent_dir = controlled_factors['agent_dir']
Expand All @@ -89,7 +118,6 @@ def _custom_reset_doorkey(self, env, width, height, controlled_factors):
env.unwrapped.mission = "use the key to open the door and then get to the goal"
#reset the original rng after resetting env
env.unwrapped.np_random = curr_rng

return env


Expand Down Expand Up @@ -162,7 +190,7 @@ def _custom_reset_fourrooms(self, env, width, height, controlled_factors):
env.unwrapped.put_obj(goal, *tuple(controlled_factors['goal_pos']))
goal.init_pos, goal.cur_pos = tuple(controlled_factors['goal_pos']), tuple(controlled_factors['goal_pos'])
else:
env.unwrapped.place_obj(Goal())
env.unwrapped.place_obj(Goal(),max_tries=10)

env.unwrapped.mission = "reach the goal"

Expand Down
Binary file added frame.jpeg
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Loading