Shreyas-S-Raman · swimmincatt35 · Oct 23, 2024 · Oct 24, 2024 · Oct 24, 2024 · Oct 24, 2024
diff --git a/.gitignore b/.gitignore
@@ -1 +1,4 @@
-./data/temp_obs
+
+factored_ssl
+__pycache__
+temp*
diff --git a/configs/data_generator/config.yaml b/configs/data_generator/config.yaml
@@ -23,8 +23,8 @@ state_attributes: [agent_pos, agent_dir, goal_pos, key_pos, door_pos, holding_ke
 state_attribute_types: {'agent_pos': ['coordinate_width', 'coordinate_height'], 'agent_dir':['agent_dir'], 'goal_pos':['coordinate_width', 'coordinate_height'], 'key_pos':['coordinate_width', 'coordinate_height'], 'door_pos':['coordinate_width', 'coordinate_height'], 'holding_key': ['boolean'], 'door_locked':['boolean'], 'door_open':['boolean']}
 
 #options: [default, random, custom]
-reset_type: default
-
+#reset_type: default
+reset_type: custom
 
 #options: [agent_pos: [x,y], agent_dir: [int], goal_pos: [x,y], key_pos: [x,y], door_pos: [x,y], holding_key: true/false, door_locked: true/false, door_open: true/false]
 #agent_dir: 0 - right, 1 - down, 2 - left, 3 - up

diff --git a/data/__pycache__/data_augmentor.cpython-39.pyc b/data/__pycache__/data_augmentor.cpython-39.pyc
diff --git a/data/__pycache__/data_generator.cpython-39.pyc b/data/__pycache__/data_generator.cpython-39.pyc
diff --git a/data/data_generator.py b/data/data_generator.py
@@ -31,6 +31,14 @@
 [DONE] TODO: implement (multiple) controlled environment factors at once 
     - Random bug sometimes does not allow for factored expert state to be retrieved 
 '''
+
+
+'''
+[CH 10/23] Add global action list (indexed by action id)
+'''
+action_list = ["left","right","forward","pickup","drop","activate","done"]
+
+
 class StochasticActionWrapper(gym.ActionWrapper):
     """
     Add stochasticity to the actions
@@ -59,16 +67,17 @@ def load_config(self, file_path):
             config = yaml.safe_load(file)
         return config
 
-    def __init__(self):
+    def __init__(self, config_path):
 
         super(DataGenerator, self).__init__()
 
         # Parse yaml file parameters for data generator
-        configs = self.load_config(os.path.join(os.path.dirname(__file__), '../configs/data_generator/config.yaml'))
+        configs = self.load_config(os.path.join(config_path))
 
         # Configs from the yaml file
         self.observation_type = configs['observation_space']
         self.state_attributes = configs['state_attributes']
+        self.state_attribute_types = configs['state_attribute_types']
         self.reset_type = configs['reset_type']
 
         # Create the environment
@@ -86,7 +95,7 @@ def __init__(self):
 
         #Store the controlled factors array
         self.controlled_factors = configs['controlled_factors']
-
+        
         #storing the custom reset function if needed
         self.custom_resetter = CustomEnvReset(configs['environment_name'])
 
@@ -98,10 +107,15 @@ def __init__(self):
 
         #creating the observation space and actions space
         self.action_space = self.env.action_space
+
+        self.gym_space_params = {'boolean': (0, 1, int), 
+                                'coordinate_width': (0, self.env.grid.width, int), 
+                                'coordinate_height': (0, self.env.grid.height, int), 
+                                'agent_dir': (0, 3, int)
+                                }
 
         self.observation_space = self._create_observation_space(configs['state_attribute_types'])
 
-
         #creating other gym environment attributes
         self.spec = self.env.spec
         self.metadata = self.env.metadata
@@ -116,9 +130,7 @@ def _create_observation_space(self, state_attribute_types):
             return gym.spaces.Box(low=0, high=255, shape=frame.shape, dtype=np.uint8)
 
         elif self.observation_type == 'expert':
-
-            gym_space_params = {'boolean': (0, 1, int), 'coordinate_width': (0, self.env.grid.width, int), 'coordinate_height': (0, self.env.grid.height, int), 'agent_dir': (0, 3, int)}
-
+
             relevant_state_variables = list(self._construct_state().keys())
 
             min_values = np.array([]); max_values = np.array([])
@@ -128,7 +140,7 @@ def _create_observation_space(self, state_attribute_types):
 
                 for t in types:
 
-                    space_param = gym_space_params[t]
+                    space_param = self.gym_space_params[t]
 
                     min_values = np.append(min_values, space_param[0])
                     max_values = np.append(max_values, space_param[1])
@@ -137,8 +149,6 @@ def _create_observation_space(self, state_attribute_types):
             return gym.spaces.Box(low=min_values, high=max_values, dtype=int)
 
 
-
-
         elif self.observation_type == 'factored':
             raise NotImplementedError('ERROR: to be implemented after factored representation encoder')
 
@@ -209,7 +219,6 @@ def reset(self, seed=None):
         elif self.reset_type == 'random':
             self._randomize_reset()
 
-
         frame = self.env.render()
         #add the visual observation before augmentation for debugging
         info['original_obs'] = frame
@@ -229,21 +238,11 @@ def reset(self, seed=None):
         info['state_dict'] = state
 
         state = [item for sublist in state.values() for item in (sublist if isinstance(sublist, tuple) else [sublist])] 
-
-
         observation = self._get_obs(image = frame, state = state, factored = factored)
-
-
 
         return observation, info
 
-
-
-
-
-
 
-
     def _factorize_obs(self, observation):
         #TODO: implement inference time call to factored representation model
         return None
@@ -254,7 +253,6 @@ def _construct_state(self):
 
         #extract the types of all tiles in the grid: useful for goal, key and door position
         types = np.array([x.type if x is not None else None for x in self.env.unwrapped.grid.grid])
-
         for attr in self.state_attributes:
 
             if hasattr(self.env.unwrapped, attr):
@@ -267,7 +265,6 @@ def _construct_state(self):
                 state[attr] = self.env.unwrapped.grid.grid[np.where(types=='key')[0][0]].cur_pos
             elif ('door' in types) and (attr == 'door_pos'):
                 state[attr] = self.env.unwrapped.grid.grid[np.where(types=='door')[0][0]].cur_pos
-
 
             #other attributes like opening, holding, locked etc...
             elif ('key' in types) and (attr == 'holding_key'):
@@ -298,10 +295,8 @@ def _get_obs(self, image=None, state=None, factored=None):
 
 if __name__ == '__main__':
 
-    pdb.set_trace()
-
-    data_generator = DataGenerator()
-
+    # pdb.set_trace()
+    data_generator = DataGenerator(config_path='../configs/data_generator/config.yaml')
     MAX_STEPS = 5
 
     temp_dir = os.path.relpath('./temp_obs')
@@ -311,29 +306,19 @@ def _get_obs(self, image=None, state=None, factored=None):
         # pdb.set_trace()
         obs, info = data_generator.reset(seed=j)
         img = Image.fromarray(info['obs'])
-        img.save(os.path.join(temp_dir, 'reset_test.jpeg'))
+        img.save(os.path.join(temp_dir, f'run_{j}_reset.jpeg'))
 
         for i in range(MAX_STEPS):
-
             rand_action = 6
-
             while (rand_action == 6):
-
                 rand_action = data_generator.env.action_space.sample()
+            observation, reward, terminated, truncated, info = data_generator.step(rand_action)            
 
-            observation, reward, terminated, truncated, info = data_generator.step(rand_action)
-
-            print('Current State :', observation)
+            # print('Current State :', observation)
+            print('Action: ', action_list[rand_action])
             print('Info: ', info['state_dict'])
             print('Reward: ', reward)
 
             img = Image.fromarray(info['obs'])
-
-            img.save(os.path.join(temp_dir, '{}_modified.jpeg'.format(i)))
-
-            # img = Image.fromarray(info['original_obs'])
-
-            # img.save(os.path.join(temp_dir, '{}_original.jpeg'.format(i)))
-
-
+            img.save(os.path.join(temp_dir, f'run_{j}_step_{i}_action_{rand_action}.jpeg'))
 
diff --git a/data/temp_obs/0.jpeg b/data/temp_obs/0.jpeg
diff --git a/data/temp_obs/0_modified.jpeg b/data/temp_obs/0_modified.jpeg
diff --git a/data/temp_obs/0_original.jpeg b/data/temp_obs/0_original.jpeg
diff --git a/data/temp_obs/1.jpeg b/data/temp_obs/1.jpeg
diff --git a/data/temp_obs/2.jpeg b/data/temp_obs/2.jpeg
diff --git a/data/temp_obs/3.jpeg b/data/temp_obs/3.jpeg
diff --git a/data/temp_obs/4.jpeg b/data/temp_obs/4.jpeg
diff --git a/data/temp_obs/mod_img.jpeg b/data/temp_obs/mod_img.jpeg
diff --git a/data/utils/controlled_reset.py b/data/utils/controlled_reset.py
@@ -12,13 +12,11 @@ class CustomEnvReset:
     def __init__(self, env_name):
 
         custom_reset = {'DoorKey': self._custom_reset_doorkey, 'LavaCrossing': self._custom_reset_lavacrossing, 'FourRooms': self._custom_reset_fourrooms}
-
         for k in custom_reset.keys():
             if k in env_name:
                 self.factored_reset = custom_reset[k]
 
     def _custom_reset_doorkey(self, env, width, height, controlled_factors):
-
         #change the random seed locally 
         curr_rng = env.unwrapped.np_random
         local_rng = np.random.default_rng(int(100*random.random()))
@@ -34,52 +32,83 @@ def _custom_reset_doorkey(self, env, width, height, controlled_factors):
         env.unwrapped.grid.wall_rect(0, 0, width, height)
 
         # factor 1: control goal position 
-        goal_pos = (controlled_factors['goal_pos'][0], controlled_factors['goal_pos'][1]) if 'goal_pos' in controlled_factors else (env.unwrapped._rand_int(1, width - 1), env.unwrapped._rand_int(1, height - 1))
-        if goal_pos not in used_locations:
-            env.unwrapped.put_obj(Goal(), goal_pos[0], goal_pos[1])
-            used_locations.add(goal_pos)
-
+        goal_pos = (controlled_factors['goal_pos'][0], controlled_factors['goal_pos'][1]) if 'goal_pos' in controlled_factors else None
+        while goal_pos is None or ('door_pos' in controlled_factors and goal_pos[0]==controlled_factors['door_pos'][0]):
+            goal_pos = (env.unwrapped._rand_int(1, width - 1), env.unwrapped._rand_int(1, height - 1))
+        env.unwrapped.put_obj(Goal(), goal_pos[0], goal_pos[1])
+        used_locations.add(goal_pos)
 
         # factor 2: control door position
         if 'door_pos' in controlled_factors:
             splitIdx = controlled_factors['door_pos'][0]
             doorIdx = controlled_factors['door_pos'][1]
         else:
             splitIdx = None; doorIdx = None
-            while (splitIdx, doorIdx) in used_locations or (splitIdx is None or doorIdx is None):
+            while (splitIdx, doorIdx) in used_locations or (splitIdx is None or doorIdx is None) or (goal_pos[0]==splitIdx):
                 splitIdx = env.unwrapped._rand_int(2, width - 2)
                 doorIdx = env.unwrapped._rand_int(1, height - 2)
 
         # factor 3: control door locked / unlocked
         # factor 4: control door open/closed
         env.unwrapped.grid.vert_wall(splitIdx, 0)
-        door_locked = controlled_factors['door_locked'] if 'door_locked' in controlled_factors else True
-        door_open = controlled_factors['door_open'] if 'door_open' in controlled_factors else False
+        door_locked = controlled_factors['door_locked'] if 'door_locked' in controlled_factors else np.random.randint(0, 2)
+        if 'door_open' in controlled_factors:
+            door_open = controlled_factors['door_open']
+            if door_open:
+                door_locked = False
+        elif door_locked:
+            door_open = False
+        else:
+            door_open = np.random.randint(0, 2)
         env.unwrapped.put_obj(Door("yellow", is_locked=door_locked, is_open=door_open), splitIdx, doorIdx)
         used_locations.add((splitIdx, doorIdx))
 
+
         # factor 5: control key position 
         # factor 6: control holding key
         # pdb.set_trace()
-        if not (('door_locked' in controlled_factors and controlled_factors['door_locked'] is False) or  ('holding_key' in controlled_factors and controlled_factors['holding_key'] is True)):
-
-            if 'key_pos' in controlled_factors:
-                key_top = controlled_factors['key_pos'] 
-                key_size = (1,1) 
-            else:
-                key_top = (0,0)
-                key_size = (splitIdx, height)
-
-            env.unwrapped.place_obj(obj=Key("yellow"), top= key_top, size= key_size)
+        #if not (('door_locked' in controlled_factors and controlled_factors['door_locked'] is False) or  ('holding_key' in controlled_factors and controlled_factors['holding_key']==1)):
+        if 'key_pos' in controlled_factors:
+            holding_key = 0
+            key_top = controlled_factors['key_pos'] 
+            key_size = (1,1) 
+            env.unwrapped.place_obj(obj=Key("yellow"), top= key_top, size= key_size,max_tries=10)
         else:
-            #need to set the agent property as holding key
-            env.unwrapped.carrying = Key("yellow")
-
+            if 'holding_key' in controlled_factors and controlled_factors['holding_key'] == 1:
+                #need to set the agent property as holding key
+                holding_key = 1
+                env.unwrapped.carrying = Key("yellow")
+            else:
+                if 'holding_key' in controlled_factors and controlled_factors['holding_key'] == 0:  
+                    holding_key = 0
+                    if 'key_pos' in controlled_factors:
+                        key_top = controlled_factors['key_pos'] 
+                        key_size = (1,1) 
+                        env.unwrapped.place_obj(obj=Key("yellow"), top= key_top, size= key_size,max_tries=10)
+                    else:
+                        key_top = (0,0)
+                        key_size = (splitIdx, height)
+                        env.unwrapped.place_obj(obj=Key("yellow"), top= key_top, size= key_size,max_tries=10)            
+                else:
+                    # randomly decide whether the agent starts out holding the key
+                    holding_key = np.random.randint(0,2)
+                    if holding_key:
+                        env.unwrapped.carrying = Key("yellow")
+                    else:
+                        if 'key_pos' in controlled_factors:
+                            key_top = controlled_factors['key_pos'] 
+                            key_size = (1,1) 
+                            env.unwrapped.place_obj(obj=Key("yellow"), top= key_top, size= key_size,max_tries=10)
+                        else:
+                            key_top = (0,0)
+                            key_size = (splitIdx, height)
+                            env.unwrapped.place_obj(obj=Key("yellow"), top= key_top, size= key_size,max_tries=10)
+
         # factor 7: control agent position
         agent_top = tuple(controlled_factors['agent_pos']) if 'agent_pos' in controlled_factors else (0,0)
         agent_size = (1,1) if 'agent_pos' in controlled_factors else (splitIdx, height)
-        env.unwrapped.place_agent(top=agent_top, size=agent_size)
-        
+        env.unwrapped.place_agent(top=agent_top, size=agent_size, max_tries=10)
+
         #factor 8: control agent direction 
         if 'agent_dir' in controlled_factors:
             env.unwrapped.agent_dir = controlled_factors['agent_dir']
@@ -89,7 +118,6 @@ def _custom_reset_doorkey(self, env, width, height, controlled_factors):
         env.unwrapped.mission = "use the key to open the door and then get to the goal"
         #reset the original rng after resetting env
         env.unwrapped.np_random = curr_rng
-
         return env
 
 
@@ -162,7 +190,7 @@ def _custom_reset_fourrooms(self, env, width, height, controlled_factors):
             env.unwrapped.put_obj(goal, *tuple(controlled_factors['goal_pos']))
             goal.init_pos, goal.cur_pos = tuple(controlled_factors['goal_pos']), tuple(controlled_factors['goal_pos'])
         else:
-            env.unwrapped.place_obj(Goal())
+            env.unwrapped.place_obj(Goal(),max_tries=10)
 
         env.unwrapped.mission = "reach the goal"
 

diff --git a/frame.jpeg b/frame.jpeg