From e26307efb621be8012dd321f15fa187aa12523d9 Mon Sep 17 00:00:00 2001 From: santara Date: Thu, 5 Dec 2019 14:11:17 +0530 Subject: [PATCH 01/46] exception handling in step is problematic --- MADRaS/envs/gym_madras_v2.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/MADRaS/envs/gym_madras_v2.py b/MADRaS/envs/gym_madras_v2.py index 337bd72..9747ee6 100644 --- a/MADRaS/envs/gym_madras_v2.py +++ b/MADRaS/envs/gym_madras_v2.py @@ -170,6 +170,7 @@ def step_vanilla(self, action): except Exception as e: print("Exception {} caught at port {}".format(str(e), self.torcs_server_port)) self.wait_for_observation() + # TODO(santara): step not performed... game_state = {"torcs_reward": r, "torcs_done": done, @@ -210,7 +211,6 @@ def step_pid(self, desire): except Exception as e: print("Exception {} caught at port {}".format(str(e), self.torcs_server_port)) self.wait_for_observation() - # TODO(santara): step not performed... game_state = {"torcs_reward": r, "torcs_done": done, From 8f6a34f19c56db5d70eee115485f46efefb9c625 Mon Sep 17 00:00:00 2001 From: santara Date: Fri, 6 Dec 2019 17:02:41 +0530 Subject: [PATCH 02/46] Added Parked Agents, and reward function and observation for single agent training in traffic --- MADRaS/envs/data/sim_options_v2.yml | 220 ++++++++++++++++++++-------- MADRaS/envs/done_manager.py | 17 +++ MADRaS/envs/done_manager_v2.py | 131 +++++++++++++++++ MADRaS/envs/gym_madras_v2.py | 34 +++-- MADRaS/envs/observation_manager.py | 29 ++++ MADRaS/envs/reward_manager.py | 18 +++ MADRaS/test_environment_v2.py | 7 +- MADRaS/traffic/traffic.py | 58 +++++++- MADRaS/utils/gym_torcs.py | 6 +- MADRaS/utils/gym_torcs_v2.py | 1 - MADRaS/utils/snakeoil3_gym_v2.py | 3 +- 11 files changed, 442 insertions(+), 82 deletions(-) create mode 100644 MADRaS/envs/done_manager_v2.py diff --git a/MADRaS/envs/data/sim_options_v2.yml b/MADRaS/envs/data/sim_options_v2.yml index 2f5f5ef..d73e5cb 100644 --- a/MADRaS/envs/data/sim_options_v2.yml +++ b/MADRaS/envs/data/sim_options_v2.yml @@ -1,5 +1,5 @@ torcs_server_port: 60934 -visualise: True # whether to render TORCS window +visualise: False # whether to render TORCS window no_of_visualisations: 1 # To visualize multiple training instances (under MPI) in parallel set it to more than 1 track_len: 2843.10 # in metres. CG Track 3. 
All track lengths can be found here: http://www.berniw.org/trb/tracks/tracklist.php max_steps: 20000 # max episode length @@ -14,7 +14,7 @@ agents: gear_change: False client_max_steps: -1 # to be interpreted as np.inf target_speed: 13.89 # metres per sec - state_dim: 24 + state_dim: 60 # 24 early_stop: True normalize_actions: True # all actions in [-1, 1] # PID params @@ -34,7 +34,7 @@ agents: # Observation mode observations: - mode: SingleAgentSimpleLapObs + mode: SingleAgentInTrafficObs normalize: False # gym_torcs normalizes by default obs_min: angle: -3.142 @@ -47,7 +47,7 @@ agents: # Reward function rewards: - ProgressReward3: + ProgressReward2: scale: 1.0 AvgSpeedReward: scale: 1.0 @@ -58,6 +58,8 @@ agents: AngAcclPenalty: scale: 1.0 max_ang_accl: 2.0 + SuccessfulOvertakeReward: + scale: 10.0 # Done function dones: @@ -66,17 +68,77 @@ agents: - Collision - TurnBackward - OutOfTrack - - MadrasAgent: - vision: False # whether to use vision input - throttle: True - gear_change: False - client_max_steps: -1 # to be interpreted as np.inf - target_speed: 12.89 # metres per sec - state_dim: 24 - early_stop: True - normalize_actions: True # all actions in [-1, 1] - # PID params - pid_assist: True + - Rank1 + # - MadrasAgent: + # vision: False # whether to use vision input + # throttle: True + # gear_change: False + # client_max_steps: -1 # to be interpreted as np.inf + # target_speed: 12.89 # metres per sec + # state_dim: 24 + # early_stop: True + # normalize_actions: True # all actions in [-1, 1] + # # PID params + # pid_assist: True + # pid_settings: + # accel_pid: + # - 10.5 # a_x + # - 0.05 # a_y + # - 2.8 # a_z + # steer_pid: + # - 5.1 + # - 0.001 + # - 0.000001 + # accel_scale: 1.0 + # steer_scale: 0.1 + # pid_latency: 10 + + # # Observation mode + # observations: + # mode: SingleAgentSimpleLapObs + # normalize: False # gym_torcs normalizes by default + # obs_min: + # angle: -3.142 + # track: 0.0 + # trackPos: -1.0 + # obs_max: + # angle: 3.142 + # track: 200.0 + # trackPos: 1.0 + + # # Reward function + # rewards: + # ProgressReward3: + # scale: 1.0 + # AvgSpeedReward: + # scale: 1.0 + # CollisionPenalty: + # scale: 10.0 + # TurnBackwardPenalty: + # scale: 10.0 + # AngAcclPenalty: + # scale: 1.0 + # max_ang_accl: 2.0 + + # # Done function + # dones: + # - RaceOver + # - TimeOut + # - Collision + # - TurnBackward + # - OutOfTrack + +# Traffic agents +traffic: + - ParkedAgent: + target_speed: 80 + parking_lane_pos: + low: -0.6 + high: 0.0 + parking_dist_from_start: + low: 100 + high: 120 + collision_time_window: 1.2 pid_settings: accel_pid: - 10.5 # a_x @@ -88,49 +150,35 @@ agents: - 0.000001 accel_scale: 1.0 steer_scale: 0.1 - pid_latency: 10 - - # Observation mode - observations: - mode: SingleAgentSimpleLapObs - normalize: False # gym_torcs normalizes by default - obs_min: - angle: -3.142 - track: 0.0 - trackPos: -1.0 - obs_max: - angle: 3.142 - track: 200.0 - trackPos: 1.0 - - # Reward function - rewards: - ProgressReward3: - scale: 1.0 - AvgSpeedReward: - scale: 1.0 - CollisionPenalty: - scale: 10.0 - TurnBackwardPenalty: - scale: 10.0 - AngAcclPenalty: - scale: 1.0 - max_ang_accl: 2.0 - - # Done function - dones: - - RaceOver - - TimeOut - - Collision - - TurnBackward - - OutOfTrack - -# Traffic agents -traffic: - - ConstVelTrafficAgent: - target_speed: 30 - target_lane_pos: -0.5 - collision_time_window: 1 # second + - ParkedAgent: + target_speed: 80 + parking_lane_pos: + low: 0.0 + high: 0.4 + parking_dist_from_start: + low: 80 + high: 95 + collision_time_window: 1.2 + 
pid_settings: + accel_pid: + - 10.5 # a_x + - 0.05 # a_y + - 2.8 # a_z + steer_pid: + - 5.1 + - 0.001 + - 0.000001 + accel_scale: 1.0 + steer_scale: 0.1 + - ParkedAgent: + target_speed: 80 + parking_lane_pos: + low: -0.6 + high: 0.0 + parking_dist_from_start: + low: 50 + high: 75 + collision_time_window: 1.2 pid_settings: accel_pid: - 10.5 # a_x @@ -142,6 +190,62 @@ traffic: - 0.000001 accel_scale: 1.0 steer_scale: 0.1 + - ParkedAgent: + target_speed: 80 + parking_lane_pos: + low: 0.0 + high: 0.4 + parking_dist_from_start: + low: 30 + high: 45 + collision_time_window: 1.2 + pid_settings: + accel_pid: + - 10.5 # a_x + - 0.05 # a_y + - 2.8 # a_z + steer_pid: + - 5.1 + - 0.001 + - 0.000001 + accel_scale: 1.0 + steer_scale: 0.1 + - ParkedAgent: + target_speed: 80 + parking_lane_pos: + low: -0.6 + high: 0.0 + parking_dist_from_start: + low: 15 + high: 25 + collision_time_window: 1.2 + pid_settings: + accel_pid: + - 10.5 # a_x + - 0.05 # a_y + - 2.8 # a_z + steer_pid: + - 5.1 + - 0.001 + - 0.000001 + accel_scale: 1.0 + steer_scale: 0.1 + + # - ConstVelTrafficAgent: + # target_speed: 30 + # target_lane_pos: -0.5 + # collision_time_window: 1 # second + # pid_settings: + # accel_pid: + # - 10.5 # a_x + # - 0.05 # a_y + # - 2.8 # a_z + # steer_pid: + # - 5.1 + # - 0.001 + # - 0.000001 + # accel_scale: 1.0 + # steer_scale: 0.1 # - SinusoidalSpeedAgent: # speed_amplitude: 50 # speed_time_period: 1000 diff --git a/MADRaS/envs/done_manager.py b/MADRaS/envs/done_manager.py index 0373807..45dbf59 100644 --- a/MADRaS/envs/done_manager.py +++ b/MADRaS/envs/done_manager.py @@ -130,5 +130,22 @@ def check_done(self, game_config, game_state): else: return False + def reset(self): + self.num_steps = 0 + + +class Rank1(MadrasDone): + def __init__(self): + self.num_steps = 0 + + def check_done(self, game_config, game_state): + self.num_steps += 1 + if game_state["racePos"] == 1: + print("Done: Episode terminated because agent is Rank 1 after {} steps.".format(self.num_steps)) + self.num_steps = 0 + return True + else: + return False + def reset(self): self.num_steps = 0 \ No newline at end of file diff --git a/MADRaS/envs/done_manager_v2.py b/MADRaS/envs/done_manager_v2.py new file mode 100644 index 0000000..e7e4938 --- /dev/null +++ b/MADRaS/envs/done_manager_v2.py @@ -0,0 +1,131 @@ +import numpy as np +import math +import warnings + + +class DoneManager(object): + """Composes the done function from a given done configuration.""" + def __init__(self, cfg): + self.dones = {} + for key in cfg: + try: + exec("self.dones['{}'] = {}()".format(key, key)) + except: + raise ValueError("Unknown done class {}".format(key)) + + if not self.dones: + warnings.warn("No done function specified. Setting TorcsDone " + "as done.") + self.dones['TorcsDone'] = TorcsDone() + + def get_done_signal(self, game_config, game_state): + done_signals = [] + for done_function in self.dones.values(): + done_signals.append(done_function.check_done(game_config, game_state)) + return np.any(done_signals) + + def reset(self): + for done_function in self.dones.values(): + done_function.reset() + + +class MadrasDone(object): + """Base class of MADRaS done function classes. 
+ Any new done class must inherit this class and implement + the following methods: + - [required] check_done(game_config, game_state) + - [optional] reset() + """ + def __init__(self): + pass + + def check_done(self, game_config, game_state): + del game_config, game_state + raise NotImplementedError("Successor class must implement this method.") + + def reset(self): + pass + + +class TorcsDone(MadrasDone): + """Vanilla done function provided by TORCS.""" + def check_done(self, game_config, game_state): + del game_config + if not math.isnan(game_state["torcs_done"]): + return game_state["torcs_done"] + else: + return True + + +class RaceOver(MadrasDone): + """Terminates episode when the agent has finishes one lap.""" + def check_done(self, game_config, game_state): + if game_state["distance_traversed"] >= game_config.track_len: + print("Done: Race over!") + return True + else: + return False + + +class TimeOut(MadrasDone): + def check_done(self, game_config, game_state): + self.num_steps = game_state["num_steps"] + if not game_config.max_steps: + max_steps = int(game_config.track_len / game_config.target_speed * 50) + else: + max_steps = game_config.max_steps + if self.num_steps >= max_steps: + print("Done: Episode terminated due to timeout.") + return True + else: + return False + + +class Collision(MadrasDone): + def __init__(self): + self.damage = 0.0 + + def check_done(self, game_config, game_state): + del game_config + if self.damage < game_state["damage"]: + print("Done: Episode terminated because agent collided.") + self.damage = 0.0 + return True + else: + return False + + def reset(self): + self.damage = 0.0 + + +class TurnBackward(MadrasDone): + def check_done(self, game_config, game_state): + del game_config + if np.cos(game_state["angle"]) < 0: + print("Done: Episode terminated because agent turned backward.") + return True + else: + return False + + +class OutOfTrack(MadrasDone): + def check_done(self, game_config, game_state): + self.num_steps = game_state["num_steps"] + if (game_state["trackPos"] < -1 or game_state["trackPos"] > 1 + or np.any(np.asarray(game_state["track"]) < 0)): + print("Done: Episode terminated because agent went out of track" + " after {} steps.".format(self.num_steps)) + return True + else: + return False + + +class Rank1(MadrasDone): + def check_done(self, game_config, game_state): + self.num_steps = game_state["num_steps"] + if game_state["racePos"] == 1: + print("Done: Episode terminated because agent is Rank 1" + " after {} steps.".format(self.num_steps)) + return True + else: + return False \ No newline at end of file diff --git a/MADRaS/envs/gym_madras_v2.py b/MADRaS/envs/gym_madras_v2.py index 9747ee6..8d2c853 100644 --- a/MADRaS/envs/gym_madras_v2.py +++ b/MADRaS/envs/gym_madras_v2.py @@ -28,7 +28,7 @@ import socket import envs.config_parser as config_parser import envs.reward_manager as rm -import envs.done_manager as dm +import envs.done_manager_v2 as dm import envs.observation_manager as om import traffic.traffic as traffic import multiprocessing @@ -66,6 +66,7 @@ def __init__(self, name, torcs_server_port, cfg, sim_info={}): self.reward_manager = rm.RewardManager(self._config.rewards) self.done_manager = dm.DoneManager(self._config.dones) self.initial_reset = True + self.step_num = 0 def create_new_client(self): while True: @@ -85,7 +86,7 @@ def get_observation_from_server(self): raw_ob = self.client.S.d # Get the current full-observation from torcs self.ob = self.make_observation(raw_ob) - print("[{}]: Initial observation: 
{}".format(self.name, self.ob)) + # print("[{}]: Initial observation: {}".format(self.name, self.ob)) if np.any(np.asarray(self.ob.track) < 0): print("Reset produced bad track values.") self.distance_traversed = 0 @@ -98,6 +99,7 @@ def complete_reset(self): self.PID_controller.reset() self.reward_manager.reset() self.done_manager.reset() + self.step_num = 0 print("Reset: Starting new episode") @@ -160,7 +162,6 @@ def step_vanilla(self, action): if self._config.normalize_actions: action[1] = (action[1] + 1) / 2.0 # acceleration back to [0, 1] action[2] = (action[2] + 1) / 2.0 # brake back to [0, 1] - r = 0.0 try: self.ob, r, done, info = TorcsEnv.step(self, 0, @@ -178,7 +179,9 @@ def step_vanilla(self, action): "angle": self.client.S.d["angle"], "damage": self.client.S.d["damage"], "trackPos": self.client.S.d["trackPos"], - "track": self.client.S.d["track"]} + "track": self.client.S.d["track"], + "racePos": self.client.S.d["racePos"], + "num_steps": self.step_num} reward = self.reward_manager.get_reward(self._config, game_state) done = self.done_manager.get_done_signal(self._config, game_state) @@ -218,7 +221,9 @@ def step_pid(self, desire): "angle": self.client.S.d["angle"], "damage": self.client.S.d["damage"], "trackPos": self.client.S.d["trackPos"], - "track": self.client.S.d["track"]} + "track": self.client.S.d["track"], + "racePos": self.client.S.d["racePos"], + "num_steps": self.step_num} reward += self.reward_manager.get_reward(self._config, game_state) if self._config.pid_assist: @@ -237,6 +242,12 @@ def step(self, action, return_dict={}): return_dict[self.name] = self.step_vanilla(action) return return_dict + def increment_step(self): + if self._config.pid_assist: + self.step_num += self._config.pid_settings['pid_latency'] + else: + self.step_num += 1 + class MadrasEnv(gym.Env): """Definition of the Gym Madras Environment.""" @@ -248,19 +259,22 @@ def __init__(self, cfg_path=DEFAULT_SIM_OPTIONS_FILE): self.seed() self.start_torcs_process() self.agents ={} + + if self._config.traffic: + self.traffic_manager = traffic.MadrasTrafficManager( + self._config.torcs_server_port, len(self.agents), self._config.traffic) + num_traffic_agents = len(self._config.traffic) if self._config.traffic else 0 if self._config.agents: for i, agent in enumerate(self._config.agents): agent_name = [x for x in agent.keys()][0] agent_cfg = agent[agent_name] name = '{}_{}'.format(agent_name, i) - torcs_server_port = self._config.torcs_server_port + i + torcs_server_port = self._config.torcs_server_port + i + num_traffic_agents self.agents[name] = MadrasAgent(name, torcs_server_port, agent_cfg, {"track_len": self._config.track_len, "max_steps": self._config.max_steps }) - if self._config.traffic: - self.traffic_manager = traffic.MadrasTrafficManager( - self._config.torcs_server_port, len(self.agents), self._config.traffic) + # self.action_dim = self.agents[0].action_dim # TODO(santara): Can I not have different action dims for different agents? 
self.initial_reset = True print("Madras agents are: ", self.agents) @@ -369,11 +383,11 @@ def step(self, action): for proc in jobs: proc.join() - print (return_dict) for agent in self.agents: next_obs[agent] = return_dict[agent][0] reward[agent] = return_dict[agent][1] done[agent] = return_dict[agent][2] info[agent] = return_dict[agent][3] + self.agents[agent].increment_step() return next_obs, reward, done, info diff --git a/MADRaS/envs/observation_manager.py b/MADRaS/envs/observation_manager.py index 810b181..bd6a582 100644 --- a/MADRaS/envs/observation_manager.py +++ b/MADRaS/envs/observation_manager.py @@ -116,4 +116,33 @@ def observation_space(self, vision): observation_space = spaces.Box(low=low, high=high) + return observation_space + + +class SingleAgentInTrafficObs(MadrasObs): + def get_obs(self, full_obs): + track = [(x if x > 0 else 0) for x in full_obs.track] + obs = np.hstack((full_obs.angle, + track, + full_obs.trackPos, + full_obs.speedX, + full_obs.speedY, + full_obs.speedZ, + full_obs.opponents)) + return obs + + @property + def observation_dim(self): + return 60 + + def observation_space(self, vision): + if vision: + raise NotImplementedError("Vision inputs not supported yet.") + high = np.asarray([1] + 19*[1] + [np.inf] + 3*[np.inf] + 36*[1], + dtype=MadrasDatatypes.floatX) + low = np.asarray([-1] + 19*[0] + [-np.inf] + 3*[-np.inf] + 36*[0], + dtype=MadrasDatatypes.floatX) + + observation_space = spaces.Box(low=low, high=high) + return observation_space \ No newline at end of file diff --git a/MADRaS/envs/reward_manager.py b/MADRaS/envs/reward_manager.py index 7ef3684..b43d592 100644 --- a/MADRaS/envs/reward_manager.py +++ b/MADRaS/envs/reward_manager.py @@ -153,4 +153,22 @@ def compute_reward(self, game_config, game_state): else: reward = 0 self.prev_angles = self.prev_angles[1:] + return reward + + +class SuccessfulOvertakeReward(MadrasReward): + def __init__(self, cfg): + self.rank = np.inf + super(SuccessfulOvertakeReward, self).__init__(cfg) + + def reset(self): + self.rank = np.inf + + def compute_reward(self, game_config, game_state): + reward = 0.0 + if math.isinf(self.rank): # very fist step + self.rank = game_state["racePos"] + elif game_state["racePos"] < self.rank: + self.rank = game_state["racePos"] + reward = self.cfg["scale"] return reward \ No newline at end of file diff --git a/MADRaS/test_environment_v2.py b/MADRaS/test_environment_v2.py index 8ab1369..c12bff7 100644 --- a/MADRaS/test_environment_v2.py +++ b/MADRaS/test_environment_v2.py @@ -25,11 +25,12 @@ def test_madras_pid(): print("Testing reset...") obs = env.reset() print("Initial observation: {}." 
- " Verify if the number of dimensions {} is 29.".format(obs, len(obs))) + " Verify if the number of dimensions {} is right.".format(obs, len(obs))) print("Testing step...") for t in range(2000): - obs, r, done, _ = env.step([[0.0, 1.0], - [0.0, 1.0]]) + obs, r, done, _ = env.step([[0.3, 0.5], + # [0.0, 1.0] + ]) print("{}: reward={}, done={}".format(t, r, done)) dones = [x for x in done.values()] if np.all(dones): diff --git a/MADRaS/traffic/traffic.py b/MADRaS/traffic/traffic.py index abc68bd..b620898 100644 --- a/MADRaS/traffic/traffic.py +++ b/MADRaS/traffic/traffic.py @@ -4,6 +4,7 @@ from controllers.pid import PIDController import utils.madras_datatypes as md from multiprocessing import Process +from time import time MadrasDatatypes = md.MadrasDatatypes() @@ -12,24 +13,29 @@ class MadrasTrafficManager(object): def __init__(self, torcs_server_port, num_learning_agents, cfg): self.traffic_agents = {} self.traffic_processes = [] + self.num_episodes_of_training = 0 for i, agent in enumerate(cfg): agent_type = [x for x in agent.keys()][0] # TODO(santara): find a better way of extracting key from a dictionary with a single entry agent_config = agent[agent_type] agent_name = '{}_{}'.format(agent_type, i) - agent_port = torcs_server_port + i + num_learning_agents + agent_port = torcs_server_port + i try: exec("self.traffic_agents['{}'] = {}({}, {}, '{}')".format( agent_name, agent_type, agent_port, agent_config, agent_name)) except Exception as e: - raise ValueError("Unknown traffic class {} or incomplete specification.\n Original error: {}".format( + raise ValueError("Unknown traffic class {} or " + "incomplete specification.\n Original error: {}".format( agent_type, e)) def get_traffic_agents(self): return self.traffic_agents def flag_off_traffic(self): - for agent in self.traffic_agents.values(): - self.traffic_processes.append(Process(target=agent.flag_off)) + self.num_episodes_of_training += 1 + for i, agent in enumerate(self.traffic_agents.values()): + self.traffic_processes.append( + Process(target=agent.flag_off, + args=(i+self.num_episodes_of_training,))) for traffic_process in self.traffic_processes: traffic_process.start() @@ -84,7 +90,8 @@ def wait_for_observation(self): def get_action(self): raise NotImplementedError("Successor classes must implement this method") - def flag_off(self, sleep=0): + def flag_off(self, random_seed=0): + del random_seed self.wait_for_observation() print("[{}]: My server is at {}".format(self.name, self.client.serverPID)) self.is_alive = True @@ -92,7 +99,7 @@ def flag_off(self, sleep=0): if self.is_alive: action = self.get_action() try: - self.ob, _, done, info = self.env.step(0, self.client, action) + self.ob, _, done, _ = self.env.step(0, self.client, action) except Exception as e: print("Exception {} caught by {} traffic agent at port {}".format( @@ -209,3 +216,42 @@ def get_action(self): self.brake = 1 self.stopped_for -= 1 return np.asarray([self.steer, self.accel, self.brake]) + + +class ParkedAgent(MadrasTrafficAgent): + def __init__(self, port, cfg, name): + super(ParkedAgent, self).__init__(port, cfg, name) + self.target_speed = cfg["target_speed"]/self.env.default_speed + + def flag_off(self, random_seed=0): + self.time = time() + np.random.seed(random_seed) + parking_lane_pos_range = (self.cfg["parking_lane_pos"]["high"] - + self.cfg["parking_lane_pos"]["low"]) + self.parking_lane_pos = (self.cfg["parking_lane_pos"]["low"] + + np.random.random()*parking_lane_pos_range) + parking_dist_range = (self.cfg["parking_dist_from_start"]["high"] - + 
self.cfg["parking_dist_from_start"]["low"]) + self.parking_dist_from_start = (self.cfg["parking_dist_from_start"]["low"] + + np.random.random()*parking_dist_range) + self.behind_finish_line = True + self.prev_dist_from_start = 0 + self.parked = False + super(ParkedAgent, self).flag_off() + + def get_action(self): + self.distance_from_start = self.ob.distFromStart + if self.distance_from_start < self.prev_dist_from_start: + self.behind_finish_line = False + if not self.behind_finish_line and self.distance_from_start >= self.parking_dist_from_start: + self.steer, self.accel, self.brake = 0.0, 0.0, 1.0 + if not self.parked: + print("{} parked at lanepos: {}, distFromStart: {} after time {} sec".format( + self.name, self.parking_lane_pos, self.parking_dist_from_start, time()-self.time)) + self.parked = True + else: + action = self.PID_controller.get_action([self.parking_lane_pos, self.target_speed]) + self.steer, self.accel, self.brake = action[0], action[1], action[2] + self.avoid_impending_collision() + self.prev_dist_from_start = self.distance_from_start + return np.asarray([self.steer, self.accel, self.brake]) \ No newline at end of file diff --git a/MADRaS/utils/gym_torcs.py b/MADRaS/utils/gym_torcs.py index af0d3bc..002006c 100644 --- a/MADRaS/utils/gym_torcs.py +++ b/MADRaS/utils/gym_torcs.py @@ -251,7 +251,8 @@ def make_observation(self, raw_obs): 'rpm', 'track', 'trackPos', - 'wheelSpinVel'] + 'wheelSpinVel', + 'distFromStart'] Observation = col.namedtuple('Observation', names) return Observation(focus=np.array(raw_obs['focus'], dtype=madras.floatX)/200., speedX=np.array(raw_obs['speedX'], dtype=madras.floatX)/self.default_speed, @@ -263,7 +264,8 @@ def make_observation(self, raw_obs): rpm=np.array(raw_obs['rpm'], dtype=madras.floatX)/10000, track=np.array(raw_obs['track'], dtype=madras.floatX)/200., trackPos=np.array(raw_obs['trackPos'], dtype=madras.floatX)/1., - wheelSpinVel=np.array(raw_obs['wheelSpinVel'], dtype=madras.floatX)) + wheelSpinVel=np.array(raw_obs['wheelSpinVel'], dtype=madras.floatX), + distFromStart=np.array(raw_obs['distFromStart'], dtype=madras.floatX)) else: names = ['focus', 'speedX', 'speedY', 'speedZ', 'angle', diff --git a/MADRaS/utils/gym_torcs_v2.py b/MADRaS/utils/gym_torcs_v2.py index c255504..aeaf0c0 100644 --- a/MADRaS/utils/gym_torcs_v2.py +++ b/MADRaS/utils/gym_torcs_v2.py @@ -59,7 +59,6 @@ def terminate(self): def step(self, step, client, u, early_stop=1): - print("In STEP of {}: My server is {}".format(self.name, client.serverPID)) reward = 0 this_action = self.agent_to_torcs(u) diff --git a/MADRaS/utils/snakeoil3_gym_v2.py b/MADRaS/utils/snakeoil3_gym_v2.py index fb74027..3fd2597 100644 --- a/MADRaS/utils/snakeoil3_gym_v2.py +++ b/MADRaS/utils/snakeoil3_gym_v2.py @@ -286,7 +286,6 @@ def get_servers_input(self, step): n_fail = n_fail_org n_fail -= 1 - print("[{}]: My server PID is {}".format(self.name, self.serverPID)) if '***identified***' in sockdata: print("{} Client connected on {}..............".format(self.name, self.port)) continue @@ -381,7 +380,7 @@ def fancyout(self): # 'gear', 'distRaced', 'distFromStart', - # 'racePos', + 'racePos', 'opponents', 'wheelSpinVel', 'z', From 8df99258ceabcde36ff55a75d7909bd73cbeadc3 Mon Sep 17 00:00:00 2001 From: santara Date: Sun, 8 Dec 2019 06:45:09 +0530 Subject: [PATCH 03/46] minor merge --- MADRaS/envs/data/sim_options.yml | 225 ++++++++++++++++++++++++++++++- 1 file changed, 224 insertions(+), 1 deletion(-) diff --git a/MADRaS/envs/data/sim_options.yml b/MADRaS/envs/data/sim_options.yml index 
12c59aa..7aef3eb 100644 --- a/MADRaS/envs/data/sim_options.yml +++ b/MADRaS/envs/data/sim_options.yml @@ -5,10 +5,17 @@ gear_change: False client_max_steps: -1 # to be interpreted as np.inf visualise: True # whether to render TORCS window no_of_visualisations: 1 # To visualize multiple training instances (under MPI) in parallel set it to more than 1 +<<<<<<< Updated upstream track_len: 2843.10 # in metres. CG Track 3. All track lengths can be found here: http://www.berniw.org/trb/tracks/tracklist.php max_steps: 20000 # max episode length track_width: 10.0 target_speed: 13.89 # metres per sec +======= +track_len: 5784.10 # in metres. All track lengths can be found here: http://www.berniw.org/trb/tracks/tracklist.php +max_steps: 5000 #40000 #15000 #20000 # max episode length +track_width: 11.0 +target_speed: 13.89 #27.78 # metres per sec +>>>>>>> Stashed changes state_dim: 29 early_stop: True normalize_actions: True # all actions in [-1, 1] @@ -52,7 +59,7 @@ rewards: TurnBackwardPenalty: scale: 10.0 AngAcclPenalty: - scale: 1.0 + scale: 2.0 max_ang_accl: 2.0 # Done function @@ -64,6 +71,7 @@ dones: - OutOfTrack # Traffic agents +<<<<<<< Updated upstream # traffic: # - ConstVelTrafficAgent: # target_speed: 50 @@ -96,6 +104,221 @@ dones: # - 0.000001 # accel_scale: 1.0 # steer_scale: 0.1 +======= +traffic: + - ParkedAgent: + target_speed: 50 + parking_lane_pos: + low: -0.8 + high: 0.0 + parking_dist_from_start: + low: 185 + high: 200 + collision_time_window: 1.2 + pid_settings: + accel_pid: + - 10.5 # a_x + - 0.05 # a_y + - 2.8 # a_z + steer_pid: + - 5.1 + - 0.001 + - 0.000001 + accel_scale: 1.0 + steer_scale: 0.1 + - ParkedAgent: + target_speed: 50 + parking_lane_pos: + low: 0.0 + high: 0.8 + parking_dist_from_start: + low: 165 + high: 180 + collision_time_window: 1.2 + pid_settings: + accel_pid: + - 10.5 # a_x + - 0.05 # a_y + - 2.8 # a_z + steer_pid: + - 5.1 + - 0.001 + - 0.000001 + accel_scale: 1.0 + steer_scale: 0.1 + - ParkedAgent: + target_speed: 50 + parking_lane_pos: + low: -0.8 + high: 0.0 + parking_dist_from_start: + low: 145 + high: 160 + collision_time_window: 1.2 + pid_settings: + accel_pid: + - 10.5 # a_x + - 0.05 # a_y + - 2.8 # a_z + steer_pid: + - 5.1 + - 0.001 + - 0.000001 + accel_scale: 1.0 + steer_scale: 0.1 + - ParkedAgent: + target_speed: 50 + parking_lane_pos: + low: 0.0 + high: 0.8 + parking_dist_from_start: + low: 125 + high: 140 + collision_time_window: 1.2 + pid_settings: + accel_pid: + - 10.5 # a_x + - 0.05 # a_y + - 2.8 # a_z + steer_pid: + - 5.1 + - 0.001 + - 0.000001 + accel_scale: 1.0 + steer_scale: 0.1 + - ParkedAgent: + target_speed: 50 + parking_lane_pos: + low: -0.8 + high: 0.0 + parking_dist_from_start: + low: 100 + high: 120 + collision_time_window: 1.2 + pid_settings: + accel_pid: + - 10.5 # a_x + - 0.05 # a_y + - 2.8 # a_z + steer_pid: + - 5.1 + - 0.001 + - 0.000001 + accel_scale: 1.0 + steer_scale: 0.1 + - ParkedAgent: + target_speed: 50 + parking_lane_pos: + low: 0.0 + high: 0.8 + parking_dist_from_start: + low: 80 + high: 95 + collision_time_window: 1.2 + pid_settings: + accel_pid: + - 10.5 # a_x + - 0.05 # a_y + - 2.8 # a_z + steer_pid: + - 5.1 + - 0.001 + - 0.000001 + accel_scale: 1.0 + steer_scale: 0.1 + - ParkedAgent: + target_speed: 50 + parking_lane_pos: + low: -0.8 + high: 0.0 + parking_dist_from_start: + low: 50 + high: 75 + collision_time_window: 1.2 + pid_settings: + accel_pid: + - 10.5 # a_x + - 0.05 # a_y + - 2.8 # a_z + steer_pid: + - 5.1 + - 0.001 + - 0.000001 + accel_scale: 1.0 + steer_scale: 0.1 + - ParkedAgent: + target_speed: 50 + 
parking_lane_pos: + low: 0.0 + high: 0.8 + parking_dist_from_start: + low: 30 + high: 45 + collision_time_window: 1.2 + pid_settings: + accel_pid: + - 10.5 # a_x + - 0.05 # a_y + - 2.8 # a_z + steer_pid: + - 5.1 + - 0.001 + - 0.000001 + accel_scale: 1.0 + steer_scale: 0.1 + - ParkedAgent: + target_speed: 50 + parking_lane_pos: + low: -0.8 + high: 0.0 + parking_dist_from_start: + low: 15 + high: 25 + collision_time_window: 1.2 + pid_settings: + accel_pid: + - 10.5 # a_x + - 0.05 # a_y + - 2.8 # a_z + steer_pid: + - 5.1 + - 0.001 + - 0.000001 + accel_scale: 1.0 + steer_scale: 0.1 + + # - ConstVelTrafficAgent: + # target_speed: 50 + # target_lane_pos: -0.5 + # collision_time_window: 1 # second + # pid_settings: + # accel_pid: + # - 10.5 # a_x + # - 0.05 # a_y + # - 2.8 # a_z + # steer_pid: + # - 5.1 + # - 0.001 + # - 0.000001 + # accel_scale: 1.0 + # steer_scale: 0.1 + # - SinusoidalSpeedAgent: + # speed_amplitude: 50 + # speed_time_period: 1000 + # target_lane_pos: -0.25 + # collision_time_window: 1 # second + # pid_settings: + # accel_pid: + # - 10.5 # a_x + # - 0.05 # a_y + # - 2.8 # a_z + # steer_pid: + # - 5.1 + # - 0.001 + # - 0.000001 + # accel_scale: 1.0 + # steer_scale: 0.1 +>>>>>>> Stashed changes # - RandomLaneSwitchAgent: # target_speed: 100 # collision_time_window: 1 # second From 8550f4b3626ee6b6dd49d6a38458e2163ee3b549 Mon Sep 17 00:00:00 2001 From: santara Date: Thu, 12 Dec 2019 07:23:14 +0530 Subject: [PATCH 04/46] Multi-agent reset latency issue fixed --- MADRaS/envs/data/sim_options.yml | 42 -------------------------------- MADRaS/envs/gym_madras_v2.py | 1 + MADRaS/test_environment_v2.py | 5 ++-- 3 files changed, 4 insertions(+), 44 deletions(-) diff --git a/MADRaS/envs/data/sim_options.yml b/MADRaS/envs/data/sim_options.yml index 7aef3eb..ae0f51a 100644 --- a/MADRaS/envs/data/sim_options.yml +++ b/MADRaS/envs/data/sim_options.yml @@ -5,17 +5,10 @@ gear_change: False client_max_steps: -1 # to be interpreted as np.inf visualise: True # whether to render TORCS window no_of_visualisations: 1 # To visualize multiple training instances (under MPI) in parallel set it to more than 1 -<<<<<<< Updated upstream -track_len: 2843.10 # in metres. CG Track 3. All track lengths can be found here: http://www.berniw.org/trb/tracks/tracklist.php -max_steps: 20000 # max episode length -track_width: 10.0 -target_speed: 13.89 # metres per sec -======= track_len: 5784.10 # in metres. 
All track lengths can be found here: http://www.berniw.org/trb/tracks/tracklist.php max_steps: 5000 #40000 #15000 #20000 # max episode length track_width: 11.0 target_speed: 13.89 #27.78 # metres per sec ->>>>>>> Stashed changes state_dim: 29 early_stop: True normalize_actions: True # all actions in [-1, 1] @@ -71,40 +64,6 @@ dones: - OutOfTrack # Traffic agents -<<<<<<< Updated upstream -# traffic: -# - ConstVelTrafficAgent: -# target_speed: 50 -# target_lane_pos: -0.5 -# collision_time_window: 1 # second -# pid_settings: -# accel_pid: -# - 10.5 # a_x -# - 0.05 # a_y -# - 2.8 # a_z -# steer_pid: -# - 5.1 -# - 0.001 -# - 0.000001 -# accel_scale: 1.0 -# steer_scale: 0.1 -# - SinusoidalSpeedAgent: -# speed_amplitude: 50 -# speed_time_period: 1000 -# target_lane_pos: -0.25 -# collision_time_window: 1 # second -# pid_settings: -# accel_pid: -# - 10.5 # a_x -# - 0.05 # a_y -# - 2.8 # a_z -# steer_pid: -# - 5.1 -# - 0.001 -# - 0.000001 -# accel_scale: 1.0 -# steer_scale: 0.1 -======= traffic: - ParkedAgent: target_speed: 50 @@ -318,7 +277,6 @@ traffic: # - 0.000001 # accel_scale: 1.0 # steer_scale: 0.1 ->>>>>>> Stashed changes # - RandomLaneSwitchAgent: # target_speed: 100 # collision_time_window: 1 # second diff --git a/MADRaS/envs/gym_madras_v2.py b/MADRaS/envs/gym_madras_v2.py index 8d2c853..fe9e4e6 100644 --- a/MADRaS/envs/gym_madras_v2.py +++ b/MADRaS/envs/gym_madras_v2.py @@ -366,6 +366,7 @@ def reset(self): # Collect first observations for agent in self.agents: s_t[agent] = self.agents[agent].get_observation_from_server() + self.agents[agent].client.respond_to_server() # Finish reset for agent in self.agents: self.agents[agent].complete_reset() diff --git a/MADRaS/test_environment_v2.py b/MADRaS/test_environment_v2.py index c12bff7..16c0be9 100644 --- a/MADRaS/test_environment_v2.py +++ b/MADRaS/test_environment_v2.py @@ -29,11 +29,12 @@ def test_madras_pid(): print("Testing step...") for t in range(2000): obs, r, done, _ = env.step([[0.3, 0.5], - # [0.0, 1.0] + [0.0, 1.0] ]) print("{}: reward={}, done={}".format(t, r, done)) dones = [x for x in done.values()] - if np.all(dones): + print(dones) + if np.any(dones): env.reset() os.system("pkill torcs") From 4e6f535f5a7e722c7e5d12a4942a70ce56793c5b Mon Sep 17 00:00:00 2001 From: Manish Prajapat <37489792+manish-pra@users.noreply.github.com> Date: Wed, 11 Dec 2019 22:55:20 -0800 Subject: [PATCH 05/46] Added respond func to elimate 10s of timeout error The server expects that it receives information of each agent one by one after it provides observation to that agent --- MADRaS/envs/gym_madras_v2.py | 1 + 1 file changed, 1 insertion(+) diff --git a/MADRaS/envs/gym_madras_v2.py b/MADRaS/envs/gym_madras_v2.py index 8d2c853..7b28b45 100644 --- a/MADRaS/envs/gym_madras_v2.py +++ b/MADRaS/envs/gym_madras_v2.py @@ -366,6 +366,7 @@ def reset(self): # Collect first observations for agent in self.agents: s_t[agent] = self.agents[agent].get_observation_from_server() + self.agents[agent].client.respond_to_server() # To elimate 10s of timeout error, responding to the server after obs # Finish reset for agent in self.agents: self.agents[agent].complete_reset() From 686e3bf1c9574024c01d0a8c4c86d777c071ab19 Mon Sep 17 00:00:00 2001 From: Manish Date: Thu, 12 Dec 2019 23:59:33 -0800 Subject: [PATCH 06/46] added multiprocessing event to parallely close all process(agent) together --- MADRaS/envs/gym_madras_v2.py | 16 ++++++++++++---- 1 file changed, 12 insertions(+), 4 deletions(-) diff --git a/MADRaS/envs/gym_madras_v2.py b/MADRaS/envs/gym_madras_v2.py index 
7b28b45..9c53fd0 100644 --- a/MADRaS/envs/gym_madras_v2.py +++ b/MADRaS/envs/gym_madras_v2.py @@ -191,7 +191,7 @@ def step_vanilla(self, action): return next_obs, reward, done, info - def step_pid(self, desire): + def step_pid(self, desire, e): """Execute single step with lane_pos, velocity controls.""" if self._config.normalize_actions: # [-1, 1] should correspond to [-self._config.target_speed, @@ -230,14 +230,21 @@ def step_pid(self, desire): self.PID_controller.update(self.ob) done = self.done_manager.get_done_signal(self._config, game_state) if done: + e.set() + print('e is set by {}'.format(self.name)) + print(PID_step) + break + if e.is_set(): + print('e is not set by {}'.format(self.name)) + print(PID_step) break next_obs = self.observation_manager.get_obs(self.ob, self._config) return next_obs, reward, done, info - def step(self, action, return_dict={}): + def step(self, action, e, return_dict={}): if self._config.pid_assist: - return_dict[self.name] = self.step_pid(action) + return_dict[self.name] = self.step_pid(action, e) else: return_dict[self.name] = self.step_vanilla(action) return return_dict @@ -376,9 +383,10 @@ def step(self, action): next_obs, reward, done, info = {}, {}, {}, {} manager = multiprocessing.Manager() return_dict = manager.dict() + e = multiprocessing.Event() jobs = [] for i, agent in enumerate(self.agents): - p = multiprocessing.Process(target=self.agents[agent].step, args=(action[i], return_dict)) + p = multiprocessing.Process(target=self.agents[agent].step, args=(action[i], e, return_dict)) jobs.append(p) p.start() From 8fb3fdd3fd7aff651c0cc2e67846410eab0679c0 Mon Sep 17 00:00:00 2001 From: rudrasohan Date: Sun, 15 Dec 2019 13:01:48 +0530 Subject: [PATCH 07/46] updated gitignore --- .gitignore | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.gitignore b/.gitignore index 894a44c..522ac3c 100644 --- a/.gitignore +++ b/.gitignore @@ -2,7 +2,7 @@ __pycache__/ *.py[cod] *$py.class - +*.vscode/ # C extensions *.so From 02431bac296540beb29dfc1a3962c83fe328616e Mon Sep 17 00:00:00 2001 From: rudrasohan Date: Sun, 15 Dec 2019 13:10:07 +0530 Subject: [PATCH 08/46] updated changes from main repo --- MADRaS/envs/gym_madras_v2.py | 20 ++++++++++--------- MADRaS/traffic/traffic.py | 8 ++++---- MADRaS/{envs => utils}/config_parser.py | 0 MADRaS/{envs => utils}/done_manager_v2.py | 0 MADRaS/{envs => utils}/observation_manager.py | 2 +- MADRaS/{envs => utils}/reward_manager.py | 0 6 files changed, 16 insertions(+), 14 deletions(-) rename MADRaS/{envs => utils}/config_parser.py (100%) rename MADRaS/{envs => utils}/done_manager_v2.py (100%) rename MADRaS/{envs => utils}/observation_manager.py (99%) rename MADRaS/{envs => utils}/reward_manager.py (100%) diff --git a/MADRaS/envs/gym_madras_v2.py b/MADRaS/envs/gym_madras_v2.py index 7b28b45..50c7fc4 100644 --- a/MADRaS/envs/gym_madras_v2.py +++ b/MADRaS/envs/gym_madras_v2.py @@ -15,9 +15,9 @@ import math from copy import deepcopy import numpy as np -import utils.snakeoil3_gym_v2 as snakeoil3 -from utils.gym_torcs_v2 import TorcsEnv -from controllers.pid import PIDController +import MADRaS.utils.snakeoil3_gym_v2 as snakeoil3 +from MADRaS.utils.gym_torcs_v2 import TorcsEnv +from MADRaS.controllers.pid import PIDController import gym from gym.utils import seeding import os @@ -26,14 +26,16 @@ import time from mpi4py import MPI import socket -import envs.config_parser as config_parser -import envs.reward_manager as rm -import envs.done_manager_v2 as dm -import envs.observation_manager as om -import 
traffic.traffic as traffic +import MADRaS.utils.config_parser as config_parser +import MADRaS.utils.reward_manager as rm +import MADRaS.utils.done_manager_v2 as dm +import MADRaS.utils.observation_manager as om +import MADRaS.traffic.traffic as traffic import multiprocessing -DEFAULT_SIM_OPTIONS_FILE = "envs/data/sim_options_v2.yml" +path_and_file = os.path.realpath(__file__) +path, file = os.path.split(path_and_file) +DEFAULT_SIM_OPTIONS_FILE = os.path.join(path, "data", "sim_options.yml") class MadrasAgent(TorcsEnv, gym.Env): diff --git a/MADRaS/traffic/traffic.py b/MADRaS/traffic/traffic.py index b620898..da5f49f 100644 --- a/MADRaS/traffic/traffic.py +++ b/MADRaS/traffic/traffic.py @@ -1,8 +1,8 @@ import numpy as np -from utils.gym_torcs import TorcsEnv -import utils.snakeoil3_gym as snakeoil3 -from controllers.pid import PIDController -import utils.madras_datatypes as md +from MADRaS.utils.gym_torcs import TorcsEnv +import MADRaS.utils.snakeoil3_gym as snakeoil3 +from MADRaS.controllers.pid import PIDController +import MADRaS.utils.madras_datatypes as md from multiprocessing import Process from time import time diff --git a/MADRaS/envs/config_parser.py b/MADRaS/utils/config_parser.py similarity index 100% rename from MADRaS/envs/config_parser.py rename to MADRaS/utils/config_parser.py diff --git a/MADRaS/envs/done_manager_v2.py b/MADRaS/utils/done_manager_v2.py similarity index 100% rename from MADRaS/envs/done_manager_v2.py rename to MADRaS/utils/done_manager_v2.py diff --git a/MADRaS/envs/observation_manager.py b/MADRaS/utils/observation_manager.py similarity index 99% rename from MADRaS/envs/observation_manager.py rename to MADRaS/utils/observation_manager.py index bd6a582..b7a8840 100644 --- a/MADRaS/envs/observation_manager.py +++ b/MADRaS/utils/observation_manager.py @@ -1,6 +1,6 @@ import numpy as np from gym import spaces -import utils.madras_datatypes as md +import MADRaS.utils.madras_datatypes as md MadrasDatatypes = md.MadrasDatatypes() diff --git a/MADRaS/envs/reward_manager.py b/MADRaS/utils/reward_manager.py similarity index 100% rename from MADRaS/envs/reward_manager.py rename to MADRaS/utils/reward_manager.py From b4655a58d0a807122bbbceef18def9eaccb87871 Mon Sep 17 00:00:00 2001 From: rudrasohan Date: Sun, 15 Dec 2019 14:20:18 +0530 Subject: [PATCH 09/46] added changes suggested by @manish-pra --- MADRaS/utils/gym_torcs_v2.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/MADRaS/utils/gym_torcs_v2.py b/MADRaS/utils/gym_torcs_v2.py index aeaf0c0..cad4ed5 100644 --- a/MADRaS/utils/gym_torcs_v2.py +++ b/MADRaS/utils/gym_torcs_v2.py @@ -13,8 +13,8 @@ from gym import spaces import numpy as np import copy -import utils.snakeoil3_gym_v2 as snakeoil3 -import utils.madras_datatypes as md +import MADRaS.utils.snakeoil3_gym_v2 as snakeoil3 +import MADRaS.utils.madras_datatypes as md madras = md.MadrasDatatypes() @@ -109,7 +109,6 @@ def step(self, step, client, u, early_stop=1): # Save the previous full-obs from torcs for the reward calculation obs_pre = copy.deepcopy(client.S.d) - # One-Step Dynamics Update ################################# # Apply the Agent's action into torcs client.respond_to_server() @@ -118,9 +117,10 @@ def step(self, step, client, u, early_stop=1): code = client.get_servers_input(step) if code==-1: + obs_pre_ret = self.make_observation(obs_pre) client.R.d['meta'] = True print('Terminating because server stopped responding') - return obs_pre, 0, client.R.d['meta'], {'termination_cause':'hardReset'} + return obs_pre_ret, 0, 
client.R.d['meta'], {'termination_cause':'hardReset'} # Get the current full-observation from torcs obs = client.S.d From 6fa2e002193cf46cc00ce40237132c5f6a55dafb Mon Sep 17 00:00:00 2001 From: rudrasohan Date: Sun, 15 Dec 2019 14:30:37 +0530 Subject: [PATCH 10/46] smooth V2 --- MADRaS/envs/__init__.py | 2 +- MADRaS/envs/data/sim_options_v2.yml | 301 ++++++++++++++-------------- MADRaS/envs/gym_madras_v2.py | 5 +- MADRaS/utils/snakeoil3_gym_v2.py | 8 +- 4 files changed, 158 insertions(+), 158 deletions(-) diff --git a/MADRaS/envs/__init__.py b/MADRaS/envs/__init__.py index 16c5aa1..b6eacf2 100644 --- a/MADRaS/envs/__init__.py +++ b/MADRaS/envs/__init__.py @@ -1,2 +1,2 @@ """Env Import.""" -from envs.gym_madras import MadrasEnv \ No newline at end of file +from MADRaS.envs.gym_madras_v2 import MadrasEnv \ No newline at end of file diff --git a/MADRaS/envs/data/sim_options_v2.yml b/MADRaS/envs/data/sim_options_v2.yml index d73e5cb..86b2f1d 100644 --- a/MADRaS/envs/data/sim_options_v2.yml +++ b/MADRaS/envs/data/sim_options_v2.yml @@ -68,157 +68,17 @@ agents: - Collision - TurnBackward - OutOfTrack - - Rank1 - # - MadrasAgent: - # vision: False # whether to use vision input - # throttle: True - # gear_change: False - # client_max_steps: -1 # to be interpreted as np.inf - # target_speed: 12.89 # metres per sec - # state_dim: 24 - # early_stop: True - # normalize_actions: True # all actions in [-1, 1] - # # PID params - # pid_assist: True - # pid_settings: - # accel_pid: - # - 10.5 # a_x - # - 0.05 # a_y - # - 2.8 # a_z - # steer_pid: - # - 5.1 - # - 0.001 - # - 0.000001 - # accel_scale: 1.0 - # steer_scale: 0.1 - # pid_latency: 10 - - # # Observation mode - # observations: - # mode: SingleAgentSimpleLapObs - # normalize: False # gym_torcs normalizes by default - # obs_min: - # angle: -3.142 - # track: 0.0 - # trackPos: -1.0 - # obs_max: - # angle: 3.142 - # track: 200.0 - # trackPos: 1.0 - - # # Reward function - # rewards: - # ProgressReward3: - # scale: 1.0 - # AvgSpeedReward: - # scale: 1.0 - # CollisionPenalty: - # scale: 10.0 - # TurnBackwardPenalty: - # scale: 10.0 - # AngAcclPenalty: - # scale: 1.0 - # max_ang_accl: 2.0 - - # # Done function - # dones: - # - RaceOver - # - TimeOut - # - Collision - # - TurnBackward - # - OutOfTrack - -# Traffic agents -traffic: - - ParkedAgent: - target_speed: 80 - parking_lane_pos: - low: -0.6 - high: 0.0 - parking_dist_from_start: - low: 100 - high: 120 - collision_time_window: 1.2 - pid_settings: - accel_pid: - - 10.5 # a_x - - 0.05 # a_y - - 2.8 # a_z - steer_pid: - - 5.1 - - 0.001 - - 0.000001 - accel_scale: 1.0 - steer_scale: 0.1 - - ParkedAgent: - target_speed: 80 - parking_lane_pos: - low: 0.0 - high: 0.4 - parking_dist_from_start: - low: 80 - high: 95 - collision_time_window: 1.2 - pid_settings: - accel_pid: - - 10.5 # a_x - - 0.05 # a_y - - 2.8 # a_z - steer_pid: - - 5.1 - - 0.001 - - 0.000001 - accel_scale: 1.0 - steer_scale: 0.1 - - ParkedAgent: - target_speed: 80 - parking_lane_pos: - low: -0.6 - high: 0.0 - parking_dist_from_start: - low: 50 - high: 75 - collision_time_window: 1.2 - pid_settings: - accel_pid: - - 10.5 # a_x - - 0.05 # a_y - - 2.8 # a_z - steer_pid: - - 5.1 - - 0.001 - - 0.000001 - accel_scale: 1.0 - steer_scale: 0.1 - - ParkedAgent: - target_speed: 80 - parking_lane_pos: - low: 0.0 - high: 0.4 - parking_dist_from_start: - low: 30 - high: 45 - collision_time_window: 1.2 - pid_settings: - accel_pid: - - 10.5 # a_x - - 0.05 # a_y - - 2.8 # a_z - steer_pid: - - 5.1 - - 0.001 - - 0.000001 - accel_scale: 1.0 - steer_scale: 0.1 - 
- ParkedAgent: - target_speed: 80 - parking_lane_pos: - low: -0.6 - high: 0.0 - parking_dist_from_start: - low: 15 - high: 25 - collision_time_window: 1.2 + - MadrasAgent: + vision: False # whether to use vision input + throttle: True + gear_change: False + client_max_steps: -1 # to be interpreted as np.inf + target_speed: 12.89 # metres per sec + state_dim: 24 + early_stop: True + normalize_actions: True # all actions in [-1, 1] + # PID params + pid_assist: True pid_settings: accel_pid: - 10.5 # a_x @@ -230,6 +90,145 @@ traffic: - 0.000001 accel_scale: 1.0 steer_scale: 0.1 + pid_latency: 10 + + # Observation mode + observations: + mode: SingleAgentSimpleLapObs + normalize: False # gym_torcs normalizes by default + obs_min: + angle: -3.142 + track: 0.0 + trackPos: -1.0 + obs_max: + angle: 3.142 + track: 200.0 + trackPos: 1.0 + + # Reward function + rewards: + ProgressReward3: + scale: 1.0 + AvgSpeedReward: + scale: 1.0 + CollisionPenalty: + scale: 10.0 + TurnBackwardPenalty: + scale: 10.0 + AngAcclPenalty: + scale: 1.0 + max_ang_accl: 2.0 + + # Done function + dones: + - RaceOver + - TimeOut + - Collision + - TurnBackward + - OutOfTrack + +# Traffic agents +# traffic: +# - ParkedAgent: +# target_speed: 80 +# parking_lane_pos: +# low: -0.6 +# high: 0.0 +# parking_dist_from_start: +# low: 100 +# high: 120 +# collision_time_window: 1.2 +# pid_settings: +# accel_pid: +# - 10.5 # a_x +# - 0.05 # a_y +# - 2.8 # a_z +# steer_pid: +# - 5.1 +# - 0.001 +# - 0.000001 +# accel_scale: 1.0 +# steer_scale: 0.1 +# - ParkedAgent: +# target_speed: 80 +# parking_lane_pos: +# low: 0.0 +# high: 0.4 +# parking_dist_from_start: +# low: 80 +# high: 95 +# collision_time_window: 1.2 +# pid_settings: +# accel_pid: +# - 10.5 # a_x +# - 0.05 # a_y +# - 2.8 # a_z +# steer_pid: +# - 5.1 +# - 0.001 +# - 0.000001 +# accel_scale: 1.0 +# steer_scale: 0.1 +# - ParkedAgent: +# target_speed: 80 +# parking_lane_pos: +# low: -0.6 +# high: 0.0 +# parking_dist_from_start: +# low: 50 +# high: 75 +# collision_time_window: 1.2 +# pid_settings: +# accel_pid: +# - 10.5 # a_x +# - 0.05 # a_y +# - 2.8 # a_z +# steer_pid: +# - 5.1 +# - 0.001 +# - 0.000001 +# accel_scale: 1.0 +# steer_scale: 0.1 +# - ParkedAgent: +# target_speed: 80 +# parking_lane_pos: +# low: 0.0 +# high: 0.4 +# parking_dist_from_start: +# low: 30 +# high: 45 +# collision_time_window: 1.2 +# pid_settings: +# accel_pid: +# - 10.5 # a_x +# - 0.05 # a_y +# - 2.8 # a_z +# steer_pid: +# - 5.1 +# - 0.001 +# - 0.000001 +# accel_scale: 1.0 +# steer_scale: 0.1 +# - ParkedAgent: +# target_speed: 80 +# parking_lane_pos: +# low: -0.6 +# high: 0.0 +# parking_dist_from_start: +# low: 15 +# high: 25 +# collision_time_window: 1.2 +# pid_settings: +# accel_pid: +# - 10.5 # a_x +# - 0.05 # a_y +# - 2.8 # a_z +# steer_pid: +# - 5.1 +# - 0.001 +# - 0.000001 +# accel_scale: 1.0 +# steer_scale: 0.1 # - ConstVelTrafficAgent: # target_speed: 30 diff --git a/MADRaS/envs/gym_madras_v2.py b/MADRaS/envs/gym_madras_v2.py index 50c7fc4..93110aa 100644 --- a/MADRaS/envs/gym_madras_v2.py +++ b/MADRaS/envs/gym_madras_v2.py @@ -31,11 +31,12 @@ import MADRaS.utils.done_manager_v2 as dm import MADRaS.utils.observation_manager as om import MADRaS.traffic.traffic as traffic +from collections import OrderedDict import multiprocessing path_and_file = os.path.realpath(__file__) path, file = os.path.split(path_and_file) -DEFAULT_SIM_OPTIONS_FILE = os.path.join(path, "data", "sim_options.yml") +DEFAULT_SIM_OPTIONS_FILE = os.path.join(path, "data", "sim_options_v2.yml") class MadrasAgent(TorcsEnv, gym.Env): @@ 
-260,7 +261,7 @@ def __init__(self, cfg_path=DEFAULT_SIM_OPTIONS_FILE): self.torcs_proc = None self.seed() self.start_torcs_process() - self.agents ={} + self.agents = OrderedDict() if self._config.traffic: self.traffic_manager = traffic.MadrasTrafficManager( diff --git a/MADRaS/utils/snakeoil3_gym_v2.py b/MADRaS/utils/snakeoil3_gym_v2.py index 3fd2597..6a611ae 100644 --- a/MADRaS/utils/snakeoil3_gym_v2.py +++ b/MADRaS/utils/snakeoil3_gym_v2.py @@ -280,10 +280,10 @@ def get_servers_input(self, step): print("{} Waiting for server data on {}..............".format(self.name, self.port)) print("[{}]: Server count down : {}".format(self.name, n_fail)) - if n_fail < 0: - self.shutdown() - return -1 - n_fail = n_fail_org + # if n_fail < 0: + # self.shutdown() + # return -1 + # n_fail = n_fail_org n_fail -= 1 if '***identified***' in sockdata: From d86ee0cdfc0a4090457ac41a3e0eb978151038e9 Mon Sep 17 00:00:00 2001 From: rudrasohan Date: Sun, 15 Dec 2019 14:36:40 +0530 Subject: [PATCH 11/46] reorganization mirroring main-repo --- MADRaS/{ => agents/generic}/test_environment.py | 0 MADRaS/{ => agents/generic}/test_environment_v2.py | 6 +++--- MADRaS/{ => agents/rllib}/eval_rllib_agent.py | 0 MADRaS/{ => agents/rllib}/rllib_helpers.py | 0 MADRaS/{ => agents/rllib}/train_rllib_agent.py | 0 5 files changed, 3 insertions(+), 3 deletions(-) rename MADRaS/{ => agents/generic}/test_environment.py (100%) rename MADRaS/{ => agents/generic}/test_environment_v2.py (89%) rename MADRaS/{ => agents/rllib}/eval_rllib_agent.py (100%) rename MADRaS/{ => agents/rllib}/rllib_helpers.py (100%) rename MADRaS/{ => agents/rllib}/train_rllib_agent.py (100%) diff --git a/MADRaS/test_environment.py b/MADRaS/agents/generic/test_environment.py similarity index 100% rename from MADRaS/test_environment.py rename to MADRaS/agents/generic/test_environment.py diff --git a/MADRaS/test_environment_v2.py b/MADRaS/agents/generic/test_environment_v2.py similarity index 89% rename from MADRaS/test_environment_v2.py rename to MADRaS/agents/generic/test_environment_v2.py index c12bff7..46207d3 100644 --- a/MADRaS/test_environment_v2.py +++ b/MADRaS/agents/generic/test_environment_v2.py @@ -1,6 +1,6 @@ import numpy as np import gym -from envs.gym_madras_v2 import MadrasEnv +from MADRaS.envs.gym_madras_v2 import MadrasEnv import os @@ -29,11 +29,11 @@ def test_madras_pid(): print("Testing step...") for t in range(2000): obs, r, done, _ = env.step([[0.3, 0.5], - # [0.0, 1.0] + [0.0, 1.0] ]) print("{}: reward={}, done={}".format(t, r, done)) dones = [x for x in done.values()] - if np.all(dones): + if ((np.all(dones)) or (t%100 == 0)): env.reset() os.system("pkill torcs") diff --git a/MADRaS/eval_rllib_agent.py b/MADRaS/agents/rllib/eval_rllib_agent.py similarity index 100% rename from MADRaS/eval_rllib_agent.py rename to MADRaS/agents/rllib/eval_rllib_agent.py diff --git a/MADRaS/rllib_helpers.py b/MADRaS/agents/rllib/rllib_helpers.py similarity index 100% rename from MADRaS/rllib_helpers.py rename to MADRaS/agents/rllib/rllib_helpers.py diff --git a/MADRaS/train_rllib_agent.py b/MADRaS/agents/rllib/train_rllib_agent.py similarity index 100% rename from MADRaS/train_rllib_agent.py rename to MADRaS/agents/rllib/train_rllib_agent.py From 5bef0d0e89b0d5b3eaac5952e65823969016d3c9 Mon Sep 17 00:00:00 2001 From: rudrasohan Date: Sun, 15 Dec 2019 14:37:08 +0530 Subject: [PATCH 12/46] Time for 0.2 --- setup.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/setup.py b/setup.py index 751a2d3..0c66f0a 100644 --- a/setup.py +++ 
b/setup.py @@ -2,10 +2,10 @@ from setuptools import setup setup(name='MADRaS', - version='0.1', + version='0.2', description='Multi Agent Driving Simulator', install_requires=['gym', 'pyyaml', 'pTable', 'matplotlib', - 'tensorflow']) + 'mpi4py']) From d2ca923534a459ff83537d2cd50782c88c73a12b Mon Sep 17 00:00:00 2001 From: rudrasohan Date: Sun, 15 Dec 2019 15:57:43 +0530 Subject: [PATCH 13/46] final fixes --- MADRaS/agents/generic/test_environment_v2.py | 4 ++-- MADRaS/envs/gym_madras_v2.py | 7 +++++++ MADRaS/traffic/traffic.py | 6 +++--- 3 files changed, 12 insertions(+), 5 deletions(-) diff --git a/MADRaS/agents/generic/test_environment_v2.py b/MADRaS/agents/generic/test_environment_v2.py index 46207d3..040c077 100644 --- a/MADRaS/agents/generic/test_environment_v2.py +++ b/MADRaS/agents/generic/test_environment_v2.py @@ -15,7 +15,7 @@ def test_madras_vanilla(): obs, r, done, _ = env.step([[0.0, 1.0, -1.0]]) print("{}: reward={}, done={}".format(t, r, done)) dones = [x for x in done.values()] - if np.all(dones): + if np.any(dones): env.reset() os.system("pkill torcs") @@ -33,7 +33,7 @@ def test_madras_pid(): ]) print("{}: reward={}, done={}".format(t, r, done)) dones = [x for x in done.values()] - if ((np.all(dones)) or (t%100 == 0)): + if ((np.any(dones)) or (t%100 == 0)): env.reset() os.system("pkill torcs") diff --git a/MADRaS/envs/gym_madras_v2.py b/MADRaS/envs/gym_madras_v2.py index 93110aa..283b2bc 100644 --- a/MADRaS/envs/gym_madras_v2.py +++ b/MADRaS/envs/gym_madras_v2.py @@ -343,8 +343,10 @@ def reset(self): self.reset_torcs() else: self.initial_reset = False + if self._config.traffic: self.traffic_manager.reset() + s_t = {} # TODO(santara): fix parallel reset # jobs = [] @@ -364,8 +366,13 @@ def reset(self): # s_t[agent] = self.agents[agent].reset() # Create clients and connect their sockets + + for agent in self.agents: self.agents[agent].create_new_client() + + + # Collect first observations for agent in self.agents: s_t[agent] = self.agents[agent].get_observation_from_server() diff --git a/MADRaS/traffic/traffic.py b/MADRaS/traffic/traffic.py index da5f49f..ad3c2b0 100644 --- a/MADRaS/traffic/traffic.py +++ b/MADRaS/traffic/traffic.py @@ -1,9 +1,10 @@ import numpy as np -from MADRaS.utils.gym_torcs import TorcsEnv +from MADRaS.utils.gym_torcs_v2 import TorcsEnv import MADRaS.utils.snakeoil3_gym as snakeoil3 from MADRaS.controllers.pid import PIDController import MADRaS.utils.madras_datatypes as md from multiprocessing import Process +from collections import OrderedDict from time import time MadrasDatatypes = md.MadrasDatatypes() @@ -11,7 +12,7 @@ class MadrasTrafficManager(object): """Creates the traffic agents for a given training configuration.""" def __init__(self, torcs_server_port, num_learning_agents, cfg): - self.traffic_agents = {} + self.traffic_agents = OrderedDict() self.traffic_processes = [] self.num_episodes_of_training = 0 for i, agent in enumerate(cfg): @@ -59,7 +60,6 @@ def __init__(self, port, cfg, name='MadrasTraffic'): self.env = TorcsEnv(vision=(cfg["vision"] if "vision" in cfg else False), throttle=(cfg["throttle"] if "throttle" in cfg else True), gear_change=(cfg["gear_change"] if "gear_change" in cfg else False), - visualise=(self.cfg["visualise"] if "visualise" in self.cfg else False), name=self.name ) self.PID_controller = PIDController(cfg["pid_settings"]) From 28674d66d396a2aab4d35d3714fb63ff93975f39 Mon Sep 17 00:00:00 2001 From: santara Date: Sun, 15 Dec 2019 16:50:20 +0530 Subject: [PATCH 14/46] Resolved merge conflicts --- 
MADRaS/agents/generic/test_environment_v2.py | 2 +- MADRaS/utils/gym_torcs_v2.py | 6 ++++-- 2 files changed, 5 insertions(+), 3 deletions(-) diff --git a/MADRaS/agents/generic/test_environment_v2.py b/MADRaS/agents/generic/test_environment_v2.py index 040c077..1fc4f65 100644 --- a/MADRaS/agents/generic/test_environment_v2.py +++ b/MADRaS/agents/generic/test_environment_v2.py @@ -33,7 +33,7 @@ def test_madras_pid(): ]) print("{}: reward={}, done={}".format(t, r, done)) dones = [x for x in done.values()] - if ((np.any(dones)) or (t%100 == 0)): + if ((np.any(dones)) or (t > 0 and t % 100 == 0)): env.reset() os.system("pkill torcs") diff --git a/MADRaS/utils/gym_torcs_v2.py b/MADRaS/utils/gym_torcs_v2.py index cad4ed5..b947a17 100644 --- a/MADRaS/utils/gym_torcs_v2.py +++ b/MADRaS/utils/gym_torcs_v2.py @@ -224,7 +224,8 @@ def make_observation(self, raw_obs): 'rpm', 'track', 'trackPos', - 'wheelSpinVel'] + 'wheelSpinVel', + 'distFromStart'] Observation = col.namedtuple('Observation', names) return Observation(focus=np.array(raw_obs['focus'], dtype=madras.floatX)/200., speedX=np.array(raw_obs['speedX'], dtype=madras.floatX)/self.default_speed, @@ -236,7 +237,8 @@ def make_observation(self, raw_obs): rpm=np.array(raw_obs['rpm'], dtype=madras.floatX)/10000, track=np.array(raw_obs['track'], dtype=madras.floatX)/200., trackPos=np.array(raw_obs['trackPos'], dtype=madras.floatX)/1., - wheelSpinVel=np.array(raw_obs['wheelSpinVel'], dtype=madras.floatX)) + wheelSpinVel=np.array(raw_obs['wheelSpinVel'], dtype=madras.floatX), + distFromStart=np.array(raw_obs['distFromStart'], dtype=madras.floatX)) else: names = ['focus', 'speedX', 'speedY', 'speedZ', 'angle', From 04f5550a6236762765347acabcd3adc404ec6866 Mon Sep 17 00:00:00 2001 From: santara Date: Sun, 15 Dec 2019 17:11:45 +0530 Subject: [PATCH 15/46] Cleaning up the code... 
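
The prints removed here were debug output around the shared multiprocessing.Event that coordinates the per-agent step workers: whichever agent hits its done condition sets the event, and the other workers break out of their PID sub-step loops at the next check. A minimal standalone sketch of that pattern follows, with placeholder agent names and a dummy done condition rather than code from this repository:

    import multiprocessing

    def agent_step(name, stop_event, return_dict, num_substeps=10):
        # One worker per agent: accumulate reward over PID sub-steps and
        # stop early as soon as any agent reports done.
        reward = 0.0
        for substep in range(num_substeps):
            reward += 1.0                                # stands in for one simulator/PID step
            done = (name == "agent_1" and substep == 3)  # dummy done condition
            if done:
                stop_event.set()       # tell every other worker to stop
                break
            if stop_event.is_set():    # some other agent already hit done
                break
        return_dict[name] = reward

    if __name__ == "__main__":
        manager = multiprocessing.Manager()
        return_dict = manager.dict()
        stop_event = multiprocessing.Event()
        jobs = [multiprocessing.Process(target=agent_step,
                                        args=(name, stop_event, return_dict))
                for name in ("agent_0", "agent_1")]
        for job in jobs:
            job.start()
        for job in jobs:
            job.join()
        print(dict(return_dict))  # agent_1 stops itself; agent_0 stops once it sees the event
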
--- MADRaS/envs/gym_madras_v2.py | 5 +---- MADRaS/test_environment_v2.py | 2 +- 2 files changed, 2 insertions(+), 5 deletions(-) diff --git a/MADRaS/envs/gym_madras_v2.py b/MADRaS/envs/gym_madras_v2.py index db4720d..82ce172 100644 --- a/MADRaS/envs/gym_madras_v2.py +++ b/MADRaS/envs/gym_madras_v2.py @@ -231,12 +231,9 @@ def step_pid(self, desire, e): done = self.done_manager.get_done_signal(self._config, game_state) if done: e.set() - print('e is set by {}'.format(self.name)) - print(PID_step) break if e.is_set(): - print('e is not set by {}'.format(self.name)) - print(PID_step) + print("[{}]: Stopping agent because some other agent has hit done".format(self.name)) break next_obs = self.observation_manager.get_obs(self.ob, self._config) diff --git a/MADRaS/test_environment_v2.py b/MADRaS/test_environment_v2.py index 16c0be9..a112365 100644 --- a/MADRaS/test_environment_v2.py +++ b/MADRaS/test_environment_v2.py @@ -29,7 +29,7 @@ def test_madras_pid(): print("Testing step...") for t in range(2000): obs, r, done, _ = env.step([[0.3, 0.5], - [0.0, 1.0] + [-0.3, 1.0] ]) print("{}: reward={}, done={}".format(t, r, done)) dones = [x for x in done.values()] From 047b5d0e5b9acf2b929f8498cf7e95436b8d9bf8 Mon Sep 17 00:00:00 2001 From: rudrasohan Date: Fri, 20 Dec 2019 21:29:06 +0530 Subject: [PATCH 16/46] created action_dict --- MADRaS/agents/generic/test_environment_v2.py | 7 ++++--- MADRaS/envs/gym_madras_v2.py | 4 ++-- 2 files changed, 6 insertions(+), 5 deletions(-) diff --git a/MADRaS/agents/generic/test_environment_v2.py b/MADRaS/agents/generic/test_environment_v2.py index 675503e..fc974c0 100644 --- a/MADRaS/agents/generic/test_environment_v2.py +++ b/MADRaS/agents/generic/test_environment_v2.py @@ -28,9 +28,10 @@ def test_madras_pid(): " Verify if the number of dimensions {} is right.".format(obs, len(obs))) print("Testing step...") for t in range(2000): - obs, r, done, _ = env.step([[0.3, 0.5], - [-0.3, 1.0] - ]) + obs, r, done, _ = env.step({"MadrasAgent_0": [0.3, 0.5], + "MadrasAgent_1": [-0.3, 1.0] + }) + #print("{}".format(obs)) print("{}: reward={}, done={}".format(t, r, done)) dones = [x for x in done.values()] if ((np.any(dones)) or (t > 0 and t % 100 == 0)): diff --git a/MADRaS/envs/gym_madras_v2.py b/MADRaS/envs/gym_madras_v2.py index 93a3f24..51eb6b6 100644 --- a/MADRaS/envs/gym_madras_v2.py +++ b/MADRaS/envs/gym_madras_v2.py @@ -376,8 +376,8 @@ def step(self, action): return_dict = manager.dict() e = multiprocessing.Event() jobs = [] - for i, agent in enumerate(self.agents): - p = multiprocessing.Process(target=self.agents[agent].step, args=(action[i], e, return_dict)) + for agent in self.agents: + p = multiprocessing.Process(target=self.agents[agent].step, args=(action[agent], e, return_dict)) jobs.append(p) p.start() From 253b292afb89d0e9235af9a5bdb44fa6c77e094e Mon Sep 17 00:00:00 2001 From: rudrasohan Date: Fri, 20 Dec 2019 22:39:05 +0530 Subject: [PATCH 17/46] recomended sim changes --- MADRaS/envs/gym_madras_v2.py | 13 ++++++++++--- setup.py | 3 ++- 2 files changed, 12 insertions(+), 4 deletions(-) diff --git a/MADRaS/envs/gym_madras_v2.py b/MADRaS/envs/gym_madras_v2.py index 51eb6b6..1319235 100644 --- a/MADRaS/envs/gym_madras_v2.py +++ b/MADRaS/envs/gym_madras_v2.py @@ -33,6 +33,7 @@ import MADRaS.traffic.traffic as traffic from collections import OrderedDict import multiprocessing +from ray.rllib.env.multi_agent_env import MultiAgentEnv path_and_file = os.path.realpath(__file__) path, file = os.path.split(path_and_file) @@ -256,7 +257,7 @@ def increment_step(self): 
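# The step() diff above fans each agent out into its own process, collects results through a
# shared dict and uses an Event so agents can tell each other that someone hit done.  A
# self-contained sketch of that pattern; the agent names, the fake _step function and its
# (obs, reward, done, info) return tuple are illustrative, not the MadrasAgent API:
import multiprocessing

def _step(name, action, event, return_dict):
    # Stand-in for an agent step: report (obs, reward, done, info) keyed by agent name.
    done = action > 0.5
    if done:
        event.set()          # signal the other agents that this one finished
    return_dict[name] = ([0.0], 1.0, done, {})

if __name__ == "__main__":
    manager = multiprocessing.Manager()
    return_dict = manager.dict()
    event = multiprocessing.Event()
    actions = {"MadrasAgent_0": 0.3, "MadrasAgent_1": 0.9}
    jobs = [multiprocessing.Process(target=_step, args=(name, a, event, return_dict))
            for name, a in actions.items()]
    for p in jobs:
        p.start()
    for p in jobs:
        p.join()
    done = {name: return_dict[name][2] for name in actions}
    done["__all__"] = any(done.values())   # rllib-style aggregate done flag
    print(done)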
self.step_num += 1 -class MadrasEnv(gym.Env): +class MadrasEnv(gym.Env, MultiAgentEnv): """Definition of the Gym Madras Environment.""" def __init__(self, cfg_path=DEFAULT_SIM_OPTIONS_FILE): # If `visualise` is set to False torcs simulator will run in headless mode @@ -371,7 +372,7 @@ def reset(self): return s_t def step(self, action): - next_obs, reward, done, info = {}, {}, {}, {} + next_obs, reward, done, info = {}, {}, {'__all__': False}, {} manager = multiprocessing.Manager() return_dict = manager.dict() e = multiprocessing.Event() @@ -383,11 +384,17 @@ def step(self, action): for proc in jobs: proc.join() + + done_check = False + for agent in self.agents: next_obs[agent] = return_dict[agent][0] reward[agent] = return_dict[agent][1] done[agent] = return_dict[agent][2] + if (done[agent] == True): + done_check = True info[agent] = return_dict[agent][3] self.agents[agent].increment_step() - + + done['__all__'] = done_check return next_obs, reward, done, info diff --git a/setup.py b/setup.py index 0c66f0a..c6c80c3 100644 --- a/setup.py +++ b/setup.py @@ -8,4 +8,5 @@ 'pyyaml', 'pTable', 'matplotlib', - 'mpi4py']) + 'mpi4py', + 'ray[rllib]']) From d0550bef6e3233b7ef9917d9924c9aaaf2da7f24 Mon Sep 17 00:00:00 2001 From: rudrasohan Date: Fri, 20 Dec 2019 22:40:46 +0530 Subject: [PATCH 18/46] added multi-agent train script --- .../agents/rllib/train_rllib_multi_agent.py | 73 +++++++++++++++++++ 1 file changed, 73 insertions(+) create mode 100644 MADRaS/agents/rllib/train_rllib_multi_agent.py diff --git a/MADRaS/agents/rllib/train_rllib_multi_agent.py b/MADRaS/agents/rllib/train_rllib_multi_agent.py new file mode 100644 index 0000000..7cd48b1 --- /dev/null +++ b/MADRaS/agents/rllib/train_rllib_multi_agent.py @@ -0,0 +1,73 @@ +import ray +import gym +import argparse +from ray.rllib.agents.ppo.ppo import PPOTrainer +from ray.rllib.agents.ppo.ppo_policy import PPOTFPolicy +from ray.tune.logger import pretty_print +from ray.rllib.env.multi_agent_env import MultiAgentEnv + +parser = argparse.ArgumentParser() +parser.add_argument("--num-iters", type=int, default=20) + +if __name__ == "__main__": + args = parser.parse_args() + ray.init() + + # Simple environment with 4 independent cartpole entities + #register_env("multi_cartpole", lambda _: MultiCartpole(4)) + env = gym.make("Madras-v0") + obs_space_1 = env.agents["MadrasAgent_0"].observation_space + act_space_1 = env.agents["MadrasAgent_0"].action_space + obs_space_2 = env.agents["MadrasAgent_1"].observation_space + act_space_2 = env.agents["MadrasAgent_1"].action_space + # You can also have multiple policies per trainer, but here we just + # show one each for PPO and DQN. 
+ policies = { + "ppo_policy_1": (PPOTFPolicy, obs_space_1, act_space_1, {}), + "ppo_policy_2": (PPOTFPolicy, obs_space_2, act_space_2, {}), + } + + def policy_mapping_fn(agent_id): + if agent_id == "MadrasAgent_0": + return "ppo_policy_1" + else: + return "ppo_policy_2" + + ppo_trainer = PPOTrainer( + env="Madras-v0", + config={ + "multiagent": { + "policies": policies, + "policy_mapping_fn": policy_mapping_fn, + #"policies_to_train": ["ppo_policy_1","ppo_policy_2"], + }, + # disable filters, otherwise we would need to synchronize those + # as well to the DQN agent + + }) + + + # disable DQN exploration when used by the PPO trainer + # ppo_trainer.workers.foreach_worker( + # lambda ev: ev.for_policy( + # lambda pi: pi.set_epsilon(0.0), policy_id="dqn_policy")) + + # You should see both the printed X and Y approach 200 as this trains: + # info: + # policy_reward_mean: + # dqn_policy: X + # ppo_policy: Y + for i in range(args.num_iters): + print("== Iteration", i, "==") + + # improve the DQN policy + #print("-- DQN --") + #print(pretty_print(dqn_trainer.train())) + + # improve the PPO policy + print("-- PPO --") + print(pretty_print(ppo_trainer.train())) + + # swap weights to synchronize + #dqn_trainer.set_weights(ppo_trainer.get_weights(["ppo_policy"])) + #ppo_trainer.set_weights(dqn_trainer.get_weights(["dqn_policy"])) From b97a446b1144db0bcb1d62ca35f02054d5d7378a Mon Sep 17 00:00:00 2001 From: rudrasohan Date: Sat, 21 Dec 2019 20:07:25 +0530 Subject: [PATCH 19/46] adderessed @Santara's comments --- MADRaS/agents/generic/test_environment_v2.py | 2 +- .../agents/rllib/train_rllib_multi_agent.py | 74 +++++++++---------- MADRaS/envs/gym_madras_v2.py | 12 ++- setup.py | 3 +- 4 files changed, 44 insertions(+), 47 deletions(-) diff --git a/MADRaS/agents/generic/test_environment_v2.py b/MADRaS/agents/generic/test_environment_v2.py index fc974c0..608f5ee 100644 --- a/MADRaS/agents/generic/test_environment_v2.py +++ b/MADRaS/agents/generic/test_environment_v2.py @@ -12,7 +12,7 @@ def test_madras_vanilla(): " Verify if the number of dimensions {} is right.".format(obs, len(obs))) print("Testing step...") for t in range(20000): - obs, r, done, _ = env.step([[0.0, 1.0, -1.0]]) + obs, r, done, _ = env.step({"MadrasAgent_0": [0.0, 1.0, -1.0]}) print("{}: reward={}, done={}".format(t, r, done)) dones = [x for x in done.values()] if np.any(dones): diff --git a/MADRaS/agents/rllib/train_rllib_multi_agent.py b/MADRaS/agents/rllib/train_rllib_multi_agent.py index 7cd48b1..70dfcb0 100644 --- a/MADRaS/agents/rllib/train_rllib_multi_agent.py +++ b/MADRaS/agents/rllib/train_rllib_multi_agent.py @@ -5,6 +5,23 @@ from ray.rllib.agents.ppo.ppo_policy import PPOTFPolicy from ray.tune.logger import pretty_print from ray.rllib.env.multi_agent_env import MultiAgentEnv +from MADRaS.envs.gym_madras_v2 import MadrasEnv + + +class MADRaSRLLib(MultiAgentEnv): + """ + MADRaS rllib wrapper. 
+ """ + def __init__(self, *args): + self._env = MadrasEnv() + self.agents = self._env.agents + self.num_agents = self._env.num_agents + + def reset(self): + return self._env.reset() + + def step(self, action_dict): + return self._env.step(action_dict) parser = argparse.ArgumentParser() parser.add_argument("--num-iters", type=int, default=20) @@ -13,61 +30,36 @@ args = parser.parse_args() ray.init() - # Simple environment with 4 independent cartpole entities - #register_env("multi_cartpole", lambda _: MultiCartpole(4)) - env = gym.make("Madras-v0") - obs_space_1 = env.agents["MadrasAgent_0"].observation_space - act_space_1 = env.agents["MadrasAgent_0"].action_space - obs_space_2 = env.agents["MadrasAgent_1"].observation_space - act_space_2 = env.agents["MadrasAgent_1"].action_space + env = MADRaSRLLib() + + obs_spaces, action_spaces = [], [] + for agent in env.agents: + obs_spaces.append(env.agents[agent].observation_space) + action_spaces.append(env.agents[agent].action_space) + + policies = {"ppo_policy_{}".format(i) : (PPOTFPolicy, obs_spaces[i], action_spaces[i], {}) for i in range(env.num_agents)} # You can also have multiple policies per trainer, but here we just - # show one each for PPO and DQN. - policies = { - "ppo_policy_1": (PPOTFPolicy, obs_space_1, act_space_1, {}), - "ppo_policy_2": (PPOTFPolicy, obs_space_2, act_space_2, {}), - } + # show one each for PPO. def policy_mapping_fn(agent_id): - if agent_id == "MadrasAgent_0": - return "ppo_policy_1" - else: - return "ppo_policy_2" + id = agent_id.split("_")[-1] + return "ppo_policy_{}".format(id) ppo_trainer = PPOTrainer( - env="Madras-v0", + env=MADRaSRLLib, config={ + "sample_batch_size": 50, + "train_batch_size": 100, + "sgd_minibatch_size": 64, "multiagent": { "policies": policies, "policy_mapping_fn": policy_mapping_fn, - #"policies_to_train": ["ppo_policy_1","ppo_policy_2"], }, - # disable filters, otherwise we would need to synchronize those - # as well to the DQN agent - }) - - # disable DQN exploration when used by the PPO trainer - # ppo_trainer.workers.foreach_worker( - # lambda ev: ev.for_policy( - # lambda pi: pi.set_epsilon(0.0), policy_id="dqn_policy")) - - # You should see both the printed X and Y approach 200 as this trains: - # info: - # policy_reward_mean: - # dqn_policy: X - # ppo_policy: Y for i in range(args.num_iters): print("== Iteration", i, "==") - # improve the DQN policy - #print("-- DQN --") - #print(pretty_print(dqn_trainer.train())) - # improve the PPO policy print("-- PPO --") - print(pretty_print(ppo_trainer.train())) - - # swap weights to synchronize - #dqn_trainer.set_weights(ppo_trainer.get_weights(["ppo_policy"])) - #ppo_trainer.set_weights(dqn_trainer.get_weights(["dqn_policy"])) + print(pretty_print(ppo_trainer.train())) \ No newline at end of file diff --git a/MADRaS/envs/gym_madras_v2.py b/MADRaS/envs/gym_madras_v2.py index 1319235..4dffa11 100644 --- a/MADRaS/envs/gym_madras_v2.py +++ b/MADRaS/envs/gym_madras_v2.py @@ -33,7 +33,6 @@ import MADRaS.traffic.traffic as traffic from collections import OrderedDict import multiprocessing -from ray.rllib.env.multi_agent_env import MultiAgentEnv path_and_file = os.path.realpath(__file__) path, file = os.path.split(path_and_file) @@ -257,7 +256,7 @@ def increment_step(self): self.step_num += 1 -class MadrasEnv(gym.Env, MultiAgentEnv): +class MadrasEnv(gym.Env): """Definition of the Gym Madras Environment.""" def __init__(self, cfg_path=DEFAULT_SIM_OPTIONS_FILE): # If `visualise` is set to False torcs simulator will run in headless mode @@ -266,6 
+265,7 @@ def __init__(self, cfg_path=DEFAULT_SIM_OPTIONS_FILE): self.torcs_proc = None self.seed() self.start_torcs_process() + self.num_agents = 0 self.agents = OrderedDict() if self._config.traffic: @@ -282,6 +282,7 @@ def __init__(self, cfg_path=DEFAULT_SIM_OPTIONS_FILE): {"track_len": self._config.track_len, "max_steps": self._config.max_steps }) + self.num_agents += 1 # self.action_dim = self.agents[0].action_dim # TODO(santara): Can I not have different action dims for different agents? self.initial_reset = True @@ -391,7 +392,12 @@ def step(self, action): next_obs[agent] = return_dict[agent][0] reward[agent] = return_dict[agent][1] done[agent] = return_dict[agent][2] - if (done[agent] == True): + if (done[agent] == True): + """ + Although rllib supports individual agent resets + but MADRaS Env as of now has to be reset even if + one of the agents hits done. + """ done_check = True info[agent] = return_dict[agent][3] self.agents[agent].increment_step() diff --git a/setup.py b/setup.py index c6c80c3..0c66f0a 100644 --- a/setup.py +++ b/setup.py @@ -8,5 +8,4 @@ 'pyyaml', 'pTable', 'matplotlib', - 'mpi4py', - 'ray[rllib]']) + 'mpi4py']) From 11383d5ed437fefaa63b236152072ea44cdd1dfc Mon Sep 17 00:00:00 2001 From: rudrasohan Date: Sat, 21 Dec 2019 20:47:41 +0530 Subject: [PATCH 20/46] used inheritance for env wrapper --- .../agents/rllib/train_rllib_multi_agent.py | 20 ++++++++----------- 1 file changed, 8 insertions(+), 12 deletions(-) diff --git a/MADRaS/agents/rllib/train_rllib_multi_agent.py b/MADRaS/agents/rllib/train_rllib_multi_agent.py index 70dfcb0..18b2181 100644 --- a/MADRaS/agents/rllib/train_rllib_multi_agent.py +++ b/MADRaS/agents/rllib/train_rllib_multi_agent.py @@ -8,20 +8,18 @@ from MADRaS.envs.gym_madras_v2 import MadrasEnv -class MADRaSRLLib(MultiAgentEnv): +class MadrasRllib(MultiAgentEnv, MadrasEnv): """ - MADRaS rllib wrapper. + MADRaS rllib Env wrapper. """ def __init__(self, *args): - self._env = MadrasEnv() - self.agents = self._env.agents - self.num_agents = self._env.num_agents + MadrasEnv.__init__(self) def reset(self): - return self._env.reset() + return MadrasEnv.reset(self) def step(self, action_dict): - return self._env.step(action_dict) + return MadrasEnv.step(self, action_dict) parser = argparse.ArgumentParser() parser.add_argument("--num-iters", type=int, default=20) @@ -30,7 +28,7 @@ def step(self, action_dict): args = parser.parse_args() ray.init() - env = MADRaSRLLib() + env = MadrasRllib() obs_spaces, action_spaces = [], [] for agent in env.agents: @@ -38,17 +36,15 @@ def step(self, action_dict): action_spaces.append(env.agents[agent].action_space) policies = {"ppo_policy_{}".format(i) : (PPOTFPolicy, obs_spaces[i], action_spaces[i], {}) for i in range(env.num_agents)} - # You can also have multiple policies per trainer, but here we just - # show one each for PPO. 
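# The dict comprehension above builds one policy spec per agent.  Each value is the 4-tuple
# the script passes to the "multiagent" config: (policy_class, observation_space,
# action_space, extra_config).  A small stand-alone illustration; PolicyCls is a placeholder
# rather than PPOTFPolicy, and the space sizes (60-dim observations, 2-dim actions) simply
# mirror the config used in this repo:
import numpy as np
from gym import spaces

PolicyCls = object  # placeholder for the real policy class, e.g. PPOTFPolicy
obs_spaces = [spaces.Box(low=-1.0, high=1.0, shape=(60,), dtype=np.float32) for _ in range(2)]
action_spaces = [spaces.Box(low=-1.0, high=1.0, shape=(2,), dtype=np.float32) for _ in range(2)]
policies = {"ppo_policy_{}".format(i): (PolicyCls, obs_spaces[i], action_spaces[i], {})
            for i in range(len(obs_spaces))}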
def policy_mapping_fn(agent_id): id = agent_id.split("_")[-1] return "ppo_policy_{}".format(id) ppo_trainer = PPOTrainer( - env=MADRaSRLLib, + env=MadrasRllib, config={ - "sample_batch_size": 50, + "sample_batch_size": 50, #set them accordingly "train_batch_size": 100, "sgd_minibatch_size": 64, "multiagent": { From f70985c2a7268028135a615e27f1704f704c9892 Mon Sep 17 00:00:00 2001 From: rudrasohan Date: Sat, 21 Dec 2019 20:49:53 +0530 Subject: [PATCH 21/46] added extra space --- MADRaS/agents/rllib/train_rllib_multi_agent.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/MADRaS/agents/rllib/train_rllib_multi_agent.py b/MADRaS/agents/rllib/train_rllib_multi_agent.py index 18b2181..241d847 100644 --- a/MADRaS/agents/rllib/train_rllib_multi_agent.py +++ b/MADRaS/agents/rllib/train_rllib_multi_agent.py @@ -58,4 +58,4 @@ def policy_mapping_fn(agent_id): # improve the PPO policy print("-- PPO --") - print(pretty_print(ppo_trainer.train())) \ No newline at end of file + print(pretty_print(ppo_trainer.train())) From 71b6fe0ee2c2fc1c0214ddbb722b9d52ddaaff41 Mon Sep 17 00:00:00 2001 From: rudrasohan Date: Sat, 21 Dec 2019 21:41:43 +0530 Subject: [PATCH 22/46] done feaure req --- MADRaS/envs/gym_madras_v2.py | 4 ++-- MADRaS/utils/done_manager_v2.py | 17 +++++++++++++---- 2 files changed, 15 insertions(+), 6 deletions(-) diff --git a/MADRaS/envs/gym_madras_v2.py b/MADRaS/envs/gym_madras_v2.py index 4dffa11..1519663 100644 --- a/MADRaS/envs/gym_madras_v2.py +++ b/MADRaS/envs/gym_madras_v2.py @@ -187,7 +187,7 @@ def step_vanilla(self, action): "num_steps": self.step_num} reward = self.reward_manager.get_reward(self._config, game_state) - done = self.done_manager.get_done_signal(self._config, game_state) + done = self.done_manager.get_done_signal(self._config, game_state, self.name) next_obs = self.observation_manager.get_obs(self.ob, self._config) @@ -231,7 +231,7 @@ def step_pid(self, desire, e): reward += self.reward_manager.get_reward(self._config, game_state) if self._config.pid_assist: self.PID_controller.update(self.ob) - done = self.done_manager.get_done_signal(self._config, game_state) + done = self.done_manager.get_done_signal(self._config, game_state, self.name) if done: e.set() break diff --git a/MADRaS/utils/done_manager_v2.py b/MADRaS/utils/done_manager_v2.py index e7e4938..fc6d06d 100644 --- a/MADRaS/utils/done_manager_v2.py +++ b/MADRaS/utils/done_manager_v2.py @@ -18,11 +18,20 @@ def __init__(self, cfg): "as done.") self.dones['TorcsDone'] = TorcsDone() - def get_done_signal(self, game_config, game_state): + def get_done_signal(self, game_config, game_state, agent_id): done_signals = [] - for done_function in self.dones.values(): - done_signals.append(done_function.check_done(game_config, game_state)) - return np.any(done_signals) + keys = [] + + for key, done_function in self.dones.items(): + done_val = done_function.check_done(game_config, game_state) + done_signals.append(done_val) + if done_val: + keys.append(key) + + signal = np.any(done_signals) + if signal: + print("[{}]: {}".format(agent_id, keys)) + return signal def reset(self): for done_function in self.dones.values(): From e9b76e22823efbbf412afcb36b055b265b9d5397 Mon Sep 17 00:00:00 2001 From: rudrasohan Date: Sat, 21 Dec 2019 23:45:45 +0530 Subject: [PATCH 23/46] reformatted dones --- MADRaS/envs/gym_madras_v2.py | 6 ++-- MADRaS/utils/done_manager_v2.py | 51 +++++++++++++++++++++++---------- 2 files changed, 39 insertions(+), 18 deletions(-) diff --git a/MADRaS/envs/gym_madras_v2.py 
b/MADRaS/envs/gym_madras_v2.py index 1519663..3ebb4a1 100644 --- a/MADRaS/envs/gym_madras_v2.py +++ b/MADRaS/envs/gym_madras_v2.py @@ -67,7 +67,7 @@ def __init__(self, name, torcs_server_port, cfg, sim_info={}): self.obs_dim = self.observation_manager.get_state_dim() # No. of sensors input self.state_dim = self.observation_manager.get_state_dim() self.reward_manager = rm.RewardManager(self._config.rewards) - self.done_manager = dm.DoneManager(self._config.dones) + self.done_manager = dm.DoneManager(self._config.dones, self.name) self.initial_reset = True self.step_num = 0 @@ -187,7 +187,7 @@ def step_vanilla(self, action): "num_steps": self.step_num} reward = self.reward_manager.get_reward(self._config, game_state) - done = self.done_manager.get_done_signal(self._config, game_state, self.name) + done = self.done_manager.get_done_signal(self._config, game_state) next_obs = self.observation_manager.get_obs(self.ob, self._config) @@ -231,7 +231,7 @@ def step_pid(self, desire, e): reward += self.reward_manager.get_reward(self._config, game_state) if self._config.pid_assist: self.PID_controller.update(self.ob) - done = self.done_manager.get_done_signal(self._config, game_state, self.name) + done = self.done_manager.get_done_signal(self._config, game_state) if done: e.set() break diff --git a/MADRaS/utils/done_manager_v2.py b/MADRaS/utils/done_manager_v2.py index fc6d06d..7cf3ece 100644 --- a/MADRaS/utils/done_manager_v2.py +++ b/MADRaS/utils/done_manager_v2.py @@ -5,7 +5,8 @@ class DoneManager(object): """Composes the done function from a given done configuration.""" - def __init__(self, cfg): + def __init__(self, cfg, agent_id): + self.agent_id = agent_id self.dones = {} for key in cfg: try: @@ -18,19 +19,19 @@ def __init__(self, cfg): "as done.") self.dones['TorcsDone'] = TorcsDone() - def get_done_signal(self, game_config, game_state, agent_id): + def get_done_signal(self, game_config, game_state): done_signals = [] - keys = [] for key, done_function in self.dones.items(): done_val = done_function.check_done(game_config, game_state) done_signals.append(done_val) if done_val: - keys.append(key) + if hasattr(done_function, "num_steps"): + done_function.reason = done_function.reason.format(done_function.num_steps) + out = "[{}] {}".format(self.agent_id, done_function.reason) + print(out) signal = np.any(done_signals) - if signal: - print("[{}]: {}".format(agent_id, keys)) return signal def reset(self): @@ -45,7 +46,8 @@ class MadrasDone(object): - [required] check_done(game_config, game_state) - [optional] reset() """ - def __init__(self): + def __init__(self, reason): + self.reason = reason pass def check_done(self, game_config, game_state): @@ -58,6 +60,10 @@ def reset(self): class TorcsDone(MadrasDone): """Vanilla done function provided by TORCS.""" + + def __init__(self): + MadrasDone.__init__(self, "Done: Torcs Done") + def check_done(self, game_config, game_state): del game_config if not math.isnan(game_state["torcs_done"]): @@ -68,15 +74,21 @@ def check_done(self, game_config, game_state): class RaceOver(MadrasDone): """Terminates episode when the agent has finishes one lap.""" + def __init__(self): + MadrasDone.__init__(self, "Done: Race over!") + def check_done(self, game_config, game_state): if game_state["distance_traversed"] >= game_config.track_len: - print("Done: Race over!") return True else: return False class TimeOut(MadrasDone): + + def __init__(self): + MadrasDone.__init__(self, "Done: Episode terminated due to timeout.") + def check_done(self, game_config, game_state): 
self.num_steps = game_state["num_steps"] if not game_config.max_steps: @@ -84,20 +96,20 @@ def check_done(self, game_config, game_state): else: max_steps = game_config.max_steps if self.num_steps >= max_steps: - print("Done: Episode terminated due to timeout.") return True else: return False class Collision(MadrasDone): + def __init__(self): self.damage = 0.0 + MadrasDone.__init__(self, "Done: Episode terminated because agent collided.") def check_done(self, game_config, game_state): del game_config if self.damage < game_state["damage"]: - print("Done: Episode terminated because agent collided.") self.damage = 0.0 return True else: @@ -108,33 +120,42 @@ def reset(self): class TurnBackward(MadrasDone): + + def __init__(self): + MadrasDone.__init__(self, "Done: Episode terminated because agent turned backward.") + def check_done(self, game_config, game_state): del game_config if np.cos(game_state["angle"]) < 0: - print("Done: Episode terminated because agent turned backward.") return True else: return False class OutOfTrack(MadrasDone): + + def __init__(self): + MadrasDone.__init__(self, "Done: Episode terminated because agent went out of track" + " after {} steps.") + def check_done(self, game_config, game_state): self.num_steps = game_state["num_steps"] if (game_state["trackPos"] < -1 or game_state["trackPos"] > 1 or np.any(np.asarray(game_state["track"]) < 0)): - print("Done: Episode terminated because agent went out of track" - " after {} steps.".format(self.num_steps)) return True else: return False class Rank1(MadrasDone): + + def __init__(self): + MadrasDone.__init__(self, "Done: Episode terminated because agent is Rank 1" + " after {} steps.") + def check_done(self, game_config, game_state): self.num_steps = game_state["num_steps"] if game_state["racePos"] == 1: - print("Done: Episode terminated because agent is Rank 1" - " after {} steps.".format(self.num_steps)) return True else: return False \ No newline at end of file From 40f8a93f6ca8d30577610b34b4e096545dcfb5ee Mon Sep 17 00:00:00 2001 From: rudrasohan Date: Sun, 22 Dec 2019 20:36:02 +0530 Subject: [PATCH 24/46] added comm capabilites (untested) --- MADRaS/envs/data/sim_options_v2.yml | 4 +++ MADRaS/envs/gym_madras_v2.py | 54 +++++++++++++++++++++++++++-- MADRaS/utils/action_buffer.py | 40 +++++++++++++++++++++ 3 files changed, 96 insertions(+), 2 deletions(-) create mode 100644 MADRaS/utils/action_buffer.py diff --git a/MADRaS/envs/data/sim_options_v2.yml b/MADRaS/envs/data/sim_options_v2.yml index 19e8510..9cce8c1 100644 --- a/MADRaS/envs/data/sim_options_v2.yml +++ b/MADRaS/envs/data/sim_options_v2.yml @@ -35,6 +35,8 @@ agents: # Observation mode observations: mode: SingleAgentInTrafficObs + multi_flag: False + buff_size: 2 normalize: False # gym_torcs normalizes by default obs_min: angle: -3.142 @@ -95,6 +97,8 @@ agents: # Observation mode observations: mode: SingleAgentSimpleLapObs + multi_flag: False + buff_size: 2 normalize: False # gym_torcs normalizes by default obs_min: angle: -3.142 diff --git a/MADRaS/envs/gym_madras_v2.py b/MADRaS/envs/gym_madras_v2.py index 3ebb4a1..4835c25 100644 --- a/MADRaS/envs/gym_madras_v2.py +++ b/MADRaS/envs/gym_madras_v2.py @@ -20,6 +20,7 @@ from MADRaS.controllers.pid import PIDController import gym from gym.utils import seeding +from gym import spaces import os import subprocess import signal @@ -30,10 +31,15 @@ import MADRaS.utils.reward_manager as rm import MADRaS.utils.done_manager_v2 as dm import MADRaS.utils.observation_manager as om +import MADRaS.utils.action_buffer as ab +import 
MADRaS.utils.madras_datatypes as md import MADRaS.traffic.traffic as traffic from collections import OrderedDict import multiprocessing + +madras = md.MadrasDatatypes() + path_and_file = os.path.realpath(__file__) path, file = os.path.split(path_and_file) DEFAULT_SIM_OPTIONS_FILE = os.path.join(path, "data", "sim_options_v2.yml") @@ -70,6 +76,7 @@ def __init__(self, name, torcs_server_port, cfg, sim_info={}): self.done_manager = dm.DoneManager(self._config.dones, self.name) self.initial_reset = True self.step_num = 0 + self.multi_flag = self.observation['multi_flag'] def create_new_client(self): while True: @@ -166,6 +173,7 @@ def step_vanilla(self, action): action[1] = (action[1] + 1) / 2.0 # acceleration back to [0, 1] action[2] = (action[2] + 1) / 2.0 # brake back to [0, 1] r = 0.0 + self.action_buffer.insert(action) try: self.ob, r, done, info = TorcsEnv.step(self, 0, self.client, action, @@ -209,6 +217,7 @@ def step_pid(self, desire, e): reward = 0.0 for PID_step in range(self._config.pid_settings['pid_latency']): + self.action_buffer.insert(desire) a_t = self.PID_controller.get_action(desire) try: self.ob, r, done, info = TorcsEnv.step(self, PID_step, @@ -255,6 +264,21 @@ def increment_step(self): else: self.step_num += 1 + def actions_init(self, info={}): + self.action_buffer = ab.ActionBuffer(self.name, self.observation['size'], self.action_dim) + additional_dims = 0 + + for key, dims in info["agnet"].items(): + if (self.key != self.name): + additional_dims += dims*self.observation['size'] + + self.obs_dim += additional_dims + high = np.hstack(self.observation_space.high, np.ones((additional_dims), dtype=madras.floatX)) + low = np.hstack(self.observation_space.low, -np.ones((additional_dims), dtype=madras.floatX)) + self.observation_space = spaces.Box(high=high, low=low) + + + class MadrasEnv(gym.Env): """Definition of the Gym Madras Environment.""" @@ -267,7 +291,9 @@ def __init__(self, cfg_path=DEFAULT_SIM_OPTIONS_FILE): self.start_torcs_process() self.num_agents = 0 self.agents = OrderedDict() - + self.comm_info = {} + self.comm_agent_names = [] + self.act_dims = [] if self._config.traffic: self.traffic_manager = traffic.MadrasTrafficManager( self._config.torcs_server_port, len(self.agents), self._config.traffic) @@ -275,6 +301,7 @@ def __init__(self, cfg_path=DEFAULT_SIM_OPTIONS_FILE): if self._config.agents: for i, agent in enumerate(self._config.agents): agent_name = [x for x in agent.keys()][0] + agent_cfg = agent[agent_name] name = '{}_{}'.format(agent_name, i) torcs_server_port = self._config.torcs_server_port + i + num_traffic_agents @@ -282,9 +309,24 @@ def __init__(self, cfg_path=DEFAULT_SIM_OPTIONS_FILE): {"track_len": self._config.track_len, "max_steps": self._config.max_steps }) + if self.agents[name].multi_flag: + self.comm_agent_names.append(agent_name) + self.act_dims.append(self.agents[name].actio_dim) + self.comm_info[agent_name] = self.agents[name].actio_dim + self.comm_info["agnet_map"] = {} self.num_agents += 1 + for agent in self.comm_agent_names: + self.agents[agent].actions_init(self.comm_info) + self.comm_info["agent_map"][agent] = [] + for comm_agent in self.comm_agent_names: + if comm_agent != agent: + self.comm_info["agent_map"][agent].append(comm_agent) + + # self.action_dim = self.agents[0].action_dim # TODO(santara): Can I not have different action dims for different agents? 
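# The comm wiring above appends the last buff_size actions of every *other* agent to each
# agent's observation.  A simplified, deque-based stand-in for the ActionBuffer class this
# patch introduces (see the action_buffer.py listing further down); the 60-dim observation
# and the buffer sizes are illustrative:
from collections import deque
import numpy as np

class RecentActions:
    """Keep the last `size` actions, zero-padded until the history is full."""
    def __init__(self, size, action_dim):
        self.size, self.action_dim = size, action_dim
        self.buf = deque(maxlen=size)

    def insert(self, action):
        self.buf.append(np.asarray(action, dtype=np.float32))

    def request(self):
        flat = np.zeros(self.size * self.action_dim, dtype=np.float32)
        for i, a in enumerate(self.buf):
            flat[i * self.action_dim:(i + 1) * self.action_dim] = a
        return flat

hist = RecentActions(size=2, action_dim=2)
hist.insert([0.3, 0.5])
own_obs = np.zeros(60, dtype=np.float32)
augmented = np.hstack((own_obs, hist.request()))   # 60 + 2*2 = 64 dims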
+ + self.initial_reset = True print("Madras agents are: ", self.agents) @@ -353,7 +395,7 @@ def reset(self): if self._config.traffic: self.traffic_manager.reset() - s_t = {} + s_t = OrderedDict() # Create clients and connect their sockets @@ -370,6 +412,10 @@ def reset(self): # Finish reset for agent in self.agents: self.agents[agent].complete_reset() + + for agent, agent_map in self.comm_info["agent_map"].items(): + for comm_agents in agent_map: + s_t[agent] = np.hstack(s_t[agent], self.agent[comm_agents].action_buffer.request()) return s_t def step(self, action): @@ -403,4 +449,8 @@ def step(self, action): self.agents[agent].increment_step() done['__all__'] = done_check + + for agent, agent_map in self.comm_info["agent_map"].items(): + for comm_agents in agent_map: + next_obs[agent] = np.hstack(next_obs[agent], self.agent[comm_agents].action_buffer.request()) return next_obs, reward, done, info diff --git a/MADRaS/utils/action_buffer.py b/MADRaS/utils/action_buffer.py new file mode 100644 index 0000000..70bd7d7 --- /dev/null +++ b/MADRaS/utils/action_buffer.py @@ -0,0 +1,40 @@ +from queue import Queue +from copy import copy +import numpy as np +import MADRaS.utils.madras_datatypes as md + +madras = md.MadrasDatatypes() + +class ActionBuffer: + """ + To hold the last k actions of taken + by an agent. + """ + + def __init__(self, agent_id, size, action_dim): + self._agent_id = agent_id + self._size = size + self._action_dim = action_dim + self._buffer = Queue(maxsize=self._size) + self._curr_size = 0 + + def insert(self, action): + if (self._curr_size < self._size): + self._buffer.put(action) + self._curr_size += 1 + else: + self._buffer.get() + self._buffer.put(action) + + def request(self): + temp_queue = copy(self._buffer) + ret = np.zeros((self._size*self._action_dim,), dtype=madras.floatX) + for i in range(self._curr_size): + ret[i: i+self._action_dim] = temp_queue.get() + del temp_queue + return ret + + def reset(self): + del self._buffer + self._buffer = Queue(maxsize=self._size) + self._curr_size = 0 From f6c730a1dde00a029102f9a00c57986f376e0e49 Mon Sep 17 00:00:00 2001 From: rudrasohan Date: Sun, 22 Dec 2019 20:52:49 +0530 Subject: [PATCH 25/46] removed typos --- MADRaS/envs/gym_madras_v2.py | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/MADRaS/envs/gym_madras_v2.py b/MADRaS/envs/gym_madras_v2.py index 4835c25..f25afe9 100644 --- a/MADRaS/envs/gym_madras_v2.py +++ b/MADRaS/envs/gym_madras_v2.py @@ -76,7 +76,7 @@ def __init__(self, name, torcs_server_port, cfg, sim_info={}): self.done_manager = dm.DoneManager(self._config.dones, self.name) self.initial_reset = True self.step_num = 0 - self.multi_flag = self.observation['multi_flag'] + self.multi_flag = self._config.observations['multi_flag'] def create_new_client(self): while True: @@ -265,16 +265,16 @@ def increment_step(self): self.step_num += 1 def actions_init(self, info={}): - self.action_buffer = ab.ActionBuffer(self.name, self.observation['size'], self.action_dim) + self.action_buffer = ab.ActionBuffer(self.name, self._config.observations['size'], self.action_dim) additional_dims = 0 for key, dims in info["agnet"].items(): if (self.key != self.name): - additional_dims += dims*self.observation['size'] + additional_dims += dims*self._config.observations['size'] self.obs_dim += additional_dims - high = np.hstack(self.observation_space.high, np.ones((additional_dims), dtype=madras.floatX)) - low = np.hstack(self.observation_space.low, -np.ones((additional_dims), dtype=madras.floatX)) + high 
= np.hstack((self.observation_space.high, np.ones((additional_dims), dtype=madras.floatX))) + low = np.hstack((self.observation_space.low, -np.ones((additional_dims), dtype=madras.floatX))) self.observation_space = spaces.Box(high=high, low=low) @@ -313,9 +313,9 @@ def __init__(self, cfg_path=DEFAULT_SIM_OPTIONS_FILE): self.comm_agent_names.append(agent_name) self.act_dims.append(self.agents[name].actio_dim) self.comm_info[agent_name] = self.agents[name].actio_dim - self.comm_info["agnet_map"] = {} + self.num_agents += 1 - + self.comm_info["agent_map"] = {} for agent in self.comm_agent_names: self.agents[agent].actions_init(self.comm_info) self.comm_info["agent_map"][agent] = [] @@ -415,7 +415,7 @@ def reset(self): for agent, agent_map in self.comm_info["agent_map"].items(): for comm_agents in agent_map: - s_t[agent] = np.hstack(s_t[agent], self.agent[comm_agents].action_buffer.request()) + s_t[agent] = np.hstack((s_t[agent], self.agent[comm_agents].action_buffer.request())) return s_t def step(self, action): @@ -452,5 +452,5 @@ def step(self, action): for agent, agent_map in self.comm_info["agent_map"].items(): for comm_agents in agent_map: - next_obs[agent] = np.hstack(next_obs[agent], self.agent[comm_agents].action_buffer.request()) + next_obs[agent] = np.hstack((next_obs[agent], self.agent[comm_agents].action_buffer.request())) return next_obs, reward, done, info From 53b52e4b6977c6ba3a2a33858e153edd21375260 Mon Sep 17 00:00:00 2001 From: rudrasohan Date: Sun, 22 Dec 2019 21:09:10 +0530 Subject: [PATCH 26/46] syntax correctness --- MADRaS/envs/data/sim_options_v2.yml | 4 ++-- MADRaS/envs/gym_madras_v2.py | 20 ++++++++++---------- 2 files changed, 12 insertions(+), 12 deletions(-) diff --git a/MADRaS/envs/data/sim_options_v2.yml b/MADRaS/envs/data/sim_options_v2.yml index 9cce8c1..05ebab3 100644 --- a/MADRaS/envs/data/sim_options_v2.yml +++ b/MADRaS/envs/data/sim_options_v2.yml @@ -35,7 +35,7 @@ agents: # Observation mode observations: mode: SingleAgentInTrafficObs - multi_flag: False + multi_flag: True buff_size: 2 normalize: False # gym_torcs normalizes by default obs_min: @@ -97,7 +97,7 @@ agents: # Observation mode observations: mode: SingleAgentSimpleLapObs - multi_flag: False + multi_flag: True buff_size: 2 normalize: False # gym_torcs normalizes by default obs_min: diff --git a/MADRaS/envs/gym_madras_v2.py b/MADRaS/envs/gym_madras_v2.py index f25afe9..83c00ca 100644 --- a/MADRaS/envs/gym_madras_v2.py +++ b/MADRaS/envs/gym_madras_v2.py @@ -265,12 +265,12 @@ def increment_step(self): self.step_num += 1 def actions_init(self, info={}): - self.action_buffer = ab.ActionBuffer(self.name, self._config.observations['size'], self.action_dim) + self.action_buffer = ab.ActionBuffer(self.name, self._config.observations['buff_size'], self.action_dim) additional_dims = 0 - for key, dims in info["agnet"].items(): + for key, dims in info["agent"].items(): if (self.key != self.name): - additional_dims += dims*self._config.observations['size'] + additional_dims += dims*self._config.observations['buff_size'] self.obs_dim += additional_dims high = np.hstack((self.observation_space.high, np.ones((additional_dims), dtype=madras.floatX))) @@ -291,7 +291,7 @@ def __init__(self, cfg_path=DEFAULT_SIM_OPTIONS_FILE): self.start_torcs_process() self.num_agents = 0 self.agents = OrderedDict() - self.comm_info = {} + self.comm_info = {"agent":{}, "agent_map":{}} self.comm_agent_names = [] self.act_dims = [] if self._config.traffic: @@ -310,12 +310,12 @@ def __init__(self, cfg_path=DEFAULT_SIM_OPTIONS_FILE): 
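# actions_init above widens the agent's Box observation space by the extra action-history
# dimensions before training starts.  A minimal sketch of that space extension; the base
# 60-dim space and the 4 additional dims (one other agent, buff_size=2, action_dim=2) are
# illustrative:
import numpy as np
from gym import spaces

base = spaces.Box(low=-np.ones(60, dtype=np.float32), high=np.ones(60, dtype=np.float32))
additional_dims = 4
high = np.hstack((base.high, np.ones(additional_dims, dtype=np.float32)))
low = np.hstack((base.low, -np.ones(additional_dims, dtype=np.float32)))
extended = spaces.Box(low=low, high=high)
print(extended.shape)   # (64,)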
"max_steps": self._config.max_steps }) if self.agents[name].multi_flag: - self.comm_agent_names.append(agent_name) - self.act_dims.append(self.agents[name].actio_dim) - self.comm_info[agent_name] = self.agents[name].actio_dim + self.comm_agent_names.append(name) + self.act_dims.append(self.agents[name].action_dim) + self.comm_info[agent_name] = self.agents[name].action_dim self.num_agents += 1 - self.comm_info["agent_map"] = {} + print("COMM AGENTS",self.comm_agent_names) for agent in self.comm_agent_names: self.agents[agent].actions_init(self.comm_info) self.comm_info["agent_map"][agent] = [] @@ -415,7 +415,7 @@ def reset(self): for agent, agent_map in self.comm_info["agent_map"].items(): for comm_agents in agent_map: - s_t[agent] = np.hstack((s_t[agent], self.agent[comm_agents].action_buffer.request())) + s_t[agent] = np.hstack((s_t[agent], self.agents[comm_agents].action_buffer.request())) return s_t def step(self, action): @@ -452,5 +452,5 @@ def step(self, action): for agent, agent_map in self.comm_info["agent_map"].items(): for comm_agents in agent_map: - next_obs[agent] = np.hstack((next_obs[agent], self.agent[comm_agents].action_buffer.request())) + next_obs[agent] = np.hstack((next_obs[agent], self.agents[comm_agents].action_buffer.request())) return next_obs, reward, done, info From 2a58d1148d32b70fa44620261dcf2f51c1abd2bd Mon Sep 17 00:00:00 2001 From: rudrasohan Date: Sun, 22 Dec 2019 21:32:55 +0530 Subject: [PATCH 27/46] dimentional correctness --- MADRaS/agents/generic/test_environment_v2.py | 8 +++++++- MADRaS/envs/gym_madras_v2.py | 12 ++++++------ 2 files changed, 13 insertions(+), 7 deletions(-) diff --git a/MADRaS/agents/generic/test_environment_v2.py b/MADRaS/agents/generic/test_environment_v2.py index 608f5ee..27d5f20 100644 --- a/MADRaS/agents/generic/test_environment_v2.py +++ b/MADRaS/agents/generic/test_environment_v2.py @@ -22,10 +22,15 @@ def test_madras_vanilla(): def test_madras_pid(): env = MadrasEnv() + for key, val in env.agents.items(): + print("Observation Space ", val.observation_space) + print("Obs_dim ", val.obs_dim) print("Testing reset...") obs = env.reset() print("Initial observation: {}." 
- " Verify if the number of dimensions {} is right.".format(obs, len(obs))) + " Verify if the number of dimensions is right.".format(obs)) + for key, value in obs.items(): + print("{}: {}".format(key, len(value))) print("Testing step...") for t in range(2000): obs, r, done, _ = env.step({"MadrasAgent_0": [0.3, 0.5], @@ -36,6 +41,7 @@ def test_madras_pid(): dones = [x for x in done.values()] if ((np.any(dones)) or (t > 0 and t % 100 == 0)): env.reset() + break os.system("pkill torcs") diff --git a/MADRaS/envs/gym_madras_v2.py b/MADRaS/envs/gym_madras_v2.py index 83c00ca..c1fafda 100644 --- a/MADRaS/envs/gym_madras_v2.py +++ b/MADRaS/envs/gym_madras_v2.py @@ -173,7 +173,8 @@ def step_vanilla(self, action): action[1] = (action[1] + 1) / 2.0 # acceleration back to [0, 1] action[2] = (action[2] + 1) / 2.0 # brake back to [0, 1] r = 0.0 - self.action_buffer.insert(action) + if self.multi_flag: + self.action_buffer.insert(action) try: self.ob, r, done, info = TorcsEnv.step(self, 0, self.client, action, @@ -217,7 +218,8 @@ def step_pid(self, desire, e): reward = 0.0 for PID_step in range(self._config.pid_settings['pid_latency']): - self.action_buffer.insert(desire) + if self.multi_flag: + self.action_buffer.insert(desire) a_t = self.PID_controller.get_action(desire) try: self.ob, r, done, info = TorcsEnv.step(self, PID_step, @@ -269,7 +271,7 @@ def actions_init(self, info={}): additional_dims = 0 for key, dims in info["agent"].items(): - if (self.key != self.name): + if (key != self.name): additional_dims += dims*self._config.observations['buff_size'] self.obs_dim += additional_dims @@ -293,7 +295,6 @@ def __init__(self, cfg_path=DEFAULT_SIM_OPTIONS_FILE): self.agents = OrderedDict() self.comm_info = {"agent":{}, "agent_map":{}} self.comm_agent_names = [] - self.act_dims = [] if self._config.traffic: self.traffic_manager = traffic.MadrasTrafficManager( self._config.torcs_server_port, len(self.agents), self._config.traffic) @@ -311,8 +312,7 @@ def __init__(self, cfg_path=DEFAULT_SIM_OPTIONS_FILE): }) if self.agents[name].multi_flag: self.comm_agent_names.append(name) - self.act_dims.append(self.agents[name].action_dim) - self.comm_info[agent_name] = self.agents[name].action_dim + self.comm_info["agent"][agent_name] = self.agents[name].action_dim self.num_agents += 1 print("COMM AGENTS",self.comm_agent_names) From cd019daa96ea0d0d7c90f188205db52ad9d50177 Mon Sep 17 00:00:00 2001 From: rudrasohan Date: Sun, 22 Dec 2019 23:01:30 +0530 Subject: [PATCH 28/46] removed buffer multiprocessing errors --- MADRaS/agents/generic/test_environment_v2.py | 6 ++--- MADRaS/envs/gym_madras_v2.py | 23 ++++++++++++-------- MADRaS/utils/action_buffer.py | 15 ++++++++----- 3 files changed, 25 insertions(+), 19 deletions(-) diff --git a/MADRaS/agents/generic/test_environment_v2.py b/MADRaS/agents/generic/test_environment_v2.py index 27d5f20..c64bf07 100644 --- a/MADRaS/agents/generic/test_environment_v2.py +++ b/MADRaS/agents/generic/test_environment_v2.py @@ -36,12 +36,10 @@ def test_madras_pid(): obs, r, done, _ = env.step({"MadrasAgent_0": [0.3, 0.5], "MadrasAgent_1": [-0.3, 1.0] }) - #print("{}".format(obs)) + print("{}".format(obs)) print("{}: reward={}, done={}".format(t, r, done)) - dones = [x for x in done.values()] - if ((np.any(dones)) or (t > 0 and t % 100 == 0)): + if (done['__all__'] or (t > 0 and t % 100 == 0)): env.reset() - break os.system("pkill torcs") diff --git a/MADRaS/envs/gym_madras_v2.py b/MADRaS/envs/gym_madras_v2.py index c1fafda..27bc6f6 100644 --- a/MADRaS/envs/gym_madras_v2.py +++ 
b/MADRaS/envs/gym_madras_v2.py @@ -77,6 +77,7 @@ def __init__(self, name, torcs_server_port, cfg, sim_info={}): self.initial_reset = True self.step_num = 0 self.multi_flag = self._config.observations['multi_flag'] + self.buff_size = self._config.observations['buff_size'] def create_new_client(self): while True: @@ -173,8 +174,6 @@ def step_vanilla(self, action): action[1] = (action[1] + 1) / 2.0 # acceleration back to [0, 1] action[2] = (action[2] + 1) / 2.0 # brake back to [0, 1] r = 0.0 - if self.multi_flag: - self.action_buffer.insert(action) try: self.ob, r, done, info = TorcsEnv.step(self, 0, self.client, action, @@ -218,8 +217,7 @@ def step_pid(self, desire, e): reward = 0.0 for PID_step in range(self._config.pid_settings['pid_latency']): - if self.multi_flag: - self.action_buffer.insert(desire) + a_t = self.PID_controller.get_action(desire) try: self.ob, r, done, info = TorcsEnv.step(self, PID_step, @@ -267,17 +265,18 @@ def increment_step(self): self.step_num += 1 def actions_init(self, info={}): - self.action_buffer = ab.ActionBuffer(self.name, self._config.observations['buff_size'], self.action_dim) + self.action_buffer = ab.ActionBuffer(self.name, self.buff_size, self.action_dim) additional_dims = 0 - for key, dims in info["agent"].items(): + for key, dims_tup in info["agent"].items(): if (key != self.name): - additional_dims += dims*self._config.observations['buff_size'] + additional_dims += dims_tup[0]*dims_tup[1] self.obs_dim += additional_dims high = np.hstack((self.observation_space.high, np.ones((additional_dims), dtype=madras.floatX))) low = np.hstack((self.observation_space.low, -np.ones((additional_dims), dtype=madras.floatX))) self.observation_space = spaces.Box(high=high, low=low) + print("{}: {}".format(self.name, self._config.observations['buff_size'])) @@ -312,7 +311,7 @@ def __init__(self, cfg_path=DEFAULT_SIM_OPTIONS_FILE): }) if self.agents[name].multi_flag: self.comm_agent_names.append(name) - self.comm_info["agent"][agent_name] = self.agents[name].action_dim + self.comm_info["agent"][name] = (self.agents[name].action_dim, self.agents[name].buff_size) self.num_agents += 1 print("COMM AGENTS",self.comm_agent_names) @@ -413,6 +412,9 @@ def reset(self): for agent in self.agents: self.agents[agent].complete_reset() + for comm_agents in self.comm_agent_names: #reset buffers + self.agents[comm_agents].action_buffer.reset() + for agent, agent_map in self.comm_info["agent_map"].items(): for comm_agents in agent_map: s_t[agent] = np.hstack((s_t[agent], self.agents[comm_agents].action_buffer.request())) @@ -424,7 +426,7 @@ def step(self, action): return_dict = manager.dict() e = multiprocessing.Event() jobs = [] - for agent in self.agents: + for agent in self.agents.keys(): p = multiprocessing.Process(target=self.agents[agent].step, args=(action[agent], e, return_dict)) jobs.append(p) p.start() @@ -432,6 +434,9 @@ def step(self, action): for proc in jobs: proc.join() + for agent in self.comm_agent_names: + self.agents[agent].action_buffer.insert(action[agent]) + done_check = False for agent in self.agents: diff --git a/MADRaS/utils/action_buffer.py b/MADRaS/utils/action_buffer.py index 70bd7d7..8d41990 100644 --- a/MADRaS/utils/action_buffer.py +++ b/MADRaS/utils/action_buffer.py @@ -1,5 +1,5 @@ from queue import Queue -from copy import copy +from copy import deepcopy import numpy as np import MADRaS.utils.madras_datatypes as md @@ -19,19 +19,22 @@ def __init__(self, agent_id, size, action_dim): self._curr_size = 0 def insert(self, action): + if (self._curr_size < 
self._size): - self._buffer.put(action) self._curr_size += 1 + self._buffer.put(action) else: - self._buffer.get() + _ = self._buffer.get() self._buffer.put(action) def request(self): - temp_queue = copy(self._buffer) + temp_queue = Queue(maxsize=self._size) ret = np.zeros((self._size*self._action_dim,), dtype=madras.floatX) for i in range(self._curr_size): - ret[i: i+self._action_dim] = temp_queue.get() - del temp_queue + a = self._buffer.get() + temp_queue.put(a) + ret[i*self._action_dim: (i+1)*self._action_dim] = a + self._buffer = temp_queue return ret def reset(self): From 7b9ad3a51a82cd67e401d47bbf77a15b9adc7525 Mon Sep 17 00:00:00 2001 From: rudrasohan Date: Sat, 29 Feb 2020 13:31:43 +0530 Subject: [PATCH 29/46] updated changes for multi work --- MADRaS/agents/generic/test_environment_v2.py | 36 ++++-- .../agents/rllib/train_rllib_multi_agent.py | 59 +++++++-- MADRaS/envs/data/sim_options_v2.yml | 119 ++++++++++++++---- MADRaS/envs/done_manager.py | 10 +- MADRaS/envs/gym_madras.py | 34 ++--- MADRaS/envs/gym_madras_v2.py | 110 ++++++++++++---- MADRaS/traffic/traffic.py | 15 ++- MADRaS/utils/done_manager_v2.py | 2 +- MADRaS/utils/observation_manager.py | 2 +- MADRaS/utils/reward_manager.py | 39 ++++-- MADRaS/utils/snakeoil3_gym_v2.py | 2 + 11 files changed, 324 insertions(+), 104 deletions(-) diff --git a/MADRaS/agents/generic/test_environment_v2.py b/MADRaS/agents/generic/test_environment_v2.py index c64bf07..bbba663 100644 --- a/MADRaS/agents/generic/test_environment_v2.py +++ b/MADRaS/agents/generic/test_environment_v2.py @@ -2,6 +2,15 @@ import gym from MADRaS.envs.gym_madras_v2 import MadrasEnv import os +import sys +import logging + +logging.basicConfig(filename='Telemetry.log', level=logging.DEBUG) +# logger = logging.getLogger(__name__) +# logger.setLevel(logging.DEBUG) +# fh = logging.FileHandler('Telemetry.log') +# fh.setLevel(logging.DEBUG) +# logger.addHandler(fh) def test_madras_vanilla(): @@ -11,8 +20,11 @@ def test_madras_vanilla(): print("Initial observation: {}." " Verify if the number of dimensions {} is right.".format(obs, len(obs))) print("Testing step...") - for t in range(20000): - obs, r, done, _ = env.step({"MadrasAgent_0": [0.0, 1.0, -1.0]}) + a = [0.0, 1.0, -1.0] + for t in range(4000): + obs, r, done, _ = env.step({"MadrasAgent_0": a}) + if ((t+1)%150 == 0): + a = [0.0, -1.0, 1.0] print("{}: reward={}, done={}".format(t, r, done)) dones = [x for x in done.values()] if np.any(dones): @@ -27,22 +39,28 @@ def test_madras_pid(): print("Obs_dim ", val.obs_dim) print("Testing reset...") obs = env.reset() + a = [0.0, 0.2] + b = [0.1, 0.00] print("Initial observation: {}." 
" Verify if the number of dimensions is right.".format(obs)) for key, value in obs.items(): print("{}: {}".format(key, len(value))) print("Testing step...") - for t in range(2000): - obs, r, done, _ = env.step({"MadrasAgent_0": [0.3, 0.5], - "MadrasAgent_1": [-0.3, 1.0] + running_rew = 0 + for t in range(4000): + obs, r, done, _ = env.step({"MadrasAgent_0": a, "MadrasAgent_1": b }) - print("{}".format(obs)) - print("{}: reward={}, done={}".format(t, r, done)) - if (done['__all__'] or (t > 0 and t % 100 == 0)): + #print("{}".format(obs)) + #if ((t+1)%15 == 0): + # a = [0.0, 0.0] + running_rew += r["MadrasAgent_0"] + #print("{}: reward={}, done={}".format(t, running_rew, done)) + #logger.info("HELLO") + if (done['__all__']): env.reset() os.system("pkill torcs") if __name__=='__main__': - # test_madras_vanilla() + #test_madras_vanilla() test_madras_pid() \ No newline at end of file diff --git a/MADRaS/agents/rllib/train_rllib_multi_agent.py b/MADRaS/agents/rllib/train_rllib_multi_agent.py index 241d847..4942d73 100644 --- a/MADRaS/agents/rllib/train_rllib_multi_agent.py +++ b/MADRaS/agents/rllib/train_rllib_multi_agent.py @@ -6,7 +6,9 @@ from ray.tune.logger import pretty_print from ray.rllib.env.multi_agent_env import MultiAgentEnv from MADRaS.envs.gym_madras_v2 import MadrasEnv - +import logging +import numpy as np +logging.basicConfig(filename='Telemetry.log', level=logging.DEBUG) class MadrasRllib(MultiAgentEnv, MadrasEnv): """ @@ -21,8 +23,29 @@ def reset(self): def step(self, action_dict): return MadrasEnv.step(self, action_dict) +def on_episode_end(info): + episode = info["episode"] + rewards = episode.agent_rewards + total_episode = episode.total_reward + + + episode.custom_metrics["agent0/rew_2"] = rewards[('MadrasAgent_0', 'ppo_policy_0')]**2.0 + episode.custom_metrics["agent1/rew_2"] = rewards[('MadrasAgent_1', 'ppo_policy_1')]**2.0 + episode.custom_metrics["env_rew_2"] = total_episode**2.0 + +def on_sample_end(info): + print(info.keys()) + sample = info["samples"] + print(dir(sample)) + splits = sample.policy_batches['ppo_policy_0'].split_by_episode() + print(len(splits)) + for split in splits: + print("EPISODE= ",np.sum(split['rewards'])) + + + parser = argparse.ArgumentParser() -parser.add_argument("--num-iters", type=int, default=20) +parser.add_argument("--num-iters", type=int, default=300) if __name__ == "__main__": args = parser.parse_args() @@ -34,7 +57,9 @@ def step(self, action_dict): for agent in env.agents: obs_spaces.append(env.agents[agent].observation_space) action_spaces.append(env.agents[agent].action_space) - + + print(obs_spaces) + print(action_spaces) policies = {"ppo_policy_{}".format(i) : (PPOTFPolicy, obs_spaces[i], action_spaces[i], {}) for i in range(env.num_agents)} def policy_mapping_fn(agent_id): @@ -44,18 +69,38 @@ def policy_mapping_fn(agent_id): ppo_trainer = PPOTrainer( env=MadrasRllib, config={ - "sample_batch_size": 50, #set them accordingly - "train_batch_size": 100, - "sgd_minibatch_size": 64, + "eager": False, + "num_workers": 1, + "num_gpus": 0, + "vf_clip_param": 20, + # "sample_batch_size": 20, #set them accordingly + "train_batch_size": 500, + "callbacks": { + "on_episode_end": on_episode_end, + #"on_sample_end": on_sample_end, + }, + #"lr": 5e-6, + # "sgd_minibatch_size": 24, "multiagent": { "policies": policies, "policy_mapping_fn": policy_mapping_fn, }, }) + ppo_trainer.restore("/home/sohan/ray_results/check/checkpoint_240/checkpoint-240") + for i in range(args.num_iters): print("== Iteration", i, "==") # improve the PPO policy - 
print("-- PPO --") + if (i % 10 == 0): + checkpoint = ppo_trainer.save() + print("checkpoint saved at", checkpoint) + + logging.warning("-- PPO --") print(pretty_print(ppo_trainer.train())) + + # if (i%20 == 0): + # weights_0 = ppo_trainer.get_weights(["ppo_policy_0"]) + # weights_1 = ppo_trainer.get_weights(["ppo_policy_1"]) + # ppo_trainer.set_weights({"ppo_policy_0": weights_1, "ppo_policy_1": weights_0}) diff --git a/MADRaS/envs/data/sim_options_v2.yml b/MADRaS/envs/data/sim_options_v2.yml index 05ebab3..17314f2 100644 --- a/MADRaS/envs/data/sim_options_v2.yml +++ b/MADRaS/envs/data/sim_options_v2.yml @@ -1,8 +1,8 @@ torcs_server_port: 60934 -visualise: False # whether to render TORCS window +visualise: True # whether to render TORCS window no_of_visualisations: 1 # To visualize multiple training instances (under MPI) in parallel set it to more than 1 -track_len: 2843.10 # in metres. CG Track 3. All track lengths can be found here: http://www.berniw.org/trb/tracks/tracklist.php -max_steps: 20000 # max episode length +track_len: 2587.54 # in metres. CG Track 3. All track lengths can be found here: http://www.berniw.org/trb/tracks/tracklist.php +max_steps: 1000 # max episode length track_width: 10.0 vision: False # whether to use vision input @@ -13,7 +13,7 @@ agents: throttle: True gear_change: False client_max_steps: -1 # to be interpreted as np.inf - target_speed: 13.89 # metres per sec + target_speed: 27.78 # metres per sec state_dim: 60 # 24 early_stop: True normalize_actions: True # all actions in [-1, 1] @@ -36,7 +36,7 @@ agents: observations: mode: SingleAgentInTrafficObs multi_flag: True - buff_size: 2 + buff_size: 1 normalize: False # gym_torcs normalizes by default obs_min: angle: -3.142 @@ -49,20 +49,20 @@ agents: # Reward function rewards: - ProgressReward2: + ProgressReward3: scale: 1.0 AvgSpeedReward: scale: 1.0 + SuccessfulOvertakeReward: + scale: 10.0 CollisionPenalty: scale: 10.0 TurnBackwardPenalty: scale: 10.0 AngAcclPenalty: - scale: 1.0 + scale: 5.0 max_ang_accl: 2.0 - SuccessfulOvertakeReward: - scale: 10.0 - + # Done function dones: - RaceOver @@ -70,13 +70,14 @@ agents: - Collision - TurnBackward - OutOfTrack + - MadrasAgent: vision: False # whether to use vision input throttle: True gear_change: False client_max_steps: -1 # to be interpreted as np.inf - target_speed: 12.89 # metres per sec - state_dim: 24 + target_speed: 27.78 # metres per sec + state_dim: 60 # 24 early_stop: True normalize_actions: True # all actions in [-1, 1] # PID params @@ -96,9 +97,9 @@ agents: # Observation mode observations: - mode: SingleAgentSimpleLapObs + mode: SingleAgentInTrafficObs multi_flag: True - buff_size: 2 + buff_size: 1 normalize: False # gym_torcs normalizes by default obs_min: angle: -3.142 @@ -115,14 +116,16 @@ agents: scale: 1.0 AvgSpeedReward: scale: 1.0 + SuccessfulOvertakeReward: + scale: 10.0 CollisionPenalty: scale: 10.0 TurnBackwardPenalty: scale: 10.0 AngAcclPenalty: - scale: 1.0 + scale: 5.0 max_ang_accl: 2.0 - + # Done function dones: - RaceOver @@ -131,16 +134,79 @@ agents: - TurnBackward - OutOfTrack + # - MadrasAgent: + # vision: False # whether to use vision input + # throttle: True + # gear_change: False + # client_max_steps: -1 # to be interpreted as np.inf + # target_speed: 12.89 # metres per sec + # state_dim: 60 + # early_stop: True + # normalize_actions: True # all actions in [-1, 1] + # # PID params + # pid_assist: False + # pid_settings: + # accel_pid: + # - 10.5 # a_x + # - 0.05 # a_y + # - 2.8 # a_z + # steer_pid: + # - 5.1 + # - 0.001 + # - 
0.000001 + # accel_scale: 1.0 + # steer_scale: 0.1 + # pid_latency: 10 + + # # Observation mode + # observations: + # mode: SingleAgentSimpleLapObs + # multi_flag: False + # buff_size: 1 + # normalize: False # gym_torcs normalizes by default + # obs_min: + # angle: -3.142 + # track: 0.0 + # trackPos: -1.0 + # obs_max: + # angle: 3.142 + # track: 200.0 + # trackPos: 1.0 + + # # Reward function + # rewards: + # ProgressReward2: + # scale: 1.0 + # AvgSpeedReward: + # scale: 1.0 + # CollisionPenalty: + # scale: 10.0 + # TurnBackwardPenalty: + # scale: 10.0 + # AngAcclPenalty: + # scale: 1.0 + # max_ang_accl: 2.0 + # # TaskReward: + # # scale: 1.0 + + # # Done function + # dones: + # - RaceOver + # - TimeOut + # - TurnBackward + # - OutOfTrack + # Traffic agents traffic: - ParkedAgent: - target_speed: 80 + track_len: 2587.54 + target_speed: 50 parking_lane_pos: - low: -0.6 - high: 0.0 + low: -0.54 + high: -0.47 parking_dist_from_start: - low: 100 - high: 120 + low: 35 + high: 35 collision_time_window: 1.2 pid_settings: accel_pid: @@ -154,13 +220,14 @@ traffic: accel_scale: 1.0 steer_scale: 0.1 - ParkedAgent: - target_speed: 80 + track_len: 2587.54 + target_speed: 50 parking_lane_pos: - low: 0.0 - high: 0.4 + low: 0.47 + high: 0.54 parking_dist_from_start: - low: 80 - high: 95 + low: 35 + high: 35 collision_time_window: 1.2 pid_settings: accel_pid: diff --git a/MADRaS/envs/done_manager.py b/MADRaS/envs/done_manager.py index 45dbf59..9ef60f5 100644 --- a/MADRaS/envs/done_manager.py +++ b/MADRaS/envs/done_manager.py @@ -3,7 +3,7 @@ import warnings -class DoneManager(object): +class DoneHandler(object): """Composes the done function from a given done configuration.""" def __init__(self, cfg): self.dones = {} @@ -79,7 +79,7 @@ def check_done(self, game_config, game_state): else: max_steps = game_config.max_steps if self.num_steps >= max_steps: - print("Done: Episode terminated due to timeout.") + print("Done: Episode terminated due to timeout.{}".format(self.num_steps)) self.num_steps = 0 return True else: @@ -91,19 +91,19 @@ def reset(self): class Collision(MadrasDone): def __init__(self): - self.damage = 0.0 + self.damage = 1000.0 def check_done(self, game_config, game_state): del game_config if self.damage < game_state["damage"]: print("Done: Episode terminated because agent collided.") - self.damage = 0.0 + self.damage = 1000.0 return True else: return False def reset(self): - self.damage = 0.0 + self.damage = 1000.0 class TurnBackward(MadrasDone): diff --git a/MADRaS/envs/gym_madras.py b/MADRaS/envs/gym_madras.py index 0ed74c8..d5d9ec3 100644 --- a/MADRaS/envs/gym_madras.py +++ b/MADRaS/envs/gym_madras.py @@ -102,10 +102,10 @@ def __init__(self, cfg_path=None): self.action_dim = 2 # LanePos, Velocity else: self.action_dim = 3 # Steer, Accel, Brake - self.observation_manager = om.ObservationManager(self._config.observations, + self.observation_handler = om.ObservationHandler(self._config.observations, self._config.vision) - self.reward_manager = rm.RewardManager(self._config.rewards) - self.done_manager = dm.DoneManager(self._config.dones) + self.reward_handler = rm.RewardHandler(self._config.rewards) + self.done_handler = dm.DoneHandler(self._config.dones) TorcsEnv.__init__(self, vision=self._config.vision, @@ -117,9 +117,9 @@ def __init__(self, cfg_path=None): if self._config.normalize_actions: self.action_space = gym.spaces.Box(low=-np.ones(3), high=np.ones(3)) - self.observation_space = self.observation_manager.get_observation_space() + self.observation_space = 
self.observation_handler.get_observation_space() - self.state_dim = self.observation_manager.get_state_dim() # No. of sensors input + self.state_dim = self.observation_handler.get_state_dim() # No. of sensors input self.env_name = 'Madras_Env' self.client_type = 0 # Snakeoil client type self.initial_reset = True @@ -129,7 +129,7 @@ def __init__(self, cfg_path=None): self.seed() self.start_torcs_process() if self._config.traffic: - self.traffic_manager = traffic.MadrasTrafficManager( + self.traffic_handler = traffic.MadrasTrafficHandler( self._config.torcs_server_port, 1, self._config.traffic) def seed(self, seed=None): @@ -179,7 +179,7 @@ def start_torcs_process(self): def reset(self): """Reset Method to be called at the end of each episode.""" if self._config.traffic: - self.traffic_manager.reset() + self.traffic_handler.reset() if self.initial_reset: self.wait_for_observation() @@ -198,11 +198,11 @@ def reset(self): print("Reset: Reset failed as agent started off track. Retrying...") self.distance_traversed = 0 - s_t = self.observation_manager.get_obs(self.ob, self._config) + s_t = self.observation_handler.get_obs(self.ob, self._config) if self._config.pid_assist: self.PID_controller.reset() - self.reward_manager.reset() - self.done_manager.reset() + self.reward_handler.reset() + self.done_handler.reset() print("Reset: Starting new episode") if np.any(np.asarray(self.ob.track) < 0): print("Reset produced bad track values.") @@ -250,14 +250,14 @@ def step_vanilla(self, action): "damage": self.client.S.d["damage"], "trackPos": self.client.S.d["trackPos"], "track": self.client.S.d["track"]} - reward = self.reward_manager.get_reward(self._config, game_state) + reward = self.reward_handler.get_reward(self._config, game_state) - done = self.done_manager.get_done_signal(self._config, game_state) + done = self.done_handler.get_done_signal(self._config, game_state) - next_obs = self.observation_manager.get_obs(self.ob, self._config) + next_obs = self.observation_handler.get_obs(self.ob, self._config) if done: if self._config.traffic: - self.traffic_manager.kill_all_traffic_agents() + self.traffic_handler.kill_all_traffic_agents() self.client.R.d["meta"] = True # Terminate the episode print('Terminating PID {}'.format(self.client.serverPID)) @@ -295,16 +295,16 @@ def step_pid(self, desire): "damage": self.client.S.d["damage"], "trackPos": self.client.S.d["trackPos"], "track": self.client.S.d["track"]} - reward += self.reward_manager.get_reward(self._config, game_state) + reward += self.reward_handler.get_reward(self._config, game_state) if self._config.pid_assist: self.PID_controller.update(self.ob) - done = self.done_manager.get_done_signal(self._config, game_state) + done = self.done_handler.get_done_signal(self._config, game_state) if done: self.client.R.d["meta"] = True # Terminate the episode print('Terminating PID {}'.format(self.client.serverPID)) break - next_obs = self.observation_manager.get_obs(self.ob, self._config) + next_obs = self.observation_handler.get_obs(self.ob, self._config) return next_obs, reward, done, info diff --git a/MADRaS/envs/gym_madras_v2.py b/MADRaS/envs/gym_madras_v2.py index 27bc6f6..e417508 100644 --- a/MADRaS/envs/gym_madras_v2.py +++ b/MADRaS/envs/gym_madras_v2.py @@ -36,10 +36,10 @@ import MADRaS.traffic.traffic as traffic from collections import OrderedDict import multiprocessing - +import logging madras = md.MadrasDatatypes() - +#logger = logging.getLogger(__name__) path_and_file = os.path.realpath(__file__) path, file = os.path.split(path_and_file) 
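
The header hunk here resolves the default simulator configuration relative to the module file itself, so sim_options_v2.yml is found regardless of which directory the environment is launched from. A minimal sketch of the same pattern, assuming PyYAML (which `config_parser.parse_yaml` presumably wraps); the `load_sim_options` helper is illustrative and not part of the patch:

    import os
    import yaml  # PyYAML; assumed to be what the existing YAML config parsing uses

    _MODULE_DIR = os.path.dirname(os.path.realpath(__file__))
    DEFAULT_SIM_OPTIONS_FILE = os.path.join(_MODULE_DIR, "data", "sim_options_v2.yml")

    def load_sim_options(path=DEFAULT_SIM_OPTIONS_FILE):
        """Read the simulator options YAML into a plain dict."""
        with open(path) as f:
            return yaml.safe_load(f)
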
DEFAULT_SIM_OPTIONS_FILE = os.path.join(path, "data", "sim_options_v2.yml") @@ -64,18 +64,21 @@ def __init__(self, name, torcs_server_port, cfg, sim_info={}): self.action_dim = 2 # LanePos, Velocity else: self.action_dim = 3 # Steer, Accel, Brake - self.observation_manager = om.ObservationManager(self._config.observations, + self.observation_handler = om.ObservationHandler(self._config.observations, self._config.vision) if self._config.normalize_actions: - self.action_space = gym.spaces.Box(low=-np.ones(3), high=np.ones(3)) + self.action_space = gym.spaces.Box(low=-np.ones(self.action_dim), high=np.ones(self.action_dim)) - self.observation_space = self.observation_manager.get_observation_space() - self.obs_dim = self.observation_manager.get_state_dim() # No. of sensors input - self.state_dim = self.observation_manager.get_state_dim() - self.reward_manager = rm.RewardManager(self._config.rewards) - self.done_manager = dm.DoneManager(self._config.dones, self.name) + self.observation_space = self.observation_handler.get_observation_space() + self.obs_dim = self.observation_handler.get_state_dim() # No. of sensors input + self.state_dim = self.observation_handler.get_state_dim() + self.reward_handler = rm.RewardHandler(self._config.rewards) + self.done_handler = dm.DoneHandler(self._config.dones, self.name) self.initial_reset = True self.step_num = 0 + self.prev_dist = 0 + self.prev_damage = 0 + self.prev_pos = np.inf self.multi_flag = self._config.observations['multi_flag'] self.buff_size = self._config.observations['buff_size'] @@ -101,17 +104,17 @@ def get_observation_from_server(self): if np.any(np.asarray(self.ob.track) < 0): print("Reset produced bad track values.") self.distance_traversed = 0 - s_t = self.observation_manager.get_obs(self.ob, self._config) + s_t = self.observation_handler.get_obs(self.ob, self._config) return s_t def complete_reset(self): if self._config.pid_assist: self.PID_controller.reset() - self.reward_manager.reset() - self.done_manager.reset() + self.reward_handler.reset() + self.done_handler.reset() self.step_num = 0 - print("Reset: Starting new episode") + logging.info("Reset: Starting new episode") def reset_new(self, return_dict={}): @@ -140,12 +143,12 @@ def reset(self, return_dict={}): print("Reset produced bad track values.") self.distance_traversed = 0 print("Initial observation: {}".format(self.ob)) - s_t = self.observation_manager.get_obs(self.ob, self._config) + s_t = self.observation_handler.get_obs(self.ob, self._config) if self._config.pid_assist: self.PID_controller.reset() - self.reward_manager.reset() - self.done_manager.reset() - print("Reset: Starting new episode") + self.reward_handler.reset() + self.done_handler.reset() + #print("[{}] Reset: Starting new episode DAMAGE{}".format(self.name, self.client.S.d["damage"])) return_dict[self.name] = s_t return return_dict @@ -165,6 +168,7 @@ def wait_for_observation(self): raw_ob = self.client.S.d # Get the current full-observation from torcs self.ob = self.make_observation(raw_ob) + #self.client.respond_to_server() except: pass @@ -186,6 +190,9 @@ def step_vanilla(self, action): game_state = {"torcs_reward": r, "torcs_done": done, + "prev_distance_traversed": self.prev_dist, + "prev_racePos": self.prev_pos, + "prev_damage": self.prev_damage, "distance_traversed": self.client.S.d['distRaced'], "angle": self.client.S.d["angle"], "damage": self.client.S.d["damage"], @@ -193,12 +200,21 @@ def step_vanilla(self, action): "track": self.client.S.d["track"], "racePos": self.client.S.d["racePos"], 
"num_steps": self.step_num} - reward = self.reward_manager.get_reward(self._config, game_state) + logging.info("[{}] Lane_pos:{} Distance:{} Speed{}".format(self.name, + self.client.S.d["trackPos"], + self.client.S.d['distRaced'], + self.ob.speedX*self.default_speed)) + #print("DISTANCE_TRAVERSED:{} SPEED:{}".format(game_state["distance_traversed"], (self.ob.speedX*self.default_speed))) + #print("PREV_DIST: {}".format(game_state["prev_distance_traversed"])) + reward = self.reward_handler.get_reward(self._config, game_state) - done = self.done_manager.get_done_signal(self._config, game_state) + done = self.done_handler.get_done_signal(self._config, game_state) - next_obs = self.observation_manager.get_obs(self.ob, self._config) + next_obs = self.observation_handler.get_obs(self.ob, self._config) + info['prev_dist'] = game_state["distance_traversed"] + info['prev_damage'] = game_state["damage"] + info['prev_pos'] = game_state["racePos"] return next_obs, reward, done, info @@ -229,6 +245,9 @@ def step_pid(self, desire, e): game_state = {"torcs_reward": r, "torcs_done": done, + "prev_racePos": self.prev_pos, + "prev_damage": self.prev_damage, + "prev_distance_traversed": self.prev_dist, "distance_traversed": self.client.S.d["distRaced"], "angle": self.client.S.d["angle"], "damage": self.client.S.d["damage"], @@ -236,11 +255,24 @@ def step_pid(self, desire, e): "track": self.client.S.d["track"], "racePos": self.client.S.d["racePos"], "num_steps": self.step_num} - - reward += self.reward_manager.get_reward(self._config, game_state) + # if (self.name == "MadrasAgent_0"): + # print("[{0}]PID Lane_pos:{1:0.4f} Distance:{2:0.2f} Speed{3:0.2f} Damage{4:0.2f}".format(self.name, + # self.client.S.d["trackPos"], + # self.client.S.d['distRaced'], + # self.ob.speedX*self.default_speed, + # self.client.S.d["damage"])) + + reward += self.reward_handler.get_reward(self._config, game_state) + self.prev_dist = game_state["distance_traversed"] + self.prev_damage = game_state["prev_damage"] + self.prev_pos = game_state["racePos"] + #print("DISTANCE_TRAVERSED:{} SPEED:{}".format(game_state["distance_traversed"], (self.ob.speedX*self.default_speed))) if self._config.pid_assist: self.PID_controller.update(self.ob) - done = self.done_manager.get_done_signal(self._config, game_state) + done = self.done_handler.get_done_signal(self._config, game_state) + info['prev_dist'] = self.prev_dist + info['prev_damage'] = game_state["damage"] + info['prev_pos'] = game_state["racePos"] if done: e.set() break @@ -248,7 +280,8 @@ def step_pid(self, desire, e): print("[{}]: Stopping agent because some other agent has hit done".format(self.name)) break - next_obs = self.observation_manager.get_obs(self.ob, self._config) + + next_obs = self.observation_handler.get_obs(self.ob, self._config) return next_obs, reward, done, info def step(self, action, e, return_dict={}): @@ -258,11 +291,18 @@ def step(self, action, e, return_dict={}): return_dict[self.name] = self.step_vanilla(action) return return_dict + def increment_(self, dist, damage, pos): + self.prev_dist = dist + self.prev_damage = damage + self.prev_pos = pos + #print("DIST_UPDATED: ", self.prev_dist) + def increment_step(self): if self._config.pid_assist: self.step_num += self._config.pid_settings['pid_latency'] else: self.step_num += 1 + #self.increment_dist() def actions_init(self, info={}): self.action_buffer = ab.ActionBuffer(self.name, self.buff_size, self.action_dim) @@ -295,7 +335,7 @@ def __init__(self, cfg_path=DEFAULT_SIM_OPTIONS_FILE): self.comm_info = {"agent":{}, 
"agent_map":{}} self.comm_agent_names = [] if self._config.traffic: - self.traffic_manager = traffic.MadrasTrafficManager( + self.traffic_manager = traffic.MadrasTrafficHandler( self._config.torcs_server_port, len(self.agents), self._config.traffic) num_traffic_agents = len(self._config.traffic) if self._config.traffic else 0 if self._config.agents: @@ -324,7 +364,8 @@ def __init__(self, cfg_path=DEFAULT_SIM_OPTIONS_FILE): # self.action_dim = self.agents[0].action_dim # TODO(santara): Can I not have different action dims for different agents? - + self.action_space = self.agents["MadrasAgent_0"].action_space + self.observation_space = self.agents["MadrasAgent_0"].observation_space self.initial_reset = True print("Madras agents are: ", self.agents) @@ -406,8 +447,11 @@ def reset(self): # Collect first observations for agent in self.agents: + self.agents[agent].prev_dist = 0.0 s_t[agent] = self.agents[agent].get_observation_from_server() self.agents[agent].client.respond_to_server() # To elimate 10s of timeout error, responding to the server after obs + self.agents[agent].increment_(self.agents[agent].client.S.d["distRaced"], self.agents[agent].client.S.d["damage"], self.agents[agent].client.S.d["racePos"]) + print("[{}] Reset: Starting new episode DAMAGE{}".format(agent, self.agents[agent].client.S.d["damage"])) # Finish reset for agent in self.agents: self.agents[agent].complete_reset() @@ -418,6 +462,19 @@ def reset(self): for agent, agent_map in self.comm_info["agent_map"].items(): for comm_agents in agent_map: s_t[agent] = np.hstack((s_t[agent], self.agents[comm_agents].action_buffer.request())) + + + stop_action = {} + for agent_name, agent in self.agents.items(): + if (agent.action_dim == 3): + stop_action[agent_name] = [0.0, -1.0, 1.0] + else: + stop_action[agent_name] = [0.0, 0.0] + + for i in range(25): + #print("WAITING {}".format(i)) + next_obs, next_reward, next_done, nexxt_info = self.step(stop_action) + s_t = next_obs return s_t def step(self, action): @@ -452,6 +509,7 @@ def step(self, action): done_check = True info[agent] = return_dict[agent][3] self.agents[agent].increment_step() + self.agents[agent].increment_(info[agent]['prev_dist'], info[agent]['prev_damage'], info[agent]['prev_pos']) done['__all__'] = done_check diff --git a/MADRaS/traffic/traffic.py b/MADRaS/traffic/traffic.py index ad3c2b0..bd89c84 100644 --- a/MADRaS/traffic/traffic.py +++ b/MADRaS/traffic/traffic.py @@ -9,7 +9,7 @@ MadrasDatatypes = md.MadrasDatatypes() -class MadrasTrafficManager(object): +class MadrasTrafficHandler(object): """Creates the traffic agents for a given training configuration.""" def __init__(self, torcs_server_port, num_learning_agents, cfg): self.traffic_agents = OrderedDict() @@ -93,6 +93,7 @@ def get_action(self): def flag_off(self, random_seed=0): del random_seed self.wait_for_observation() + self.client.respond_to_server() print("[{}]: My server is at {}".format(self.name, self.client.serverPID)) self.is_alive = True while True: @@ -222,6 +223,7 @@ class ParkedAgent(MadrasTrafficAgent): def __init__(self, port, cfg, name): super(ParkedAgent, self).__init__(port, cfg, name) self.target_speed = cfg["target_speed"]/self.env.default_speed + self.track_len = cfg["track_len"] def flag_off(self, random_seed=0): self.time = time() @@ -235,14 +237,21 @@ def flag_off(self, random_seed=0): self.parking_dist_from_start = (self.cfg["parking_dist_from_start"]["low"] + np.random.random()*parking_dist_range) self.behind_finish_line = True - self.prev_dist_from_start = 0 + 
self.prev_dist_from_start = self.track_len self.parked = False + self.init_flag = True super(ParkedAgent, self).flag_off() def get_action(self): self.distance_from_start = self.ob.distFromStart - if self.distance_from_start < self.prev_dist_from_start: + + if (self.behind_finish_line): + self.distance_from_start -= self.track_len + + if (abs(self.distance_from_start) >= 0.95*self.track_len): self.behind_finish_line = False + + #print("START DIST:{} TRACK LEN: {}".format(self.distance_from_start, self.track_len)) if not self.behind_finish_line and self.distance_from_start >= self.parking_dist_from_start: self.steer, self.accel, self.brake = 0.0, 0.0, 1.0 if not self.parked: diff --git a/MADRaS/utils/done_manager_v2.py b/MADRaS/utils/done_manager_v2.py index 7cf3ece..e53f2d6 100644 --- a/MADRaS/utils/done_manager_v2.py +++ b/MADRaS/utils/done_manager_v2.py @@ -3,7 +3,7 @@ import warnings -class DoneManager(object): +class DoneHandler(object): """Composes the done function from a given done configuration.""" def __init__(self, cfg, agent_id): self.agent_id = agent_id diff --git a/MADRaS/utils/observation_manager.py b/MADRaS/utils/observation_manager.py index b7a8840..34df663 100644 --- a/MADRaS/utils/observation_manager.py +++ b/MADRaS/utils/observation_manager.py @@ -4,7 +4,7 @@ MadrasDatatypes = md.MadrasDatatypes() -class ObservationManager(object): +class ObservationHandler(object): """Composes the observation vector for a given observation mode.""" def __init__(self, cfg, vision=False): self.cfg = cfg diff --git a/MADRaS/utils/reward_manager.py b/MADRaS/utils/reward_manager.py index b43d592..42fdc18 100644 --- a/MADRaS/utils/reward_manager.py +++ b/MADRaS/utils/reward_manager.py @@ -4,7 +4,7 @@ from copy import deepcopy -class RewardManager(object): +class RewardHandler(object): """Composes the reward function from a given reward configuration.""" def __init__(self, cfg): self.rewards = {} @@ -78,8 +78,9 @@ def reset(self): class ProgressReward2(ProgressReward): def compute_reward(self, game_config, game_state): + print(self.prev_dist) target_speed = game_config.target_speed / 50 # m/step - progress = game_state["distance_traversed"] - self.prev_dist + progress = game_state["distance_traversed"] - game_state["prev_distance_traversed"] reward = self.cfg["scale"] * np.min([1.0, progress/target_speed]) self.prev_dist = deepcopy(game_state["distance_traversed"]) return reward @@ -87,9 +88,14 @@ def compute_reward(self, game_config, game_state): class ProgressReward3(ProgressReward): def compute_reward(self, game_config, game_state): - target_speed = game_config.target_speed / 50 # m/step - progress = game_state["distance_traversed"] - self.prev_dist + #print("CURRENT {} PREV {} DIFF {}".format(game_state["distance_traversed"], self.prev_dist, game_state["distance_traversed"] - self.prev_dist)) + target_speed = game_config.target_speed / 50.0 # m/step + progress = game_state["distance_traversed"] - game_state["prev_distance_traversed"] reward = self.cfg["scale"] * (progress/target_speed) + + #if (reward > 0.6): + # print("CURRENT {} PREV {} DIFF {}".format(game_state["distance_traversed"], game_state["prev_distance_traversed"], game_state["distance_traversed"] - game_state["prev_distance_traversed"])) + # print("PROGRESS {} rew {}".format(progress, progress/target_speed)) self.prev_dist = deepcopy(game_state["distance_traversed"]) return reward @@ -117,7 +123,7 @@ def __init__(self, cfg): def compute_reward(self, game_config, game_state): del game_config reward = 0.0 - if self.damage < 
game_state["damage"]: + if game_state["prev_damage"] < game_state["damage"]: reward = -self.cfg["scale"] return reward @@ -166,9 +172,24 @@ def reset(self): def compute_reward(self, game_config, game_state): reward = 0.0 - if math.isinf(self.rank): # very fist step - self.rank = game_state["racePos"] - elif game_state["racePos"] < self.rank: - self.rank = game_state["racePos"] + if math.isinf(game_state["racePos"]): # very fist step + return reward + if game_state["racePos"] < game_state["prev_racePos"]: + #self.rank = game_state["racePos"] reward = self.cfg["scale"] + return reward + +class TaskReward(MadrasReward): + def __init__(self, cfg): + super(TaskReward, self).__init__(cfg) + self.got_reward = False + + def reset(self): + self.got_reward = False + + def compute_reward(self, game_config, game_state): + reward = 0 + if ((game_state["racePos"] <= 2) and (not self.got_reward)): + reward = self.cfg["scale"] * 100 + self.got_reward = False return reward \ No newline at end of file diff --git a/MADRaS/utils/snakeoil3_gym_v2.py b/MADRaS/utils/snakeoil3_gym_v2.py index 6a611ae..06bc42b 100644 --- a/MADRaS/utils/snakeoil3_gym_v2.py +++ b/MADRaS/utils/snakeoil3_gym_v2.py @@ -205,6 +205,7 @@ def setup_connection(self): sockdata, addr = self.so.recvfrom(data_size) sockdata = sockdata.decode('utf-8') except socket.error as emsg: + #print("[{}]: SocketData: {}".format(self.name, sockdata)) print("[{}]: SocketError: {}".format(self.name, emsg)) print("{} Waiting for server on {}............".format(self.name, self.port)) print("[{}]: Count Down : {}".format(self.name, n_fail)) @@ -275,6 +276,7 @@ def get_servers_input(self, step): sockdata, addr = self.so.recvfrom(data_size) sockdata = sockdata.decode('utf-8') except socket.error as emsg: + print("[{}]: SocketData: {}".format(self.name, sockdata)) print('[{}]: SocketError: {}'.format(self.name, emsg)) print("{} Waiting for server data on {}..............".format(self.name, self.port)) From 0cdf29dd9f295ba408d2c53ee3c52a9725ad397a Mon Sep 17 00:00:00 2001 From: rudrasohan Date: Sat, 29 Feb 2020 13:48:52 +0530 Subject: [PATCH 30/46] changed conflicting names --- MADRaS/envs/gym_madras_v2.py | 10 +++++----- .../utils/{done_manager_v2.py => done_handler_v2.py} | 0 .../{observation_manager.py => observation_handler.py} | 0 MADRaS/utils/{reward_manager.py => reward_handler.py} | 0 4 files changed, 5 insertions(+), 5 deletions(-) rename MADRaS/utils/{done_manager_v2.py => done_handler_v2.py} (100%) rename MADRaS/utils/{observation_manager.py => observation_handler.py} (100%) rename MADRaS/utils/{reward_manager.py => reward_handler.py} (100%) diff --git a/MADRaS/envs/gym_madras_v2.py b/MADRaS/envs/gym_madras_v2.py index e417508..3b79683 100644 --- a/MADRaS/envs/gym_madras_v2.py +++ b/MADRaS/envs/gym_madras_v2.py @@ -28,9 +28,9 @@ from mpi4py import MPI import socket import MADRaS.utils.config_parser as config_parser -import MADRaS.utils.reward_manager as rm -import MADRaS.utils.done_manager_v2 as dm -import MADRaS.utils.observation_manager as om +import MADRaS.utils.reward_handler as rm +import MADRaS.utils.done_handler_v2 as dm +import MADRaS.utils.observation_handler as om import MADRaS.utils.action_buffer as ab import MADRaS.utils.madras_datatypes as md import MADRaS.traffic.traffic as traffic @@ -335,7 +335,7 @@ def __init__(self, cfg_path=DEFAULT_SIM_OPTIONS_FILE): self.comm_info = {"agent":{}, "agent_map":{}} self.comm_agent_names = [] if self._config.traffic: - self.traffic_manager = traffic.MadrasTrafficHandler( + self.traffic_handler = 
traffic.MadrasTrafficHandler( self._config.torcs_server_port, len(self.agents), self._config.traffic) num_traffic_agents = len(self._config.traffic) if self._config.traffic else 0 if self._config.agents: @@ -433,7 +433,7 @@ def reset(self): self.initial_reset = False if self._config.traffic: - self.traffic_manager.reset() + self.traffic_handler.reset() s_t = OrderedDict() diff --git a/MADRaS/utils/done_manager_v2.py b/MADRaS/utils/done_handler_v2.py similarity index 100% rename from MADRaS/utils/done_manager_v2.py rename to MADRaS/utils/done_handler_v2.py diff --git a/MADRaS/utils/observation_manager.py b/MADRaS/utils/observation_handler.py similarity index 100% rename from MADRaS/utils/observation_manager.py rename to MADRaS/utils/observation_handler.py diff --git a/MADRaS/utils/reward_manager.py b/MADRaS/utils/reward_handler.py similarity index 100% rename from MADRaS/utils/reward_manager.py rename to MADRaS/utils/reward_handler.py From 640ca51389cfc944200451c6d5272fd1505f8dfc Mon Sep 17 00:00:00 2001 From: rudrasohan Date: Sat, 14 Mar 2020 10:39:21 +0530 Subject: [PATCH 31/46] updated madras types nomenclature --- MADRaS/envs/gym_madras_v2.py | 6 ++-- MADRaS/traffic/traffic.py | 2 -- MADRaS/utils/gym_torcs.py | 56 ++++++++++++++--------------- MADRaS/utils/gym_torcs_v2.py | 52 +++++++++++++-------------- MADRaS/utils/observation_handler.py | 14 ++++---- 5 files changed, 64 insertions(+), 66 deletions(-) diff --git a/MADRaS/envs/gym_madras_v2.py b/MADRaS/envs/gym_madras_v2.py index 3b79683..b803f93 100644 --- a/MADRaS/envs/gym_madras_v2.py +++ b/MADRaS/envs/gym_madras_v2.py @@ -38,7 +38,7 @@ import multiprocessing import logging -madras = md.MadrasDatatypes() +mt = md.MadrasDatatypes() #logger = logging.getLogger(__name__) path_and_file = os.path.realpath(__file__) path, file = os.path.split(path_and_file) @@ -313,8 +313,8 @@ def actions_init(self, info={}): additional_dims += dims_tup[0]*dims_tup[1] self.obs_dim += additional_dims - high = np.hstack((self.observation_space.high, np.ones((additional_dims), dtype=madras.floatX))) - low = np.hstack((self.observation_space.low, -np.ones((additional_dims), dtype=madras.floatX))) + high = np.hstack((self.observation_space.high, np.ones((additional_dims), dtype=mt.floatX))) + low = np.hstack((self.observation_space.low, -np.ones((additional_dims), dtype=mt.floatX))) self.observation_space = spaces.Box(high=high, low=low) print("{}: {}".format(self.name, self._config.observations['buff_size'])) diff --git a/MADRaS/traffic/traffic.py b/MADRaS/traffic/traffic.py index bd89c84..f9792ed 100644 --- a/MADRaS/traffic/traffic.py +++ b/MADRaS/traffic/traffic.py @@ -2,12 +2,10 @@ from MADRaS.utils.gym_torcs_v2 import TorcsEnv import MADRaS.utils.snakeoil3_gym as snakeoil3 from MADRaS.controllers.pid import PIDController -import MADRaS.utils.madras_datatypes as md from multiprocessing import Process from collections import OrderedDict from time import time -MadrasDatatypes = md.MadrasDatatypes() class MadrasTrafficHandler(object): """Creates the traffic agents for a given training configuration.""" diff --git a/MADRaS/utils/gym_torcs.py b/MADRaS/utils/gym_torcs.py index 002006c..f0cd129 100644 --- a/MADRaS/utils/gym_torcs.py +++ b/MADRaS/utils/gym_torcs.py @@ -13,10 +13,10 @@ from gym import spaces import numpy as np import copy -import utils.snakeoil3_gym as snakeoil3 -import utils.madras_datatypes as md +import MADRaS.utils.snakeoil3_gym as snakeoil3 +import MADRaS.utils.madras_datatypes as md -madras = md.MadrasDatatypes() +mt = md.MadrasDatatypes() 
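
Patch 31 only renames the module-level `MadrasDatatypes` instance from `madras` to `mt`; every `dtype=...floatX` in the hunks below is functionally unchanged. For readers unfamiliar with the helper, the sketch below shows what such a datatype holder usually amounts to; the real `MADRaS.utils.madras_datatypes` implementation may differ, and the `use_float64` flag is purely illustrative:

    import numpy as np

    class MadrasDatatypes:
        """Single place to pin the numeric precision used for observation arrays."""
        def __init__(self, use_float64=False):
            # floatX follows the Theano-style convention of one switchable float dtype
            self.floatX = np.float64 if use_float64 else np.float32
            self.intX = np.int32

    mt = MadrasDatatypes()
    track = np.array(19 * [0.5], dtype=mt.floatX)  # e.g. normalized track rangefinders
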
class TorcsEnv: @@ -40,8 +40,8 @@ def __init__(self, vision=False, throttle=False, gear_change=False, obs_dim=29, if throttle is False: # Throttle is generally True self.action_space = spaces.Box(low=-1.0, high=1.0, shape=(1,)) else: - high = np.array([1., 1., 1.], dtype=madras.floatX) - low = np.array([-1., 0., 0.], dtype=madras.floatX) + high = np.array([1., 1., 1.], dtype=mt.floatX) + low = np.array([-1., 0., 0.], dtype=mt.floatX) self.action_space = spaces.Box(low=low, high=high) # steer,accel,brake if vision is False: # Vision has to be set True if you need the images from the simulator @@ -49,8 +49,8 @@ def __init__(self, vision=False, throttle=False, gear_change=False, obs_dim=29, low = -high self.observation_space = spaces.Box(low, high) else: - high = np.array([1., np.inf, np.inf, np.inf, 1., np.inf, 1., np.inf, 255], dtype=madras.floatX) - low = np.array([0., -np.inf, -np.inf, -np.inf, 0., -np.inf, 0., -np.inf, 0], dtype=madras.floatX) + high = np.array([1., np.inf, np.inf, np.inf, 1., np.inf, 1., np.inf, 255], dtype=mt.floatX) + low = np.array([0., -np.inf, -np.inf, -np.inf, 0., -np.inf, 0., -np.inf, 0], dtype=mt.floatX) self.observation_space = spaces.Box(low=low, high=high) @@ -254,18 +254,18 @@ def make_observation(self, raw_obs): 'wheelSpinVel', 'distFromStart'] Observation = col.namedtuple('Observation', names) - return Observation(focus=np.array(raw_obs['focus'], dtype=madras.floatX)/200., - speedX=np.array(raw_obs['speedX'], dtype=madras.floatX)/self.default_speed, - speedY=np.array(raw_obs['speedY'], dtype=madras.floatX)/self.default_speed, - speedZ=np.array(raw_obs['speedZ'], dtype=madras.floatX)/self.default_speed, - angle=np.array(raw_obs['angle'], dtype=madras.floatX)/3.1416, - damage=np.array(raw_obs['damage'], dtype=madras.floatX), - opponents=np.array(raw_obs['opponents'], dtype=madras.floatX)/200., - rpm=np.array(raw_obs['rpm'], dtype=madras.floatX)/10000, - track=np.array(raw_obs['track'], dtype=madras.floatX)/200., - trackPos=np.array(raw_obs['trackPos'], dtype=madras.floatX)/1., - wheelSpinVel=np.array(raw_obs['wheelSpinVel'], dtype=madras.floatX), - distFromStart=np.array(raw_obs['distFromStart'], dtype=madras.floatX)) + return Observation(focus=np.array(raw_obs['focus'], dtype=mt.floatX)/200., + speedX=np.array(raw_obs['speedX'], dtype=mt.floatX)/self.default_speed, + speedY=np.array(raw_obs['speedY'], dtype=mt.floatX)/self.default_speed, + speedZ=np.array(raw_obs['speedZ'], dtype=mt.floatX)/self.default_speed, + angle=np.array(raw_obs['angle'], dtype=mt.floatX)/3.1416, + damage=np.array(raw_obs['damage'], dtype=mt.floatX), + opponents=np.array(raw_obs['opponents'], dtype=mt.floatX)/200., + rpm=np.array(raw_obs['rpm'], dtype=mt.floatX)/10000, + track=np.array(raw_obs['track'], dtype=mt.floatX)/200., + trackPos=np.array(raw_obs['trackPos'], dtype=mt.floatX)/1., + wheelSpinVel=np.array(raw_obs['wheelSpinVel'], dtype=mt.floatX), + distFromStart=np.array(raw_obs['distFromStart'], dtype=mt.floatX)) else: names = ['focus', 'speedX', 'speedY', 'speedZ', 'angle', @@ -280,13 +280,13 @@ def make_observation(self, raw_obs): # Get RGB from observation image_rgb = self.obs_vision_to_image_rgb(raw_obs[names[8]]) - return Observation(focus=np.array(raw_obs['focus'], dtype=madras.floatX)/200., - speedX=np.array(raw_obs['speedX'], dtype=madras.floatX)/self.default_speed, - speedY=np.array(raw_obs['speedY'], dtype=madras.floatX)/self.default_speed, - speedZ=np.array(raw_obs['speedZ'], dtype=madras.floatX)/self.default_speed, - opponents=np.array(raw_obs['opponents'], 
dtype=madras.floatX)/200., - rpm=np.array(raw_obs['rpm'], dtype=madras.floatX), - track=np.array(raw_obs['track'], dtype=madras.floatX)/200., - trackPos=np.array(raw_obs['trackPos'], dtype=madras.floatX)/1., - wheelSpinVel=np.array(raw_obs['wheelSpinVel'], dtype=madras.floatX), + return Observation(focus=np.array(raw_obs['focus'], dtype=mt.floatX)/200., + speedX=np.array(raw_obs['speedX'], dtype=mt.floatX)/self.default_speed, + speedY=np.array(raw_obs['speedY'], dtype=mt.floatX)/self.default_speed, + speedZ=np.array(raw_obs['speedZ'], dtype=mt.floatX)/self.default_speed, + opponents=np.array(raw_obs['opponents'], dtype=mt.floatX)/200., + rpm=np.array(raw_obs['rpm'], dtype=mt.floatX), + track=np.array(raw_obs['track'], dtype=mt.floatX)/200., + trackPos=np.array(raw_obs['trackPos'], dtype=mt.floatX)/1., + wheelSpinVel=np.array(raw_obs['wheelSpinVel'], dtype=mt.floatX), img=image_rgb) diff --git a/MADRaS/utils/gym_torcs_v2.py b/MADRaS/utils/gym_torcs_v2.py index b947a17..2d89eec 100644 --- a/MADRaS/utils/gym_torcs_v2.py +++ b/MADRaS/utils/gym_torcs_v2.py @@ -16,7 +16,7 @@ import MADRaS.utils.snakeoil3_gym_v2 as snakeoil3 import MADRaS.utils.madras_datatypes as md -madras = md.MadrasDatatypes() +mt = md.MadrasDatatypes() class TorcsEnv: @@ -38,8 +38,8 @@ def __init__(self, vision=False, throttle=False, gear_change=False, obs_dim=29, if throttle is False: # Throttle is generally True self.action_space = spaces.Box(low=-1.0, high=1.0, shape=(1,)) else: - high = np.array([1., 1., 1.], dtype=madras.floatX) - low = np.array([-1., 0., 0.], dtype=madras.floatX) + high = np.array([1., 1., 1.], dtype=mt.floatX) + low = np.array([-1., 0., 0.], dtype=mt.floatX) self.action_space = spaces.Box(low=low, high=high) # steer,accel,brake if vision is False: # Vision has to be set True if you need the images from the simulator @@ -47,8 +47,8 @@ def __init__(self, vision=False, throttle=False, gear_change=False, obs_dim=29, low = -high self.observation_space = spaces.Box(low, high) else: - high = np.array([1., np.inf, np.inf, np.inf, 1., np.inf, 1., np.inf, 255], dtype=madras.floatX) - low = np.array([0., -np.inf, -np.inf, -np.inf, 0., -np.inf, 0., -np.inf, 0], dtype=madras.floatX) + high = np.array([1., np.inf, np.inf, np.inf, 1., np.inf, 1., np.inf, 255], dtype=mt.floatX) + low = np.array([0., -np.inf, -np.inf, -np.inf, 0., -np.inf, 0., -np.inf, 0], dtype=mt.floatX) self.observation_space = spaces.Box(low=low, high=high) @@ -227,18 +227,18 @@ def make_observation(self, raw_obs): 'wheelSpinVel', 'distFromStart'] Observation = col.namedtuple('Observation', names) - return Observation(focus=np.array(raw_obs['focus'], dtype=madras.floatX)/200., - speedX=np.array(raw_obs['speedX'], dtype=madras.floatX)/self.default_speed, - speedY=np.array(raw_obs['speedY'], dtype=madras.floatX)/self.default_speed, - speedZ=np.array(raw_obs['speedZ'], dtype=madras.floatX)/self.default_speed, - angle=np.array(raw_obs['angle'], dtype=madras.floatX)/3.1416, - damage=np.array(raw_obs['damage'], dtype=madras.floatX), - opponents=np.array(raw_obs['opponents'], dtype=madras.floatX)/200., - rpm=np.array(raw_obs['rpm'], dtype=madras.floatX)/10000, - track=np.array(raw_obs['track'], dtype=madras.floatX)/200., - trackPos=np.array(raw_obs['trackPos'], dtype=madras.floatX)/1., - wheelSpinVel=np.array(raw_obs['wheelSpinVel'], dtype=madras.floatX), - distFromStart=np.array(raw_obs['distFromStart'], dtype=madras.floatX)) + return Observation(focus=np.array(raw_obs['focus'], dtype=mt.floatX)/200., + speedX=np.array(raw_obs['speedX'], 
dtype=mt.floatX)/self.default_speed, + speedY=np.array(raw_obs['speedY'], dtype=mt.floatX)/self.default_speed, + speedZ=np.array(raw_obs['speedZ'], dtype=mt.floatX)/self.default_speed, + angle=np.array(raw_obs['angle'], dtype=mt.floatX)/3.1416, + damage=np.array(raw_obs['damage'], dtype=mt.floatX), + opponents=np.array(raw_obs['opponents'], dtype=mt.floatX)/200., + rpm=np.array(raw_obs['rpm'], dtype=mt.floatX)/10000, + track=np.array(raw_obs['track'], dtype=mt.floatX)/200., + trackPos=np.array(raw_obs['trackPos'], dtype=mt.floatX)/1., + wheelSpinVel=np.array(raw_obs['wheelSpinVel'], dtype=mt.floatX), + distFromStart=np.array(raw_obs['distFromStart'], dtype=mt.floatX)) else: names = ['focus', 'speedX', 'speedY', 'speedZ', 'angle', @@ -253,13 +253,13 @@ def make_observation(self, raw_obs): # Get RGB from observation image_rgb = self.obs_vision_to_image_rgb(raw_obs[names[8]]) - return Observation(focus=np.array(raw_obs['focus'], dtype=madras.floatX)/200., - speedX=np.array(raw_obs['speedX'], dtype=madras.floatX)/self.default_speed, - speedY=np.array(raw_obs['speedY'], dtype=madras.floatX)/self.default_speed, - speedZ=np.array(raw_obs['speedZ'], dtype=madras.floatX)/self.default_speed, - opponents=np.array(raw_obs['opponents'], dtype=madras.floatX)/200., - rpm=np.array(raw_obs['rpm'], dtype=madras.floatX), - track=np.array(raw_obs['track'], dtype=madras.floatX)/200., - trackPos=np.array(raw_obs['trackPos'], dtype=madras.floatX)/1., - wheelSpinVel=np.array(raw_obs['wheelSpinVel'], dtype=madras.floatX), + return Observation(focus=np.array(raw_obs['focus'], dtype=mt.floatX)/200., + speedX=np.array(raw_obs['speedX'], dtype=mt.floatX)/self.default_speed, + speedY=np.array(raw_obs['speedY'], dtype=mt.floatX)/self.default_speed, + speedZ=np.array(raw_obs['speedZ'], dtype=mt.floatX)/self.default_speed, + opponents=np.array(raw_obs['opponents'], dtype=mt.floatX)/200., + rpm=np.array(raw_obs['rpm'], dtype=mt.floatX), + track=np.array(raw_obs['track'], dtype=mt.floatX)/200., + trackPos=np.array(raw_obs['trackPos'], dtype=mt.floatX)/1., + wheelSpinVel=np.array(raw_obs['wheelSpinVel'], dtype=mt.floatX), img=image_rgb) diff --git a/MADRaS/utils/observation_handler.py b/MADRaS/utils/observation_handler.py index 34df663..74048ca 100644 --- a/MADRaS/utils/observation_handler.py +++ b/MADRaS/utils/observation_handler.py @@ -2,7 +2,7 @@ from gym import spaces import MADRaS.utils.madras_datatypes as md -MadrasDatatypes = md.MadrasDatatypes() +mt = md.MadrasDatatypes() class ObservationHandler(object): """Composes the observation vector for a given observation mode.""" @@ -82,10 +82,10 @@ def observation_space(self, vision): else: high = np.array([1., np.inf, np.inf, np.inf, 1., np.inf, 1., np.inf, 255], - dtype=MadrasDatatypes.floatX) + dtype=mt.floatX) low = np.array([0., -np.inf, -np.inf, -np.inf, 0., -np.inf, 0., -np.inf, 0], - dtype=MadrasDatatypes.floatX) + dtype=mt.floatX) observation_space = spaces.Box(low=low, high=high) return observation_space @@ -110,9 +110,9 @@ def observation_space(self, vision): if vision: raise NotImplementedError("Vision inputs not supported yet.") high = np.asarray([1] + 19*[1] + [np.inf] + 3*[np.inf], - dtype=MadrasDatatypes.floatX) + dtype=mt.floatX) low = np.asarray([-1] + 19*[0] + [-np.inf] + 3*[-np.inf], - dtype=MadrasDatatypes.floatX) + dtype=mt.floatX) observation_space = spaces.Box(low=low, high=high) @@ -139,9 +139,9 @@ def observation_space(self, vision): if vision: raise NotImplementedError("Vision inputs not supported yet.") high = np.asarray([1] + 19*[1] + [np.inf] + 
3*[np.inf] + 36*[1], - dtype=MadrasDatatypes.floatX) + dtype=mt.floatX) low = np.asarray([-1] + 19*[0] + [-np.inf] + 3*[-np.inf] + 36*[0], - dtype=MadrasDatatypes.floatX) + dtype=mt.floatX) observation_space = spaces.Box(low=low, high=high) From 172025ba80b227ac4ecbd7a596bb707e6ed29ef0 Mon Sep 17 00:00:00 2001 From: rudrasohan Date: Sat, 14 Mar 2020 13:42:46 +0530 Subject: [PATCH 32/46] [UNTESTED] updated comm module with custom network architecure --- MADRaS/envs/data/communications.yml | 20 +++++ MADRaS/envs/gym_madras_v2.py | 85 ++++++++++++++----- ...ion_buffer.py => communications_buffer.py} | 24 ++++-- MADRaS/utils/observation_handler.py | 2 +- 4 files changed, 101 insertions(+), 30 deletions(-) create mode 100644 MADRaS/envs/data/communications.yml rename MADRaS/utils/{action_buffer.py => communications_buffer.py} (57%) diff --git a/MADRaS/envs/data/communications.yml b/MADRaS/envs/data/communications.yml new file mode 100644 index 0000000..8c26831 --- /dev/null +++ b/MADRaS/envs/data/communications.yml @@ -0,0 +1,20 @@ +MadrasAgent0: + buff_size: 2 + vars: + action + speedX + speedY + trackPos + comms: + MadrasAgent1 + + +MadrasAgent1: + buff_size: 2 + vars: + action + speedX + speedY + trackPos + comms: + - MadrasAgent0 diff --git a/MADRaS/envs/gym_madras_v2.py b/MADRaS/envs/gym_madras_v2.py index b803f93..bc8a4c7 100644 --- a/MADRaS/envs/gym_madras_v2.py +++ b/MADRaS/envs/gym_madras_v2.py @@ -31,7 +31,7 @@ import MADRaS.utils.reward_handler as rm import MADRaS.utils.done_handler_v2 as dm import MADRaS.utils.observation_handler as om -import MADRaS.utils.action_buffer as ab +import MADRaS.utils.communications_buffer as cb import MADRaS.utils.madras_datatypes as md import MADRaS.traffic.traffic as traffic from collections import OrderedDict @@ -43,6 +43,7 @@ path_and_file = os.path.realpath(__file__) path, file = os.path.split(path_and_file) DEFAULT_SIM_OPTIONS_FILE = os.path.join(path, "data", "sim_options_v2.yml") +DEFAULT_COMMUNICATION_MAP = os.path.join(path, "data", "communications.yml") class MadrasAgent(TorcsEnv, gym.Env): @@ -210,11 +211,12 @@ def step_vanilla(self, action): done = self.done_handler.get_done_signal(self._config, game_state) - next_obs = self.observation_handler.get_obs(self.ob, self._config) + next_obs, full_obs = self.observation_handler.get_obs(self.ob, self._config) info['prev_dist'] = game_state["distance_traversed"] info['prev_damage'] = game_state["damage"] info['prev_pos'] = game_state["racePos"] + info['full_obs'] = full_obs return next_obs, reward, done, info @@ -281,7 +283,9 @@ def step_pid(self, desire, e): break - next_obs = self.observation_handler.get_obs(self.ob, self._config) + next_obs, full_obs = self.observation_handler.get_obs(self.ob, self._config) + + info['full_obs'] = full_obs return next_obs, reward, done, info def step(self, action, e, return_dict={}): @@ -305,16 +309,39 @@ def increment_step(self): #self.increment_dist() def actions_init(self, info={}): - self.action_buffer = ab.ActionBuffer(self.name, self.buff_size, self.action_dim) + + self.comms_buffer = cb.CommBuffer(self.name, self.buff_size, self.action_dim) + additional_dims_h = [] + additional_dims_l = [] additional_dims = 0 - for key, dims_tup in info["agent"].items(): - if (key != self.name): - additional_dims += dims_tup[0]*dims_tup[1] + for var in info["vars"]: + if (var == 'track'): + additional_dims_h += 19*[1] + additional_dims_l += 19*[0] + additional_dims += 19 + elif (var == 'track'): + additional_dims_h += [np.inf] + additional_dims_l += [-np.inf] + 
additional_dims += 1 + elif (var == 'opponents'): + additional_dims_h += 36*[1] + additional_dims_l += 36*[0] + additional_dims += 36 + elif ('speed' in var): + additional_dims_h += [np.inf] + additional_dims_l += [-np.inf] + additional_dims += 1 + else: + additional_dims_h += [1] + additional_dims_l += [-1] + additional_dims += 1 - self.obs_dim += additional_dims - high = np.hstack((self.observation_space.high, np.ones((additional_dims), dtype=mt.floatX))) - low = np.hstack((self.observation_space.low, -np.ones((additional_dims), dtype=mt.floatX))) + self.obs_dim += additional_dims*len(info['comm']) + additional_dims_h = additional_dims_h*len(info['comm']) + additional_dims_l = additional_dims_l*len(info['comm']) + high = np.hstack((self.observation_space.high, additional_dims_h)) + low = np.hstack((self.observation_space.low, additional_dims_l)) self.observation_space = spaces.Box(high=high, low=low) print("{}: {}".format(self.name, self._config.observations['buff_size'])) @@ -327,6 +354,7 @@ def __init__(self, cfg_path=DEFAULT_SIM_OPTIONS_FILE): # If `visualise` is set to False torcs simulator will run in headless mode self._config = config_parser.MadrasEnvConfig() self._config.update(config_parser.parse_yaml(cfg_path)) + self.communications_map = None self.torcs_proc = None self.seed() self.start_torcs_process() @@ -349,18 +377,25 @@ def __init__(self, cfg_path=DEFAULT_SIM_OPTIONS_FILE): {"track_len": self._config.track_len, "max_steps": self._config.max_steps }) - if self.agents[name].multi_flag: - self.comm_agent_names.append(name) - self.comm_info["agent"][name] = (self.agents[name].action_dim, self.agents[name].buff_size) + # if self.agents[name].multi_flag: + # self.comm_agent_names.append(name) + # self.comm_info["agent"][name] = (self.agents[name].action_dim, self.agents[name].buff_size) self.num_agents += 1 - print("COMM AGENTS",self.comm_agent_names) - for agent in self.comm_agent_names: - self.agents[agent].actions_init(self.comm_info) - self.comm_info["agent_map"][agent] = [] - for comm_agent in self.comm_agent_names: - if comm_agent != agent: - self.comm_info["agent_map"][agent].append(comm_agent) + + if os.path.isfile(DEFAULT_COMMUNICATION_MAP): + self.communications_map = config_parser.parse_yaml(DEFAULT_COMMUNICATION_MAP) + + print("COMMUNICATIONS MAP", self.comm_agent_names) + # for agent_s, agents_r in self.communications_map.items(): + # self.comm_info["agent"][agent_s] = (self.agents[name].action_dim, self.agents[name].buff_size) + # self.agents[agent_s].action_init(self.communications_map) + # for agent in self.comm_agent_names: + # self.agents[agent].actions_init(self.comm_info) + # self.comm_info["agent_map"][agent] = [] + # for comm_agent in self.comm_agent_names: + # if comm_agent != agent: + # self.comm_info["agent_map"][agent].append(comm_agent) # self.action_dim = self.agents[0].action_dim # TODO(santara): Can I not have different action dims for different agents? 
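
The `actions_init` hunk above grows an agent's observation vector by the size of every variable its peers broadcast (19 slots for the `track` rangefinders, 36 for `opponents`, one for scalars such as `trackPos` or the speed components), once per communicating peer, and widens the gym Box space to match; patches 33-35 later in this series refine the variable-to-size mapping and add an `action` entry sized by `action_dim`. A compact sketch of that bookkeeping as it stands at the end of the series is shown below; it collapses the per-variable bounds the patch tracks into a uniform +/-1 range purely for brevity, and the `extend_observation_space` helper name is illustrative:

    import numpy as np
    from gym import spaces

    VAR_DIMS = {"track": 19, "opponents": 36}  # everything else is treated as scalar

    def extend_observation_space(obs_space, comm_vars, num_peers, action_dim):
        """Append one slot per communicated value, repeated for every peer."""
        extra = sum(VAR_DIMS.get(v, action_dim if v == "action" else 1)
                    for v in comm_vars) * num_peers
        high = np.hstack((obs_space.high, np.ones(extra, dtype=obs_space.dtype)))
        low = np.hstack((obs_space.low, -np.ones(extra, dtype=obs_space.dtype)))
        return spaces.Box(low=low, high=high)
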
@@ -456,7 +491,7 @@ def reset(self): for agent in self.agents: self.agents[agent].complete_reset() - for comm_agents in self.comm_agent_names: #reset buffers + for comm_agents in self.communications_map.keys(): #reset buffers self.agents[comm_agents].action_buffer.reset() for agent, agent_map in self.comm_info["agent_map"].items(): @@ -491,8 +526,8 @@ def step(self, action): for proc in jobs: proc.join() - for agent in self.comm_agent_names: - self.agents[agent].action_buffer.insert(action[agent]) + # for agent in self.comm_agent_names: + # self.agents[agent].action_buffer.insert(action[agent]) done_check = False @@ -510,6 +545,10 @@ def step(self, action): info[agent] = return_dict[agent][3] self.agents[agent].increment_step() self.agents[agent].increment_(info[agent]['prev_dist'], info[agent]['prev_damage'], info[agent]['prev_pos']) + + for agent, agent_info in self.communications_map.items(): + for agent_s in agent_info["comms"]: + self.agents.comms_buffer.insert(agent[action], info[agent]["full_obs"], agent_info["vars"]) done['__all__'] = done_check diff --git a/MADRaS/utils/action_buffer.py b/MADRaS/utils/communications_buffer.py similarity index 57% rename from MADRaS/utils/action_buffer.py rename to MADRaS/utils/communications_buffer.py index 8d41990..83f3dff 100644 --- a/MADRaS/utils/action_buffer.py +++ b/MADRaS/utils/communications_buffer.py @@ -3,9 +3,9 @@ import numpy as np import MADRaS.utils.madras_datatypes as md -madras = md.MadrasDatatypes() +mt = md.MadrasDatatypes() -class ActionBuffer: +class CommBuffer: """ To hold the last k actions of taken by an agent. @@ -18,18 +18,18 @@ def __init__(self, agent_id, size, action_dim): self._buffer = Queue(maxsize=self._size) self._curr_size = 0 - def insert(self, action): + def insert(self, action, full_obs, var_list): if (self._curr_size < self._size): self._curr_size += 1 - self._buffer.put(action) + self._buffer.put(self.parse_buffer_items(action, full_obs, var_list)) else: _ = self._buffer.get() - self._buffer.put(action) + self._buffer.put(self.parse_buffer_items(action, full_obs, var_list)) def request(self): temp_queue = Queue(maxsize=self._size) - ret = np.zeros((self._size*self._action_dim,), dtype=madras.floatX) + ret = np.zeros((self._size*self._action_dim,), dtype=mt.floatX) for i in range(self._curr_size): a = self._buffer.get() temp_queue.put(a) @@ -37,6 +37,18 @@ def request(self): self._buffer = temp_queue return ret + def parse_buffer_items(self, action, full_obs, var_list): + buffer_array = [] + for var in var_list: + if var == 'action': + buffer_array.append(action) + else: + val = None + exec("val = full_obs.{}".format(var)) + buffer_array.append(val) + buffer_array = np.hstack(buffer_array) + return buffer_array + def reset(self): del self._buffer self._buffer = Queue(maxsize=self._size) diff --git a/MADRaS/utils/observation_handler.py b/MADRaS/utils/observation_handler.py index 74048ca..31257b5 100644 --- a/MADRaS/utils/observation_handler.py +++ b/MADRaS/utils/observation_handler.py @@ -18,7 +18,7 @@ def __init__(self, cfg, vision=False): def get_obs(self, full_obs, game_config): if self.cfg["normalize"]: full_obs = self.normalize_obs(full_obs, game_config) - return self.obs.get_obs(full_obs) + return self.obs.get_obs(full_obs), full_obs def min_max_normalize(self, var, min, max): offset = (min + max) / 2.0 From e86d66473368e490b16cab0001a338c672fc88b0 Mon Sep 17 00:00:00 2001 From: rudrasohan Date: Tue, 17 Mar 2020 10:03:29 +0530 Subject: [PATCH 33/46] obs_dim mismatch fixed --- 
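
The fix in this patch is largely syntactic: every agent and variable in communications.yml must be a proper YAML list item (`- MadrasAgent_1`, `- speedX`, ...) and the top-level keys must use the `MadrasAgent_<index>` names the environment assigns; otherwise the unhyphenated entries parse as a single folded string rather than a list and the comm-buffer wiring above breaks. A small sketch of a validation pass that could catch such mistakes before the environment starts; the `load_comm_map` helper and its error messages are illustrative, not part of the patch:

    import yaml

    def load_comm_map(path, known_agents):
        """Parse communications.yml and fail fast on malformed or unknown entries."""
        with open(path) as f:
            comm_map = yaml.safe_load(f) or {}
        for agent, attrs in comm_map.items():
            if not isinstance(attrs, dict):
                raise ValueError("entry for {} is not a mapping; check indentation".format(agent))
            peers = attrs.get("comms") or []
            if not isinstance(peers, list):
                raise ValueError("'comms' for {} must be a YAML list".format(agent))
            unknown = [p for p in [agent] + peers if p not in known_agents]
            if unknown:
                raise ValueError("communications.yml names unknown agents: {}".format(unknown))
        return comm_map

    # e.g. load_comm_map(DEFAULT_COMMUNICATION_MAP, {"MadrasAgent_0", "MadrasAgent_1"})
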
MADRaS/envs/data/communications.yml | 24 ++++++++++---------- MADRaS/envs/gym_madras_v2.py | 35 ++++++++++++++++++----------- 2 files changed, 34 insertions(+), 25 deletions(-) diff --git a/MADRaS/envs/data/communications.yml b/MADRaS/envs/data/communications.yml index 8c26831..f977e01 100644 --- a/MADRaS/envs/data/communications.yml +++ b/MADRaS/envs/data/communications.yml @@ -1,20 +1,20 @@ -MadrasAgent0: +MadrasAgent_0: buff_size: 2 vars: - action - speedX - speedY - trackPos + - action + - speedX + - speedY + - trackPos comms: - MadrasAgent1 + - MadrasAgent_1 -MadrasAgent1: +MadrasAgent_1: buff_size: 2 vars: - action - speedX - speedY - trackPos + - action + - speedX + - speedY + - trackPos comms: - - MadrasAgent0 + - MadrasAgent_0 diff --git a/MADRaS/envs/gym_madras_v2.py b/MADRaS/envs/gym_madras_v2.py index bc8a4c7..2bfba79 100644 --- a/MADRaS/envs/gym_madras_v2.py +++ b/MADRaS/envs/gym_madras_v2.py @@ -316,7 +316,8 @@ def actions_init(self, info={}): additional_dims = 0 for var in info["vars"]: - if (var == 'track'): + + if (var == 'trackPos'): additional_dims_h += 19*[1] additional_dims_l += 19*[0] additional_dims += 19 @@ -337,9 +338,10 @@ def actions_init(self, info={}): additional_dims_l += [-1] additional_dims += 1 - self.obs_dim += additional_dims*len(info['comm']) - additional_dims_h = additional_dims_h*len(info['comm']) - additional_dims_l = additional_dims_l*len(info['comm']) + import pdb; pdb.set_trace() + self.obs_dim += additional_dims*len(info['comms']) + additional_dims_h = additional_dims_h*len(info['comms']) + additional_dims_l = additional_dims_l*len(info['comms']) high = np.hstack((self.observation_space.high, additional_dims_h)) low = np.hstack((self.observation_space.low, additional_dims_l)) self.observation_space = spaces.Box(high=high, low=low) @@ -386,7 +388,9 @@ def __init__(self, cfg_path=DEFAULT_SIM_OPTIONS_FILE): if os.path.isfile(DEFAULT_COMMUNICATION_MAP): self.communications_map = config_parser.parse_yaml(DEFAULT_COMMUNICATION_MAP) - print("COMMUNICATIONS MAP", self.comm_agent_names) + print("COMMUNICATIONS MAP", self.communications_map) + for comm_agent, agent_attrs in self.communications_map.items(): + self.agents[comm_agent].actions_init(agent_attrs) # for agent_s, agents_r in self.communications_map.items(): # self.comm_info["agent"][agent_s] = (self.agents[name].action_dim, self.agents[name].buff_size) # self.agents[agent_s].action_init(self.communications_map) @@ -491,12 +495,14 @@ def reset(self): for agent in self.agents: self.agents[agent].complete_reset() - for comm_agents in self.communications_map.keys(): #reset buffers - self.agents[comm_agents].action_buffer.reset() + for comm_agent in self.communications_map.keys(): #reset buffers + self.agents[comm_agent].comms_buffer.reset() + s_t[comm_agent] = np.hstack((s_t[agent], self.agents[comm_agent].comms_buffer.request())) + - for agent, agent_map in self.comm_info["agent_map"].items(): - for comm_agents in agent_map: - s_t[agent] = np.hstack((s_t[agent], self.agents[comm_agents].action_buffer.request())) + # for agent, agent_map in self.comm_info["agent_map"].items(): + # for comm_agents in agent_map: + # s_t[agent] = np.hstack((s_t[agent], self.agents[comm_agents].comms_buffer.request())) stop_action = {} @@ -552,7 +558,10 @@ def step(self, action): done['__all__'] = done_check - for agent, agent_map in self.comm_info["agent_map"].items(): - for comm_agents in agent_map: - next_obs[agent] = np.hstack((next_obs[agent], self.agents[comm_agents].action_buffer.request())) + for comm_agent in 
self.communications_map.keys(): + next_obs[comm_agent] = np.hstack((next_obs[agent], self.agents[comm_agent].comms_buffer.request())) + + # for agent, agent_map in self.comm_info["agent_map"].items(): + # for comm_agents in agent_map: + # next_obs[agent] = np.hstack((next_obs[agent], self.agents[comm_agents].action_buffer.request())) return next_obs, reward, done, info From 32c56d9736ae8f3c3b5e3ae1a1c518daffa55290 Mon Sep 17 00:00:00 2001 From: rudrasohan Date: Tue, 17 Mar 2020 11:09:06 +0530 Subject: [PATCH 34/46] corrected obs formatting --- MADRaS/envs/gym_madras_v2.py | 26 +++++++++++++++++--------- MADRaS/utils/observation_handler.py | 5 ++++- 2 files changed, 21 insertions(+), 10 deletions(-) diff --git a/MADRaS/envs/gym_madras_v2.py b/MADRaS/envs/gym_madras_v2.py index 2bfba79..964e8fc 100644 --- a/MADRaS/envs/gym_madras_v2.py +++ b/MADRaS/envs/gym_madras_v2.py @@ -211,7 +211,7 @@ def step_vanilla(self, action): done = self.done_handler.get_done_signal(self._config, game_state) - next_obs, full_obs = self.observation_handler.get_obs(self.ob, self._config) + next_obs, full_obs = self.observation_handler.get_obs(self.ob, self._config, full_flag=True) info['prev_dist'] = game_state["distance_traversed"] info['prev_damage'] = game_state["damage"] @@ -234,6 +234,8 @@ def step_pid(self, desire, e): reward = 0.0 + #import pdb; pdb.set_trace() + for PID_step in range(self._config.pid_settings['pid_latency']): a_t = self.PID_controller.get_action(desire) @@ -282,10 +284,12 @@ def step_pid(self, desire, e): print("[{}]: Stopping agent because some other agent has hit done".format(self.name)) break + #print("ORIG OB {}".format(self.ob)) + next_obs, full_obs = self.observation_handler.get_obs(self.ob, self._config, full_flag=True) + #print("NEXT OB {}".format(next_obs)) + #print("FULL OBS {}".format(full_obs._asdict())) - next_obs, full_obs = self.observation_handler.get_obs(self.ob, self._config) - - info['full_obs'] = full_obs + info['full_obs'] = full_obs._asdict() return next_obs, reward, done, info def step(self, action, e, return_dict={}): @@ -333,12 +337,15 @@ def actions_init(self, info={}): additional_dims_h += [np.inf] additional_dims_l += [-np.inf] additional_dims += 1 + elif ('action' == var): + additional_dims_h += [1]*self.action_dim + additional_dims_l += [-1]*self.action_dim + additional_dims += self.action_dim else: additional_dims_h += [1] additional_dims_l += [-1] additional_dims += 1 - - import pdb; pdb.set_trace() + self.obs_dim += additional_dims*len(info['comms']) additional_dims_h = additional_dims_h*len(info['comms']) additional_dims_l = additional_dims_l*len(info['comms']) @@ -499,6 +506,7 @@ def reset(self): self.agents[comm_agent].comms_buffer.reset() s_t[comm_agent] = np.hstack((s_t[agent], self.agents[comm_agent].comms_buffer.request())) + # for agent, agent_map in self.comm_info["agent_map"].items(): # for comm_agents in agent_map: @@ -536,7 +544,7 @@ def step(self, action): # self.agents[agent].action_buffer.insert(action[agent]) done_check = False - + #import pdb; pdb.set_trace() for agent in self.agents: next_obs[agent] = return_dict[agent][0] reward[agent] = return_dict[agent][1] @@ -553,8 +561,8 @@ def step(self, action): self.agents[agent].increment_(info[agent]['prev_dist'], info[agent]['prev_damage'], info[agent]['prev_pos']) for agent, agent_info in self.communications_map.items(): - for agent_s in agent_info["comms"]: - self.agents.comms_buffer.insert(agent[action], info[agent]["full_obs"], agent_info["vars"]) + for agent_c in agent_info["comms"]: + 
self.agents[agent].comms_buffer.insert(action[agent_c], info[agent_c]["full_obs"], agent_info["vars"]) done['__all__'] = done_check diff --git a/MADRaS/utils/observation_handler.py b/MADRaS/utils/observation_handler.py index 31257b5..d212c3f 100644 --- a/MADRaS/utils/observation_handler.py +++ b/MADRaS/utils/observation_handler.py @@ -15,9 +15,12 @@ def __init__(self, cfg, vision=False): except: raise ValueError("Unrecognized observation mode {}".format(cfg["mode"])) - def get_obs(self, full_obs, game_config): + def get_obs(self, full_obs, game_config, full_flag=False): if self.cfg["normalize"]: full_obs = self.normalize_obs(full_obs, game_config) + if not full_flag: + return self.obs.get_obs(full_obs) + else: return self.obs.get_obs(full_obs), full_obs def min_max_normalize(self, var, min, max): From d879291d6182921f56d5b9e71f3890bbe1b2fac1 Mon Sep 17 00:00:00 2001 From: rudrasohan Date: Tue, 17 Mar 2020 11:40:45 +0530 Subject: [PATCH 35/46] buffer fix and variable error fix --- MADRaS/envs/gym_madras_v2.py | 11 ++++++----- MADRaS/utils/communications_buffer.py | 17 +++++++++-------- 2 files changed, 15 insertions(+), 13 deletions(-) diff --git a/MADRaS/envs/gym_madras_v2.py b/MADRaS/envs/gym_madras_v2.py index 964e8fc..d4b7c81 100644 --- a/MADRaS/envs/gym_madras_v2.py +++ b/MADRaS/envs/gym_madras_v2.py @@ -314,20 +314,20 @@ def increment_step(self): def actions_init(self, info={}): - self.comms_buffer = cb.CommBuffer(self.name, self.buff_size, self.action_dim) + additional_dims_h = [] additional_dims_l = [] additional_dims = 0 for var in info["vars"]: - if (var == 'trackPos'): + if (var == 'track'): additional_dims_h += 19*[1] additional_dims_l += 19*[0] additional_dims += 19 - elif (var == 'track'): - additional_dims_h += [np.inf] - additional_dims_l += [-np.inf] + elif (var == 'trackPos'): + additional_dims_h += [1] + additional_dims_l += [-1] additional_dims += 1 elif (var == 'opponents'): additional_dims_h += 36*[1] @@ -347,6 +347,7 @@ def actions_init(self, info={}): additional_dims += 1 self.obs_dim += additional_dims*len(info['comms']) + self.comms_buffer = cb.CommBuffer(self.name, self.buff_size, additional_dims*len(info['comms'])) additional_dims_h = additional_dims_h*len(info['comms']) additional_dims_l = additional_dims_l*len(info['comms']) high = np.hstack((self.observation_space.high, additional_dims_h)) diff --git a/MADRaS/utils/communications_buffer.py b/MADRaS/utils/communications_buffer.py index 83f3dff..f454694 100644 --- a/MADRaS/utils/communications_buffer.py +++ b/MADRaS/utils/communications_buffer.py @@ -11,10 +11,10 @@ class CommBuffer: by an agent. 
""" - def __init__(self, agent_id, size, action_dim): + def __init__(self, agent_id, size, buffer_dim): self._agent_id = agent_id self._size = size - self._action_dim = action_dim + self._buffer_dim = buffer_dim self._buffer = Queue(maxsize=self._size) self._curr_size = 0 @@ -29,11 +29,11 @@ def insert(self, action, full_obs, var_list): def request(self): temp_queue = Queue(maxsize=self._size) - ret = np.zeros((self._size*self._action_dim,), dtype=mt.floatX) + ret = np.zeros((self._size*self._buffer_dim,), dtype=mt.floatX) for i in range(self._curr_size): - a = self._buffer.get() - temp_queue.put(a) - ret[i*self._action_dim: (i+1)*self._action_dim] = a + agent_var = self._buffer.get() + temp_queue.put(agent_var) + ret[i*self._buffer_dim: (i+1)*self._buffer_dim] = agent_var self._buffer = temp_queue return ret @@ -43,10 +43,11 @@ def parse_buffer_items(self, action, full_obs, var_list): if var == 'action': buffer_array.append(action) else: - val = None - exec("val = full_obs.{}".format(var)) + val = full_obs[var] buffer_array.append(val) buffer_array = np.hstack(buffer_array) + #print("BUFFER_{}".format(buffer_array)) + #print("BUFFER_SIZE {}".format(buffer_array.shape)) return buffer_array def reset(self): From 7d4e9d72544c13285f00db5e664ccb1e61f7c2d9 Mon Sep 17 00:00:00 2001 From: rudrasohan Date: Wed, 18 Mar 2020 10:54:17 +0530 Subject: [PATCH 36/46] IV COMM working --- MADRaS/agents/generic/test_environment_v2.py | 13 +- MADRaS/envs/data/communications.yml | 4 +- MADRaS/envs/data/sim_options_v2.yml | 205 ++++++++++--------- MADRaS/envs/gym_madras_v2.py | 55 ++--- MADRaS/utils/communications_buffer.py | 24 +-- MADRaS/utils/config_parser.py | 2 +- MADRaS/utils/reward_handler.py | 1 - 7 files changed, 140 insertions(+), 164 deletions(-) diff --git a/MADRaS/agents/generic/test_environment_v2.py b/MADRaS/agents/generic/test_environment_v2.py index bbba663..fe309b2 100644 --- a/MADRaS/agents/generic/test_environment_v2.py +++ b/MADRaS/agents/generic/test_environment_v2.py @@ -21,8 +21,11 @@ def test_madras_vanilla(): " Verify if the number of dimensions {} is right.".format(obs, len(obs))) print("Testing step...") a = [0.0, 1.0, -1.0] + a = [0.0, 0.2, 0.0] + b = [0.1, 0.3, 0.0] + c = [0.2, 0.4, 0.0] for t in range(4000): - obs, r, done, _ = env.step({"MadrasAgent_0": a}) + obs, r, done, _ = env.step({"MadrasAgent_0": a, "MadrasAgent_1": b, "MadrasAgent_2": c}) if ((t+1)%150 == 0): a = [0.0, -1.0, 1.0] print("{}: reward={}, done={}".format(t, r, done)) @@ -41,6 +44,7 @@ def test_madras_pid(): obs = env.reset() a = [0.0, 0.2] b = [0.1, 0.00] + c = [0.2, -0.2] print("Initial observation: {}." 
" Verify if the number of dimensions is right.".format(obs)) for key, value in obs.items(): @@ -48,8 +52,7 @@ def test_madras_pid(): print("Testing step...") running_rew = 0 for t in range(4000): - obs, r, done, _ = env.step({"MadrasAgent_0": a, "MadrasAgent_1": b - }) + obs, r, done, _ = env.step({"MadrasAgent_0": a, "MadrasAgent_1": b, "MadrasAgent_2": c}) #print("{}".format(obs)) #if ((t+1)%15 == 0): # a = [0.0, 0.0] @@ -62,5 +65,5 @@ def test_madras_pid(): if __name__=='__main__': - #test_madras_vanilla() - test_madras_pid() \ No newline at end of file + test_madras_vanilla() + #test_madras_pid() \ No newline at end of file diff --git a/MADRaS/envs/data/communications.yml b/MADRaS/envs/data/communications.yml index f977e01..b493eb4 100644 --- a/MADRaS/envs/data/communications.yml +++ b/MADRaS/envs/data/communications.yml @@ -4,9 +4,10 @@ MadrasAgent_0: - action - speedX - speedY - - trackPos + - track comms: - MadrasAgent_1 + - MadrasAgent_0 MadrasAgent_1: @@ -16,5 +17,6 @@ MadrasAgent_1: - speedX - speedY - trackPos + - opponents comms: - MadrasAgent_0 diff --git a/MADRaS/envs/data/sim_options_v2.yml b/MADRaS/envs/data/sim_options_v2.yml index 17314f2..c88f646 100644 --- a/MADRaS/envs/data/sim_options_v2.yml +++ b/MADRaS/envs/data/sim_options_v2.yml @@ -1,9 +1,10 @@ torcs_server_port: 60934 -visualise: True # whether to render TORCS window +visualise: False # whether to render TORCS window no_of_visualisations: 1 # To visualize multiple training instances (under MPI) in parallel set it to more than 1 track_len: 2587.54 # in metres. CG Track 3. All track lengths can be found here: http://www.berniw.org/trb/tracks/tracklist.php max_steps: 1000 # max episode length track_width: 10.0 +init_wait: 25 #initial waiting period for the learning agents after the ep starts vision: False # whether to use vision input # agent config: @@ -14,11 +15,11 @@ agents: gear_change: False client_max_steps: -1 # to be interpreted as np.inf target_speed: 27.78 # metres per sec - state_dim: 60 # 24 + state_dim: 24 # 24 early_stop: True normalize_actions: True # all actions in [-1, 1] # PID params - pid_assist: True + pid_assist: False pid_settings: accel_pid: - 10.5 # a_x @@ -34,7 +35,7 @@ agents: # Observation mode observations: - mode: SingleAgentInTrafficObs + mode: SingleAgentSimpleLapObs multi_flag: True buff_size: 1 normalize: False # gym_torcs normalizes by default @@ -77,11 +78,11 @@ agents: gear_change: False client_max_steps: -1 # to be interpreted as np.inf target_speed: 27.78 # metres per sec - state_dim: 60 # 24 + state_dim: 24 # 24 early_stop: True normalize_actions: True # all actions in [-1, 1] # PID params - pid_assist: True + pid_assist: False pid_settings: accel_pid: - 10.5 # a_x @@ -97,7 +98,7 @@ agents: # Observation mode observations: - mode: SingleAgentInTrafficObs + mode: SingleAgentSimpleLapObs multi_flag: True buff_size: 1 normalize: False # gym_torcs normalizes by default @@ -134,101 +135,17 @@ agents: - TurnBackward - OutOfTrack - # - MadrasAgent: - # vision: False # whether to use vision input - # throttle: True - # gear_change: False - # client_max_steps: -1 # to be interpreted as np.inf - # target_speed: 12.89 # metres per sec - # state_dim: 60 - # early_stop: True - # normalize_actions: True # all actions in [-1, 1] - # # PID params - # pid_assist: False - # pid_settings: - # accel_pid: - # - 10.5 # a_x - # - 0.05 # a_y - # - 2.8 # a_z - # steer_pid: - # - 5.1 - # - 0.001 - # - 0.000001 - # accel_scale: 1.0 - # steer_scale: 0.1 - # pid_latency: 10 - - # # Observation mode - # 
observations: - # mode: SingleAgentSimpleLapObs - # multi_flag: False - # buff_size: 1 - # normalize: False # gym_torcs normalizes by default - # obs_min: - # angle: -3.142 - # track: 0.0 - # trackPos: -1.0 - # obs_max: - # angle: 3.142 - # track: 200.0 - # trackPos: 1.0 - - # # Reward function - # rewards: - # ProgressReward2: - # scale: 1.0 - # AvgSpeedReward: - # scale: 1.0 - # CollisionPenalty: - # scale: 10.0 - # TurnBackwardPenalty: - # scale: 10.0 - # AngAcclPenalty: - # scale: 1.0 - # max_ang_accl: 2.0 - # # TaskReward: - # # scale: 1.0 - - # # Done function - # dones: - # - RaceOver - # - TimeOut - # - TurnBackward - # - OutOfTrack - -# Traffic agents -traffic: - - ParkedAgent: - track_len: 2587.54 - target_speed: 50 - parking_lane_pos: - low: -0.54 - high: -0.47 - parking_dist_from_start: - low: 35 - high: 35 - collision_time_window: 1.2 - pid_settings: - accel_pid: - - 10.5 # a_x - - 0.05 # a_y - - 2.8 # a_z - steer_pid: - - 5.1 - - 0.001 - - 0.000001 - accel_scale: 1.0 - steer_scale: 0.1 - - ParkedAgent: - track_len: 2587.54 - target_speed: 50 - parking_lane_pos: - low: 0.47 - high: 0.54 - parking_dist_from_start: - low: 35 - high: 35 - collision_time_window: 1.2 + - MadrasAgent: + vision: False # whether to use vision input + throttle: True + gear_change: False + client_max_steps: -1 # to be interpreted as np.inf + target_speed: 12.89 # metres per sec + state_dim: 24 + early_stop: True + normalize_actions: True # all actions in [-1, 1] + # PID params + pid_assist: False pid_settings: accel_pid: - 10.5 # a_x @@ -240,6 +157,90 @@ traffic: - 0.000001 accel_scale: 1.0 steer_scale: 0.1 + pid_latency: 10 + + # Observation mode + observations: + mode: SingleAgentSimpleLapObs + multi_flag: False + buff_size: 1 + normalize: False # gym_torcs normalizes by default + obs_min: + angle: -3.142 + track: 0.0 + trackPos: -1.0 + obs_max: + angle: 3.142 + track: 200.0 + trackPos: 1.0 + + # Reward function + rewards: + ProgressReward2: + scale: 1.0 + AvgSpeedReward: + scale: 1.0 + CollisionPenalty: + scale: 10.0 + TurnBackwardPenalty: + scale: 10.0 + AngAcclPenalty: + scale: 1.0 + max_ang_accl: 2.0 + # TaskReward: + # scale: 1.0 + + # Done function + dones: + - RaceOver + - TimeOut + - TurnBackward + - OutOfTrack + +# Traffic agents +# traffic: +# - ParkedAgent: +# track_len: 2587.54 +# target_speed: 50 +# parking_lane_pos: +# low: -0.54 +# high: -0.47 +# parking_dist_from_start: +# low: 35 +# high: 35 +# collision_time_window: 1.2 +# pid_settings: +# accel_pid: +# - 10.5 # a_x +# - 0.05 # a_y +# - 2.8 # a_z +# steer_pid: +# - 5.1 +# - 0.001 +# - 0.000001 +# accel_scale: 1.0 +# steer_scale: 0.1 +# - ParkedAgent: +# track_len: 2587.54 +# target_speed: 50 +# parking_lane_pos: +# low: 0.47 +# high: 0.54 +# parking_dist_from_start: +# low: 35 +# high: 35 +# collision_time_window: 1.2 +# pid_settings: +# accel_pid: +# - 10.5 # a_x +# - 0.05 # a_y +# - 2.8 # a_z +# steer_pid: +# - 5.1 +# - 0.001 +# - 0.000001 +# accel_scale: 1.0 +# steer_scale: 0.1 # - ParkedAgent: # target_speed: 80 # parking_lane_pos: diff --git a/MADRaS/envs/gym_madras_v2.py b/MADRaS/envs/gym_madras_v2.py index d4b7c81..dc059d1 100644 --- a/MADRaS/envs/gym_madras_v2.py +++ b/MADRaS/envs/gym_madras_v2.py @@ -216,7 +216,7 @@ def step_vanilla(self, action): info['prev_dist'] = game_state["distance_traversed"] info['prev_damage'] = game_state["damage"] info['prev_pos'] = game_state["racePos"] - info['full_obs'] = full_obs + info['full_obs'] = full_obs._asdict() return next_obs, reward, done, info @@ -270,7 +270,7 @@ def step_pid(self, 
desire, e): self.prev_dist = game_state["distance_traversed"] self.prev_damage = game_state["prev_damage"] self.prev_pos = game_state["racePos"] - #print("DISTANCE_TRAVERSED:{} SPEED:{}".format(game_state["distance_traversed"], (self.ob.speedX*self.default_speed))) + if self._config.pid_assist: self.PID_controller.update(self.ob) done = self.done_handler.get_done_signal(self._config, game_state) @@ -284,10 +284,7 @@ def step_pid(self, desire, e): print("[{}]: Stopping agent because some other agent has hit done".format(self.name)) break - #print("ORIG OB {}".format(self.ob)) next_obs, full_obs = self.observation_handler.get_obs(self.ob, self._config, full_flag=True) - #print("NEXT OB {}".format(next_obs)) - #print("FULL OBS {}".format(full_obs._asdict())) info['full_obs'] = full_obs._asdict() return next_obs, reward, done, info @@ -312,7 +309,7 @@ def increment_step(self): self.step_num += 1 #self.increment_dist() - def actions_init(self, info={}): + def comms_init(self, info={}): additional_dims_h = [] @@ -346,6 +343,7 @@ def actions_init(self, info={}): additional_dims_l += [-1] additional_dims += 1 + print("NAME:{}, lencomms:{}".format(self.name, len(info['comms']))) self.obs_dim += additional_dims*len(info['comms']) self.comms_buffer = cb.CommBuffer(self.name, self.buff_size, additional_dims*len(info['comms'])) additional_dims_h = additional_dims_h*len(info['comms']) @@ -364,14 +362,13 @@ def __init__(self, cfg_path=DEFAULT_SIM_OPTIONS_FILE): # If `visualise` is set to False torcs simulator will run in headless mode self._config = config_parser.MadrasEnvConfig() self._config.update(config_parser.parse_yaml(cfg_path)) - self.communications_map = None + self.communications_map = dict() self.torcs_proc = None self.seed() self.start_torcs_process() self.num_agents = 0 self.agents = OrderedDict() - self.comm_info = {"agent":{}, "agent_map":{}} - self.comm_agent_names = [] + self.init_wait = self._config.init_wait if self._config.traffic: self.traffic_handler = traffic.MadrasTrafficHandler( self._config.torcs_server_port, len(self.agents), self._config.traffic) @@ -398,17 +395,7 @@ def __init__(self, cfg_path=DEFAULT_SIM_OPTIONS_FILE): print("COMMUNICATIONS MAP", self.communications_map) for comm_agent, agent_attrs in self.communications_map.items(): - self.agents[comm_agent].actions_init(agent_attrs) - # for agent_s, agents_r in self.communications_map.items(): - # self.comm_info["agent"][agent_s] = (self.agents[name].action_dim, self.agents[name].buff_size) - # self.agents[agent_s].action_init(self.communications_map) - # for agent in self.comm_agent_names: - # self.agents[agent].actions_init(self.comm_info) - # self.comm_info["agent_map"][agent] = [] - # for comm_agent in self.comm_agent_names: - # if comm_agent != agent: - # self.comm_info["agent_map"][agent].append(comm_agent) - + self.agents[comm_agent].comms_init(agent_attrs) # self.action_dim = self.agents[0].action_dim # TODO(santara): Can I not have different action dims for different agents? 
self.action_space = self.agents["MadrasAgent_0"].action_space @@ -484,14 +471,10 @@ def reset(self): s_t = OrderedDict() - # Create clients and connect their sockets - - + # Create clients and connect their sockets for agent in self.agents: self.agents[agent].create_new_client() - - # Collect first observations for agent in self.agents: self.agents[agent].prev_dist = 0.0 @@ -499,6 +482,7 @@ def reset(self): self.agents[agent].client.respond_to_server() # To elimate 10s of timeout error, responding to the server after obs self.agents[agent].increment_(self.agents[agent].client.S.d["distRaced"], self.agents[agent].client.S.d["damage"], self.agents[agent].client.S.d["racePos"]) print("[{}] Reset: Starting new episode DAMAGE{}".format(agent, self.agents[agent].client.S.d["damage"])) + # Finish reset for agent in self.agents: self.agents[agent].complete_reset() @@ -507,13 +491,6 @@ def reset(self): self.agents[comm_agent].comms_buffer.reset() s_t[comm_agent] = np.hstack((s_t[agent], self.agents[comm_agent].comms_buffer.request())) - - - # for agent, agent_map in self.comm_info["agent_map"].items(): - # for comm_agents in agent_map: - # s_t[agent] = np.hstack((s_t[agent], self.agents[comm_agents].comms_buffer.request())) - - stop_action = {} for agent_name, agent in self.agents.items(): if (agent.action_dim == 3): @@ -521,8 +498,7 @@ def reset(self): else: stop_action[agent_name] = [0.0, 0.0] - for i in range(25): - #print("WAITING {}".format(i)) + for i in range(self.init_wait): next_obs, next_reward, next_done, nexxt_info = self.step(stop_action) s_t = next_obs return s_t @@ -541,11 +517,7 @@ def step(self, action): for proc in jobs: proc.join() - # for agent in self.comm_agent_names: - # self.agents[agent].action_buffer.insert(action[agent]) - done_check = False - #import pdb; pdb.set_trace() for agent in self.agents: next_obs[agent] = return_dict[agent][0] reward[agent] = return_dict[agent][1] @@ -562,15 +534,14 @@ def step(self, action): self.agents[agent].increment_(info[agent]['prev_dist'], info[agent]['prev_damage'], info[agent]['prev_pos']) for agent, agent_info in self.communications_map.items(): + full_obs_list = [] for agent_c in agent_info["comms"]: - self.agents[agent].comms_buffer.insert(action[agent_c], info[agent_c]["full_obs"], agent_info["vars"]) + full_obs_list.append((info[agent_c]["full_obs"], action[agent_c])) + self.agents[agent].comms_buffer.insert(full_obs_list, agent_info["vars"]) done['__all__'] = done_check for comm_agent in self.communications_map.keys(): next_obs[comm_agent] = np.hstack((next_obs[agent], self.agents[comm_agent].comms_buffer.request())) - # for agent, agent_map in self.comm_info["agent_map"].items(): - # for comm_agents in agent_map: - # next_obs[agent] = np.hstack((next_obs[agent], self.agents[comm_agents].action_buffer.request())) return next_obs, reward, done, info diff --git a/MADRaS/utils/communications_buffer.py b/MADRaS/utils/communications_buffer.py index f454694..d8b0540 100644 --- a/MADRaS/utils/communications_buffer.py +++ b/MADRaS/utils/communications_buffer.py @@ -18,14 +18,14 @@ def __init__(self, agent_id, size, buffer_dim): self._buffer = Queue(maxsize=self._size) self._curr_size = 0 - def insert(self, action, full_obs, var_list): + def insert(self, full_obs, var_list): if (self._curr_size < self._size): self._curr_size += 1 - self._buffer.put(self.parse_buffer_items(action, full_obs, var_list)) + self._buffer.put(self.parse_buffer_items(full_obs, var_list)) else: _ = self._buffer.get() - 
self._buffer.put(self.parse_buffer_items(action, full_obs, var_list)) + self._buffer.put(self.parse_buffer_items(full_obs, var_list)) def request(self): temp_queue = Queue(maxsize=self._size) @@ -37,17 +37,17 @@ def request(self): self._buffer = temp_queue return ret - def parse_buffer_items(self, action, full_obs, var_list): + def parse_buffer_items(self, full_obs_list, var_list): buffer_array = [] - for var in var_list: - if var == 'action': - buffer_array.append(action) - else: - val = full_obs[var] - buffer_array.append(val) + for full_obs_act in full_obs_list: + full_obs, action = full_obs_act + for var in var_list: + if var == 'action': + buffer_array.append(action) + else: + val = full_obs[var] + buffer_array.append(val) buffer_array = np.hstack(buffer_array) - #print("BUFFER_{}".format(buffer_array)) - #print("BUFFER_SIZE {}".format(buffer_array.shape)) return buffer_array def reset(self): diff --git a/MADRaS/utils/config_parser.py b/MADRaS/utils/config_parser.py index 1ad6a44..731cfe1 100644 --- a/MADRaS/utils/config_parser.py +++ b/MADRaS/utils/config_parser.py @@ -28,7 +28,7 @@ def update(self, cfg_dict): if cfg_dict is None: return direct_attributes = ['torcs_server_port', 'visualise', 'vision', 'no_of_visualizations', 'track_len', - 'max_steps', 'agents', 'traffic'] + 'max_steps', 'agents', 'traffic', 'init_wait'] for key in direct_attributes: if key in cfg_dict: exec("self.{} = {}".format(key, cfg_dict[key])) diff --git a/MADRaS/utils/reward_handler.py b/MADRaS/utils/reward_handler.py index 42fdc18..0b243b7 100644 --- a/MADRaS/utils/reward_handler.py +++ b/MADRaS/utils/reward_handler.py @@ -78,7 +78,6 @@ def reset(self): class ProgressReward2(ProgressReward): def compute_reward(self, game_config, game_state): - print(self.prev_dist) target_speed = game_config.target_speed / 50 # m/step progress = game_state["distance_traversed"] - game_state["prev_distance_traversed"] reward = self.cfg["scale"] * np.min([1.0, progress/target_speed]) From 41fccccf1587f1f99f28ce5ca39383bb20b1a73e Mon Sep 17 00:00:00 2001 From: rudrasohan Date: Mon, 23 Mar 2020 19:49:11 +0530 Subject: [PATCH 37/46] removed deprecated examples of V1 --- MADRaS/example_controllers/__init__.py | 0 .../behavior_reflex/DDPG/OU.py | 7 - .../behavior_reflex/DDPG/ReplayBuffer.py | 45 --- .../behavior_reflex/DDPG/__init__.py | 0 .../behavior_reflex/DDPG/actor_network.py | 143 ---------- .../behavior_reflex/DDPG/critic_network.py | 135 --------- .../behavior_reflex/DDPG/ddpg.py | 141 --------- .../behavior_reflex/__init__.py | 0 .../behavior_reflex/configurations.py | 28 -- .../behavior_reflex/multi_agent.py | 24 -- .../behavior_reflex/playGame_DDPG.py | 210 -------------- MADRaS/example_controllers/pid/DDPG/OU.py | 23 -- .../pid/DDPG/ReplayBuffer.py | 75 ----- .../example_controllers/pid/DDPG/__init__.py | 0 .../pid/DDPG/actor_network.py | 136 --------- .../pid/DDPG/critic_network.py | 133 --------- MADRaS/example_controllers/pid/DDPG/ddpg.py | 173 ------------ MADRaS/example_controllers/pid/__init__.py | 0 .../pid/configurations.yml | 20 -- MADRaS/example_controllers/pid/multi_agent.py | 19 -- .../pid/playGame_DDPG_pid.py | 267 ------------------ 21 files changed, 1579 deletions(-) delete mode 100644 MADRaS/example_controllers/__init__.py delete mode 100644 MADRaS/example_controllers/behavior_reflex/DDPG/OU.py delete mode 100644 MADRaS/example_controllers/behavior_reflex/DDPG/ReplayBuffer.py delete mode 100644 MADRaS/example_controllers/behavior_reflex/DDPG/__init__.py delete mode 100644 
MADRaS/example_controllers/behavior_reflex/DDPG/actor_network.py delete mode 100644 MADRaS/example_controllers/behavior_reflex/DDPG/critic_network.py delete mode 100644 MADRaS/example_controllers/behavior_reflex/DDPG/ddpg.py delete mode 100644 MADRaS/example_controllers/behavior_reflex/__init__.py delete mode 100644 MADRaS/example_controllers/behavior_reflex/configurations.py delete mode 100644 MADRaS/example_controllers/behavior_reflex/multi_agent.py delete mode 100644 MADRaS/example_controllers/behavior_reflex/playGame_DDPG.py delete mode 100644 MADRaS/example_controllers/pid/DDPG/OU.py delete mode 100644 MADRaS/example_controllers/pid/DDPG/ReplayBuffer.py delete mode 100644 MADRaS/example_controllers/pid/DDPG/__init__.py delete mode 100644 MADRaS/example_controllers/pid/DDPG/actor_network.py delete mode 100644 MADRaS/example_controllers/pid/DDPG/critic_network.py delete mode 100644 MADRaS/example_controllers/pid/DDPG/ddpg.py delete mode 100644 MADRaS/example_controllers/pid/__init__.py delete mode 100644 MADRaS/example_controllers/pid/configurations.yml delete mode 100644 MADRaS/example_controllers/pid/multi_agent.py delete mode 100644 MADRaS/example_controllers/pid/playGame_DDPG_pid.py diff --git a/MADRaS/example_controllers/__init__.py b/MADRaS/example_controllers/__init__.py deleted file mode 100644 index e69de29..0000000 diff --git a/MADRaS/example_controllers/behavior_reflex/DDPG/OU.py b/MADRaS/example_controllers/behavior_reflex/DDPG/OU.py deleted file mode 100644 index 7b58ce9..0000000 --- a/MADRaS/example_controllers/behavior_reflex/DDPG/OU.py +++ /dev/null @@ -1,7 +0,0 @@ -import random -import numpy as np - -class OU(object): - - def function(self, x, mu, theta, sigma): - return theta * (mu - x) + sigma * np.random.randn(1) \ No newline at end of file diff --git a/MADRaS/example_controllers/behavior_reflex/DDPG/ReplayBuffer.py b/MADRaS/example_controllers/behavior_reflex/DDPG/ReplayBuffer.py deleted file mode 100644 index 7fc88de..0000000 --- a/MADRaS/example_controllers/behavior_reflex/DDPG/ReplayBuffer.py +++ /dev/null @@ -1,45 +0,0 @@ -from collections import deque -import random - -class ReplayBuffer(object): - - def __init__(self, buffer_size): - self.buffer_size = buffer_size - self.num_experiences = 0 - self.buffer = deque() - self.mean_reward = 0.0 - - def getBatch(self, batch_size): - # Randomly sample batch_size examples - if self.num_experiences < batch_size: - return random.sample(self.buffer, self.num_experiences) - else: - return random.sample(self.buffer, batch_size) - - def size(self): - return self.buffer_size - - def add(self, state, action, reward, new_state, done): - experience = (state, action, reward, new_state, done) - if self.num_experiences < self.buffer_size: - self.buffer.append(experience) - self.num_experiences += 1 - else: - self.buffer.popleft() - self.buffer.append(experience) - self.mean_reward = ( (self.mean_reward * (self.num_experiences-1)) + reward) / float( self.num_experiences ) - - - def count(self): - # if buffer is full, return buffer size - # otherwise, return experience counter - return self.num_experiences - - def getMeanReward(self): - # if buffer is full, return buffer size - # otherwise, return experience counter - return self.mean_reward - - def erase(self): - self.buffer = deque() - self.num_experiences = 0 diff --git a/MADRaS/example_controllers/behavior_reflex/DDPG/__init__.py b/MADRaS/example_controllers/behavior_reflex/DDPG/__init__.py deleted file mode 100644 index e69de29..0000000 diff --git 
a/MADRaS/example_controllers/behavior_reflex/DDPG/actor_network.py b/MADRaS/example_controllers/behavior_reflex/DDPG/actor_network.py deleted file mode 100644 index 8dba638..0000000 --- a/MADRaS/example_controllers/behavior_reflex/DDPG/actor_network.py +++ /dev/null @@ -1,143 +0,0 @@ -import tensorflow as tf -import numpy as np -import math - - -# Hyper Parameters -LAYER1_SIZE = 300 -LAYER2_SIZE = 400 -LEARNING_RATE = 1e-4 -TAU = 0.001 -BATCH_SIZE = 32 -class ActorNetwork: - """docstring for ActorNetwork""" - def __init__(self,sess,state_dim,action_dim): - - self.sess = sess - self.state_dim = state_dim - self.action_dim = action_dim - - # create actor network - self.state_input,self.action_output,self.net = self.create_network(state_dim,action_dim) - - # create target actor network - self.target_state_input,self.target_action_output,self.target_update,self.target_net = self.create_target_network(state_dim,action_dim,self.net) - - # define training rules - self.create_training_method() - - # self.sess.run(tf.initialize_all_variables()) - self.sess.run(tf.global_variables_initializer()) - - self.update_target() - #self.load_network() - - def create_training_method(self): - self.q_gradient_input = tf.placeholder("float",[None,self.action_dim]) - self.parameters_gradients = tf.gradients(self.action_output,self.net,-self.q_gradient_input) - '''for i, grad in enumerate(self.parameters_gradients): - if grad is not None: - self.parameters_gradients[i] = tf.clip_by_value(grad, -2.0,2.0)''' - self.optimizer = tf.train.AdamOptimizer(LEARNING_RATE).apply_gradients(zip(self.parameters_gradients,self.net)) - - def create_network(self,state_dim,action_dim): - - layer1_size = LAYER1_SIZE - layer2_size = LAYER2_SIZE - - state_input = tf.placeholder("float",[None,state_dim]) - - W1 = self.variable([state_dim,layer1_size],state_dim) - b1 = self.variable([layer1_size],state_dim) - W2 = self.variable([layer1_size,layer2_size],layer1_size) - b2 = self.variable([layer2_size],layer1_size) - #W3 = tf.Variable(tf.random_uniform([layer2_size,action_dim],-3e-3,3e-3)) - #b3 = tf.Variable(tf.random_uniform([action_dim],-3e-3,3e-3)) - - W_steer = tf.Variable(tf.random_uniform([layer2_size,1],-1e-4,1e-4)) - b_steer = tf.Variable(tf.random_uniform([1],-1e-4,1e-4)) - - W_accel = tf.Variable(tf.random_uniform([layer2_size,1],-1e-4,1e-4)) - b_accel = tf.Variable(tf.random_uniform([1],-1e-4,1e-4)) - - W_brake = tf.Variable(tf.random_uniform([layer2_size,1],-1e-4,1e-4)) - b_brake = tf.Variable(tf.random_uniform([1],-1e-4,1e-4)) - - - layer1 = tf.nn.relu(tf.matmul(state_input,W1) + b1) - layer2 = tf.nn.relu(tf.matmul(layer1,W2) + b2) - - - steer = tf.tanh(tf.matmul(layer2,W_steer) + b_steer) - accel = tf.sigmoid(tf.matmul(layer2,W_accel) + b_accel) - brake = tf.sigmoid(tf.matmul(layer2,W_brake) + b_brake) - - # action_output = tf.concat(1, [steer, accel, brake]) - action_output = tf.concat([steer, accel, brake], 1) - - return state_input,action_output,[W1,b1,W2,b2,W_steer,b_steer,W_accel,b_accel,W_brake,b_brake] - - def create_target_network(self,state_dim,action_dim,net): - state_input = tf.placeholder("float",[None,state_dim]) - ema = tf.train.ExponentialMovingAverage(decay=1-TAU) - target_update = ema.apply(net) - target_net = [ema.average(x) for x in net] - - layer1 = tf.nn.relu(tf.matmul(state_input,target_net[0]) + target_net[1]) - layer2 = tf.nn.relu(tf.matmul(layer1,target_net[2]) + target_net[3]) - - steer = tf.tanh(tf.matmul(layer2,target_net[4]) + target_net[5]) - accel = tf.sigmoid(tf.matmul(layer2,target_net[6]) + 
target_net[7]) - brake = tf.sigmoid(tf.matmul(layer2,target_net[8]) + target_net[9]) - - # action_output = tf.concat(1, [steer, accel, brake]) - action_output = tf.concat([steer, accel, brake], 1) - return state_input,action_output,target_update,target_net - - - - - def update_target(self): - self.sess.run(self.target_update) - - def train(self,q_gradient_batch,state_batch): - self.sess.run(self.optimizer,feed_dict={ - self.q_gradient_input:q_gradient_batch, - self.state_input:state_batch - }) - - def actions(self,state_batch): - return self.sess.run(self.action_output,feed_dict={ - self.state_input:state_batch - }) - - def action(self,state): - return self.sess.run(self.action_output,feed_dict={ - self.state_input:[state] - })[0] - - - def target_actions(self,state_batch): - return self.sess.run(self.target_action_output,feed_dict={ - self.target_state_input:state_batch - }) - - # f fan-in size - def variable(self,shape,f): - return tf.Variable(tf.random_uniform(shape,-1/math.sqrt(f),1/math.sqrt(f))) - ''' - def load_network(self): - self.saver = tf.train.Saver() - checkpoint = tf.train.get_checkpoint_state("saved_actor_networks") - if checkpoint and checkpoint.model_checkpoint_path: - self.saver.restore(self.sess, checkpoint.model_checkpoint_path) - print "Successfully loaded:", checkpoint.model_checkpoint_path - else: - print "Could not find old network weights" - def save_network(self,time_step): - print 'save actor-network...',time_step - self.saver.save(self.sess, 'saved_actor_networks/' + 'actor-network', global_step = time_step) - - ''' - - diff --git a/MADRaS/example_controllers/behavior_reflex/DDPG/critic_network.py b/MADRaS/example_controllers/behavior_reflex/DDPG/critic_network.py deleted file mode 100644 index 6d6c709..0000000 --- a/MADRaS/example_controllers/behavior_reflex/DDPG/critic_network.py +++ /dev/null @@ -1,135 +0,0 @@ - -import tensorflow as tf -import numpy as np -import math - - -LAYER1_SIZE = 300 -LAYER2_SIZE = 600 -LEARNING_RATE = 1e-3 -TAU = 0.001 -L2 = 0.0001 - -class CriticNetwork: - """docstring for CriticNetwork""" - def __init__(self,sess,state_dim,action_dim): - self.time_step = 0 - self.sess = sess - # create q network - self.state_input,\ - self.action_input,\ - self.q_value_output,\ - self.net = self.create_q_network(state_dim,action_dim) - - # create target q network (the same structure with q network) - self.target_state_input,\ - self.target_action_input,\ - self.target_q_value_output,\ - self.target_update = self.create_target_q_network(state_dim,action_dim,self.net) - - self.create_training_method() - - # initialization - # self.sess.run(tf.initialize_all_variables()) - self.sess.run(tf.global_variables_initializer()) - - self.update_target() - - def create_training_method(self): - # Define training optimizer - self.y_input = tf.placeholder("float",[None,1]) - weight_decay = tf.add_n([L2 * tf.nn.l2_loss(var) for var in self.net]) - self.cost = tf.reduce_mean(tf.square(self.y_input - self.q_value_output)) + weight_decay - self.optimizer = tf.train.AdamOptimizer(LEARNING_RATE).minimize(self.cost) - '''self.optimizer = tf.train.AdamOptimizer(LEARNING_RATE) - self.parameters_gradients = self.optimizer.compute_gradients(self.cost) - - for i, (grad,var) in enumerate(self.parameters_gradients): - if grad is not None: - self.parameters_gradients[i] = (tf.clip_by_value(grad, -5.0,5.0),var) - - self.train_op = self.optimizer.apply_gradients(self.parameters_gradients)''' - self.action_gradients = tf.gradients(self.q_value_output,self.action_input) - - def 
create_q_network(self,state_dim,action_dim): - # the layer size could be changed - layer1_size = LAYER1_SIZE - layer2_size = LAYER2_SIZE - - state_input = tf.placeholder("float",[None,state_dim]) - action_input = tf.placeholder("float",[None,action_dim]) - - W1 = self.variable([state_dim,layer1_size],state_dim) - b1 = self.variable([layer1_size],state_dim) - W2 = self.variable([layer1_size,layer2_size],layer1_size+action_dim) - W2_action = self.variable([action_dim,layer2_size],layer1_size+action_dim) - b2 = self.variable([layer2_size],layer1_size+action_dim) - W3 = tf.Variable(tf.random_uniform([layer2_size,1],-3e-3,3e-3)) - b3 = tf.Variable(tf.random_uniform([1],-3e-3,3e-3)) - - layer1 = tf.nn.relu(tf.matmul(state_input,W1) + b1) - layer2 = tf.nn.relu(tf.matmul(layer1,W2) + tf.matmul(action_input,W2_action) + b2) - q_value_output = tf.identity(tf.matmul(layer2,W3) + b3) - - return state_input,action_input,q_value_output,[W1,b1,W2,W2_action,b2,W3,b3] - - def create_target_q_network(self,state_dim,action_dim,net): - - state_input = tf.placeholder("float",[None,state_dim]) - action_input = tf.placeholder("float",[None,action_dim]) - - ema = tf.train.ExponentialMovingAverage(decay=1-TAU) - target_update = ema.apply(net) - target_net = [ema.average(x) for x in net] - - layer1 = tf.nn.relu(tf.matmul(state_input,target_net[0]) + target_net[1]) - layer2 = tf.nn.relu(tf.matmul(layer1,target_net[2]) + tf.matmul(action_input,target_net[3]) + target_net[4]) - q_value_output = tf.identity(tf.matmul(layer2,target_net[5]) + target_net[6]) - - return state_input,action_input,q_value_output,target_update - - def update_target(self): - self.sess.run(self.target_update) - - def train(self,y_batch,state_batch,action_batch): - self.time_step += 1 - self.sess.run(self.optimizer,feed_dict={ - self.y_input:y_batch, - self.state_input:state_batch, - self.action_input:action_batch - }) - - def gradients(self,state_batch,action_batch): - return self.sess.run(self.action_gradients,feed_dict={ - self.state_input:state_batch, - self.action_input:action_batch - })[0] - - def target_q(self,state_batch,action_batch): - return self.sess.run(self.target_q_value_output,feed_dict={ - self.target_state_input:state_batch, - self.target_action_input:action_batch - }) - - def q_value(self,state_batch,action_batch): - return self.sess.run(self.q_value_output,feed_dict={ - self.state_input:state_batch, - self.action_input:action_batch}) - - # f fan-in size - def variable(self,shape,f): - return tf.Variable(tf.random_uniform(shape,-1/math.sqrt(f),1/math.sqrt(f))) - ''' - def load_network(self): - self.saver = tf.train.Saver() - checkpoint = tf.train.get_checkpoint_state("saved_critic_networks") - if checkpoint and checkpoint.model_checkpoint_path: - self.saver.restore(self.sess, checkpoint.model_checkpoint_path) - print "Successfully loaded:", checkpoint.model_checkpoint_path - else: - print "Could not find old network weights" - - def save_network(self,time_step): - print 'save critic-network...',time_step - self.saver.save(self.sess, 'saved_critic_networks/' + 'critic-network', global_step = time_step) - ''' diff --git a/MADRaS/example_controllers/behavior_reflex/DDPG/ddpg.py b/MADRaS/example_controllers/behavior_reflex/DDPG/ddpg.py deleted file mode 100644 index 16e2404..0000000 --- a/MADRaS/example_controllers/behavior_reflex/DDPG/ddpg.py +++ /dev/null @@ -1,141 +0,0 @@ - -# ----------------------------------- -# Deep Deterministic Policy Gradient -# Author: Flood Sung -# Date: 2016.5.4 -# ----------------------------------- - 
-import gym -import tensorflow as tf -import numpy as np -from example_controllers.behavior_reflex.DDPG.OU import OU -import math, random -from example_controllers.behavior_reflex.DDPG.critic_network import CriticNetwork -from example_controllers.behavior_reflex.DDPG.actor_network import ActorNetwork -from example_controllers.behavior_reflex.DDPG.ReplayBuffer import ReplayBuffer -#from configurations import save_location - - -# Hyper Parameters: - -REPLAY_BUFFER_SIZE = 100000 -REPLAY_START_SIZE = 100 -BATCH_SIZE = 32 -GAMMA = 0.99 - - -class DDPG: - """docstring for DDPG""" - def __init__(self, env_name, state_dim,action_dim,save_location): - self.name = 'DDPG' # name for uploading results - self.env_name = env_name - # Randomly initialize actor network and critic network - # with both their target networks - self.state_dim = state_dim - self.action_dim = action_dim - self.save_location = save_location - # Ensure action bound is symmetric - self.time_step = 0 - self.sess = tf.InteractiveSession() - - self.actor_network = ActorNetwork(self.sess,self.state_dim,self.action_dim) - self.critic_network = CriticNetwork(self.sess,self.state_dim,self.action_dim) - - # initialize replay buffer - self.replay_buffer = ReplayBuffer(REPLAY_BUFFER_SIZE) - - # Initialize a random process the Ornstein-Uhlenbeck process for action exploration - self.OU = OU() - - # loading networks - self.saver = tf.train.Saver(max_to_keep=0) - checkpoint = tf.train.get_checkpoint_state(self.save_location) - if checkpoint and checkpoint.model_checkpoint_path: - self.saver.restore(self.sess, checkpoint.model_checkpoint_path) - print("Successfully loaded:", checkpoint.model_checkpoint_path) - else: - print("Could not find old network weights") - - def train(self): - #print "train step",self.time_step - # Sample a random minibatch of N transitions from replay buffer - minibatch = self.replay_buffer.getBatch(BATCH_SIZE) - state_batch = np.asarray([data[0] for data in minibatch]) - action_batch = np.asarray([data[1] for data in minibatch]) - reward_batch = np.asarray([data[2] for data in minibatch]) - next_state_batch = np.asarray([data[3] for data in minibatch]) - done_batch = np.asarray([data[4] for data in minibatch]) - - # for action_dim = 1 - action_batch = np.resize(action_batch,[BATCH_SIZE,self.action_dim]) - - # Calculate y_batch - - next_action_batch = self.actor_network.target_actions(next_state_batch) - q_value_batch = self.critic_network.target_q(next_state_batch,next_action_batch) - y_batch = [] - for i in range(len(minibatch)): - if done_batch[i]: - y_batch.append(reward_batch[i]) - else : - y_batch.append(reward_batch[i] + GAMMA * q_value_batch[i]) - y_batch = np.resize(y_batch,[BATCH_SIZE,1]) - # Update critic by minimizing the loss L - self.critic_network.train(y_batch,state_batch,action_batch) - - # Update the actor policy using the sampled gradient: - action_batch_for_gradients = self.actor_network.actions(state_batch) - q_gradient_batch = self.critic_network.gradients(state_batch,action_batch_for_gradients) - - self.actor_network.train(q_gradient_batch,state_batch) - - # Update the target networks - self.actor_network.update_target() - self.critic_network.update_target() - - def saveNetwork(self,i): - self.saver.save(self.sess, self.save_location + 'network' + str(i)+'DDPG.ckpt', global_step = self.time_step,) - - - def action(self,state): - action = self.actor_network.action(state) - action[0] = np.clip( action[0], -1 , 1 ) - action[1] = np.clip( action[1], 0 , 1 ) - action[2] = np.clip( action[2], 0 , 1 ) - 
#print "Action:", action - return action - - def noise_action(self,state,epsilon): - # Select action a_t according to the current policy and exploration noise - action = self.actor_network.action(state) - #print action.shape - #print "Action_No_Noise:", action - noise_t = np.zeros(self.action_dim) - noise_t[0] = epsilon * self.OU.function(action[0], 0.0 , 0.60, 0.80) - noise_t[1] = epsilon * self.OU.function(action[1], 0.5 , 1.00, 0.10) - noise_t[2] = epsilon * self.OU.function(action[2], -0.1 , 1.00, 0.05) - - if random.random() <= 0.1: - # print("********Stochastic brake***********") - noise_t[2] = epsilon * self.OU.function(action[2], 0.2 , 1.00, 0.10) - - action = action + noise_t - action[0] = np.clip(action[0], -1, 1) - action[1] = np.clip(action[1], 0 , 1) - action[2] = np.clip(action[2], 0 , 1) - - #print "Action_Noise:", action - return action - - def perceive(self,state,action,reward,next_state,done): - # Store transition (s_t,a_t,r_t,s_{t+1}) in replay buffer - - if ( not (math.isnan( reward ))): - self.replay_buffer.add(state,action,reward,next_state,done) - - self.time_step = self.time_step + 1 - # Store transitions to replay start size then start training - if self.replay_buffer.count() > REPLAY_START_SIZE: - self.train() - - diff --git a/MADRaS/example_controllers/behavior_reflex/__init__.py b/MADRaS/example_controllers/behavior_reflex/__init__.py deleted file mode 100644 index e69de29..0000000 diff --git a/MADRaS/example_controllers/behavior_reflex/configurations.py b/MADRaS/example_controllers/behavior_reflex/configurations.py deleted file mode 100644 index d1b0522..0000000 --- a/MADRaS/example_controllers/behavior_reflex/configurations.py +++ /dev/null @@ -1,28 +0,0 @@ -################################################################### -# -# All the configurations are done here. -# -# 1. Toggle the is_training flag to 0 to test a saved model. -# 2. epsilon_start is, as the name suggests, where the annealing epsilon starts from -# 3. 
total_explore is used as : epsilon -= 1/total_explore -# -################################################################### - -visualize_after = 5 -is_training = 1 - -total_explore = 600000.0 -max_eps = 6000 -max_steps_eps = 10000 - -wait_at_beginning = 0 -initial_wait_period = 200 # to give the other cars a headstart of these many steps - -epsilon_start = 1 -start_reward = -10000 # these need to be changed if restarting the playGame.py script - -save_location = './weights/' - -torcsPort = 3101 -configFile = '~/.torcs/config/raceman/quickrace.xml' -# configFile = '~/.torcs/config/raceman/practice.xml' diff --git a/MADRaS/example_controllers/behavior_reflex/multi_agent.py b/MADRaS/example_controllers/behavior_reflex/multi_agent.py deleted file mode 100644 index 97e288b..0000000 --- a/MADRaS/example_controllers/behavior_reflex/multi_agent.py +++ /dev/null @@ -1,24 +0,0 @@ -import threading -import multiprocessing -import numpy as np -import tensorflow as tf -import example_controllers.behavior_reflex.playGame_DDPG as playGame_DDPG -import os -from random import choice -from time import sleep -from time import time -#import pymp - -with tf.device("/cpu:0"): - num_workers = 3 #multiprocessing.cpu_count() #use this when you want to use all the cpus - print("numb of workers is" + str(num_workers)) - -with tf.Session() as sess: - worker_threads = [] -#with pymp.Parallel(4) as p: #uncomment this for parallelization of threads - for i in range(num_workers): - worker_work = lambda: (playGame_DDPG.playGame(f_diagnostics=""+str(i), train_indicator=1, port=3001+i)) - t = threading.Thread(target=(worker_work)) - t.start() - sleep(0.5) - worker_threads.append(t) diff --git a/MADRaS/example_controllers/behavior_reflex/playGame_DDPG.py b/MADRaS/example_controllers/behavior_reflex/playGame_DDPG.py deleted file mode 100644 index becefd0..0000000 --- a/MADRaS/example_controllers/behavior_reflex/playGame_DDPG.py +++ /dev/null @@ -1,210 +0,0 @@ -import sys -from utils.gym_torcs import TorcsEnv -import numpy as np -import utils.snakeoil3_gym as snakeoil3 -import collections as col -import random -import argparse -import tensorflow as tf -import timeit -import math -import sys -import os -from example_controllers.behavior_reflex.configurations import * -from example_controllers.behavior_reflex.DDPG.ddpg import * - -import gc -gc.enable() - -np.random.seed(1337) - - -def playGame(f_diagnostics, train_indicator, port=3101): # 1 means Train, 0 means simply Run - - action_dim = 3 #Steering/Acceleration/Brake - state_dim = 65 #of sensors input - - env_name = 'Torcs_Env' - weights_location = save_location + str(port) + "/" - agent = DDPG(env_name, state_dim, action_dim, weights_location) - - # Generate a Torcs environment - print("I have been asked to use port: ", port) - env = TorcsEnv(vision=False, throttle=True, gear_change=False) - ob = None - while ob is None: - try: - client = snakeoil3.Client(p=port, vision=False) # Open new UDP in vtorcs - client.MAX_STEPS = np.inf - - client.get_servers_input(0) # Get the initial input from torcs - - obs = client.S.d # Get the current full-observation from torcs - ob = env.make_observation(obs) - - s_t = np.hstack((ob.angle, ob.track, ob.trackPos, ob.speedX, ob.speedY, ob.speedZ, ob.wheelSpinVel/100.0, ob.rpm, ob.opponents)) - except: - pass - - EXPLORE = total_explore - episode_count = max_eps - max_steps = max_steps_eps - epsilon = epsilon_start - done = False - epsilon_steady_state = 0.01 # This is used for early stopping. 
- - totalSteps = 0 - best_reward = -100000 - running_avg_reward = 0. - - - print("TORCS Experiment Start.") - for i in range(episode_count): - - early_stop = 1 - total_reward = 0. - info = {'termination_cause':0} - distance_traversed = 0. - speed_array=[] - trackPos_array=[] - - print('\n\nStarting new episode...\n') - - for step in range(max_steps): - - # Take noisy actions during training - if (train_indicator): - epsilon -= 1.0 / EXPLORE - epsilon = max(epsilon, epsilon_steady_state) - a_t = agent.noise_action(s_t,epsilon) - else: - a_t = agent.action(s_t) # [steer, accel, brake] - try: - ob, r_t, done, info = env.step(step, client, a_t, early_stop) - if done: - break - analyse_info(info, printing=False) - - s_t1 = np.hstack((ob.angle, ob.track, ob.trackPos, ob.speedX, ob.speedY, ob.speedZ, ob.wheelSpinVel/100.0, ob.rpm, ob.opponents)) - distance_traversed += ob.speedX*np.cos(ob.angle) #Assuming 1 step = 1 second - speed_array.append(ob.speedX*np.cos(ob.angle)) - trackPos_array.append(ob.trackPos) - - #Checking for nan rewards: - if (math.isnan( r_t )): - r_t = 0.0 - for bad_r in range( 50 ): - print( 'Bad Reward Found' ) - break - - # Add to replay buffer only if training - if (train_indicator): - agent.perceive(s_t,a_t,r_t,s_t1,done) # Add experience to replay buffer - - except Exception as e: - print("Exception caught after training, at port " + str(i) + str(e) ) - ob = None - while ob is None: - try: - client = snakeoil3.Client(p=port, vision=False) # Open new UDP in vtorcs - client.MAX_STEPS = np.inf - client.get_servers_input(0) # Get the initial input from torcs - obs = client.S.d # Get the current full-observation from torcs - ob = env.make_observation(obs) - except: - pass - continue - total_reward += r_t - s_t = s_t1 - - # Displaying progress every 15 steps. 
- if ( (np.mod(step,15)==0) ): - print("Episode", i, "Step", step, "Epsilon", epsilon , "Action", a_t, "Reward", r_t ) - - totalSteps += 1 - if done: - break - - if ((train_indicator ==1 )): - if(i%300 == 0): - agent.saveNetwork(i) - - running_avg_reward = running_average(running_avg_reward, i+1, total_reward) - - - print("TOTAL REWARD @ " + str(i) +"-th Episode : Num_Steps= " + str(step) + "; Max_steps= " + str(max_steps) +"; Reward= " + str(total_reward) +"; Running average reward= " + str(running_avg_reward)) - print("Total Step: " + str(totalSteps)) - print("") - - print(info) - try: - if 'termination_cause' in info.keys() and info['termination_cause']=='hardReset': - print('Hard reset by some agent') - ob, client = env.reset(client=client, relaunch=True) - else: - ob, client = env.reset(client=client, relaunch=True) - except Exception as e: - print("Exception caught after episode end, at port " + str(i) + str(e) ) - ob = None - while ob is None: - try: - client = snakeoil3.Client(p=port, vision=False) # Open new UDP in vtorcs - client.MAX_STEPS = np.inf - client.get_servers_input(0) # Get the initial input from torcs - obs = client.S.d # Get the current full-observation from torcs - ob = env.make_observation(obs) - except: - print("Another exception caught while handling exception, at port " + str(i) + str(e) ) - - - s_t = np.hstack((ob.angle, ob.track, ob.trackPos, ob.speedX, ob.speedY, ob.speedZ, ob.wheelSpinVel/100.0, ob.rpm, ob.opponents)) - - env.end() # This is for shutting down TORCS - print("Finish.") - -def document_episode(episode_no, distance_traversed, speed_array, trackPos_array, info, running_avg_reward, f_diagnostics): - """ - Note down a tuple of diagnostic values for each episode - (episode_no, distance_traversed, mean(speed_array), std(speed_array), mean(trackPos_array), std(trackPos_array), info[termination_cause], running_avg_reward) - """ - f_diagnostics.write(str(episode_no)+",") - f_diagnostics.write(str(distance_traversed)+",") - f_diagnostics.write(str(np.mean(speed_array))+",") - f_diagnostics.write(str(np.std(speed_array))+",") - f_diagnostics.write(str(np.mean(trackPos_array))+",") - f_diagnostics.write(str(np.std(trackPos_array))+",") - f_diagnostics.write(str(info['termination_cause'])+",") - f_diagnostics.write(str(running_avg_reward)+"\n") - - -def running_average(prev_avg, num_episodes, new_val): - total = prev_avg*(num_episodes-1) - total += new_val - return np.float(total/num_episodes) - -def analyse_info(info, printing=True): - simulation_state = ['Normal', 'Terminated as car is OUT OF TRACK', 'Terminated as car has SMALL PROGRESS', 'Terminated as car has TURNED BACKWARDS'] - if printing and info['termination_cause']!=0: - print(simulation_state[info['termination_cause']]) - -if __name__ == "__main__": - - try: - port = int(sys.argv[1]) - except Exception as e: - # raise e - print("Usage : python %s " % (sys.argv[0])) - sys.exit() - - print('is_training : ' + str(is_training)) - print('Starting best_reward : ' + str(start_reward)) - print( total_explore ) - print( max_eps ) - print( max_steps_eps ) - print( epsilon_start ) - print('config_file : ' + str(configFile)) - - # f_diagnostics = open('output_logs/diagnostics_for_window_' + sys.argv[1]+'_with_fixed_episode_length', 'w') #Add date and time to file name - f_diagnostics = "" - playGame(f_diagnostics, train_indicator=0, port=port) - # f_diagnostics.close() diff --git a/MADRaS/example_controllers/pid/DDPG/OU.py b/MADRaS/example_controllers/pid/DDPG/OU.py deleted file mode 100644 index 
4488514..0000000 --- a/MADRaS/example_controllers/pid/DDPG/OU.py +++ /dev/null @@ -1,23 +0,0 @@ -"""Noise Function.""" -import numpy as np - - -class OU(object): - """Ornstein Uhlenbeck Process.""" - - def function(self, x, mu, theta, sigma): - """The stochastic equation.""" - return theta * (mu - x) + sigma * np.random.randn(1) - - -if __name__ == "__main__": - NUM_SAMPLES = 1000 - x = np.linspace(0, 10, NUM_SAMPLES) - noise = OU() - dx = np.asarray([noise.function(x_i, 5, 0.15, 0.1) - for x_i in x]).reshape((NUM_SAMPLES,)) - y = x + dx - import matplotlib.pyplot as plt - plt.plot(x, y, c='r') - plt.plot(x, x) - plt.show() diff --git a/MADRaS/example_controllers/pid/DDPG/ReplayBuffer.py b/MADRaS/example_controllers/pid/DDPG/ReplayBuffer.py deleted file mode 100644 index b8cc65e..0000000 --- a/MADRaS/example_controllers/pid/DDPG/ReplayBuffer.py +++ /dev/null @@ -1,75 +0,0 @@ -"""Prioritized experience replay buffer.""" -import random -from queue import PriorityQueue -from utils.madras_datatypes import Madras - -madras = Madras() - - -class experience(object): - """Experience class format.""" - - def __init__(self, state, action, reward, next_state, done, TD): - """Init Method""" - self.state = state - self.action = action - self.reward = reward - self.next_state = next_state - self.done = done - self.TD = TD - - def __gt__(self, other): - """Redifining >.""" - return (self.TD > other.TD) - - def __lt__(self, other): - """Redifining <.""" - return (self.TD < other.TD) - - def __eq__(self, other): - """Redifining ==.""" - return (self.TD == other.TD) - -class ReplayBuffer(object): - """Prioritized Experiance replay.""" - - def __init__(self, buffer_size): - """Init Method.""" - self.buffer_size = buffer_size - self.num_experiences = 0 - self.buffer = PriorityQueue() - self.mean_reward = 0.0 - - def getBatch(self, batch_size): - """Sampling.""" - if self.num_experiences < batch_size: - return random.sample(list(self.buffer.queue), self.num_experiences) - else: - return random.sample(list(self.buffer.queue), batch_size) - - def size(self): - """Get size.""" - return self.buffer_size - - def add(self, state, action, reward, new_state, done, TD): - """Add Experience.""" - exp = experience(state, action, reward, new_state, done, TD) - if self.num_experiences < self.buffer_size: - self.buffer.put(exp) - self.num_experiences += 1 - else: - self.buffer.get() - self.buffer.put(exp) - self.mean_reward = ((self.mean_reward * (self.num_experiences - 1)) + - reward) / madras.floatX(self.num_experiences) - - def count(self): - """If buffer is full. - - Return buffer size otherwise, return experience counter. 
- """ - return self.num_experiences - - def getMeanReward(self): - """Get Mean Reward.""" - return self.mean_reward diff --git a/MADRaS/example_controllers/pid/DDPG/__init__.py b/MADRaS/example_controllers/pid/DDPG/__init__.py deleted file mode 100644 index e69de29..0000000 diff --git a/MADRaS/example_controllers/pid/DDPG/actor_network.py b/MADRaS/example_controllers/pid/DDPG/actor_network.py deleted file mode 100644 index d5e7d99..0000000 --- a/MADRaS/example_controllers/pid/DDPG/actor_network.py +++ /dev/null @@ -1,136 +0,0 @@ -"""Actor-Network.""" - -import tensorflow as tf - -# Hyper Parameters -LAYER1_SIZE = 300 -LAYER2_SIZE = 400 -LEARNING_RATE = 1e-4 -TAU = 0.001 -BATCH_SIZE = 32 - - -class ActorNetwork: - """docstring for ActorNetwork.""" - - def __init__(self, sess, state_dim, action_dim): - """Init Method.""" - self.sess = sess - self.state_dim = state_dim - self.action_dim = action_dim - # create actor network - self.state_input, self.action_output, self.net =\ - self.create_network(state_dim, action_dim) - - # create target actor network - self.target_state_input, self.target_action_output, self.target_update, self.target_net =\ - self.create_target_network(state_dim, action_dim, self.net) - - # define training rules - self.create_training_method() - - # self.sess.run(tf.initialize_all_variables()) - self.sess.run(tf.global_variables_initializer()) - - self.update_target() - # self.load_network() - - def create_training_method(self): - """Trainer.""" - self.q_gradient_input = tf.placeholder("float", [None, - self.action_dim]) - self.parameters_gradients = tf.gradients(self.action_output, - self.net, - -self.q_gradient_input) - self.optimizer = tf.train.AdamOptimizer(LEARNING_RATE).apply_gradients( - list(zip(self.parameters_gradients, self.net))) - - def create_network(self, state_dim, action_dim): - """Network Creater.""" - layer1_size = LAYER1_SIZE - layer2_size = LAYER2_SIZE - - state_input = tf.placeholder("float", [None, state_dim]) - regularization = tf.contrib.layers.l2_regularizer(scale=0.01) - - # ###################GET_VARIABLES################## - W1 = tf.get_variable("W1", shape=[state_dim, layer1_size], - regularizer=regularization) - b1 = tf.get_variable("b1", shape=[layer1_size], - regularizer=regularization, - initializer=tf.zeros_initializer) - W2 = tf.get_variable("W2", shape=[layer1_size, layer2_size], - regularizer=regularization) - b2 = tf.get_variable("b2", shape=[layer2_size], - regularizer=regularization, - initializer=tf.zeros_initializer) - - layer1 = tf.nn.relu(tf.matmul(state_input, W1) + b1) - layer1_norm = tf.layers.batch_normalization(layer1) - layer2 = tf.nn.relu(tf.matmul(layer1_norm, W2) + b2) - layer2_norm = tf.layers.batch_normalization(layer2) - - W_trackpos = tf.get_variable("W_trackpos", shape=[layer2_size, 1], - regularizer=regularization) - b_trackpos = tf.get_variable("b_trackpos", shape=[1], - regularizer=regularization, - initializer=tf.zeros_initializer) - W_vel = tf.get_variable("W_vel", shape=[layer2_size, 1], - regularizer=regularization) - b_vel = tf.get_variable("b_vel", shape=[1], - regularizer=regularization, - initializer=tf.zeros_initializer) - - trackpos = tf.nn.relu(tf.matmul(layer2_norm, W_trackpos) + b_trackpos) - trackpos_res = trackpos + tf.gather(state_input, indices=[20], axis=1) - - velocity = tf.nn.relu(tf.matmul(layer2_norm, W_vel) + b_vel) - velocity_res = velocity + tf.gather(state_input, indices=[21], axis=1) - action_output = tf.concat([trackpos_res, velocity_res], 1) - - return state_input, action_output, 
[W1, b1, W2, b2, - W_trackpos, b_trackpos, - W_vel, b_vel] - - def create_target_network(self, state_dim, action_dim, net): - """Target Network Creater.""" - state_input = tf.placeholder("float", [None, state_dim]) - ema = tf.train.ExponentialMovingAverage(decay=1 - TAU) - target_update = ema.apply(net) - target_net = [ema.average(x) for x in net] - - layer1 = tf.nn.relu(tf.matmul(state_input, target_net[0]) + target_net[1]) - layer2 = tf.nn.relu(tf.matmul(layer1, target_net[2]) + target_net[3]) - - trackpos = tf.nn.relu(tf.matmul(layer2, target_net[4]) + target_net[5]) - trackpos_res = trackpos + tf.gather(state_input, indices=[20], axis=1) - velocity = tf.nn.relu(tf.matmul(layer2, target_net[6]) + target_net[7]) - velocity_res = velocity + tf.gather(state_input, indices=[21], axis=1) - - action_output = tf.concat([trackpos_res, velocity_res], 1) - return state_input, action_output, target_update, target_net - - def update_target(self): - """Target update.""" - self.sess.run(self.target_update) - - def train(self, q_gradient_batch, state_batch): - """Train Function.""" - self.sess.run(self.optimizer, feed_dict={ - self.q_gradient_input: q_gradient_batch, - self.state_input: state_batch}) - - def actions(self, state_batch): - """Get Network Output.""" - return self.sess.run(self.action_output, feed_dict={ - self.state_input: state_batch}) - - def action(self, state): - """Get output for a single obs.""" - return self.sess.run(self.action_output, feed_dict={ - self.state_input: [state]})[0] - - def target_actions(self, state_batch): - """Get Target Network Outputs.""" - return self.sess.run(self.target_action_output, feed_dict={ - self.target_state_input: state_batch}) diff --git a/MADRaS/example_controllers/pid/DDPG/critic_network.py b/MADRaS/example_controllers/pid/DDPG/critic_network.py deleted file mode 100644 index 33ea328..0000000 --- a/MADRaS/example_controllers/pid/DDPG/critic_network.py +++ /dev/null @@ -1,133 +0,0 @@ -"""Critic-Network.""" - -import tensorflow as tf - -LAYER1_SIZE = 300 -LAYER2_SIZE = 600 -LEARNING_RATE = 1e-3 -TAU = 0.001 -L2 = 0.0001 - - -class CriticNetwork: - """docstring for CriticNetwork.""" - - def __init__(self, sess, state_dim, action_dim): - """Init Method.""" - self.time_step = 0 - self.sess = sess - # create q network - self.state_input, self.action_input, self.q_value_output, self.net =\ - self.create_q_network(state_dim, action_dim) - - # create target q network (the same structure with q network) - self.target_state_input, self.target_action_input, self.target_q_value_output, self.target_update =\ - self.create_target_q_network(state_dim, action_dim, self.net) - - self.create_training_method() - - # initialization - # self.sess.run(tf.initialize_all_variables()) - self.sess.run(tf.global_variables_initializer()) - - self.update_target() - - def create_training_method(self): - """Define training optimizer.""" - self.y_input = tf.placeholder("float", [None, 1]) - weight_decay = tf.add_n([L2 * tf.nn.l2_loss(var) for var in self.net]) - self.TD_error = tf.reduce_mean(tf.square(self.y_input - self.q_value_output)) - self.cost = self.TD_error + weight_decay - self.optimizer = tf.train.AdamOptimizer(LEARNING_RATE).minimize(self.cost) - self.action_gradients = tf.gradients(self.q_value_output, self.action_input) - - def create_q_network(self, state_dim, action_dim): - """Q Network.""" - layer1_size = LAYER1_SIZE - layer2_size = LAYER2_SIZE - - state_input = tf.placeholder("float", [None, state_dim]) - action_input = tf.placeholder("float", [None, 
action_dim]) - regularization = tf.contrib.layers.l2_regularizer(scale=0.01) - - W1 = tf.get_variable("W1_c", shape=[state_dim, layer1_size], - regularizer=regularization) - b1 = tf.get_variable("b1_c", shape=[layer1_size], - regularizer=regularization) - W2 = tf.get_variable("W2_c", shape=[layer1_size, layer2_size], - regularizer=regularization) - W2_action = tf.get_variable("W2_action", shape=[action_dim, layer2_size], - regularizer=regularization) - b2 = tf.get_variable("b2_c", shape=[layer2_size], - regularizer=regularization) - W3 = tf.get_variable("W3_c", shape=[layer2_size, 1], - regularizer=regularization) - b3 = tf.get_variable("b3_c", shape=[1], regularizer=regularization) - - layer1 = tf.nn.relu(tf.matmul(state_input, W1) + b1) - layer1_norm = tf.layers.batch_normalization(layer1) - layer2 = tf.nn.relu(tf.matmul(layer1_norm, W2) + - tf.matmul(action_input, W2_action) + b2) - layer2_norm = tf.layers.batch_normalization(layer2) - q_value_output = tf.identity(tf.matmul(layer2_norm, W3) + b3) - - return state_input, action_input, q_value_output, [W1, b1, W2, - W2_action, - b2, W3, b3] - - def create_target_q_network(self, state_dim, action_dim, net): - """Target Q Network.""" - state_input = tf.placeholder("float", [None, state_dim]) - action_input = tf.placeholder("float", [None, action_dim]) - - ema = tf.train.ExponentialMovingAverage(decay=1 - TAU) - target_update = ema.apply(net) - target_net = [ema.average(x) for x in net] - - layer1 = tf.nn.relu(tf.matmul(state_input, target_net[0]) + - target_net[1]) - layer1_norm = tf.layers.batch_normalization(layer1) - layer2 = tf.nn.relu(tf.matmul(layer1_norm, target_net[2]) + - tf.matmul(action_input, target_net[3]) + - target_net[4]) - layer2_norm = tf.layers.batch_normalization(layer2) - q_value_output = tf.identity(tf.matmul(layer2_norm, target_net[5]) + - target_net[6]) - - return state_input,action_input,q_value_output,target_update - - def update_target(self): - """Update Target Network.""" - self.sess.run(self.target_update) - - def train(self, y_batch, state_batch, action_batch): - """Train Step.""" - self.time_step += 1 - self.sess.run(self.optimizer, feed_dict={ - self.y_input: y_batch, - self.state_input: state_batch, - self.action_input: action_batch}) - - def gradients(self, state_batch, action_batch): - """Get Network Gradients.""" - return self.sess.run(self.action_gradients, feed_dict={ - self.state_input: state_batch, - self.action_input: action_batch})[0] - - def target_q(self, state_batch, action_batch): - """Output of the Target Q Network.""" - return self.sess.run(self.target_q_value_output, feed_dict={ - self.target_state_input: state_batch, - self.target_action_input: action_batch}) - - def q_value(self, state_batch, action_batch): - """Get Q-Value.""" - return self.sess.run(self.q_value_output, feed_dict={ - self.state_input: state_batch, - self.action_input: action_batch}) - - def td_error(self, state, action, y): - """TD Error Calculation.""" - return self.sess.run(self.TD_error, feed_dict={self.y_input: y, - self.state_input: state, - self.action_input: action}) diff --git a/MADRaS/example_controllers/pid/DDPG/ddpg.py b/MADRaS/example_controllers/pid/DDPG/ddpg.py deleted file mode 100644 index e0ae64e..0000000 --- a/MADRaS/example_controllers/pid/DDPG/ddpg.py +++ /dev/null @@ -1,173 +0,0 @@ -"""Deep Deterministic Policy Gradient Algorithm.""" -import math -import random -import numpy as np -from copy import deepcopy -import tensorflow as tf - -from example_controllers.pid.DDPG.OU import OU -from 
example_controllers.pid.DDPG.critic_network import CriticNetwork -from example_controllers.pid.DDPG.actor_network import ActorNetwork -from example_controllers.pid.DDPG.ReplayBuffer import ReplayBuffer -from utils.display_utils import * - -# Hyper Parameters: - -REPLAY_BUFFER_SIZE = 100000 -REPLAY_START_SIZE = 100 -BATCH_SIZE = 32 -GAMMA = 0.99 - - -class DDPG: - """Class DDPG.""" - - def __init__(self, env_name, state_dim, action_dim, save_location, reward_threshold, thresh_coin_toss): - """Init Method.""" - self.name = 'DDPG' # name for uploading results - self.env_name = env_name - # Randomly initialize actor network and critic network - # with both their target networks - self.state_dim = state_dim - self.action_dim = action_dim - self.save_location = save_location - - # Ensure action bound is symmetric - self.time_step = 0 - self.sess = tf.InteractiveSession() - - self.actor_network = ActorNetwork(self.sess, self.state_dim, self.action_dim) - self.critic_network = CriticNetwork(self.sess, self.state_dim, self.action_dim) - - # initialize replay buffer - self.replay_buffer = ReplayBuffer(REPLAY_BUFFER_SIZE) - - # Initialize a random process the Ornstein-Uhlenbeck process noise - self.OU = OU() - self.visualize = [] - - self.episode_printer = StructuredPrinter() - self.step_printer = StructuredPrinter() - - # loading networks - self.saver = tf.train.Saver() - checkpoint = tf.train.get_checkpoint_state(save_location) - if checkpoint and checkpoint.model_checkpoint_path: - self.saver.restore(self.sess, checkpoint.model_checkpoint_path) - print("Successfully loaded:", checkpoint.model_checkpoint_path) - else: - print("Could not find old network weights") - self.reward_threshold = reward_threshold - self.thresh_coin_toss = thresh_coin_toss - - def train(self): - """Train Method.""" - # Sample a random minibatch of N transitions from replay buffer - minibatch = self.replay_buffer.getBatch(BATCH_SIZE) - state_batch = np.asarray([data.state for data in minibatch]) - action_batch = np.asarray([data.action for data in minibatch]) - reward_batch = np.asarray([data.reward for data in minibatch]) - next_state_batch = np.asarray([data.next_state for data in minibatch]) - done_batch = np.asarray([data.done for data in minibatch]) - - # for action_dim = 1 - action_batch = np.resize(action_batch, [BATCH_SIZE, self.action_dim]) - - # Calculate y_batch - - next_action_batch = self.actor_network.target_actions(next_state_batch) - q_value_batch = self.critic_network.target_q(next_state_batch, next_action_batch) - y_batch = [] - for i in range(len(minibatch)): - if done_batch[i]: - y_batch.append(reward_batch[i]) - else: - y_batch.append(reward_batch[i] + GAMMA * q_value_batch[i]) - y_batch = np.resize(y_batch, [BATCH_SIZE, 1]) - # Update critic by minimizing the loss L - self.critic_network.train(y_batch, state_batch, action_batch) - - # Update the actor policy using the sampled gradient: - action_batch_for_gradients = self.actor_network.actions(state_batch) - q_gradient_batch = self.critic_network.gradients(state_batch, action_batch_for_gradients) - self.actor_network.train(q_gradient_batch, state_batch) - # Update the target networks - self.actor_network.update_target() - self.critic_network.update_target() - - def saveNetwork(self, i): - """Saver.""" - self.saver.save(self.sess, self.save_location + - self.env_name + 'network' + str(i) + - 'DDPG.ckpt', global_step=self.time_step) - - def action(self, state): - """Compute action.""" - action = self.actor_network.action(state) - return action - - def 
noise_action(self, state, epsilon): - """Select action a_t according to the current policy and noise.""" - action = self.actor_network.action(state) - temp_action = deepcopy(action) - self.step_printer.data["pi_TrackPos"] = action[0] - self.step_printer.data["pi_Velocity"] = action[1] - noise_t = np.zeros(self.action_dim) - noise_t[0] = epsilon * self.OU.function(action[0], 0.0, 0.60, 0.80) - noise_t[1] = epsilon * self.OU.function(action[1], 0.5, 1.00, 0.10) - action = action + noise_t - action[0] = np.clip(action[0], -1, 1) - action[1] = np.clip(action[1], 0, 1) - self.visualize.append((temp_action[1], - self.critic_network.q_value( - state.reshape((1, self.state_dim)), - action.reshape((1, self.action_dim))))) - return action, self.step_printer - - def perceive(self, temp_buffer, traj_reward): - """Store transition (s_t,a_t,r_t,s_{t+1}) in replay buffer.""" - if (traj_reward > self.reward_threshold) or\ - (self.replay_buffer.num_experiences == 0): - self.episode_printer.data["Replay_Buffer"] = "GOOD-Added" - # print("Adding GOOD trajectory with reward %0.2f"%traj_reward) - for sample in temp_buffer: - if (not (math.isnan(sample[2]))): - next_action = self.actor_network.target_actions( - sample[3].reshape((1, self.state_dim))) - q_value = self.critic_network.target_q( - sample[3].reshape((1, self.state_dim)), - next_action.reshape((1, self.action_dim))) - y = sample[2] if sample[4] else sample[2] + GAMMA * q_value - TD = self.critic_network.td_error( - sample[0].reshape((1, self.state_dim)), - sample[1].reshape((1, self.action_dim)), - np.resize(y, [1, 1])) - self.replay_buffer.add(sample[0], sample[1], - sample[2], sample[3], sample[4], TD) - else: - if random.uniform(0, 1) < self.thresh_coin_toss: - self.episode_printer.data["Replay_Buffer"] = "BAD-Added" - # print("Adding LUCKY BAD trajectory with reward%0.2f"%traj_reward) - for sample in temp_buffer: - if (not (math.isnan(sample[2]))): - next_action = self.actor_network.target_actions( - sample[3].reshape((1, self.state_dim))) - q_value = self.critic_network.target_q( - sample[3].reshape((1, self.state_dim)), - next_action.reshape((1, self.action_dim))) - y = sample[2] if sample[4] else sample[2] + GAMMA * q_value - TD = self.critic_network.td_error( - sample[0].reshape((1, self.state_dim)), - sample[1].reshape((1, self.action_dim)), - np.resize(y, [1, 1])) - self.replay_buffer.add(sample[0], sample[1], - sample[2], sample[3], sample[4], TD) - else: - self.episode_printer.data["Replay_Buffer"] = "BAD-Rejected" - # print("REJECTING BAD trajectory with reward %0.2f"%traj_reward) - - self.time_step = self.time_step + 1 - # Store transitions to replay start size then start training - if self.replay_buffer.count() > REPLAY_START_SIZE: - self.train() - return self.episode_printer diff --git a/MADRaS/example_controllers/pid/__init__.py b/MADRaS/example_controllers/pid/__init__.py deleted file mode 100644 index e69de29..0000000 diff --git a/MADRaS/example_controllers/pid/configurations.yml b/MADRaS/example_controllers/pid/configurations.yml deleted file mode 100644 index 161bdd4..0000000 --- a/MADRaS/example_controllers/pid/configurations.yml +++ /dev/null @@ -1,20 +0,0 @@ -agent: - total_explore: 6000000.0 - max_eps: 400 - max_steps_eps: 5000 - epsilon_start: 0.4 - delta: 0.2 - start_reward: -10000.0 - reward_threshold: -200 - thresh_coin_toss: 0.3 - speed_buffer_size: 100 - velocity_limit: 100.0 - acceleration_limit: 0.5 - -configs: - save_location: "save_network_checkpoints/" - f_diagnostics: "./logger.log" - torcsPort: 3001 - configFile: 
"~/.torcs/config/raceman/quickrace.xml" - fig_save_dir: "./figures_heatmaps" - is_training: 1 diff --git a/MADRaS/example_controllers/pid/multi_agent.py b/MADRaS/example_controllers/pid/multi_agent.py deleted file mode 100644 index 26def08..0000000 --- a/MADRaS/example_controllers/pid/multi_agent.py +++ /dev/null @@ -1,19 +0,0 @@ -import threading -import multiprocessing -import tensorflow as tf -import example_controllers.pid.playGame_DDPG_pid as playGame_DDPG -from time import sleep, time - -with tf.device("/cpu:0"): - num_workers = 3 # multiprocessing.cpu_count() #use this when you want to use all the cpus - print("numb of workers is" + str(num_workers)) - -with tf.Session() as sess: - worker_threads = [] -#with pymp.Parallel(4) as p: #uncomment this for parallelization of threads - for i in range(num_workers): - worker_work = lambda: (playGame_DDPG.playGame(f_diagnostics=""+str(i), train_indicator=1, port=3001+i)) - t = threading.Thread(target=(worker_work)) - t.start() - sleep(0.5) - worker_threads.append(t) diff --git a/MADRaS/example_controllers/pid/playGame_DDPG_pid.py b/MADRaS/example_controllers/pid/playGame_DDPG_pid.py deleted file mode 100644 index f58c8c0..0000000 --- a/MADRaS/example_controllers/pid/playGame_DDPG_pid.py +++ /dev/null @@ -1,267 +0,0 @@ -""" -DDPG Training. - -PID Control = True -""" -import sys -import os -import gc -import yaml -from copy import deepcopy -import numpy as np -import matplotlib.pyplot as plt - -from utils.madras_datatypes import Madras -from utils.gym_madras import MadrasEnv -from utils.display_utils import * -from example_controllers.pid.DDPG.ddpg import * - - -np.random.seed(1337) -gc.enable() -figure = plt.figure() -madras = Madras() - -with open("example_controllers/pid/configurations.yml", "r") as ymlfile: - cfg = yaml.load(ymlfile) - -# config parameters are printed in main - - -def visualize_action_value(list_Q, fig, ep_no): - """Heatmap Plotter.""" - actions_lane = [] - actions_vel = [] - Q_values = [] - for act, q in list_Q: - actions_lane.append(act[0]) - actions_vel.append(act[1]) - Q_values.append(q[0]) - actions_vel = np.asarray(actions_vel) - actions_lane = np.asarray(actions_lane) - Q_values = np.asarray(Q_values) - plot_heatmap(actions_lane, actions_vel, Q_values) - # plt.subplot(211) - # plt.clf() - # plt.pause(1) - # plt.scatter(actions_lane, Q_values, alpha=0.5) - # plt.title("Lane_Pos") - # plt.subplot(212) - # plt.scatter(actions_vel, Q_values, alpha=0.5) - # plt.title("Velocity") - plt.savefig(os.path.join(cfg['configs']['fig_save_dir'], - "plot_" + str(ep_no) + ".png")) - # plt.show(block=False) - # plt.pause(1) - plt.clf() - # plt.pause(1) - # fig.clear() - - -def playGame(f_diagnostics, train_indicator, port=3101): - """Training Method.""" - EXPLORE = cfg['agent']['total_explore'] - episode_count = cfg['agent']['max_eps'] - max_steps = cfg['agent']['max_steps_eps'] - EPSILON = cfg['agent']['epsilon_start'] - DELTA = cfg['agent']['delta'] - done = False - epsilon_steady_state = 0.01 # This is used for early stopping. - running_average_distance = 0. - max_distance = 0. - best_reward = cfg['agent']['start_reward'] - running_avg_reward = 0. 
- weights_location = cfg['configs']['save_location'] + str(port) + "/" - episode_printer = StructuredPrinter(mode="episode") - step_printer = StructuredPrinter(mode="step") - # Set up a Torcs environment - print(("TORCS has been asked to use port: ", port)) - env = MadrasEnv(vision=False, throttle=True, gear_change=False, - port=port, pid_assist=True) - - agent = DDPG(env.env_name, env.state_dim, env.action_dim, weights_location, - cfg['agent']['reward_threshold'], cfg['agent']['thresh_coin_toss']) - - s_t = env.reset() - print("\n\n-----------------Experiment Begins-----------------------\n\n") - - for i in range(episode_count): - - save_indicator = 1 - temp_distance = 0. - total_reward = 0. - info = {'termination_cause': 0} - distance_traversed = 0. - speed_array = [] - trackPos_array = [] - dmg = 0.0 - dmg_S = 0.0 - discount = 0.1 - CRASH_COUNTER = 0 - SPEED_BUFFER_SIZE = cfg['agent']['speed_buffer_size'] - count_stop = 0 - temp_replay_buffer = [] - - for step in range(max_steps): - dst = env.distance_traversed - - if (train_indicator): - - step_printer.data["Dist_Raced"] = dst - if dst < running_average_distance: - epsilon = EPSILON - DELTA - step_printer.data["eps"] = epsilon - step_printer.explore_mode = -1 - elif dst > max_distance: - epsilon = EPSILON + DELTA - step_printer.data["eps"] = epsilon - step_printer.explore_mode = 1 - else: - epsilon = deepcopy(EPSILON) - step_printer.data["eps"] = epsilon - step_printer.explore_mode = 0 - EPSILON -= 1.0 / EXPLORE - EPSILON = max(EPSILON, epsilon_steady_state) - d_t, step_info = agent.noise_action(s_t, epsilon) - for k in list(step_info.data.keys()): - step_printer.data[k] = step_info.data[k] - # Giving the car an inital kick - if step <= 50: - d_t[1] = abs(d_t[1]) + 0.1 - - else: # Testing phase - d_t = agent.action(s_t) - try: - s_t1, r_t, done, info = env.step(d_t) - distance_traversed += env.distance_traversed - speed_array.append(env.ob.speedX * np.cos(env.ob.angle)) - trackPos_array.append(env.ob.trackPos) - # Add to replay buffer only if training - if train_indicator: - temp_replay_buffer.append([s_t, d_t, r_t, s_t1, done]) - total_reward += r_t - s_t = s_t1 - dmg_S = 0.1 * dmg_S + (env.ob.damage - dmg) * 10 - if not dmg == env.ob.damage: - CRASH_COUNTER = CRASH_COUNTER + 1 - dmg = env.ob.damage - step_printer.data["Desired_Trackpos"] = d_t[0] - step_printer.data["Desired_Velocity"] = d_t[1] - step_printer.data["Reward"] = r_t - step_printer.print_step(step) - - except: - pass - - if done: - temp_distance = dst - if temp_distance > max_distance: - max_distance = temp_distance - break - - if train_indicator: - episode_info = agent.perceive(temp_replay_buffer, total_reward) - # Train agent - - for k in list(episode_info.data.keys()): - episode_printer.data[k] = episode_info.data[k] - - temp_replay_buffer = [] - - # Saving the best model yet - if save_indicator == 1 and train_indicator == 1: - if best_reward < total_reward: - if(best_reward < total_reward): - best_reward = total_reward - agent.saveNetwork(port) - - running_avg_reward = running_average(running_avg_reward, - i + 1, total_reward) - running_average_distance = running_average(running_average_distance, - i + 1, temp_distance) - temp_distance = 0. 
- - episode_printer.data["Total_Steps"] = step - episode_printer.data["Agv_Speed"] = np.mean(speed_array) - episode_printer.data["Avg_TrackPos"] = np.mean(trackPos_array) - episode_printer.data["Dist_Traversed"] = distance_traversed - episode_printer.data["Traj_Reward"] = total_reward - episode_printer.data["Run_Avg_Traj_Reward"] = running_avg_reward - episode_printer.data["Run_Avg_Dist_Trav"] = running_average_distance - episode_printer.data["Max_Dist_Trav"] = max_distance - episode_printer.data["Replay_Buffer_Size"] = agent.replay_buffer.num_experiences - - episode_printer.print_episode(i) - ''' - agent.visualize = [] - for data in agent.replay_buffer.getBatch(500):#use getBatch(500) - here to reduce latency - action = np.asarray(data.action) - state = np.asarray(data.state) - agent.visualize.append((action,agent.critic_network.q_value(state.reshape((1,agent.state_dim)),action.reshape((1,agent.action_dim))))) - visualize_action_value(agent.visualize,figure,i) - agent.visualize = [] - ''' - s_t = env.reset(info) - """ document_episode(i, distance_traversed, speed_array, trackPos_array, - info, running_avg_reward, f_diagnostics) - """ - - env.end() # This is for shutting down TORCS-engine - plt.close(figure) - print("Finished.") - - -def document_episode(episode_no, distance_traversed, speed_array, - trackPos_array, info, running_avg_reward, f_diagnostics): - """ - Note down a tuple of diagnostic values for each episode. - - (episode_no, distance_traversed, mean(speed_array), std(speed_array), - mean(trackPos_array), std(trackPos_array), info[termination_cause], - running_avg_reward) - """ - f_diagnostics.write(str(episode_no) + ",") - f_diagnostics.write(str(distance_traversed) + ",") - f_diagnostics.write(str(np.mean(speed_array)) + ",") - f_diagnostics.write(str(np.std(speed_array)) + ",") - f_diagnostics.write(str(np.mean(trackPos_array)) + ",") - f_diagnostics.write(str(np.std(trackPos_array)) + ",") - f_diagnostics.write(str(running_avg_reward) + "\n") - - -def running_average(prev_avg, num_episodes, new_val): - """Running average compute.""" - total = prev_avg * (num_episodes - 1) - total += new_val - return madras.floatX(total / num_episodes) - -def analyse_info(info, printing=True): - """Print Helper Function""" - simulation_state = ['Normal', 'Terminated as car is OUT OF TRACK', - 'Terminated as car has SMALL PROGRESS', - 'Terminated as car has TURNED BACKWARDS'] - if printing and info['termination_cause'] != 0: - print((simulation_state[info['termination_cause']])) - -if __name__ == "__main__": - - try: - port = int(sys.argv[1]) - except Exception as e: - print("Usage : python %s " % (sys.argv[0])) - sys.exit() - - print('is_training : ' + str(cfg['configs']['is_training'])) - print('Starting best_reward : ' + str(cfg['agent']['start_reward'])) - print((cfg['agent']['total_explore'])) - print((cfg['agent']['max_eps'])) - print((cfg['agent']['max_steps_eps'])) - print((cfg['agent']['epsilon_start'])) - print('config_file : ' + cfg['configs']['configFile']) - f_diagnostics = open("logger_17June.log", 'w') - f_diagnostics.write("EPISODE,distance_traversed,mean_speed,std_speed,\ - mean_track,std_track,reward_runn\n") - playGame(f_diagnostics, train_indicator=cfg['configs']['is_training'], - port=port) - f_diagnostics.close() From 7473967c96bf36bc695be740f8a514aa3777d72f Mon Sep 17 00:00:00 2001 From: rudrasohan Date: Fri, 3 Apr 2020 20:36:55 +0530 Subject: [PATCH 38/46] addressed @Santara's comments --- MADRaS/envs/gym_madras_v2.py | 39 +++++++++++++++++------------------- 1 file 
changed, 18 insertions(+), 21 deletions(-) diff --git a/MADRaS/envs/gym_madras_v2.py b/MADRaS/envs/gym_madras_v2.py index dc059d1..f46a3d2 100644 --- a/MADRaS/envs/gym_madras_v2.py +++ b/MADRaS/envs/gym_madras_v2.py @@ -312,44 +312,44 @@ def increment_step(self): def comms_init(self, info={}): - additional_dims_h = [] - additional_dims_l = [] + additional_vars_high = [] + additional_vars_low = [] additional_dims = 0 for var in info["vars"]: if (var == 'track'): - additional_dims_h += 19*[1] - additional_dims_l += 19*[0] + additional_vars_high += 19*[1] + additional_vars_low += 19*[0] additional_dims += 19 elif (var == 'trackPos'): - additional_dims_h += [1] - additional_dims_l += [-1] + additional_vars_high += [1] + additional_vars_low += [-1] additional_dims += 1 elif (var == 'opponents'): - additional_dims_h += 36*[1] - additional_dims_l += 36*[0] + additional_vars_high += 36*[1] + additional_vars_low += 36*[0] additional_dims += 36 elif ('speed' in var): - additional_dims_h += [np.inf] - additional_dims_l += [-np.inf] + additional_vars_high += [np.inf] + additional_vars_low += [-np.inf] additional_dims += 1 elif ('action' == var): - additional_dims_h += [1]*self.action_dim - additional_dims_l += [-1]*self.action_dim + additional_vars_high += [1]*self.action_dim + additional_vars_low += [-1]*self.action_dim additional_dims += self.action_dim else: - additional_dims_h += [1] - additional_dims_l += [-1] + additional_vars_high += [1] + additional_vars_low += [-1] additional_dims += 1 print("NAME:{}, lencomms:{}".format(self.name, len(info['comms']))) self.obs_dim += additional_dims*len(info['comms']) self.comms_buffer = cb.CommBuffer(self.name, self.buff_size, additional_dims*len(info['comms'])) - additional_dims_h = additional_dims_h*len(info['comms']) - additional_dims_l = additional_dims_l*len(info['comms']) - high = np.hstack((self.observation_space.high, additional_dims_h)) - low = np.hstack((self.observation_space.low, additional_dims_l)) + additional_vars_high = additional_vars_high*len(info['comms']) + additional_vars_low = additional_vars_low*len(info['comms']) + high = np.hstack((self.observation_space.high, additional_vars_high)) + low = np.hstack((self.observation_space.low, additional_vars_low)) self.observation_space = spaces.Box(high=high, low=low) print("{}: {}".format(self.name, self._config.observations['buff_size'])) @@ -384,9 +384,6 @@ def __init__(self, cfg_path=DEFAULT_SIM_OPTIONS_FILE): {"track_len": self._config.track_len, "max_steps": self._config.max_steps }) - # if self.agents[name].multi_flag: - # self.comm_agent_names.append(name) - # self.comm_info["agent"][name] = (self.agents[name].action_dim, self.agents[name].buff_size) self.num_agents += 1 From b022d8ca96406f74862d50b2d66d97c38b06b036 Mon Sep 17 00:00:00 2001 From: rudrasohan Date: Sun, 5 Apr 2020 15:10:59 +0530 Subject: [PATCH 39/46] addressed new comments --- MADRaS/envs/gym_madras_v2.py | 36 ++++++++++++++++++++++-------------- 1 file changed, 22 insertions(+), 14 deletions(-) diff --git a/MADRaS/envs/gym_madras_v2.py b/MADRaS/envs/gym_madras_v2.py index f46a3d2..d61fcc2 100644 --- a/MADRaS/envs/gym_madras_v2.py +++ b/MADRaS/envs/gym_madras_v2.py @@ -330,11 +330,11 @@ def comms_init(self, info={}): additional_vars_high += 36*[1] additional_vars_low += 36*[0] additional_dims += 36 - elif ('speed' in var): + elif ('speed' in var): #for handling speedX, speedY & speedZ additional_vars_high += [np.inf] additional_vars_low += [-np.inf] additional_dims += 1 - elif ('action' == var): + elif (var == 'action'): 
additional_vars_high += [1]*self.action_dim additional_vars_low += [-1]*self.action_dim additional_dims += self.action_dim @@ -456,6 +456,22 @@ def execute_torcs_launch_command(self): self.torcs_proc = subprocess.Popen([command], shell=True, preexec_fn=os.setsid) print("TORCS server PID is: ", self.torcs_proc.pid) + def reset_wait(self): + stop_action = {} + for agent_name, agent in self.agents.items(): + if (agent.action_dim == 3): + stop_action[agent_name] = [0.0, -1.0, 1.0] + else: + stop_action[agent_name] = [0.0, 0.0] + + s_t = None + for _ in range(self.init_wait): + next_obs, _, _, _ = self.step(stop_action) + s_t = next_obs + + return s_t + + def reset(self): """Reset Method to be called at the end of each episode.""" if not self.initial_reset: @@ -486,18 +502,10 @@ def reset(self): for comm_agent in self.communications_map.keys(): #reset buffers self.agents[comm_agent].comms_buffer.reset() - s_t[comm_agent] = np.hstack((s_t[agent], self.agents[comm_agent].comms_buffer.request())) - - stop_action = {} - for agent_name, agent in self.agents.items(): - if (agent.action_dim == 3): - stop_action[agent_name] = [0.0, -1.0, 1.0] - else: - stop_action[agent_name] = [0.0, 0.0] + s_t[comm_agent] = np.hstack((s_t[comm_agent], self.agents[comm_agent].comms_buffer.request())) - for i in range(self.init_wait): - next_obs, next_reward, next_done, nexxt_info = self.step(stop_action) - s_t = next_obs + s_t = self.reset_wait() + return s_t def step(self, action): @@ -539,6 +547,6 @@ def step(self, action): done['__all__'] = done_check for comm_agent in self.communications_map.keys(): - next_obs[comm_agent] = np.hstack((next_obs[agent], self.agents[comm_agent].comms_buffer.request())) + next_obs[comm_agent] = np.hstack((next_obs[comm_agent], self.agents[comm_agent].comms_buffer.request())) return next_obs, reward, done, info From 184c660d917ee7f2c39c9123b904f9bbfa6c58bd Mon Sep 17 00:00:00 2001 From: rudrasohan Date: Sun, 5 Apr 2020 15:13:46 +0530 Subject: [PATCH 40/46] removed stray comments --- MADRaS/envs/gym_madras_v2.py | 16 ++++------------ 1 file changed, 4 insertions(+), 12 deletions(-) diff --git a/MADRaS/envs/gym_madras_v2.py b/MADRaS/envs/gym_madras_v2.py index d61fcc2..45b94f2 100644 --- a/MADRaS/envs/gym_madras_v2.py +++ b/MADRaS/envs/gym_madras_v2.py @@ -101,7 +101,7 @@ def get_observation_from_server(self): raw_ob = self.client.S.d # Get the current full-observation from torcs self.ob = self.make_observation(raw_ob) - # print("[{}]: Initial observation: {}".format(self.name, self.ob)) + if np.any(np.asarray(self.ob.track) < 0): print("Reset produced bad track values.") self.distance_traversed = 0 @@ -149,7 +149,6 @@ def reset(self, return_dict={}): self.PID_controller.reset() self.reward_handler.reset() self.done_handler.reset() - #print("[{}] Reset: Starting new episode DAMAGE{}".format(self.name, self.client.S.d["damage"])) return_dict[self.name] = s_t return return_dict @@ -205,8 +204,7 @@ def step_vanilla(self, action): self.client.S.d["trackPos"], self.client.S.d['distRaced'], self.ob.speedX*self.default_speed)) - #print("DISTANCE_TRAVERSED:{} SPEED:{}".format(game_state["distance_traversed"], (self.ob.speedX*self.default_speed))) - #print("PREV_DIST: {}".format(game_state["prev_distance_traversed"])) + reward = self.reward_handler.get_reward(self._config, game_state) done = self.done_handler.get_done_signal(self._config, game_state) @@ -259,13 +257,7 @@ def step_pid(self, desire, e): "track": self.client.S.d["track"], "racePos": self.client.S.d["racePos"], "num_steps": 
self.step_num} - # if (self.name == "MadrasAgent_0"): - # print("[{0}]PID Lane_pos:{1:0.4f} Distance:{2:0.2f} Speed{3:0.2f} Damage{4:0.2f}".format(self.name, - # self.client.S.d["trackPos"], - # self.client.S.d['distRaced'], - # self.ob.speedX*self.default_speed, - # self.client.S.d["damage"])) - + reward += self.reward_handler.get_reward(self._config, game_state) self.prev_dist = game_state["distance_traversed"] self.prev_damage = game_state["prev_damage"] @@ -394,7 +386,7 @@ def __init__(self, cfg_path=DEFAULT_SIM_OPTIONS_FILE): for comm_agent, agent_attrs in self.communications_map.items(): self.agents[comm_agent].comms_init(agent_attrs) - # self.action_dim = self.agents[0].action_dim # TODO(santara): Can I not have different action dims for different agents? + # TODO(santara): Can I not have different action dims for different agents? self.action_space = self.agents["MadrasAgent_0"].action_space self.observation_space = self.agents["MadrasAgent_0"].observation_space From 9ca21451996402ab2e7deca6a42074afbe722a44 Mon Sep 17 00:00:00 2001 From: rudrasohan Date: Sun, 19 Apr 2020 20:14:41 +0530 Subject: [PATCH 41/46] removed configs from init --- MADRaS/__init__.py | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/MADRaS/__init__.py b/MADRaS/__init__.py index 384b487..c0d66af 100644 --- a/MADRaS/__init__.py +++ b/MADRaS/__init__.py @@ -1,9 +1,9 @@ """Env Registration.""" from gym.envs.registration import register -register( - id='Madras-v0', - entry_point='MADRaS.envs:MadrasEnv', - max_episode_steps=10000, - reward_threshold=25.0, -) +# register( +# id='Madras-v0', +# entry_point='MADRaS.envs:MadrasEnv', +# max_episode_steps=10000, +# reward_threshold=25.0, +# ) From e0e679609c193013cd9dcbb564ab94f7e2dd325b Mon Sep 17 00:00:00 2001 From: rudrasohan Date: Sun, 19 Apr 2020 20:30:30 +0530 Subject: [PATCH 42/46] updated mult train restore path --- MADRaS/agents/rllib/train_rllib_multi_agent.py | 7 +------ 1 file changed, 1 insertion(+), 6 deletions(-) diff --git a/MADRaS/agents/rllib/train_rllib_multi_agent.py b/MADRaS/agents/rllib/train_rllib_multi_agent.py index 4942d73..187c93f 100644 --- a/MADRaS/agents/rllib/train_rllib_multi_agent.py +++ b/MADRaS/agents/rllib/train_rllib_multi_agent.py @@ -87,7 +87,7 @@ def policy_mapping_fn(agent_id): }, }) - ppo_trainer.restore("/home/sohan/ray_results/check/checkpoint_240/checkpoint-240") + #ppo_trainer.restore("{restore path}") for i in range(args.num_iters): print("== Iteration", i, "==") @@ -99,8 +99,3 @@ def policy_mapping_fn(agent_id): logging.warning("-- PPO --") print(pretty_print(ppo_trainer.train())) - - # if (i%20 == 0): - # weights_0 = ppo_trainer.get_weights(["ppo_policy_0"]) - # weights_1 = ppo_trainer.get_weights(["ppo_policy_1"]) - # ppo_trainer.set_weights({"ppo_policy_0": weights_1, "ppo_policy_1": weights_0}) From 3c73f2d76f6d3198cfa72f1f68da2d76e1b560cf Mon Sep 17 00:00:00 2001 From: rudrasohan Date: Sun, 19 Apr 2020 20:33:55 +0530 Subject: [PATCH 43/46] added print changes --- MADRaS/envs/gym_madras_v2.py | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/MADRaS/envs/gym_madras_v2.py b/MADRaS/envs/gym_madras_v2.py index 45b94f2..c5710ab 100644 --- a/MADRaS/envs/gym_madras_v2.py +++ b/MADRaS/envs/gym_madras_v2.py @@ -335,7 +335,7 @@ def comms_init(self, info={}): additional_vars_low += [-1] additional_dims += 1 - print("NAME:{}, lencomms:{}".format(self.name, len(info['comms']))) + logging.info("NAME:{}, lencomms:{}".format(self.name, len(info['comms']))) self.obs_dim += 
additional_dims*len(info['comms']) self.comms_buffer = cb.CommBuffer(self.name, self.buff_size, additional_dims*len(info['comms'])) additional_vars_high = additional_vars_high*len(info['comms']) @@ -343,7 +343,7 @@ def comms_init(self, info={}): high = np.hstack((self.observation_space.high, additional_vars_high)) low = np.hstack((self.observation_space.low, additional_vars_low)) self.observation_space = spaces.Box(high=high, low=low) - print("{}: {}".format(self.name, self._config.observations['buff_size'])) + logging.info("{}: {}".format(self.name, self._config.observations['buff_size'])) @@ -486,7 +486,6 @@ def reset(self): s_t[agent] = self.agents[agent].get_observation_from_server() self.agents[agent].client.respond_to_server() # To elimate 10s of timeout error, responding to the server after obs self.agents[agent].increment_(self.agents[agent].client.S.d["distRaced"], self.agents[agent].client.S.d["damage"], self.agents[agent].client.S.d["racePos"]) - print("[{}] Reset: Starting new episode DAMAGE{}".format(agent, self.agents[agent].client.S.d["damage"])) # Finish reset for agent in self.agents: From 45c764a00758feff4e1086a819f7b37b02e7e17e Mon Sep 17 00:00:00 2001 From: rudrasohan Date: Fri, 16 Oct 2020 14:36:12 +0530 Subject: [PATCH 44/46] init work on merge --- MADRaS/agents/generic/pid_test.py | 46 ++++ MADRaS/envs/data/sim_options_v2.yml | 259 ++++++++++--------- MADRaS/envs/gym_madras_v2.py | 26 +- MADRaS/utils/config_parser.py | 11 +- MADRaS/utils/data/car_config.template | 4 + MADRaS/utils/data/car_dynamics.py | 95 +++++++ MADRaS/utils/data/quickrace.template | 78 ++++++ MADRaS/utils/data/scr_server_config.template | 129 +++++++++ MADRaS/utils/data/track_details.py | 63 +++++ MADRaS/utils/torcs_server_config.py | 127 +++++++++ 10 files changed, 708 insertions(+), 130 deletions(-) create mode 100644 MADRaS/agents/generic/pid_test.py create mode 100644 MADRaS/utils/data/car_config.template create mode 100644 MADRaS/utils/data/car_dynamics.py create mode 100644 MADRaS/utils/data/quickrace.template create mode 100644 MADRaS/utils/data/scr_server_config.template create mode 100644 MADRaS/utils/data/track_details.py create mode 100644 MADRaS/utils/torcs_server_config.py diff --git a/MADRaS/agents/generic/pid_test.py b/MADRaS/agents/generic/pid_test.py new file mode 100644 index 0000000..282530d --- /dev/null +++ b/MADRaS/agents/generic/pid_test.py @@ -0,0 +1,46 @@ +import numpy as np +import gym +from MADRaS.envs.gym_madras_v2 import MadrasEnv +import os +import sys +import logging + +logging.basicConfig(level=logging.DEBUG) + +def test_madras_pid(vel, file_name): + env = MadrasEnv() + for key, val in env.agents.items(): + print("Observation Space ", val.observation_space) + print("Obs_dim ", val.obs_dim) + print("Testing reset...") + obs = env.reset() + vel = float(vel) + a = [0.0, vel] + b = [0.1, 0.00] + c = [0.2, -0.2] + print("Initial observation: {}." 
+ " Verify if the number of dimensions is right.".format(obs)) + for key, value in obs.items(): + print("{}: {}".format(key, len(value))) + print("Testing step...") + running_rew = 0 + speeds = [] + for t in range(300): + obs, r, done, _ = env.step({"MadrasAgent_0": a}) + #print("{}".format(obs)) + + # a = [0.0, 0.0] + running_rew += r["MadrasAgent_0"] + #print("{}: reward={}, done={}".format(t, running_rew, done)) + #logger.info("HELLO") + speeds.append(obs["MadrasAgent_0"][21]) + if (done['__all__']): + env.reset() + print(speeds) + np.save(file_name, np.array(speeds)) + os.system("pkill torcs") + + +if __name__=='__main__': + #test_madras_vanilla() + test_madras_pid(sys.argv[1], sys.argv[2]) \ No newline at end of file diff --git a/MADRaS/envs/data/sim_options_v2.yml b/MADRaS/envs/data/sim_options_v2.yml index c88f646..e5ebdaa 100644 --- a/MADRaS/envs/data/sim_options_v2.yml +++ b/MADRaS/envs/data/sim_options_v2.yml @@ -6,6 +6,25 @@ max_steps: 1000 # max episode length track_width: 10.0 init_wait: 25 #initial waiting period for the learning agents after the ep starts vision: False # whether to use vision input +randomize_env: False +noisy_observations: True + +server_config: + max_cars: 1 + min_traffic_cars: 0 + track_names: + - forza + - aalborg + - alpine-1 + - alpine-2 + distance_to_start: 25 + torcs_server_config_dir: /home/sohan/.torcs/config/raceman/ # This must be the value for TORCS with rendering on + scr_server_config_dir: /home/sohan/usr/local/share/games/torcs/drivers/scr_server/ + traffic_car: p406 # get full list of cars here: /home/anirban/usr/local/share/games/torcs/cars/ + learning_car: # get full list of cars here: /home/anirban/usr/local/share/games/torcs/cars/ + - car1-stock1 + - car1-stock2 + # agent config: agents: @@ -17,9 +36,11 @@ agents: target_speed: 27.78 # metres per sec state_dim: 24 # 24 early_stop: True + add_noise_to_actions: False + action_noise_std: 0.01 normalize_actions: True # all actions in [-1, 1] # PID params - pid_assist: False + pid_assist: True pid_settings: accel_pid: - 10.5 # a_x @@ -66,136 +87,134 @@ agents: # Done function dones: - - RaceOver - - TimeOut - Collision - TurnBackward - OutOfTrack - - MadrasAgent: - vision: False # whether to use vision input - throttle: True - gear_change: False - client_max_steps: -1 # to be interpreted as np.inf - target_speed: 27.78 # metres per sec - state_dim: 24 # 24 - early_stop: True - normalize_actions: True # all actions in [-1, 1] - # PID params - pid_assist: False - pid_settings: - accel_pid: - - 10.5 # a_x - - 0.05 # a_y - - 2.8 # a_z - steer_pid: - - 5.1 - - 0.001 - - 0.000001 - accel_scale: 1.0 - steer_scale: 0.1 - pid_latency: 10 + # - MadrasAgent: + # vision: False # whether to use vision input + # throttle: True + # gear_change: False + # client_max_steps: -1 # to be interpreted as np.inf + # target_speed: 27.78 # metres per sec + # state_dim: 24 # 24 + # early_stop: True + # normalize_actions: True # all actions in [-1, 1] + # # PID params + # pid_assist: False + # pid_settings: + # accel_pid: + # - 10.5 # a_x + # - 0.05 # a_y + # - 2.8 # a_z + # steer_pid: + # - 5.1 + # - 0.001 + # - 0.000001 + # accel_scale: 1.0 + # steer_scale: 0.1 + # pid_latency: 10 - # Observation mode - observations: - mode: SingleAgentSimpleLapObs - multi_flag: True - buff_size: 1 - normalize: False # gym_torcs normalizes by default - obs_min: - angle: -3.142 - track: 0.0 - trackPos: -1.0 - obs_max: - angle: 3.142 - track: 200.0 - trackPos: 1.0 + # # Observation mode + # observations: + # mode: 
SingleAgentSimpleLapObs + # multi_flag: True + # buff_size: 1 + # normalize: False # gym_torcs normalizes by default + # obs_min: + # angle: -3.142 + # track: 0.0 + # trackPos: -1.0 + # obs_max: + # angle: 3.142 + # track: 200.0 + # trackPos: 1.0 - # Reward function - rewards: - ProgressReward3: - scale: 1.0 - AvgSpeedReward: - scale: 1.0 - SuccessfulOvertakeReward: - scale: 10.0 - CollisionPenalty: - scale: 10.0 - TurnBackwardPenalty: - scale: 10.0 - AngAcclPenalty: - scale: 5.0 - max_ang_accl: 2.0 + # # Reward function + # rewards: + # ProgressReward3: + # scale: 1.0 + # AvgSpeedReward: + # scale: 1.0 + # SuccessfulOvertakeReward: + # scale: 10.0 + # CollisionPenalty: + # scale: 10.0 + # TurnBackwardPenalty: + # scale: 10.0 + # AngAcclPenalty: + # scale: 5.0 + # max_ang_accl: 2.0 - # Done function - dones: - - RaceOver - - TimeOut - - Collision - - TurnBackward - - OutOfTrack + # # Done function + # dones: + # - RaceOver + # - TimeOut + # - Collision + # - TurnBackward + # - OutOfTrack - - MadrasAgent: - vision: False # whether to use vision input - throttle: True - gear_change: False - client_max_steps: -1 # to be interpreted as np.inf - target_speed: 12.89 # metres per sec - state_dim: 24 - early_stop: True - normalize_actions: True # all actions in [-1, 1] - # PID params - pid_assist: False - pid_settings: - accel_pid: - - 10.5 # a_x - - 0.05 # a_y - - 2.8 # a_z - steer_pid: - - 5.1 - - 0.001 - - 0.000001 - accel_scale: 1.0 - steer_scale: 0.1 - pid_latency: 10 + # - MadrasAgent: + # vision: False # whether to use vision input + # throttle: True + # gear_change: False + # client_max_steps: -1 # to be interpreted as np.inf + # target_speed: 12.89 # metres per sec + # state_dim: 24 + # early_stop: True + # normalize_actions: True # all actions in [-1, 1] + # # PID params + # pid_assist: False + # pid_settings: + # accel_pid: + # - 10.5 # a_x + # - 0.05 # a_y + # - 2.8 # a_z + # steer_pid: + # - 5.1 + # - 0.001 + # - 0.000001 + # accel_scale: 1.0 + # steer_scale: 0.1 + # pid_latency: 10 - # Observation mode - observations: - mode: SingleAgentSimpleLapObs - multi_flag: False - buff_size: 1 - normalize: False # gym_torcs normalizes by default - obs_min: - angle: -3.142 - track: 0.0 - trackPos: -1.0 - obs_max: - angle: 3.142 - track: 200.0 - trackPos: 1.0 + # # Observation mode + # observations: + # mode: SingleAgentSimpleLapObs + # multi_flag: False + # buff_size: 1 + # normalize: False # gym_torcs normalizes by default + # obs_min: + # angle: -3.142 + # track: 0.0 + # trackPos: -1.0 + # obs_max: + # angle: 3.142 + # track: 200.0 + # trackPos: 1.0 - # Reward function - rewards: - ProgressReward2: - scale: 1.0 - AvgSpeedReward: - scale: 1.0 - CollisionPenalty: - scale: 10.0 - TurnBackwardPenalty: - scale: 10.0 - AngAcclPenalty: - scale: 1.0 - max_ang_accl: 2.0 - # TaskReward: - # scale: 1.0 + # # Reward function + # rewards: + # ProgressReward2: + # scale: 1.0 + # AvgSpeedReward: + # scale: 1.0 + # CollisionPenalty: + # scale: 10.0 + # TurnBackwardPenalty: + # scale: 10.0 + # AngAcclPenalty: + # scale: 1.0 + # max_ang_accl: 2.0 + # # TaskReward: + # # scale: 1.0 - # Done function - dones: - - RaceOver - - TimeOut - - TurnBackward - - OutOfTrack + # # Done function + # dones: + # - RaceOver + # - TimeOut + # - TurnBackward + # - OutOfTrack # Traffic agents # traffic: diff --git a/MADRaS/envs/gym_madras_v2.py b/MADRaS/envs/gym_madras_v2.py index c5710ab..dea013e 100644 --- a/MADRaS/envs/gym_madras_v2.py +++ b/MADRaS/envs/gym_madras_v2.py @@ -28,6 +28,7 @@ from mpi4py import MPI import socket 
import MADRaS.utils.config_parser as config_parser +import MADRaS.utils.torcs_server_config as torcs_config import MADRaS.utils.reward_handler as rm import MADRaS.utils.done_handler_v2 as dm import MADRaS.utils.observation_handler as om @@ -282,6 +283,9 @@ def step_pid(self, desire, e): return next_obs, reward, done, info def step(self, action, e, return_dict={}): + if self._config.add_noise_to_actions: + noise = np.random.normal(scale=self._config.action_noise_std, size=self.action_dim) + action += noise if self._config.pid_assist: return_dict[self.name] = self.step_pid(action, e) else: @@ -357,14 +361,19 @@ def __init__(self, cfg_path=DEFAULT_SIM_OPTIONS_FILE): self.communications_map = dict() self.torcs_proc = None self.seed() - self.start_torcs_process() - self.num_agents = 0 + + self.num_agents = len(self._config.agents) self.agents = OrderedDict() self.init_wait = self._config.init_wait + self.torcs_server_config = torcs_config.TorcsConfig( + self._config.server_config, num_learning_cars= self.num_agents, randomize=self._config.randomize_env) + self.start_torcs_process() if self._config.traffic: self.traffic_handler = traffic.MadrasTrafficHandler( - self._config.torcs_server_port, len(self.agents), self._config.traffic) + self._config.torcs_server_port, len(self.agents), self._config.traffic) + num_traffic_agents = len(self._config.traffic) if self._config.traffic else 0 + if self._config.agents: for i, agent in enumerate(self._config.agents): agent_name = [x for x in agent.keys()][0] @@ -376,8 +385,6 @@ def __init__(self, cfg_path=DEFAULT_SIM_OPTIONS_FILE): {"track_len": self._config.track_len, "max_steps": self._config.max_steps }) - - self.num_agents += 1 if os.path.isfile(DEFAULT_COMMUNICATION_MAP): self.communications_map = config_parser.parse_yaml(DEFAULT_COMMUNICATION_MAP) @@ -385,7 +392,8 @@ def __init__(self, cfg_path=DEFAULT_SIM_OPTIONS_FILE): print("COMMUNICATIONS MAP", self.communications_map) for comm_agent, agent_attrs in self.communications_map.items(): self.agents[comm_agent].comms_init(agent_attrs) - + + # TODO(santara): Can I not have different action dims for different agents? 
self.action_space = self.agents["MadrasAgent_0"].action_space self.observation_space = self.agents["MadrasAgent_0"].observation_space @@ -417,6 +425,7 @@ def test_torcs_server_port(self): udp.close() def start_torcs_process(self): + #import pdb; pdb.set_trace() if self.torcs_proc is not None: os.killpg(os.getpgid(self.torcs_proc.pid), signal.SIGKILL) time.sleep(0.5) @@ -436,9 +445,9 @@ def reset_torcs(self): def execute_torcs_launch_command(self): command = None rank = MPI.COMM_WORLD.Get_rank() - + self.torcs_server_config.generate_torcs_server_config() if rank < self._config.no_of_visualisations and self._config.visualise: - command = 'export TORCS_PORT={} && vglrun torcs -t 10000000 -nolaptime'.format( + command = 'export TORCS_PORT={} && torcs -t 10000000 -nolaptime'.format( self._config.torcs_server_port) else: command = 'export TORCS_PORT={} && torcs -t 10000000 -r ~/.torcs/config/raceman/quickrace.xml -nolaptime'.format(self._config.torcs_server_port) @@ -466,6 +475,7 @@ def reset_wait(self): def reset(self): """Reset Method to be called at the end of each episode.""" + #import pdb; pdb.set_trace() if not self.initial_reset: self.reset_torcs() else: diff --git a/MADRaS/utils/config_parser.py b/MADRaS/utils/config_parser.py index 731cfe1..2ba7cb5 100644 --- a/MADRaS/utils/config_parser.py +++ b/MADRaS/utils/config_parser.py @@ -15,6 +15,10 @@ def __init__(self): self.no_of_visualisations = 1 self.track_len = 7014.6 self.max_steps = 20000 + self.server_config = {} + self.noisy_observations = False + self.track_limits = {'low': -1, 'high': 1} + self.randomoze_env = False self.agents = {} self.traffic = [] @@ -28,7 +32,8 @@ def update(self, cfg_dict): if cfg_dict is None: return direct_attributes = ['torcs_server_port', 'visualise', 'vision', 'no_of_visualizations', 'track_len', - 'max_steps', 'agents', 'traffic', 'init_wait'] + 'max_steps', 'agents', 'traffic', 'init_wait', 'server_config', 'track_limits', + 'randomize_env'] for key in direct_attributes: if key in cfg_dict: exec("self.{} = {}".format(key, cfg_dict[key])) @@ -50,6 +55,8 @@ def __init__(self): self.normalize_actions = False self.early_stop = True self.observations = None + self.action_noise_std = 0.001 + self.add_noise_to_actions = False self.rewards = {} self.dones = {} @@ -64,7 +71,7 @@ def update(self, cfg_dict): return direct_attributes = ['track_len', 'max_steps', 'vision', 'throttle', 'gear_change', 'pid_assist', 'pid_latency', 'target_speed', 'early_stop', 'normalize_actions', 'observations', 'rewards', 'dones', - 'pid_settings'] + 'pid_settings', 'action_noise_std', 'add_noise_to_actions'] for key in direct_attributes: if key in cfg_dict: exec("self.{} = {}".format(key, cfg_dict[key])) diff --git a/MADRaS/utils/data/car_config.template b/MADRaS/utils/data/car_config.template new file mode 100644 index 0000000..870f839 --- /dev/null +++ b/MADRaS/utils/data/car_config.template @@ -0,0 +1,4 @@ +
[4-line XML body of car_config.template not recoverable from this extract; torcs_server_config.py below formats it once per car with {section_no} and {car_no}]
\ No newline at end of file diff --git a/MADRaS/utils/data/car_dynamics.py b/MADRaS/utils/data/car_dynamics.py new file mode 100644 index 0000000..83dd8c7 --- /dev/null +++ b/MADRaS/utils/data/car_dynamics.py @@ -0,0 +1,95 @@ +dynamics_data = { + "155-DTM": [[0, 1000, 2000, 3000, 4000, 5000, 6000, 7000, 8000, 9000, 10000, 11000, 12000, 13000, 14000, 15000, 16000, 17000, 18000, 19000, 20000], + [750.0, 363.5057471264368, 408.80079284902195, 435.3448275862069, 462.6436781609196, 462.35795454545456, 471.264367816092, 451.24350555006237, 401.02675179762593, 352.4077041741376, 330.06878830032144, 750.0, 750.0, 750.0, 750.0, 711.6161949594086, 667.1401827744456, 627.8966426112429, 593.0134957995072, 561.8022591784805, 533.7121462195564]], + "acura-nsx-sz": [[0, 500, 1000, 1500, 2000, 2500, 3000, 3500, 4000, 4500, 5000, 5500, 6000, 6500, 7000, 7500, 8000, 8500, 9000, 9500, 10000], + [280, 300, 350, 390, 430, 430, 450, 450, 460, 460, 460, 440, 420, 390, 360, 350, 340, 320, 300, 280, 250]], + "baja-bug": [[0, 1000, 2000, 3000, 4000, 5000, 6000, 7000, 8000, 9000, 10000, 11000, 12000, 13000, 14000, 15000, 16000, 17000, 18000, 19000, 20000], + [2000.0, 150.60536398467436, 160.51027785254712, 177.2462587670076, 177.3406511327918, 156.87826830977735, 109.13270838941034, 75.0287370177196, 42.145593869731805, 1186.8182037348374, 1068.1363833613536, 971.0330757830488, 890.1136528011281, 821.643371816426, 762.9545595438242, 712.0909222409025, 667.5852396008461, 628.3155196243257, 593.4091018674187, 562.1770438743966, 534.0681916806768]], + "buggy": [[0, 1000, 2000, 3000, 4000, 5000, 6000, 7000, 8000, 9000, 10000, 11000, 12000, 13000, 14000, 15000, 16000, 17000, 18000, 19000, 20000], + [2000.0, 193.25583777291413, 254.64298624195746, 345.58690989979937, 384.2380774543822, 363.7756946313678, 297.8413499794324, 229.6334072360509, 181.8878473156839, 1186.8182037348374, 1068.1363833613536, 971.0330757830488, 890.1136528011281, 821.643371816426, 762.9545595438242, 712.0909222409025, 667.5852396008461, 628.3155196243257, 593.4091018674187, 562.1770438743966, 534.0681916806768]], + "car1-ow1": [[0, 1000, 2000, 3000, 4000, 5000, 6000, 7000, 8000, 9000, 10000, 11000, 12000, 13000, 14000, 15000, 16000, 17000, 18000, 19000, 20000], + [100, 100, 120, 140, 160, 180, 220, 260, 300, 340, 340, 340.0, 340.0, 345.0, 350.0, 355.0, 360.0, 360.0, 360.0, 330.0, 300.0]], + "car1-stock1": [[0, 1000, 2000, 3000, 4000, 5000, 6000, 7000, 8000, 9000, 10000], + [100, 200, 333, 420, 520, 640, 670, 710, 690, 663, 590]], + "car1-stock2": [[0, 1000, 2000, 3000, 4000, 5000, 6000, 7000, 8000, 9000, 10000], + [100, 290, 390, 480, 560, 640, 650, 640, 600, 560, 500]], + "car1-trb1": [[0, 1000, 2000, 3000, 4000, 5000, 6000, 7000, 8000, 9000, 10000], + [100.0, 160.0, 190.0, 280.0, 350, 405.0, 443.0, 465.0, 483.0, 415.0, 360.0]], + "car1-trb3": [[0, 1000, 2000, 3000, 4000, 5000, 6000, 7000, 8000, 9000, 10000], + [100.0, 160.0, 190.0, 240.0, 300.0, 350, 405, 435, 430, 370, 293]], + "car2-trb1": [[0, 1000, 2000, 3000, 4000, 5000, 6000, 7000, 8000, 9000, 10000], + [100.0, 220.0, 310.0, 370.0, 420.0, 460.0, 490.0, 500.0, 485.0, 428.0, 310.0]], + "car3-trb1": [[0, 1000, 2000, 3000, 4000, 5000, 6000, 7000, 8000, 9000, 10000], + [90.0, 230.0, 350.0, 450.0, 530.0, 520.0, 500.0, 475.0, 450.0, 415.0, 369.0]], + "car4-trb1": [[0, 1000, 2000, 3000, 4000, 5000, 6000, 7000, 8000, 9000, 10000], + [80.0, 190.0, 230.0, 330.0, 405.0, 475.0, 505.0, 515.0, 480.0, 410.0, 330.0]], + "car5-trb1": [[0, 1000, 2000, 3000, 4000, 5000, 6000, 7000, 8000, 9000, 10000], + [97.0, 
222.0, 325.0, 470.0, 560.0, 555.0, 545.0, 511.0, 471.0, 410.0, 320.0]], + "car6-trb1": [[0, 1000, 2000, 3000, 4000, 5000, 6000, 7000, 8000, 9000, 10000], + [91.0, 287.0, 377.0, 455.0, 493.0, 517.0, 523.0, 502.0, 473.0, 347.98, 185.10]], + "car7-trb1": [[0, 1000, 2000, 3000, 4000, 5000, 6000, 7000, 8000, 9000, 10000], + [91.0, 287.0, 377.0, 455.0, 492.0, 517.0, 501.0, 492.0, 453.0, 333.98, 185.10]], + "car8-trb1": [[0, 1000, 2000, 3000, 4000, 5000, 6000, 7000, 8000, 9000, 10000], + [97.0, 222.0, 325.0, 470.0, 560.0, 555.0, 545.0, 521.0, 471.0, 410.0, 320.0]], + "kc-2000gt": [[0.000000, 500, 1000, 1500, 2000, 2500, 3000, 3500, 4000, 4500, 5000, 5500, 6000, 6500, 7000, 7500, 8000, 8500, 9000, 9500, 10000], + [0, 140, 150, 150, 155, 160, 165, 168, 172, 175, 177, 172, 167, 161, 152, 142, 132, 130, 0.000000, 0.000000, 0.000000]], + "kc-5300gt": [[0, 500, 1000, 1500, 2000, 2500, 3000, 3500, 4000, 4500, 5000, 5500, 6000, 6500, 7000], + [0, 110, 170, 260, 350, 430, 475, 488, 480, 470, 460, 445, 415, 380, 335]], + "kc-110": [[0, 500, 1000, 1500, 2000, 2500, 3000, 3500, 4000, 4500, 5000, 5500, 6000, 6500, 7000, 7500, 8000, 8500, 9000, 9500, 10000], + [0, 60, 75, 87, 98, 109, 115, 118, 120, 122, 121.6, 121.2, 120.8, 120.4, 120, 107, 95, 80, 64, 0, 0]], + "p406": [[0, 500, 1000, 1500, 2000, 2500, 3000, 3500, 4000, 4500, 5000, 5500, 6000, 6500, 7000], + [20, 60, 110, 110, 140, 167, 195, 220, 240, 260, 255, 250, 220, 200, 160]], + "pw-imprezawrc": [[0, 1024, 2048, 3072, 4096, 5120, 5888, 7168, 8192, 9216, 10240], + [100, 317, 339, 355, 415, 452, 482, 438, 364, 210, 147]], + "pw-focuswrc": [[0, 1024, 2048, 3072, 4096, 5120, 5888, 7168, 8192, 9216, 10240], + [100, 217, 339, 365, 461, 466, 472, 428, 354, 200, 147]], + "pw-evoviwrc": [[0, 1024, 2048, 3072, 4096, 5120, 5888, 7168, 8192, 9216, 10000], + [0, 230, 345, 395, 461, 471, 472, 458, 354, 200, 147]], + "pw-206wrc": [[0, 1024, 2048, 3072, 4096, 5120, 5888, 7168, 8192, 9216, 10000], + [100, 247, 369, 395, 466, 472, 467, 458, 384, 230, 177]], + "pw-corollawrc": [[0, 1024, 2048, 3072, 4096, 5120, 5888, 7168, 8192, 9216, 10000], + [100, 227, 310, 375, 471, 472, 462, 438, 364, 210, 157]], + "pw-306wrc": [[0, 1024, 2048, 3072, 4096, 5120, 5888, 7168, 8192, 9216, 10000], + [100, 247, 369, 395, 456, 462, 458, 445, 384, 230, 177]] +} + +import matplotlib.pyplot as plt + +def generate_plots(): + for car in dynamics_data.keys(): + rpm = dynamics_data[car][0] + torque = dynamics_data[car][1] + plt.figure() + plt.plot(rpm, torque) + plt.title(car) + plt.xlabel('rpm') + plt.ylabel('torque (N.m)') + plt.savefig(car+'.png') + + +cars_to_compare = [ + "car1-stock1", + "car1-stock2", + "155-DTM", + "car3-trb1", + "kc-2000gt", + "buggy", + "baja-bug" +] + + +def plot_to_compare(): + plt.figure() + for car in cars_to_compare: + rpm = dynamics_data[car][0] + torque = dynamics_data[car][1] + plt.plot(rpm, torque) + plt.title("Engine responses compared (Train)") + plt.xlabel('rpm') + plt.ylabel('torque (N.m)') + plt.legend(cars_to_compare) + plt.savefig('engine_responses_train.png') + +if __name__=='__main__': + # generate_plots() + plot_to_compare() \ No newline at end of file diff --git a/MADRaS/utils/data/quickrace.template b/MADRaS/utils/data/quickrace.template new file mode 100644 index 0000000..db49804 --- /dev/null +++ b/MADRaS/utils/data/quickrace.template @@ -0,0 +1,78 @@ + + + + + +
[78-line XML body of quickrace.template not recoverable from this extract; torcs_server_config.py below formats it with {track_name}, {track_category}, {distance_to_start} and the concatenated per-car {car_config} blocks]
\ No newline at end of file diff --git a/MADRaS/utils/data/scr_server_config.template b/MADRaS/utils/data/scr_server_config.template new file mode 100644 index 0000000..2e3e47a --- /dev/null +++ b/MADRaS/utils/data/scr_server_config.template @@ -0,0 +1,129 @@ + + + + + + + + + + +
[129-line XML body of scr_server_config.template not recoverable from this extract; it evidently lists the ten scr_server driver entries (indices 0 to 9), and torcs_server_config.py below fills it positionally via scr_server_config_template.format(*car_name_list)]
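Because the template bodies above were lost, the sketch below illustrates only the formatting contract they have to satisfy, as inferred from torcs_server_config.py later in this patch: car_config.template takes {section_no} and {car_no}, quickrace.template takes {track_name}, {track_category}, {distance_to_start} plus the concatenated {car_config} blocks, and scr_server_config.template takes ten positional car names. The *_STANDIN strings are invented stand-ins, not the real XML.

    # Sketch of the template contract only; the *_STANDIN strings are invented
    # stand-ins for the real XML bodies, which are not reproduced here.
    CAR_CONFIG_STANDIN = '<section name="{section_no}"><attnum name="idx" val="{car_no}"/></section>'
    QUICKRACE_STANDIN = ('<race track="{track_name}" category="{track_category}" '
                         'dist="{distance_to_start}">{car_config}</race>')
    SCR_SERVER_STANDIN = "|".join("driver{}:{{}}".format(i) for i in range(10))  # ten positional slots

    num_traffic, learning = 2, ["car1-stock1"]
    car_config = "\n".join(CAR_CONFIG_STANDIN.format(section_no=i + 1, car_no=i)
                           for i in range(num_traffic + len(learning)))
    quickrace_xml = QUICKRACE_STANDIN.format(track_name="forza", track_category="road",
                                             distance_to_start=25, car_config=car_config)
    car_names = ["p406"] * num_traffic + learning + ["p406"] * (10 - num_traffic - len(learning))
    scr_server_xml = SCR_SERVER_STANDIN.format(*car_names)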
diff --git a/MADRaS/utils/data/track_details.py b/MADRaS/utils/data/track_details.py new file mode 100644 index 0000000..bb04d22 --- /dev/null +++ b/MADRaS/utils/data/track_details.py @@ -0,0 +1,63 @@ +track_lengths = { + "aalborg": 2587.54, + "alpine-1": 6355.65, + "alpine-2": 3773.57, + "brondehach": 3919.31, + "g-track-1": 2057.56, + "g-track-2": 3185.83, + "g-track-3": 2843.10, + "corkscrew": 3608.45, + "eroad": 3260.43, + "e-track-1": 3243.64, + "e-track-2": 5380.50, + "e-track-3": 4208.37, + "e-track-4": 7041.68, + "e-track-6": 4441.29, + "forza": 5784.10, + "ole-road-1": 6282.81, + "ruudskogen": 3274.20, + "spring": 22129.77, + "street-1": 3823.05, + "wheel-1": 4328.54, + "wheel-2": 6205.46, + "dirt-1": 1072.93, + "dirt-2": 1760.94, + "dirt-3": 2205.93, + "dirt-4": 3260.43, + "dirt-5": 1072.93, + "dirt-6": 3147.46, + "mixed-1": 1014.22, + "mixed-2": 1412.90 +} + +track_widths = { + "aalborg": 10.0, + "alpine-1": 12.0, + "alpine-2": 10.0, + "brondehach": 13.0, + "g-track-1": 15.0, + "g-track-2": 15.0, + "g-track-3": 10.0, + "corkscrew": 12.0, + "eroad": 16.0, + "e-track-1": 15.0, + "e-track-2": 12.0, + "e-track-3": 12.0, + "e-track-4": 15.0, + "e-track-6": 13.0, + "forza": 11.0, + "ole-road-1": 10.0, + "ruudskogen": 11.0, + "spring": 12.0, + "street-1": 14.0, + "wheel-1": 14.0, + "wheel-2": 12.0, + "dirt-1": 10.0, + "dirt-2": 10.0, + "dirt-3": 10.0, + "dirt-4": 16.0, + "dirt-5": 10.0, + "dirt-6": 15.0, + "mixed-1": 10.0, + "mixed-2": 10.0 +} \ No newline at end of file diff --git a/MADRaS/utils/torcs_server_config.py b/MADRaS/utils/torcs_server_config.py new file mode 100644 index 0000000..971a541 --- /dev/null +++ b/MADRaS/utils/torcs_server_config.py @@ -0,0 +1,127 @@ +import numpy as np +import random +import os +import logging +import MADRaS.utils.data.track_details as track_details +logger = logging.getLogger(__name__) + +path_and_file = os.path.realpath(__file__) +path, _ = os.path.split(path_and_file) + +QUICKRACE_TEMPLATE_PATH = os.path.join(path, "data", "quickrace.template") +CAR_CONFIG_TEMPATE_PATH = os.path.join(path, "data", "car_config.template") +SCR_SERVER_CONFIG_TEMPLATE_PATH = os.path.join(path, "data", "scr_server_config.template") + +MAX_NUM_CARS = 10 # There are 10 scr-servers in the TORCS GUI + +DIRT_TRACK_NAMES = [ + "dirt-1", + "dirt-2", + "dirt-3", + "dirt-4", + "dirt-5", + "dirt-6", + "mixed-1", + "mixed-2" +] + +ROAD_TRACK_NAMES = [ + "aalborg", + "alpine-1", + "alpine-2", + "brondehach", + "g-track-1", + "g-track-2", + "g-track-3", + "corkscrew", + "eroad", + # "e-track-1", # Segmentation fault from torcs: no track observations after about 1000 steps + "e-track-2", + "e-track-3", + "e-track-4", + "e-track-6", + "forza", + "ole-road-1", + "ruudskogen", + "spring", + "street-1", + "wheel-1", + "wheel-2" +] + + + +class TorcsConfig(object): + def __init__(self, cfg, num_learning_cars, randomize=False): + self.max_cars = cfg["max_cars"] if "max_cars" in cfg else MAX_NUM_CARS + self.min_traffic_cars = cfg["min_traffic_cars"] if "min_traffic_cars" in cfg else 0 + self.track_category = cfg["track_category"] if "track_category" in cfg else "road" + self.num_learning_cars = num_learning_cars + self.track_names = cfg["track_names"] if "track_names" in cfg else ROAD_TRACK_NAMES + self.distance_to_start = cfg["distance_to_start"] if "distance_to_start" in cfg else 0 + self.torcs_server_config_dir = (cfg["torcs_server_config_dir"] if "torcs_server_config_dir" in cfg + else "/home/sohan/.torcs/config/raceman/") + self.scr_server_config_dir = 
(cfg["scr_server_config_dir"] if "scr_server_config_dir" in cfg + else "/home/sohan/usr/local/share/games/torcs/drivers/scr_server/") + with open(QUICKRACE_TEMPLATE_PATH, 'r') as f: + self.quickrace_template = f.read() + with open(CAR_CONFIG_TEMPATE_PATH, 'r') as f: + self.car_config_template = f.read() + with open(SCR_SERVER_CONFIG_TEMPLATE_PATH, 'r') as f: + self.scr_server_config_template = f.read() + self.randomize = randomize + self.quickrace_xml_path = os.path.join(self.torcs_server_config_dir, "quickrace.xml") + self.scr_server_xml_path = os.path.join(self.scr_server_config_dir, "scr_server.xml") + self.traffic_car_type = cfg['traffic_car'] if 'traffic_car' in cfg else 'car1-trb1' + self.learning_car_types = cfg['learning_car'] + self.track_length = 0 + self.track_width = 0 + + def get_num_traffic_cars(self): + if not self.randomize: + return self.max_cars-self.num_learning_cars + else: + num_traffic_cars = np.random.randint(low=self.min_traffic_cars, high=self.max_cars) + return num_traffic_cars + + def get_track_name(self): + if not self.randomize: + track_name = self.track_names[0] + else: + track_name = random.sample(self.track_names, 1)[0] + logging.info("-------------------------CURRENT TRACK:{}------------------------".format(track_name)) + self.track_length = track_details.track_lengths[track_name] + self.track_width = track_details.track_widths[track_name] + return track_name + + def get_learning_car_type(self): + if not self.randomize: + learning_car_types = self.learning_car_types[:self.num_learning_cars] + else: + learning_car_types = random.sample(self.learning_car_types, self.num_learning_cars) + for i, car in enumerate(learning_car_types): + logging.info("-------------------------LEARNING CAR {}:{}------------------------".format(i, car)) + return learning_car_types + + def generate_torcs_server_config(self): + self.num_traffic_cars = self.get_num_traffic_cars() + logging.info("-----------------------Num. 
Traffic Cars:{}-----------------------".format(self.num_traffic_cars)) + + car_config = "\n".join(self.car_config_template.format( + **{"section_no": i+1, "car_no": i}) for i in range( + self.num_traffic_cars + self.num_learning_cars)) + context = { + "track_name": self.get_track_name(), + "track_category": self.track_category, + "distance_to_start": self.distance_to_start, + "car_config": car_config + } + torcs_server_config = self.quickrace_template.format(**context) + with open(self.quickrace_xml_path, "w") as f: + f.write(torcs_server_config) + self.learning_car_types = self.get_learning_car_type() + car_name_list = ([self.traffic_car_type]*self.num_traffic_cars + self.learning_car_types + + [self.traffic_car_type]*(MAX_NUM_CARS-self.num_traffic_cars-self.num_learning_cars)) + scr_server_config = self.scr_server_config_template.format(*car_name_list) + with open(self.scr_server_xml_path, "w") as f: + f.write(scr_server_config) \ No newline at end of file From abfb42d6625f36cdb3cee1148022406b92d230f5 Mon Sep 17 00:00:00 2001 From: rudrasohan Date: Fri, 16 Oct 2020 15:11:08 +0530 Subject: [PATCH 45/46] random working without traffic --- MADRaS/envs/gym_madras_v2.py | 8 +++++--- MADRaS/traffic/traffic.py | 12 ++++++------ MADRaS/utils/torcs_server_config.py | 4 +++- 3 files changed, 14 insertions(+), 10 deletions(-) diff --git a/MADRaS/envs/gym_madras_v2.py b/MADRaS/envs/gym_madras_v2.py index dea013e..711e128 100644 --- a/MADRaS/envs/gym_madras_v2.py +++ b/MADRaS/envs/gym_madras_v2.py @@ -380,7 +380,7 @@ def __init__(self, cfg_path=DEFAULT_SIM_OPTIONS_FILE): agent_cfg = agent[agent_name] name = '{}_{}'.format(agent_name, i) - torcs_server_port = self._config.torcs_server_port + i + num_traffic_agents + torcs_server_port = self._config.torcs_server_port + i + self.torcs_server_config.num_traffic_cars self.agents[name] = MadrasAgent(name, torcs_server_port, agent_cfg, {"track_len": self._config.track_len, "max_steps": self._config.max_steps @@ -478,16 +478,18 @@ def reset(self): #import pdb; pdb.set_trace() if not self.initial_reset: self.reset_torcs() + self.torcs_server_config.generate_torcs_server_config() else: self.initial_reset = False if self._config.traffic: - self.traffic_handler.reset() + self.traffic_handler.reset(self.torcs_server_config.num_traffic_cars) s_t = OrderedDict() # Create clients and connect their sockets - for agent in self.agents: + for i, agent in enumerate(self.agents): + self.agents[agent].torcs_server_port = self._config.torcs_server_port + i + self.torcs_server_config.num_traffic_cars self.agents[agent].create_new_client() # Collect first observations diff --git a/MADRaS/traffic/traffic.py b/MADRaS/traffic/traffic.py index f9792ed..befeddb 100644 --- a/MADRaS/traffic/traffic.py +++ b/MADRaS/traffic/traffic.py @@ -29,12 +29,11 @@ def __init__(self, torcs_server_port, num_learning_agents, cfg): def get_traffic_agents(self): return self.traffic_agents - def flag_off_traffic(self): + def flag_off_traffic(self, num_cars_to_reset): self.num_episodes_of_training += 1 for i, agent in enumerate(self.traffic_agents.values()): - self.traffic_processes.append( - Process(target=agent.flag_off, - args=(i+self.num_episodes_of_training,))) + if i < num_cars_to_reset: + self.traffic_processes.append(Process(target=agent.flag_off, args=(i+self.num_episodes_of_training,))) for traffic_process in self.traffic_processes: traffic_process.start() @@ -43,9 +42,10 @@ def kill_all_traffic_agents(self): traffic_process.terminate() self.traffic_processes = [] - def reset(self): + def 
reset(self, num_cars_to_reset): self.kill_all_traffic_agents() - self.flag_off_traffic() + self.flag_off_traffic(num_cars_to_reset) + class MadrasTrafficAgent(object): diff --git a/MADRaS/utils/torcs_server_config.py b/MADRaS/utils/torcs_server_config.py index 971a541..19e1d0e 100644 --- a/MADRaS/utils/torcs_server_config.py +++ b/MADRaS/utils/torcs_server_config.py @@ -81,7 +81,9 @@ def get_num_traffic_cars(self): if not self.randomize: return self.max_cars-self.num_learning_cars else: - num_traffic_cars = np.random.randint(low=self.min_traffic_cars, high=self.max_cars) + if (self.min_traffic_cars == (self.max_cars-self.num_learning_cars)): + return 0 + num_traffic_cars = np.random.randint(low=self.min_traffic_cars, high=self.max_cars-self.num_learning_cars+1) return num_traffic_cars def get_track_name(self): From 3f03013ef7f02bdd51b89fb91f2015fce889bd95 Mon Sep 17 00:00:00 2001 From: rudrasohan Date: Fri, 16 Oct 2020 15:20:14 +0530 Subject: [PATCH 46/46] initial traffic random working --- MADRaS/envs/gym_madras_v2.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/MADRaS/envs/gym_madras_v2.py b/MADRaS/envs/gym_madras_v2.py index 711e128..8250186 100644 --- a/MADRaS/envs/gym_madras_v2.py +++ b/MADRaS/envs/gym_madras_v2.py @@ -478,7 +478,7 @@ def reset(self): #import pdb; pdb.set_trace() if not self.initial_reset: self.reset_torcs() - self.torcs_server_config.generate_torcs_server_config() + # self.torcs_server_config.generate_torcs_server_config() else: self.initial_reset = False
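Patches 44 through 46 route the new server_config block of sim_options_v2.yml into environment start-up and reset. A minimal, hedged sketch of exercising that path directly, outside MadrasEnv, is shown below; the dictionary mirrors the example YAML earlier in this patch, and the directory paths and car names are machine-specific assumptions rather than requirements.

    # Hedged usage sketch of the randomized TORCS server configuration added in this patch series.
    # Paths and car names are assumptions copied from the example YAML; adjust them to your install.
    import MADRaS.utils.torcs_server_config as torcs_config

    server_cfg = {
        "max_cars": 4,
        "min_traffic_cars": 0,
        "track_names": ["forza", "aalborg", "alpine-1", "alpine-2"],
        "distance_to_start": 25,
        "torcs_server_config_dir": "/home/<user>/.torcs/config/raceman/",
        "scr_server_config_dir": "/home/<user>/usr/local/share/games/torcs/drivers/scr_server/",
        "traffic_car": "p406",
        "learning_car": ["car1-stock1", "car1-stock2"],
    }
    cfg = torcs_config.TorcsConfig(server_cfg, num_learning_cars=1, randomize=True)
    cfg.generate_torcs_server_config()  # samples a track and traffic count, rewrites quickrace.xml and scr_server.xml
    print(cfg.num_traffic_cars, cfg.track_length, cfg.track_width)

With randomize=True each call re-samples the track, the learning car and the traffic-car count; MadrasEnv regenerates this configuration in execute_torcs_launch_command() before each TORCS relaunch, and reset() then flags off only torcs_server_config.num_traffic_cars traffic agents, as introduced in patches 45 and 46.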