diff --git a/examples/exp_configs/non_rl/bay_bridge.py b/examples/exp_configs/non_rl/bay_bridge.py index d7d78360f..f3e0c465f 100644 --- a/examples/exp_configs/non_rl/bay_bridge.py +++ b/examples/exp_configs/non_rl/bay_bridge.py @@ -48,7 +48,7 @@ lc_pushy=0.8, lc_speed_gain=4.0, model="LC2013", - lane_change_mode="no_lat_collide", + lane_change_mode="no_lc_safe", # lcKeepRight=0.8 ), num_vehicles=1400) diff --git a/examples/exp_configs/non_rl/bay_bridge_toll.py b/examples/exp_configs/non_rl/bay_bridge_toll.py index 1b8268aeb..0941823cb 100644 --- a/examples/exp_configs/non_rl/bay_bridge_toll.py +++ b/examples/exp_configs/non_rl/bay_bridge_toll.py @@ -46,7 +46,7 @@ model="LC2013", lcCooperative=0.2, lcSpeedGain=15, - lane_change_mode="no_lat_collide", + lane_change_mode="no_lc_safe", ), num_vehicles=50) diff --git a/examples/exp_configs/non_rl/i210_subnetwork.py b/examples/exp_configs/non_rl/i210_subnetwork.py index 3fac52be2..c6249da51 100644 --- a/examples/exp_configs/non_rl/i210_subnetwork.py +++ b/examples/exp_configs/non_rl/i210_subnetwork.py @@ -41,11 +41,9 @@ if WANT_GHOST_CELL: from flow.networks.i210_subnetwork_ghost_cell import I210SubNetworkGhostCell, EDGES_DISTRIBUTION - highway_start_edge = 'ghost0' else: from flow.networks.i210_subnetwork import I210SubNetwork, EDGES_DISTRIBUTION - highway_start_edge = "119257914" vehicles = VehicleParams() @@ -255,11 +253,6 @@ def valid_ids(env, veh_ids): env.k.vehicle.get_speed(valid_ids(env, env.k.vehicle.get_ids())))), "avg_outflow": lambda env: np.nan_to_num( env.k.vehicle.get_outflow_rate(120)), - # # we multiply by 5 to account for the vehicle length and by 1000 to convert - # # into veh/km - # "avg_density": lambda env: 5 * 1000 * len(env.k.vehicle.get_ids_by_edge( - # edge_id)) / (env.k.network.edge_length(edge_id) - # * env.k.network.num_lanes(edge_id)), "mpg": lambda env: miles_per_gallon(env, valid_ids(env, env.k.vehicle.get_ids()), gain=1.0), "mpj": lambda env: miles_per_megajoule(env, valid_ids(env, env.k.vehicle.get_ids()), gain=1.0), } diff --git a/examples/exp_configs/non_rl/minicity.py b/examples/exp_configs/non_rl/minicity.py index 23b232480..35d5edbce 100644 --- a/examples/exp_configs/non_rl/minicity.py +++ b/examples/exp_configs/non_rl/minicity.py @@ -18,7 +18,7 @@ speed_mode=1, ), lane_change_params=SumoLaneChangeParams( - lane_change_mode="no_lat_collide", + lane_change_mode="no_lc_safe", ), initial_speed=0, num_vehicles=90) diff --git a/examples/exp_configs/rl/multiagent/multiagent_i210.py b/examples/exp_configs/rl/multiagent/multiagent_i210.py index 028e5bc7c..28d15bd8e 100644 --- a/examples/exp_configs/rl/multiagent/multiagent_i210.py +++ b/examples/exp_configs/rl/multiagent/multiagent_i210.py @@ -28,7 +28,7 @@ # WANT_DOWNSTREAM_BOUNDARY = True ON_RAMP = False PENETRATION_RATE = 0.10 -V_DES = 7.0 +V_DES = 5.0 HORIZON = 1000 WARMUP_STEPS = 600 @@ -64,11 +64,11 @@ "mpg_reward": False, # whether to use the MPJ reward. 
Otherwise, defaults to a target velocity reward "mpj_reward": False, - # how many vehicles to look back for the MPG reward - "look_back_length": 1, + # how many vehicles to look back for any reward + "look_back_length": 10, # whether to reroute vehicles once they have exited - "reroute_on_exit": True, - 'target_velocity': 8.0, + "reroute_on_exit": False, + 'target_velocity': 5.0, # how many AVs there can be at once (this is only for centralized critics) "max_num_agents": 10, # which edges we shouldn't apply control on @@ -91,9 +91,10 @@ "speed_reward_gain": 0.5, # penalize stopped vehicles "penalize_stops": True, - + "stop_penalty": 0.05, # penalize accels - "penalize_accel": True + "penalize_accel": True, + "accel_penalty": 0.05 }) # CREATE VEHICLE TYPES AND INFLOWS @@ -264,7 +265,7 @@ sims_per_step=3, warmup_steps=WARMUP_STEPS, additional_params=additional_env_params, - done_at_exit=False + done_at_exit=not additional_env_params["reroute_on_exit"] ), # network-related parameters (see flow.core.params.NetParams and the diff --git a/examples/train.py b/examples/train.py index 112b7fa0d..7bf259691 100644 --- a/examples/train.py +++ b/examples/train.py @@ -113,7 +113,6 @@ def run_model_stablebaseline(flow_params, """ from stable_baselines.common.vec_env import DummyVecEnv, SubprocVecEnv from stable_baselines import PPO2 - if num_cpus == 1: constructor = env_constructor(params=flow_params, version=0)() # The algorithms require a vectorized environment to run @@ -218,8 +217,8 @@ def setup_exps_rllib(flow_params, config["lr"] = tune.grid_search([5e-4, 5e-5]) elif alg_run == "TD3": - agent_cls = get_agent_class(alg_run) - config = deepcopy(agent_cls._default_config) + alg_run = get_agent_class(alg_run) + config = deepcopy(alg_run._default_config) config["num_workers"] = n_cpus config["horizon"] = horizon @@ -242,6 +241,9 @@ def on_episode_start(info): episode.user_data["avg_energy"] = [] episode.user_data["avg_mpg"] = [] episode.user_data["avg_mpj"] = [] + episode.user_data["num_cars"] = [] + episode.user_data["avg_accel_human"] = [] + episode.user_data["avg_accel_avs"] = [] def on_episode_step(info): episode = info["episode"] @@ -271,6 +273,15 @@ def on_episode_step(info): episode.user_data["avg_speed_avs"].append(av_speed) episode.user_data["avg_mpg"].append(miles_per_gallon(env, veh_ids, gain=1.0)) episode.user_data["avg_mpj"].append(miles_per_megajoule(env, veh_ids, gain=1.0)) + episode.user_data["num_cars"].append(len(env.k.vehicle.get_ids())) + episode.user_data["avg_accel_human"].append(np.nan_to_num(np.mean( + [np.abs((env.k.vehicle.get_speed(veh_id) - env.k.vehicle.get_previous_speed(veh_id))/env.sim_step) for + veh_id in veh_ids if veh_id in env.k.vehicle.previous_speeds.keys()] + ))) + episode.user_data["avg_accel_avs"].append(np.nan_to_num(np.mean( + [np.abs((env.k.vehicle.get_speed(veh_id) - env.k.vehicle.get_previous_speed(veh_id))/env.sim_step) for + veh_id in rl_ids if veh_id in env.k.vehicle.previous_speeds.keys()] + ))) def on_episode_end(info): episode = info["episode"] @@ -281,9 +292,10 @@ def on_episode_end(info): episode.custom_metrics["avg_energy_per_veh"] = np.mean(episode.user_data["avg_energy"]) episode.custom_metrics["avg_mpg_per_veh"] = np.mean(episode.user_data["avg_mpg"]) episode.custom_metrics["avg_mpj_per_veh"] = np.mean(episode.user_data["avg_mpj"]) + episode.custom_metrics["num_cars"] = np.mean(episode.user_data["num_cars"]) def on_train_result(info): - """Store the mean score of the episode, and adjust the number of adversaries.""" + """Store the mean score of 
the episode, and increment or decrement the iteration number for curriculum.""" trainer = info["trainer"] trainer.workers.foreach_worker( lambda ev: ev.foreach_env( @@ -468,7 +480,6 @@ def train_stable_baselines(submodule, flags): """Train policies using the PPO algorithm in stable-baselines.""" from stable_baselines.common.vec_env import DummyVecEnv from stable_baselines import PPO2 - flow_params = submodule.flow_params # Path to the saved files exp_tag = flow_params['exp_tag'] diff --git a/flow/algorithms/centralized_PPO.py b/flow/algorithms/centralized_PPO.py index 57fdd7e33..dca737f75 100644 --- a/flow/algorithms/centralized_PPO.py +++ b/flow/algorithms/centralized_PPO.py @@ -1,14 +1,14 @@ +"""An example of customizing PPO to leverage a centralized critic.""" + from __future__ import absolute_import from __future__ import division from __future__ import print_function -"""An example of customizing PPO to leverage a centralized critic.""" - import argparse import numpy as np from ray.rllib.agents.ppo.ppo import PPOTrainer -from flow.algorithms.custom_ppo import CustomPPOTFPolicy +from flow.algorithms.custom_ppo import CustomPPOTFPolicy, KLCoeffMixin from ray.rllib.evaluation.postprocessing import compute_advantages, \ Postprocessing from ray.rllib.policy.sample_batch import SampleBatch @@ -65,14 +65,17 @@ def __init__(self, obs_space, action_space, num_outputs, model_config, self.register_variables(self.central_vf.variables) def forward(self, input_dict, state, seq_lens): + """Run forward inference.""" return self.model.forward(input_dict, state, seq_lens) def central_value_function(self, central_obs): + """Compute the centralized value function.""" return tf.reshape( self.central_vf( [central_obs]), [-1]) def value_function(self): + """Compute the normal value function; this is only here to make the code run.""" return self.model.value_function() # not used @@ -145,23 +148,27 @@ def __init__(self, @override(RecurrentTFModelV2) def forward_rnn(self, inputs, state, seq_lens): + """Forward inference on the RNN.""" model_out, self._value_out, h, c = self.model( [inputs, seq_lens] + state) return model_out, [h, c] @override(ModelV2) def get_initial_state(self): + """Set up the initial RNN state.""" return [ np.zeros(self.cell_size, np.float32), np.zeros(self.cell_size, np.float32), ] def central_value_function(self, central_obs): + """Compute the central value function.""" return tf.reshape( self.central_vf( [central_obs]), [-1]) def value_function(self): + """Compute the normal value function; this is only here to make the code run.""" return tf.reshape(self._value_out, [-1]) # not used @@ -175,18 +182,18 @@ def __init__(self): ) def compute_central_vf(self, central_obs): + """Run forward inference on the model.""" feed_dict = { self.get_placeholder(CENTRAL_OBS): central_obs, } return self.get_session().run(self.central_value_function, feed_dict) -# Grabs the opponent obs/act and includes it in the experience train_batch, -# and computes GAE using the central vf predictions. 
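# Illustrative aside, not part of the diff: centralized_critic_postprocessing
# below stacks other agents' obs for the central value function, but each
# agent's sample batch covers a different slice of the episode, so the missing
# steps are zero-filled first (this is what fill_missing, further down,
# accomplishes). A minimal standalone sketch of that padding, with
# hypothetical names mirroring fill_missing's arguments:
import numpy as np

def zero_fill_to_span(agent_time, other_agent_time, obs):
    """Zero-pad ``obs`` so it aligns step-for-step with ``agent_time``."""
    padded = np.zeros((agent_time.shape[0], obs.shape[1]))
    for row, t in zip(obs, other_agent_time):
        # copy the other agent's obs only at env steps both agents saw
        match = np.where(agent_time == t)[0]
        if match.size > 0:
            padded[match[0]] = row
    return padded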
def centralized_critic_postprocessing(policy, sample_batch, other_agent_batches=None, episode=None): + """Find all other agents that overlapped with you and stack their obs to be passed to the central VF.""" if policy.loss_initialized(): assert other_agent_batches is not None @@ -207,13 +214,6 @@ def centralized_critic_postprocessing(policy, agent_id: other_agent_batches[agent_id][1]["obs"].copy() for agent_id in other_agent_batches.keys() } - # padded_agent_obs = {agent_id: - # overlap_and_pad_agent( - # time_span, - # rel_agent_time, - # other_obs[agent_id]) - # for agent_id, - # rel_agent_time in rel_agents.items()} padded_agent_obs = { agent_id: fill_missing( agent_time, @@ -228,8 +228,9 @@ def centralized_critic_postprocessing(policy, try: central_obs_batch = np.hstack( (sample_batch["obs"], np.hstack(central_obs_list))) - except: + except Exception as e: # TODO(@ev) this is a bug and needs to be fixed + print('Error in stacking obs ', e) central_obs_batch = sample_batch["obs"] max_vf_agents = policy.model.max_num_agents num_agents = len(rel_agents) + 1 @@ -286,6 +287,7 @@ def time_overlap(time_span, agent_time): def fill_missing(agent_time, other_agent_time, obs): + """Pad the obs to the appropriate length for agents that don't overlap perfectly in time.""" # shortcut, the two overlap perfectly if np.sum(agent_time == other_agent_time) == agent_time.shape[0]: return obs @@ -314,15 +316,9 @@ def overlap_and_pad_agent(time_span, agent_time, obs): assert time_overlap(time_span, agent_time) print(time_span) print(agent_time) - if time_span[0] == 7 or agent_time[0] == 7: - import ipdb - ipdb.set_trace() # FIXME(ev) some of these conditions can be combined # no padding needed if agent_time[0] == time_span[0] and agent_time[1] == time_span[1]: - if obs.shape[0] < 200: - import ipdb - ipdb.set_trace() return obs # agent enters before time_span starts and exits before time_span end if agent_time[0] < time_span[0] and agent_time[1] < time_span[1]: @@ -331,9 +327,6 @@ def overlap_and_pad_agent(time_span, agent_time, obs): overlap_obs = obs[non_overlap_time:] padding = np.zeros((missing_time, obs.shape[1])) obs_concat = np.concatenate((overlap_obs, padding)) - if obs_concat.shape[0] < 200: - import ipdb - ipdb.set_trace() return obs_concat # agent enters after time_span starts and exits after time_span ends elif agent_time[0] > time_span[0] and agent_time[1] > time_span[1]: @@ -342,9 +335,6 @@ def overlap_and_pad_agent(time_span, agent_time, obs): missing_time = agent_time[0] - time_span[0] padding = np.zeros((missing_time, obs.shape[1])) obs_concat = np.concatenate((padding, overlap_obs)) - if obs_concat.shape[0] < 200: - import ipdb - ipdb.set_trace() return obs_concat # agent time is entirely contained in time_span elif agent_time[0] >= time_span[0] and agent_time[1] <= time_span[1]: @@ -357,9 +347,6 @@ def overlap_and_pad_agent(time_span, agent_time, obs): if missing_right > 0: padding = np.zeros((missing_right, obs.shape[1])) obs_concat = np.concatenate((obs_concat, padding)) - if obs_concat.shape[0] < 200: - import ipdb - ipdb.set_trace() return obs_concat # agent time totally contains time_span elif agent_time[0] <= time_span[0] and agent_time[1] >= time_span[1]: @@ -370,14 +357,11 @@ def overlap_and_pad_agent(time_span, agent_time, obs): overlap_obs = overlap_obs[non_overlap_left:] if non_overlap_right > 0: overlap_obs = overlap_obs[:-non_overlap_right] - if overlap_obs.shape[0] < 200: - import ipdb - ipdb.set_trace() return overlap_obs -# Copied from PPO but optimizing the central value 
function def loss_with_central_critic(policy, model, dist_class, train_batch): + """Set up the PPO loss but replace the VF loss with the centralized VF loss.""" CentralizedValueMixin.__init__(policy) logits, state = model.from_batch(train_batch) @@ -408,6 +392,8 @@ def loss_with_central_critic(policy, model, dist_class, train_batch): class PPOLoss(object): + """Object containing the PPO loss function.""" + def __init__(self, action_space, dist_class, @@ -471,6 +457,7 @@ def __init__(self, model_config : dict, optional model config for use in specifying action distributions. """ + def reduce_mean_valid(t): return tf.reduce_mean(tf.boolean_mask(t, valid_mask)) @@ -507,28 +494,13 @@ def reduce_mean_valid(t): def new_ppo_surrogate_loss(policy, model, dist_class, train_batch): + """Return the PPO loss with the centralized value function.""" loss = loss_with_central_critic(policy, model, dist_class, train_batch) return loss -class KLCoeffMixin(object): - def __init__(self, config): - # KL Coefficient - self.kl_coeff_val = config["kl_coeff"] - self.kl_target = config["kl_target"] - self.kl_coeff = tf.get_variable( - initializer=tf.constant_initializer(self.kl_coeff_val), - name="kl_coeff", - shape=(), - trainable=False, - dtype=tf.float32) - - def update_kl(self, blah): - pass - - def setup_mixins(policy, obs_space, action_space, config): - # copied from PPO + """Construct additional classes that add on to PPO.""" KLCoeffMixin.__init__(policy, config) EntropyCoeffSchedule.__init__(policy, config["entropy_coeff"], @@ -540,7 +512,7 @@ def setup_mixins(policy, obs_space, action_space, config): def central_vf_stats(policy, train_batch, grads): - # Report the explained variance of the central value function. + """Report the explained variance of the centralized value function.""" return { "vf_explained_var": explained_variance( train_batch[Postprocessing.VALUE_TARGETS], @@ -549,6 +521,7 @@ def central_vf_stats(policy, train_batch, grads): def kl_and_loss_stats(policy, train_batch): + """Training stats to pass to the tensorboard.""" return { "cur_kl_coeff": tf.cast(policy.kl_coeff, tf.float64), "cur_lr": tf.cast(policy.cur_lr, tf.float64), diff --git a/flow/algorithms/custom_ppo.py b/flow/algorithms/custom_ppo.py index 65291f1d4..47a4459aa 100644 --- a/flow/algorithms/custom_ppo.py +++ b/flow/algorithms/custom_ppo.py @@ -1,4 +1,4 @@ -"""PPO but we add in the outflow after the reward to the final reward.""" +"""PPO but without the adaptive KL term that RLlib added.""" from __future__ import absolute_import from __future__ import division from __future__ import print_function @@ -15,11 +15,11 @@ from ray.rllib.utils.explained_variance import explained_variance from ray.rllib.utils.tf_ops import make_tf_callable from ray.rllib.utils import try_import_tf + from ray.rllib.agents.trainer_template import build_trainer from ray.rllib.agents.ppo.ppo import choose_policy_optimizer, DEFAULT_CONFIG from ray.rllib.agents.ppo.ppo import warn_about_bad_reward_scales - tf = try_import_tf() logger = logging.getLogger(__name__) @@ -29,6 +29,8 @@ class PPOLoss(object): + """PPO Loss object.""" + def __init__(self, action_space, dist_class, @@ -92,6 +94,7 @@ def __init__(self, model_config : dict, optional model config for use in specifying action distributions.
""" + def reduce_mean_valid(t): return tf.reduce_mean(tf.boolean_mask(t, valid_mask)) @@ -127,6 +130,7 @@ def reduce_mean_valid(t): def ppo_surrogate_loss(policy, model, dist_class, train_batch): + """Construct and return the PPO loss.""" logits, state = model.from_batch(train_batch) action_dist = dist_class(logits, model) @@ -163,6 +167,7 @@ def ppo_surrogate_loss(policy, model, dist_class, train_batch): def kl_and_loss_stats(policy, train_batch): + """Return statistics for the tensorboard.""" return { "cur_kl_coeff": tf.cast(policy.kl_coeff, tf.float64), "cur_lr": tf.cast(policy.cur_lr, tf.float64), @@ -216,6 +221,7 @@ def postprocess_ppo_gae(policy, def clip_gradients(policy, optimizer, loss): + """If grad_clip is not None, clip the gradients.""" variables = policy.model.trainable_variables() if policy.config["grad_clip"] is not None: grads_and_vars = optimizer.compute_gradients(loss, variables) @@ -229,6 +235,8 @@ def clip_gradients(policy, optimizer, loss): class ValueNetworkMixin(object): + """Construct the value function.""" + def __init__(self, obs_space, action_space, config): if config["use_gae"]: @@ -242,7 +250,7 @@ def value(ob, prev_action, prev_reward, *state): [prev_reward]), "is_training": tf.convert_to_tensor(False), }, [tf.convert_to_tensor([s]) for s in state], - tf.convert_to_tensor([1])) + tf.convert_to_tensor([1])) return self.model.value_function()[0] else: @@ -255,11 +263,13 @@ def value(ob, prev_action, prev_reward, *state): def setup_config(policy, obs_space, action_space, config): + """Add additional custom options from the config.""" # auto set the model option for layer sharing config["model"]["vf_share_layers"] = config["vf_share_layers"] def setup_mixins(policy, obs_space, action_space, config): + """Construct additional classes that add on to PPO.""" KLCoeffMixin.__init__(policy, config) ValueNetworkMixin.__init__(policy, obs_space, action_space, config) EntropyCoeffSchedule.__init__(policy, config["entropy_coeff"], @@ -268,6 +278,8 @@ def setup_mixins(policy, obs_space, action_space, config): class KLCoeffMixin(object): + """Update the KL Coefficient. This is intentionally disabled to match the PPO paper better.""" + def __init__(self, config): # KL Coefficient self.kl_coeff_val = config["kl_coeff"] @@ -280,6 +292,7 @@ def __init__(self, config): dtype=tf.float32) def update_kl(self, blah): + """Disabled to match the PPO paper better.""" pass @@ -300,6 +313,7 @@ def update_kl(self, blah): def validate_config(config): + """Check that the config is set up properly.""" if config["entropy_coeff"] < 0: raise DeprecationWarning("entropy_coeff must be >= 0") if isinstance(config["entropy_coeff"], int): diff --git a/flow/controllers/car_following_models.py b/flow/controllers/car_following_models.py index 42c9b2a9b..280c94d37 100755 --- a/flow/controllers/car_following_models.py +++ b/flow/controllers/car_following_models.py @@ -647,6 +647,7 @@ def __init__(self, def get_accel(self, env): """See parent class.""" + # without generating waves. 
lead_id = env.k.vehicle.get_leader(self.veh_id) if not lead_id: # no car ahead if self.want_max_accel: diff --git a/flow/controllers/velocity_controllers.py b/flow/controllers/velocity_controllers.py index 86868c5f7..4d8bfec79 100644 --- a/flow/controllers/velocity_controllers.py +++ b/flow/controllers/velocity_controllers.py @@ -122,8 +122,6 @@ def get_accel(self, env): or env.k.vehicle.get_x_by_id(self.veh_id) > self.control_length[1])) \ or (self.no_control_edges is not None and len(self.no_control_edges) > 0 and edge in self.no_control_edges): - # TODO(@evinitsky) put back - # or env.k.vehicle.get_edge(self.veh_id) in self.no_control_edges: return None else: # compute the acceleration from the desired velocity diff --git a/flow/core/params.py b/flow/core/params.py index 0527b33c2..94970d614 100755 --- a/flow/core/params.py +++ b/flow/core/params.py @@ -17,7 +17,27 @@ "all_checks": 31 } -LC_MODES = {"aggressive": 0, "no_lat_collide": 512, "strategic": 1621} +LC_MODES = { + "no_lc_safe": 512, + "no_lc_aggressive": 0, + "sumo_default": 1621, + "no_strategic_aggressive": 1108, + "no_strategic_safe": 1620, + "only_strategic_aggressive": 1, + "only_strategic_safe": 513, + "no_cooperative_aggressive": 1105, + "no_cooperative_safe": 1617, + "only_cooperative_aggressive": 4, + "only_cooperative_safe": 516, + "no_speed_gain_aggressive": 1093, + "no_speed_gain_safe": 1605, + "only_speed_gain_aggressive": 16, + "only_speed_gain_safe": 528, + "no_right_drive_aggressive": 1045, + "no_right_drive_safe": 1557, + "only_right_drive_aggressive": 64, + "only_right_drive_safe": 576 +} # Traffic light defaults PROGRAM_ID = 1 @@ -906,14 +926,71 @@ class SumoLaneChangeParams: ---------- lane_change_mode : str or int, optional may be one of the following: + * "no_lc_safe" (default): Disable all SUMO lane changing but still + handle safety checks (collision avoidance and safety-gap enforcement) + in the simulation. Binary is [001000000000] + * "no_lc_aggressive": SUMO lane changes are not executed, collision + avoidance and safety-gap enforcement are off. + Binary is [000000000000] + + * "sumo_default": Execute all changes requested by a custom controller + unless in conflict with TraCI. Binary is [011001010101]. + + * "no_strategic_aggressive": Execute all changes except strategic + (routing) lane changes unless in conflict with TraCI. Collision + avoidance and safety-gap enforcement are off. Binary is [010001010100] + * "no_strategic_safe": Execute all changes except strategic + (routing) lane changes unless in conflict with TraCI. Collision + avoidance and safety-gap enforcement are on. Binary is [011001010100] + * "only_strategic_aggressive": Execute only strategic (routing) lane + changes unless in conflict with TraCI. Collision avoidance and + safety-gap enforcement are off. Binary is [000000000001] + * "only_strategic_safe": Execute only strategic (routing) lane + changes unless in conflict with TraCI. Collision avoidance and + safety-gap enforcement are on. Binary is [001000000001] + + * "no_cooperative_aggressive": Execute all changes except cooperative + (change in order to allow others to change) lane changes unless in + conflict with TraCI. Collision avoidance and safety-gap enforcement + are off. Binary is [010001010001] + * "no_cooperative_safe": Execute all changes except cooperative + lane changes unless in conflict with TraCI. Collision avoidance and + safety-gap enforcement are on. 
Binary is [011001010001] + * "only_cooperative_aggressive": Execute only cooperative lane changes + unless in conflict with TraCI. Collision avoidance and safety-gap + enforcement are off. Binary is [000000000100] + * "only_cooperative_safe": Execute only cooperative lane changes + unless in conflict with TraCI. Collision avoidance and safety-gap + enforcement are on. Binary is [001000000100] + + * "no_speed_gain_aggressive": Execute all changes except speed gain (the + other lane allows for faster driving) lane changes unless in conflict + with TraCI. Collision avoidance and safety-gap enforcement are off. + Binary is [010001000101] + * "no_speed_gain_safe": Execute all changes except speed gain + lane changes unless in conflict with TraCI. Collision avoidance and + safety-gap enforcement are on. Binary is [011001000101] + * "only_speed_gain_aggressive": Execute only speed gain lane changes + unless in conflict with TraCI. Collision avoidance and safety-gap + enforcement are off. Binary is [000000010000] + * "only_speed_gain_safe": Execute only speed gain lane changes + unless in conflict with TraCI. Collision avoidance and safety-gap + enforcement are on. Binary is [001000010000] + + * "no_right_drive_aggressive": Execute all changes except right drive + (obligation to drive on the right) lane changes unless in conflict + with TraCI. Collision avoidance and safety-gap enforcement are off. + Binary is [010000010101] + * "no_right_drive_safe": Execute all changes except right drive + lane changes unless in conflict with TraCI. Collision avoidance and + safety-gap enforcement are on. Binary is [011000010101] + * "only_right_drive_aggressive": Execute only right drive lane changes + unless in conflict with TraCI. Collision avoidance and safety-gap + enforcement are off. Binary is [000001000000] + * "only_right_drive_safe": Execute only right drive lane changes + unless in conflict with TraCI. Collision avoidance and safety-gap + enforcement are on. 
Binary is [001001000000] - * "no_lat_collide" (default): Human cars will not make lane - changes, RL cars can lane change into any space, no matter how - likely it is to crash - * "strategic": Human cars make lane changes in accordance with SUMO - to provide speed boosts - * "aggressive": RL cars are not limited by sumo with regard to - their lane-change actions, and can crash longitudinally * int values may be used to define custom lane change modes for the given vehicles, specified at: http://sumo.dlr.de/wiki/TraCI/Change_Vehicle_State#lane_change_mode_.280xb6.29 @@ -952,7 +1029,7 @@ class SumoLaneChangeParams: """ def __init__(self, - lane_change_mode="no_lat_collide", + lane_change_mode="no_lc_safe", model="LC2013", lc_strategic=1.0, lc_cooperative=1.0, @@ -1060,7 +1137,7 @@ def __init__(self, elif not (isinstance(lane_change_mode, int) or isinstance(lane_change_mode, float)): logging.error("Setting lane change mode to default.") - lane_change_mode = LC_MODES["no_lat_collide"] + lane_change_mode = LC_MODES["no_lc_safe"] self.lane_change_mode = lane_change_mode diff --git a/flow/core/rewards.py b/flow/core/rewards.py index 0f234da7e..6462b0a8c 100755 --- a/flow/core/rewards.py +++ b/flow/core/rewards.py @@ -322,7 +322,7 @@ def energy_consumption(env, gain=.001): rho = 1.225 # air density (kg/m^3) A = 2.6 # vehicle cross sectional area (m^2) for veh_id in env.k.vehicle.get_ids(): - if veh_id not in env.k.vehicle.previous_speeds: + if veh_id not in env.k.vehicle.previous_speeds.keys(): continue speed = env.k.vehicle.get_speed(veh_id) prev_speed = env.k.vehicle.get_previous_speed(veh_id) diff --git a/flow/envs/multiagent/base.py b/flow/envs/multiagent/base.py index 881461d63..5c4795bbe 100644 --- a/flow/envs/multiagent/base.py +++ b/flow/envs/multiagent/base.py @@ -48,6 +48,7 @@ def step(self, rl_actions): info : dict contains other diagnostic information from the previous action """ + done = {} for _ in range(self.env_params.sims_per_step): if self.time_counter <= self.env_params.sims_per_step * self.env_params.warmup_steps: self.observed_ids.update(self.k.vehicle.get_ids()) @@ -110,9 +111,11 @@ def step(self, rl_actions): print('A CRASH! A CRASH!!!!!! 
AAAAAAAAAH!!!!!') break + done.update({key: True for key in self.k.vehicle.get_arrived_ids()}) + states = self.get_state() - done = {key: key in self.k.vehicle.get_arrived_ids() - for key in states.keys()} + done.update({key: key in self.k.vehicle.get_arrived_ids() + for key in states.keys()}) if crash or (self.time_counter >= self.env_params.sims_per_step * (self.env_params.warmup_steps + self.env_params.horizon)): done['__all__'] = True @@ -128,7 +131,9 @@ def step(self, rl_actions): reward = self.compute_reward(rl_actions, fail=crash) if self.env_params.done_at_exit: - for rl_id in self.k.vehicle.get_arrived_rl_ids(): + # pull out the done keys that might not have corresponding states + valid_ids = [key for key, val in done.items() if val] + for rl_id in valid_ids: done[rl_id] = True reward[rl_id] = 0 states[rl_id] = -1 * np.ones(self.observation_space.shape[0]) diff --git a/flow/envs/multiagent/i210.py b/flow/envs/multiagent/i210.py index ad301a3f5..f9bd73dbd 100644 --- a/flow/envs/multiagent/i210.py +++ b/flow/envs/multiagent/i210.py @@ -1,4 +1,5 @@ """Environment for training vehicles to reduce congestion in the I210.""" + from gym.spaces import Box import numpy as np @@ -94,9 +95,11 @@ def __init__(self, env_params, sim_params, network, simulator='traci'): # penalize stops self.penalize_stops = env_params.additional_params["penalize_stops"] + self.stop_penalty = env_params.additional_params["stop_penalty"] # penalize accel self.penalize_accel = env_params.additional_params.get("penalize_accel", False) + self.accel_penalty = env_params.additional_params["accel_penalty"] @property def observation_space(self): @@ -142,16 +145,9 @@ def _apply_rl_actions(self, rl_actions): if rl_actions: for rl_id, actions in rl_actions.items(): accel = actions[0] - - # lane_change_softmax = np.exp(actions[1:4]) - # lane_change_softmax /= np.sum(lane_change_softmax) - # lane_change_action = np.random.choice([-1, 0, 1], - # p=lane_change_softmax) id_list.append(rl_id) accel_list.append(accel) self.k.vehicle.apply_acceleration(id_list, accel_list) - # self.k.vehicle.apply_lane_change(rl_id, lane_change_action) - # print('time to apply actions is ', time() - t) def in_control_range(self, veh_id): """Return if a veh_id is on an edge that is allowed to be controlled. 
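# Illustrative aside, not part of the diff: the compute_reward hunk below
# replaces the single-follower speed term with a look-back sum over up to
# look_back_length trailing vehicles. A hypothetical standalone helper showing
# the intended shape of that computation (speeds[0] is the AV, speeds[1:] its
# followers; each term is normalized by des_speed ** 2 times the look-back
# length so the total stays near [0, 1]):
import numpy as np

def look_back_speed_reward(speeds, des_speed, look_back_length):
    """Sum speed-tracking terms for an AV and its trailing platoon."""
    total = 0.0
    for speed in speeds[:look_back_length + 1]:
        total += (des_speed - np.abs(speed - des_speed)) ** 2
    return total / (des_speed ** 2 * look_back_length)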
@@ -194,6 +190,7 @@ def compute_reward(self, rl_actions, **kwargs): rewards = {} valid_ids = [rl_id for rl_id in self.k.vehicle.get_rl_ids() if self.in_control_range(rl_id)] + valid_human_ids = [rl_id for rl_id in self.k.vehicle.get_ids() if self.in_control_range(rl_id)] if self.env_params.additional_params["local_reward"]: des_speed = self.env_params.additional_params["target_velocity"] @@ -220,26 +217,28 @@ else: break else: - speeds = [] - follow_speed = self.k.vehicle.get_speed(self.k.vehicle.get_follower(rl_id)) - if follow_speed >= 0: - speeds.append(follow_speed) - if self.k.vehicle.get_speed(rl_id) >= 0: - speeds.append(self.k.vehicle.get_speed(rl_id)) - if len(speeds) > 0: - # rescale so the critic can estimate it quickly - rewards[rl_id] = np.mean([(des_speed - np.abs(speed - des_speed)) ** 2 - for speed in speeds]) / (des_speed ** 2) + follow_id = rl_id + reward = (des_speed - np.abs(self.k.vehicle.get_speed(rl_id) - des_speed)) ** 2 + reward /= ((des_speed ** 2) * self.look_back_length) + rewards[rl_id] = reward + + for i in range(self.look_back_length): + follow_id = self.k.vehicle.get_follower(follow_id) + if follow_id not in ["", None]: + follow_speed = self.k.vehicle.get_speed(follow_id) + reward = (des_speed - np.abs(follow_speed - des_speed)) ** 2 + reward /= ((des_speed ** 2) * self.look_back_length) + rewards[rl_id] += reward else: if self.mpg_reward: - reward = np.nan_to_num(miles_per_gallon(self, self.k.vehicle.get_ids(), gain=1.0)) / 100.0 + reward = np.nan_to_num(miles_per_gallon(self, valid_human_ids, gain=1.0)) / 100.0 else: - speeds = self.k.vehicle.get_speed(self.k.vehicle.get_ids()) + speeds = self.k.vehicle.get_speed(valid_human_ids) des_speed = self.env_params.additional_params["target_velocity"] # rescale so the critic can estimate it quickly if self.reroute_on_exit: reward = np.nan_to_num(np.mean([(des_speed - np.abs(speed - des_speed)) - for speed in speeds]) / (des_speed)) + for speed in speeds]) / des_speed) else: reward = np.nan_to_num(np.mean([(des_speed - np.abs(speed - des_speed)) ** 2 for speed in speeds]) / (des_speed ** 2)) @@ -256,10 +256,8 @@ def compute_reward(self, rl_actions, **kwargs): t_headway = max( self.k.vehicle.get_headway(veh_id) / self.k.vehicle.get_speed(veh_id), 0) - # print('time headway is {}, headway is {}'.format(t_headway, self.k.vehicle.get_headway(veh_id))) scaling_factor = max(0, 1 - self.num_training_iters / self.headway_curriculum_iters) penalty += scaling_factor * self.headway_reward_gain * min((t_headway - t_min) / t_min, 0) - # print('penalty is ', penalty) rewards[veh_id] += penalty @@ -274,7 +272,7 @@ def compute_reward(self, rl_actions, **kwargs): follow_id = self.k.vehicle.get_follower(follow_id) if follow_id not in ["", None]: if self.reroute_on_exit: - speed_reward += ((des_speed - np.abs(speed - des_speed))) / (des_speed) + speed_reward += (des_speed - np.abs(speed - des_speed)) / des_speed else: speed_reward += ((des_speed - np.abs(speed - des_speed)) ** 2) / (des_speed ** 2) else: @@ -287,11 +285,11 @@ def compute_reward(self, rl_actions, **kwargs): speed = self.k.vehicle.get_speed(veh_id) if self.penalize_stops: if speed < 1.0: - rewards[veh_id] -= .01 + rewards[veh_id] -= self.stop_penalty if self.penalize_accel and veh_id in self.k.vehicle.previous_speeds: prev_speed = self.k.vehicle.get_previous_speed(veh_id) abs_accel = abs(speed - prev_speed) / self.sim_step - rewards[veh_id] -= abs_accel / 400.0 + rewards[veh_id] -= abs_accel
* self.accel_penalty # print('time to get reward is ', time() - t) return rewards @@ -324,8 +322,6 @@ def additional_command(self): if edge == self.exit_edge and \ (self.k.vehicle.get_position(veh_id) > self.k.network.edge_length(self.exit_edge) - 100) \ and self.k.vehicle.get_leader(veh_id) is None: - # if self.step_counter > 6000: - # import ipdb; ipdb.set_trace() type_id = self.k.vehicle.get_type(veh_id) # remove the vehicle self.k.vehicle.remove(veh_id) @@ -334,7 +330,7 @@ def additional_command(self): del valid_lanes[index] # reintroduce it at the start of the network # TODO(@evinitsky) select the lane and speed a bit more cleanly - # Note, the position is 10 so you are not overlapping with the inflow car that is being removed. + # Note, the position is 20 so you are not overlapping with the inflow car that is being removed. # this allows the vehicle to be immediately inserted. try: self.k.vehicle.add( @@ -405,6 +401,17 @@ def step(self, rl_actions): done[rl_id] = True reward[rl_id] = 0 state[rl_id] = -1 * np.ones(self.observation_space.shape[0]) + else: + # you have to catch the vehicles on the exit edge; they have not yet + # received a done when the env terminates + if done['__all__']: + on_exit_edge = [rl_id for rl_id in self.k.vehicle.get_rl_ids() + if self.on_exit_edge(rl_id)] + for rl_id in on_exit_edge: + done[rl_id] = True + reward[rl_id] = 0 + state[rl_id] = -1 * np.ones(self.observation_space.shape[0]) + return state, reward, done, info diff --git a/flow/networks/__init__.py b/flow/networks/__init__.py index af849031d..2b3faced8 100644 --- a/flow/networks/__init__.py +++ b/flow/networks/__init__.py @@ -16,10 +16,11 @@ from flow.networks.minicity import MiniCityNetwork from flow.networks.highway_ramps import HighwayRampsNetwork from flow.networks.i210_subnetwork import I210SubNetwork +from flow.networks.i210_subnetwork_ghost_cell import I210SubNetworkGhostCell __all__ = [ "Network", "BayBridgeNetwork", "BayBridgeTollNetwork", "BottleneckNetwork", "FigureEightNetwork", "TrafficLightGridNetwork", "HighwayNetwork", "RingNetwork", "MergeNetwork", "MultiRingNetwork", - "MiniCityNetwork", "HighwayRampsNetwork", "I210SubNetwork" + "MiniCityNetwork", "HighwayRampsNetwork", "I210SubNetwork", "I210SubNetworkGhostCell" ] diff --git a/flow/visualize/i210_replay.py b/flow/visualize/i210_replay.py index b2e22d5b3..22fe262b3 100644 --- a/flow/visualize/i210_replay.py +++ b/flow/visualize/i210_replay.py @@ -19,7 +19,7 @@ from ray.tune.registry import register_env from flow.core.util import emission_to_csv, ensure_dir -from flow.core.rewards import vehicle_energy_consumption +from flow.core.rewards import veh_energy_consumption from flow.utils.registry import make_create_env from flow.utils.rllib import get_flow_params from flow.utils.rllib import get_rllib_config @@ -285,7 +285,7 @@ def replay(args, flow_params, output_dir=None, transfer_test=None, rllib_config= per_vehicle_energy_trace[veh_id].append(0) completed_veh_types[veh_id] = env.k.vehicle.get_type(veh_id) else: - per_vehicle_energy_trace[veh_id].append(-1 * vehicle_energy_consumption(env, veh_id)) + per_vehicle_energy_trace[veh_id].append(-1 * veh_energy_consumption(env, veh_id)) if type(done) is dict and done['__all__']: break diff --git a/flow/visualize/time_space_diagram.py b/flow/visualize/time_space_diagram.py index 8daffde86..a9509aa64 100644 --- a/flow/visualize/time_space_diagram.py +++ b/flow/visualize/time_space_diagram.py @@ -17,7 +17,8 @@ python time_space_diagram.py .csv .json """ from flow.utils.rllib import
get_flow_params -from flow.networks import RingNetwork, FigureEightNetwork, MergeNetwork, I210SubNetwork, HighwayNetwork +from flow.networks import RingNetwork, FigureEightNetwork, MergeNetwork, I210SubNetwork, \ + HighwayNetwork, I210SubNetworkGhostCell import argparse import csv @@ -38,6 +39,7 @@ FigureEightNetwork, MergeNetwork, I210SubNetwork, + I210SubNetworkGhostCell, HighwayNetwork ] @@ -137,6 +139,7 @@ def get_time_space_data(data, params): MergeNetwork: _merge, FigureEightNetwork: _figure_eight, I210SubNetwork: _i210_subnetwork, + I210SubNetworkGhostCell: _i210_subnetwork, HighwayNetwork: _highway, } @@ -431,7 +434,7 @@ def _i210_subnetwork(data, params, all_time): # create the output variables # TODO(@ev) handle subsampling better than this low_time = int(0 / params['sim'].sim_step) - high_time = int(1600 / params['sim'].sim_step) + high_time = int(1600 * params['env'].sims_per_step / params['sim'].sim_step) all_time = all_time[low_time:high_time] # track only vehicles that were around during this time period diff --git a/tests/data/rllib_data/multi_agent/checkpoint_1/checkpoint-1 b/tests/data/rllib_data/multi_agent/checkpoint_1/checkpoint-1 index 0693ed4b6..d346e9dc5 100644 Binary files a/tests/data/rllib_data/multi_agent/checkpoint_1/checkpoint-1 and b/tests/data/rllib_data/multi_agent/checkpoint_1/checkpoint-1 differ diff --git a/tests/data/rllib_data/multi_agent/checkpoint_1/checkpoint-1.tune_metadata b/tests/data/rllib_data/multi_agent/checkpoint_1/checkpoint-1.tune_metadata index 7eef2ef15..febe7b205 100644 Binary files a/tests/data/rllib_data/multi_agent/checkpoint_1/checkpoint-1.tune_metadata and b/tests/data/rllib_data/multi_agent/checkpoint_1/checkpoint-1.tune_metadata differ diff --git a/tests/data/rllib_data/multi_agent/params.json b/tests/data/rllib_data/multi_agent/params.json index 01089f730..1aa9a114c 100644 --- a/tests/data/rllib_data/multi_agent/params.json +++ b/tests/data/rllib_data/multi_agent/params.json @@ -8,17 +8,19 @@ "on_sample_end": null, "on_train_result": null }, - "clip_actions": false, + "clip_actions": true, "clip_param": 0.3, "clip_rewards": null, "collect_metrics_timeout": 180, "compress_observations": false, "custom_resources_per_worker": {}, + "eager": false, + "eager_tracing": false, "entropy_coeff": 0.0, "entropy_coeff_schedule": null, - "env": "MultiWaveAttenuationPOEnv-v0", + "env": "MultiAgentAccelPOEnv-v1", "env_config": { - "flow_params": "{\n \"env\": {\n \"additional_params\": {\n \"max_accel\": 1,\n \"max_decel\": 1,\n \"ring_length\": [\n 230,\n 230\n ],\n \"target_velocity\": 4\n },\n \"clip_actions\": true,\n \"evaluate\": false,\n \"horizon\": 3000,\n \"sims_per_step\": 1,\n \"warmup_steps\": 750\n },\n \"env_name\": \"MultiWaveAttenuationPOEnv\",\n \"exp_tag\": \"lord_of_numrings1\",\n \"initial\": {\n \"additional_params\": {},\n \"bunching\": 20.0,\n \"edges_distribution\": \"all\",\n \"lanes_distribution\": Infinity,\n \"min_gap\": 0,\n \"perturbation\": 0.0,\n \"shuffle\": false,\n \"spacing\": \"custom\",\n \"x0\": 0\n },\n \"net\": {\n \"additional_params\": {\n \"lanes\": 1,\n \"length\": 230,\n \"num_rings\": 1,\n \"resolution\": 40,\n \"speed_limit\": 30\n },\n \"inflows\": {\n \"_InFlows__flows\": []\n },\n \"osm_path\": null,\n \"template\": null\n },\n \"network\": \"MultiRingNetwork\",\n \"sim\": {\n \"color_vehicles\": true,\n \"emission_path\": null,\n \"lateral_resolution\": null,\n \"no_step_log\": true,\n \"num_clients\": 1,\n \"overtake_right\": false,\n \"port\": null,\n \"print_warnings\": true,\n \"pxpm\": 
2,\n \"render\": false,\n \"restart_instance\": false,\n \"save_render\": false,\n \"seed\": null,\n \"show_radius\": false,\n \"sight_radius\": 25,\n \"sim_step\": 0.1,\n \"teleport_time\": -1\n },\n \"simulator\": \"traci\",\n \"veh\": [\n {\n \"acceleration_controller\": [\n \"IDMController\",\n {\n \"noise\": 0.2\n }\n ],\n \"car_following_params\": {\n \"controller_params\": {\n \"accel\": 2.6,\n \"carFollowModel\": \"IDM\",\n \"decel\": 4.5,\n \"impatience\": 0.5,\n \"maxSpeed\": 30,\n \"minGap\": 2.5,\n \"sigma\": 0.5,\n \"speedDev\": 0.1,\n \"speedFactor\": 1.0,\n \"tau\": 1.0\n },\n \"speed_mode\": 25\n },\n \"initial_speed\": 0,\n \"lane_change_controller\": [\n \"SimLaneChangeController\",\n {}\n ],\n \"lane_change_params\": {\n \"controller_params\": {\n \"laneChangeModel\": \"LC2013\",\n \"lcCooperative\": \"1.0\",\n \"lcKeepRight\": \"1.0\",\n \"lcSpeedGain\": \"1.0\",\n \"lcStrategic\": \"1.0\"\n },\n \"lane_change_mode\": 512\n },\n \"num_vehicles\": 21,\n \"routing_controller\": [\n \"ContinuousRouter\",\n {}\n ],\n \"veh_id\": \"human_0\"\n },\n {\n \"acceleration_controller\": [\n \"RLController\",\n {}\n ],\n \"car_following_params\": {\n \"controller_params\": {\n \"accel\": 2.6,\n \"carFollowModel\": \"IDM\",\n \"decel\": 4.5,\n \"impatience\": 0.5,\n \"maxSpeed\": 30,\n \"minGap\": 2.5,\n \"sigma\": 0.5,\n \"speedDev\": 0.1,\n \"speedFactor\": 1.0,\n \"tau\": 1.0\n },\n \"speed_mode\": 25\n },\n \"initial_speed\": 0,\n \"lane_change_controller\": [\n \"SimLaneChangeController\",\n {}\n ],\n \"lane_change_params\": {\n \"controller_params\": {\n \"laneChangeModel\": \"LC2013\",\n \"lcCooperative\": \"1.0\",\n \"lcKeepRight\": \"1.0\",\n \"lcSpeedGain\": \"1.0\",\n \"lcStrategic\": \"1.0\"\n },\n \"lane_change_mode\": 512\n },\n \"num_vehicles\": 1,\n \"routing_controller\": [\n \"ContinuousRouter\",\n {}\n ],\n \"veh_id\": \"rl_0\"\n }\n ]\n}", + "flow_params": "{\n \"env\": {\n \"additional_params\": {\n \"max_accel\": 3,\n \"max_decel\": 3,\n \"sort_vehicles\": false,\n \"target_velocity\": 20\n },\n \"clip_actions\": true,\n \"evaluate\": false,\n \"horizon\": 1500,\n \"sims_per_step\": 1,\n \"warmup_steps\": 0\n },\n \"env_name\": \"flow.envs.multiagent.ring.accel.MultiAgentAccelPOEnv\",\n \"exp_tag\": \"multiagent_figure_eight\",\n \"initial\": {\n \"additional_params\": {},\n \"bunching\": 0,\n \"edges_distribution\": \"all\",\n \"lanes_distribution\": Infinity,\n \"min_gap\": 0,\n \"perturbation\": 0.0,\n \"shuffle\": false,\n \"spacing\": \"uniform\",\n \"x0\": 0\n },\n \"net\": {\n \"additional_params\": {\n \"lanes\": 1,\n \"radius_ring\": 30,\n \"resolution\": 40,\n \"speed_limit\": 30\n },\n \"inflows\": {\n \"_InFlows__flows\": []\n },\n \"osm_path\": null,\n \"template\": null\n },\n \"network\": \"flow.networks.figure_eight.FigureEightNetwork\",\n \"sim\": {\n \"color_by_speed\": false,\n \"emission_path\": null,\n \"force_color_update\": false,\n \"lateral_resolution\": null,\n \"no_step_log\": true,\n \"num_clients\": 1,\n \"overtake_right\": false,\n \"port\": null,\n \"print_warnings\": true,\n \"pxpm\": 2,\n \"render\": false,\n \"restart_instance\": false,\n \"save_render\": false,\n \"seed\": null,\n \"show_radius\": false,\n \"sight_radius\": 25,\n \"sim_step\": 0.1,\n \"teleport_time\": -1,\n \"use_ballistic\": false\n },\n \"simulator\": \"traci\",\n \"veh\": [\n {\n \"acceleration_controller\": [\n \"IDMController\",\n {\n \"noise\": 0.2\n }\n ],\n \"car_following_params\": {\n \"controller_params\": {\n \"accel\": 2.6,\n \"carFollowModel\": 
\"IDM\",\n \"decel\": 1.5,\n \"impatience\": 0.5,\n \"maxSpeed\": 30,\n \"minGap\": 2.5,\n \"sigma\": 0.5,\n \"speedDev\": 0.1,\n \"speedFactor\": 1.0,\n \"tau\": 1.0\n },\n \"speed_mode\": 1\n },\n \"initial_speed\": 0,\n \"lane_change_controller\": [\n \"SimLaneChangeController\",\n {}\n ],\n \"lane_change_params\": {\n \"controller_params\": {\n \"laneChangeModel\": \"LC2013\",\n \"lcCooperative\": \"1.0\",\n \"lcKeepRight\": \"1.0\",\n \"lcSpeedGain\": \"1.0\",\n \"lcStrategic\": \"1.0\"\n },\n \"lane_change_mode\": 512\n },\n \"num_vehicles\": 6,\n \"routing_controller\": [\n \"ContinuousRouter\",\n {}\n ],\n \"veh_id\": \"human_0\"\n },\n {\n \"acceleration_controller\": [\n \"RLController\",\n {}\n ],\n \"car_following_params\": {\n \"controller_params\": {\n \"accel\": 3,\n \"carFollowModel\": \"IDM\",\n \"decel\": 3,\n \"impatience\": 0.5,\n \"maxSpeed\": 30,\n \"minGap\": 2.5,\n \"sigma\": 0.5,\n \"speedDev\": 0.1,\n \"speedFactor\": 1.0,\n \"tau\": 1.0\n },\n \"speed_mode\": 1\n },\n \"initial_speed\": 0,\n \"lane_change_controller\": [\n \"SimLaneChangeController\",\n {}\n ],\n \"lane_change_params\": {\n \"controller_params\": {\n \"laneChangeModel\": \"LC2013\",\n \"lcCooperative\": \"1.0\",\n \"lcKeepRight\": \"1.0\",\n \"lcSpeedGain\": \"1.0\",\n \"lcStrategic\": \"1.0\"\n },\n \"lane_change_mode\": 512\n },\n \"num_vehicles\": 1,\n \"routing_controller\": [\n \"ContinuousRouter\",\n {}\n ],\n \"veh_id\": \"rl_0\"\n },\n {\n \"acceleration_controller\": [\n \"IDMController\",\n {\n \"noise\": 0.2\n }\n ],\n \"car_following_params\": {\n \"controller_params\": {\n \"accel\": 2.6,\n \"carFollowModel\": \"IDM\",\n \"decel\": 1.5,\n \"impatience\": 0.5,\n \"maxSpeed\": 30,\n \"minGap\": 2.5,\n \"sigma\": 0.5,\n \"speedDev\": 0.1,\n \"speedFactor\": 1.0,\n \"tau\": 1.0\n },\n \"speed_mode\": 1\n },\n \"initial_speed\": 0,\n \"lane_change_controller\": [\n \"SimLaneChangeController\",\n {}\n ],\n \"lane_change_params\": {\n \"controller_params\": {\n \"laneChangeModel\": \"LC2013\",\n \"lcCooperative\": \"1.0\",\n \"lcKeepRight\": \"1.0\",\n \"lcSpeedGain\": \"1.0\",\n \"lcStrategic\": \"1.0\"\n },\n \"lane_change_mode\": 512\n },\n \"num_vehicles\": 6,\n \"routing_controller\": [\n \"ContinuousRouter\",\n {}\n ],\n \"veh_id\": \"human_1\"\n },\n {\n \"acceleration_controller\": [\n \"RLController\",\n {}\n ],\n \"car_following_params\": {\n \"controller_params\": {\n \"accel\": 3,\n \"carFollowModel\": \"IDM\",\n \"decel\": 3,\n \"impatience\": 0.5,\n \"maxSpeed\": 30,\n \"minGap\": 2.5,\n \"sigma\": 0.5,\n \"speedDev\": 0.1,\n \"speedFactor\": 1.0,\n \"tau\": 1.0\n },\n \"speed_mode\": 1\n },\n \"initial_speed\": 0,\n \"lane_change_controller\": [\n \"SimLaneChangeController\",\n {}\n ],\n \"lane_change_params\": {\n \"controller_params\": {\n \"laneChangeModel\": \"LC2013\",\n \"lcCooperative\": \"1.0\",\n \"lcKeepRight\": \"1.0\",\n \"lcSpeedGain\": \"1.0\",\n \"lcStrategic\": \"1.0\"\n },\n \"lane_change_mode\": 512\n },\n \"num_vehicles\": 1,\n \"routing_controller\": [\n \"ContinuousRouter\",\n {}\n ],\n \"veh_id\": \"rl_1\"\n }\n ]\n}", "run": "PPO" }, "evaluation_config": {}, @@ -26,7 +28,7 @@ "evaluation_num_episodes": 10, "gamma": 0.999, "grad_clip": null, - "horizon": 3000, + "horizon": 1500, "ignore_worker_failures": false, "input": "sampler", "input_evaluation": [ @@ -34,27 +36,31 @@ "wis" ], "kl_coeff": 0.2, - "kl_target": 0.01, - "lambda": 1.0, + "kl_target": 0.02, + "lambda": 0.97, "local_tf_session_args": { "inter_op_parallelism_threads": 8, 
"intra_op_parallelism_threads": 8 }, - "log_level": "INFO", + "log_level": "WARN", "log_sys_usage": true, - "lr": 1e-05, + "lr": 5e-05, "lr_schedule": null, + "memory": 0, + "memory_per_worker": 0, "metrics_smoothing_episodes": 100, "min_iter_time_s": 0, "model": { "conv_activation": "relu", "conv_filters": null, + "custom_action_dist": null, "custom_model": null, "custom_options": {}, "custom_preprocessor": null, "dim": 84, "fcnet_activation": "tanh", "fcnet_hiddens": [ + 32, 32, 32 ], @@ -75,23 +81,25 @@ "policies": { "av": [ "", - "Box(3,)", + "Box(6,)", "Box(1,)", {} ] }, - "policies_to_train": [ - "av" - ], - "policy_mapping_fn": "tune.function(.policy_mapping_fn at 0x7fda132e6c80>)" + "policies_to_train": null, + "policy_mapping_fn": "" }, + "no_done_at_end": false, + "no_eager_on_workers": false, "num_cpus_for_driver": 1, "num_cpus_per_worker": 1, "num_envs_per_worker": 1, "num_gpus": 0, "num_gpus_per_worker": 0, - "num_sgd_iter": 30, + "num_sgd_iter": 10, "num_workers": 2, + "object_store_memory": 0, + "object_store_memory_per_worker": 0, "observation_filter": "NoFilter", "optimizer": {}, "output": null, @@ -110,7 +118,7 @@ "sgd_minibatch_size": 128, "shuffle_buffer_size": 0, "shuffle_sequences": true, - "simple_optimizer": true, + "simple_optimizer": false, "soft_horizon": false, "synchronize_filters": true, "tf_session_args": { @@ -126,7 +134,7 @@ "log_device_placement": false }, "timesteps_per_iteration": 0, - "train_batch_size": 60000, + "train_batch_size": 30000, "use_gae": true, "vf_clip_param": 10.0, "vf_loss_coeff": 1.0, diff --git a/tests/data/rllib_data/multi_agent/params.pkl b/tests/data/rllib_data/multi_agent/params.pkl index cd832aa1c..192cf7558 100644 Binary files a/tests/data/rllib_data/multi_agent/params.pkl and b/tests/data/rllib_data/multi_agent/params.pkl differ diff --git a/tests/data/rllib_data/single_agent/checkpoint_1/checkpoint-1 b/tests/data/rllib_data/single_agent/checkpoint_1/checkpoint-1 index f8a7e8976..b7ae94640 100644 Binary files a/tests/data/rllib_data/single_agent/checkpoint_1/checkpoint-1 and b/tests/data/rllib_data/single_agent/checkpoint_1/checkpoint-1 differ diff --git a/tests/data/rllib_data/single_agent/checkpoint_1/checkpoint-1.tune_metadata b/tests/data/rllib_data/single_agent/checkpoint_1/checkpoint-1.tune_metadata index e83b72aea..55b72be28 100644 Binary files a/tests/data/rllib_data/single_agent/checkpoint_1/checkpoint-1.tune_metadata and b/tests/data/rllib_data/single_agent/checkpoint_1/checkpoint-1.tune_metadata differ diff --git a/tests/data/rllib_data/single_agent/params.json b/tests/data/rllib_data/single_agent/params.json index c5e605ef4..c93580225 100644 --- a/tests/data/rllib_data/single_agent/params.json +++ b/tests/data/rllib_data/single_agent/params.json @@ -8,17 +8,19 @@ "on_sample_end": null, "on_train_result": null }, - "clip_actions": false, + "clip_actions": true, "clip_param": 0.3, "clip_rewards": null, "collect_metrics_timeout": 180, "compress_observations": false, "custom_resources_per_worker": {}, + "eager": false, + "eager_tracing": false, "entropy_coeff": 0.0, "entropy_coeff_schedule": null, - "env": "WaveAttenuationPOEnv-v0", + "env": "AccelEnv-v0", "env_config": { - "flow_params": "{\n \"env\": {\n \"additional_params\": {\n \"max_accel\": 1,\n \"max_decel\": 1,\n \"ring_length\": [\n 220,\n 270\n ]\n },\n \"clip_actions\": false,\n \"evaluate\": false,\n \"horizon\": 3000,\n \"sims_per_step\": 1,\n \"warmup_steps\": 750\n },\n \"env_name\": \"WaveAttenuationPOEnv\",\n \"exp_tag\": \"stabilizing_the_ring\",\n 
\"initial\": {\n \"additional_params\": {},\n \"bunching\": 0,\n \"edges_distribution\": \"all\",\n \"lanes_distribution\": Infinity,\n \"min_gap\": 0,\n \"perturbation\": 0.0,\n \"shuffle\": false,\n \"spacing\": \"uniform\",\n \"x0\": 0\n },\n \"net\": {\n \"additional_params\": {\n \"lanes\": 1,\n \"length\": 260,\n \"resolution\": 40,\n \"speed_limit\": 30\n },\n \"inflows\": {\n \"_InFlows__flows\": []\n },\n \"osm_path\": null,\n \"template\": null\n },\n \"network\": \"RingNetwork\",\n \"sim\": {\n \"color_vehicles\": true,\n \"emission_path\": null,\n \"lateral_resolution\": null,\n \"no_step_log\": true,\n \"num_clients\": 1,\n \"overtake_right\": false,\n \"port\": null,\n \"print_warnings\": true,\n \"pxpm\": 2,\n \"render\": false,\n \"restart_instance\": false,\n \"save_render\": false,\n \"seed\": null,\n \"show_radius\": false,\n \"sight_radius\": 25,\n \"sim_step\": 0.1,\n \"teleport_time\": -1\n },\n \"simulator\": \"traci\",\n \"veh\": [\n {\n \"acceleration_controller\": [\n \"IDMController\",\n {\n \"noise\": 0.2\n }\n ],\n \"car_following_params\": {\n \"controller_params\": {\n \"accel\": 2.6,\n \"carFollowModel\": \"IDM\",\n \"decel\": 4.5,\n \"impatience\": 0.5,\n \"maxSpeed\": 30,\n \"minGap\": 0,\n \"sigma\": 0.5,\n \"speedDev\": 0.1,\n \"speedFactor\": 1.0,\n \"tau\": 1.0\n },\n \"speed_mode\": 25\n },\n \"initial_speed\": 0,\n \"lane_change_controller\": [\n \"SimLaneChangeController\",\n {}\n ],\n \"lane_change_params\": {\n \"controller_params\": {\n \"laneChangeModel\": \"LC2013\",\n \"lcCooperative\": \"1.0\",\n \"lcKeepRight\": \"1.0\",\n \"lcSpeedGain\": \"1.0\",\n \"lcStrategic\": \"1.0\"\n },\n \"lane_change_mode\": 512\n },\n \"num_vehicles\": 21,\n \"routing_controller\": [\n \"ContinuousRouter\",\n {}\n ],\n \"veh_id\": \"human\"\n },\n {\n \"acceleration_controller\": [\n \"RLController\",\n {}\n ],\n \"car_following_params\": {\n \"controller_params\": {\n \"accel\": 2.6,\n \"carFollowModel\": \"IDM\",\n \"decel\": 4.5,\n \"impatience\": 0.5,\n \"maxSpeed\": 30,\n \"minGap\": 2.5,\n \"sigma\": 0.5,\n \"speedDev\": 0.1,\n \"speedFactor\": 1.0,\n \"tau\": 1.0\n },\n \"speed_mode\": 25\n },\n \"initial_speed\": 0,\n \"lane_change_controller\": [\n \"SimLaneChangeController\",\n {}\n ],\n \"lane_change_params\": {\n \"controller_params\": {\n \"laneChangeModel\": \"LC2013\",\n \"lcCooperative\": \"1.0\",\n \"lcKeepRight\": \"1.0\",\n \"lcSpeedGain\": \"1.0\",\n \"lcStrategic\": \"1.0\"\n },\n \"lane_change_mode\": 512\n },\n \"num_vehicles\": 1,\n \"routing_controller\": [\n \"ContinuousRouter\",\n {}\n ],\n \"veh_id\": \"rl\"\n }\n ]\n}", + "flow_params": "{\n \"env\": {\n \"additional_params\": {\n \"max_accel\": 3,\n \"max_decel\": 3,\n \"sort_vehicles\": false,\n \"target_velocity\": 20\n },\n \"clip_actions\": true,\n \"evaluate\": false,\n \"horizon\": 1500,\n \"sims_per_step\": 1,\n \"warmup_steps\": 0\n },\n \"env_name\": \"flow.envs.ring.accel.AccelEnv\",\n \"exp_tag\": \"singleagent_figure_eight\",\n \"initial\": {\n \"additional_params\": {},\n \"bunching\": 0,\n \"edges_distribution\": \"all\",\n \"lanes_distribution\": Infinity,\n \"min_gap\": 0,\n \"perturbation\": 0.0,\n \"shuffle\": false,\n \"spacing\": \"uniform\",\n \"x0\": 0\n },\n \"net\": {\n \"additional_params\": {\n \"lanes\": 1,\n \"radius_ring\": 30,\n \"resolution\": 40,\n \"speed_limit\": 30\n },\n \"inflows\": {\n \"_InFlows__flows\": []\n },\n \"osm_path\": null,\n \"template\": null\n },\n \"network\": \"flow.networks.figure_eight.FigureEightNetwork\",\n \"sim\": {\n 
\"color_by_speed\": false,\n \"emission_path\": null,\n \"force_color_update\": false,\n \"lateral_resolution\": null,\n \"no_step_log\": true,\n \"num_clients\": 1,\n \"overtake_right\": false,\n \"port\": null,\n \"print_warnings\": true,\n \"pxpm\": 2,\n \"render\": false,\n \"restart_instance\": false,\n \"save_render\": false,\n \"seed\": null,\n \"show_radius\": false,\n \"sight_radius\": 25,\n \"sim_step\": 0.1,\n \"teleport_time\": -1,\n \"use_ballistic\": false\n },\n \"simulator\": \"traci\",\n \"veh\": [\n {\n \"acceleration_controller\": [\n \"IDMController\",\n {\n \"noise\": 0.2\n }\n ],\n \"car_following_params\": {\n \"controller_params\": {\n \"accel\": 2.6,\n \"carFollowModel\": \"IDM\",\n \"decel\": 1.5,\n \"impatience\": 0.5,\n \"maxSpeed\": 30,\n \"minGap\": 2.5,\n \"sigma\": 0.5,\n \"speedDev\": 0.1,\n \"speedFactor\": 1.0,\n \"tau\": 1.0\n },\n \"speed_mode\": 1\n },\n \"initial_speed\": 0,\n \"lane_change_controller\": [\n \"SimLaneChangeController\",\n {}\n ],\n \"lane_change_params\": {\n \"controller_params\": {\n \"laneChangeModel\": \"LC2013\",\n \"lcCooperative\": \"1.0\",\n \"lcKeepRight\": \"1.0\",\n \"lcSpeedGain\": \"1.0\",\n \"lcStrategic\": \"1.0\"\n },\n \"lane_change_mode\": 512\n },\n \"num_vehicles\": 13,\n \"routing_controller\": [\n \"ContinuousRouter\",\n {}\n ],\n \"veh_id\": \"human\"\n },\n {\n \"acceleration_controller\": [\n \"RLController\",\n {}\n ],\n \"car_following_params\": {\n \"controller_params\": {\n \"accel\": 2.6,\n \"carFollowModel\": \"IDM\",\n \"decel\": 1.5,\n \"impatience\": 0.5,\n \"maxSpeed\": 30,\n \"minGap\": 2.5,\n \"sigma\": 0.5,\n \"speedDev\": 0.1,\n \"speedFactor\": 1.0,\n \"tau\": 1.0\n },\n \"speed_mode\": 1\n },\n \"initial_speed\": 0,\n \"lane_change_controller\": [\n \"SimLaneChangeController\",\n {}\n ],\n \"lane_change_params\": {\n \"controller_params\": {\n \"laneChangeModel\": \"LC2013\",\n \"lcCooperative\": \"1.0\",\n \"lcKeepRight\": \"1.0\",\n \"lcSpeedGain\": \"1.0\",\n \"lcStrategic\": \"1.0\"\n },\n \"lane_change_mode\": 512\n },\n \"num_vehicles\": 1,\n \"routing_controller\": [\n \"ContinuousRouter\",\n {}\n ],\n \"veh_id\": \"rl\"\n }\n ]\n}", "run": "PPO" }, "evaluation_config": {}, @@ -26,7 +28,7 @@ "evaluation_num_episodes": 10, "gamma": 0.999, "grad_clip": null, - "horizon": 3000, + "horizon": 1500, "ignore_worker_failures": false, "input": "sampler", "input_evaluation": [ @@ -40,23 +42,27 @@ "inter_op_parallelism_threads": 8, "intra_op_parallelism_threads": 8 }, - "log_level": "INFO", + "log_level": "WARN", "log_sys_usage": true, "lr": 5e-05, "lr_schedule": null, + "memory": 0, + "memory_per_worker": 0, "metrics_smoothing_episodes": 100, "min_iter_time_s": 0, "model": { "conv_activation": "relu", "conv_filters": null, + "custom_action_dist": null, "custom_model": null, "custom_options": {}, "custom_preprocessor": null, "dim": 84, "fcnet_activation": "tanh", "fcnet_hiddens": [ - 3, - 3 + 32, + 32, + 32 ], "framestack": true, "free_log_std": false, @@ -76,6 +82,8 @@ "policies_to_train": null, "policy_mapping_fn": null }, + "no_done_at_end": false, + "no_eager_on_workers": false, "num_cpus_for_driver": 1, "num_cpus_per_worker": 1, "num_envs_per_worker": 1, @@ -83,6 +91,8 @@ "num_gpus_per_worker": 0, "num_sgd_iter": 10, "num_workers": 2, + "object_store_memory": 0, + "object_store_memory_per_worker": 0, "observation_filter": "NoFilter", "optimizer": {}, "output": null, @@ -117,7 +127,7 @@ "log_device_placement": false }, "timesteps_per_iteration": 0, - "train_batch_size": 60000, + 
"train_batch_size": 30000, "use_gae": true, "vf_clip_param": 10.0, "vf_loss_coeff": 1.0, diff --git a/tests/data/rllib_data/single_agent/params.pkl b/tests/data/rllib_data/single_agent/params.pkl index 511d34343..e69753b7f 100644 Binary files a/tests/data/rllib_data/single_agent/params.pkl and b/tests/data/rllib_data/single_agent/params.pkl differ diff --git a/tests/fast_tests/test_vehicles.py b/tests/fast_tests/test_vehicles.py index a37b235ff..9ac8b8e50 100644 --- a/tests/fast_tests/test_vehicles.py +++ b/tests/fast_tests/test_vehicles.py @@ -33,7 +33,7 @@ def test_speed_lane_change_modes(self): speed_mode='obey_safe_speed', ), lane_change_params=SumoLaneChangeParams( - lane_change_mode="no_lat_collide", + lane_change_mode="no_lc_safe", ) ) @@ -56,7 +56,7 @@ def test_speed_lane_change_modes(self): self.assertEqual(vehicles.type_parameters["typeB"][ "car_following_params"].speed_mode, 0) self.assertEqual(vehicles.type_parameters["typeB"][ - "lane_change_params"].lane_change_mode, 1621) + "lane_change_params"].lane_change_mode, 512) vehicles.add( "typeC", @@ -89,7 +89,7 @@ def test_controlled_id_params(self): speed_mode="obey_safe_speed", ), lane_change_params=SumoLaneChangeParams( - lane_change_mode="no_lat_collide", + lane_change_mode="no_lc_safe", )) default_mingap = SumoCarFollowingParams().controller_params["minGap"] self.assertEqual(vehicles.types[0]["type_params"]["minGap"],