diff --git a/examples/exp_configs/non_rl/bay_bridge.py b/examples/exp_configs/non_rl/bay_bridge.py index d7d78360f..f3e0c465f 100644 --- a/examples/exp_configs/non_rl/bay_bridge.py +++ b/examples/exp_configs/non_rl/bay_bridge.py @@ -48,7 +48,7 @@ lc_pushy=0.8, lc_speed_gain=4.0, model="LC2013", - lane_change_mode="no_lat_collide", + lane_change_mode="no_lc_safe", # lcKeepRight=0.8 ), num_vehicles=1400) diff --git a/examples/exp_configs/non_rl/bay_bridge_toll.py b/examples/exp_configs/non_rl/bay_bridge_toll.py index 1b8268aeb..0941823cb 100644 --- a/examples/exp_configs/non_rl/bay_bridge_toll.py +++ b/examples/exp_configs/non_rl/bay_bridge_toll.py @@ -46,7 +46,7 @@ model="LC2013", lcCooperative=0.2, lcSpeedGain=15, - lane_change_mode="no_lat_collide", + lane_change_mode="no_lc_safe", ), num_vehicles=50) diff --git a/examples/exp_configs/non_rl/i210_subnetwork.py b/examples/exp_configs/non_rl/i210_subnetwork.py index 3fac52be2..c6249da51 100644 --- a/examples/exp_configs/non_rl/i210_subnetwork.py +++ b/examples/exp_configs/non_rl/i210_subnetwork.py @@ -41,11 +41,9 @@ if WANT_GHOST_CELL: from flow.networks.i210_subnetwork_ghost_cell import I210SubNetworkGhostCell, EDGES_DISTRIBUTION - highway_start_edge = 'ghost0' else: from flow.networks.i210_subnetwork import I210SubNetwork, EDGES_DISTRIBUTION - highway_start_edge = "119257914" vehicles = VehicleParams() @@ -255,11 +253,6 @@ def valid_ids(env, veh_ids): env.k.vehicle.get_speed(valid_ids(env, env.k.vehicle.get_ids())))), "avg_outflow": lambda env: np.nan_to_num( env.k.vehicle.get_outflow_rate(120)), - # # we multiply by 5 to account for the vehicle length and by 1000 to convert - # # into veh/km - # "avg_density": lambda env: 5 * 1000 * len(env.k.vehicle.get_ids_by_edge( - # edge_id)) / (env.k.network.edge_length(edge_id) - # * env.k.network.num_lanes(edge_id)), "mpg": lambda env: miles_per_gallon(env, valid_ids(env, env.k.vehicle.get_ids()), gain=1.0), "mpj": lambda env: miles_per_megajoule(env, valid_ids(env, env.k.vehicle.get_ids()), gain=1.0), } diff --git a/examples/exp_configs/non_rl/minicity.py b/examples/exp_configs/non_rl/minicity.py index 23b232480..35d5edbce 100644 --- a/examples/exp_configs/non_rl/minicity.py +++ b/examples/exp_configs/non_rl/minicity.py @@ -18,7 +18,7 @@ speed_mode=1, ), lane_change_params=SumoLaneChangeParams( - lane_change_mode="no_lat_collide", + lane_change_mode="no_lc_safe", ), initial_speed=0, num_vehicles=90) diff --git a/examples/exp_configs/rl/multiagent/multiagent_i210.py b/examples/exp_configs/rl/multiagent/multiagent_i210.py index 028e5bc7c..28d15bd8e 100644 --- a/examples/exp_configs/rl/multiagent/multiagent_i210.py +++ b/examples/exp_configs/rl/multiagent/multiagent_i210.py @@ -28,7 +28,7 @@ # WANT_DOWNSTREAM_BOUNDARY = True ON_RAMP = False PENETRATION_RATE = 0.10 -V_DES = 7.0 +V_DES = 5.0 HORIZON = 1000 WARMUP_STEPS = 600 @@ -64,11 +64,11 @@ "mpg_reward": False, # whether to use the MPJ reward. 
Otherwise, defaults to a target velocity reward "mpj_reward": False, - # how many vehicles to look back for the MPG reward - "look_back_length": 1, + # how many vehicles to look back for any reward + "look_back_length": 10, # whether to reroute vehicles once they have exited - "reroute_on_exit": True, - 'target_velocity': 8.0, + "reroute_on_exit": False, + 'target_velocity': 5.0, # how many AVs there can be at once (this is only for centralized critics) "max_num_agents": 10, # which edges we shouldn't apply control on @@ -91,9 +91,10 @@ "speed_reward_gain": 0.5, # penalize stopped vehicles "penalize_stops": True, - + "stop_penalty": 0.05, # penalize accels - "penalize_accel": True + "penalize_accel": True, + "accel_penalty": 0.05 }) # CREATE VEHICLE TYPES AND INFLOWS @@ -264,7 +265,7 @@ sims_per_step=3, warmup_steps=WARMUP_STEPS, additional_params=additional_env_params, - done_at_exit=False + done_at_exit=not additional_env_params["reroute_on_exit"] ), # network-related parameters (see flow.core.params.NetParams and the diff --git a/examples/train.py b/examples/train.py index 112b7fa0d..7bf259691 100644 --- a/examples/train.py +++ b/examples/train.py @@ -113,7 +113,6 @@ def run_model_stablebaseline(flow_params, """ from stable_baselines.common.vec_env import DummyVecEnv, SubprocVecEnv from stable_baselines import PPO2 - if num_cpus == 1: constructor = env_constructor(params=flow_params, version=0)() # The algorithms require a vectorized environment to run @@ -218,8 +217,8 @@ def setup_exps_rllib(flow_params, config["lr"] = tune.grid_search([5e-4, 5e-5]) elif alg_run == "TD3": - agent_cls = get_agent_class(alg_run) - config = deepcopy(agent_cls._default_config) + alg_run = get_agent_class(alg_run) + config = deepcopy(alg_run._default_config) config["num_workers"] = n_cpus config["horizon"] = horizon @@ -242,6 +241,9 @@ def on_episode_start(info): episode.user_data["avg_energy"] = [] episode.user_data["avg_mpg"] = [] episode.user_data["avg_mpj"] = [] + episode.user_data["num_cars"] = [] + episode.user_data["avg_accel_human"] = [] + episode.user_data["avg_accel_avs"] = [] def on_episode_step(info): episode = info["episode"] @@ -271,6 +273,15 @@ def on_episode_step(info): episode.user_data["avg_speed_avs"].append(av_speed) episode.user_data["avg_mpg"].append(miles_per_gallon(env, veh_ids, gain=1.0)) episode.user_data["avg_mpj"].append(miles_per_megajoule(env, veh_ids, gain=1.0)) + episode.user_data["num_cars"].append(len(env.k.vehicle.get_ids())) + episode.user_data["avg_accel_human"].append(np.nan_to_num(np.mean( + [np.abs((env.k.vehicle.get_speed(veh_id) - env.k.vehicle.get_previous_speed(veh_id))/env.sim_step) for + veh_id in veh_ids if veh_id in env.k.vehicle.previous_speeds.keys()] + ))) + episode.user_data["avg_accel_avs"].append(np.nan_to_num(np.mean( + [np.abs((env.k.vehicle.get_speed(veh_id) - env.k.vehicle.get_previous_speed(veh_id))/env.sim_step) for + veh_id in rl_ids if veh_id in env.k.vehicle.previous_speeds.keys()] + ))) def on_episode_end(info): episode = info["episode"] @@ -281,9 +292,10 @@ def on_episode_end(info): episode.custom_metrics["avg_energy_per_veh"] = np.mean(episode.user_data["avg_energy"]) episode.custom_metrics["avg_mpg_per_veh"] = np.mean(episode.user_data["avg_mpg"]) episode.custom_metrics["avg_mpj_per_veh"] = np.mean(episode.user_data["avg_mpj"]) + episode.custom_metrics["num_cars"] = np.mean(episode.user_data["num_cars"]) def on_train_result(info): - """Store the mean score of the episode, and adjust the number of adversaries.""" + """Store the mean score of 
the episode, and increment or decrement the iteration number for curriculum.""" trainer = info["trainer"] trainer.workers.foreach_worker( lambda ev: ev.foreach_env( @@ -468,7 +480,6 @@ def train_stable_baselines(submodule, flags): """Train policies using the PPO algorithm in stable-baselines.""" from stable_baselines.common.vec_env import DummyVecEnv from stable_baselines import PPO2 - flow_params = submodule.flow_params # Path to the saved files exp_tag = flow_params['exp_tag'] diff --git a/flow/algorithms/centralized_PPO.py b/flow/algorithms/centralized_PPO.py index 57fdd7e33..dca737f75 100644 --- a/flow/algorithms/centralized_PPO.py +++ b/flow/algorithms/centralized_PPO.py @@ -1,14 +1,14 @@ +"""An example of customizing PPO to leverage a centralized critic.""" + from __future__ import absolute_import from __future__ import division from __future__ import print_function -"""An example of customizing PPO to leverage a centralized critic.""" - import argparse import numpy as np from ray.rllib.agents.ppo.ppo import PPOTrainer -from flow.algorithms.custom_ppo import CustomPPOTFPolicy +from flow.algorithms.custom_ppo import CustomPPOTFPolicy, KLCoeffMixin from ray.rllib.evaluation.postprocessing import compute_advantages, \ Postprocessing from ray.rllib.policy.sample_batch import SampleBatch @@ -65,14 +65,17 @@ def __init__(self, obs_space, action_space, num_outputs, model_config, self.register_variables(self.central_vf.variables) def forward(self, input_dict, state, seq_lens): + """Run forward inference.""" return self.model.forward(input_dict, state, seq_lens) def central_value_function(self, central_obs): + """Compute the centralized value function.""" return tf.reshape( self.central_vf( [central_obs]), [-1]) def value_function(self): + """Compute the normal value function; this is only here to make the code run.""" return self.model.value_function() # not used @@ -145,23 +148,27 @@ def __init__(self, @override(RecurrentTFModelV2) def forward_rnn(self, inputs, state, seq_lens): + """Forward inference on the RNN.""" model_out, self._value_out, h, c = self.model( [inputs, seq_lens] + state) return model_out, [h, c] @override(ModelV2) def get_initial_state(self): + """Set up the initial RNN state.""" return [ np.zeros(self.cell_size, np.float32), np.zeros(self.cell_size, np.float32), ] def central_value_function(self, central_obs): + """Compute the central value function.""" return tf.reshape( self.central_vf( [central_obs]), [-1]) def value_function(self): + """Compute the normal value function; this is only here to make the code run.""" return tf.reshape(self._value_out, [-1]) # not used @@ -175,18 +182,18 @@ def __init__(self): ) def compute_central_vf(self, central_obs): + """Run forward inference on the model.""" feed_dict = { self.get_placeholder(CENTRAL_OBS): central_obs, } return self.get_session().run(self.central_value_function, feed_dict) -# Grabs the opponent obs/act and includes it in the experience train_batch, -# and computes GAE using the central vf predictions. 
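# Illustrative aside, not part of the diff: centralized_critic_postprocessing
# below stacks other agents' obs for the central value function, but each
# agent's sample batch covers a different slice of the episode, so the missing
# steps are zero-filled first (this is what fill_missing, further down,
# accomplishes). A minimal standalone sketch of that padding, with
# hypothetical names mirroring fill_missing's arguments:
import numpy as np

def zero_fill_to_span(agent_time, other_agent_time, obs):
    """Zero-pad ``obs`` so it aligns step-for-step with ``agent_time``."""
    padded = np.zeros((agent_time.shape[0], obs.shape[1]))
    for row, t in zip(obs, other_agent_time):
        # copy the other agent's obs only at env steps both agents saw
        match = np.where(agent_time == t)[0]
        if match.size > 0:
            padded[match[0]] = row
    return padded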
def centralized_critic_postprocessing(policy, sample_batch, other_agent_batches=None, episode=None): + """Find all other agents that overlapped with you and stack their obs to be passed to the central VF.""" if policy.loss_initialized(): assert other_agent_batches is not None @@ -207,13 +214,6 @@ def centralized_critic_postprocessing(policy, agent_id: other_agent_batches[agent_id][1]["obs"].copy() for agent_id in other_agent_batches.keys() } - # padded_agent_obs = {agent_id: - # overlap_and_pad_agent( - # time_span, - # rel_agent_time, - # other_obs[agent_id]) - # for agent_id, - # rel_agent_time in rel_agents.items()} padded_agent_obs = { agent_id: fill_missing( agent_time, @@ -228,8 +228,9 @@ def centralized_critic_postprocessing(policy, try: central_obs_batch = np.hstack( (sample_batch["obs"], np.hstack(central_obs_list))) - except: + except Exception as e: # TODO(@ev) this is a bug and needs to be fixed + print('Error in stacking obs ', e) central_obs_batch = sample_batch["obs"] max_vf_agents = policy.model.max_num_agents num_agents = len(rel_agents) + 1 @@ -286,6 +287,7 @@ def time_overlap(time_span, agent_time): def fill_missing(agent_time, other_agent_time, obs): + """Pad the obs to the appropriate length for agents that don't overlap perfectly in time.""" # shortcut, the two overlap perfectly if np.sum(agent_time == other_agent_time) == agent_time.shape[0]: return obs @@ -314,15 +316,9 @@ def overlap_and_pad_agent(time_span, agent_time, obs): assert time_overlap(time_span, agent_time) print(time_span) print(agent_time) - if time_span[0] == 7 or agent_time[0] == 7: - import ipdb - ipdb.set_trace() # FIXME(ev) some of these conditions can be combined # no padding needed if agent_time[0] == time_span[0] and agent_time[1] == time_span[1]: - if obs.shape[0] < 200: - import ipdb - ipdb.set_trace() return obs # agent enters before time_span starts and exits before time_span end if agent_time[0] < time_span[0] and agent_time[1] < time_span[1]: @@ -331,9 +327,6 @@ def overlap_and_pad_agent(time_span, agent_time, obs): overlap_obs = obs[non_overlap_time:] padding = np.zeros((missing_time, obs.shape[1])) obs_concat = np.concatenate((overlap_obs, padding)) - if obs_concat.shape[0] < 200: - import ipdb - ipdb.set_trace() return obs_concat # agent enters after time_span starts and exits after time_span ends elif agent_time[0] > time_span[0] and agent_time[1] > time_span[1]: @@ -342,9 +335,6 @@ def overlap_and_pad_agent(time_span, agent_time, obs): missing_time = agent_time[0] - time_span[0] padding = np.zeros((missing_time, obs.shape[1])) obs_concat = np.concatenate((padding, overlap_obs)) - if obs_concat.shape[0] < 200: - import ipdb - ipdb.set_trace() return obs_concat # agent time is entirely contained in time_span elif agent_time[0] >= time_span[0] and agent_time[1] <= time_span[1]: @@ -357,9 +347,6 @@ def overlap_and_pad_agent(time_span, agent_time, obs): if missing_right > 0: padding = np.zeros((missing_right, obs.shape[1])) obs_concat = np.concatenate((obs_concat, padding)) - if obs_concat.shape[0] < 200: - import ipdb - ipdb.set_trace() return obs_concat # agent time totally contains time_span elif agent_time[0] <= time_span[0] and agent_time[1] >= time_span[1]: @@ -370,14 +357,11 @@ def overlap_and_pad_agent(time_span, agent_time, obs): overlap_obs = overlap_obs[non_overlap_left:] if non_overlap_right > 0: overlap_obs = overlap_obs[:-non_overlap_right] - if overlap_obs.shape[0] < 200: - import ipdb - ipdb.set_trace() return overlap_obs -# Copied from PPO but optimizing the central value 
function def loss_with_central_critic(policy, model, dist_class, train_batch): + """Set up the PPO loss but replace the VF loss with the centralized VF loss.""" CentralizedValueMixin.__init__(policy) logits, state = model.from_batch(train_batch) @@ -408,6 +392,8 @@ def loss_with_central_critic(policy, model, dist_class, train_batch): class PPOLoss(object): + """Object containing the PPO loss function.""" + def __init__(self, action_space, dist_class, @@ -471,6 +457,7 @@ def __init__(self, model_config : dict, optional model config for use in specifying action distributions. """ + def reduce_mean_valid(t): return tf.reduce_mean(tf.boolean_mask(t, valid_mask)) @@ -507,28 +494,13 @@ def reduce_mean_valid(t): def new_ppo_surrogate_loss(policy, model, dist_class, train_batch): + """Return the PPO loss with the centralized value function.""" loss = loss_with_central_critic(policy, model, dist_class, train_batch) return loss -class KLCoeffMixin(object): - def __init__(self, config): - # KL Coefficient - self.kl_coeff_val = config["kl_coeff"] - self.kl_target = config["kl_target"] - self.kl_coeff = tf.get_variable( - initializer=tf.constant_initializer(self.kl_coeff_val), - name="kl_coeff", - shape=(), - trainable=False, - dtype=tf.float32) - - def update_kl(self, blah): - pass - - def setup_mixins(policy, obs_space, action_space, config): - # copied from PPO + """Construct additional classes that add on to PPO.""" KLCoeffMixin.__init__(policy, config) EntropyCoeffSchedule.__init__(policy, config["entropy_coeff"], @@ -540,7 +512,7 @@ def setup_mixins(policy, obs_space, action_space, config): def central_vf_stats(policy, train_batch, grads): - # Report the explained variance of the central value function. + """Report the explained variance of the centralized value function.""" return { "vf_explained_var": explained_variance( train_batch[Postprocessing.VALUE_TARGETS], @@ -549,6 +521,7 @@ def central_vf_stats(policy, train_batch, grads): def kl_and_loss_stats(policy, train_batch): + """Training stats to pass to the tensorboard.""" return { "cur_kl_coeff": tf.cast(policy.kl_coeff, tf.float64), "cur_lr": tf.cast(policy.cur_lr, tf.float64), diff --git a/flow/algorithms/custom_ppo.py b/flow/algorithms/custom_ppo.py index 65291f1d4..47a4459aa 100644 --- a/flow/algorithms/custom_ppo.py +++ b/flow/algorithms/custom_ppo.py @@ -1,4 +1,4 @@ -"""PPO but we add in the outflow after the reward to the final reward.""" +"""PPO but without the adaptive KL term that RLlib added.""" from __future__ import absolute_import from __future__ import division from __future__ import print_function @@ -15,11 +15,11 @@ from ray.rllib.utils.explained_variance import explained_variance from ray.rllib.utils.tf_ops import make_tf_callable from ray.rllib.utils import try_import_tf + from ray.rllib.agents.trainer_template import build_trainer from ray.rllib.agents.ppo.ppo import choose_policy_optimizer, DEFAULT_CONFIG from ray.rllib.agents.ppo.ppo import warn_about_bad_reward_scales - tf = try_import_tf() logger = logging.getLogger(__name__) @@ -29,6 +29,8 @@ class PPOLoss(object): + """PPO Loss object.""" + def __init__(self, action_space, dist_class, @@ -92,6 +94,7 @@ def __init__(self, model_config : dict, optional model config for use in specifying action distributions.
""" + def reduce_mean_valid(t): return tf.reduce_mean(tf.boolean_mask(t, valid_mask)) @@ -127,6 +130,7 @@ def reduce_mean_valid(t): def ppo_surrogate_loss(policy, model, dist_class, train_batch): + """Construct and return the PPO loss.""" logits, state = model.from_batch(train_batch) action_dist = dist_class(logits, model) @@ -163,6 +167,7 @@ def ppo_surrogate_loss(policy, model, dist_class, train_batch): def kl_and_loss_stats(policy, train_batch): + """Return statistics for the tensorboard.""" return { "cur_kl_coeff": tf.cast(policy.kl_coeff, tf.float64), "cur_lr": tf.cast(policy.cur_lr, tf.float64), @@ -216,6 +221,7 @@ def postprocess_ppo_gae(policy, def clip_gradients(policy, optimizer, loss): + """If grad_clip is not None, clip the gradients.""" variables = policy.model.trainable_variables() if policy.config["grad_clip"] is not None: grads_and_vars = optimizer.compute_gradients(loss, variables) @@ -229,6 +235,8 @@ def clip_gradients(policy, optimizer, loss): class ValueNetworkMixin(object): + """Construct the value function.""" + def __init__(self, obs_space, action_space, config): if config["use_gae"]: @@ -242,7 +250,7 @@ def value(ob, prev_action, prev_reward, *state): [prev_reward]), "is_training": tf.convert_to_tensor(False), }, [tf.convert_to_tensor([s]) for s in state], - tf.convert_to_tensor([1])) + tf.convert_to_tensor([1])) return self.model.value_function()[0] else: @@ -255,11 +263,13 @@ def value(ob, prev_action, prev_reward, *state): def setup_config(policy, obs_space, action_space, config): + """Add additional custom options from the config.""" # auto set the model option for layer sharing config["model"]["vf_share_layers"] = config["vf_share_layers"] def setup_mixins(policy, obs_space, action_space, config): + """Construct additional classes that add on to PPO.""" KLCoeffMixin.__init__(policy, config) ValueNetworkMixin.__init__(policy, obs_space, action_space, config) EntropyCoeffSchedule.__init__(policy, config["entropy_coeff"], @@ -268,6 +278,8 @@ def setup_mixins(policy, obs_space, action_space, config): class KLCoeffMixin(object): + """Update the KL Coefficient. This is intentionally disabled to match the PPO paper better.""" + def __init__(self, config): # KL Coefficient self.kl_coeff_val = config["kl_coeff"] @@ -280,6 +292,7 @@ def __init__(self, config): dtype=tf.float32) def update_kl(self, blah): + """Disabled to match the PPO paper better.""" pass @@ -300,6 +313,7 @@ def update_kl(self, blah): def validate_config(config): + """Check that the config is set up properly.""" if config["entropy_coeff"] < 0: raise DeprecationWarning("entropy_coeff must be >= 0") if isinstance(config["entropy_coeff"], int): diff --git a/flow/controllers/car_following_models.py b/flow/controllers/car_following_models.py index 42c9b2a9b..280c94d37 100755 --- a/flow/controllers/car_following_models.py +++ b/flow/controllers/car_following_models.py @@ -647,6 +647,7 @@ def __init__(self, def get_accel(self, env): """See parent class.""" + # without generating waves. 
lead_id = env.k.vehicle.get_leader(self.veh_id) if not lead_id: # no car ahead if self.want_max_accel: diff --git a/flow/controllers/velocity_controllers.py b/flow/controllers/velocity_controllers.py index 86868c5f7..4d8bfec79 100644 --- a/flow/controllers/velocity_controllers.py +++ b/flow/controllers/velocity_controllers.py @@ -122,8 +122,6 @@ def get_accel(self, env): or env.k.vehicle.get_x_by_id(self.veh_id) > self.control_length[1])) \ or (self.no_control_edges is not None and len(self.no_control_edges) > 0 and edge in self.no_control_edges): - # TODO(@evinitsky) put back - # or env.k.vehicle.get_edge(self.veh_id) in self.no_control_edges: return None else: # compute the acceleration from the desired velocity diff --git a/flow/core/params.py b/flow/core/params.py index 0527b33c2..94970d614 100755 --- a/flow/core/params.py +++ b/flow/core/params.py @@ -17,7 +17,27 @@ "all_checks": 31 } -LC_MODES = {"aggressive": 0, "no_lat_collide": 512, "strategic": 1621} +LC_MODES = { + "no_lc_safe": 512, + "no_lc_aggressive": 0, + "sumo_default": 1621, + "no_strategic_aggressive": 1108, + "no_strategic_safe": 1620, + "only_strategic_aggressive": 1, + "only_strategic_safe": 513, + "no_cooperative_aggressive": 1105, + "no_cooperative_safe": 1617, + "only_cooperative_aggressive": 4, + "only_cooperative_safe": 516, + "no_speed_gain_aggressive": 1093, + "no_speed_gain_safe": 1605, + "only_speed_gain_aggressive": 16, + "only_speed_gain_safe": 528, + "no_right_drive_aggressive": 1045, + "no_right_drive_safe": 1557, + "only_right_drive_aggressive": 64, + "only_right_drive_safe": 576 +} # Traffic light defaults PROGRAM_ID = 1 @@ -906,14 +926,71 @@ class SumoLaneChangeParams: ---------- lane_change_mode : str or int, optional may be one of the following: + * "no_lc_safe" (default): Disable all SUMO lane changing but still + handle safety checks (collision avoidance and safety-gap enforcement) + in the simulation. Binary is [001000000000] + * "no_lc_aggressive": SUMO lane changes are not executed, collision + avoidance and safety-gap enforcement are off. + Binary is [000000000000] + + * "sumo_default": Execute all changes requested by a custom controller + unless in conflict with TraCI. Binary is [011001010101]. + + * "no_strategic_aggressive": Execute all changes except strategic + (routing) lane changes unless in conflict with TraCI. Collision + avoidance and safety-gap enforcement are off. Binary is [010001010100] + * "no_strategic_safe": Execute all changes except strategic + (routing) lane changes unless in conflict with TraCI. Collision + avoidance and safety-gap enforcement are on. Binary is [011001010100] + * "only_strategic_aggressive": Execute only strategic (routing) lane + changes unless in conflict with TraCI. Collision avoidance and + safety-gap enforcement are off. Binary is [000000000001] + * "only_strategic_safe": Execute only strategic (routing) lane + changes unless in conflict with TraCI. Collision avoidance and + safety-gap enforcement are on. Binary is [001000000001] + + * "no_cooperative_aggressive": Execute all changes except cooperative + (change in order to allow others to change) lane changes unless in + conflict with TraCI. Collision avoidance and safety-gap enforcement + are off. Binary is [010001010001] + * "no_cooperative_safe": Execute all changes except cooperative + lane changes unless in conflict with TraCI. Collision avoidance and + safety-gap enforcement are on. 
Binary is [011001010001] + * "only_cooperative_aggressive": Execute only cooperative lane changes + unless in conflict with TraCI. Collision avoidance and safety-gap + enforcement are off. Binary is [000000000100] + * "only_cooperative_safe": Execute only cooperative lane changes + unless in conflict with TraCI. Collision avoidance and safety-gap + enforcement are on. Binary is [001000000100] + + * "no_speed_gain_aggressive": Execute all changes except speed gain (the + other lane allows for faster driving) lane changes unless in conflict + with TraCI. Collision avoidance and safety-gap enforcement are off. + Binary is [010001000101] + * "no_speed_gain_safe": Execute all changes except speed gain + lane changes unless in conflict with TraCI. Collision avoidance and + safety-gap enforcement are on. Binary is [011001000101] + * "only_speed_gain_aggressive": Execute only speed gain lane changes + unless in conflict with TraCI. Collision avoidance and safety-gap + enforcement are off. Binary is [000000010000] + * "only_speed_gain_safe": Execute only speed gain lane changes + unless in conflict with TraCI. Collision avoidance and safety-gap + enforcement are on. Binary is [001000010000] + + * "no_right_drive_aggressive": Execute all changes except right drive + (obligation to drive on the right) lane changes unless in conflict + with TraCI. Collision avoidance and safety-gap enforcement are off. + Binary is [010000010101] + * "no_right_drive_safe": Execute all changes except right drive + lane changes unless in conflict with TraCI. Collision avoidance and + safety-gap enforcement are on. Binary is [011000010101] + * "only_right_drive_aggressive": Execute only right drive lane changes + unless in conflict with TraCI. Collision avoidance and safety-gap + enforcement are off. Binary is [000001000000] + * "only_right_drive_safe": Execute only right drive lane changes + unless in conflict with TraCI. Collision avoidance and safety-gap + enforcement are on. 
Binary is [001001000000] - * "no_lat_collide" (default): Human cars will not make lane - changes, RL cars can lane change into any space, no matter how - likely it is to crash - * "strategic": Human cars make lane changes in accordance with SUMO - to provide speed boosts - * "aggressive": RL cars are not limited by sumo with regard to - their lane-change actions, and can crash longitudinally * int values may be used to define custom lane change modes for the given vehicles, specified at: http://sumo.dlr.de/wiki/TraCI/Change_Vehicle_State#lane_change_mode_.280xb6.29 @@ -952,7 +1029,7 @@ class SumoLaneChangeParams: """ def __init__(self, - lane_change_mode="no_lat_collide", + lane_change_mode="no_lc_safe", model="LC2013", lc_strategic=1.0, lc_cooperative=1.0, @@ -1060,7 +1137,7 @@ def __init__(self, elif not (isinstance(lane_change_mode, int) or isinstance(lane_change_mode, float)): logging.error("Setting lane change mode to default.") - lane_change_mode = LC_MODES["no_lat_collide"] + lane_change_mode = LC_MODES["no_lc_safe"] self.lane_change_mode = lane_change_mode diff --git a/flow/core/rewards.py b/flow/core/rewards.py index 0f234da7e..6462b0a8c 100755 --- a/flow/core/rewards.py +++ b/flow/core/rewards.py @@ -322,7 +322,7 @@ def energy_consumption(env, gain=.001): rho = 1.225 # air density (kg/m^3) A = 2.6 # vehicle cross sectional area (m^2) for veh_id in env.k.vehicle.get_ids(): - if veh_id not in env.k.vehicle.previous_speeds: + if veh_id not in env.k.vehicle.previous_speeds.keys(): continue speed = env.k.vehicle.get_speed(veh_id) prev_speed = env.k.vehicle.get_previous_speed(veh_id) diff --git a/flow/envs/multiagent/base.py b/flow/envs/multiagent/base.py index 881461d63..5c4795bbe 100644 --- a/flow/envs/multiagent/base.py +++ b/flow/envs/multiagent/base.py @@ -48,6 +48,7 @@ def step(self, rl_actions): info : dict contains other diagnostic information from the previous action """ + done = {} for _ in range(self.env_params.sims_per_step): if self.time_counter <= self.env_params.sims_per_step * self.env_params.warmup_steps: self.observed_ids.update(self.k.vehicle.get_ids()) @@ -110,9 +111,11 @@ def step(self, rl_actions): print('A CRASH! A CRASH!!!!!! 
AAAAAAAAAH!!!!!') break + done.update({key: True for key in self.k.vehicle.get_arrived_ids()}) + states = self.get_state() - done = {key: key in self.k.vehicle.get_arrived_ids() - for key in states.keys()} + done.update({key: key in self.k.vehicle.get_arrived_ids() + for key in states.keys()}) if crash or (self.time_counter >= self.env_params.sims_per_step * (self.env_params.warmup_steps + self.env_params.horizon)): done['__all__'] = True @@ -128,7 +131,9 @@ def step(self, rl_actions): reward = self.compute_reward(rl_actions, fail=crash) if self.env_params.done_at_exit: - for rl_id in self.k.vehicle.get_arrived_rl_ids(): + # pull out the done keys that might not have corresponding states + valid_ids = [key for key, val in done.items() if val] + for rl_id in valid_ids: done[rl_id] = True reward[rl_id] = 0 states[rl_id] = -1 * np.ones(self.observation_space.shape[0]) diff --git a/flow/envs/multiagent/i210.py b/flow/envs/multiagent/i210.py index ad301a3f5..f9bd73dbd 100644 --- a/flow/envs/multiagent/i210.py +++ b/flow/envs/multiagent/i210.py @@ -1,4 +1,5 @@ """Environment for training vehicles to reduce congestion in the I210.""" + from gym.spaces import Box import numpy as np @@ -94,9 +95,11 @@ def __init__(self, env_params, sim_params, network, simulator='traci'): # penalize stops self.penalize_stops = env_params.additional_params["penalize_stops"] + self.stop_penalty = env_params.additional_params["stop_penalty"] # penalize accel self.penalize_accel = env_params.additional_params.get("penalize_accel", False) + self.accel_penalty = env_params.additional_params["accel_penalty"] @property def observation_space(self): @@ -142,16 +145,9 @@ def _apply_rl_actions(self, rl_actions): if rl_actions: for rl_id, actions in rl_actions.items(): accel = actions[0] - - # lane_change_softmax = np.exp(actions[1:4]) - # lane_change_softmax /= np.sum(lane_change_softmax) - # lane_change_action = np.random.choice([-1, 0, 1], - # p=lane_change_softmax) id_list.append(rl_id) accel_list.append(accel) self.k.vehicle.apply_acceleration(id_list, accel_list) - # self.k.vehicle.apply_lane_change(rl_id, lane_change_action) - # print('time to apply actions is ', time() - t) def in_control_range(self, veh_id): """Return if a veh_id is on an edge that is allowed to be controlled. 
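# Illustrative aside, not part of the diff: the compute_reward hunk below
# replaces the single-follower speed term with a look-back sum over up to
# look_back_length trailing vehicles. A hypothetical standalone helper showing
# the intended shape of that computation (speeds[0] is the AV, speeds[1:] its
# followers; each term is normalized by des_speed ** 2 times the look-back
# length so the total stays near [0, 1]):
import numpy as np

def look_back_speed_reward(speeds, des_speed, look_back_length):
    """Sum speed-tracking terms for an AV and its trailing platoon."""
    total = 0.0
    for speed in speeds[:look_back_length + 1]:
        total += (des_speed - np.abs(speed - des_speed)) ** 2
    return total / (des_speed ** 2 * look_back_length)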
@@ -194,6 +190,7 @@ def compute_reward(self, rl_actions, **kwargs): rewards = {} valid_ids = [rl_id for rl_id in self.k.vehicle.get_rl_ids() if self.in_control_range(rl_id)] + valid_human_ids = [rl_id for rl_id in self.k.vehicle.get_ids() if self.in_control_range(rl_id)] if self.env_params.additional_params["local_reward"]: des_speed = self.env_params.additional_params["target_velocity"] @@ -220,26 +217,28 @@ else: break else: - speeds = [] - follow_speed = self.k.vehicle.get_speed(self.k.vehicle.get_follower(rl_id)) - if follow_speed >= 0: - speeds.append(follow_speed) - if self.k.vehicle.get_speed(rl_id) >= 0: - speeds.append(self.k.vehicle.get_speed(rl_id)) - if len(speeds) > 0: - # rescale so the critic can estimate it quickly - rewards[rl_id] = np.mean([(des_speed - np.abs(speed - des_speed)) ** 2 - for speed in speeds]) / (des_speed ** 2) + follow_id = rl_id + reward = (des_speed - np.abs(self.k.vehicle.get_speed(rl_id) - des_speed)) ** 2 + reward /= ((des_speed ** 2) * self.look_back_length) + rewards[rl_id] = reward + + for i in range(self.look_back_length): + follow_id = self.k.vehicle.get_follower(follow_id) + if follow_id not in ["", None]: + follow_speed = self.k.vehicle.get_speed(follow_id) + reward = (des_speed - np.abs(follow_speed - des_speed)) ** 2 + reward /= ((des_speed ** 2) * self.look_back_length) + rewards[rl_id] += reward else: if self.mpg_reward: - reward = np.nan_to_num(miles_per_gallon(self, self.k.vehicle.get_ids(), gain=1.0)) / 100.0 + reward = np.nan_to_num(miles_per_gallon(self, valid_human_ids, gain=1.0)) / 100.0 else: - speeds = self.k.vehicle.get_speed(self.k.vehicle.get_ids()) + speeds = self.k.vehicle.get_speed(valid_human_ids) des_speed = self.env_params.additional_params["target_velocity"] # rescale so the critic can estimate it quickly if self.reroute_on_exit: reward = np.nan_to_num(np.mean([(des_speed - np.abs(speed - des_speed)) - for speed in speeds]) / (des_speed)) + for speed in speeds]) / des_speed) else: reward = np.nan_to_num(np.mean([(des_speed - np.abs(speed - des_speed)) ** 2 for speed in speeds]) / (des_speed ** 2)) @@ -256,10 +256,8 @@ def compute_reward(self, rl_actions, **kwargs): t_headway = max( self.k.vehicle.get_headway(veh_id) / self.k.vehicle.get_speed(veh_id), 0) - # print('time headway is {}, headway is {}'.format(t_headway, self.k.vehicle.get_headway(veh_id))) scaling_factor = max(0, 1 - self.num_training_iters / self.headway_curriculum_iters) penalty += scaling_factor * self.headway_reward_gain * min((t_headway - t_min) / t_min, 0) - # print('penalty is ', penalty) rewards[veh_id] += penalty @@ -274,7 +272,7 @@ def compute_reward(self, rl_actions, **kwargs): follow_id = self.k.vehicle.get_follower(follow_id) if follow_id not in ["", None]: if self.reroute_on_exit: - speed_reward += ((des_speed - np.abs(speed - des_speed))) / (des_speed) + speed_reward += (des_speed - np.abs(speed - des_speed)) / des_speed else: speed_reward += ((des_speed - np.abs(speed - des_speed)) ** 2) / (des_speed ** 2) else: @@ -287,11 +285,11 @@ def compute_reward(self, rl_actions, **kwargs): speed = self.k.vehicle.get_speed(veh_id) if self.penalize_stops: if speed < 1.0: - rewards[veh_id] -= .01 + rewards[veh_id] -= self.stop_penalty if self.penalize_accel and veh_id in self.k.vehicle.previous_speeds: prev_speed = self.k.vehicle.get_previous_speed(veh_id) abs_accel = abs(speed - prev_speed) / self.sim_step - rewards[veh_id] -= abs_accel / 400.0 + rewards[veh_id] -= abs_accel
* self.accel_penalty # print('time to get reward is ', time() - t) return rewards @@ -324,8 +322,6 @@ def additional_command(self): if edge == self.exit_edge and \ (self.k.vehicle.get_position(veh_id) > self.k.network.edge_length(self.exit_edge) - 100) \ and self.k.vehicle.get_leader(veh_id) is None: - # if self.step_counter > 6000: - # import ipdb; ipdb.set_trace() type_id = self.k.vehicle.get_type(veh_id) # remove the vehicle self.k.vehicle.remove(veh_id) @@ -334,7 +330,7 @@ def additional_command(self): del valid_lanes[index] # reintroduce it at the start of the network # TODO(@evinitsky) select the lane and speed a bit more cleanly - # Note, the position is 10 so you are not overlapping with the inflow car that is being removed. + # Note, the position is 20 so you are not overlapping with the inflow car that is being removed. # this allows the vehicle to be immediately inserted. try: self.k.vehicle.add( @@ -405,6 +401,17 @@ def step(self, rl_actions): done[rl_id] = True reward[rl_id] = 0 state[rl_id] = -1 * np.ones(self.observation_space.shape[0]) + else: + # you have to catch the vehicles on the exit edge; they have not yet + # received a done when the env terminates + if done['__all__']: + on_exit_edge = [rl_id for rl_id in self.k.vehicle.get_rl_ids() + if self.on_exit_edge(rl_id)] + for rl_id in on_exit_edge: + done[rl_id] = True + reward[rl_id] = 0 + state[rl_id] = -1 * np.ones(self.observation_space.shape[0]) + return state, reward, done, info diff --git a/flow/networks/__init__.py b/flow/networks/__init__.py index af849031d..2b3faced8 100644 --- a/flow/networks/__init__.py +++ b/flow/networks/__init__.py @@ -16,10 +16,11 @@ from flow.networks.minicity import MiniCityNetwork from flow.networks.highway_ramps import HighwayRampsNetwork from flow.networks.i210_subnetwork import I210SubNetwork +from flow.networks.i210_subnetwork_ghost_cell import I210SubNetworkGhostCell __all__ = [ "Network", "BayBridgeNetwork", "BayBridgeTollNetwork", "BottleneckNetwork", "FigureEightNetwork", "TrafficLightGridNetwork", "HighwayNetwork", "RingNetwork", "MergeNetwork", "MultiRingNetwork", - "MiniCityNetwork", "HighwayRampsNetwork", "I210SubNetwork" + "MiniCityNetwork", "HighwayRampsNetwork", "I210SubNetwork", "I210SubNetworkGhostCell" ] diff --git a/flow/visualize/i210_replay.py b/flow/visualize/i210_replay.py index b2e22d5b3..22fe262b3 100644 --- a/flow/visualize/i210_replay.py +++ b/flow/visualize/i210_replay.py @@ -19,7 +19,7 @@ from ray.tune.registry import register_env from flow.core.util import emission_to_csv, ensure_dir -from flow.core.rewards import vehicle_energy_consumption +from flow.core.rewards import veh_energy_consumption from flow.utils.registry import make_create_env from flow.utils.rllib import get_flow_params from flow.utils.rllib import get_rllib_config @@ -285,7 +285,7 @@ def replay(args, flow_params, output_dir=None, transfer_test=None, rllib_config= per_vehicle_energy_trace[veh_id].append(0) completed_veh_types[veh_id] = env.k.vehicle.get_type(veh_id) else: - per_vehicle_energy_trace[veh_id].append(-1 * vehicle_energy_consumption(env, veh_id)) + per_vehicle_energy_trace[veh_id].append(-1 * veh_energy_consumption(env, veh_id)) if type(done) is dict and done['__all__']: break diff --git a/flow/visualize/time_space_diagram.py b/flow/visualize/time_space_diagram.py index 8daffde86..a9509aa64 100644 --- a/flow/visualize/time_space_diagram.py +++ b/flow/visualize/time_space_diagram.py @@ -17,7 +17,8 @@ python time_space_diagram.py .csv .json """ from flow.utils.rllib import
get_flow_params -from flow.networks import RingNetwork, FigureEightNetwork, MergeNetwork, I210SubNetwork, HighwayNetwork +from flow.networks import RingNetwork, FigureEightNetwork, MergeNetwork, I210SubNetwork, \ + HighwayNetwork, I210SubNetworkGhostCell import argparse import csv @@ -38,6 +39,7 @@ FigureEightNetwork, MergeNetwork, I210SubNetwork, + I210SubNetworkGhostCell, HighwayNetwork ] @@ -137,6 +139,7 @@ def get_time_space_data(data, params): MergeNetwork: _merge, FigureEightNetwork: _figure_eight, I210SubNetwork: _i210_subnetwork, + I210SubNetworkGhostCell: _i210_subnetwork, HighwayNetwork: _highway, } @@ -431,7 +434,7 @@ def _i210_subnetwork(data, params, all_time): # create the output variables # TODO(@ev) handle subsampling better than this low_time = int(0 / params['sim'].sim_step) - high_time = int(1600 / params['sim'].sim_step) + high_time = int(1600 * params['env'].sims_per_step / params['sim'].sim_step) all_time = all_time[low_time:high_time] # track only vehicles that were around during this time period diff --git a/tests/data/rllib_data/multi_agent/checkpoint_1/checkpoint-1 b/tests/data/rllib_data/multi_agent/checkpoint_1/checkpoint-1 index 0693ed4b6..d346e9dc5 100644 Binary files a/tests/data/rllib_data/multi_agent/checkpoint_1/checkpoint-1 and b/tests/data/rllib_data/multi_agent/checkpoint_1/checkpoint-1 differ diff --git a/tests/data/rllib_data/multi_agent/checkpoint_1/checkpoint-1.tune_metadata b/tests/data/rllib_data/multi_agent/checkpoint_1/checkpoint-1.tune_metadata index 7eef2ef15..febe7b205 100644 Binary files a/tests/data/rllib_data/multi_agent/checkpoint_1/checkpoint-1.tune_metadata and b/tests/data/rllib_data/multi_agent/checkpoint_1/checkpoint-1.tune_metadata differ diff --git a/tests/data/rllib_data/multi_agent/params.json b/tests/data/rllib_data/multi_agent/params.json index 01089f730..1aa9a114c 100644 --- a/tests/data/rllib_data/multi_agent/params.json +++ b/tests/data/rllib_data/multi_agent/params.json @@ -8,17 +8,19 @@ "on_sample_end": null, "on_train_result": null }, - "clip_actions": false, + "clip_actions": true, "clip_param": 0.3, "clip_rewards": null, "collect_metrics_timeout": 180, "compress_observations": false, "custom_resources_per_worker": {}, + "eager": false, + "eager_tracing": false, "entropy_coeff": 0.0, "entropy_coeff_schedule": null, - "env": "MultiWaveAttenuationPOEnv-v0", + "env": "MultiAgentAccelPOEnv-v1", "env_config": { - "flow_params": "{\n \"env\": {\n \"additional_params\": {\n \"max_accel\": 1,\n \"max_decel\": 1,\n \"ring_length\": [\n 230,\n 230\n ],\n \"target_velocity\": 4\n },\n \"clip_actions\": true,\n \"evaluate\": false,\n \"horizon\": 3000,\n \"sims_per_step\": 1,\n \"warmup_steps\": 750\n },\n \"env_name\": \"MultiWaveAttenuationPOEnv\",\n \"exp_tag\": \"lord_of_numrings1\",\n \"initial\": {\n \"additional_params\": {},\n \"bunching\": 20.0,\n \"edges_distribution\": \"all\",\n \"lanes_distribution\": Infinity,\n \"min_gap\": 0,\n \"perturbation\": 0.0,\n \"shuffle\": false,\n \"spacing\": \"custom\",\n \"x0\": 0\n },\n \"net\": {\n \"additional_params\": {\n \"lanes\": 1,\n \"length\": 230,\n \"num_rings\": 1,\n \"resolution\": 40,\n \"speed_limit\": 30\n },\n \"inflows\": {\n \"_InFlows__flows\": []\n },\n \"osm_path\": null,\n \"template\": null\n },\n \"network\": \"MultiRingNetwork\",\n \"sim\": {\n \"color_vehicles\": true,\n \"emission_path\": null,\n \"lateral_resolution\": null,\n \"no_step_log\": true,\n \"num_clients\": 1,\n \"overtake_right\": false,\n \"port\": null,\n \"print_warnings\": true,\n \"pxpm\": 
2,\n \"render\": false,\n \"restart_instance\": false,\n \"save_render\": false,\n \"seed\": null,\n \"show_radius\": false,\n \"sight_radius\": 25,\n \"sim_step\": 0.1,\n \"teleport_time\": -1\n },\n \"simulator\": \"traci\",\n \"veh\": [\n {\n \"acceleration_controller\": [\n \"IDMController\",\n {\n \"noise\": 0.2\n }\n ],\n \"car_following_params\": {\n \"controller_params\": {\n \"accel\": 2.6,\n \"carFollowModel\": \"IDM\",\n \"decel\": 4.5,\n \"impatience\": 0.5,\n \"maxSpeed\": 30,\n \"minGap\": 2.5,\n \"sigma\": 0.5,\n \"speedDev\": 0.1,\n \"speedFactor\": 1.0,\n \"tau\": 1.0\n },\n \"speed_mode\": 25\n },\n \"initial_speed\": 0,\n \"lane_change_controller\": [\n \"SimLaneChangeController\",\n {}\n ],\n \"lane_change_params\": {\n \"controller_params\": {\n \"laneChangeModel\": \"LC2013\",\n \"lcCooperative\": \"1.0\",\n \"lcKeepRight\": \"1.0\",\n \"lcSpeedGain\": \"1.0\",\n \"lcStrategic\": \"1.0\"\n },\n \"lane_change_mode\": 512\n },\n \"num_vehicles\": 21,\n \"routing_controller\": [\n \"ContinuousRouter\",\n {}\n ],\n \"veh_id\": \"human_0\"\n },\n {\n \"acceleration_controller\": [\n \"RLController\",\n {}\n ],\n \"car_following_params\": {\n \"controller_params\": {\n \"accel\": 2.6,\n \"carFollowModel\": \"IDM\",\n \"decel\": 4.5,\n \"impatience\": 0.5,\n \"maxSpeed\": 30,\n \"minGap\": 2.5,\n \"sigma\": 0.5,\n \"speedDev\": 0.1,\n \"speedFactor\": 1.0,\n \"tau\": 1.0\n },\n \"speed_mode\": 25\n },\n \"initial_speed\": 0,\n \"lane_change_controller\": [\n \"SimLaneChangeController\",\n {}\n ],\n \"lane_change_params\": {\n \"controller_params\": {\n \"laneChangeModel\": \"LC2013\",\n \"lcCooperative\": \"1.0\",\n \"lcKeepRight\": \"1.0\",\n \"lcSpeedGain\": \"1.0\",\n \"lcStrategic\": \"1.0\"\n },\n \"lane_change_mode\": 512\n },\n \"num_vehicles\": 1,\n \"routing_controller\": [\n \"ContinuousRouter\",\n {}\n ],\n \"veh_id\": \"rl_0\"\n }\n ]\n}", + "flow_params": "{\n \"env\": {\n \"additional_params\": {\n \"max_accel\": 3,\n \"max_decel\": 3,\n \"sort_vehicles\": false,\n \"target_velocity\": 20\n },\n \"clip_actions\": true,\n \"evaluate\": false,\n \"horizon\": 1500,\n \"sims_per_step\": 1,\n \"warmup_steps\": 0\n },\n \"env_name\": \"flow.envs.multiagent.ring.accel.MultiAgentAccelPOEnv\",\n \"exp_tag\": \"multiagent_figure_eight\",\n \"initial\": {\n \"additional_params\": {},\n \"bunching\": 0,\n \"edges_distribution\": \"all\",\n \"lanes_distribution\": Infinity,\n \"min_gap\": 0,\n \"perturbation\": 0.0,\n \"shuffle\": false,\n \"spacing\": \"uniform\",\n \"x0\": 0\n },\n \"net\": {\n \"additional_params\": {\n \"lanes\": 1,\n \"radius_ring\": 30,\n \"resolution\": 40,\n \"speed_limit\": 30\n },\n \"inflows\": {\n \"_InFlows__flows\": []\n },\n \"osm_path\": null,\n \"template\": null\n },\n \"network\": \"flow.networks.figure_eight.FigureEightNetwork\",\n \"sim\": {\n \"color_by_speed\": false,\n \"emission_path\": null,\n \"force_color_update\": false,\n \"lateral_resolution\": null,\n \"no_step_log\": true,\n \"num_clients\": 1,\n \"overtake_right\": false,\n \"port\": null,\n \"print_warnings\": true,\n \"pxpm\": 2,\n \"render\": false,\n \"restart_instance\": false,\n \"save_render\": false,\n \"seed\": null,\n \"show_radius\": false,\n \"sight_radius\": 25,\n \"sim_step\": 0.1,\n \"teleport_time\": -1,\n \"use_ballistic\": false\n },\n \"simulator\": \"traci\",\n \"veh\": [\n {\n \"acceleration_controller\": [\n \"IDMController\",\n {\n \"noise\": 0.2\n }\n ],\n \"car_following_params\": {\n \"controller_params\": {\n \"accel\": 2.6,\n \"carFollowModel\": 
\"IDM\",\n \"decel\": 1.5,\n \"impatience\": 0.5,\n \"maxSpeed\": 30,\n \"minGap\": 2.5,\n \"sigma\": 0.5,\n \"speedDev\": 0.1,\n \"speedFactor\": 1.0,\n \"tau\": 1.0\n },\n \"speed_mode\": 1\n },\n \"initial_speed\": 0,\n \"lane_change_controller\": [\n \"SimLaneChangeController\",\n {}\n ],\n \"lane_change_params\": {\n \"controller_params\": {\n \"laneChangeModel\": \"LC2013\",\n \"lcCooperative\": \"1.0\",\n \"lcKeepRight\": \"1.0\",\n \"lcSpeedGain\": \"1.0\",\n \"lcStrategic\": \"1.0\"\n },\n \"lane_change_mode\": 512\n },\n \"num_vehicles\": 6,\n \"routing_controller\": [\n \"ContinuousRouter\",\n {}\n ],\n \"veh_id\": \"human_0\"\n },\n {\n \"acceleration_controller\": [\n \"RLController\",\n {}\n ],\n \"car_following_params\": {\n \"controller_params\": {\n \"accel\": 3,\n \"carFollowModel\": \"IDM\",\n \"decel\": 3,\n \"impatience\": 0.5,\n \"maxSpeed\": 30,\n \"minGap\": 2.5,\n \"sigma\": 0.5,\n \"speedDev\": 0.1,\n \"speedFactor\": 1.0,\n \"tau\": 1.0\n },\n \"speed_mode\": 1\n },\n \"initial_speed\": 0,\n \"lane_change_controller\": [\n \"SimLaneChangeController\",\n {}\n ],\n \"lane_change_params\": {\n \"controller_params\": {\n \"laneChangeModel\": \"LC2013\",\n \"lcCooperative\": \"1.0\",\n \"lcKeepRight\": \"1.0\",\n \"lcSpeedGain\": \"1.0\",\n \"lcStrategic\": \"1.0\"\n },\n \"lane_change_mode\": 512\n },\n \"num_vehicles\": 1,\n \"routing_controller\": [\n \"ContinuousRouter\",\n {}\n ],\n \"veh_id\": \"rl_0\"\n },\n {\n \"acceleration_controller\": [\n \"IDMController\",\n {\n \"noise\": 0.2\n }\n ],\n \"car_following_params\": {\n \"controller_params\": {\n \"accel\": 2.6,\n \"carFollowModel\": \"IDM\",\n \"decel\": 1.5,\n \"impatience\": 0.5,\n \"maxSpeed\": 30,\n \"minGap\": 2.5,\n \"sigma\": 0.5,\n \"speedDev\": 0.1,\n \"speedFactor\": 1.0,\n \"tau\": 1.0\n },\n \"speed_mode\": 1\n },\n \"initial_speed\": 0,\n \"lane_change_controller\": [\n \"SimLaneChangeController\",\n {}\n ],\n \"lane_change_params\": {\n \"controller_params\": {\n \"laneChangeModel\": \"LC2013\",\n \"lcCooperative\": \"1.0\",\n \"lcKeepRight\": \"1.0\",\n \"lcSpeedGain\": \"1.0\",\n \"lcStrategic\": \"1.0\"\n },\n \"lane_change_mode\": 512\n },\n \"num_vehicles\": 6,\n \"routing_controller\": [\n \"ContinuousRouter\",\n {}\n ],\n \"veh_id\": \"human_1\"\n },\n {\n \"acceleration_controller\": [\n \"RLController\",\n {}\n ],\n \"car_following_params\": {\n \"controller_params\": {\n \"accel\": 3,\n \"carFollowModel\": \"IDM\",\n \"decel\": 3,\n \"impatience\": 0.5,\n \"maxSpeed\": 30,\n \"minGap\": 2.5,\n \"sigma\": 0.5,\n \"speedDev\": 0.1,\n \"speedFactor\": 1.0,\n \"tau\": 1.0\n },\n \"speed_mode\": 1\n },\n \"initial_speed\": 0,\n \"lane_change_controller\": [\n \"SimLaneChangeController\",\n {}\n ],\n \"lane_change_params\": {\n \"controller_params\": {\n \"laneChangeModel\": \"LC2013\",\n \"lcCooperative\": \"1.0\",\n \"lcKeepRight\": \"1.0\",\n \"lcSpeedGain\": \"1.0\",\n \"lcStrategic\": \"1.0\"\n },\n \"lane_change_mode\": 512\n },\n \"num_vehicles\": 1,\n \"routing_controller\": [\n \"ContinuousRouter\",\n {}\n ],\n \"veh_id\": \"rl_1\"\n }\n ]\n}", "run": "PPO" }, "evaluation_config": {}, @@ -26,7 +28,7 @@ "evaluation_num_episodes": 10, "gamma": 0.999, "grad_clip": null, - "horizon": 3000, + "horizon": 1500, "ignore_worker_failures": false, "input": "sampler", "input_evaluation": [ @@ -34,27 +36,31 @@ "wis" ], "kl_coeff": 0.2, - "kl_target": 0.01, - "lambda": 1.0, + "kl_target": 0.02, + "lambda": 0.97, "local_tf_session_args": { "inter_op_parallelism_threads": 8, 
"intra_op_parallelism_threads": 8 }, - "log_level": "INFO", + "log_level": "WARN", "log_sys_usage": true, - "lr": 1e-05, + "lr": 5e-05, "lr_schedule": null, + "memory": 0, + "memory_per_worker": 0, "metrics_smoothing_episodes": 100, "min_iter_time_s": 0, "model": { "conv_activation": "relu", "conv_filters": null, + "custom_action_dist": null, "custom_model": null, "custom_options": {}, "custom_preprocessor": null, "dim": 84, "fcnet_activation": "tanh", "fcnet_hiddens": [ + 32, 32, 32 ], @@ -75,23 +81,25 @@ "policies": { "av": [ "", - "Box(3,)", + "Box(6,)", "Box(1,)", {} ] }, - "policies_to_train": [ - "av" - ], - "policy_mapping_fn": "tune.function(.policy_mapping_fn at 0x7fda132e6c80>)" + "policies_to_train": null, + "policy_mapping_fn": "" }, + "no_done_at_end": false, + "no_eager_on_workers": false, "num_cpus_for_driver": 1, "num_cpus_per_worker": 1, "num_envs_per_worker": 1, "num_gpus": 0, "num_gpus_per_worker": 0, - "num_sgd_iter": 30, + "num_sgd_iter": 10, "num_workers": 2, + "object_store_memory": 0, + "object_store_memory_per_worker": 0, "observation_filter": "NoFilter", "optimizer": {}, "output": null, @@ -110,7 +118,7 @@ "sgd_minibatch_size": 128, "shuffle_buffer_size": 0, "shuffle_sequences": true, - "simple_optimizer": true, + "simple_optimizer": false, "soft_horizon": false, "synchronize_filters": true, "tf_session_args": { @@ -126,7 +134,7 @@ "log_device_placement": false }, "timesteps_per_iteration": 0, - "train_batch_size": 60000, + "train_batch_size": 30000, "use_gae": true, "vf_clip_param": 10.0, "vf_loss_coeff": 1.0, diff --git a/tests/data/rllib_data/multi_agent/params.pkl b/tests/data/rllib_data/multi_agent/params.pkl index cd832aa1c..192cf7558 100644 Binary files a/tests/data/rllib_data/multi_agent/params.pkl and b/tests/data/rllib_data/multi_agent/params.pkl differ diff --git a/tests/data/rllib_data/single_agent/checkpoint_1/checkpoint-1 b/tests/data/rllib_data/single_agent/checkpoint_1/checkpoint-1 index f8a7e8976..b7ae94640 100644 Binary files a/tests/data/rllib_data/single_agent/checkpoint_1/checkpoint-1 and b/tests/data/rllib_data/single_agent/checkpoint_1/checkpoint-1 differ diff --git a/tests/data/rllib_data/single_agent/checkpoint_1/checkpoint-1.tune_metadata b/tests/data/rllib_data/single_agent/checkpoint_1/checkpoint-1.tune_metadata index e83b72aea..55b72be28 100644 Binary files a/tests/data/rllib_data/single_agent/checkpoint_1/checkpoint-1.tune_metadata and b/tests/data/rllib_data/single_agent/checkpoint_1/checkpoint-1.tune_metadata differ diff --git a/tests/data/rllib_data/single_agent/params.json b/tests/data/rllib_data/single_agent/params.json index c5e605ef4..c93580225 100644 --- a/tests/data/rllib_data/single_agent/params.json +++ b/tests/data/rllib_data/single_agent/params.json @@ -8,17 +8,19 @@ "on_sample_end": null, "on_train_result": null }, - "clip_actions": false, + "clip_actions": true, "clip_param": 0.3, "clip_rewards": null, "collect_metrics_timeout": 180, "compress_observations": false, "custom_resources_per_worker": {}, + "eager": false, + "eager_tracing": false, "entropy_coeff": 0.0, "entropy_coeff_schedule": null, - "env": "WaveAttenuationPOEnv-v0", + "env": "AccelEnv-v0", "env_config": { - "flow_params": "{\n \"env\": {\n \"additional_params\": {\n \"max_accel\": 1,\n \"max_decel\": 1,\n \"ring_length\": [\n 220,\n 270\n ]\n },\n \"clip_actions\": false,\n \"evaluate\": false,\n \"horizon\": 3000,\n \"sims_per_step\": 1,\n \"warmup_steps\": 750\n },\n \"env_name\": \"WaveAttenuationPOEnv\",\n \"exp_tag\": \"stabilizing_the_ring\",\n 
\"initial\": {\n \"additional_params\": {},\n \"bunching\": 0,\n \"edges_distribution\": \"all\",\n \"lanes_distribution\": Infinity,\n \"min_gap\": 0,\n \"perturbation\": 0.0,\n \"shuffle\": false,\n \"spacing\": \"uniform\",\n \"x0\": 0\n },\n \"net\": {\n \"additional_params\": {\n \"lanes\": 1,\n \"length\": 260,\n \"resolution\": 40,\n \"speed_limit\": 30\n },\n \"inflows\": {\n \"_InFlows__flows\": []\n },\n \"osm_path\": null,\n \"template\": null\n },\n \"network\": \"RingNetwork\",\n \"sim\": {\n \"color_vehicles\": true,\n \"emission_path\": null,\n \"lateral_resolution\": null,\n \"no_step_log\": true,\n \"num_clients\": 1,\n \"overtake_right\": false,\n \"port\": null,\n \"print_warnings\": true,\n \"pxpm\": 2,\n \"render\": false,\n \"restart_instance\": false,\n \"save_render\": false,\n \"seed\": null,\n \"show_radius\": false,\n \"sight_radius\": 25,\n \"sim_step\": 0.1,\n \"teleport_time\": -1\n },\n \"simulator\": \"traci\",\n \"veh\": [\n {\n \"acceleration_controller\": [\n \"IDMController\",\n {\n \"noise\": 0.2\n }\n ],\n \"car_following_params\": {\n \"controller_params\": {\n \"accel\": 2.6,\n \"carFollowModel\": \"IDM\",\n \"decel\": 4.5,\n \"impatience\": 0.5,\n \"maxSpeed\": 30,\n \"minGap\": 0,\n \"sigma\": 0.5,\n \"speedDev\": 0.1,\n \"speedFactor\": 1.0,\n \"tau\": 1.0\n },\n \"speed_mode\": 25\n },\n \"initial_speed\": 0,\n \"lane_change_controller\": [\n \"SimLaneChangeController\",\n {}\n ],\n \"lane_change_params\": {\n \"controller_params\": {\n \"laneChangeModel\": \"LC2013\",\n \"lcCooperative\": \"1.0\",\n \"lcKeepRight\": \"1.0\",\n \"lcSpeedGain\": \"1.0\",\n \"lcStrategic\": \"1.0\"\n },\n \"lane_change_mode\": 512\n },\n \"num_vehicles\": 21,\n \"routing_controller\": [\n \"ContinuousRouter\",\n {}\n ],\n \"veh_id\": \"human\"\n },\n {\n \"acceleration_controller\": [\n \"RLController\",\n {}\n ],\n \"car_following_params\": {\n \"controller_params\": {\n \"accel\": 2.6,\n \"carFollowModel\": \"IDM\",\n \"decel\": 4.5,\n \"impatience\": 0.5,\n \"maxSpeed\": 30,\n \"minGap\": 2.5,\n \"sigma\": 0.5,\n \"speedDev\": 0.1,\n \"speedFactor\": 1.0,\n \"tau\": 1.0\n },\n \"speed_mode\": 25\n },\n \"initial_speed\": 0,\n \"lane_change_controller\": [\n \"SimLaneChangeController\",\n {}\n ],\n \"lane_change_params\": {\n \"controller_params\": {\n \"laneChangeModel\": \"LC2013\",\n \"lcCooperative\": \"1.0\",\n \"lcKeepRight\": \"1.0\",\n \"lcSpeedGain\": \"1.0\",\n \"lcStrategic\": \"1.0\"\n },\n \"lane_change_mode\": 512\n },\n \"num_vehicles\": 1,\n \"routing_controller\": [\n \"ContinuousRouter\",\n {}\n ],\n \"veh_id\": \"rl\"\n }\n ]\n}", + "flow_params": "{\n \"env\": {\n \"additional_params\": {\n \"max_accel\": 3,\n \"max_decel\": 3,\n \"sort_vehicles\": false,\n \"target_velocity\": 20\n },\n \"clip_actions\": true,\n \"evaluate\": false,\n \"horizon\": 1500,\n \"sims_per_step\": 1,\n \"warmup_steps\": 0\n },\n \"env_name\": \"flow.envs.ring.accel.AccelEnv\",\n \"exp_tag\": \"singleagent_figure_eight\",\n \"initial\": {\n \"additional_params\": {},\n \"bunching\": 0,\n \"edges_distribution\": \"all\",\n \"lanes_distribution\": Infinity,\n \"min_gap\": 0,\n \"perturbation\": 0.0,\n \"shuffle\": false,\n \"spacing\": \"uniform\",\n \"x0\": 0\n },\n \"net\": {\n \"additional_params\": {\n \"lanes\": 1,\n \"radius_ring\": 30,\n \"resolution\": 40,\n \"speed_limit\": 30\n },\n \"inflows\": {\n \"_InFlows__flows\": []\n },\n \"osm_path\": null,\n \"template\": null\n },\n \"network\": \"flow.networks.figure_eight.FigureEightNetwork\",\n \"sim\": {\n 
\"color_by_speed\": false,\n \"emission_path\": null,\n \"force_color_update\": false,\n \"lateral_resolution\": null,\n \"no_step_log\": true,\n \"num_clients\": 1,\n \"overtake_right\": false,\n \"port\": null,\n \"print_warnings\": true,\n \"pxpm\": 2,\n \"render\": false,\n \"restart_instance\": false,\n \"save_render\": false,\n \"seed\": null,\n \"show_radius\": false,\n \"sight_radius\": 25,\n \"sim_step\": 0.1,\n \"teleport_time\": -1,\n \"use_ballistic\": false\n },\n \"simulator\": \"traci\",\n \"veh\": [\n {\n \"acceleration_controller\": [\n \"IDMController\",\n {\n \"noise\": 0.2\n }\n ],\n \"car_following_params\": {\n \"controller_params\": {\n \"accel\": 2.6,\n \"carFollowModel\": \"IDM\",\n \"decel\": 1.5,\n \"impatience\": 0.5,\n \"maxSpeed\": 30,\n \"minGap\": 2.5,\n \"sigma\": 0.5,\n \"speedDev\": 0.1,\n \"speedFactor\": 1.0,\n \"tau\": 1.0\n },\n \"speed_mode\": 1\n },\n \"initial_speed\": 0,\n \"lane_change_controller\": [\n \"SimLaneChangeController\",\n {}\n ],\n \"lane_change_params\": {\n \"controller_params\": {\n \"laneChangeModel\": \"LC2013\",\n \"lcCooperative\": \"1.0\",\n \"lcKeepRight\": \"1.0\",\n \"lcSpeedGain\": \"1.0\",\n \"lcStrategic\": \"1.0\"\n },\n \"lane_change_mode\": 512\n },\n \"num_vehicles\": 13,\n \"routing_controller\": [\n \"ContinuousRouter\",\n {}\n ],\n \"veh_id\": \"human\"\n },\n {\n \"acceleration_controller\": [\n \"RLController\",\n {}\n ],\n \"car_following_params\": {\n \"controller_params\": {\n \"accel\": 2.6,\n \"carFollowModel\": \"IDM\",\n \"decel\": 1.5,\n \"impatience\": 0.5,\n \"maxSpeed\": 30,\n \"minGap\": 2.5,\n \"sigma\": 0.5,\n \"speedDev\": 0.1,\n \"speedFactor\": 1.0,\n \"tau\": 1.0\n },\n \"speed_mode\": 1\n },\n \"initial_speed\": 0,\n \"lane_change_controller\": [\n \"SimLaneChangeController\",\n {}\n ],\n \"lane_change_params\": {\n \"controller_params\": {\n \"laneChangeModel\": \"LC2013\",\n \"lcCooperative\": \"1.0\",\n \"lcKeepRight\": \"1.0\",\n \"lcSpeedGain\": \"1.0\",\n \"lcStrategic\": \"1.0\"\n },\n \"lane_change_mode\": 512\n },\n \"num_vehicles\": 1,\n \"routing_controller\": [\n \"ContinuousRouter\",\n {}\n ],\n \"veh_id\": \"rl\"\n }\n ]\n}", "run": "PPO" }, "evaluation_config": {}, @@ -26,7 +28,7 @@ "evaluation_num_episodes": 10, "gamma": 0.999, "grad_clip": null, - "horizon": 3000, + "horizon": 1500, "ignore_worker_failures": false, "input": "sampler", "input_evaluation": [ @@ -40,23 +42,27 @@ "inter_op_parallelism_threads": 8, "intra_op_parallelism_threads": 8 }, - "log_level": "INFO", + "log_level": "WARN", "log_sys_usage": true, "lr": 5e-05, "lr_schedule": null, + "memory": 0, + "memory_per_worker": 0, "metrics_smoothing_episodes": 100, "min_iter_time_s": 0, "model": { "conv_activation": "relu", "conv_filters": null, + "custom_action_dist": null, "custom_model": null, "custom_options": {}, "custom_preprocessor": null, "dim": 84, "fcnet_activation": "tanh", "fcnet_hiddens": [ - 3, - 3 + 32, + 32, + 32 ], "framestack": true, "free_log_std": false, @@ -76,6 +82,8 @@ "policies_to_train": null, "policy_mapping_fn": null }, + "no_done_at_end": false, + "no_eager_on_workers": false, "num_cpus_for_driver": 1, "num_cpus_per_worker": 1, "num_envs_per_worker": 1, @@ -83,6 +91,8 @@ "num_gpus_per_worker": 0, "num_sgd_iter": 10, "num_workers": 2, + "object_store_memory": 0, + "object_store_memory_per_worker": 0, "observation_filter": "NoFilter", "optimizer": {}, "output": null, @@ -117,7 +127,7 @@ "log_device_placement": false }, "timesteps_per_iteration": 0, - "train_batch_size": 60000, + 
"train_batch_size": 30000, "use_gae": true, "vf_clip_param": 10.0, "vf_loss_coeff": 1.0, diff --git a/tests/data/rllib_data/single_agent/params.pkl b/tests/data/rllib_data/single_agent/params.pkl index 511d34343..e69753b7f 100644 Binary files a/tests/data/rllib_data/single_agent/params.pkl and b/tests/data/rllib_data/single_agent/params.pkl differ diff --git a/tests/fast_tests/test_vehicles.py b/tests/fast_tests/test_vehicles.py index a37b235ff..9ac8b8e50 100644 --- a/tests/fast_tests/test_vehicles.py +++ b/tests/fast_tests/test_vehicles.py @@ -33,7 +33,7 @@ def test_speed_lane_change_modes(self): speed_mode='obey_safe_speed', ), lane_change_params=SumoLaneChangeParams( - lane_change_mode="no_lat_collide", + lane_change_mode="no_lc_safe", ) ) @@ -56,7 +56,7 @@ def test_speed_lane_change_modes(self): self.assertEqual(vehicles.type_parameters["typeB"][ "car_following_params"].speed_mode, 0) self.assertEqual(vehicles.type_parameters["typeB"][ - "lane_change_params"].lane_change_mode, 1621) + "lane_change_params"].lane_change_mode, 512) vehicles.add( "typeC", @@ -89,7 +89,7 @@ def test_controlled_id_params(self): speed_mode="obey_safe_speed", ), lane_change_params=SumoLaneChangeParams( - lane_change_mode="no_lat_collide", + lane_change_mode="no_lc_safe", )) default_mingap = SumoCarFollowingParams().controller_params["minGap"] self.assertEqual(vehicles.types[0]["type_params"]["minGap"],