From 2c85838356d19eedef0af6fd064f3f73fea16a76 Mon Sep 17 00:00:00 2001 From: Eugene Vinitsky Date: Sat, 28 Mar 2020 02:06:01 -0700 Subject: [PATCH 01/85] Added QMIX but its hella slow right now --- .../rl/multiagent/multiagent_i210.py | 1 - .../rl/multiagent/multiagent_i210_qmix.py | 180 ++++++ examples/train.py | 32 +- flow/algorithms/qmix/README.md | 1 + flow/algorithms/qmix/__init__.py | 8 + flow/algorithms/qmix/apex.py | 39 ++ flow/algorithms/qmix/mixers.py | 64 ++ flow/algorithms/qmix/model.py | 91 +++ flow/algorithms/qmix/qmix.py | 105 ++++ flow/algorithms/qmix/qmix_policy.py | 562 ++++++++++++++++++ flow/envs/multiagent/__init__.py | 5 +- flow/envs/multiagent/base.py | 15 +- flow/envs/multiagent/i210.py | 68 ++- requirements.txt | 1 + 14 files changed, 1157 insertions(+), 15 deletions(-) create mode 100644 examples/exp_configs/rl/multiagent/multiagent_i210_qmix.py create mode 100644 flow/algorithms/qmix/README.md create mode 100644 flow/algorithms/qmix/__init__.py create mode 100644 flow/algorithms/qmix/apex.py create mode 100644 flow/algorithms/qmix/mixers.py create mode 100644 flow/algorithms/qmix/model.py create mode 100644 flow/algorithms/qmix/qmix.py create mode 100644 flow/algorithms/qmix/qmix_policy.py diff --git a/examples/exp_configs/rl/multiagent/multiagent_i210.py b/examples/exp_configs/rl/multiagent/multiagent_i210.py index 872568cab..1779adf69 100644 --- a/examples/exp_configs/rl/multiagent/multiagent_i210.py +++ b/examples/exp_configs/rl/multiagent/multiagent_i210.py @@ -5,7 +5,6 @@ """ import os -from ray.rllib.agents.ppo.ppo_policy import PPOTFPolicy from ray.tune.registry import register_env from flow.controllers import RLController diff --git a/examples/exp_configs/rl/multiagent/multiagent_i210_qmix.py b/examples/exp_configs/rl/multiagent/multiagent_i210_qmix.py new file mode 100644 index 000000000..6c00b44cb --- /dev/null +++ b/examples/exp_configs/rl/multiagent/multiagent_i210_qmix.py @@ -0,0 +1,180 @@ +"""Multi-agent I-210 example. + +Trains a non-constant number of agents, all sharing the same policy, on the +highway with ramps network. +""" +import os + +from ray.tune.registry import register_env + +from flow.controllers import RLController +from flow.controllers.car_following_models import IDMController +import flow.config as config +from flow.core.params import EnvParams +from flow.core.params import NetParams +from flow.core.params import InitialConfig +from flow.core.params import InFlows +from flow.core.params import VehicleParams +from flow.core.params import SumoParams +from flow.core.params import SumoLaneChangeParams +from flow.networks.i210_subnetwork import I210SubNetwork, EDGES_DISTRIBUTION +from flow.envs.multiagent.i210 import I210QMIXMultiEnv, ADDITIONAL_ENV_PARAMS +from flow.utils.registry import make_create_env + +# SET UP PARAMETERS FOR THE SIMULATION + +# number of steps per rollout +HORIZON = 4000 + +# percentage of autonomous vehicles compared to human vehicles on highway +PENETRATION_RATE = 10 + +# SET UP PARAMETERS FOR THE ENVIRONMENT +additional_env_params = ADDITIONAL_ENV_PARAMS.copy() +additional_env_params.update({ + 'max_accel': 2.6, + 'max_decel': 4.5, + # configure the observation space. Look at the I210MultiEnv class for more info. 
+ 'lead_obs': True, + # whether to add in a reward for the speed of nearby vehicles + "local_reward": True, + "num_actions": 5, + "max_num_agents_qmix": 200 +}) + +# CREATE VEHICLE TYPES AND INFLOWS +# no vehicles in the network +vehicles = VehicleParams() +vehicles.add( + "human", + num_vehicles=0, + lane_change_params=SumoLaneChangeParams(lane_change_mode="strategic"), + acceleration_controller=(IDMController, {"a": .3, "b": 2.0, "noise": 0.6}), +) +vehicles.add( + "av", + acceleration_controller=(RLController, {}), + num_vehicles=0, +) + +inflow = InFlows() +# main highway +pen_rate = PENETRATION_RATE / 100 +assert pen_rate < 1.0, "your penetration rate is over 100%" +assert pen_rate > 0.0, "your penetration rate should be above zero" +inflow.add( + veh_type="human", + edge="119257914", + vehs_per_hour=int(10800 * (1 - pen_rate)), + # probability=1.0, + departLane="random", + departSpeed=20) +# # on ramp +# inflow.add( +# veh_type="human", +# edge="27414345", +# vehs_per_hour=321 * pen_rate, +# departLane="random", +# departSpeed=20) +# inflow.add( +# veh_type="human", +# edge="27414342#0", +# vehs_per_hour=421 * pen_rate, +# departLane="random", +# departSpeed=20) + +# Now add the AVs +# main highway +inflow.add( + veh_type="av", + edge="119257914", + vehs_per_hour=int(10800 * pen_rate), + # probability=1.0, + departLane="random", + departSpeed=20) +# # on ramp +# inflow.add( +# veh_type="av", +# edge="27414345", +# vehs_per_hour=int(321 * pen_rate), +# departLane="random", +# departSpeed=20) +# inflow.add( +# veh_type="av", +# edge="27414342#0", +# vehs_per_hour=int(421 * pen_rate), +# departLane="random", +# departSpeed=20) + +NET_TEMPLATE = os.path.join( + config.PROJECT_PATH, + "examples/exp_configs/templates/sumo/test2.net.xml") + +flow_params = dict( + # name of the experiment + exp_tag='I_210_subnetwork', + + # name of the flow environment the experiment is running on + env_name=I210QMIXMultiEnv, + + # name of the network class the experiment is running on + network=I210SubNetwork, + + # simulator that is used by the experiment + simulator='traci', + + # simulation-related parameters + sim=SumoParams( + sim_step=0.5, + render=False, + color_by_speed=False, + restart_instance=True, + use_ballistic=True + ), + + # environment related parameters (see flow.core.params.EnvParams) + env=EnvParams( + horizon=HORIZON, + sims_per_step=1, + warmup_steps=0, + additional_params=additional_env_params, + ), + + # network-related parameters (see flow.core.params.NetParams and the + # network's documentation or ADDITIONAL_NET_PARAMS component) + net=NetParams( + inflows=inflow, + template=NET_TEMPLATE + ), + + # vehicles to be placed in the network at the start of a rollout (see + # flow.core.params.VehicleParams) + veh=vehicles, + + # parameters specifying the positioning of vehicles upon initialization/ + # reset (see flow.core.params.InitialConfig) + initial=InitialConfig( + edges_distribution=EDGES_DISTRIBUTION, + ), +) + +# SET UP RLLIB MULTI-AGENT FEATURES + +create_env, env_name = make_create_env(params=flow_params, version=0) + +# register as rllib env +register_env(env_name, create_env) + +# multiagent configuration +test_env = create_env() +obs_space = test_env.observation_space +act_space = test_env.action_space + +# POLICY_GRAPHS = {'av': (None, obs_space, act_space, {})} + +# POLICIES_TO_TRAIN = ['av'] + + +# def policy_mapping_fn(_): +# """Map a policy in RLlib.""" +# return 'av' diff --git a/examples/train.py b/examples/train.py index 1f2cd6300..b32bbef92 100644 --- 
a/examples/train.py +++ b/examples/train.py @@ -14,16 +14,20 @@ from time import strftime from copy import deepcopy +from gym.spaces import Tuple import numpy as np import pytz -from stable_baselines.common.vec_env import DummyVecEnv, SubprocVecEnv -from stable_baselines import PPO2 +try: + from stable_baselines.common.vec_env import DummyVecEnv, SubprocVecEnv + from stable_baselines import PPO2 +except: + pass import ray from ray import tune -from ray.tune import run_experiments from ray.tune.registry import register_env +from ray.rllib.env.group_agents_wrapper import _GroupAgentsWrapper try: from ray.rllib.agents.agent import get_agent_class except ImportError: @@ -194,6 +198,10 @@ def setup_exps_rllib(flow_params, config["actor_lr"] = tune.grid_search([1e-3, 1e-4]) config["critic_lr"] = tune.grid_search([1e-3, 1e-4]) config["n_step"] = tune.grid_search([1, 10]) + elif alg_run == "QMIX": + from flow.algorithms.qmix.qmix import QMixTrainer2, DEFAULT_CONFIG + config = deepcopy(DEFAULT_CONFIG) + alg_run = QMixTrainer2 else: sys.exit("We only support PPO and TD3 right now.") @@ -206,6 +214,8 @@ def on_episode_start(info): def on_episode_step(info): episode = info["episode"] env = info["env"].get_unwrapped()[0] + if isinstance(env, _GroupAgentsWrapper): + env = env.env speed = np.mean([speed for speed in env.k.vehicle.get_speed(env.k.vehicle.get_ids()) if speed >= 0]) if not np.isnan(speed): episode.user_data["avg_speed"].append(speed) @@ -238,8 +248,20 @@ def on_episode_end(info): create_env, gym_name = make_create_env(params=flow_params) - # Register as rllib env - register_env(gym_name, create_env) + if flags.algorithm.upper() == "QMIX": + test_env = create_env() + obs_space = test_env.observation_space + act_space = test_env.action_space + max_num_agents_qmix = flow_params['env'].additional_params['max_num_agents_qmix'] + config['env_config']['max_num_agents'] = max_num_agents_qmix + grouping = {"AVs": list(np.arange(max_num_agents_qmix))} + obs_space = Tuple([obs_space] * max_num_agents_qmix) + act_space = Tuple([act_space] * max_num_agents_qmix) + register_env(gym_name, lambda config: create_env(config).with_agent_groups( + grouping, obs_space=obs_space, act_space=act_space)) + else: + # Register as rllib env + register_env(gym_name, create_env) return alg_run, gym_name, config diff --git a/flow/algorithms/qmix/README.md b/flow/algorithms/qmix/README.md new file mode 100644 index 000000000..e8d66616d --- /dev/null +++ b/flow/algorithms/qmix/README.md @@ -0,0 +1 @@ +Code in this package is adapted from https://github.com/oxwhirl/pymarl. 
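
The QMIX branch added to train.py above hinges on RLlib's agent grouping: the
multi-agent env is wrapped with `with_agent_groups()` so that all
`max_num_agents_qmix` integer agent ids form one group with fixed-length Tuple
observation and action spaces. A minimal sketch of that pattern follows; it
uses a hypothetical ToyMultiEnv stand-in, and every name in it is illustrative
rather than part of this patch.

    from gym.spaces import Box, Discrete, Tuple
    import numpy as np
    from ray.rllib.env.multi_agent_env import MultiAgentEnv

    class ToyMultiEnv(MultiAgentEnv):
        """Stand-in for I210QMIXMultiEnv: always reports ids 0..n_agents-1."""

        def __init__(self, n_agents=4):
            self.n_agents = n_agents

        def reset(self):
            return {i: np.zeros(3, dtype=np.float32) for i in range(self.n_agents)}

        def step(self, action_dict):
            # every agent id listed in the grouping must appear every step
            obs = {i: np.zeros(3, dtype=np.float32) for i in range(self.n_agents)}
            rew = {i: 0.0 for i in range(self.n_agents)}
            return obs, rew, {"__all__": False}, {}

    n = 4
    grouped_env = ToyMultiEnv(n).with_agent_groups(
        {"AVs": list(range(n))},
        obs_space=Tuple([Box(-1.0, 1.0, shape=(3,), dtype=np.float32)] * n),
        act_space=Tuple([Discrete(5)] * n))

The wrapper packs the per-agent dicts into a single Tuple observation,
combines the rewards into one group reward, and exposes the per-agent values
through the '_group_rewards' info field that QMixTorchPolicy reads back.
Because the group has a fixed size, I210QMIXMultiEnv later in this patch pads
its state and reward dicts out to `max_num_agents_qmix` entries on every step.
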
diff --git a/flow/algorithms/qmix/__init__.py b/flow/algorithms/qmix/__init__.py new file mode 100644 index 000000000..0de9ff272 --- /dev/null +++ b/flow/algorithms/qmix/__init__.py @@ -0,0 +1,8 @@ +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +from ray.rllib.agents.qmix.qmix import QMixTrainer, DEFAULT_CONFIG +from ray.rllib.agents.qmix.apex import ApexQMixTrainer + +__all__ = ["QMixTrainer", "ApexQMixTrainer", "DEFAULT_CONFIG"] diff --git a/flow/algorithms/qmix/apex.py b/flow/algorithms/qmix/apex.py new file mode 100644 index 000000000..be6e66638 --- /dev/null +++ b/flow/algorithms/qmix/apex.py @@ -0,0 +1,39 @@ +"""Experimental: scalable Ape-X variant of QMIX""" + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +from ray.rllib.agents.dqn.apex import APEX_TRAINER_PROPERTIES +from flow.algorithms.qmix import QMixTrainer, \ + DEFAULT_CONFIG as QMIX_CONFIG +from ray.rllib.utils import merge_dicts + +APEX_QMIX_DEFAULT_CONFIG = merge_dicts( + QMIX_CONFIG, # see also the options in qmix.py, which are also supported + { + "optimizer": merge_dicts( + QMIX_CONFIG["optimizer"], + { + "max_weight_sync_delay": 400, + "num_replay_buffer_shards": 4, + "batch_replay": True, # required for RNN. Disables prio. + "debug": False + }), + "num_gpus": 0, + "num_workers": 32, + "buffer_size": 2000000, + "learning_starts": 50000, + "train_batch_size": 512, + "sample_batch_size": 50, + "target_network_update_freq": 500000, + "timesteps_per_iteration": 25000, + "per_worker_exploration": True, + "min_iter_time_s": 30, + }, +) + +ApexQMixTrainer = QMixTrainer.with_updates( + name="APEX_QMIX", + default_config=APEX_QMIX_DEFAULT_CONFIG, + **APEX_TRAINER_PROPERTIES) diff --git a/flow/algorithms/qmix/mixers.py b/flow/algorithms/qmix/mixers.py new file mode 100644 index 000000000..3f8fbbce4 --- /dev/null +++ b/flow/algorithms/qmix/mixers.py @@ -0,0 +1,64 @@ +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import torch as th +import torch.nn as nn +import torch.nn.functional as F +import numpy as np + + +class VDNMixer(nn.Module): + def __init__(self): + super(VDNMixer, self).__init__() + + def forward(self, agent_qs, batch): + return th.sum(agent_qs, dim=2, keepdim=True) + + +class QMixer(nn.Module): + def __init__(self, n_agents, state_shape, mixing_embed_dim): + super(QMixer, self).__init__() + + self.n_agents = n_agents + self.embed_dim = mixing_embed_dim + self.state_dim = int(np.prod(state_shape)) + + self.hyper_w_1 = nn.Linear(self.state_dim, + self.embed_dim * self.n_agents) + self.hyper_w_final = nn.Linear(self.state_dim, self.embed_dim) + + # State dependent bias for hidden layer + self.hyper_b_1 = nn.Linear(self.state_dim, self.embed_dim) + + # V(s) instead of a bias for the last layers + self.V = nn.Sequential( + nn.Linear(self.state_dim, self.embed_dim), nn.ReLU(), + nn.Linear(self.embed_dim, 1)) + + def forward(self, agent_qs, states): + """Forward pass for the mixer. 
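+
+        In summary, the mixing implemented below is
+
+            Q_tot(s, q_1..q_N) = |w_2(s)|^T elu(|W_1(s)|^T q + b_1(s)) + V(s)
+
+        where q = (q_1, ..., q_N) are the per-agent utilities and the weights
+        |W_1(s)|, |w_2(s)| come from hypernetworks conditioned on the global
+        state s. Taking absolute values of the hypernetwork outputs keeps
+        Q_tot monotone in every q_i, which is the core QMIX constraint.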
+ + Arguments: + agent_qs: Tensor of shape [B, T, n_agents, n_actions] + states: Tensor of shape [B, T, state_dim] + """ + bs = agent_qs.size(0) + states = states.reshape(-1, self.state_dim) + agent_qs = agent_qs.view(-1, 1, self.n_agents) + # First layer + w1 = th.abs(self.hyper_w_1(states)) + b1 = self.hyper_b_1(states) + w1 = w1.view(-1, self.n_agents, self.embed_dim) + b1 = b1.view(-1, 1, self.embed_dim) + hidden = F.elu(th.bmm(agent_qs, w1) + b1) + # Second layer + w_final = th.abs(self.hyper_w_final(states)) + w_final = w_final.view(-1, self.embed_dim, 1) + # State-dependent bias + v = self.V(states).view(-1, 1, 1) + # Compute final output + y = th.bmm(hidden, w_final) + v + # Reshape and return + q_tot = y.view(bs, -1, 1) + return q_tot diff --git a/flow/algorithms/qmix/model.py b/flow/algorithms/qmix/model.py new file mode 100644 index 000000000..f94f6804e --- /dev/null +++ b/flow/algorithms/qmix/model.py @@ -0,0 +1,91 @@ +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +from torch import nn +import numpy as np +import torch.nn.functional as F + +from ray.rllib.models.preprocessors import get_preprocessor +from ray.rllib.models.torch.torch_modelv2 import TorchModelV2 +from ray.rllib.utils.annotations import override + + +class RNNModel(TorchModelV2, nn.Module): + """The default RNN model for QMIX.""" + + def __init__(self, obs_space, action_space, num_outputs, model_config, + name): + TorchModelV2.__init__(self, obs_space, action_space, num_outputs, + model_config, name) + nn.Module.__init__(self) + self.obs_size = _get_size(obs_space) + self.rnn_hidden_dim = model_config["lstm_cell_size"] + self.fc1 = nn.Linear(self.obs_size, self.rnn_hidden_dim) + self.rnn = nn.GRUCell(self.rnn_hidden_dim, self.rnn_hidden_dim) + self.fc2 = nn.Linear(self.rnn_hidden_dim, num_outputs) + + @override(TorchModelV2) + def get_initial_state(self): + # make hidden states on same device as model + return [self.fc1.weight.new(1, self.rnn_hidden_dim).zero_().squeeze(0)] + + @override(TorchModelV2) + def forward(self, input_dict, hidden_state, seq_lens): + x = F.relu(self.fc1(input_dict["obs_flat"].float())) + h_in = hidden_state[0].reshape(-1, self.rnn_hidden_dim) + h = self.rnn(x, h_in) + q = self.fc2(h) + return q, [h] + + +def _get_size(obs_space): + return get_preprocessor(obs_space)(obs_space).size + + +class FeedForward(TorchModelV2, nn.Module): + """Generic fully connected network.""" + + def __init__(self, obs_space, action_space, num_outputs, model_config, + name): + TorchModelV2.__init__(self, obs_space, action_space, num_outputs, + model_config, name) + nn.Module.__init__(self) + + hiddens = model_config.get("fcnet_hiddens") + layers = [] + last_layer_size = np.product(obs_space.shape) + for size in hiddens: + layers.append(nn.Linear(in_features=last_layer_size, out_features=size)) + layers.append(nn.ReLU()) + last_layer_size = size + + self._hidden_layers = nn.Sequential(*layers) + + self._hidden_layers.apply(init_weights) + + # TODO(@ev) pick the right initialization + self._logits = nn.Linear( + in_features=last_layer_size, + out_features=num_outputs) + + self._logits.apply(large_initializer) + + @override(TorchModelV2) + def forward(self, input_dict, state, seq_lens): + obs = input_dict["obs_flat"].float() + features = self._hidden_layers(obs.reshape(obs.shape[0], -1)) + logits = self._logits(features) + return logits, state + + +def init_weights(m): + if type(m) == nn.Linear: + nn.init.xavier_uniform_(m.weight) + 
m.bias.data.fill_(0.01) + + +def large_initializer(m): + if type(m) == nn.Linear: + nn.init.xavier_uniform_(m.weight) + m.bias.data.fill_(0.1) diff --git a/flow/algorithms/qmix/qmix.py b/flow/algorithms/qmix/qmix.py new file mode 100644 index 000000000..88d2bb29b --- /dev/null +++ b/flow/algorithms/qmix/qmix.py @@ -0,0 +1,105 @@ +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +from ray.rllib.agents.trainer import with_common_config +from ray.rllib.agents.dqn.dqn import GenericOffPolicyTrainer +from flow.algorithms.qmix.qmix_policy import QMixTorchPolicy +from ray.rllib.optimizers import SyncBatchReplayOptimizer + +# yapf: disable +# __sphinx_doc_begin__ +DEFAULT_CONFIG = with_common_config({ + # === QMix === + # Mixing network. Either "qmix", "vdn", or None + "mixer": "qmix", + # Size of the mixing network embedding + "mixing_embed_dim": 32, + # Whether to use Double_Q learning + "double_q": True, + # Optimize over complete episodes by default. + "batch_mode": "complete_episodes", + + # === Evaluation === + # Evaluate with epsilon=0 every `evaluation_interval` training iterations. + # The evaluation stats will be reported under the "evaluation" metric key. + # Note that evaluation is currently not parallelized, and that for Ape-X + # metrics are already only reported for the lowest epsilon workers. + "evaluation_interval": None, + # Number of episodes to run per evaluation period. + "evaluation_num_episodes": 10, + + # === Exploration === + # Max num timesteps for annealing schedules. Exploration is annealed from + # 1.0 to exploration_fraction over this number of timesteps scaled by + # exploration_fraction + "schedule_max_timesteps": 100000, + # Number of env steps to optimize for before returning + "timesteps_per_iteration": 1000, + # Fraction of entire training period over which the exploration rate is + # annealed + "exploration_fraction": 0.1, + # Final value of random action probability + "exploration_final_eps": 0.02, + # Update the target network every `target_network_update_freq` steps. + "target_network_update_freq": 500, + + # === Replay buffer === + # Size of the replay buffer in steps. + "buffer_size": 10000, + + # === Optimization === + # Learning rate for RMSProp optimizer + "lr": 0.0005, + # RMSProp alpha + "optim_alpha": 0.99, + # RMSProp epsilon + "optim_eps": 0.00001, + # If not None, clip gradients during optimization at this value + "grad_norm_clipping": 10, + # How many steps of the model to sample before learning starts. + "learning_starts": 1000, + # Update the replay buffer with this many samples at once. Note that + # this setting applies per-worker if num_workers > 1. + "sample_batch_size": 4, + # Size of a batched sampled from replay buffer for training. Note that + # if async_updates is set, then each worker returns gradients for a + # batch of this size. + "train_batch_size": 32, + + # === Parallelism === + # Number of workers for collecting samples with. This only makes sense + # to increase if your environment is particularly slow to sample, or if + # you"re using the Async or Ape-X optimizers. + "num_workers": 0, + # Whether to use a distribution of epsilons across workers for exploration. + "per_worker_exploration": False, + # Whether to compute priorities on workers. 
+ "worker_side_prioritization": False, + # Prevent iterations from going lower than this time span + "min_iter_time_s": 1, + + # === Model === + "model": { + "lstm_cell_size": 64, + "max_seq_len": 999999, + "fcnet_hiddens": [32, 32] + }, +}) +# __sphinx_doc_end__ +# yapf: enable + + +def make_sync_batch_optimizer(workers, config): + return SyncBatchReplayOptimizer( + workers, + learning_starts=config["learning_starts"], + buffer_size=config["buffer_size"], + train_batch_size=config["train_batch_size"]) + + +QMixTrainer2 = GenericOffPolicyTrainer.with_updates( + name="QMIX", + default_config=DEFAULT_CONFIG, + default_policy=QMixTorchPolicy, + make_policy_optimizer=make_sync_batch_optimizer) diff --git a/flow/algorithms/qmix/qmix_policy.py b/flow/algorithms/qmix/qmix_policy.py new file mode 100644 index 000000000..d0245279d --- /dev/null +++ b/flow/algorithms/qmix/qmix_policy.py @@ -0,0 +1,562 @@ +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +from gym.spaces import Tuple, Discrete, Dict +import logging +import numpy as np +import torch as th +import torch.nn as nn +from torch.optim import RMSprop +from torch.distributions import Categorical + +import ray +from flow.algorithms.qmix.mixers import VDNMixer, QMixer +from flow.algorithms.qmix.model import RNNModel, _get_size, FeedForward +from ray.rllib.evaluation.metrics import LEARNER_STATS_KEY +from ray.rllib.policy.policy import TupleActions, Policy +from ray.rllib.policy.rnn_sequencing import chop_into_sequences +from ray.rllib.policy.sample_batch import SampleBatch +from ray.rllib.models.catalog import ModelCatalog +from ray.rllib.models.model import _unpack_obs +from ray.rllib.env.constants import GROUP_REWARDS +from ray.rllib.utils.annotations import override + +logger = logging.getLogger(__name__) + +# if the obs space is Dict type, look for the global state under this key +ENV_STATE = "state" + + +class QMixLoss(nn.Module): + def __init__(self, + model, + target_model, + mixer, + target_mixer, + n_agents, + n_actions, + double_q=True, + gamma=0.99): + nn.Module.__init__(self) + self.model = model + self.target_model = target_model + self.mixer = mixer + self.target_mixer = target_mixer + self.n_agents = n_agents + self.n_actions = n_actions + self.double_q = double_q + self.gamma = gamma + + def forward(self, + rewards, + actions, + terminated, + mask, + obs, + next_obs, + action_mask, + next_action_mask, + state=None, + next_state=None): + """Forward pass of the loss. + + Arguments: + rewards: Tensor of shape [B, T, n_agents] + actions: Tensor of shape [B, T, n_agents] + terminated: Tensor of shape [B, T, n_agents] + mask: Tensor of shape [B, T, n_agents] + obs: Tensor of shape [B, T, n_agents, obs_size] + next_obs: Tensor of shape [B, T, n_agents, obs_size] + action_mask: Tensor of shape [B, T, n_agents, n_actions] + next_action_mask: Tensor of shape [B, T, n_agents, n_actions] + state: Tensor of shape [B, T, state_dim] (optional) + next_state: Tensor of shape [B, T, state_dim] (optional) + """ + + # Assert either none or both of state and next_state are given + if state is None and next_state is None: + state = obs # default to state being all agents' observations + next_state = next_obs + elif (state is None) != (next_state is None): + raise ValueError("Expected either neither or both of `state` and " + "`next_state` to be given. 
Got: " + "\n`state` = {}\n`next_state` = {}".format( + state, next_state)) + + # Calculate estimated Q-Values + mac_out = _unroll_mac(self.model, obs) + + # Pick the Q-Values for the actions taken -> [B * n_agents, T] + chosen_action_qvals = th.gather( + mac_out, dim=3, index=actions.unsqueeze(3)).squeeze(3) + + # Calculate the Q-Values necessary for the target + target_mac_out = _unroll_mac(self.target_model, next_obs) + + # Mask out unavailable actions for the t+1 step + ignore_action_tp1 = (next_action_mask == 0) & (mask == 1).unsqueeze(-1) + target_mac_out[ignore_action_tp1] = -np.inf + + # Max over target Q-Values + if self.double_q: + # Double Q learning computes the target Q values by selecting the + # t+1 timestep action according to the "policy" neural network and + # then estimating the Q-value of that action with the "target" + # neural network + + # Compute the t+1 Q-values to be used in action selection + # using next_obs + mac_out_tp1 = _unroll_mac(self.model, next_obs) + + # mask out unallowed actions + mac_out_tp1[ignore_action_tp1] = -np.inf + + # obtain best actions at t+1 according to policy NN + cur_max_actions = mac_out_tp1.argmax(dim=3, keepdim=True) + + # use the target network to estimate the Q-values of policy + # network's selected actions + target_max_qvals = th.gather(target_mac_out, 3, + cur_max_actions).squeeze(3) + else: + target_max_qvals = target_mac_out.max(dim=3)[0] + + assert target_max_qvals.min().item() != -np.inf, \ + "target_max_qvals contains a masked action; \ + there may be a state with no valid actions." + + # Mix + if self.mixer is not None: + chosen_action_qvals = self.mixer(chosen_action_qvals, state) + target_max_qvals = self.target_mixer(target_max_qvals, next_state) + + # Calculate 1-step Q-Learning targets + targets = rewards + self.gamma * (1 - terminated) * target_max_qvals + + # Td-error + td_error = (chosen_action_qvals - targets.detach()) + + mask = mask.expand_as(td_error) + + # 0-out the targets that came from padded data + masked_td_error = td_error * mask + + # Normal L2 loss, take mean over actual data + loss = (masked_td_error**2).sum() / mask.sum() + return loss, mask, masked_td_error, chosen_action_qvals, targets + + +class QMixTorchPolicy(Policy): + """QMix impl. Assumes homogeneous agents for now. + + You must use MultiAgentEnv.with_agent_groups() to group agents + together for QMix. This creates the proper Tuple obs/action spaces and + populates the '_group_rewards' info field. + + Action masking: to specify an action mask for individual agents, use a + dict space with an action_mask key, e.g. {"obs": ob, "action_mask": mask}. + The mask space must be `Box(0, 1, (n_actions,))`. 
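+
+    For instance, I210QMIXMultiEnv later in this patch exposes per-agent
+    observations of roughly this form (sketch, see its observation_space):
+
+        Dict({"obs": <per-agent Box obs>,
+              "action_mask": Box(0, 1, shape=(n_actions,))})
+
+    where index 0 of the mask corresponds to the no-op action.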
+ """ + + def __init__(self, obs_space, action_space, config): + _validate(obs_space, action_space) + config = dict(ray.rllib.agents.qmix.qmix.DEFAULT_CONFIG, **config) + self.config = config + self.observation_space = obs_space + self.action_space = action_space + self.n_agents = len(obs_space.original_space.spaces) + self.n_actions = action_space.spaces[0].n + self.h_size = config["model"]["lstm_cell_size"] + self.has_env_global_state = False + self.has_action_mask = False + self.device = (th.device("cuda") + if th.cuda.is_available() else th.device("cpu")) + + agent_obs_space = obs_space.original_space.spaces[0] + if isinstance(agent_obs_space, Dict): + space_keys = set(agent_obs_space.spaces.keys()) + if "obs" not in space_keys: + raise ValueError( + "Dict obs space must have subspace labeled `obs`") + self.obs_size = _get_size(agent_obs_space.spaces["obs"]) + if "action_mask" in space_keys: + mask_shape = tuple(agent_obs_space.spaces["action_mask"].shape) + if mask_shape != (self.n_actions, ): + raise ValueError( + "Action mask shape must be {}, got {}".format( + (self.n_actions, ), mask_shape)) + self.has_action_mask = True + if ENV_STATE in space_keys: + self.env_global_state_shape = _get_size( + agent_obs_space.spaces[ENV_STATE]) + self.has_env_global_state = True + else: + self.env_global_state_shape = (self.obs_size, self.n_agents) + # The real agent obs space is nested inside the dict + config["model"]["full_obs_space"] = agent_obs_space + agent_obs_space = agent_obs_space.spaces["obs"] + else: + self.obs_size = _get_size(agent_obs_space) + + self.model = ModelCatalog.get_model_v2( + agent_obs_space, + action_space.spaces[0], + self.n_actions, + config["model"], + framework="torch", + name="model", + default_model=FeedForward).to(self.device) + # TODO(@evinitsky) make an RNN an option + # default_model=RNNModel).to(self.device) + + self.target_model = ModelCatalog.get_model_v2( + agent_obs_space, + action_space.spaces[0], + self.n_actions, + config["model"], + framework="torch", + name="target_model", + default_model=FeedForward).to(self.device) + # default_model=RNNModel).to(self.device) + + # Setup the mixer network. 
+ if config["mixer"] is None: + self.mixer = None + self.target_mixer = None + elif config["mixer"] == "qmix": + self.mixer = QMixer(self.n_agents, self.env_global_state_shape, + config["mixing_embed_dim"]).to(self.device) + self.target_mixer = QMixer( + self.n_agents, self.env_global_state_shape, + config["mixing_embed_dim"]).to(self.device) + elif config["mixer"] == "vdn": + self.mixer = VDNMixer().to(self.device) + self.target_mixer = VDNMixer().to(self.device) + else: + raise ValueError("Unknown mixer type {}".format(config["mixer"])) + + self.cur_epsilon = 1.0 + self.update_target() # initial sync + + # Setup optimizer + self.params = list(self.model.parameters()) + if self.mixer: + self.params += list(self.mixer.parameters()) + self.loss = QMixLoss(self.model, self.target_model, self.mixer, + self.target_mixer, self.n_agents, self.n_actions, + self.config["double_q"], self.config["gamma"]) + self.optimiser = RMSprop( + params=self.params, + lr=config["lr"], + alpha=config["optim_alpha"], + eps=config["optim_eps"]) + + @override(Policy) + def compute_actions(self, + obs_batch, + state_batches=None, + prev_action_batch=None, + prev_reward_batch=None, + info_batch=None, + episodes=None, + **kwargs): + obs_batch, action_mask, _ = self._unpack_observation(obs_batch) + # We need to ensure we do not use the env global state + # to compute actions + + # Compute actions + with th.no_grad(): + q_values, hiddens = _mac( + self.model, + th.as_tensor(obs_batch, dtype=th.float, device=self.device), [ + th.as_tensor( + np.array(s), dtype=th.float, device=self.device) + for s in state_batches + ]) + avail = th.as_tensor( + action_mask, dtype=th.float, device=self.device) + masked_q_values = q_values.clone() + masked_q_values[avail == 0.0] = -float("inf") + # epsilon-greedy action selector + random_numbers = th.rand_like(q_values[:, :, 0]) + pick_random = (random_numbers < self.cur_epsilon).long() + random_actions = Categorical(avail).sample().long() + actions = (pick_random * random_actions + + (1 - pick_random) * masked_q_values.argmax(dim=2)) + actions = actions.cpu().numpy() + hiddens = [s.cpu().numpy() for s in hiddens] + + return TupleActions(list(actions.transpose([1, 0]))), hiddens, {} + + @override(Policy) + def learn_on_batch(self, samples): + obs_batch, action_mask, env_global_state = self._unpack_observation( + samples[SampleBatch.CUR_OBS]) + (next_obs_batch, next_action_mask, + next_env_global_state) = self._unpack_observation( + samples[SampleBatch.NEXT_OBS]) + group_rewards = self._get_group_rewards(samples[SampleBatch.INFOS]) + + input_list = [ + group_rewards, action_mask, next_action_mask, + samples[SampleBatch.ACTIONS], samples[SampleBatch.DONES], + obs_batch, next_obs_batch + ] + if self.has_env_global_state: + input_list.extend([env_global_state, next_env_global_state]) + + output_list, _, seq_lens = \ + chop_into_sequences( + samples[SampleBatch.EPS_ID], + samples[SampleBatch.UNROLL_ID], + samples[SampleBatch.AGENT_INDEX], + input_list, + [], # RNN states not used here + # TODO(@evinitsky) make this an option if we are using an RNN + max_seq_len=1, + # max_seq_len=self.config["model"]["max_seq_len"], + dynamic_max=True) + # These will be padded to shape [B * T, ...] 
+ if self.has_env_global_state: + (rew, action_mask, next_action_mask, act, dones, obs, next_obs, + env_global_state, next_env_global_state) = output_list + else: + (rew, action_mask, next_action_mask, act, dones, obs, + next_obs) = output_list + B, T = len(seq_lens), max(seq_lens) + + def to_batches(arr, dtype): + new_shape = [B, T] + list(arr.shape[1:]) + return th.as_tensor( + np.reshape(arr, new_shape), dtype=dtype, device=self.device) + + rewards = to_batches(rew, th.float) + actions = to_batches(act, th.long) + obs = to_batches(obs, th.float).reshape( + [B, T, self.n_agents, self.obs_size]) + action_mask = to_batches(action_mask, th.float) + next_obs = to_batches(next_obs, th.float).reshape( + [B, T, self.n_agents, self.obs_size]) + next_action_mask = to_batches(next_action_mask, th.float) + if self.has_env_global_state: + env_global_state = to_batches(env_global_state, th.float) + next_env_global_state = to_batches(next_env_global_state, th.float) + + # TODO(ekl) this treats group termination as individual termination + terminated = to_batches(dones, th.float).unsqueeze(2).expand( + B, T, self.n_agents) + + # Create mask for where index is < unpadded sequence length + filled = np.reshape( + np.tile(np.arange(T, dtype=np.float32), B), + [B, T]) < np.expand_dims(seq_lens, 1) + mask = th.as_tensor( + filled, dtype=th.float, device=self.device).unsqueeze(2).expand( + B, T, self.n_agents) + + # Compute loss + loss_out, mask, masked_td_error, chosen_action_qvals, targets = ( + self.loss(rewards, actions, terminated, mask, obs, next_obs, + action_mask, next_action_mask, env_global_state, + next_env_global_state)) + + # Optimise + self.optimiser.zero_grad() + loss_out.backward() + grad_norm = th.nn.utils.clip_grad_norm_( + self.params, self.config["grad_norm_clipping"]) + self.optimiser.step() + + mask_elems = mask.sum().item() + stats = { + "loss": loss_out.item(), + "grad_norm": grad_norm + if isinstance(grad_norm, float) else grad_norm.item(), + "td_error_abs": masked_td_error.abs().sum().item() / mask_elems, + "q_taken_mean": (chosen_action_qvals * mask).sum().item() / + mask_elems, + "target_mean": (targets * mask).sum().item() / mask_elems, + } + return {LEARNER_STATS_KEY: stats} + + @override(Policy) + def get_initial_state(self): # initial RNN state + return [ + s.expand([self.n_agents, -1]).cpu().numpy() + for s in self.model.get_initial_state() + ] + + @override(Policy) + def get_weights(self): + return { + "model": self._cpu_dict(self.model.state_dict()), + "target_model": self._cpu_dict(self.target_model.state_dict()), + "mixer": self._cpu_dict(self.mixer.state_dict()) + if self.mixer else None, + "target_mixer": self._cpu_dict(self.target_mixer.state_dict()) + if self.mixer else None, + } + + @override(Policy) + def set_weights(self, weights): + self.model.load_state_dict(self._device_dict(weights["model"])) + self.target_model.load_state_dict( + self._device_dict(weights["target_model"])) + if weights["mixer"] is not None: + self.mixer.load_state_dict(self._device_dict(weights["mixer"])) + self.target_mixer.load_state_dict( + self._device_dict(weights["target_mixer"])) + + @override(Policy) + def get_state(self): + state = self.get_weights() + state["cur_epsilon"] = self.cur_epsilon + return state + + @override(Policy) + def set_state(self, state): + self.set_weights(state) + self.set_epsilon(state["cur_epsilon"]) + + def update_target(self): + self.target_model.load_state_dict(self.model.state_dict()) + if self.mixer is not None: + 
self.target_mixer.load_state_dict(self.mixer.state_dict()) + logger.debug("Updated target networks") + + def set_epsilon(self, epsilon): + self.cur_epsilon = epsilon + + def _get_group_rewards(self, info_batch): + group_rewards = np.array([ + info.get(GROUP_REWARDS, [0.0] * self.n_agents) + for info in info_batch + ]) + return group_rewards + + def _device_dict(self, state_dict): + return { + k: th.as_tensor(v, device=self.device) + for k, v in state_dict.items() + } + + @staticmethod + def _cpu_dict(state_dict): + return {k: v.cpu().detach().numpy() for k, v in state_dict.items()} + + def _unpack_observation(self, obs_batch): + """Unpacks the observation, action mask, and state (if present) + from agent grouping. + + Returns: + obs (np.ndarray): obs tensor of shape [B, n_agents, obs_size] + mask (np.ndarray): action mask, if any + state (np.ndarray or None): state tensor of shape [B, state_size] + or None if it is not in the batch + """ + unpacked = _unpack_obs( + np.array(obs_batch, dtype=np.float32), + self.observation_space.original_space, + tensorlib=np) + if self.has_action_mask: + obs = np.concatenate( + [o["obs"] for o in unpacked], + axis=1).reshape([len(obs_batch), self.n_agents, self.obs_size]) + action_mask = np.concatenate( + [o["action_mask"] for o in unpacked], axis=1).reshape( + [len(obs_batch), self.n_agents, self.n_actions]) + else: + if isinstance(unpacked[0], dict): + unpacked_obs = [u["obs"] for u in unpacked] + else: + unpacked_obs = unpacked + obs = np.concatenate( + unpacked_obs, + axis=1).reshape([len(obs_batch), self.n_agents, self.obs_size]) + action_mask = np.ones( + [len(obs_batch), self.n_agents, self.n_actions], + dtype=np.float32) + + if self.has_env_global_state: + state = unpacked[0][ENV_STATE] + else: + state = None + return obs, action_mask, state + + +def _validate(obs_space, action_space): + if not hasattr(obs_space, "original_space") or \ + not isinstance(obs_space.original_space, Tuple): + raise ValueError("Obs space must be a Tuple, got {}. Use ".format( + obs_space) + "MultiAgentEnv.with_agent_groups() to group related " + "agents for QMix.") + if not isinstance(action_space, Tuple): + raise ValueError( + "Action space must be a Tuple, got {}. ".format(action_space) + + "Use MultiAgentEnv.with_agent_groups() to group related " + "agents for QMix.") + if not isinstance(action_space.spaces[0], Discrete): + raise ValueError( + "QMix requires a discrete action space, got {}".format( + action_space.spaces[0])) + if len({str(x) for x in obs_space.original_space.spaces}) > 1: + raise ValueError( + "Implementation limitation: observations of grouped agents " + "must be homogeneous, got {}".format( + obs_space.original_space.spaces)) + if len({str(x) for x in action_space.spaces}) > 1: + raise ValueError( + "Implementation limitation: action space of grouped agents " + "must be homogeneous, got {}".format(action_space.spaces)) + + +def _mac(model, obs, h): + """Forward pass of the multi-agent controller. 
+ + Arguments: + model: TorchModelV2 class + obs: Tensor of shape [B, n_agents, obs_size] + h: List of tensors of shape [B, n_agents, h_size] + + Returns: + q_vals: Tensor of shape [B, n_agents, n_actions] + h: Tensor of shape [B, n_agents, h_size] + """ + B, n_agents = obs.size(0), obs.size(1) + if not isinstance(obs, dict): + obs = {"obs": obs} + obs_agents_as_batches = {k: _drop_agent_dim(v) for k, v in obs.items()} + h_flat = [s.reshape([B * n_agents, -1]) for s in h] + q_flat, h_flat = model(obs_agents_as_batches, h_flat, None) + return q_flat.reshape( + [B, n_agents, -1]), [s.reshape([B, n_agents, -1]) for s in h_flat] + + +def _unroll_mac(model, obs_tensor): + """Computes the estimated Q values for an entire trajectory batch""" + B = obs_tensor.size(0) + T = obs_tensor.size(1) + n_agents = obs_tensor.size(2) + + mac_out = [] + h = [s.expand([B, n_agents, -1]) for s in model.get_initial_state()] + for t in range(T): + q, h = _mac(model, obs_tensor[:, t], h) + mac_out.append(q) + mac_out = th.stack(mac_out, dim=1) # Concat over time + + return mac_out + + +def _drop_agent_dim(T): + shape = list(T.shape) + B, n_agents = shape[0], shape[1] + return T.reshape([B * n_agents] + shape[2:]) + + +def _add_agent_dim(T, n_agents): + shape = list(T.shape) + B = shape[0] // n_agents + assert shape[0] % n_agents == 0 + return T.reshape([B, n_agents] + shape[1:]) diff --git a/flow/envs/multiagent/__init__.py b/flow/envs/multiagent/__init__.py index f7889591d..551bf254e 100644 --- a/flow/envs/multiagent/__init__.py +++ b/flow/envs/multiagent/__init__.py @@ -10,7 +10,7 @@ from flow.envs.multiagent.traffic_light_grid import MultiTrafficLightGridPOEnv from flow.envs.multiagent.highway import MultiAgentHighwayPOEnv from flow.envs.multiagent.merge import MultiAgentMergePOEnv -from flow.envs.multiagent.i210 import I210MultiEnv +from flow.envs.multiagent.i210 import I210MultiEnv, I210QMIXMultiEnv __all__ = [ 'MultiEnv', @@ -21,5 +21,6 @@ 'MultiAgentAccelPOEnv', 'MultiAgentWaveAttenuationPOEnv', 'MultiAgentMergePOEnv', - 'I210MultiEnv' + 'I210MultiEnv', + 'I210QMIXMultiEnv' ] diff --git a/flow/envs/multiagent/base.py b/flow/envs/multiagent/base.py index ec95474c6..cd8bcdd95 100644 --- a/flow/envs/multiagent/base.py +++ b/flow/envs/multiagent/base.py @@ -4,7 +4,7 @@ import numpy as np import random import traceback -from gym.spaces import Box +from gym.spaces import Box, Dict from traci.exceptions import FatalTraCIError from traci.exceptions import TraCIException @@ -122,11 +122,14 @@ def step(self, rl_actions): else: reward = self.compute_reward(rl_actions, fail=crash) - for rl_id in self.k.vehicle.get_arrived_rl_ids(): - done[rl_id] = True - reward[rl_id] = 0 - states[rl_id] = np.zeros(self.observation_space.shape[0]) - + # TODO(@evinitsky) put back and handle the case where qmix is on + # for rl_id in self.k.vehicle.get_arrived_rl_ids(): + # done[rl_id] = True + # reward[rl_id] = 0 + # if isinstance(self.observation_space, Dict): + # states[rl_id] = self.observation_space.sample() + # else: + # states[rl_id] = np.zeros(self.observation_space.shape[0]) return states, reward, done, infos def reset(self, new_inflow_rate=None): diff --git a/flow/envs/multiagent/i210.py b/flow/envs/multiagent/i210.py index 409aeb14f..a7a249ce1 100644 --- a/flow/envs/multiagent/i210.py +++ b/flow/envs/multiagent/i210.py @@ -1,6 +1,6 @@ """Environment for training vehicles to reduce congestion in the I210.""" -from gym.spaces import Box +from gym.spaces import Box, Discrete, Dict import numpy as np from flow.core.rewards import 
average_velocity @@ -223,3 +223,69 @@ def veh_statistics(self, rl_id): speed = self.k.vehicle.get_speed(rl_id) / 100.0 lane = (self.k.vehicle.get_lane(rl_id) + 1) / 10.0 return np.array([speed, lane]) + + +class I210QMIXMultiEnv(I210MultiEnv): + def __init__(self, env_params, sim_params, network, simulator='traci'): + super().__init__(env_params, sim_params, network, simulator) + self.max_num_agents = env_params.additional_params.get("max_num_agents_qmix") + self.num_actions = env_params.additional_params.get("num_actions") + self.action_values = np.linspace(start=-np.abs(self.env_params.additional_params['max_decel']), + stop=self.env_params.additional_params['max_accel'], num=self.num_actions) + + @property + def action_space(self): + """See class definition.""" + return Discrete(self.num_actions + 1) + + @property + def observation_space(self): + obs_space = super().observation_space + return Dict({"obs": obs_space, "action_mask": Box(0, 1, shape=(self.action_space.n,))}) + + def _apply_rl_actions(self, rl_actions): + """See class definition.""" + # in the warmup steps, rl_actions is None + if rl_actions: + accel_list = [] + rl_ids = [] + for rl_id, action in rl_actions.items(): + # 0 is the no-op + if action > 0: + accel = self.action_values[action - 1] + accel_list.append(accel) + rl_ids.append(rl_id) + self.k.vehicle.apply_acceleration(rl_ids, accel_list) + + def get_state(self): + rl_ids = self.k.vehicle.get_rl_ids() + veh_info = super().get_state() + veh_info_copy = {idx: {"obs": np.zeros(self.observation_space.spaces['obs'].shape[0]), + "action_mask": self.get_action_mask(valid_agent=False)} + for idx in range(self.max_num_agents)} + veh_info_copy.update({rl_id_idx: {"obs": veh_info[rl_id], + "action_mask": self.get_action_mask(valid_agent=True)} + for rl_id_idx, rl_id in enumerate(rl_ids)}) + veh_info = veh_info_copy + self.rl_id_to_idx_map = {rl_id: i for i, rl_id in enumerate(rl_ids)} + self.idx_to_rl_id_map = {i: rl_id for i, rl_id in enumerate(rl_ids)} + return veh_info + + def compute_reward(self, rl_actions, **kwargs): + reward_dict = super().compute_reward(rl_actions, **kwargs) + temp_reward_dict = {idx: 0 for idx in + range(self.max_num_agents)} + temp_reward_dict.update({self.rl_id_to_idx_map[rl_id]: reward_dict[rl_id] + for rl_id in self.k.vehicle.get_rl_ids()}) + return temp_reward_dict + + def get_action_mask(self, valid_agent): + """If a valid agent, return a 0 in the position of the no-op action. 
If not, return a 1 in that position + and a zero everywhere else.""" + if valid_agent: + temp_list = [1 for _ in range(self.action_space.n)] + temp_list[0] = 0 + else: + temp_list = [0 for _ in range(self.action_space.n)] + temp_list[0] = 1 + return temp_list \ No newline at end of file diff --git a/requirements.txt b/requirements.txt index 4569dfca5..6857b251b 100644 --- a/requirements.txt +++ b/requirements.txt @@ -26,4 +26,5 @@ redis~=2.10.6 pandas==0.24.2 plotly==2.4.0 tabulate +torch== pytz \ No newline at end of file From ea470a3fd234f74671a0dd90f33edb695798c435 Mon Sep 17 00:00:00 2001 From: Eugene Vinitsky Date: Sat, 28 Mar 2020 02:15:56 -0700 Subject: [PATCH 02/85] Speedup --- flow/envs/multiagent/i210.py | 21 ++++++++++++++++++--- 1 file changed, 18 insertions(+), 3 deletions(-) diff --git a/flow/envs/multiagent/i210.py b/flow/envs/multiagent/i210.py index a7a249ce1..b7e4caaa2 100644 --- a/flow/envs/multiagent/i210.py +++ b/flow/envs/multiagent/i210.py @@ -1,5 +1,8 @@ """Environment for training vehicles to reduce congestion in the I210.""" +from copy import deepcopy +from time import time + from gym.spaces import Box, Discrete, Dict import numpy as np @@ -232,6 +235,9 @@ def __init__(self, env_params, sim_params, network, simulator='traci'): self.num_actions = env_params.additional_params.get("num_actions") self.action_values = np.linspace(start=-np.abs(self.env_params.additional_params['max_decel']), stop=self.env_params.additional_params['max_accel'], num=self.num_actions) + self.default_state = {idx: {"obs": np.zeros(self.observation_space.spaces['obs'].shape[0]), + "action_mask": self.get_action_mask(valid_agent=False)} + for idx in range(self.max_num_agents)} @property def action_space(self): @@ -246,6 +252,7 @@ def observation_space(self): def _apply_rl_actions(self, rl_actions): """See class definition.""" # in the warmup steps, rl_actions is None + t = time() if rl_actions: accel_list = [] rl_ids = [] @@ -256,27 +263,35 @@ def _apply_rl_actions(self, rl_actions): accel_list.append(accel) rl_ids.append(rl_id) self.k.vehicle.apply_acceleration(rl_ids, accel_list) + print('time to apply actions is ', time() - t) def get_state(self): + t = time() rl_ids = self.k.vehicle.get_rl_ids() veh_info = super().get_state() - veh_info_copy = {idx: {"obs": np.zeros(self.observation_space.spaces['obs'].shape[0]), - "action_mask": self.get_action_mask(valid_agent=False)} - for idx in range(self.max_num_agents)} + print('time to get state is ', time() - t) + t = time() + # TODO(@evinitsky) think this doesn't have to be a deepcopy + veh_info_copy = deepcopy(self.default_state) + print('time to make copy is ', time() - t) + t = time() veh_info_copy.update({rl_id_idx: {"obs": veh_info[rl_id], "action_mask": self.get_action_mask(valid_agent=True)} for rl_id_idx, rl_id in enumerate(rl_ids)}) + print('time to update copy is ', time() - t) veh_info = veh_info_copy self.rl_id_to_idx_map = {rl_id: i for i, rl_id in enumerate(rl_ids)} self.idx_to_rl_id_map = {i: rl_id for i, rl_id in enumerate(rl_ids)} return veh_info def compute_reward(self, rl_actions, **kwargs): + t = time() reward_dict = super().compute_reward(rl_actions, **kwargs) temp_reward_dict = {idx: 0 for idx in range(self.max_num_agents)} temp_reward_dict.update({self.rl_id_to_idx_map[rl_id]: reward_dict[rl_id] for rl_id in self.k.vehicle.get_rl_ids()}) + print('time to compute reward is ', time() - t) return temp_reward_dict def get_action_mask(self, valid_agent): From 58f357c762f3da3bff29ecdfdca6b7b12490ea6b Mon Sep 17 00:00:00 2001 
From: Eugene Vinitsky Date: Sat, 28 Mar 2020 12:31:15 -0700 Subject: [PATCH 03/85] Replace QMIX reward w global reward --- examples/train.py | 12 +++++++++++- flow/algorithms/qmix/qmix_policy.py | 13 ++++++------- flow/envs/multiagent/i210.py | 17 ++++++++--------- 3 files changed, 25 insertions(+), 17 deletions(-) diff --git a/examples/train.py b/examples/train.py index b32bbef92..4e352420f 100644 --- a/examples/train.py +++ b/examples/train.py @@ -59,6 +59,10 @@ def parse_args(args): help='Name of the experiment configuration file, as located in ' 'exp_configs/rl/singleagent or exp_configs/rl/multiagent.') + parser.add_argument( + 'exp_title', type=str, + help='Name of experiment taht results will be stored in') + # optional input parameters parser.add_argument( '--rl_trainer', type=str, default="rllib", @@ -209,6 +213,7 @@ def setup_exps_rllib(flow_params, def on_episode_start(info): episode = info["episode"] episode.user_data["avg_speed"] = [] + episode.user_data["avg_speed_avs"] = [] episode.user_data["avg_energy"] = [] def on_episode_step(info): @@ -219,12 +224,17 @@ def on_episode_step(info): speed = np.mean([speed for speed in env.k.vehicle.get_speed(env.k.vehicle.get_ids()) if speed >= 0]) if not np.isnan(speed): episode.user_data["avg_speed"].append(speed) + av_speed = np.mean([speed for speed in env.k.vehicle.get_speed(env.k.vehicle.get_rl_ids()) if speed >= 0]) + if not np.isnan(av_speed): + episode.user_data["avg_speed_avs"].append(av_speed) episode.user_data["avg_energy"].append(energy_consumption(env)) def on_episode_end(info): episode = info["episode"] avg_speed = np.mean(episode.user_data["avg_speed"]) episode.custom_metrics["avg_speed"] = avg_speed + avg_speed_avs = np.mean(episode.user_data["avg_speed_avs"]) + episode.custom_metrics["avg_speed_avs"] = avg_speed_avs episode.custom_metrics["avg_energy_per_veh"] = np.mean(episode.user_data["avg_energy"]) config["callbacks"] = {"on_episode_start": tune.function(on_episode_start), @@ -286,7 +296,7 @@ def train_rllib(submodule, flags): ray.init() exp_dict = { "run_or_experiment": alg_run, - "name": gym_name, + "name": flags.exp_title, "config": config, "checkpoint_freq": 20, "checkpoint_at_end": True, diff --git a/flow/algorithms/qmix/qmix_policy.py b/flow/algorithms/qmix/qmix_policy.py index d0245279d..ad6857143 100644 --- a/flow/algorithms/qmix/qmix_policy.py +++ b/flow/algorithms/qmix/qmix_policy.py @@ -206,9 +206,8 @@ def __init__(self, obs_space, action_space, config): config["model"], framework="torch", name="model", - default_model=FeedForward).to(self.device) - # TODO(@evinitsky) make an RNN an option - # default_model=RNNModel).to(self.device) + # default_model=FeedForward).to(self.device) + default_model=RNNModel).to(self.device) self.target_model = ModelCatalog.get_model_v2( agent_obs_space, @@ -217,8 +216,8 @@ def __init__(self, obs_space, action_space, config): config["model"], framework="torch", name="target_model", - default_model=FeedForward).to(self.device) - # default_model=RNNModel).to(self.device) + # default_model=FeedForward).to(self.device) + default_model=RNNModel).to(self.device) # Setup the mixer network. if config["mixer"] is None: @@ -314,8 +313,8 @@ def learn_on_batch(self, samples): input_list, [], # RNN states not used here # TODO(@evinitsky) make this an option if we are using an RNN - max_seq_len=1, - # max_seq_len=self.config["model"]["max_seq_len"], + # max_seq_len=1, + max_seq_len=self.config["model"]["max_seq_len"], dynamic_max=True) # These will be padded to shape [B * T, ...] 
if self.has_env_global_state: diff --git a/flow/envs/multiagent/i210.py b/flow/envs/multiagent/i210.py index b7e4caaa2..70083075e 100644 --- a/flow/envs/multiagent/i210.py +++ b/flow/envs/multiagent/i210.py @@ -263,22 +263,22 @@ def _apply_rl_actions(self, rl_actions): accel_list.append(accel) rl_ids.append(rl_id) self.k.vehicle.apply_acceleration(rl_ids, accel_list) - print('time to apply actions is ', time() - t) + # print('time to apply actions is ', time() - t) def get_state(self): t = time() rl_ids = self.k.vehicle.get_rl_ids() veh_info = super().get_state() - print('time to get state is ', time() - t) + # print('time to get state is ', time() - t) t = time() # TODO(@evinitsky) think this doesn't have to be a deepcopy veh_info_copy = deepcopy(self.default_state) - print('time to make copy is ', time() - t) + # print('time to make copy is ', time() - t) t = time() veh_info_copy.update({rl_id_idx: {"obs": veh_info[rl_id], "action_mask": self.get_action_mask(valid_agent=True)} for rl_id_idx, rl_id in enumerate(rl_ids)}) - print('time to update copy is ', time() - t) + # print('time to update copy is ', time() - t) veh_info = veh_info_copy self.rl_id_to_idx_map = {rl_id: i for i, rl_id in enumerate(rl_ids)} self.idx_to_rl_id_map = {i: rl_id for i, rl_id in enumerate(rl_ids)} @@ -286,12 +286,11 @@ def get_state(self): def compute_reward(self, rl_actions, **kwargs): t = time() - reward_dict = super().compute_reward(rl_actions, **kwargs) - temp_reward_dict = {idx: 0 for idx in + # There has to be one global reward for qmix + reward = np.nan_to_num(np.mean(self.k.vehicle.get_speed(self.k.vehicle.get_ids()))) / (20 * self.env_params.horizon) + temp_reward_dict = {idx: reward for idx in range(self.max_num_agents)} - temp_reward_dict.update({self.rl_id_to_idx_map[rl_id]: reward_dict[rl_id] - for rl_id in self.k.vehicle.get_rl_ids()}) - print('time to compute reward is ', time() - t) + # print('time to compute reward is ', time() - t) return temp_reward_dict def get_action_mask(self, valid_agent): From 99f6c090bc554cf8b3543f03d7fb802d47c2aee4 Mon Sep 17 00:00:00 2001 From: Eugene Vinitsky Date: Sat, 28 Mar 2020 12:31:51 -0700 Subject: [PATCH 04/85] Disable RNN --- flow/algorithms/qmix/qmix_policy.py | 12 ++++++------ flow/envs/multiagent/i210.py | 4 ++-- requirements.txt | 2 +- scripts/ray_autoscale.yaml | 5 ++++- 4 files changed, 13 insertions(+), 10 deletions(-) diff --git a/flow/algorithms/qmix/qmix_policy.py b/flow/algorithms/qmix/qmix_policy.py index ad6857143..d7a8ff359 100644 --- a/flow/algorithms/qmix/qmix_policy.py +++ b/flow/algorithms/qmix/qmix_policy.py @@ -206,8 +206,8 @@ def __init__(self, obs_space, action_space, config): config["model"], framework="torch", name="model", - # default_model=FeedForward).to(self.device) - default_model=RNNModel).to(self.device) + default_model=FeedForward).to(self.device) + # default_model=RNNModel).to(self.device) self.target_model = ModelCatalog.get_model_v2( agent_obs_space, @@ -216,8 +216,8 @@ def __init__(self, obs_space, action_space, config): config["model"], framework="torch", name="target_model", - # default_model=FeedForward).to(self.device) - default_model=RNNModel).to(self.device) + default_model=FeedForward).to(self.device) + # default_model=RNNModel).to(self.device) # Setup the mixer network. 
if config["mixer"] is None: @@ -313,8 +313,8 @@ def learn_on_batch(self, samples): input_list, [], # RNN states not used here # TODO(@evinitsky) make this an option if we are using an RNN - # max_seq_len=1, - max_seq_len=self.config["model"]["max_seq_len"], + max_seq_len=1, + # max_seq_len=self.config["model"]["max_seq_len"], dynamic_max=True) # These will be padded to shape [B * T, ...] if self.has_env_global_state: diff --git a/flow/envs/multiagent/i210.py b/flow/envs/multiagent/i210.py index 70083075e..e335f0338 100644 --- a/flow/envs/multiagent/i210.py +++ b/flow/envs/multiagent/i210.py @@ -297,9 +297,9 @@ def get_action_mask(self, valid_agent): """If a valid agent, return a 0 in the position of the no-op action. If not, return a 1 in that position and a zero everywhere else.""" if valid_agent: - temp_list = [1 for _ in range(self.action_space.n)] + temp_list = np.array([1 for _ in range(self.action_space.n)]) temp_list[0] = 0 else: - temp_list = [0 for _ in range(self.action_space.n)] + temp_list = np.array([0 for _ in range(self.action_space.n)]) temp_list[0] = 1 return temp_list \ No newline at end of file diff --git a/requirements.txt b/requirements.txt index 6857b251b..ba45639e1 100644 --- a/requirements.txt +++ b/requirements.txt @@ -26,5 +26,5 @@ redis~=2.10.6 pandas==0.24.2 plotly==2.4.0 tabulate -torch== +torch==1.3.1 pytz \ No newline at end of file diff --git a/scripts/ray_autoscale.yaml b/scripts/ray_autoscale.yaml index 5bf2a9c4a..409a378bd 100644 --- a/scripts/ray_autoscale.yaml +++ b/scripts/ray_autoscale.yaml @@ -67,12 +67,15 @@ worker_nodes: # Additional options in the boto docs. setup_commands: - - cd flow && git fetch && git checkout origin/master + - cd flow && git fetch && git checkout origin/i210_qmix head_setup_commands: - pip install boto3==1.10.45 # 1.4.8 adds InstanceMarketOptions - pip install awscli==1.16.309 - pip install pytz + - pip install torch==1.3.1 + - pip install tabulate + - pip install ray==0.8.0 # Custom commands that will be run on worker nodes after common setup. 
worker_setup_commands: [] From 942fe56130aee34ed61ba26b2fe409cd1e419e7c Mon Sep 17 00:00:00 2001 From: Eugene Vinitsky Date: Sat, 28 Mar 2020 17:19:00 -0700 Subject: [PATCH 05/85] Add an option for a local reward that just computes speed of the AV and its follower --- .../exp_configs/non_rl/i210_subnetwork.py | 2 +- .../rl/multiagent/multiagent_i210.py | 1 - examples/train.py | 4 +- flow/envs/multiagent/i210.py | 72 +++++++++++-------- 4 files changed, 46 insertions(+), 33 deletions(-) diff --git a/examples/exp_configs/non_rl/i210_subnetwork.py b/examples/exp_configs/non_rl/i210_subnetwork.py index dd85c56cf..d993ae93a 100644 --- a/examples/exp_configs/non_rl/i210_subnetwork.py +++ b/examples/exp_configs/non_rl/i210_subnetwork.py @@ -101,7 +101,7 @@ edge_id = "119257908#1-AddedOnRampEdge" custom_callables = { "avg_merge_speed": lambda env: np.nan_to_num(np.mean( - env.k.vehicle.get_speed(env.k.vehicle.get_ids_by_edge(edge_id)))), + env.k.vehicle.get_speed(env.k.vehicle.get_ids()))), "avg_outflow": lambda env: np.nan_to_num( env.k.vehicle.get_outflow_rate(120)), # we multiply by 5 to account for the vehicle length and by 1000 to convert diff --git a/examples/exp_configs/rl/multiagent/multiagent_i210.py b/examples/exp_configs/rl/multiagent/multiagent_i210.py index 872568cab..1779adf69 100644 --- a/examples/exp_configs/rl/multiagent/multiagent_i210.py +++ b/examples/exp_configs/rl/multiagent/multiagent_i210.py @@ -5,7 +5,6 @@ """ import os -from ray.rllib.agents.ppo.ppo_policy import PPOTFPolicy from ray.tune.registry import register_env from flow.controllers import RLController diff --git a/examples/train.py b/examples/train.py index 1f2cd6300..aaa9caddb 100644 --- a/examples/train.py +++ b/examples/train.py @@ -22,7 +22,6 @@ import ray from ray import tune -from ray.tune import run_experiments from ray.tune.registry import register_env try: from ray.rllib.agents.agent import get_agent_class @@ -36,9 +35,9 @@ from flow.utils.registry import make_create_env - def parse_args(args): """Parse training options user can specify in command line. + Returns ------- argparse.Namespace @@ -140,6 +139,7 @@ def setup_exps_rllib(flow_params, policies_to_train=None, ): """Return the relevant components of an RLlib experiment. 
+ Parameters ---------- flow_params : dict diff --git a/flow/envs/multiagent/i210.py b/flow/envs/multiagent/i210.py index 409aeb14f..4082eb415 100644 --- a/flow/envs/multiagent/i210.py +++ b/flow/envs/multiagent/i210.py @@ -16,6 +16,8 @@ "max_decel": 1, # whether we use an obs space that contains adjacent lane info or just the lead obs "lead_obs": True, + # whether the reward should come from local vehicles instead of global rewards + "local_reward": True } @@ -137,35 +139,47 @@ def compute_reward(self, rl_actions, **kwargs): return {} rewards = {} - for rl_id in self.k.vehicle.get_rl_ids(): - if self.env_params.evaluate: - # reward is speed of vehicle if we are in evaluation mode - reward = self.k.vehicle.get_speed(rl_id) - elif kwargs['fail']: - # reward is 0 if a collision occurred - reward = 0 - else: - # reward high system-level velocities - cost1 = average_velocity(self, fail=kwargs['fail']) - - # penalize small time headways - cost2 = 0 - t_min = 1 # smallest acceptable time headway - - lead_id = self.k.vehicle.get_leader(rl_id) - if lead_id not in ["", None] \ - and self.k.vehicle.get_speed(rl_id) > 0: - t_headway = max( - self.k.vehicle.get_headway(rl_id) / - self.k.vehicle.get_speed(rl_id), 0) - cost2 += min((t_headway - t_min) / t_min, 0) - - # weights for cost1, cost2, and cost3, respectively - eta1, eta2 = 1.00, 0.10 - - reward = max(eta1 * cost1 + eta2 * cost2, 0) - - rewards[rl_id] = reward + if self.env_params.additional_params["local_reward"]: + for rl_id in self.k.vehicle.get_rl_ids(): + rewards[rl_id] = 0 + speeds = [] + follow_speed = self.k.vehicle.get_speed(self.k.vehicle.get_follower(rl_id)) + speeds.extend([speed for speed in follow_speed if speed >= 0]) + if self.k.vehicle.get_speed(rl_id) >= 0: + speeds.append(self.k.vehicle.get_speed(rl_id)) + if len(speeds) > 0: + # rescale so the q function can estimate it quickly + rewards[rl_id] = np.mean(speeds) / 500.0 + else: + for rl_id in self.k.vehicle.get_rl_ids(): + if self.env_params.evaluate: + # reward is speed of vehicle if we are in evaluation mode + reward = self.k.vehicle.get_speed(rl_id) + elif kwargs['fail']: + # reward is 0 if a collision occurred + reward = 0 + else: + # reward high system-level velocities + cost1 = average_velocity(self, fail=kwargs['fail']) + + # penalize small time headways + cost2 = 0 + t_min = 1 # smallest acceptable time headway + + lead_id = self.k.vehicle.get_leader(rl_id) + if lead_id not in ["", None] \ + and self.k.vehicle.get_speed(rl_id) > 0: + t_headway = max( + self.k.vehicle.get_headway(rl_id) / + self.k.vehicle.get_speed(rl_id), 0) + cost2 += min((t_headway - t_min) / t_min, 0) + + # weights for cost1, cost2, and cost3, respectively + eta1, eta2 = 1.00, 0.10 + + reward = max(eta1 * cost1 + eta2 * cost2, 0) + + rewards[rl_id] = reward return rewards def additional_command(self): From 5c165fe72b6487f7431a329a88ee33c59de707ed Mon Sep 17 00:00:00 2001 From: Eugene Vinitsky Date: Mon, 6 Apr 2020 13:58:12 -0700 Subject: [PATCH 06/85] Make speed square of rewards --- .../exp_configs/rl/multiagent/multiagent_i210.py | 2 +- examples/train.py | 14 +++++++++++++- flow/envs/multiagent/i210.py | 4 ++-- scripts/ray_autoscale.yaml | 6 ++++-- 4 files changed, 20 insertions(+), 6 deletions(-) diff --git a/examples/exp_configs/rl/multiagent/multiagent_i210.py b/examples/exp_configs/rl/multiagent/multiagent_i210.py index 1779adf69..5e8922fd7 100644 --- a/examples/exp_configs/rl/multiagent/multiagent_i210.py +++ b/examples/exp_configs/rl/multiagent/multiagent_i210.py @@ -24,7 +24,7 @@ # SET UP 
PARAMETERS FOR THE SIMULATION # number of steps per rollout -HORIZON = 4000 +HORIZON = 2000 # percentage of autonomous vehicles compared to human vehicles on highway PENETRATION_RATE = 10 diff --git a/examples/train.py b/examples/train.py index 4636e5924..93797e8d0 100644 --- a/examples/train.py +++ b/examples/train.py @@ -270,6 +270,18 @@ def on_episode_end(info): act_space = Tuple([act_space] * max_num_agents_qmix) register_env(gym_name, lambda config: create_env(config).with_agent_groups( grouping, obs_space=obs_space, act_space=act_space)) + policy_graphs = {'av': (None, obs_space, act_space, {})} + + def policy_mapping_fn(_): + return 'av' + + config.update({ + 'multiagent': { + 'policies': policy_graphs, + 'policy_mapping_fn': tune.function(policy_mapping_fn), + "policies_to_train": ["av"] + } + }) else: # Register as rllib env register_env(gym_name, create_env) @@ -308,7 +320,7 @@ def train_rllib(submodule, flags): } date = datetime.now(tz=pytz.utc) date = date.astimezone(pytz.timezone('US/Pacific')).strftime("%m-%d-%Y") - s3_string = "s3://i210.experiments/i210/" \ + s3_string = "s3://eugene.experiments/i210/" \ + date + '/' + flags.exp_title if flags.use_s3: exp_dict['upload_dir'] = s3_string diff --git a/flow/envs/multiagent/i210.py b/flow/envs/multiagent/i210.py index 27f8bab5c..72a6d01ec 100644 --- a/flow/envs/multiagent/i210.py +++ b/flow/envs/multiagent/i210.py @@ -147,12 +147,12 @@ def compute_reward(self, rl_actions, **kwargs): rewards[rl_id] = 0 speeds = [] follow_speed = self.k.vehicle.get_speed(self.k.vehicle.get_follower(rl_id)) - speeds.extend([speed for speed in follow_speed if speed >= 0]) + speeds.extend([speed for speed in [follow_speed] if speed >= 0]) if self.k.vehicle.get_speed(rl_id) >= 0: speeds.append(self.k.vehicle.get_speed(rl_id)) if len(speeds) > 0: # rescale so the q function can estimate it quickly - rewards[rl_id] = np.mean(speeds) / 500.0 + rewards[rl_id] = np.mean([speed**2 for speed in speeds]) / 500.0 else: for rl_id in self.k.vehicle.get_rl_ids(): if self.env_params.evaluate: diff --git a/scripts/ray_autoscale.yaml b/scripts/ray_autoscale.yaml index 409a378bd..8266483f9 100644 --- a/scripts/ray_autoscale.yaml +++ b/scripts/ray_autoscale.yaml @@ -32,15 +32,16 @@ auth: # By default Ray creates a new private keypair, but you can also use your own. # If you do so, make sure to also set "KeyName" in the head and worker node # configurations below. -# ssh_private_key: /path/to/your/key.pem + ssh_private_key: /Users/eugenevinitsky/.ssh/MyKeyPair.pem # Provider-specific config for the head node, e.g. instance type. By default # Ray will auto-configure unspecified fields such as SubnetId and KeyName. # For more documentation on available fields, see: # http://boto3.readthedocs.io/en/latest/reference/services/ec2.html#EC2.ServiceResource.create_instances head_node: - InstanceType: c4.4xlarge + InstanceType: m4.16xlarge ImageId: ami-09544298704576518 # Flow AMI (Ubuntu) + KeyName: MyKeyPair InstanceMarketOptions: MarketType: spot #Additional options can be found in the boto docs, e.g. @@ -76,6 +77,7 @@ head_setup_commands: - pip install torch==1.3.1 - pip install tabulate - pip install ray==0.8.0 + - pip install tensorflow==1.14.0 # Custom commands that will be run on worker nodes after common setup. 
worker_setup_commands: [] From 29d16bbcd0eeb2812678ed461722427283a834c3 Mon Sep 17 00:00:00 2001 From: Eugene Vinitsky Date: Mon, 6 Apr 2020 22:37:41 -0700 Subject: [PATCH 07/85] Add QMIX and MADDPG --- .../rl/multiagent/multiagent_i210_maddpg.py | 199 +++++++++ .../rl/multiagent/multiagent_i210_qmix.py | 4 +- examples/train.py | 9 +- flow/algorithms/maddpg/__init__.py | 0 flow/algorithms/maddpg/maddpg.py | 185 ++++++++ flow/algorithms/maddpg/maddpg_policy.py | 397 ++++++++++++++++++ flow/core/kernel/vehicle/traci.py | 2 +- flow/envs/multiagent/i210.py | 121 +++++- scripts/ray_autoscale.yaml | 3 +- 9 files changed, 907 insertions(+), 13 deletions(-) create mode 100644 examples/exp_configs/rl/multiagent/multiagent_i210_maddpg.py create mode 100644 flow/algorithms/maddpg/__init__.py create mode 100644 flow/algorithms/maddpg/maddpg.py create mode 100644 flow/algorithms/maddpg/maddpg_policy.py diff --git a/examples/exp_configs/rl/multiagent/multiagent_i210_maddpg.py b/examples/exp_configs/rl/multiagent/multiagent_i210_maddpg.py new file mode 100644 index 000000000..58c1ae63a --- /dev/null +++ b/examples/exp_configs/rl/multiagent/multiagent_i210_maddpg.py @@ -0,0 +1,199 @@ +"""Multi-agent I-210 example. + +Trains a non-constant number of agents, all sharing the same policy, on the +highway with ramps network. +""" +import os + +from ray.tune.registry import register_env + +from flow.controllers import RLController +from flow.controllers.car_following_models import IDMController +import flow.config as config +from flow.core.params import EnvParams +from flow.core.params import NetParams +from flow.core.params import InitialConfig +from flow.core.params import InFlows +from flow.core.params import VehicleParams +from flow.core.params import SumoParams +from flow.core.params import SumoLaneChangeParams +from flow.networks.i210_subnetwork import I210SubNetwork, EDGES_DISTRIBUTION +from flow.envs.multiagent.i210 import I210MADDPGMultiEnv, ADDITIONAL_ENV_PARAMS +from flow.utils.registry import make_create_env + +# SET UP PARAMETERS FOR THE SIMULATION + +# number of steps per rollout +HORIZON = 2000 + +# percentage of autonomous vehicles compared to human vehicles on highway +PENETRATION_RATE = 10 + +# SET UP PARAMETERS FOR THE ENVIRONMENT +additional_env_params = ADDITIONAL_ENV_PARAMS.copy() +additional_env_params.update({ + 'max_accel': 2.6, + 'max_decel': 4.5, + # configure the observation space. Look at the I210MultiEnv class for more info. 
+ 'lead_obs': True, + # whether to add in a reward for the speed of nearby vehicles + "local_reward": True, + "num_actions": 5, + "max_num_agents": 200 +}) + +# CREATE VEHICLE TYPES AND INFLOWS +# no vehicles in the network +vehicles = VehicleParams() +vehicles.add( + "human", + num_vehicles=0, + lane_change_params=SumoLaneChangeParams(lane_change_mode="strategic"), + acceleration_controller=(IDMController, {"a": .3, "b": 2.0, "noise": 0.6}), +) +vehicles.add( + "av", + acceleration_controller=(RLController, {}), + num_vehicles=0, +) + +inflow = InFlows() +# main highway +pen_rate = PENETRATION_RATE / 100 +assert pen_rate < 1.0, "your penetration rate is over 100%" +assert pen_rate > 0.0, "your penetration rate should be above zero" +inflow.add( + veh_type="human", + edge="119257914", + vehs_per_hour=int(10800 * (1 - pen_rate)), + # probability=1.0, + departLane="random", + departSpeed=20) +# # on ramp +# inflow.add( +# veh_type="human", +# edge="27414345", +# vehs_per_hour=321 * pen_rate, +# departLane="random", +# departSpeed=20) +# inflow.add( +# veh_type="human", +# edge="27414342#0", +# vehs_per_hour=421 * pen_rate, +# departLane="random", +# departSpeed=20) + +# Now add the AVs +# main highway +inflow.add( + veh_type="av", + edge="119257914", + vehs_per_hour=int(10800 * pen_rate), + # probability=1.0, + departLane="random", + departSpeed=20) +# # on ramp +# inflow.add( +# veh_type="av", +# edge="27414345", +# vehs_per_hour=int(321 * pen_rate), +# departLane="random", +# departSpeed=20) +# inflow.add( +# veh_type="av", +# edge="27414342#0", +# vehs_per_hour=int(421 * pen_rate), +# departLane="random", +# departSpeed=20) + +NET_TEMPLATE = os.path.join( + config.PROJECT_PATH, + "examples/exp_configs/templates/sumo/test2.net.xml") + +flow_params = dict( + # name of the experiment + exp_tag='I_210_subnetwork', + + # name of the flow environment the experiment is running on + env_name=I210MADDPGMultiEnv, + + # name of the network class the experiment is running on + network=I210SubNetwork, + + # simulator that is used by the experiment + simulator='traci', + + # simulation-related parameters + sim=SumoParams( + sim_step=0.5, + render=False, + color_by_speed=False, + restart_instance=True, + use_ballistic=True + ), + + # environment related parameters (see flow.core.params.EnvParams) + env=EnvParams( + horizon=HORIZON, + sims_per_step=1, + warmup_steps=0, + additional_params=additional_env_params, + ), + + # network-related parameters (see flow.core.params.NetParams and the + # network's documentation or ADDITIONAL_NET_PARAMS component) + net=NetParams( + inflows=inflow, + template=NET_TEMPLATE + ), + + # vehicles to be placed in the network at the start of a rollout (see + # flow.core.params.VehicleParams) + veh=vehicles, + + # parameters specifying the positioning of vehicles upon initialization/ + # reset (see flow.core.params.InitialConfig) + initial=InitialConfig( + edges_distribution=EDGES_DISTRIBUTION, + ), +) + +# SET UP RLLIB MULTI-AGENT FEATURES + +create_env, env_name = make_create_env(params=flow_params, version=0) + +# register as rllib env +register_env(env_name, create_env) + +# multiagent configuration +test_env = create_env() +obs_space = test_env.observation_space +act_space = test_env.action_space + +POLICIES_TO_TRAIN = ['av'] + +observation_space_dict = {i: test_env.observation_space for i in range(additional_env_params["max_num_agents"])} +action_space_dict = {i: test_env.action_space for i in range(additional_env_params["max_num_agents"])} + + +def gen_policy(i): + 
return ( + None, + test_env.observation_space, + test_env.action_space, + { + "agent_id": i, + "use_local_critic": False, + "obs_space_dict": observation_space_dict, + "act_space_dict": action_space_dict, + } + ) + + +POLICY_GRAPHS = {"av": gen_policy(0)} + + +def policy_mapping_fn(_): + """Map a policy in RLlib.""" + return 'av' + diff --git a/examples/exp_configs/rl/multiagent/multiagent_i210_qmix.py b/examples/exp_configs/rl/multiagent/multiagent_i210_qmix.py index 6c00b44cb..2e8eb2a5e 100644 --- a/examples/exp_configs/rl/multiagent/multiagent_i210_qmix.py +++ b/examples/exp_configs/rl/multiagent/multiagent_i210_qmix.py @@ -24,7 +24,7 @@ # SET UP PARAMETERS FOR THE SIMULATION # number of steps per rollout -HORIZON = 4000 +HORIZON = 2000 # percentage of autonomous vehicles compared to human vehicles on highway PENETRATION_RATE = 10 @@ -39,7 +39,7 @@ # whether to add in a reward for the speed of nearby vehicles "local_reward": True, "num_actions": 5, - "max_num_agents_qmix": 200 + "max_num_agents_qmix": 250 }) # CREATE VEHICLE TYPES AND INFLOWS diff --git a/examples/train.py b/examples/train.py index 93797e8d0..7158b6fb8 100644 --- a/examples/train.py +++ b/examples/train.py @@ -203,6 +203,11 @@ def setup_exps_rllib(flow_params, config["actor_lr"] = tune.grid_search([1e-3, 1e-4]) config["critic_lr"] = tune.grid_search([1e-3, 1e-4]) config["n_step"] = tune.grid_search([1, 10]) + elif alg_run == "MADDPG": + from flow.algorithms.maddpg.maddpg import MADDPGTrainer, DEFAULT_CONFIG + config = deepcopy(DEFAULT_CONFIG) + alg_run = MADDPGTrainer + elif alg_run == "QMIX": from flow.algorithms.qmix.qmix import QMixTrainer2, DEFAULT_CONFIG config = deepcopy(DEFAULT_CONFIG) @@ -250,7 +255,6 @@ def on_episode_end(info): # multiagent configuration if policy_graphs is not None: - print("policy_graphs", policy_graphs) config['multiagent'].update({'policies': policy_graphs}) if policy_mapping_fn is not None: config['multiagent'].update({'policy_mapping_fn': tune.function(policy_mapping_fn)}) @@ -282,6 +286,9 @@ def policy_mapping_fn(_): "policies_to_train": ["av"] } }) + elif flags.algorithm.upper() == "MADDPG": + config['max_num_agents'] = flow_params['env'].additional_params['max_num_agents'] + register_env(gym_name, create_env) else: # Register as rllib env register_env(gym_name, create_env) diff --git a/flow/algorithms/maddpg/__init__.py b/flow/algorithms/maddpg/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/flow/algorithms/maddpg/maddpg.py b/flow/algorithms/maddpg/maddpg.py new file mode 100644 index 000000000..69fadcb08 --- /dev/null +++ b/flow/algorithms/maddpg/maddpg.py @@ -0,0 +1,185 @@ +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function +"""Contributed port of MADDPG from OpenAI baselines. + +The implementation has a couple assumptions: +- The number of agents is fixed and known upfront. +- Each agent is bound to a policy of the same name. +- Discrete actions are sent as logits (pre-softmax). + +For a minimal example, see twostep_game.py, and the README for how to run +with the multi-agent particle envs. 
+""" + +import logging + +from ray.rllib.agents.trainer import with_common_config +from ray.rllib.agents.dqn.dqn import GenericOffPolicyTrainer +from ray.rllib.contrib.maddpg.maddpg_policy import MADDPGTFPolicy +from ray.rllib.optimizers import SyncReplayOptimizer +from ray.rllib.policy.sample_batch import SampleBatch, MultiAgentBatch + +logger = logging.getLogger(__name__) +logger.setLevel(logging.INFO) + +# yapf: disable +# __sphinx_doc_begin__ +DEFAULT_CONFIG = with_common_config({ + # === Settings for each individual policy === + # ID of the agent controlled by this policy + "agent_id": None, + # Use a local critic for this policy. + "use_local_critic": False, + + # === Evaluation === + # Evaluation interval + "evaluation_interval": None, + # Number of episodes to run per evaluation period. + "evaluation_num_episodes": 10, + + # === Model === + # Apply a state preprocessor with spec given by the "model" config option + # (like other RL algorithms). This is mostly useful if you have a weird + # observation shape, like an image. Disabled by default. + "use_state_preprocessor": False, + # Postprocess the policy network model output with these hidden layers. If + # use_state_preprocessor is False, then these will be the *only* hidden + # layers in the network. + "actor_hiddens": [64, 64], + # Hidden layers activation of the postprocessing stage of the policy + # network + "actor_hidden_activation": "relu", + # Postprocess the critic network model output with these hidden layers; + # again, if use_state_preprocessor is True, then the state will be + # preprocessed by the model specified with the "model" config option first. + "critic_hiddens": [64, 64], + # Hidden layers activation of the postprocessing state of the critic. + "critic_hidden_activation": "relu", + # N-step Q learning + "n_step": 1, + # Algorithm for good policies + "good_policy": "maddpg", + # Algorithm for adversary policies + "adv_policy": "maddpg", + + # === Replay buffer === + # Size of the replay buffer. Note that if async_updates is set, then + # each worker will have a replay buffer of this size. + "buffer_size": int(1e6), + # Observation compression. Note that compression makes simulation slow in + # MPE. + "compress_observations": False, + + # === Optimization === + # Learning rate for the critic (Q-function) optimizer. + "critic_lr": 1e-2, + # Learning rate for the actor (policy) optimizer. + "actor_lr": 1e-2, + # Update the target network every `target_network_update_freq` steps. + "target_network_update_freq": 0, + # Update the target by \tau * policy + (1-\tau) * target_policy + "tau": 0.01, + # Weights for feature regularization for the actor + "actor_feature_reg": 0.001, + # If not None, clip gradients during optimization at this value + "grad_norm_clipping": 0.5, + # How many steps of the model to sample before learning starts. + "learning_starts": 1024 * 25, + # Update the replay buffer with this many samples at once. Note that this + # setting applies per-worker if num_workers > 1. + "sample_batch_size": 100, + # Size of a batched sampled from replay buffer for training. Note that + # if async_updates is set, then each worker returns gradients for a + # batch of this size. + "train_batch_size": 1024, + # Number of env steps to optimize for before returning + "timesteps_per_iteration": 0, + # How many agents can be in the system in total + "max_num_agents": 1, + + # === Parallelism === + # Number of workers for collecting samples with. 
This only makes sense + # to increase if your environment is particularly slow to sample, or if + # you're using the Async or Ape-X optimizers. + "num_workers": 1, + # Prevent iterations from going lower than this time span + "min_iter_time_s": 0, +}) +# __sphinx_doc_end__ +# yapf: enable + + +def set_global_timestep(trainer): + global_timestep = trainer.optimizer.num_steps_sampled + trainer.train_start_timestep = global_timestep + + +def before_learn_on_batch(multi_agent_batch, policies, train_batch_size): + samples = {} + + # Modify keys. + for pid, p in policies.items(): + i = p.config["agent_id"] + keys = multi_agent_batch.policy_batches[pid].data.keys() + keys = ["_".join([k, str(i)]) for k in keys] + samples.update( + dict( + zip(keys, + multi_agent_batch.policy_batches[pid].data.values()))) + + # Make ops and feed_dict to get "new_obs" from target action sampler. + new_obs_ph_n = [p.new_obs_ph for p in policies.values()] + new_obs_n = list() + for k, v in samples.items(): + if "new_obs" in k: + new_obs_n.append(v) + + target_act_sampler_n = [p.target_act_sampler for p in policies.values()] + feed_dict = dict(zip(new_obs_ph_n, new_obs_n)) + + new_act_n = p.sess.run(target_act_sampler_n, feed_dict) + samples.update( + {"new_actions_%d" % i: new_act + for i, new_act in enumerate(new_act_n)}) + + # Share samples among agents. + policy_batches = {pid: SampleBatch(samples) for pid in policies.keys()} + return MultiAgentBatch(policy_batches, train_batch_size) + + +def make_optimizer(workers, config): + return SyncReplayOptimizer( + workers, + learning_starts=config["learning_starts"], + buffer_size=config["buffer_size"], + train_batch_size=config["train_batch_size"], + before_learn_on_batch=before_learn_on_batch, + synchronize_sampling=True, + prioritized_replay=False) + + +def add_trainer_metrics(trainer, result): + global_timestep = trainer.optimizer.num_steps_sampled + result.update( + timesteps_this_iter=global_timestep - trainer.train_start_timestep, + info=dict({ + "num_target_updates": trainer.state["num_target_updates"], + }, **trainer.optimizer.stats())) + + +def collect_metrics(trainer): + result = trainer.collect_metrics() + return result + + +MADDPGTrainer = GenericOffPolicyTrainer.with_updates( + name="MADDPG", + default_config=DEFAULT_CONFIG, + default_policy=MADDPGTFPolicy, + before_init=None, + before_train_step=set_global_timestep, + make_policy_optimizer=make_optimizer, + after_train_result=add_trainer_metrics, + collect_metrics_fn=collect_metrics, + before_evaluate_fn=None) diff --git a/flow/algorithms/maddpg/maddpg_policy.py b/flow/algorithms/maddpg/maddpg_policy.py new file mode 100644 index 000000000..3924d5c3b --- /dev/null +++ b/flow/algorithms/maddpg/maddpg_policy.py @@ -0,0 +1,397 @@ +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import ray +from ray.rllib.agents.dqn.dqn_policy import minimize_and_clip, _adjust_nstep +from ray.rllib.evaluation.metrics import LEARNER_STATS_KEY +from ray.rllib.policy.sample_batch import SampleBatch +from ray.rllib.models import ModelCatalog +from ray.rllib.utils.annotations import override +from ray.rllib.utils.error import UnsupportedSpaceException +from ray.rllib.policy.policy import Policy +from ray.rllib.policy.tf_policy import TFPolicy +from ray.rllib.utils import try_import_tf, try_import_tfp + +import logging +from gym.spaces import Box, Discrete +import numpy as np + +logger = logging.getLogger(__name__) + +tf = try_import_tf() +tfp = try_import_tfp() + + 
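# --- Illustrative aside (editor's sketch, not part of the patch) ---------
# The policy defined below maps Discrete action spaces to Box via
# _make_continuous_space and samples actions from a "relaxed" one-hot
# (Gumbel-softmax) distribution, so the centralized critic can consume
# action logits as continuous inputs while the actor stays differentiable.
# A minimal standalone sketch of that sampler, assuming the TF 1.x stack
# pinned earlier in this series and that tensorflow_probability is
# installed; the 5-action size and temperature of 1.0 are illustrative.
import numpy as np
import tensorflow as tf
import tensorflow_probability as tfp

logits = tf.placeholder(tf.float32, shape=(None, 5), name="act_logits")
# lower temperatures push samples closer to a hard one-hot vector
soft_sample = tfp.distributions.RelaxedOneHotCategorical(
    temperature=1.0, logits=logits).sample()

with tf.Session() as sess:
    acts = sess.run(soft_sample, {logits: np.zeros((2, 5))})
    # each row lies on the probability simplex (sums to ~1) and can be
    # concatenated with observations as input to the centralized critic
# -------------------------------------------------------------------------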
+class MADDPGPostprocessing(object): + """Implements agentwise termination signal and n-step learning.""" + + @override(Policy) + def postprocess_trajectory(self, + sample_batch, + other_agent_batches=None, + episode=None): + # FIXME: Get done from info is required since agentwise done is not + # supported now. + sample_batch.data["dones"] = self.get_done_from_info( + sample_batch.data["infos"]) + + # N-step Q adjustments + if self.config["n_step"] > 1: + _adjust_nstep(self.config["n_step"], self.config["gamma"], + sample_batch[SampleBatch.CUR_OBS], + sample_batch[SampleBatch.ACTIONS], + sample_batch[SampleBatch.REWARDS], + sample_batch[SampleBatch.NEXT_OBS], + sample_batch[SampleBatch.DONES]) + + return sample_batch + + +class MADDPGTFPolicy(MADDPGPostprocessing, TFPolicy): + def __init__(self, obs_space, act_space, config): + # _____ Initial Configuration + self.config = config = dict(ray.rllib.contrib.maddpg.DEFAULT_CONFIG, + **config) + self.global_step = tf.train.get_or_create_global_step() + + # FIXME: Get done from info is required since agentwise done is not + # supported now. + self.get_done_from_info = np.vectorize( + lambda info: info.get("done", False)) + + agent_id = config["agent_id"] + + # _____ Environment Setting + def _make_continuous_space(space): + if isinstance(space, Box): + return space + elif isinstance(space, Discrete): + return Box( + low=np.zeros((space.n, )), high=np.ones((space.n, ))) + else: + raise UnsupportedSpaceException( + "Space {} is not supported.".format(space)) + + if len(config["multiagent"]["policies"]) > 1: + obs_space_n = [ + _make_continuous_space(space) + for _, (_, space, _, + _) in sorted(config["multiagent"]["policies"].items()) + ] + act_space_n = [ + _make_continuous_space(space) + for _, (_, _, space, + _) in sorted(config["multiagent"]["policies"].items()) + ] + else: + obs_space = config["multiagent"]["policies"][list(config["multiagent"]["policies"].keys())][1] + act_space = config["multiagent"]["policies"][list(config["multiagent"]["policies"].keys())][2] + num_agents = config["max_num_agents"] + obs_space_n = [ + _make_continuous_space(obs_space) + for i in range(num_agents) + ] + act_space_n = [ + _make_continuous_space(act_space) + for i in range(num_agents) + ] + + # _____ Placeholders + # Placeholders for policy evaluation and updates + def _make_ph_n(space_n, name=""): + return [ + tf.placeholder( + tf.float32, + shape=(None, ) + space.shape, + name=name + "_%d" % i) for i, space in enumerate(space_n) + ] + + obs_ph_n = _make_ph_n(obs_space_n, "obs") + act_ph_n = _make_ph_n(act_space_n, "actions") + new_obs_ph_n = _make_ph_n(obs_space_n, "new_obs") + new_act_ph_n = _make_ph_n(act_space_n, "new_actions") + rew_ph = tf.placeholder( + tf.float32, shape=None, name="rewards_{}".format(agent_id)) + done_ph = tf.placeholder( + tf.float32, shape=None, name="dones_{}".format(agent_id)) + + if config["use_local_critic"]: + if len(config["multiagent"]["policies"]) > 1: + + obs_space_n, act_space_n = [obs_space_n[agent_id]], [ + act_space_n[agent_id] + ] + obs_ph_n, act_ph_n = [obs_ph_n[agent_id]], [act_ph_n[agent_id]] + new_obs_ph_n, new_act_ph_n = [new_obs_ph_n[agent_id]], [ + new_act_ph_n[agent_id] + ] + agent_id = 0 + else: + agent_id = 0 + obs_space_n, act_space_n = [obs_space_n[agent_id]], [ + act_space_n[agent_id] + ] + obs_ph_n, act_ph_n = [obs_ph_n[agent_id]], [act_ph_n[agent_id]] + new_obs_ph_n, new_act_ph_n = [new_obs_ph_n[agent_id]], [ + new_act_ph_n[agent_id] + ] + + # _____ Value Network + # Build critic network for t. 
+ critic, _, critic_model_n, critic_vars = self._build_critic_network( + obs_ph_n, + act_ph_n, + obs_space_n, + act_space_n, + hiddens=config["critic_hiddens"], + activation=getattr(tf.nn, config["critic_hidden_activation"]), + scope="critic") + + # Build critic network for t + 1. + target_critic, _, _, target_critic_vars = self._build_critic_network( + new_obs_ph_n, + new_act_ph_n, + obs_space_n, + act_space_n, + hiddens=config["critic_hiddens"], + activation=getattr(tf.nn, config["critic_hidden_activation"]), + scope="target_critic") + + # Build critic loss. + td_error = tf.subtract( + tf.stop_gradient( + rew_ph + (1.0 - done_ph) * + (config["gamma"]**config["n_step"]) * target_critic[:, 0]), + critic[:, 0]) + critic_loss = tf.reduce_mean(td_error**2) + + # _____ Policy Network + # Build actor network for t. + act_sampler, actor_feature, actor_model, actor_vars = ( + self._build_actor_network( + obs_ph_n[agent_id], + obs_space_n[agent_id], + act_space_n[agent_id], + hiddens=config["actor_hiddens"], + activation=getattr(tf.nn, config["actor_hidden_activation"]), + scope="actor")) + + # Build actor network for t + 1. + self.new_obs_ph = new_obs_ph_n[agent_id] + self.target_act_sampler, _, _, target_actor_vars = ( + self._build_actor_network( + self.new_obs_ph, + obs_space_n[agent_id], + act_space_n[agent_id], + hiddens=config["actor_hiddens"], + activation=getattr(tf.nn, config["actor_hidden_activation"]), + scope="target_actor")) + + # Build actor loss. + act_n = act_ph_n.copy() + act_n[agent_id] = act_sampler + critic, _, _, _ = self._build_critic_network( + obs_ph_n, + act_n, + obs_space_n, + act_space_n, + hiddens=config["critic_hiddens"], + activation=getattr(tf.nn, config["critic_hidden_activation"]), + scope="critic") + actor_loss = -tf.reduce_mean(critic) + if config["actor_feature_reg"] is not None: + actor_loss += config["actor_feature_reg"] * tf.reduce_mean( + actor_feature**2) + + # _____ Losses + self.losses = {"critic": critic_loss, "actor": actor_loss} + + # _____ Optimizers + self.optimizers = { + "critic": tf.train.AdamOptimizer(config["critic_lr"]), + "actor": tf.train.AdamOptimizer(config["actor_lr"]) + } + + # _____ Build variable update ops. 
+ self.tau = tf.placeholder_with_default( + config["tau"], shape=(), name="tau") + + def _make_target_update_op(vs, target_vs, tau): + return [ + target_v.assign(tau * v + (1.0 - tau) * target_v) + for v, target_v in zip(vs, target_vs) + ] + + self.update_target_vars = _make_target_update_op( + critic_vars + actor_vars, target_critic_vars + target_actor_vars, + self.tau) + + def _make_set_weight_op(variables): + vs = list() + for v in variables.values(): + vs += v + phs = [ + tf.placeholder( + tf.float32, + shape=v.get_shape(), + name=v.name.split(":")[0] + "_ph") for v in vs + ] + return tf.group(*[v.assign(ph) for v, ph in zip(vs, phs)]), phs + + self.vars = { + "critic": critic_vars, + "actor": actor_vars, + "target_critic": target_critic_vars, + "target_actor": target_actor_vars + } + self.update_vars, self.vars_ph = _make_set_weight_op(self.vars) + + # _____ TensorFlow Initialization + + self.sess = tf.get_default_session() + + def _make_loss_inputs(placeholders): + return [(ph.name.split("/")[-1].split(":")[0], ph) + for ph in placeholders] + + loss_inputs = _make_loss_inputs(obs_ph_n + act_ph_n + new_obs_ph_n + + new_act_ph_n + [rew_ph, done_ph]) + + TFPolicy.__init__( + self, + obs_space, + act_space, + self.sess, + obs_input=obs_ph_n[agent_id], + action_sampler=act_sampler, + loss=actor_loss + critic_loss, + loss_inputs=loss_inputs) + + self.sess.run(tf.global_variables_initializer()) + + # Hard initial update + self.update_target(1.0) + + @override(TFPolicy) + def optimizer(self): + return None + + @override(TFPolicy) + def gradients(self, optimizer, loss): + if self.config["grad_norm_clipping"] is not None: + self.gvs = { + k: minimize_and_clip(optimizer, self.losses[k], self.vars[k], + self.config["grad_norm_clipping"]) + for k, optimizer in self.optimizers.items() + } + else: + self.gvs = { + k: optimizer.compute_gradients(self.losses[k], self.vars[k]) + for k, optimizer in self.optimizers.items() + } + return self.gvs["critic"] + self.gvs["actor"] + + @override(TFPolicy) + def build_apply_op(self, optimizer, grads_and_vars): + critic_apply_op = self.optimizers["critic"].apply_gradients( + self.gvs["critic"]) + + with tf.control_dependencies([tf.assign_add(self.global_step, 1)]): + with tf.control_dependencies([critic_apply_op]): + actor_apply_op = self.optimizers["actor"].apply_gradients( + self.gvs["actor"]) + + return actor_apply_op + + @override(TFPolicy) + def extra_compute_action_feed_dict(self): + return {} + + @override(TFPolicy) + def extra_compute_grad_fetches(self): + return {LEARNER_STATS_KEY: {}} + + @override(TFPolicy) + def get_weights(self): + var_list = [] + for var in self.vars.values(): + var_list += var + return self.sess.run(var_list) + + @override(TFPolicy) + def set_weights(self, weights): + self.sess.run( + self.update_vars, feed_dict=dict(zip(self.vars_ph, weights))) + + @override(Policy) + def get_state(self): + return TFPolicy.get_state(self) + + @override(Policy) + def set_state(self, state): + TFPolicy.set_state(self, state) + + def _build_critic_network(self, + obs_n, + act_n, + obs_space_n, + act_space_n, + hiddens, + activation=None, + scope=None): + with tf.variable_scope(scope, reuse=tf.AUTO_REUSE) as scope: + if self.config["use_state_preprocessor"]: + model_n = [ + ModelCatalog.get_model({ + "obs": obs, + "is_training": self._get_is_training_placeholder(), + }, obs_space, act_space, 1, self.config["model"]) + for obs, obs_space, act_space in zip( + obs_n, obs_space_n, act_space_n) + ] + out_n = [model.last_layer for model in model_n] + out = 
tf.concat(out_n + act_n, axis=1) + else: + model_n = [None] * len(obs_n) + out = tf.concat(obs_n + act_n, axis=1) + + for hidden in hiddens: + out = tf.layers.dense(out, units=hidden, activation=activation) + feature = out + out = tf.layers.dense(feature, units=1, activation=None) + + return out, feature, model_n, tf.global_variables(scope.name) + + def _build_actor_network(self, + obs, + obs_space, + act_space, + hiddens, + activation=None, + scope=None): + with tf.variable_scope(scope, reuse=tf.AUTO_REUSE) as scope: + if self.config["use_state_preprocessor"]: + model = ModelCatalog.get_model({ + "obs": obs, + "is_training": self._get_is_training_placeholder(), + }, obs_space, act_space, 1, self.config["model"]) + out = model.last_layer + else: + model = None + out = obs + + for hidden in hiddens: + out = tf.layers.dense(out, units=hidden, activation=activation) + feature = tf.layers.dense( + out, units=act_space.shape[0], activation=None) + sampler = tfp.distributions.RelaxedOneHotCategorical( + temperature=1.0, logits=feature).sample() + + return sampler, feature, model, tf.global_variables(scope.name) + + def update_target(self, tau=None): + if tau is not None: + self.sess.run(self.update_target_vars, {self.tau: tau}) + else: + self.sess.run(self.update_target_vars) diff --git a/flow/core/kernel/vehicle/traci.py b/flow/core/kernel/vehicle/traci.py index 50cd106c9..3a5ad0760 100644 --- a/flow/core/kernel/vehicle/traci.py +++ b/flow/core/kernel/vehicle/traci.py @@ -521,7 +521,7 @@ def get_departed_ids(self): if len(self._departed_ids) > 0: return self._departed_ids[-1] else: - return 0 + return [] def get_previous_speed(self, veh_id, error=-1001): """See parent class.""" diff --git a/flow/envs/multiagent/i210.py b/flow/envs/multiagent/i210.py index 72a6d01ec..d2c480893 100644 --- a/flow/envs/multiagent/i210.py +++ b/flow/envs/multiagent/i210.py @@ -1,5 +1,6 @@ """Environment for training vehicles to reduce congestion in the I210.""" +from collections import OrderedDict from copy import deepcopy from time import time @@ -178,7 +179,7 @@ def compute_reward(self, rl_actions, **kwargs): cost2 += min((t_headway - t_min) / t_min, 0) # weights for cost1, cost2, and cost3, respectively - eta1, eta2 = 1.00, 0.10 + eta1, eta2 = 1.00, 0.0 reward = max(eta1 * cost1 + eta2 * cost2, 0) @@ -252,6 +253,9 @@ def __init__(self, env_params, sim_params, network, simulator='traci'): self.default_state = {idx: {"obs": np.zeros(self.observation_space.spaces['obs'].shape[0]), "action_mask": self.get_action_mask(valid_agent=False)} for idx in range(self.max_num_agents)} + self.rl_id_to_idx_map = OrderedDict() + self.idx_to_rl_id_map = OrderedDict() + self.index_counter = 0 @property def action_space(self): @@ -270,8 +274,9 @@ def _apply_rl_actions(self, rl_actions): if rl_actions: accel_list = [] rl_ids = [] - for rl_id, action in rl_actions.items(): + for rl_id in self.k.vehicle.get_rl_ids(): # 0 is the no-op + action = rl_actions[self.rl_id_to_idx_map[rl_id]] if action > 0: accel = self.action_values[action - 1] accel_list.append(accel) @@ -280,18 +285,25 @@ def _apply_rl_actions(self, rl_actions): # print('time to apply actions is ', time() - t) def get_state(self): - t = time() rl_ids = self.k.vehicle.get_rl_ids() veh_info = super().get_state() + + for key in self.k.vehicle.get_departed_ids(): + if key not in self.rl_id_to_idx_map and key in self.k.vehicle.get_rl_ids(): + self.rl_id_to_idx_map[key] = self.index_counter + self.idx_to_rl_id_map[self.index_counter] = key + self.index_counter += 1 + 
print(self.index_counter) + # print('time to get state is ', time() - t) t = time() # TODO(@evinitsky) think this doesn't have to be a deepcopy veh_info_copy = deepcopy(self.default_state) # print('time to make copy is ', time() - t) t = time() - veh_info_copy.update({rl_id_idx: {"obs": veh_info[rl_id], + veh_info_copy.update({self.rl_id_to_idx_map[rl_id]: {"obs": veh_info[rl_id], "action_mask": self.get_action_mask(valid_agent=True)} - for rl_id_idx, rl_id in enumerate(rl_ids)}) + for rl_id in rl_ids}) # print('time to update copy is ', time() - t) veh_info = veh_info_copy self.rl_id_to_idx_map = {rl_id: i for i, rl_id in enumerate(rl_ids)} @@ -299,9 +311,8 @@ def get_state(self): return veh_info def compute_reward(self, rl_actions, **kwargs): - t = time() # There has to be one global reward for qmix - reward = np.nan_to_num(np.mean(self.k.vehicle.get_speed(self.k.vehicle.get_ids()))) / (20 * self.env_params.horizon) + reward = np.nan_to_num(np.mean(self.k.vehicle.get_speed(self.k.vehicle.get_rl_ids()))) / (20 * self.env_params.horizon) temp_reward_dict = {idx: reward for idx in range(self.max_num_agents)} # print('time to compute reward is ', time() - t) @@ -316,4 +327,98 @@ def get_action_mask(self, valid_agent): else: temp_list = np.array([0 for _ in range(self.action_space.n)]) temp_list[0] = 1 - return temp_list \ No newline at end of file + return temp_list + + def reset(self): + veh_info = super().reset() + self.rl_id_to_idx_map = OrderedDict() + self.idx_to_rl_id_map = OrderedDict() + self.index_counter = 0 + for key in self.k.vehicle.get_departed_ids() + self.k.vehicle.get_rl_ids(): + if key not in self.rl_id_to_idx_map and key in self.k.vehicle.get_rl_ids(): + self.rl_id_to_idx_map[key] = self.index_counter + self.idx_to_rl_id_map[self.index_counter] = key + self.index_counter += 1 + rl_ids = self.k.vehicle.get_rl_ids() + # TODO(@evinitsky) think this doesn't have to be a deepcopy + veh_info_copy = deepcopy(self.default_state) + try: + veh_info_copy.update({self.rl_id_to_idx_map[rl_id]: veh_info[rl_id] for rl_id in rl_ids}) + except: + import ipdb; + ipdb.set_trace() + return veh_info + + +class I210MADDPGMultiEnv(I210MultiEnv): + def __init__(self, env_params, sim_params, network, simulator='traci'): + super().__init__(env_params, sim_params, network, simulator) + self.max_num_agents = env_params.additional_params.get("max_num_agents") + self.rl_id_to_idx_map = OrderedDict() + self.idx_to_rl_id_map = OrderedDict() + self.index_counter = 0 + self.default_state = {idx: np.zeros(self.observation_space.shape[0]) + for idx in range(self.max_num_agents)} + + def _apply_rl_actions(self, rl_actions): + """See class definition.""" + # in the warmup steps, rl_actions is None + t = time() + if rl_actions: + accel_list = [] + rl_ids = [] + for rl_id in self.k.vehicle.get_rl_ids(): + if rl_id in self.rl_id_to_idx_map: + accel_list.append(rl_actions[self.rl_id_to_idx_map[rl_id]]) + rl_ids.append(rl_id) + self.k.vehicle.apply_acceleration(rl_ids, accel_list) + print('time to apply actions is ', time() - t) + + def get_state(self): + t = time() + + for key in self.k.vehicle.get_departed_ids(): + if key not in self.rl_id_to_idx_map and key in self.k.vehicle.get_rl_ids(): + self.rl_id_to_idx_map[key] = self.index_counter + self.idx_to_rl_id_map[self.index_counter] = key + self.index_counter += 1 + print(self.index_counter) + + rl_ids = self.k.vehicle.get_rl_ids() + veh_info = super().get_state() + # TODO(@evinitsky) think this doesn't have to be a deepcopy + veh_info_copy = 
deepcopy(self.default_state) + veh_info_copy.update({self.rl_id_to_idx_map[rl_id]: veh_info[rl_id] + for rl_id in rl_ids}) + # print('time to update copy is ', time() - t) + veh_info = veh_info_copy + print('state time is ', time() - t) + + return veh_info + + def compute_reward(self, rl_actions, **kwargs): + # There has to be one global reward for qmix + t = time() + reward = np.nan_to_num(np.mean(self.k.vehicle.get_speed(self.k.vehicle.get_ids()))) / (20 * self.env_params.horizon) + temp_reward_dict = {idx: reward for idx in + range(self.max_num_agents)} + print('reward time is ', time() - t) + return temp_reward_dict + + def reset(self): + veh_info = super().reset() + self.rl_id_to_idx_map = OrderedDict() + self.idx_to_rl_id_map = OrderedDict() + self.index_counter = 0 + rl_ids = self.k.vehicle.get_rl_ids() + for key in self.k.vehicle.get_departed_ids() + self.k.vehicle.get_rl_ids(): + if key not in self.rl_id_to_idx_map and key in self.k.vehicle.get_rl_ids(): + self.rl_id_to_idx_map[key] = self.index_counter + self.idx_to_rl_id_map[self.index_counter] = key + self.index_counter += 1 + # TODO(@evinitsky) think this doesn't have to be a deepcopy + veh_info_copy = deepcopy(self.default_state) + veh_info_copy.update({self.rl_id_to_idx_map[rl_id]: veh_info[rl_id] + for rl_id in enumerate(rl_ids)}) + + return veh_info_copy \ No newline at end of file diff --git a/scripts/ray_autoscale.yaml b/scripts/ray_autoscale.yaml index 8266483f9..f9d9e913b 100644 --- a/scripts/ray_autoscale.yaml +++ b/scripts/ray_autoscale.yaml @@ -57,8 +57,9 @@ head_node: worker_nodes: InstanceType: c4.4xlarge ImageId: ami-09544298704576518 # Flow AMI (Ubuntu) + KeyName: MyKeyPair - #Run workers on spot by default. Comment this out to use on-demand. + #Run workers on spot by default. Comment this out to use on-demand. InstanceMarketOptions: MarketType: spot # Additional options can be found in the boto docs, e.g. 
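Both wrappers added in this patch, I210QMIXMultiEnv and I210MADDPGMultiEnv, lean on the same trick: QMIX and MADDPG expect a fixed, known number of agents, while vehicles enter and leave the network continuously, so each newly departed RL vehicle is mapped onto the next free integer slot (up to max_num_agents) and unused slots are padded with a default observation. A minimal sketch of that slot-assignment pattern, with simplified names that are not the exact Flow API:

    from collections import OrderedDict

    import numpy as np


    class SlotMapper:
        """Map transient vehicle IDs onto a fixed set of agent slots."""

        def __init__(self, max_num_agents, obs_dim):
            self.max_num_agents = max_num_agents
            self.obs_dim = obs_dim
            self.id_to_slot = OrderedDict()

        def assign(self, veh_ids):
            # every unseen vehicle gets the next free slot, if any remain
            for veh_id in veh_ids:
                if veh_id not in self.id_to_slot and \
                        len(self.id_to_slot) < self.max_num_agents:
                    self.id_to_slot[veh_id] = len(self.id_to_slot)

        def padded_obs(self, obs_by_id):
            # slots with no live vehicle fall back to an all-zero observation
            out = {idx: np.zeros(self.obs_dim)
                   for idx in range(self.max_num_agents)}
            for veh_id, obs in obs_by_id.items():
                if veh_id in self.id_to_slot:
                    out[self.id_to_slot[veh_id]] = obs
            return out


    mapper = SlotMapper(max_num_agents=4, obs_dim=3)
    mapper.assign(["av_0", "av_1"])
    obs = mapper.padded_obs({"av_0": np.ones(3)})
    # obs[0] holds av_0's observation; slots 2 and 3 keep the zero default

The QMIX variant additionally attaches an action mask to each slot, so padded (inactive) slots can only select the no-op action.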
From 0edfec600f86e0ce3ff1f1ce7cd0ba0a2aaf404f Mon Sep 17 00:00:00 2001 From: Eugene Vinitsky Date: Tue, 7 Apr 2020 11:10:27 -0700 Subject: [PATCH 08/85] Turn on RNN --- examples/train.py | 3 +++ flow/algorithms/qmix/qmix_policy.py | 12 ++++++------ 2 files changed, 9 insertions(+), 6 deletions(-) diff --git a/examples/train.py b/examples/train.py index 7158b6fb8..b64c5f23a 100644 --- a/examples/train.py +++ b/examples/train.py @@ -211,6 +211,9 @@ def setup_exps_rllib(flow_params, elif alg_run == "QMIX": from flow.algorithms.qmix.qmix import QMixTrainer2, DEFAULT_CONFIG config = deepcopy(DEFAULT_CONFIG) + if flags.grid_search: + config["exploration_fraction"] = tune.grid_search([0.1, 0.3]) + config["buffer_size"] = tune.grid_search([10000, 100000]) alg_run = QMixTrainer2 else: sys.exit("We only support PPO and TD3 right now.") diff --git a/flow/algorithms/qmix/qmix_policy.py b/flow/algorithms/qmix/qmix_policy.py index d7a8ff359..ad6857143 100644 --- a/flow/algorithms/qmix/qmix_policy.py +++ b/flow/algorithms/qmix/qmix_policy.py @@ -206,8 +206,8 @@ def __init__(self, obs_space, action_space, config): config["model"], framework="torch", name="model", - default_model=FeedForward).to(self.device) - # default_model=RNNModel).to(self.device) + # default_model=FeedForward).to(self.device) + default_model=RNNModel).to(self.device) self.target_model = ModelCatalog.get_model_v2( agent_obs_space, @@ -216,8 +216,8 @@ def __init__(self, obs_space, action_space, config): config["model"], framework="torch", name="target_model", - default_model=FeedForward).to(self.device) - # default_model=RNNModel).to(self.device) + # default_model=FeedForward).to(self.device) + default_model=RNNModel).to(self.device) # Setup the mixer network. if config["mixer"] is None: @@ -313,8 +313,8 @@ def learn_on_batch(self, samples): input_list, [], # RNN states not used here # TODO(@evinitsky) make this an option if we are using an RNN - max_seq_len=1, - # max_seq_len=self.config["model"]["max_seq_len"], + # max_seq_len=1, + max_seq_len=self.config["model"]["max_seq_len"], dynamic_max=True) # These will be padded to shape [B * T, ...] 
if self.has_env_global_state: From 9d6344fb181a4462d06f3dedb5eb92a2997122da Mon Sep 17 00:00:00 2001 From: Eugene Vinitsky Date: Tue, 7 Apr 2020 13:21:52 -0700 Subject: [PATCH 09/85] Increase inflow in i210 subnetwork --- examples/exp_configs/non_rl/i210_subnetwork.py | 2 +- examples/exp_configs/rl/multiagent/multiagent_i210_qmix.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/examples/exp_configs/non_rl/i210_subnetwork.py b/examples/exp_configs/non_rl/i210_subnetwork.py index d993ae93a..1065c853b 100644 --- a/examples/exp_configs/non_rl/i210_subnetwork.py +++ b/examples/exp_configs/non_rl/i210_subnetwork.py @@ -33,7 +33,7 @@ inflow.add( veh_type="human", edge="119257914", - vehs_per_hour=8378, + vehs_per_hour=10800, departLane="random", departSpeed=23) # on ramp diff --git a/examples/exp_configs/rl/multiagent/multiagent_i210_qmix.py b/examples/exp_configs/rl/multiagent/multiagent_i210_qmix.py index 2e8eb2a5e..64841a6a4 100644 --- a/examples/exp_configs/rl/multiagent/multiagent_i210_qmix.py +++ b/examples/exp_configs/rl/multiagent/multiagent_i210_qmix.py @@ -39,7 +39,7 @@ # whether to add in a reward for the speed of nearby vehicles "local_reward": True, "num_actions": 5, - "max_num_agents_qmix": 250 + "max_num_agents_qmix": 200 }) # CREATE VEHICLE TYPES AND INFLOWS From f3e7ce88babe13b573197c96291ce96df172cf3f Mon Sep 17 00:00:00 2001 From: Eugene Vinitsky Date: Thu, 30 Apr 2020 15:17:10 -0700 Subject: [PATCH 10/85] Add option to reroute exiting vehicles back into the network --- .../rl/multiagent/multiagent_i210.py | 10 +++- .../rl/multiagent/multiagent_straight_road.py | 9 +++- flow/envs/base.py | 8 +++ flow/envs/multiagent/base.py | 9 ++++ flow/envs/multiagent/i210.py | 54 ++++++++++++++++++- 5 files changed, 84 insertions(+), 6 deletions(-) diff --git a/examples/exp_configs/rl/multiagent/multiagent_i210.py b/examples/exp_configs/rl/multiagent/multiagent_i210.py index 327282e28..498024b35 100644 --- a/examples/exp_configs/rl/multiagent/multiagent_i210.py +++ b/examples/exp_configs/rl/multiagent/multiagent_i210.py @@ -43,7 +43,9 @@ # configure the observation space. Look at the I210MultiEnv class for more info. 
'lead_obs': True, # whether to add in a reward for the speed of nearby vehicles - "local_reward": True + "local_reward": True, + # whether to reroute vehicles once they have exited + "reroute_on_exit": True }) # CREATE VEHICLE TYPES AND INFLOWS @@ -114,6 +116,10 @@ config.PROJECT_PATH, "examples/exp_configs/templates/sumo/test2.net.xml") +warmup_steps = 0 +if additional_env_params['reroute_on_exit']: + warmup_steps = 400 + flow_params = dict( # name of the experiment exp_tag='I_210_subnetwork', @@ -140,7 +146,7 @@ env=EnvParams( horizon=HORIZON, sims_per_step=1, - warmup_steps=0, + warmup_steps=warmup_steps, additional_params=additional_env_params, ), diff --git a/examples/exp_configs/rl/multiagent/multiagent_straight_road.py b/examples/exp_configs/rl/multiagent/multiagent_straight_road.py index 9ed38656f..237576e3f 100644 --- a/examples/exp_configs/rl/multiagent/multiagent_straight_road.py +++ b/examples/exp_configs/rl/multiagent/multiagent_straight_road.py @@ -48,7 +48,9 @@ 'max_decel': 4.5, 'target_velocity': 18, 'local_reward': True, - 'lead_obs': True + 'lead_obs': True, + # whether to reroute vehicles once they have exited + "reroute_on_exit": True }) @@ -92,6 +94,9 @@ name="rl_highway_inflow") # SET UP FLOW PARAMETERS +warmup_steps = 0 +if additional_env_params['reroute_on_exit']: + warmup_steps = 400 flow_params = dict( # name of the experiment @@ -109,7 +114,7 @@ # environment related parameters (see flow.core.params.EnvParams) env=EnvParams( horizon=HORIZON, - warmup_steps=0, + warmup_steps=warmup_steps, sims_per_step=1, # do not put more than one additional_params=additional_env_params, ), diff --git a/flow/envs/base.py b/flow/envs/base.py index 1abb8a3c9..4c4554ef3 100644 --- a/flow/envs/base.py +++ b/flow/envs/base.py @@ -148,6 +148,10 @@ def __init__(self, self.state = None self.obs_var_labels = [] + # track IDs that have ever been observed in the system + self.observed_ids = set() + self.observed_rl_ids = set() + # simulation step size self.sim_step = sim_params.sim_step @@ -430,6 +434,10 @@ def reset(self): # reset the time counter self.time_counter = 0 + # reset the observed ids + self.observed_ids = set() + self.observed_rl_ids = set() + # Now that we've passed the possibly fake init steps some rl libraries # do, we can feel free to actually render things if self.should_render: diff --git a/flow/envs/multiagent/base.py b/flow/envs/multiagent/base.py index dfc7c72ad..14b931a3f 100644 --- a/flow/envs/multiagent/base.py +++ b/flow/envs/multiagent/base.py @@ -52,6 +52,10 @@ def step(self, rl_actions): self.time_counter += 1 self.step_counter += 1 + if self.time_counter < self.env_params.sims_per_step * self.env_params.warmup_steps: + self.observed_ids.update(self.k.vehicle.get_ids()) + self.observed_rl_ids.update(self.k.vehicle.get_rl_ids()) + # perform acceleration actions for controlled human-driven vehicles if len(self.k.vehicle.get_controlled_ids()) > 0: accel = [] @@ -103,6 +107,7 @@ def step(self, rl_actions): # stop collecting new simulation steps if there is a collision if crash: + print('A CRASH! A CRASH!!!!!! 
AAAAAAAAAH!!!!!') break states = self.get_state() @@ -149,6 +154,10 @@ def reset(self, new_inflow_rate=None): # reset the time counter self.time_counter = 0 + # reset the observed ids + self.observed_ids = set() + self.observed_rl_ids = set() + # Now that we've passed the possibly fake init steps some rl libraries # do, we can feel free to actually render things if self.should_render: diff --git a/flow/envs/multiagent/i210.py b/flow/envs/multiagent/i210.py index f931b3bec..d4e501a89 100644 --- a/flow/envs/multiagent/i210.py +++ b/flow/envs/multiagent/i210.py @@ -63,7 +63,10 @@ class I210MultiEnv(MultiEnv): def __init__(self, env_params, sim_params, network, simulator='traci'): super().__init__(env_params, sim_params, network, simulator) self.lead_obs = env_params.additional_params.get("lead_obs") + self.reroute_on_exit = env_params.additional_params.get("reroute_on_exit") self.max_lanes = MAX_LANES + self.entrance_edge = "119257914" + self.exit_edge = "119257908#3" @property def observation_space(self): @@ -130,7 +133,6 @@ def get_state(self): else: lead_speed = self.k.vehicle.get_speed(lead_id) headway = self.k.vehicle.get_headway(rl_id) - self.leader.append(lead_id) veh_info.update({rl_id: np.array([speed / SPEED_SCALE, headway /HEADWAY_SCALE, lead_speed / SPEED_SCALE])}) else: veh_info = {rl_id: np.concatenate((self.state_util(rl_id), @@ -194,8 +196,10 @@ def compute_reward(self, rl_actions, **kwargs): def additional_command(self): """See parent class. - Define which vehicles are observed for visualization purposes. + Define which vehicles are observed for visualization purposes. Additionally, optionally reroute vehicles + back once they have exited. """ + super().additional_command() # specify observed vehicles for rl_id in self.k.vehicle.get_rl_ids(): # leader @@ -203,6 +207,39 @@ def additional_command(self): if lead_id: self.k.vehicle.set_observed(lead_id) + if self.reroute_on_exit and self.time_counter >= self.env_params.sims_per_step * self.env_params.warmup_steps \ + and not self.env_params.evaluate: + veh_ids = self.k.vehicle.get_ids() + edges = self.k.vehicle.get_edge(veh_ids) + for veh_id, edge in zip(veh_ids, edges): + if edge == "": + continue + if edge[0] == ":": # center edge + continue + # on the exit edge, near the end, and is the vehicle furthest along + if edge == self.exit_edge and \ + (self.k.vehicle.get_position(veh_id) > self.k.network.edge_length(self.exit_edge) - 100)\ + and self.k.vehicle.get_leader(veh_id) is None: + type_id = self.k.vehicle.get_type(veh_id) + # remove the vehicle + self.k.vehicle.remove(veh_id) + lane = np.random.randint(low=0, high=self.max_lanes) + # reintroduce it at the start of the network + # TODO(@evinitsky) select the lane and speed a bit more cleanly + self.k.vehicle.add( + veh_id=veh_id, + edge=self.entrance_edge, + type_id=str(type_id), + lane=str(lane), + pos="0", + speed="20.0") + + departed_ids = self.k.vehicle.get_departed_ids() + if len(departed_ids) > 0: + for veh_id in departed_ids: + if veh_id not in self.observed_ids: + self.k.vehicle.remove(veh_id) + def state_util(self, rl_id): """Return an array of headway, tailway, leader speed, follower speed. 
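The hunk above is the heart of reroute_on_exit: once past the warmup period, the front-most vehicle within the last 100 m of the exit edge is removed and re-added at the start of the entrance edge, keeping the vehicle population (and the set of RL agents) roughly steady over a long rollout, while vehicles that SUMO's inflows insert after warmup (ids not in observed_ids) are removed. A condensed, illustrative restatement of the recycle trigger, using only kernel calls that appear in the patch:

    def should_recycle(k, veh_id, exit_edge):
        """Recycle only the lead vehicle within 100 m of the exit edge end."""
        near_end = (k.vehicle.get_position(veh_id)
                    > k.network.edge_length(exit_edge) - 100)
        is_front = k.vehicle.get_leader(veh_id) is None
        return (k.vehicle.get_edge(veh_id) == exit_edge
                and near_end and is_front)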
@@ -243,6 +280,17 @@ def veh_statistics(self, rl_id): lane = (self.k.vehicle.get_lane(rl_id) + 1) / 10.0 return np.array([speed, lane]) + def step(self, rl_actions): + state, reward, done, info = super().step(rl_actions) + # handle the edge case where a vehicle hasn't been put back when the rollout terminates + if self.reroute_on_exit and done['__all__']: + for rl_id in self.observed_rl_ids: + if rl_id not in state.keys(): + done[rl_id] = True + reward[rl_id] = 0 + state[rl_id] = -1 * np.ones(self.observation_space.shape[0]) + return state, reward, done, info + class MultiStraightRoad(I210MultiEnv): """Partially observable multi-agent environment for a straight road. Look at superclass for more information.""" @@ -250,6 +298,8 @@ class MultiStraightRoad(I210MultiEnv): def __init__(self, env_params, sim_params, network, simulator): super().__init__(env_params, sim_params, network, simulator) self.max_lanes = 1 + self.entrance_edge = self.network.routes['highway_0'][0][0][0] + self.exit_edge = self.network.routes['highway_0'][0][0][-1] def _apply_rl_actions(self, rl_actions): """See class definition.""" From 360035790691041309c9257b6eae1dbe37b2f82a Mon Sep 17 00:00:00 2001 From: Eugene Vinitsky Date: Thu, 30 Apr 2020 15:52:07 -0700 Subject: [PATCH 11/85] Change the position at which vehicles are rerouted so tall the vehicles are immediately put back --- .../exp_configs/rl/multiagent/multiagent_straight_road.py | 1 + flow/envs/multiagent/base.py | 8 ++++---- flow/envs/multiagent/i210.py | 6 ++++-- 3 files changed, 9 insertions(+), 6 deletions(-) diff --git a/examples/exp_configs/rl/multiagent/multiagent_straight_road.py b/examples/exp_configs/rl/multiagent/multiagent_straight_road.py index 237576e3f..aa1946570 100644 --- a/examples/exp_configs/rl/multiagent/multiagent_straight_road.py +++ b/examples/exp_configs/rl/multiagent/multiagent_straight_road.py @@ -71,6 +71,7 @@ # autonomous vehicles vehicles.add( + color='red', veh_id='rl', acceleration_controller=(RLController, {})) diff --git a/flow/envs/multiagent/base.py b/flow/envs/multiagent/base.py index 14b931a3f..126107b00 100644 --- a/flow/envs/multiagent/base.py +++ b/flow/envs/multiagent/base.py @@ -49,13 +49,13 @@ def step(self, rl_actions): contains other diagnostic information from the previous action """ for _ in range(self.env_params.sims_per_step): - self.time_counter += 1 - self.step_counter += 1 - - if self.time_counter < self.env_params.sims_per_step * self.env_params.warmup_steps: + if self.time_counter <= self.env_params.sims_per_step * self.env_params.warmup_steps: self.observed_ids.update(self.k.vehicle.get_ids()) self.observed_rl_ids.update(self.k.vehicle.get_rl_ids()) + self.time_counter += 1 + self.step_counter += 1 + # perform acceleration actions for controlled human-driven vehicles if len(self.k.vehicle.get_controlled_ids()) > 0: accel = [] diff --git a/flow/envs/multiagent/i210.py b/flow/envs/multiagent/i210.py index d4e501a89..be3ec3de0 100644 --- a/flow/envs/multiagent/i210.py +++ b/flow/envs/multiagent/i210.py @@ -226,13 +226,15 @@ def additional_command(self): lane = np.random.randint(low=0, high=self.max_lanes) # reintroduce it at the start of the network # TODO(@evinitsky) select the lane and speed a bit more cleanly + # Note, the position is 10 so you are not overlapping with the inflow car that is being removed. + # this allows the vehicle to be immediately inserted. 
self.k.vehicle.add( veh_id=veh_id, edge=self.entrance_edge, type_id=str(type_id), lane=str(lane), - pos="0", - speed="20.0") + pos="10.0", + speed="23.0") departed_ids = self.k.vehicle.get_departed_ids() if len(departed_ids) > 0: From fe05dcde2e5c06efad1290a9e971e682cd989e02 Mon Sep 17 00:00:00 2001 From: Eugene Vinitsky Date: Sat, 2 May 2020 19:12:14 -0700 Subject: [PATCH 12/85] Add single agent case --- .../rl/multiagent/multiagent_i210.py | 6 +- .../rl/multiagent/multiagent_straight_road.py | 2 +- .../singleagent/singleagent_straightroad.py | 163 +++++++++++++ flow/envs/__init__.py | 2 + flow/envs/multiagent/i210.py | 42 +--- flow/envs/straightroad_env.py | 229 ++++++++++++++++++ flow/utils/rllib.py | 2 +- flow/visualize/visualizer_rllib.py | 4 +- 8 files changed, 412 insertions(+), 38 deletions(-) create mode 100644 examples/exp_configs/rl/singleagent/singleagent_straightroad.py create mode 100644 flow/envs/straightroad_env.py diff --git a/examples/exp_configs/rl/multiagent/multiagent_i210.py b/examples/exp_configs/rl/multiagent/multiagent_i210.py index 498024b35..b74f64027 100644 --- a/examples/exp_configs/rl/multiagent/multiagent_i210.py +++ b/examples/exp_configs/rl/multiagent/multiagent_i210.py @@ -26,7 +26,7 @@ # SET UP PARAMETERS FOR THE SIMULATION # number of steps per rollout -HORIZON = 4000 +HORIZON = 2000 VEH_PER_HOUR_BASE_119257914 = 10800 VEH_PER_HOUR_BASE_27414345 = 321 @@ -45,7 +45,8 @@ # whether to add in a reward for the speed of nearby vehicles "local_reward": True, # whether to reroute vehicles once they have exited - "reroute_on_exit": True + "reroute_on_exit": True, + 'target_velocity': 18, }) # CREATE VEHICLE TYPES AND INFLOWS @@ -148,6 +149,7 @@ sims_per_step=1, warmup_steps=warmup_steps, additional_params=additional_env_params, + done_at_exit=False ), # network-related parameters (see flow.core.params.NetParams and the diff --git a/examples/exp_configs/rl/multiagent/multiagent_straight_road.py b/examples/exp_configs/rl/multiagent/multiagent_straight_road.py index aa1946570..a15471539 100644 --- a/examples/exp_configs/rl/multiagent/multiagent_straight_road.py +++ b/examples/exp_configs/rl/multiagent/multiagent_straight_road.py @@ -125,7 +125,7 @@ sim_step=0.5, render=False, use_ballistic=True, - restart_instance=False + restart_instance=True ), # network-related parameters (see flow.core.params.NetParams and the diff --git a/examples/exp_configs/rl/singleagent/singleagent_straightroad.py b/examples/exp_configs/rl/singleagent/singleagent_straightroad.py new file mode 100644 index 000000000..b7b8698d6 --- /dev/null +++ b/examples/exp_configs/rl/singleagent/singleagent_straightroad.py @@ -0,0 +1,163 @@ +"""Multi-agent highway with ramps example. +Trains a non-constant number of agents, all sharing the same policy, on the +highway with ramps network. 
+""" +from flow.controllers import RLController, IDMController +from flow.core.params import EnvParams, NetParams, InitialConfig, InFlows, \ + VehicleParams, SumoParams, SumoLaneChangeParams +from flow.envs.ring.accel import ADDITIONAL_ENV_PARAMS +from flow.networks import HighwayNetwork +from flow.envs import SingleStraightRoad +from flow.networks.highway import ADDITIONAL_NET_PARAMS +from flow.utils.registry import make_create_env +from ray.tune.registry import register_env + + +# SET UP PARAMETERS FOR THE SIMULATION + +# number of steps per rollout +HORIZON = 2000 + +# inflow rate on the highway in vehicles per hour +HIGHWAY_INFLOW_RATE = 10800 / 5 +# percentage of autonomous vehicles compared to human vehicles on highway +PENETRATION_RATE = 10 + + +# SET UP PARAMETERS FOR THE NETWORK + +additional_net_params = ADDITIONAL_NET_PARAMS.copy() +additional_net_params.update({ + # length of the highway + "length": 2000, + # number of lanes + "lanes": 1, + # speed limit for all edges + "speed_limit": 30, + # number of edges to divide the highway into + "num_edges": 2 +}) + + +# SET UP PARAMETERS FOR THE ENVIRONMENT + +additional_env_params = ADDITIONAL_ENV_PARAMS.copy() +additional_env_params.update({ + 'max_accel': 2.6, + 'max_decel': 4.5, + 'target_velocity': 18.0, + 'local_reward': True, + 'lead_obs': True, + "terminate_on_wave": False, + # the environment is not allowed to terminate below this horizon length + 'wave_termination_horizon': 1000, + # the speed below which we consider a wave to have occured + 'wave_termination_speed': 10.0, + # whether the vehicle continues to acquire reward after it exits the system. This causes it to have incentive + # to leave the network in a good state after it leaves + 'reward_after_exit': True +}) + + +# CREATE VEHICLE TYPES AND INFLOWS + +vehicles = VehicleParams() +inflows = InFlows() + +# human vehicles +vehicles.add( + "human", + num_vehicles=0, + lane_change_params=SumoLaneChangeParams( + lane_change_mode="strategic", + ), + acceleration_controller=(IDMController, {"a": .3, "b": 2.0, "noise": 0.5}), +) + +# autonomous vehicles +vehicles.add( + veh_id='rl', + acceleration_controller=(RLController, {})) + +# add human vehicles on the highway +inflows.add( + veh_type="human", + edge="highway_0", + vehs_per_hour=int(HIGHWAY_INFLOW_RATE * (1 - PENETRATION_RATE / 100)), + depart_lane="free", + depart_speed="23.0", + name="idm_highway_inflow") + +# add autonomous vehicles on the highway +# they will stay on the highway, i.e. 
they won't exit through the off-ramps +inflows.add( + veh_type="rl", + edge="highway_0", + vehs_per_hour=int(HIGHWAY_INFLOW_RATE * (PENETRATION_RATE / 100)), + depart_lane="free", + depart_speed="23.0", + name="rl_highway_inflow") + +# SET UP FLOW PARAMETERS +done_at_exit = True +if additional_env_params['reward_after_exit']: + done_at_exit = False + +flow_params = dict( + # name of the experiment + exp_tag='singleagent_highway', + + # name of the flow environment the experiment is running on + env_name=SingleStraightRoad, + + # name of the network class the experiment is running on + network=HighwayNetwork, + + # simulator that is used by the experiment + simulator='traci', + + # environment related parameters (see flow.core.params.EnvParams) + env=EnvParams( + horizon=HORIZON, + warmup_steps=0, + sims_per_step=1, # do not put more than one + done_at_exit=done_at_exit, + additional_params=additional_env_params, + ), + + # sumo-related parameters (see flow.core.params.SumoParams) + sim=SumoParams( + sim_step=0.5, + render=False, + use_ballistic=True, + restart_instance=True + ), + + # network-related parameters (see flow.core.params.NetParams and the + # network's documentation or ADDITIONAL_NET_PARAMS component) + net=NetParams( + inflows=inflows, + additional_params=additional_net_params + ), + + # vehicles to be placed in the network at the start of a rollout (see + # flow.core.params.VehicleParams) + veh=vehicles, + + # parameters specifying the positioning of vehicles upon initialization/ + # reset (see flow.core.params.InitialConfig) + initial=InitialConfig(), +) + + +# SET UP RLLIB MULTI-AGENT FEATURES + +create_env, env_name = make_create_env(params=flow_params, version=0) + +# register as rllib env +register_env(env_name, create_env) + +# multiagent configuration +test_env = create_env() +obs_space = test_env.observation_space +act_space = test_env.action_space \ No newline at end of file diff --git a/flow/envs/__init__.py b/flow/envs/__init__.py index 5befe6a33..6f4351cc0 100755 --- a/flow/envs/__init__.py +++ b/flow/envs/__init__.py @@ -11,6 +11,7 @@ from flow.envs.ring.wave_attenuation import WaveAttenuationEnv, \ WaveAttenuationPOEnv from flow.envs.merge import MergePOEnv +from flow.envs.straightroad_env import SingleStraightRoad from flow.envs.test import TestEnv # deprecated classes whose names have changed @@ -36,6 +37,7 @@ 'BottleneckDesiredVelocityEnv', 'TestEnv', 'BayBridgeEnv', + 'SingleStraightRoad', # deprecated classes 'BottleNeckAccelEnv', 'DesiredVelocityEnv', diff --git a/flow/envs/multiagent/i210.py b/flow/envs/multiagent/i210.py index be3ec3de0..7a1d56211 100644 --- a/flow/envs/multiagent/i210.py +++ b/flow/envs/multiagent/i210.py @@ -65,6 +65,7 @@ def __init__(self, env_params, sim_params, network, simulator='traci'): self.lead_obs = env_params.additional_params.get("lead_obs") self.reroute_on_exit = env_params.additional_params.get("reroute_on_exit") self.max_lanes = MAX_LANES + self.num_enter_lanes = 5 self.entrance_edge = "119257914" self.exit_edge = "119257908#3" @@ -158,39 +159,16 @@ def compute_reward(self, rl_actions, **kwargs): if self.k.vehicle.get_speed(rl_id) >= 0: speeds.append(self.k.vehicle.get_speed(rl_id)) if len(speeds) > 0: - # rescale so the q function can estimate it quickly + # rescale so the critic can estimate it quickly rewards[rl_id] = np.mean([(des_speed - np.abs(speed - des_speed))**2 for speed in speeds]) / (des_speed**2) else: - for rl_id in self.k.vehicle.get_rl_ids(): - if self.env_params.evaluate: - # reward is speed of vehicle 
if we are in evaluation mode - reward = self.k.vehicle.get_speed(rl_id) - elif kwargs['fail']: - # reward is 0 if a collision occurred - reward = 0 - else: - # reward high system-level velocities - cost1 = average_velocity(self, fail=kwargs['fail']) - - # penalize small time headways - cost2 = 0 - t_min = 1 # smallest acceptable time headway - - lead_id = self.k.vehicle.get_leader(rl_id) - if lead_id not in ["", None] \ - and self.k.vehicle.get_speed(rl_id) > 0: - t_headway = max( - self.k.vehicle.get_headway(rl_id) / - self.k.vehicle.get_speed(rl_id), 0) - cost2 += min((t_headway - t_min) / t_min, 0) - - # weights for cost1, cost2, and cost3, respectively - eta1, eta2 = 1.00, 0.10 - - reward = max(eta1 * cost1 + eta2 * cost2, 0) - - rewards[rl_id] = reward + speeds = self.k.vehicle.get_speed(self.k.vehicle.get_ids()) + des_speed = self.env_params.additional_params["target_velocity"] + # rescale so the critic can estimate it quickly + reward = np.nan_to_num(np.mean([(des_speed - np.abs(speed - des_speed))**2 + for speed in speeds]) / (des_speed**2)) + rewards = {rl_id: reward for rl_id in self.k.vehicle.get_rl_ids()} return rewards def additional_command(self): @@ -223,7 +201,7 @@ def additional_command(self): type_id = self.k.vehicle.get_type(veh_id) # remove the vehicle self.k.vehicle.remove(veh_id) - lane = np.random.randint(low=0, high=self.max_lanes) + lane = np.random.randint(low=0, high=self.num_enter_lanes) # reintroduce it at the start of the network # TODO(@evinitsky) select the lane and speed a bit more cleanly # Note, the position is 10 so you are not overlapping with the inflow car that is being removed. @@ -299,7 +277,7 @@ class MultiStraightRoad(I210MultiEnv): def __init__(self, env_params, sim_params, network, simulator): super().__init__(env_params, sim_params, network, simulator) - self.max_lanes = 1 + self.num_enter_lanes = 1 self.entrance_edge = self.network.routes['highway_0'][0][0][0] self.exit_edge = self.network.routes['highway_0'][0][0][-1] diff --git a/flow/envs/straightroad_env.py b/flow/envs/straightroad_env.py new file mode 100644 index 000000000..1fd5c208c --- /dev/null +++ b/flow/envs/straightroad_env.py @@ -0,0 +1,229 @@ +"""Environment for training vehicles to reduce congestion in the I210.""" + +from gym.spaces import Box +import numpy as np + +from flow.envs.base import Env + +# largest number of lanes on any given edge in the network +MAX_LANES = 6 +MAX_NUM_VEHS = 8 +SPEED_SCALE = 50 +HEADWAY_SCALE = 1000 + +ADDITIONAL_ENV_PARAMS = { + # maximum acceleration for autonomous vehicles, in m/s^2 + "max_accel": 1, + # maximum deceleration for autonomous vehicles, in m/s^2 + "max_decel": 1, + # whether we use an obs space that contains adjacent lane info or just the lead obs + "lead_obs": True, + # whether the reward should come from local vehicles instead of global rewards + "local_reward": True, + # if the environment terminates once a wave has occurred + "terminate_on_wave": False, + # the environment is not allowed to terminate below this horizon length + 'wave_termination_horizon': 500, + # the speed below which we consider a wave to have occured + 'wave_termination_speed': 10.0 +} + + +class I210SingleEnv(Env): + """Partially observable single-agent environment for the I-210 subnetworks. + The policy is shared among the agents, so there can be a non-constant + number of RL vehicles throughout the simulation. 
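The rewritten fallback reward above (and the per-agent variant used when local_reward is set) is the scaled desired-speed term (des_speed - |v - des_speed|)^2 / des_speed^2, averaged over the relevant vehicles. A short worked restatement of its shape (illustrative only; des_speed of 18 m/s matches the straight-road configs, the sample speeds are made up):

import numpy as np

def desired_velocity_reward(speeds, des_speed=18.0):
    # peaks at 1.0 when every vehicle drives exactly at des_speed and
    # falls to 0.0 at a standstill or at 2 * des_speed
    speeds = np.asarray(speeds, dtype=float)
    return float(np.nan_to_num(
        np.mean((des_speed - np.abs(speeds - des_speed)) ** 2) / des_speed ** 2))

print(desired_velocity_reward([18.0]))       # 1.0
print(desired_velocity_reward([9.0]))        # 0.25
print(desired_velocity_reward([0.0, 36.0]))  # 0.0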
+ Required from env_params: + * max_accel: maximum acceleration for autonomous vehicles, in m/s^2 + * max_decel: maximum deceleration for autonomous vehicles, in m/s^2 + The following states, actions and rewards are considered for one autonomous + vehicle only, as they will be computed in the same way for each of them. + States + The observation consists of the speeds and bumper-to-bumper headways of + the vehicles immediately preceding and following autonomous vehicles in + all of the preceding lanes as well, a binary value indicating which of + these vehicles is autonomous, and the speed of the autonomous vehicle. + Missing vehicles are padded with zeros. + Actions + The action consists of an acceleration, bound according to the + environment parameters, as well as three values that will be converted + into probabilities via softmax to decide of a lane change (left, none + or right). NOTE: lane changing is currently not enabled. It's a TODO. + Rewards + The reward function encourages proximity of the system-level velocity + to a desired velocity specified in the environment parameters, while + slightly penalizing small time headways among autonomous vehicles. + Termination + A rollout is terminated if the time horizon is reached or if two + vehicles collide into one another. + """ + + def __init__(self, env_params, sim_params, network, simulator='traci'): + super().__init__(env_params, sim_params, network, simulator) + self.lead_obs = env_params.additional_params.get("lead_obs") + self.max_lanes = MAX_LANES + self.total_reward = 0.0 + + @property + def observation_space(self): + """See class definition.""" + # speed, speed of leader, headway + if self.lead_obs: + return Box( + low=-float('inf'), + high=float('inf'), + shape=(3 * MAX_NUM_VEHS,), + dtype=np.float32 + ) + # speed, dist to ego vehicle, binary value which is 1 if the vehicle is + # an AV + else: + leading_obs = 3 * self.max_lanes + follow_obs = 3 * self.max_lanes + + # speed and lane + self_obs = 2 + + return Box( + low=-float('inf'), + high=float('inf'), + shape=(leading_obs + follow_obs + self_obs,), + dtype=np.float32 + ) + + @property + def action_space(self): + """See class definition.""" + return Box( + low=-np.abs(self.env_params.additional_params['max_decel']), + high=self.env_params.additional_params['max_accel'], + shape=(1 * MAX_NUM_VEHS,), # (4,), + dtype=np.float32) + + def _apply_rl_actions(self, rl_actions): + """See class definition.""" + # in the warmup steps, rl_actions is None + if rl_actions is not None: + accels = [] + veh_ids = [] + rl_ids = self.get_sorted_rl_ids() + + for i, rl_id in enumerate(self.rl_id_list): + accels.append(rl_actions[i]) + veh_ids.append(rl_id) + + # lane_change_softmax = np.exp(actions[1:4]) + # lane_change_softmax /= np.sum(lane_change_softmax) + # lane_change_action = np.random.choice([-1, 0, 1], + # p=lane_change_softmax) + + self.k.vehicle.apply_acceleration(rl_ids, accels) + # self.k.vehicle.apply_lane_change(rl_id, lane_change_action) + + def get_state(self): + """See class definition.""" + rl_ids = self.get_sorted_rl_ids() + self.rl_id_list = rl_ids + veh_info = np.zeros(self.observation_space.shape[0]) + per_vehicle_obs = 3 + for i, rl_id in enumerate(rl_ids): + speed = self.k.vehicle.get_speed(rl_id) + lead_id = self.k.vehicle.get_leader(rl_id) + if lead_id in ["", None]: + # in case leader is not visible + lead_speed = SPEED_SCALE + headway = HEADWAY_SCALE + else: + lead_speed = self.k.vehicle.get_speed(lead_id) + headway = self.k.vehicle.get_headway(rl_id) + veh_info[i 
* per_vehicle_obs: (i+1) * per_vehicle_obs] = [speed / SPEED_SCALE, + headway / HEADWAY_SCALE, lead_speed / SPEED_SCALE] + return veh_info + + def compute_reward(self, rl_actions, **kwargs): + """See class definition.""" + # in the warmup steps + if rl_actions is None: + return {} + + rl_ids = self.get_sorted_rl_ids() + + des_speed = self.env_params.additional_params["target_velocity"] + rewards = np.nan_to_num(np.mean([(des_speed - np.abs(speed - des_speed))**2 + for speed in self.k.vehicle.get_speed(rl_ids)])) / (des_speed**2) + return rewards + + def get_sorted_rl_ids(self): + rl_ids = self.k.vehicle.get_rl_ids() + rl_ids = sorted(rl_ids, key=lambda veh_id: self.k.vehicle.get_x_by_id(veh_id)) + rl_ids = rl_ids[-MAX_NUM_VEHS:] + return rl_ids + + def additional_command(self): + """See parent class. + Define which vehicles are observed for visualization purposes. + """ + # specify observed vehicles + for rl_id in self.k.vehicle.get_rl_ids(): + # leader + lead_id = self.k.vehicle.get_leader(rl_id) + if lead_id: + self.k.vehicle.set_observed(lead_id) + + def state_util(self, rl_id): + """Return an array of headway, tailway, leader speed, follower speed. + Also return a 1 if leader is rl 0 otherwise, a 1 if follower is rl 0 otherwise. + If there are fewer than MAX_LANES the extra + entries are filled with -1 to disambiguate from zeros. + """ + veh = self.k.vehicle + lane_headways = veh.get_lane_headways(rl_id).copy() + lane_tailways = veh.get_lane_tailways(rl_id).copy() + lane_leader_speed = veh.get_lane_leaders_speed(rl_id).copy() + lane_follower_speed = veh.get_lane_followers_speed(rl_id).copy() + leader_ids = veh.get_lane_leaders(rl_id).copy() + follower_ids = veh.get_lane_followers(rl_id).copy() + rl_ids = self.k.vehicle.get_rl_ids() + is_leader_rl = [1 if l_id in rl_ids else 0 for l_id in leader_ids] + is_follow_rl = [1 if f_id in rl_ids else 0 for f_id in follower_ids] + diff = MAX_LANES - len(is_leader_rl) + if diff > 0: + # the minus 1 disambiguates missing cars from missing lanes + lane_headways += diff * [-1] + lane_tailways += diff * [-1] + lane_leader_speed += diff * [-1] + lane_follower_speed += diff * [-1] + is_leader_rl += diff * [-1] + is_follow_rl += diff * [-1] + lane_headways = np.asarray(lane_headways) / 1000 + lane_tailways = np.asarray(lane_tailways) / 1000 + lane_leader_speed = np.asarray(lane_leader_speed) / 100 + lane_follower_speed = np.asarray(lane_follower_speed) / 100 + return np.concatenate((lane_headways, lane_tailways, lane_leader_speed, + lane_follower_speed, is_leader_rl, + is_follow_rl)) + + def veh_statistics(self, rl_id): + """Return speed, edge information, and x, y about the vehicle itself.""" + speed = self.k.vehicle.get_speed(rl_id) / 100.0 + lane = (self.k.vehicle.get_lane(rl_id) + 1) / 10.0 + return np.array([speed, lane]) + + +class SingleStraightRoad(I210SingleEnv): + """Partially observable multi-agent environment for a straight road. 
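get_state above packs a (speed, headway, leader speed) triple for up to MAX_NUM_VEHS controlled vehicles into one fixed-length vector, normalizing by SPEED_SCALE and HEADWAY_SCALE and leaving unused slots at zero, which is what lets a single-agent policy drive a varying number of AVs. A simulator-free sketch of that packing (the tuples below are made-up numbers):

import numpy as np

SPEED_SCALE = 50
HEADWAY_SCALE = 1000
MAX_NUM_VEHS = 8
PER_VEHICLE_OBS = 3

def pack_observation(per_vehicle):
    # per_vehicle: up to MAX_NUM_VEHS (speed, headway, lead_speed) tuples,
    # ordered as get_sorted_rl_ids orders them (closest to the exit last)
    obs = np.zeros(PER_VEHICLE_OBS * MAX_NUM_VEHS)
    for i, (speed, headway, lead_speed) in enumerate(per_vehicle):
        obs[i * PER_VEHICLE_OBS:(i + 1) * PER_VEHICLE_OBS] = [
            speed / SPEED_SCALE, headway / HEADWAY_SCALE, lead_speed / SPEED_SCALE]
    return obs

# two observed AVs, six zero-padded slots
print(pack_observation([(20.0, 40.0, 22.0), (18.0, 35.0, 20.0)]))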
Look at superclass for more information.""" + + def __init__(self, env_params, sim_params, network, simulator): + super().__init__(env_params, sim_params, network, simulator) + self.max_lanes = 1 + + def step(self, rl_actions): + obs, rew, done, info = super().step(rl_actions) + mean_speed = np.nan_to_num(np.mean(self.k.vehicle.get_speed(self.k.vehicle.get_ids()))) + if self.env_params.additional_params['terminate_on_wave'] and \ + mean_speed < self.env_params.additional_params['wave_termination_speed'] \ + and self.time_counter > self.env_params.additional_params['wave_termination_horizon'] \ + and len(self.k.vehicle.get_ids()) > 0: + done = True + + return obs, rew, done, info \ No newline at end of file diff --git a/flow/utils/rllib.py b/flow/utils/rllib.py index 7d777d769..ca8072c85 100644 --- a/flow/utils/rllib.py +++ b/flow/utils/rllib.py @@ -146,7 +146,7 @@ def get_flow_params(config): if flow_params["net"]["inflows"]: net.inflows.__dict__ = flow_params["net"]["inflows"].copy() - if len(net.template) > 0: + if net.template and len(net.template) > 0: dirname = os.getcwd() filename = os.path.join(dirname, '../../examples') split = net.template.split('examples')[1][1:] diff --git a/flow/visualize/visualizer_rllib.py b/flow/visualize/visualizer_rllib.py index 8c38a91c1..c1dd83193 100644 --- a/flow/visualize/visualizer_rllib.py +++ b/flow/visualize/visualizer_rllib.py @@ -166,7 +166,7 @@ def visualizer_rllib(args): if multiagent: rets = {} # map the agent id to its policy - policy_map_fn = config['multiagent']['policy_mapping_fn'].func + policy_map_fn = config['multiagent']['policy_mapping_fn'] for key in config['multiagent']['policies'].keys(): rets[key] = [] else: @@ -177,7 +177,7 @@ def visualizer_rllib(args): if multiagent: state_init = {} # map the agent id to its policy - policy_map_fn = config['multiagent']['policy_mapping_fn'].func + policy_map_fn = config['multiagent']['policy_mapping_fn'] size = config['model']['lstm_cell_size'] for key in config['multiagent']['policies'].keys(): state_init[key] = [np.zeros(size, np.float32), From 72f8459d1f41aa183ffe2fd5856202891b601ac5 Mon Sep 17 00:00:00 2001 From: Eugene Vinitsky Date: Fri, 8 May 2020 15:25:17 -0700 Subject: [PATCH 13/85] Address comments --- ...gleagent_straightroad.py => singleagent_straight_road.py} | 0 flow/envs/base.py | 5 +++++ 2 files changed, 5 insertions(+) rename examples/exp_configs/rl/singleagent/{singleagent_straightroad.py => singleagent_straight_road.py} (100%) diff --git a/examples/exp_configs/rl/singleagent/singleagent_straightroad.py b/examples/exp_configs/rl/singleagent/singleagent_straight_road.py similarity index 100% rename from examples/exp_configs/rl/singleagent/singleagent_straightroad.py rename to examples/exp_configs/rl/singleagent/singleagent_straight_road.py diff --git a/flow/envs/base.py b/flow/envs/base.py index 4c4554ef3..adc959b9a 100644 --- a/flow/envs/base.py +++ b/flow/envs/base.py @@ -326,6 +326,11 @@ def step(self, rl_actions): contains other diagnostic information from the previous action """ for _ in range(self.env_params.sims_per_step): + # This tracks vehicles that have appeared during warmup steps + if self.time_counter <= self.env_params.sims_per_step * self.env_params.warmup_steps: + self.observed_ids.update(self.k.vehicle.get_ids()) + self.observed_rl_ids.update(self.k.vehicle.get_rl_ids()) + self.time_counter += 1 self.step_counter += 1 From b80d441de87f3f9afc83d6d8a531801a9f59c798 Mon Sep 17 00:00:00 2001 From: Eugene Vinitsky Date: Fri, 8 May 2020 15:32:52 -0700 Subject: 
[PATCH 14/85] Pydoc style --- .../singleagent/singleagent_straight_road.py | 3 ++- flow/envs/multiagent/i210.py | 15 ++++++------ flow/envs/straightroad_env.py | 24 ++++++++++--------- 3 files changed, 23 insertions(+), 19 deletions(-) diff --git a/examples/exp_configs/rl/singleagent/singleagent_straight_road.py b/examples/exp_configs/rl/singleagent/singleagent_straight_road.py index b7b8698d6..265d34d42 100644 --- a/examples/exp_configs/rl/singleagent/singleagent_straight_road.py +++ b/examples/exp_configs/rl/singleagent/singleagent_straight_road.py @@ -1,4 +1,5 @@ """Multi-agent highway with ramps example. + Trains a non-constant number of agents, all sharing the same policy, on the highway with ramps network. """ @@ -160,4 +161,4 @@ # multiagent configuration test_env = create_env() obs_space = test_env.observation_space -act_space = test_env.action_space \ No newline at end of file +act_space = test_env.action_space diff --git a/flow/envs/multiagent/i210.py b/flow/envs/multiagent/i210.py index 51a24d3e6..a6e39cdec 100644 --- a/flow/envs/multiagent/i210.py +++ b/flow/envs/multiagent/i210.py @@ -3,7 +3,6 @@ from gym.spaces import Box import numpy as np -from flow.core.rewards import average_velocity from flow.envs.multiagent.base import MultiEnv # largest number of lanes on any given edge in the network @@ -136,7 +135,8 @@ def get_state(self): else: lead_speed = self.k.vehicle.get_speed(lead_id) headway = self.k.vehicle.get_headway(rl_id) - veh_info.update({rl_id: np.array([speed / SPEED_SCALE, headway /HEADWAY_SCALE, lead_speed / SPEED_SCALE])}) + veh_info.update({rl_id: np.array([speed / SPEED_SCALE, headway / HEADWAY_SCALE, + lead_speed / SPEED_SCALE])}) else: veh_info = {rl_id: np.concatenate((self.state_util(rl_id), self.veh_statistics(rl_id))) @@ -162,14 +162,14 @@ def compute_reward(self, rl_actions, **kwargs): speeds.append(self.k.vehicle.get_speed(rl_id)) if len(speeds) > 0: # rescale so the critic can estimate it quickly - rewards[rl_id] = np.mean([(des_speed - np.abs(speed - des_speed))**2 - for speed in speeds]) / (des_speed**2) + rewards[rl_id] = np.mean([(des_speed - np.abs(speed - des_speed)) ** 2 + for speed in speeds]) / (des_speed ** 2) else: speeds = self.k.vehicle.get_speed(self.k.vehicle.get_ids()) des_speed = self.env_params.additional_params["target_velocity"] # rescale so the critic can estimate it quickly - reward = np.nan_to_num(np.mean([(des_speed - np.abs(speed - des_speed))**2 - for speed in speeds]) / (des_speed**2)) + reward = np.nan_to_num(np.mean([(des_speed - np.abs(speed - des_speed)) ** 2 + for speed in speeds]) / (des_speed ** 2)) rewards = {rl_id: reward for rl_id in self.k.vehicle.get_rl_ids()} return rewards @@ -198,7 +198,7 @@ def additional_command(self): continue # on the exit edge, near the end, and is the vehicle furthest along if edge == self.exit_edge and \ - (self.k.vehicle.get_position(veh_id) > self.k.network.edge_length(self.exit_edge) - 100)\ + (self.k.vehicle.get_position(veh_id) > self.k.network.edge_length(self.exit_edge) - 100) \ and self.k.vehicle.get_leader(veh_id) is None: type_id = self.k.vehicle.get_type(veh_id) # remove the vehicle @@ -263,6 +263,7 @@ def veh_statistics(self, rl_id): return np.array([speed, lane]) def step(self, rl_actions): + """See parent class for more details; add option to reroute vehicles.""" state, reward, done, info = super().step(rl_actions) # handle the edge case where a vehicle hasn't been put back when the rollout terminates if self.reroute_on_exit and done['__all__']: diff --git 
a/flow/envs/straightroad_env.py b/flow/envs/straightroad_env.py index 1fd5c208c..92fbb855b 100644 --- a/flow/envs/straightroad_env.py +++ b/flow/envs/straightroad_env.py @@ -31,6 +31,7 @@ class I210SingleEnv(Env): """Partially observable single-agent environment for the I-210 subnetworks. + The policy is shared among the agents, so there can be a non-constant number of RL vehicles throughout the simulation. Required from env_params: @@ -118,7 +119,6 @@ def _apply_rl_actions(self, rl_actions): # p=lane_change_softmax) self.k.vehicle.apply_acceleration(rl_ids, accels) - # self.k.vehicle.apply_lane_change(rl_id, lane_change_action) def get_state(self): """See class definition.""" @@ -136,8 +136,9 @@ def get_state(self): else: lead_speed = self.k.vehicle.get_speed(lead_id) headway = self.k.vehicle.get_headway(rl_id) - veh_info[i * per_vehicle_obs: (i+1) * per_vehicle_obs] = [speed / SPEED_SCALE, - headway / HEADWAY_SCALE, lead_speed / SPEED_SCALE] + veh_info[i * per_vehicle_obs: (i + 1) * per_vehicle_obs] = [speed / SPEED_SCALE, + headway / HEADWAY_SCALE, + lead_speed / SPEED_SCALE] return veh_info def compute_reward(self, rl_actions, **kwargs): @@ -149,20 +150,19 @@ def compute_reward(self, rl_actions, **kwargs): rl_ids = self.get_sorted_rl_ids() des_speed = self.env_params.additional_params["target_velocity"] - rewards = np.nan_to_num(np.mean([(des_speed - np.abs(speed - des_speed))**2 - for speed in self.k.vehicle.get_speed(rl_ids)])) / (des_speed**2) + rewards = np.nan_to_num(np.mean([(des_speed - np.abs(speed - des_speed)) ** 2 + for speed in self.k.vehicle.get_speed(rl_ids)])) / (des_speed ** 2) return rewards def get_sorted_rl_ids(self): + """Return the MAX_NUM_VEHS closest to the exit.""" rl_ids = self.k.vehicle.get_rl_ids() rl_ids = sorted(rl_ids, key=lambda veh_id: self.k.vehicle.get_x_by_id(veh_id)) rl_ids = rl_ids[-MAX_NUM_VEHS:] return rl_ids def additional_command(self): - """See parent class. - Define which vehicles are observed for visualization purposes. - """ + """Define which vehicles are observed for visualization purposes.""" # specify observed vehicles for rl_id in self.k.vehicle.get_rl_ids(): # leader @@ -172,6 +172,7 @@ def additional_command(self): def state_util(self, rl_id): """Return an array of headway, tailway, leader speed, follower speed. + Also return a 1 if leader is rl 0 otherwise, a 1 if follower is rl 0 otherwise. If there are fewer than MAX_LANES the extra entries are filled with -1 to disambiguate from zeros. 
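The state_util docstring touched just above describes padding with -1 whenever the current edge has fewer than MAX_LANES lanes, so a missing lane can never be mistaken for a real observation of zero headway or zero speed. A standalone sketch of that padding (plain lists stand in for the kernel queries):

MAX_LANES = 6

def pad_lane_features(values, max_lanes=MAX_LANES, fill=-1):
    # extend a per-lane feature list to max_lanes entries, marking
    # non-existent lanes with -1 rather than 0
    diff = max_lanes - len(values)
    if diff > 0:
        return list(values) + diff * [fill]
    return list(values)

# a two-lane edge: the four trailing entries are -1, not 0
print(pad_lane_features([120.0, 85.0]))  # [120.0, 85.0, -1, -1, -1, -1]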
@@ -218,12 +219,13 @@ def __init__(self, env_params, sim_params, network, simulator): self.max_lanes = 1 def step(self, rl_actions): + """See parent class.""" obs, rew, done, info = super().step(rl_actions) mean_speed = np.nan_to_num(np.mean(self.k.vehicle.get_speed(self.k.vehicle.get_ids()))) if self.env_params.additional_params['terminate_on_wave'] and \ - mean_speed < self.env_params.additional_params['wave_termination_speed'] \ - and self.time_counter > self.env_params.additional_params['wave_termination_horizon'] \ + mean_speed < self.env_params.additional_params['wave_termination_speed'] \ + and self.time_counter > self.env_params.additional_params['wave_termination_horizon'] \ and len(self.k.vehicle.get_ids()) > 0: done = True - return obs, rew, done, info \ No newline at end of file + return obs, rew, done, info From 6c1ef8a911b24fe1bfbbb401166ff9bed726d958 Mon Sep 17 00:00:00 2001 From: Eugene Vinitsky Date: Sat, 9 May 2020 16:30:43 -0700 Subject: [PATCH 15/85] MADDPG is running but not training --- .../rl/multiagent/multiagent_i210_qmix.py | 180 ------------------ .../multiagent_straight_road_maddpg.py | 5 +- examples/train.py | 1 + flow/algorithms/maddpg/maddpg.py | 2 +- flow/algorithms/maddpg/maddpg_policy.py | 11 +- flow/envs/multiagent/base.py | 4 + flow/envs/multiagent/i210.py | 70 ++++--- 7 files changed, 54 insertions(+), 219 deletions(-) delete mode 100644 examples/exp_configs/rl/multiagent/multiagent_i210_qmix.py diff --git a/examples/exp_configs/rl/multiagent/multiagent_i210_qmix.py b/examples/exp_configs/rl/multiagent/multiagent_i210_qmix.py deleted file mode 100644 index 64841a6a4..000000000 --- a/examples/exp_configs/rl/multiagent/multiagent_i210_qmix.py +++ /dev/null @@ -1,180 +0,0 @@ -"""Multi-agent I-210 example. - -Trains a non-constant number of agents, all sharing the same policy, on the -highway with ramps network. -""" -import os - -from ray.tune.registry import register_env - -from flow.controllers import RLController -from flow.controllers.car_following_models import IDMController -import flow.config as config -from flow.core.params import EnvParams -from flow.core.params import NetParams -from flow.core.params import InitialConfig -from flow.core.params import InFlows -from flow.core.params import VehicleParams -from flow.core.params import SumoParams -from flow.core.params import SumoLaneChangeParams -from flow.networks.i210_subnetwork import I210SubNetwork, EDGES_DISTRIBUTION -from flow.envs.multiagent.i210 import I210QMIXMultiEnv, ADDITIONAL_ENV_PARAMS -from flow.utils.registry import make_create_env - -# SET UP PARAMETERS FOR THE SIMULATION - -# number of steps per rollout -HORIZON = 2000 - -# percentage of autonomous vehicles compared to human vehicles on highway -PENETRATION_RATE = 10 - -# SET UP PARAMETERS FOR THE ENVIRONMENT -additional_env_params = ADDITIONAL_ENV_PARAMS.copy() -additional_env_params.update({ - 'max_accel': 2.6, - 'max_decel': 4.5, - # configure the observation space. Look at the I210MultiEnv class for more info. 
- 'lead_obs': True, - # whether to add in a reward for the speed of nearby vehicles - "local_reward": True, - "num_actions": 5, - "max_num_agents_qmix": 200 -}) - -# CREATE VEHICLE TYPES AND INFLOWS -# no vehicles in the network -vehicles = VehicleParams() -vehicles.add( - "human", - num_vehicles=0, - lane_change_params=SumoLaneChangeParams(lane_change_mode="strategic"), - acceleration_controller=(IDMController, {"a": .3, "b": 2.0, "noise": 0.6}), -) -vehicles.add( - "av", - acceleration_controller=(RLController, {}), - num_vehicles=0, -) - -inflow = InFlows() -# main highway -pen_rate = PENETRATION_RATE / 100 -assert pen_rate < 1.0, "your penetration rate is over 100%" -assert pen_rate > 0.0, "your penetration rate should be above zero" -inflow.add( - veh_type="human", - edge="119257914", - vehs_per_hour=int(10800 * (1 - pen_rate)), - # probability=1.0, - departLane="random", - departSpeed=20) -# # on ramp -# inflow.add( -# veh_type="human", -# edge="27414345", -# vehs_per_hour=321 * pen_rate, -# departLane="random", -# departSpeed=20) -# inflow.add( -# veh_type="human", -# edge="27414342#0", -# vehs_per_hour=421 * pen_rate, -# departLane="random", -# departSpeed=20) - -# Now add the AVs -# main highway -inflow.add( - veh_type="av", - edge="119257914", - vehs_per_hour=int(10800 * pen_rate), - # probability=1.0, - departLane="random", - departSpeed=20) -# # on ramp -# inflow.add( -# veh_type="av", -# edge="27414345", -# vehs_per_hour=int(321 * pen_rate), -# departLane="random", -# departSpeed=20) -# inflow.add( -# veh_type="av", -# edge="27414342#0", -# vehs_per_hour=int(421 * pen_rate), -# departLane="random", -# departSpeed=20) - -NET_TEMPLATE = os.path.join( - config.PROJECT_PATH, - "examples/exp_configs/templates/sumo/test2.net.xml") - -flow_params = dict( - # name of the experiment - exp_tag='I_210_subnetwork', - - # name of the flow environment the experiment is running on - env_name=I210QMIXMultiEnv, - - # name of the network class the experiment is running on - network=I210SubNetwork, - - # simulator that is used by the experiment - simulator='traci', - - # simulation-related parameters - sim=SumoParams( - sim_step=0.5, - render=False, - color_by_speed=False, - restart_instance=True, - use_ballistic=True - ), - - # environment related parameters (see flow.core.params.EnvParams) - env=EnvParams( - horizon=HORIZON, - sims_per_step=1, - warmup_steps=0, - additional_params=additional_env_params, - ), - - # network-related parameters (see flow.core.params.NetParams and the - # network's documentation or ADDITIONAL_NET_PARAMS component) - net=NetParams( - inflows=inflow, - template=NET_TEMPLATE - ), - - # vehicles to be placed in the network at the start of a rollout (see - # flow.core.params.VehicleParams) - veh=vehicles, - - # parameters specifying the positioning of vehicles upon initialization/ - # reset (see flow.core.params.InitialConfig) - initial=InitialConfig( - edges_distribution=EDGES_DISTRIBUTION, - ), -) - -# SET UP RLLIB MULTI-AGENT FEATURES - -create_env, env_name = make_create_env(params=flow_params, version=0) - -# register as rllib env -register_env(env_name, create_env) - -# multiagent configuration -test_env = create_env() -obs_space = test_env.observation_space -act_space = test_env.action_space - -# POLICY_GRAPHS = {'av': (None, obs_space, act_space, {})} - -# POLICIES_TO_TRAIN = ['av'] - - -# def policy_mapping_fn(_): -# """Map a policy in RLlib.""" -# return 'av' diff --git a/examples/exp_configs/rl/multiagent/multiagent_straight_road_maddpg.py 
b/examples/exp_configs/rl/multiagent/multiagent_straight_road_maddpg.py index 3656cef81..f1a3b5bc3 100644 --- a/examples/exp_configs/rl/multiagent/multiagent_straight_road_maddpg.py +++ b/examples/exp_configs/rl/multiagent/multiagent_straight_road_maddpg.py @@ -49,8 +49,9 @@ 'target_velocity': 18, 'local_reward': True, 'lead_obs': True, + 'max_num_agents': 25, # whether to reroute vehicles once they have exited - "reroute_on_exit": True + "reroute_on_exit": False, }) @@ -125,7 +126,7 @@ sim_step=0.5, render=False, use_ballistic=True, - restart_instance=True + restart_instance=False ), # network-related parameters (see flow.core.params.NetParams and the diff --git a/examples/train.py b/examples/train.py index 3e00f6093..b392039af 100644 --- a/examples/train.py +++ b/examples/train.py @@ -209,6 +209,7 @@ def setup_exps_rllib(flow_params, elif alg_run == "MADDPG": from flow.algorithms.maddpg.maddpg import MADDPGTrainer, DEFAULT_CONFIG config = deepcopy(DEFAULT_CONFIG) + config["actor_feature_reg"] = 0.0 alg_run = MADDPGTrainer elif alg_run == "QMIX": diff --git a/flow/algorithms/maddpg/maddpg.py b/flow/algorithms/maddpg/maddpg.py index 69fadcb08..d1b4a140e 100644 --- a/flow/algorithms/maddpg/maddpg.py +++ b/flow/algorithms/maddpg/maddpg.py @@ -16,7 +16,7 @@ from ray.rllib.agents.trainer import with_common_config from ray.rllib.agents.dqn.dqn import GenericOffPolicyTrainer -from ray.rllib.contrib.maddpg.maddpg_policy import MADDPGTFPolicy +from flow.algorithms.maddpg.maddpg_policy import MADDPGTFPolicy from ray.rllib.optimizers import SyncReplayOptimizer from ray.rllib.policy.sample_batch import SampleBatch, MultiAgentBatch diff --git a/flow/algorithms/maddpg/maddpg_policy.py b/flow/algorithms/maddpg/maddpg_policy.py index 3924d5c3b..707afba95 100644 --- a/flow/algorithms/maddpg/maddpg_policy.py +++ b/flow/algorithms/maddpg/maddpg_policy.py @@ -85,8 +85,8 @@ def _make_continuous_space(space): _) in sorted(config["multiagent"]["policies"].items()) ] else: - obs_space = config["multiagent"]["policies"][list(config["multiagent"]["policies"].keys())][1] - act_space = config["multiagent"]["policies"][list(config["multiagent"]["policies"].keys())][2] + obs_space = config["multiagent"]["policies"][list(config["multiagent"]["policies"].keys())[0]][1] + act_space = config["multiagent"]["policies"][list(config["multiagent"]["policies"].keys())[0]][2] num_agents = config["max_num_agents"] obs_space_n = [ _make_continuous_space(obs_space) @@ -385,10 +385,11 @@ def _build_actor_network(self, out = tf.layers.dense(out, units=hidden, activation=activation) feature = tf.layers.dense( out, units=act_space.shape[0], activation=None) - sampler = tfp.distributions.RelaxedOneHotCategorical( - temperature=1.0, logits=feature).sample() + # TODO(@ev) what is going on here?? Why is this here?? 
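The commented-out lines that follow drop the actor's RelaxedOneHotCategorical sampler: that distribution is a Gumbel-softmax style relaxation of a discrete action choice, which does not fit the continuous accelerations used here, so the raw actor output is returned instead. A hedged sketch of the two cases (illustrative only; tensorflow_probability is assumed to be available, as in the original policy):

import tensorflow_probability as tfp

def actor_head(feature, discrete_actions=False, temperature=1.0):
    # discrete_actions=True reproduces the original behaviour (a differentiable
    # relaxed-categorical sample over logits); False matches the edit above,
    # which treats 'feature' directly as the continuous action
    if discrete_actions:
        return tfp.distributions.RelaxedOneHotCategorical(
            temperature=temperature, logits=feature).sample()
    return feature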
+ # sampler = tfp.distributions.RelaxedOneHotCategorical( + # temperature=1.0, logits=feature).sample() - return sampler, feature, model, tf.global_variables(scope.name) + return feature, feature, model, tf.global_variables(scope.name) def update_target(self, tau=None): if tau is not None: diff --git a/flow/envs/multiagent/base.py b/flow/envs/multiagent/base.py index 26c5e8a15..22152cdd1 100644 --- a/flow/envs/multiagent/base.py +++ b/flow/envs/multiagent/base.py @@ -98,6 +98,10 @@ def step(self, rl_actions): # store new observations in the vehicles and traffic lights class self.k.update(reset=False) + if self.time_counter <= self.env_params.sims_per_step * self.env_params.warmup_steps: + self.observed_ids.update(self.k.vehicle.get_ids()) + self.observed_rl_ids.update(self.k.vehicle.get_rl_ids()) + # update the colors of vehicles if self.sim_params.render: self.k.vehicle.update_vehicle_colors() diff --git a/flow/envs/multiagent/i210.py b/flow/envs/multiagent/i210.py index ebf48cc56..25290e192 100644 --- a/flow/envs/multiagent/i210.py +++ b/flow/envs/multiagent/i210.py @@ -72,7 +72,7 @@ def __init__(self, env_params, sim_params, network, simulator='traci'): self.max_lanes = MAX_LANES self.num_enter_lanes = 5 self.entrance_edge = "119257914" - self.exit_edge = "119257908#3" + self.exit_edge = "119257908#2" self.leader = [] @property @@ -196,6 +196,8 @@ def additional_command(self): and not self.env_params.evaluate: veh_ids = self.k.vehicle.get_ids() edges = self.k.vehicle.get_edge(veh_ids) + valid_lanes = list(range(self.num_enter_lanes)) + num_trials = 0 for veh_id, edge in zip(veh_ids, edges): if edge == "": continue @@ -205,10 +207,14 @@ def additional_command(self): if edge == self.exit_edge and \ (self.k.vehicle.get_position(veh_id) > self.k.network.edge_length(self.exit_edge) - 100) \ and self.k.vehicle.get_leader(veh_id) is None: + # if self.step_counter > 6000: + # import ipdb; ipdb.set_trace() type_id = self.k.vehicle.get_type(veh_id) # remove the vehicle self.k.vehicle.remove(veh_id) - lane = np.random.randint(low=0, high=self.num_enter_lanes) + index = np.random.randint(low=0, high=len(valid_lanes)) + lane = valid_lanes[index] + del valid_lanes[index] # reintroduce it at the start of the network # TODO(@evinitsky) select the lane and speed a bit more cleanly # Note, the position is 10 so you are not overlapping with the inflow car that is being removed. 
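Taken together, the additional_command changes above remove a vehicle once it is near the end of the exit edge with no leader and immediately re-add it at the entrance edge, drawing each re-inserted vehicle's lane from a shrinking pool so that two vehicles rerouted in the same step never land on the same lane; the next hunk also bumps the insertion position to 20 m so the re-added vehicle does not sit on top of the inflow vehicle being removed. A condensed sketch of that loop (illustrative only; it reuses the same Flow kernel calls that appear in the hunks above):

import numpy as np

def reroute_exited_vehicles(env, pos="20.0", speed="23.0"):
    # condensed version of the reroute-on-exit block in additional_command
    valid_lanes = list(range(env.num_enter_lanes))
    for veh_id in env.k.vehicle.get_ids():
        if not valid_lanes:
            break
        edge = env.k.vehicle.get_edge(veh_id)
        if edge != env.exit_edge:
            continue
        near_exit = env.k.vehicle.get_position(veh_id) > \
            env.k.network.edge_length(env.exit_edge) - 100
        if near_exit and env.k.vehicle.get_leader(veh_id) is None:
            type_id = env.k.vehicle.get_type(veh_id)
            env.k.vehicle.remove(veh_id)
            # sample without replacement so simultaneous re-inserts get distinct lanes
            lane = valid_lanes.pop(np.random.randint(len(valid_lanes)))
            env.k.vehicle.add(veh_id=veh_id, edge=env.entrance_edge,
                              type_id=str(type_id), lane=str(lane),
                              pos=pos, speed=speed)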
@@ -218,7 +224,7 @@ def additional_command(self): edge=self.entrance_edge, type_id=str(type_id), lane=str(lane), - pos="10.0", + pos="20.0", speed="23.0") departed_ids = self.k.vehicle.get_departed_ids() @@ -283,7 +289,7 @@ def step(self, rl_actions): class I210MADDPGMultiEnv(I210MultiEnv): def __init__(self, env_params, sim_params, network, simulator='traci'): super().__init__(env_params, sim_params, network, simulator) - self.max_num_agents = env_params.additional_params.get("max_num_agents") + self.max_num_agents = env_params.additional_params["max_num_agents"] self.rl_id_to_idx_map = OrderedDict() self.idx_to_rl_id_map = OrderedDict() self.index_counter = 0 @@ -295,6 +301,7 @@ def _apply_rl_actions(self, rl_actions): # in the warmup steps, rl_actions is None t = time() if rl_actions: + # print(rl_actions) accel_list = [] rl_ids = [] for rl_id in self.k.vehicle.get_rl_ids(): @@ -302,27 +309,33 @@ def _apply_rl_actions(self, rl_actions): accel_list.append(rl_actions[self.rl_id_to_idx_map[rl_id]]) rl_ids.append(rl_id) self.k.vehicle.apply_acceleration(rl_ids, accel_list) - print('time to apply actions is ', time() - t) + # print('time to apply actions is ', time() - t) def get_state(self): t = time() - for key in self.k.vehicle.get_departed_ids(): - if key not in self.rl_id_to_idx_map and key in self.k.vehicle.get_rl_ids(): - self.rl_id_to_idx_map[key] = self.index_counter - self.idx_to_rl_id_map[self.index_counter] = key - self.index_counter += 1 - print(self.index_counter) + # TODO(@evinitsky) clean this up + self.index_counter = 0 + self.rl_id_to_idx_map = {} + for key in self.k.vehicle.get_rl_ids(): + self.rl_id_to_idx_map[key] = self.index_counter + self.idx_to_rl_id_map[self.index_counter] = key + self.index_counter += 1 + if self.index_counter > self.max_num_agents: + break - rl_ids = self.k.vehicle.get_rl_ids() veh_info = super().get_state() # TODO(@evinitsky) think this doesn't have to be a deepcopy veh_info_copy = deepcopy(self.default_state) - veh_info_copy.update({self.rl_id_to_idx_map[rl_id]: veh_info[rl_id] - for rl_id in rl_ids}) + # id_list = zip(list(range(self.max_num_agents)), rl_ids) + try: + veh_info_copy.update({self.rl_id_to_idx_map[rl_id]: veh_info[rl_id] + for rl_id in self.rl_id_to_idx_map.keys()}) + except: + import ipdb; ipdb.set_trace() # print('time to update copy is ', time() - t) veh_info = veh_info_copy - print('state time is ', time() - t) + # print('state time is ', time() - t) return veh_info @@ -332,26 +345,16 @@ def compute_reward(self, rl_actions, **kwargs): reward = np.nan_to_num(np.mean(self.k.vehicle.get_speed(self.k.vehicle.get_ids()))) / (20 * self.env_params.horizon) temp_reward_dict = {idx: reward for idx in range(self.max_num_agents)} - print('reward time is ', time() - t) + # print('reward time is ', time() - t) return temp_reward_dict def reset(self, new_inflow_rate=None): - veh_info = super().reset(new_inflow_rate) + super().reset(new_inflow_rate) self.rl_id_to_idx_map = OrderedDict() self.idx_to_rl_id_map = OrderedDict() self.index_counter = 0 - rl_ids = self.k.vehicle.get_rl_ids() - for key in self.k.vehicle.get_departed_ids() + self.k.vehicle.get_rl_ids(): - if key not in self.rl_id_to_idx_map and key in self.k.vehicle.get_rl_ids(): - self.rl_id_to_idx_map[key] = self.index_counter - self.idx_to_rl_id_map[self.index_counter] = key - self.index_counter += 1 - # TODO(@evinitsky) think this doesn't have to be a deepcopy - veh_info_copy = deepcopy(self.default_state) - veh_info_copy.update({self.rl_id_to_idx_map[rl_id]: veh_info[rl_id] - 
for rl_id in enumerate(rl_ids)}) - return veh_info_copy + return self.get_state() class MultiStraightRoad(I210MultiEnv): @@ -388,11 +391,16 @@ def _apply_rl_actions(self, rl_actions): """See class definition.""" # in the warmup steps, rl_actions is None if rl_actions: + # print(rl_actions) + rl_ids = [] accels = [] - for rl_id, actions in rl_actions.items(): - accels.append(actions[0]) - rl_ids.append(rl_id) + for idx, actions in rl_actions.items(): + if idx < self.index_counter: + accels.append(actions[0]) + rl_ids.append(self.idx_to_rl_id_map[idx]) + else: + break # prevent the AV from blocking the entrance self.k.vehicle.apply_acceleration(rl_ids, accels) From 73fe42144f72a9c70f3c4e7eaae9ce94630423eb Mon Sep 17 00:00:00 2001 From: Eugene Vinitsky Date: Sat, 9 May 2020 18:17:24 -0700 Subject: [PATCH 16/85] Minor --- .../rl/multiagent/multiagent_straight_road_maddpg.py | 2 +- flow/envs/multiagent/i210.py | 9 +++------ scripts/ray_autoscale.yaml | 11 ++++++----- scripts/run_exps.sh | 5 +++++ 4 files changed, 15 insertions(+), 12 deletions(-) create mode 100755 scripts/run_exps.sh diff --git a/examples/exp_configs/rl/multiagent/multiagent_straight_road_maddpg.py b/examples/exp_configs/rl/multiagent/multiagent_straight_road_maddpg.py index f1a3b5bc3..73d5d1f0c 100644 --- a/examples/exp_configs/rl/multiagent/multiagent_straight_road_maddpg.py +++ b/examples/exp_configs/rl/multiagent/multiagent_straight_road_maddpg.py @@ -49,7 +49,7 @@ 'target_velocity': 18, 'local_reward': True, 'lead_obs': True, - 'max_num_agents': 25, + 'max_num_agents': 5, # whether to reroute vehicles once they have exited "reroute_on_exit": False, }) diff --git a/flow/envs/multiagent/i210.py b/flow/envs/multiagent/i210.py index 25290e192..3be6cef16 100644 --- a/flow/envs/multiagent/i210.py +++ b/flow/envs/multiagent/i210.py @@ -321,18 +321,15 @@ def get_state(self): self.rl_id_to_idx_map[key] = self.index_counter self.idx_to_rl_id_map[self.index_counter] = key self.index_counter += 1 - if self.index_counter > self.max_num_agents: + if self.index_counter >= self.max_num_agents: break veh_info = super().get_state() # TODO(@evinitsky) think this doesn't have to be a deepcopy veh_info_copy = deepcopy(self.default_state) # id_list = zip(list(range(self.max_num_agents)), rl_ids) - try: - veh_info_copy.update({self.rl_id_to_idx_map[rl_id]: veh_info[rl_id] - for rl_id in self.rl_id_to_idx_map.keys()}) - except: - import ipdb; ipdb.set_trace() + veh_info_copy.update({self.rl_id_to_idx_map[rl_id]: veh_info[rl_id] + for rl_id in self.rl_id_to_idx_map.keys()}) # print('time to update copy is ', time() - t) veh_info = veh_info_copy # print('state time is ', time() - t) diff --git a/scripts/ray_autoscale.yaml b/scripts/ray_autoscale.yaml index 8df447496..0b0d573a8 100644 --- a/scripts/ray_autoscale.yaml +++ b/scripts/ray_autoscale.yaml @@ -1,4 +1,4 @@ -\# cluster.yaml ========================================= +# cluster.yaml ========================================= # An unique identifier for the head node and workers of this cluster. cluster_name: test # @@ -32,7 +32,7 @@ auth: # By default Ray creates a new private keypair, but you can also use your own. # If you do so, make sure to also set "KeyName" in the head and worker node # configurations below. - ssh_private_key: /Users/eugenevinitsky/.ssh/MyKeyPair.pem + ssh_private_key: /Users/eugenevinitsky/.ssh/MyKeyPair2.pem # Provider-specific config for the head node, e.g. instance type. By default # Ray will auto-configure unspecified fields such as SubnetId and KeyName. 
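Because MADDPG needs a fixed roster of agents, the environment changes earlier in this patch rebuild an rl_id-to-slot map on every step, cap it at max_num_agents, pad the unused slots with a default state, and translate slot-indexed actions back to vehicle ids before applying accelerations. A small simulator-free sketch of that bookkeeping (helper names and the toy ids are made up):

from collections import OrderedDict

def build_slot_maps(rl_ids, max_num_agents):
    # map a variable set of vehicle ids onto fixed integer slots
    rl_id_to_idx, idx_to_rl_id = OrderedDict(), OrderedDict()
    for idx, rl_id in enumerate(rl_ids[:max_num_agents]):
        rl_id_to_idx[rl_id] = idx
        idx_to_rl_id[idx] = rl_id
    return rl_id_to_idx, idx_to_rl_id

def pad_state(per_agent_obs, rl_id_to_idx, default_state):
    # real observations where an agent exists, the default everywhere else
    state = dict(default_state)
    state.update({idx: per_agent_obs[rl_id] for rl_id, idx in rl_id_to_idx.items()})
    return state

rl_id_to_idx, idx_to_rl_id = build_slot_maps(['av_3', 'av_7'], max_num_agents=5)
default = {i: [0.0, 0.0, 0.0] for i in range(5)}
print(pad_state({'av_3': [1, 2, 3], 'av_7': [4, 5, 6]}, rl_id_to_idx, default))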
@@ -40,7 +40,8 @@ auth: # http://boto3.readthedocs.io/en/latest/reference/services/ec2.html#EC2.ServiceResource.create_instances head_node: InstanceType: c4.4xlarge - ImageId: ami-0b489700e7f810707 # Flow AMI (Ubuntu) + ImageId: ami-09544298704576518 # Flow AMI (Ubuntu) + KeyName: MyKeyPair2 InstanceMarketOptions: MarketType: spot #Additional options can be found in the boto docs, e.g. @@ -55,7 +56,8 @@ head_node: # http://boto3.readthedocs.io/en/latest/reference/services/ec2.html#EC2.ServiceResource.create_instances worker_nodes: InstanceType: c4.4xlarge - ImageId: ami-0b489700e7f810707 # Flow AMI (Ubuntu) + ImageId: ami-09544298704576518 # Flow AMI (Ubuntu) + KeyName: MyKeyPair2 #Run workers on spot by default. Comment this out to use on-demand. InstanceMarketOptions: @@ -75,7 +77,6 @@ head_setup_commands: - pip install boto3==1.10.45 # 1.4.8 adds InstanceMarketOptions - pip install awscli==1.16.309 - pip install stable-baselines - - pip install torch==1.4.0 - pip install pytz - pip install torch==1.3.1 - pip install tabulate diff --git a/scripts/run_exps.sh b/scripts/run_exps.sh new file mode 100755 index 000000000..6774c75ae --- /dev/null +++ b/scripts/run_exps.sh @@ -0,0 +1,5 @@ +#!/bin/bash + +ray exec ray_autoscale.yaml "python flow/examples/train.py multiagent_i210 i210_reroute_test --algorithm PPO +--num_iterations 200 --num_cpus 12 --num_rollouts 12 --rl_trainer rllib --use_s3" --start --stop \ +--cluster-name=ev_i210_test --tmux \ No newline at end of file From ff2ca54b2a281387164de9bb1b290cbf53b2883d Mon Sep 17 00:00:00 2001 From: Eugene Vinitsky Date: Sat, 9 May 2020 19:11:44 -0700 Subject: [PATCH 17/85] But what if we just catch and ignore the exception --- flow/envs/multiagent/i210.py | 17 ++++++++++------- 1 file changed, 10 insertions(+), 7 deletions(-) diff --git a/flow/envs/multiagent/i210.py b/flow/envs/multiagent/i210.py index 3be6cef16..7c70653a6 100644 --- a/flow/envs/multiagent/i210.py +++ b/flow/envs/multiagent/i210.py @@ -219,13 +219,16 @@ def additional_command(self): # TODO(@evinitsky) select the lane and speed a bit more cleanly # Note, the position is 10 so you are not overlapping with the inflow car that is being removed. # this allows the vehicle to be immediately inserted. 
- self.k.vehicle.add( - veh_id=veh_id, - edge=self.entrance_edge, - type_id=str(type_id), - lane=str(lane), - pos="20.0", - speed="23.0") + try: + self.k.vehicle.add( + veh_id=veh_id, + edge=self.entrance_edge, + type_id=str(type_id), + lane=str(lane), + pos="20.0", + speed="23.0") + except Exception as e: + print(e) departed_ids = self.k.vehicle.get_departed_ids() if len(departed_ids) > 0: From adfde0e7a1e4bea68942b51291f839cf941f07b5 Mon Sep 17 00:00:00 2001 From: Eugene Vinitsky Date: Sat, 9 May 2020 19:35:39 -0700 Subject: [PATCH 18/85] Minor change to AMI to get sUMO version 1.1.0 --- scripts/ray_autoscale.yaml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/scripts/ray_autoscale.yaml b/scripts/ray_autoscale.yaml index 0b0d573a8..9be491d33 100644 --- a/scripts/ray_autoscale.yaml +++ b/scripts/ray_autoscale.yaml @@ -40,7 +40,7 @@ auth: # http://boto3.readthedocs.io/en/latest/reference/services/ec2.html#EC2.ServiceResource.create_instances head_node: InstanceType: c4.4xlarge - ImageId: ami-09544298704576518 # Flow AMI (Ubuntu) + ImageId: ami-0c047f3ddd3939b30 # Flow AMI (Ubuntu) KeyName: MyKeyPair2 InstanceMarketOptions: MarketType: spot @@ -56,7 +56,7 @@ head_node: # http://boto3.readthedocs.io/en/latest/reference/services/ec2.html#EC2.ServiceResource.create_instances worker_nodes: InstanceType: c4.4xlarge - ImageId: ami-09544298704576518 # Flow AMI (Ubuntu) + ImageId: ami-0c047f3ddd3939b30 # Flow AMI (Ubuntu) KeyName: MyKeyPair2 #Run workers on spot by default. Comment this out to use on-demand. From 05561759642cbba2813c79ee00176ef994b8504b Mon Sep 17 00:00:00 2001 From: Eugene Vinitsky Date: Sun, 10 May 2020 16:44:26 -0700 Subject: [PATCH 19/85] Add MPG reward --- .../exp_configs/non_rl/i210_subnetwork.py | 49 ++++++++++---- examples/exp_configs/non_rl/straight_road.py | 5 +- .../rl/multiagent/multiagent_i210.py | 4 +- .../rl/multiagent/multiagent_i210_maddpg.py | 2 + .../rl/multiagent/multiagent_straight_road.py | 6 +- .../multiagent_straight_road_maddpg.py | 2 + examples/train.py | 10 ++- flow/core/experiment.py | 4 +- flow/core/kernel/vehicle/base.py | 15 +++++ flow/core/kernel/vehicle/traci.py | 11 +++- flow/core/rewards.py | 64 +++++++++++++++++++ flow/envs/multiagent/i210.py | 43 ++++++++----- flow/visualize/visualizer_rllib.py | 3 + scripts/run_exps.sh | 10 ++- 14 files changed, 188 insertions(+), 40 deletions(-) diff --git a/examples/exp_configs/non_rl/i210_subnetwork.py b/examples/exp_configs/non_rl/i210_subnetwork.py index 1065c853b..4e640e873 100644 --- a/examples/exp_configs/non_rl/i210_subnetwork.py +++ b/examples/exp_configs/non_rl/i210_subnetwork.py @@ -4,6 +4,7 @@ import numpy as np from flow.controllers.car_following_models import IDMController +from flow.controllers.velocity_controllers import FollowerStopper from flow.core.params import SumoParams from flow.core.params import EnvParams from flow.core.params import NetParams @@ -11,31 +12,36 @@ from flow.core.params import VehicleParams from flow.core.params import InitialConfig from flow.core.params import InFlows +from flow.core.rewards import miles_per_gallon import flow.config as config from flow.envs import TestEnv from flow.networks.i210_subnetwork import I210SubNetwork, EDGES_DISTRIBUTION +PENETRATION_RATE = 10.0 +HIGHWAY_INFLOW_RATE = 10800 + # create the base vehicle type that will be used for inflows vehicles = VehicleParams() +# human vehicles vehicles.add( "human", num_vehicles=0, lane_change_params=SumoLaneChangeParams( lane_change_mode="strategic", ), - 
acceleration_controller=(IDMController, { - "a": 0.3, "b": 2.0, "noise": 0.5 - }), + acceleration_controller=(IDMController, {"a": .3, "b": 2.0, "noise": 0.5}), ) +if PENETRATION_RATE > 0.0: + vehicles.add( + "av", + num_vehicles=0, + acceleration_controller=(FollowerStopper, {"v_des": 12.0}), + ) + + inflow = InFlows() -# main highway -inflow.add( - veh_type="human", - edge="119257914", - vehs_per_hour=10800, - departLane="random", - departSpeed=23) + # on ramp # inflow.add( # veh_type="human", @@ -50,6 +56,23 @@ # departLane="random", # departSpeed=20) +inflow.add( + veh_type="human", + edge="119257914", + vehs_per_hour=int(HIGHWAY_INFLOW_RATE * (1 - PENETRATION_RATE / 100)), + depart_lane="free", + depart_speed="23", + name="idm_highway_inflow") + +if PENETRATION_RATE > 0.0: + inflow.add( + veh_type="av", + edge="119257914", + vehs_per_hour=int(HIGHWAY_INFLOW_RATE * (PENETRATION_RATE / 100)), + depart_lane="free", + depart_speed="23", + name="av_highway_inflow") + NET_TEMPLATE = os.path.join( config.PROJECT_PATH, "examples/exp_configs/templates/sumo/test2.net.xml") @@ -71,13 +94,14 @@ sim=SumoParams( sim_step=0.5, render=False, - color_by_speed=True, + color_by_speed=False, use_ballistic=True ), # environment related parameters (see flow.core.params.EnvParams) env=EnvParams( - horizon=4500, + horizon=2000, + warmup_steps=400 ), # network-related parameters (see flow.core.params.NetParams and the @@ -109,4 +133,5 @@ "avg_density": lambda env: 5 * 1000 * len(env.k.vehicle.get_ids_by_edge( edge_id)) / (env.k.network.edge_length(edge_id) * env.k.network.num_lanes(edge_id)), + "mpg": lambda env: miles_per_gallon(env, env.k.vehicle.get_ids(), gain=1.0) } diff --git a/examples/exp_configs/non_rl/straight_road.py b/examples/exp_configs/non_rl/straight_road.py index c557ce836..6d9da03d0 100644 --- a/examples/exp_configs/non_rl/straight_road.py +++ b/examples/exp_configs/non_rl/straight_road.py @@ -9,6 +9,7 @@ from flow.controllers.velocity_controllers import FollowerStopper from flow.core.params import EnvParams, NetParams, InitialConfig, InFlows, \ VehicleParams, SumoParams, SumoLaneChangeParams +from flow.core.rewards import miles_per_gallon from flow.networks import HighwayNetwork from flow.envs import TestEnv from flow.networks.highway import ADDITIONAL_NET_PARAMS @@ -58,7 +59,7 @@ vehicles.add( "av", num_vehicles=0, - acceleration_controller=(FollowerStopper, {"v_des": 18.0}), + acceleration_controller=(FollowerStopper, {"v_des": 12.0}), ) # add human vehicles on the highway @@ -128,4 +129,6 @@ custom_callables = { "avg_speed": lambda env: np.nan_to_num(np.mean( env.k.vehicle.get_speed(env.k.vehicle.get_ids_by_edge(['highway_0', 'highway_1'])))), + "mpg": lambda env: miles_per_gallon(env, env.k.vehicle.get_ids(), gain=1.0) + } diff --git a/examples/exp_configs/rl/multiagent/multiagent_i210.py b/examples/exp_configs/rl/multiagent/multiagent_i210.py index a2f713b8c..2917e7794 100644 --- a/examples/exp_configs/rl/multiagent/multiagent_i210.py +++ b/examples/exp_configs/rl/multiagent/multiagent_i210.py @@ -45,9 +45,11 @@ 'lead_obs': True, # whether to add in a reward for the speed of nearby vehicles "local_reward": True, + # whether to use the MPG reward. 
Otherwise, defaults to a target velocity reward + "mpg_reward": True, # whether to reroute vehicles once they have exited "reroute_on_exit": True, - 'target_velocity': 18, + 'target_velocity': 12.0, }) # CREATE VEHICLE TYPES AND INFLOWS diff --git a/examples/exp_configs/rl/multiagent/multiagent_i210_maddpg.py b/examples/exp_configs/rl/multiagent/multiagent_i210_maddpg.py index 58c1ae63a..8ae041360 100644 --- a/examples/exp_configs/rl/multiagent/multiagent_i210_maddpg.py +++ b/examples/exp_configs/rl/multiagent/multiagent_i210_maddpg.py @@ -38,6 +38,8 @@ 'lead_obs': True, # whether to add in a reward for the speed of nearby vehicles "local_reward": True, + # whether to use the MPG reward. Otherwise, defaults to a target velocity reward + "mpg_reward": True, "num_actions": 5, "max_num_agents": 200 }) diff --git a/examples/exp_configs/rl/multiagent/multiagent_straight_road.py b/examples/exp_configs/rl/multiagent/multiagent_straight_road.py index a15471539..916540ee4 100644 --- a/examples/exp_configs/rl/multiagent/multiagent_straight_road.py +++ b/examples/exp_configs/rl/multiagent/multiagent_straight_road.py @@ -47,10 +47,12 @@ 'max_accel': 2.6, 'max_decel': 4.5, 'target_velocity': 18, - 'local_reward': True, + 'local_reward': False, 'lead_obs': True, # whether to reroute vehicles once they have exited - "reroute_on_exit": True + "reroute_on_exit": True, + # whether to use the MPG reward. Otherwise, defaults to a target velocity reward + "mpg_reward": True, }) diff --git a/examples/exp_configs/rl/multiagent/multiagent_straight_road_maddpg.py b/examples/exp_configs/rl/multiagent/multiagent_straight_road_maddpg.py index 73d5d1f0c..6aa69503c 100644 --- a/examples/exp_configs/rl/multiagent/multiagent_straight_road_maddpg.py +++ b/examples/exp_configs/rl/multiagent/multiagent_straight_road_maddpg.py @@ -52,6 +52,8 @@ 'max_num_agents': 5, # whether to reroute vehicles once they have exited "reroute_on_exit": False, + # whether to use the MPG reward. 
Otherwise, defaults to a target velocity reward + "mpg_reward": True }) diff --git a/examples/train.py b/examples/train.py index 16a239e18..a02d169ac 100644 --- a/examples/train.py +++ b/examples/train.py @@ -36,7 +36,7 @@ from ray.rllib.agents.registry import get_agent_class from flow.core.util import ensure_dir -from flow.core.rewards import energy_consumption +from flow.core.rewards import energy_consumption, miles_per_gallon from flow.utils.registry import env_constructor from flow.utils.rllib import FlowParamsEncoder, get_flow_params from flow.utils.registry import make_create_env @@ -199,7 +199,7 @@ def setup_exps_rllib(flow_params, config["horizon"] = horizon config["model"].update({"fcnet_hiddens": [32, 32, 32]}) config["train_batch_size"] = horizon * n_rollouts - config["gamma"] = 0.999 # discount rate + config["gamma"] = 0.995 # discount rate config["use_gae"] = True config["lambda"] = 0.97 config["kl_target"] = 0.02 @@ -238,6 +238,8 @@ def on_episode_start(info): episode.user_data["avg_speed"] = [] episode.user_data["avg_speed_avs"] = [] episode.user_data["avg_energy"] = [] + episode.user_data["avg_mpg"] = [] + def on_episode_step(info): episode = info["episode"] @@ -251,6 +253,8 @@ def on_episode_step(info): if not np.isnan(av_speed): episode.user_data["avg_speed_avs"].append(av_speed) episode.user_data["avg_energy"].append(energy_consumption(env)) + episode.user_data["avg_mpg"].append(miles_per_gallon(env, env.k.vehicle.get_ids())) + def on_episode_end(info): episode = info["episode"] @@ -259,6 +263,8 @@ def on_episode_end(info): avg_speed_avs = np.mean(episode.user_data["avg_speed_avs"]) episode.custom_metrics["avg_speed_avs"] = avg_speed_avs episode.custom_metrics["avg_energy_per_veh"] = np.mean(episode.user_data["avg_energy"]) + episode.custom_metrics["avg_mpg_per_veh"] = np.mean(episode.user_data["avg_mpg"]) + config["callbacks"] = {"on_episode_start": tune.function(on_episode_start), "on_episode_step": tune.function(on_episode_step), diff --git a/flow/core/experiment.py b/flow/core/experiment.py index 69a78cb0e..61c960c4b 100755 --- a/flow/core/experiment.py +++ b/flow/core/experiment.py @@ -157,8 +157,8 @@ def rl_actions(*_): for (key, lambda_func) in self.custom_callables.items(): custom_vals[key].append(lambda_func(self.env)) - if done: - break + # if done: + # break # Store the information from the run in info_dict. outflow = self.env.k.vehicle.get_outflow_rate(int(500)) diff --git a/flow/core/kernel/vehicle/base.py b/flow/core/kernel/vehicle/base.py index c68d68c3a..aae98d134 100644 --- a/flow/core/kernel/vehicle/base.py +++ b/flow/core/kernel/vehicle/base.py @@ -297,6 +297,21 @@ def get_num_not_departed(self): """ raise NotImplementedError + def get_fuel_consumption(self, veh_id, error=-1001): + """Return the fuel consumption in gallons/s of the specified vehicle. + + Parameters + ---------- + veh_id : str or list of str + vehicle id, or list of vehicle ids + error : any, optional + value that is returned if the vehicle is not found + + Returns + ------- + float + """ + def get_speed(self, veh_id, error=-1001): """Return the speed of the specified vehicle.
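The kernel and reward changes that follow subscribe to VAR_FUELCONSUMPTION and multiply the raw reading by 0.000264172, which is the number of US gallons in one milliliter, so the raw value is presumably reported in ml/s; miles per gallon then falls out as (speed in m/s) / (gallons per second) / 1609. A worked example of that arithmetic (the speed and fuel figures are made up):

ML_PER_GALLON = 3785.41                # one US gallon in milliliters
GALLONS_PER_ML = 1.0 / ML_PER_GALLON   # ~0.000264172, the factor used below

def mpg_example(speed_m_s, fuel_ml_s):
    # convert the raw fuel reading to gallons/s, then meters/gallon, then miles/gallon
    gallons_per_s = fuel_ml_s * GALLONS_PER_ML
    meters_per_gallon = speed_m_s / gallons_per_s
    return meters_per_gallon / 1609.0

# e.g. cruising at 24 m/s while burning 2.0 ml of fuel per second
print(round(mpg_example(24.0, 2.0), 1))  # ~28.2 mpg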
diff --git a/flow/core/kernel/vehicle/traci.py b/flow/core/kernel/vehicle/traci.py index 3439e98cc..cac99e380 100644 --- a/flow/core/kernel/vehicle/traci.py +++ b/flow/core/kernel/vehicle/traci.py @@ -335,7 +335,8 @@ def _add_departed(self, veh_id, veh_type): tc.VAR_EDGES, tc.VAR_POSITION, tc.VAR_ANGLE, - tc.VAR_SPEED_WITHOUT_TRACI + tc.VAR_SPEED_WITHOUT_TRACI, + tc.VAR_FUELCONSUMPTION ]) self.kernel_api.vehicle.subscribeLeader(veh_id, 2000) @@ -370,6 +371,8 @@ def _add_departed(self, veh_id, veh_type): self.kernel_api.vehicle.getLaneIndex(veh_id) self.__sumo_obs[veh_id][tc.VAR_SPEED] = \ self.kernel_api.vehicle.getSpeed(veh_id) + self.__sumo_obs[veh_id][tc.VAR_FUELCONSUMPTION] = \ + self.kernel_api.vehicle.getFuelConsumption(veh_id) # make sure that the order of rl_ids is kept sorted self.__rl_ids.sort() @@ -533,6 +536,12 @@ def get_num_not_departed(self): """See parent class.""" return self.num_not_departed + def get_fuel_consumption(self, veh_id, error=-1001): + """Return fuel consumption in gallons/s.""" + if isinstance(veh_id, (list, np.ndarray)): + return [self.get_fuel_consumption(vehID, error) for vehID in veh_id] + return self.__sumo_obs.get(veh_id, {}).get(tc.VAR_FUELCONSUMPTION, error) * 0.000264172 + def get_previous_speed(self, veh_id, error=-1001): """See parent class.""" if isinstance(veh_id, (list, np.ndarray)): diff --git a/flow/core/rewards.py b/flow/core/rewards.py index 6de472af2..5361d16a6 100755 --- a/flow/core/rewards.py +++ b/flow/core/rewards.py @@ -330,3 +330,67 @@ def energy_consumption(env, gain=.001): power += M * speed * accel + M * g * Cr * speed + 0.5 * rho * A * Ca * speed ** 3 return -gain * power + + +def veh_energy_consumption(env, veh_id, gain=.001): + """Calculate power consumption of a vehicle. + + Assumes vehicle is an average sized vehicle. + The power calculated here is the lower bound of the actual power consumed + by a vehicle. + """ + power = 0 + + M = 1200 # mass of average sized vehicle (kg) + g = 9.81 # gravitational acceleration (m/s^2) + Cr = 0.005 # rolling resistance coefficient + Ca = 0.3 # aerodynamic drag coefficient + rho = 1.225 # air density (kg/m^3) + A = 2.6 # vehicle cross sectional area (m^2) + speed = env.k.vehicle.get_speed(veh_id) + prev_speed = env.k.vehicle.get_previous_speed(veh_id) + + accel = abs(speed - prev_speed) / env.sim_step + + power += M * speed * accel + M * g * Cr * speed + 0.5 * rho * A * Ca * speed ** 3 + + return -gain * power + + +def miles_per_gallon(env, veh_id=None, gain=.001): + """Calculate mpg of either a particular vehicle or the total average of all the vehilces. + + Assumes vehicle is an average sized vehicle. + The power calculated here is the lower bound of the actual power consumed + by a vehicle. 
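    Fuel rates come from ``get_fuel_consumption`` in gallons per second, so the
    value returned here is an instantaneous miles-per-gallon estimate (speed
    divided by fuel rate); the ``gain`` argument is currently unused.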
+ """ + mpg = 0 + counter = 0 + # engine_efficiency = 0.2 + # joules_per_gallon = 120 * 10**6 + if not isinstance(veh_id, list): + speed = env.k.vehicle.get_speed(veh_id) + # convert to be positive since the above is a penalty + gallons_per_s = env.k.vehicle.get_fuel_consumption(veh_id) + if gallons_per_s > 0 and speed >= 0.0: + # meters / gallon is (v * \delta t) / (gallons_s * \delta t) + mpg = speed / gallons_per_s + else: + for veh_id in env.k.vehicle.get_ids(): + speed = env.k.vehicle.get_speed(veh_id) + # convert to be positive since the above is a penalty + gallons_per_s = env.k.vehicle.get_fuel_consumption(veh_id) + if gallons_per_s > 0 and speed >= 0.0: + counter += 1 + # meters / gallon is (v * \delta t) / (gallons_per_s * \delta t) + mpg += speed / gallons_per_s + if counter > 0: + mpg /= counter + + # convert from meters per gallon to miles per gallon + mpg /= 1609.0 + # mpg *= engine_efficiency + # mpg *= joules_per_gallon + + return mpg + diff --git a/flow/envs/multiagent/i210.py b/flow/envs/multiagent/i210.py index 7c70653a6..d8e3bc3d6 100644 --- a/flow/envs/multiagent/i210.py +++ b/flow/envs/multiagent/i210.py @@ -7,6 +7,7 @@ from gym.spaces import Box, Discrete, Dict import numpy as np +from flow.core.rewards import miles_per_gallon from flow.envs.multiagent.base import MultiEnv # largest number of lanes on any given edge in the network @@ -73,6 +74,7 @@ def __init__(self, env_params, sim_params, network, simulator='traci'): self.num_enter_lanes = 5 self.entrance_edge = "119257914" self.exit_edge = "119257908#2" + self.mpg_reward = env_params.additional_params["mpg_reward"] self.leader = [] @property @@ -159,22 +161,28 @@ def compute_reward(self, rl_actions, **kwargs): des_speed = self.env_params.additional_params["target_velocity"] for rl_id in self.k.vehicle.get_rl_ids(): rewards[rl_id] = 0 - speeds = [] - follow_speed = self.k.vehicle.get_speed(self.k.vehicle.get_follower(rl_id)) - if follow_speed >= 0: - speeds.append(follow_speed) - if self.k.vehicle.get_speed(rl_id) >= 0: - speeds.append(self.k.vehicle.get_speed(rl_id)) - if len(speeds) > 0: - # rescale so the critic can estimate it quickly - rewards[rl_id] = np.mean([(des_speed - np.abs(speed - des_speed)) ** 2 - for speed in speeds]) / (des_speed ** 2) + if self.mpg_reward: + rewards[rl_id] = miles_per_gallon(self, rl_id) / 100.0 + else: + speeds = [] + follow_speed = self.k.vehicle.get_speed(self.k.vehicle.get_follower(rl_id)) + if follow_speed >= 0: + speeds.append(follow_speed) + if self.k.vehicle.get_speed(rl_id) >= 0: + speeds.append(self.k.vehicle.get_speed(rl_id)) + if len(speeds) > 0: + # rescale so the critic can estimate it quickly + rewards[rl_id] = np.mean([(des_speed - np.abs(speed - des_speed)) ** 2 + for speed in speeds]) / (des_speed ** 2) else: - speeds = self.k.vehicle.get_speed(self.k.vehicle.get_ids()) - des_speed = self.env_params.additional_params["target_velocity"] - # rescale so the critic can estimate it quickly - reward = np.nan_to_num(np.mean([(des_speed - np.abs(speed - des_speed)) ** 2 - for speed in speeds]) / (des_speed ** 2)) + if self.mpg_reward: + reward = np.nan_to_num(miles_per_gallon(self, self.k.vehicle.get_ids())) / 100.0 + else: + speeds = self.k.vehicle.get_speed(self.k.vehicle.get_ids()) + des_speed = self.env_params.additional_params["target_velocity"] + # rescale so the critic can estimate it quickly + reward = np.nan_to_num(np.mean([(des_speed - np.abs(speed - des_speed)) ** 2 + for speed in speeds]) / (des_speed ** 2)) rewards = {rl_id: reward for rl_id in 
self.k.vehicle.get_rl_ids()} return rewards @@ -342,7 +350,10 @@ def get_state(self): def compute_reward(self, rl_actions, **kwargs): # There has to be one global reward for qmix t = time() - reward = np.nan_to_num(np.mean(self.k.vehicle.get_speed(self.k.vehicle.get_ids()))) / (20 * self.env_params.horizon) + if self.mpg_reward: + reward = np.nan_to_num(miles_per_gallon(self, self.k.vehicle.get_ids())) + else: + reward = np.nan_to_num(np.mean(self.k.vehicle.get_speed(self.k.vehicle.get_ids()))) / (20 * self.env_params.horizon) temp_reward_dict = {idx: reward for idx in range(self.max_num_agents)} # print('reward time is ', time() - t) diff --git a/flow/visualize/visualizer_rllib.py b/flow/visualize/visualizer_rllib.py index c1dd83193..7ae079de5 100644 --- a/flow/visualize/visualizer_rllib.py +++ b/flow/visualize/visualizer_rllib.py @@ -160,6 +160,9 @@ def visualizer_rllib(args): else: env = gym.make(env_name) + if hasattr(env, "reroute_on_exit"): + env.reroute_on_exit = False + if args.render_mode == 'sumo_gui': env.sim_params.render = True # set to True after initializing agent and env diff --git a/scripts/run_exps.sh b/scripts/run_exps.sh index 6774c75ae..03e694c4f 100755 --- a/scripts/run_exps.sh +++ b/scripts/run_exps.sh @@ -1,5 +1,9 @@ #!/bin/bash -ray exec ray_autoscale.yaml "python flow/examples/train.py multiagent_i210 i210_reroute_test --algorithm PPO ---num_iterations 200 --num_cpus 12 --num_rollouts 12 --rl_trainer rllib --use_s3" --start --stop \ ---cluster-name=ev_i210_test --tmux \ No newline at end of file +#ray exec ray_autoscale.yaml "python flow/examples/train.py multiagent_i210 i210_reroute_test --algorithm PPO \ +#--num_iterations 200 --num_cpus 12 --num_rollouts 12 --rl_trainer rllib --use_s3" --start --stop \ +#--cluster-name=ev_i210_test --tmux + +ray exec ray_autoscale.yaml "python flow/examples/train.py multiagent_i210 i210_reroute_test2 --algorithm PPO \ +--num_iterations 200 --num_cpus 4 --num_rollouts 4 --rl_trainer rllib --use_s3" --start --stop \ +--cluster-name=ev_i210_test2 --tmux \ No newline at end of file From 186105137b5f105e243a932147e157d5eac0e38f Mon Sep 17 00:00:00 2001 From: Eugene Vinitsky Date: Sun, 10 May 2020 16:46:35 -0700 Subject: [PATCH 20/85] Commit for local reward tests --- .../rl/multiagent/multiagent_straight_road.py | 2 +- scripts/run_exps.sh | 15 +++++++++++++-- 2 files changed, 14 insertions(+), 3 deletions(-) diff --git a/examples/exp_configs/rl/multiagent/multiagent_straight_road.py b/examples/exp_configs/rl/multiagent/multiagent_straight_road.py index 916540ee4..1408c0e6b 100644 --- a/examples/exp_configs/rl/multiagent/multiagent_straight_road.py +++ b/examples/exp_configs/rl/multiagent/multiagent_straight_road.py @@ -47,7 +47,7 @@ 'max_accel': 2.6, 'max_decel': 4.5, 'target_velocity': 18, - 'local_reward': False, + 'local_reward': True, 'lead_obs': True, # whether to reroute vehicles once they have exited "reroute_on_exit": True, diff --git a/scripts/run_exps.sh b/scripts/run_exps.sh index 03e694c4f..794ee8f41 100755 --- a/scripts/run_exps.sh +++ b/scripts/run_exps.sh @@ -4,6 +4,17 @@ #--num_iterations 200 --num_cpus 12 --num_rollouts 12 --rl_trainer rllib --use_s3" --start --stop \ #--cluster-name=ev_i210_test --tmux -ray exec ray_autoscale.yaml "python flow/examples/train.py multiagent_i210 i210_reroute_test2 --algorithm PPO \ ---num_iterations 200 --num_cpus 4 --num_rollouts 4 --rl_trainer rllib --use_s3" --start --stop \ +#ray exec ray_autoscale.yaml "python flow/examples/train.py multiagent_i210 i210_reroute_test2 --algorithm 
PPO \ +##--num_iterations 200 --num_cpus 4 --num_rollouts 4 --rl_trainer rllib --use_s3" --start --stop \ +##--cluster-name=ev_i210_test2 --tmux + +# 5/10 +ray exec ray_autoscale.yaml "python flow/examples/train.py multiagent_straight_road \ +straight_road_reroute_local_rew_mpg --algorithm PPO \ +--num_iterations 200 --num_cpus 8 --num_rollouts84 --rl_trainer rllib --use_s3" --start --stop \ +--cluster-name=ev_i210_test1 --tmux + +ray exec ray_autoscale.yaml "python flow/examples/train.py multiagent_i210 \ +i210_reroute_local_rew_mpg --algorithm PPO \ +--num_iterations 200 --num_cpus 8 --num_rollouts84 --rl_trainer rllib --use_s3" --start --stop \ --cluster-name=ev_i210_test2 --tmux \ No newline at end of file From 2cd6329f8370b71107eaa4b7a20b4f7f9489347f Mon Sep 17 00:00:00 2001 From: Eugene Vinitsky Date: Sun, 10 May 2020 21:03:54 -0700 Subject: [PATCH 21/85] Add a centralized critic --- examples/exp_configs/non_rl/straight_road.py | 4 +- .../rl/multiagent/multiagent_i210.py | 2 + .../rl/multiagent/multiagent_straight_road.py | 2 + examples/train.py | 30 +- flow/algorithms/centralized_PPO.py | 533 ++++++++++++++++++ flow/algorithms/custom_ppo.py | 351 ++++++++++++ flow/algorithms/maddpg/maddpg.py | 2 +- flow/envs/multiagent/i210.py | 26 +- scripts/ray_autoscale.yaml | 5 +- scripts/run_exps.sh | 4 +- 10 files changed, 946 insertions(+), 13 deletions(-) create mode 100644 flow/algorithms/centralized_PPO.py create mode 100644 flow/algorithms/custom_ppo.py diff --git a/examples/exp_configs/non_rl/straight_road.py b/examples/exp_configs/non_rl/straight_road.py index 6d9da03d0..ecc296d22 100644 --- a/examples/exp_configs/non_rl/straight_road.py +++ b/examples/exp_configs/non_rl/straight_road.py @@ -23,7 +23,7 @@ # inflow rate on the highway in vehicles per hour HIGHWAY_INFLOW_RATE = 10800 / 5 # percentage of autonomous vehicles compared to human vehicles on highway -PENETRATION_RATE = 0.0 +PENETRATION_RATE = 10.0 # SET UP PARAMETERS FOR THE NETWORK @@ -99,7 +99,7 @@ # environment related parameters (see flow.core.params.EnvParams) env=EnvParams( horizon=HORIZON, - warmup_steps=0, + warmup_steps=400, sims_per_step=1, ), diff --git a/examples/exp_configs/rl/multiagent/multiagent_i210.py b/examples/exp_configs/rl/multiagent/multiagent_i210.py index 2917e7794..39aca472b 100644 --- a/examples/exp_configs/rl/multiagent/multiagent_i210.py +++ b/examples/exp_configs/rl/multiagent/multiagent_i210.py @@ -50,6 +50,8 @@ # whether to reroute vehicles once they have exited "reroute_on_exit": True, 'target_velocity': 12.0, + # how many AVs there can be at once (this is only for centralized critics) + "max_num_agents": 10 }) # CREATE VEHICLE TYPES AND INFLOWS diff --git a/examples/exp_configs/rl/multiagent/multiagent_straight_road.py b/examples/exp_configs/rl/multiagent/multiagent_straight_road.py index 1408c0e6b..64a16ad5a 100644 --- a/examples/exp_configs/rl/multiagent/multiagent_straight_road.py +++ b/examples/exp_configs/rl/multiagent/multiagent_straight_road.py @@ -53,6 +53,8 @@ "reroute_on_exit": True, # whether to use the MPG reward. 
Otherwise, defaults to a target velocity reward "mpg_reward": True, + # how many AVs there can be at once (this is only for centralized critics) + "max_num_agents": 10 }) diff --git a/examples/train.py b/examples/train.py index a02d169ac..8aa4b2a4f 100644 --- a/examples/train.py +++ b/examples/train.py @@ -197,13 +197,41 @@ def setup_exps_rllib(flow_params, config["num_workers"] = n_cpus config["horizon"] = horizon - config["model"].update({"fcnet_hiddens": [32, 32, 32]}) + config["model"].update({"fcnet_hiddens": [32, 32]}) config["train_batch_size"] = horizon * n_rollouts config["gamma"] = 0.995 # discount rate config["use_gae"] = True config["lambda"] = 0.97 config["kl_target"] = 0.02 config["num_sgd_iter"] = 10 + if flags.grid_search: + config["lambda"] = tune.grid_search([0.5, 0.9]) + config["lr"] = tune.grid_search([5e-4, 5e-5]) + elif alg_run == "CENTRALIZEDPPO": + from flow.algorithms.centralized_PPO import CCTrainer, CentralizedCriticModel + from ray.rllib.agents.ppo import DEFAULT_CONFIG + from ray.rllib.models import ModelCatalog + alg_run = CCTrainer + config = deepcopy(DEFAULT_CONFIG) + config['model']['custom_model'] = "cc_model" + config["model"]["custom_options"]["max_num_agents"] = flow_params['env'].additional_params['max_num_agents'] + config["model"]["custom_options"]["central_vf_size"] = 100 + + ModelCatalog.register_custom_model("cc_model", CentralizedCriticModel) + + config["num_workers"] = n_cpus + config["horizon"] = horizon + config["model"].update({"fcnet_hiddens": [32, 32]}) + config["train_batch_size"] = horizon * n_rollouts + config["gamma"] = 0.995 # discount rate + config["use_gae"] = True + config["lambda"] = 0.97 + config["kl_target"] = 0.02 + config["num_sgd_iter"] = 10 + if flags.grid_search: + config["lambda"] = tune.grid_search([0.5, 0.9]) + config["lr"] = tune.grid_search([5e-4, 5e-5]) + elif alg_run == "TD3": agent_cls = get_agent_class(alg_run) config = deepcopy(agent_cls._default_config) diff --git a/flow/algorithms/centralized_PPO.py b/flow/algorithms/centralized_PPO.py new file mode 100644 index 000000000..b574263e4 --- /dev/null +++ b/flow/algorithms/centralized_PPO.py @@ -0,0 +1,533 @@ +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function +"""An example of customizing PPO to leverage a centralized critic.""" + +import argparse +import numpy as np + +from gym.spaces import Dict + +from ray import tune +from ray.rllib.agents.ppo.ppo import PPOTrainer +from ray.rllib.agents.ppo.ppo_policy import PPOTFPolicy, KLCoeffMixin, BEHAVIOUR_LOGITS +from ray.rllib.evaluation.postprocessing import compute_advantages, \ + Postprocessing +from ray.rllib.policy.sample_batch import SampleBatch +from ray.rllib.policy.tf_policy import LearningRateSchedule, \ + EntropyCoeffSchedule, ACTION_LOGP +from ray.rllib.models.modelv2 import ModelV2 +from ray.rllib.models.tf.tf_modelv2 import TFModelV2 +from ray.rllib.models.tf.recurrent_tf_modelv2 import RecurrentTFModelV2 +from ray.rllib.models.model import restore_original_dimensions +from ray.rllib.utils.annotations import override +from ray.rllib.models.tf.fcnet_v2 import FullyConnectedNetwork +from ray.rllib.utils.explained_variance import explained_variance +from ray.rllib.utils import try_import_tf + + +tf = try_import_tf() + +CENTRAL_OBS = "central_obs" +OPPONENT_ACTION = "opponent_action" + +parser = argparse.ArgumentParser() +parser.add_argument("--stop", type=int, default=100000) + +#TODOy + +class CentralizedCriticModel(TFModelV2): + """Multi-agent model 
that implements a centralized VF.""" + # TODO(@evinitsky) make this work with more than boxes + + def __init__(self, obs_space, action_space, num_outputs, model_config, + name): + super(CentralizedCriticModel, self).__init__( + obs_space, action_space, num_outputs, model_config, name) + # Base of the model + self.model = FullyConnectedNetwork(obs_space, action_space, + num_outputs, model_config, name) + self.register_variables(self.model.variables()) + + # Central VF maps (obs, opp_ops, opp_act) -> vf_pred + self.max_num_agents = model_config['custom_options']['max_num_agents'] + self.obs_space_shape = obs_space.shape[0] + self.obs_space = obs_space + other_obs = tf.keras.layers.Input(shape=(obs_space.shape[0] * self.max_num_agents, ), name="central_obs") + central_vf_dense = tf.keras.layers.Dense( + model_config['custom_options']['central_vf_size'], activation=tf.nn.tanh, name="c_vf_dense")(other_obs) + central_vf_out = tf.keras.layers.Dense( + 1, activation=None, name="c_vf_out")(central_vf_dense) + self.central_vf = tf.keras.Model( + inputs=[other_obs], outputs=central_vf_out) + self.register_variables(self.central_vf.variables) + + def forward(self, input_dict, state, seq_lens): + return self.model.forward(input_dict, state, seq_lens) + + def central_value_function(self, central_obs): + return tf.reshape( + self.central_vf( + [central_obs]), [-1]) + + def value_function(self): + return self.model.value_function() # not used + + +# TODO(@evinitsky) support recurrence +class CentralizedCriticModelRNN(RecurrentTFModelV2): + """Example of using the Keras functional API to define a RNN model.""" + + def __init__(self, + obs_space, + action_space, + num_outputs, + model_config, + name, + hiddens_size=64, + cell_size=64): + super(CentralizedCriticModelRNN, self).__init__(obs_space, action_space, num_outputs, + model_config, name) + self.cell_size = cell_size + + # Define input layers + input_layer = tf.keras.layers.Input( + shape=(None, obs_space.shape[0]), name="inputs") + state_in_h = tf.keras.layers.Input(shape=(cell_size, ), name="h") + state_in_c = tf.keras.layers.Input(shape=(cell_size, ), name="c") + seq_in = tf.keras.layers.Input(shape=(), name="seq_in") + + # Preprocess observation with a hidden layer and send to LSTM cell + dense1 = tf.keras.layers.Dense( + hiddens_size, activation=tf.nn.relu, name="dense1")(input_layer) + lstm_out, state_h, state_c = tf.keras.layers.LSTM( + cell_size, return_sequences=True, return_state=True, name="lstm")( + inputs=dense1, + mask=tf.sequence_mask(seq_in), + initial_state=[state_in_h, state_in_c]) + + # Postprocess LSTM output with another hidden layer and compute values + logits = tf.keras.layers.Dense( + self.num_outputs, + activation=tf.keras.activations.linear, + name="logits")(lstm_out) + values = tf.keras.layers.Dense( + 1, activation=None, name="values")(lstm_out) + + # Create the RNN model + self.model = tf.keras.Model( + inputs=[input_layer, seq_in, state_in_h, state_in_c], + outputs=[logits, values, state_h, state_c]) + self.register_variables(self.model.variables) + self.model.summary() + + #TODO(@evinitsky) add layer sharing to the VF + # Create the centralized VF + # Central VF maps (obs, opp_ops, opp_act) -> vf_pred + self.max_num_agents = model_config.get("max_num_agents", 120) + self.obs_space_shape = obs_space.shape[0] + other_obs = tf.keras.layers.Input(shape=(obs_space.shape[0] * self.max_num_agents,), name="all_agent_obs") + central_vf_dense = tf.keras.layers.Dense( + model_config.get("central_vf_size", 64), 
activation=tf.nn.tanh, name="c_vf_dense")(other_obs) + central_vf_out = tf.keras.layers.Dense( + 1, activation=None, name="c_vf_out")(central_vf_dense) + self.central_vf = tf.keras.Model( + inputs=[other_obs], outputs=central_vf_out) + self.register_variables(self.central_vf.variables) + + @override(RecurrentTFModelV2) + def forward_rnn(self, inputs, state, seq_lens): + model_out, self._value_out, h, c = self.model([inputs, seq_lens] + + state) + return model_out, [h, c] + + @override(ModelV2) + def get_initial_state(self): + return [ + np.zeros(self.cell_size, np.float32), + np.zeros(self.cell_size, np.float32), + ] + + def central_value_function(self, central_obs): + return tf.reshape( + self.central_vf( + [central_obs]), [-1]) + + def value_function(self): + return tf.reshape(self._value_out, [-1]) # not used + + +class CentralizedValueMixin(object): + """Add methods to evaluate the central value function from the model.""" + + def __init__(self): + # TODO(@evinitsky) clean up naming + self.central_value_function = self.model.central_value_function( + self.get_placeholder(CENTRAL_OBS) + ) + + def compute_central_vf(self, central_obs): + feed_dict = { + self.get_placeholder(CENTRAL_OBS): central_obs, + } + return self.get_session().run(self.central_value_function, feed_dict) + + +# Grabs the opponent obs/act and includes it in the experience train_batch, +# and computes GAE using the central vf predictions. +def centralized_critic_postprocessing(policy, + sample_batch, + other_agent_batches=None, + episode=None): + if policy.loss_initialized(): + assert other_agent_batches is not None + + time_span = (sample_batch['t'][0], sample_batch['t'][-1]) + # there's a new problem here, namely that a segment might not be continuous due to the rerouting + other_agent_timespans = {agent_id: + (other_agent_batches[agent_id][1]["t"][0], + other_agent_batches[agent_id][1]["t"][-1]) + for agent_id in other_agent_batches.keys()} + other_agent_times = {agent_id: other_agent_batches[agent_id][1]["t"] + for agent_id in other_agent_batches.keys()} + agent_time = sample_batch['t'] + # find agents whose time overlaps with the current agent + rel_agents = {agent_id: other_agent_time for agent_id, + other_agent_time in + other_agent_timespans.items() + if time_overlap(time_span, other_agent_time)} + if len(rel_agents) > 0: + other_obs = {agent_id: + other_agent_batches[agent_id][1]["obs"].copy() + for agent_id in rel_agents.keys()} + # padded_agent_obs = {agent_id: + # overlap_and_pad_agent( + # time_span, + # rel_agent_time, + # other_obs[agent_id]) + # for agent_id, + # rel_agent_time in rel_agents.items()} + padded_agent_obs = {agent_id: + fill_missing( + agent_time, + other_agent_times[agent_id], + other_obs[agent_id]) + for agent_id, + rel_agent_time in rel_agents.items()} + # okay, now we need to stack and sort + central_obs_list = [padded_obs for padded_obs in padded_agent_obs.values()] + try: + central_obs_batch = np.hstack((sample_batch["obs"], np.hstack(central_obs_list))) + except: + import ipdb; ipdb.set_trace() + else: + central_obs_batch = sample_batch["obs"] + max_vf_agents = policy.model.max_num_agents + num_agents = len(rel_agents) + 1 + if num_agents < max_vf_agents: + diff = max_vf_agents - num_agents + zero_pad = np.zeros((central_obs_batch.shape[0], + policy.model.obs_space_shape * diff)) + central_obs_batch = np.hstack((central_obs_batch, + zero_pad)) + elif num_agents > max_vf_agents: + print("Too many agents!") + + # also record the opponent obs and actions in the trajectory + 
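    # (CENTRAL_OBS stacks this agent's obs with the time-aligned, zero-padded
    # obs of the agents it overlapped with, padded out to
    # max_num_agents * obs_space_shape columns; compute_central_vf consumes it
    # and its output overwrites SampleBatch.VF_PREDS before GAE is computed.)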
sample_batch[CENTRAL_OBS] = central_obs_batch + + # overwrite default VF prediction with the central VF + sample_batch[SampleBatch.VF_PREDS] = policy.compute_central_vf(sample_batch[CENTRAL_OBS]) + else: + # policy hasn't initialized yet, use zeros + #TODO(evinitsky) put in the right shape + obs_shape = sample_batch[SampleBatch.CUR_OBS].shape[1] + obs_shape = (1, obs_shape * (policy.model.max_num_agents)) + sample_batch[CENTRAL_OBS] = np.zeros(obs_shape) + # TODO(evinitsky) put in the right shape. Will break if actions aren't 1 + sample_batch[SampleBatch.VF_PREDS] = np.zeros(1, dtype=np.float32) + + completed = sample_batch["dones"][-1] + + # if not completed and policy.loss_initialized(): + # last_r = 0.0 + # else: + # next_state = [] + # for i in range(policy.num_state_tensors()): + # next_state.append([sample_batch["state_out_{}".format(i)][-1]]) + # last_r = policy.compute_central_vf(sample_batch[CENTRAL_OBS][-1][np.newaxis, ...])[0] + + batch = compute_advantages( + sample_batch, + 0.0, + policy.config["gamma"], + policy.config["lambda"], + use_gae=policy.config["use_gae"]) + return batch + + + +def time_overlap(time_span, agent_time): + """Check if agent_time overlaps with time_span""" + if agent_time[0] <= time_span[1] and agent_time[1] >= time_span[0]: + return True + else: + return False + + +def fill_missing(agent_time, other_agent_time, obs): + # shortcut, the two overlap perfectly + if np.sum(agent_time == other_agent_time) == agent_time.shape[0]: + return obs + new_obs = np.zeros((agent_time.shape[0], obs.shape[1])) + other_agent_time_set = set(other_agent_time) + for i, time in enumerate(agent_time): + if time in other_agent_time_set: + new_obs[i] = obs[np.where(other_agent_time == time)] + return new_obs + + +def overlap_and_pad_agent(time_span, agent_time, obs): + """take the part of obs that overlaps, pad to length time_span + Arguments: + time_span (tuple): tuple of the first and last time that the agent + of interest is in the system + agent_time (tuple): tuple of the first and last time that the + agent whose obs we are padding is in the system + obs (np.ndarray): observations of the agent whose time is + agent_time + """ + assert time_overlap(time_span, agent_time) + print(time_span) + print(agent_time) + if time_span[0] == 7 or agent_time[0] == 7: + import ipdb; ipdb.set_trace() + # FIXME(ev) some of these conditions can be combined + # no padding needed + if agent_time[0] == time_span[0] and agent_time[1] == time_span[1]: + if obs.shape[0] < 200: + import ipdb; ipdb.set_trace() + return obs + # agent enters before time_span starts and exits before time_span end + if agent_time[0] < time_span[0] and agent_time[1] < time_span[1]: + non_overlap_time = time_span[0] - agent_time[0] + missing_time = time_span[1] - agent_time[1] + overlap_obs = obs[non_overlap_time:] + padding = np.zeros((missing_time, obs.shape[1])) + obs_concat = np.concatenate((overlap_obs, padding)) + if obs_concat.shape[0] < 200: + import ipdb; ipdb.set_trace() + return obs_concat + # agent enters after time_span starts and exits after time_span ends + elif agent_time[0] > time_span[0] and agent_time[1] > time_span[1]: + non_overlap_time = agent_time[1] - time_span[1] + overlap_obs = obs[:-non_overlap_time] + missing_time = agent_time[0] - time_span[0] + padding = np.zeros((missing_time, obs.shape[1])) + obs_concat = np.concatenate((padding, overlap_obs)) + if obs_concat.shape[0] < 200: + import ipdb; ipdb.set_trace() + return obs_concat + # agent time is entirely contained in time_span + elif 
agent_time[0] >= time_span[0] and agent_time[1] <= time_span[1]: + missing_left = agent_time[0] - time_span[0] + missing_right = time_span[1] - agent_time[1] + obs_concat = obs + if missing_left > 0: + padding = np.zeros((missing_left, obs.shape[1])) + obs_concat = np.concatenate((padding, obs_concat)) + if missing_right > 0: + padding = np.zeros((missing_right, obs.shape[1])) + obs_concat = np.concatenate((obs_concat, padding)) + if obs_concat.shape[0] < 200: + import ipdb; ipdb.set_trace() + return obs_concat + # agent time totally contains time_span + elif agent_time[0] <= time_span[0] and agent_time[1] >= time_span[1]: + non_overlap_left = time_span[0] - agent_time[0] + non_overlap_right = agent_time[1] - time_span[1] + overlap_obs = obs + if non_overlap_left > 0: + overlap_obs = overlap_obs[non_overlap_left:] + if non_overlap_right > 0: + overlap_obs = overlap_obs[:-non_overlap_right] + if overlap_obs.shape[0] < 200: + import ipdb; ipdb.set_trace() + return overlap_obs + + +# Copied from PPO but optimizing the central value function +def loss_with_central_critic(policy, model, dist_class, train_batch): + CentralizedValueMixin.__init__(policy) + + logits, state = model.from_batch(train_batch) + action_dist = dist_class(logits, model) + + policy.loss_obj = PPOLoss( + policy.action_space, + dist_class, + model, + train_batch[Postprocessing.VALUE_TARGETS], + train_batch[Postprocessing.ADVANTAGES], + train_batch[SampleBatch.ACTIONS], + train_batch[BEHAVIOUR_LOGITS], + train_batch[ACTION_LOGP], + train_batch[SampleBatch.VF_PREDS], + action_dist, + policy.central_value_function, + policy.kl_coeff, + tf.ones_like(train_batch[Postprocessing.ADVANTAGES], dtype=tf.bool), + entropy_coeff=policy.entropy_coeff, + clip_param=policy.config["clip_param"], + vf_clip_param=policy.config["vf_clip_param"], + vf_loss_coeff=policy.config["vf_loss_coeff"], + use_gae=policy.config["use_gae"], + model_config=policy.config["model"]) + + return policy.loss_obj.loss + + +class PPOLoss(object): + def __init__(self, + action_space, + dist_class, + model, + value_targets, + advantages, + actions, + prev_logits, + prev_actions_logp, + vf_preds, + curr_action_dist, + value_fn, + cur_kl_coeff, + valid_mask, + entropy_coeff=0, + clip_param=0.1, + vf_clip_param=0.1, + vf_loss_coeff=1.0, + use_gae=True, + model_config=None): + """Constructs the loss for Proximal Policy Objective. + + Arguments: + action_space: Environment observation space specification. + dist_class: action distribution class for logits. + value_targets (Placeholder): Placeholder for target values; used + for GAE. + actions (Placeholder): Placeholder for actions taken + from previous model evaluation. + advantages (Placeholder): Placeholder for calculated advantages + from previous model evaluation. + prev_logits (Placeholder): Placeholder for logits output from + previous model evaluation. + prev_actions_logp (Placeholder): Placeholder for prob output from + previous model evaluation. + vf_preds (Placeholder): Placeholder for value function output + from previous model evaluation. + curr_action_dist (ActionDistribution): ActionDistribution + of the current model. + value_fn (Tensor): Current value function output Tensor. + cur_kl_coeff (Variable): Variable holding the current PPO KL + coefficient. + valid_mask (Tensor): A bool mask of valid input elements (#2992). + entropy_coeff (float): Coefficient of the entropy regularizer. 
+ clip_param (float): Clip parameter + vf_clip_param (float): Clip parameter for the value function + vf_loss_coeff (float): Coefficient of the value function loss + use_gae (bool): If true, use the Generalized Advantage Estimator. + model_config (dict): (Optional) model config for use in specifying + action distributions. + """ + + def reduce_mean_valid(t): + return tf.reduce_mean(tf.boolean_mask(t, valid_mask)) + + prev_dist = dist_class(prev_logits, model) + # Make loss functions. + logp_ratio = tf.exp(curr_action_dist.logp(actions) - prev_actions_logp) + action_kl = prev_dist.kl(curr_action_dist) + self.mean_kl = reduce_mean_valid(action_kl) + + curr_entropy = curr_action_dist.entropy() + self.mean_entropy = reduce_mean_valid(curr_entropy) + + surrogate_loss = tf.minimum( + advantages * logp_ratio, + advantages * tf.clip_by_value(logp_ratio, 1 - clip_param, + 1 + clip_param)) + self.mean_policy_loss = reduce_mean_valid(-surrogate_loss) + + if use_gae: + vf_loss1 = tf.square(value_fn - value_targets) + vf_clipped = vf_preds + tf.clip_by_value( + value_fn - vf_preds, -vf_clip_param, vf_clip_param) + vf_loss2 = tf.square(vf_clipped - value_targets) + vf_loss = tf.maximum(vf_loss1, vf_loss2) + self.mean_vf_loss = reduce_mean_valid(vf_loss) + loss = reduce_mean_valid( + -surrogate_loss + cur_kl_coeff * action_kl + + vf_loss_coeff * vf_loss - entropy_coeff * curr_entropy) + else: + self.mean_vf_loss = tf.constant(0.0) + loss = reduce_mean_valid(-surrogate_loss + + cur_kl_coeff * action_kl - + entropy_coeff * curr_entropy) + self.loss = loss + + +def new_ppo_surrogate_loss(policy, model, dist_class, train_batch): + loss = loss_with_central_critic(policy, model, dist_class, train_batch) + return loss + + +def setup_mixins(policy, obs_space, action_space, config): + # copied from PPO + KLCoeffMixin.__init__(policy, config) + EntropyCoeffSchedule.__init__(policy, config["entropy_coeff"], + config["entropy_coeff_schedule"]) + LearningRateSchedule.__init__(policy, config["lr"], config["lr_schedule"]) + # hack: put in a noop VF so some of the inherited PPO code runs + policy.value_function = tf.zeros( + tf.shape(policy.get_placeholder(SampleBatch.CUR_OBS))[0]) + + +def central_vf_stats(policy, train_batch, grads): + # Report the explained variance of the central value function. 
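    # (explained_variance is 1 - Var(targets - predictions) / Var(targets):
    # 1.0 means the central VF predicts the GAE value targets perfectly,
    # while values near zero or below mean it does no better than a constant.)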
+ return { + "vf_explained_var": explained_variance( + train_batch[Postprocessing.VALUE_TARGETS], + policy.central_value_function), + } + +def kl_and_loss_stats(policy, train_batch): + print(train_batch["rewards"]) + return { + "cur_kl_coeff": tf.cast(policy.kl_coeff, tf.float64), + "cur_lr": tf.cast(policy.cur_lr, tf.float64), + "total_loss": policy.loss_obj.loss, + "policy_loss": policy.loss_obj.mean_policy_loss, + "vf_loss": policy.loss_obj.mean_vf_loss, + "vf_explained_var": explained_variance( + train_batch[Postprocessing.VALUE_TARGETS], + policy.model.value_function()), + "vf_preds": train_batch[Postprocessing.VALUE_TARGETS], + "kl": policy.loss_obj.mean_kl, + "entropy": policy.loss_obj.mean_entropy, + "entropy_coeff": tf.cast(policy.entropy_coeff, tf.float64), + "avg_rew": train_batch["rewards"][-1] + } + +CCPPO = PPOTFPolicy.with_updates( + name="CCPPO", + postprocess_fn=centralized_critic_postprocessing, + loss_fn=new_ppo_surrogate_loss, + stats_fn=kl_and_loss_stats, + before_loss_init=setup_mixins, + grad_stats_fn=central_vf_stats, + mixins=[ + LearningRateSchedule, EntropyCoeffSchedule, KLCoeffMixin, + CentralizedValueMixin + ]) + +CCTrainer = PPOTrainer.with_updates(name="CCPPOTrainer", default_policy=CCPPO) \ No newline at end of file diff --git a/flow/algorithms/custom_ppo.py b/flow/algorithms/custom_ppo.py new file mode 100644 index 000000000..26c94af95 --- /dev/null +++ b/flow/algorithms/custom_ppo.py @@ -0,0 +1,351 @@ +"""PPO but we add in the outflow after the reward to the final reward""" +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import logging + +import numpy as np +import ray +from ray.rllib.agents.ppo.ppo import PPOTrainer +from ray.rllib.evaluation.postprocessing import compute_advantages, \ + Postprocessing +from ray.rllib.policy.sample_batch import SampleBatch +from ray.rllib.policy.tf_policy import LearningRateSchedule, \ + EntropyCoeffSchedule, ACTION_LOGP +from ray.rllib.policy.tf_policy_template import build_tf_policy +from ray.rllib.utils.explained_variance import explained_variance +from ray.rllib.utils.tf_ops import make_tf_callable +from ray.rllib.utils import try_import_tf + +tf = try_import_tf() + +logger = logging.getLogger(__name__) + +# Frozen logits of the policy that computed the action +BEHAVIOUR_LOGITS = "behaviour_logits" + + +class PPOLoss(object): + def __init__(self, + action_space, + dist_class, + model, + value_targets, + advantages, + actions, + prev_logits, + prev_actions_logp, + vf_preds, + curr_action_dist, + value_fn, + cur_kl_coeff, + valid_mask, + entropy_coeff=0, + clip_param=0.1, + vf_clip_param=0.1, + vf_loss_coeff=1.0, + use_gae=True, + model_config=None): + """Constructs the loss for Proximal Policy Objective. + + Arguments: + action_space: Environment observation space specification. + dist_class: action distribution class for logits. + value_targets (Placeholder): Placeholder for target values; used + for GAE. + actions (Placeholder): Placeholder for actions taken + from previous model evaluation. + advantages (Placeholder): Placeholder for calculated advantages + from previous model evaluation. + prev_logits (Placeholder): Placeholder for logits output from + previous model evaluation. + prev_actions_logp (Placeholder): Placeholder for prob output from + previous model evaluation. + vf_preds (Placeholder): Placeholder for value function output + from previous model evaluation. 
+ curr_action_dist (ActionDistribution): ActionDistribution + of the current model. + value_fn (Tensor): Current value function output Tensor. + cur_kl_coeff (Variable): Variable holding the current PPO KL + coefficient. + valid_mask (Tensor): A bool mask of valid input elements (#2992). + entropy_coeff (float): Coefficient of the entropy regularizer. + clip_param (float): Clip parameter + vf_clip_param (float): Clip parameter for the value function + vf_loss_coeff (float): Coefficient of the value function loss + use_gae (bool): If true, use the Generalized Advantage Estimator. + model_config (dict): (Optional) model config for use in specifying + action distributions. + """ + + def reduce_mean_valid(t): + return tf.reduce_mean(tf.boolean_mask(t, valid_mask)) + + prev_dist = dist_class(prev_logits, model) + # Make loss functions. + logp_ratio = tf.exp(curr_action_dist.logp(actions) - prev_actions_logp) + action_kl = prev_dist.kl(curr_action_dist) + self.mean_kl = reduce_mean_valid(action_kl) + + curr_entropy = curr_action_dist.entropy() + self.mean_entropy = reduce_mean_valid(curr_entropy) + + surrogate_loss = tf.minimum( + advantages * logp_ratio, + advantages * tf.clip_by_value(logp_ratio, 1 - clip_param, + 1 + clip_param)) + self.mean_policy_loss = reduce_mean_valid(-surrogate_loss) + + if use_gae: + vf_loss1 = tf.square(value_fn - value_targets) + vf_clipped = vf_preds + tf.clip_by_value( + value_fn - vf_preds, -vf_clip_param, vf_clip_param) + vf_loss2 = tf.square(vf_clipped - value_targets) + vf_loss = tf.maximum(vf_loss1, vf_loss2) + self.mean_vf_loss = reduce_mean_valid(vf_loss) + loss = reduce_mean_valid( + -surrogate_loss + cur_kl_coeff * action_kl + + vf_loss_coeff * vf_loss - entropy_coeff * curr_entropy) + else: + self.mean_vf_loss = tf.constant(0.0) + loss = reduce_mean_valid(-surrogate_loss + + cur_kl_coeff * action_kl - + entropy_coeff * curr_entropy) + self.loss = loss + + +def ppo_surrogate_loss(policy, model, dist_class, train_batch): + logits, state = model.from_batch(train_batch) + action_dist = dist_class(logits, model) + + if state: + max_seq_len = tf.reduce_max(train_batch["seq_lens"]) + mask = tf.sequence_mask(train_batch["seq_lens"], max_seq_len) + mask = tf.reshape(mask, [-1]) + else: + mask = tf.ones_like( + train_batch[Postprocessing.ADVANTAGES], dtype=tf.bool) + + policy.loss_obj = PPOLoss( + policy.action_space, + dist_class, + model, + train_batch[Postprocessing.VALUE_TARGETS], + train_batch[Postprocessing.ADVANTAGES], + train_batch[SampleBatch.ACTIONS], + train_batch[BEHAVIOUR_LOGITS], + train_batch[ACTION_LOGP], + train_batch[SampleBatch.VF_PREDS], + action_dist, + model.value_function(), + policy.kl_coeff, + mask, + entropy_coeff=policy.entropy_coeff, + clip_param=policy.config["clip_param"], + vf_clip_param=policy.config["vf_clip_param"], + vf_loss_coeff=policy.config["vf_loss_coeff"], + use_gae=policy.config["use_gae"], + model_config=policy.config["model"]) + + return policy.loss_obj.loss + + +def kl_and_loss_stats(policy, train_batch): + return { + "cur_kl_coeff": tf.cast(policy.kl_coeff, tf.float64), + "cur_lr": tf.cast(policy.cur_lr, tf.float64), + "total_loss": policy.loss_obj.loss, + "policy_loss": policy.loss_obj.mean_policy_loss, + "vf_loss": policy.loss_obj.mean_vf_loss, + "vf_explained_var": explained_variance( + train_batch[Postprocessing.VALUE_TARGETS], + policy.model.value_function()), + "vf_preds": train_batch[Postprocessing.VALUE_TARGETS], + "kl": policy.loss_obj.mean_kl, + "entropy": policy.loss_obj.mean_entropy, + "entropy_coeff": 
tf.cast(policy.entropy_coeff, tf.float64), + "advantages": train_batch[Postprocessing.ADVANTAGES], + "rewards": train_batch["rewards"] + } + + +def vf_preds_and_logits_fetches(policy): + """Adds value function and logits outputs to experience train_batches.""" + return { + SampleBatch.VF_PREDS: policy.model.value_function(), + BEHAVIOUR_LOGITS: policy.model.last_output(), + } + + +def postprocess_ppo_gae(policy, + sample_batch, + other_agent_batches=None, + episode=None): + """Adds the policy logits, VF preds, and advantages to the trajectory.""" + + net_outflow = 0.0 + if episode is not None: + post_exit_rew_len = policy.post_exit_rew_len + outflow = np.array(episode.user_data['outflow']) + final_time = sample_batch['t'][-1] + if final_time + post_exit_rew_len >= outflow.shape[0]: + if final_time > 0: + net_outflow = np.mean((outflow[final_time:])) + else: + net_outflow = np.mean((outflow[final_time:])) + else: + net_outflow = np.mean((outflow[final_time:final_time + post_exit_rew_len])) + # This is a hack because we are never returning done correctly so we just check if we have a time equal to the horizon + # if we do, we clearly never completed + if 't' in sample_batch.keys(): + completed = (sample_batch['t'][-1] < policy.horizon - 1) or sample_batch["dones"][-1] + else: + completed = False + if completed: + last_r = 0.0 + else: + next_state = [] + for i in range(policy.num_state_tensors()): + next_state.append([sample_batch["state_out_{}".format(i)][-1]]) + last_r = policy._value(sample_batch[SampleBatch.NEXT_OBS][-1], + sample_batch[SampleBatch.ACTIONS][-1], + sample_batch[SampleBatch.REWARDS][-1], + *next_state) + + # now scale the rewards by the horizo so the cumulative reward is independent of time in the system + # TODO(@evinitsky) does this make sense? 
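    # (last_r is the bootstrap value for a truncated trajectory: zero when the
    # episode actually terminated, otherwise the critic's estimate of the last
    # next-observation; compute_advantages below then turns the rewards and
    # value predictions into GAE advantages and value targets.)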
+ # if policy.terminal_reward and sample_batch['rewards'].shape[0] > 1: + # sample_batch['rewards'][:-1] = sample_batch['rewards'][:-1] / (sample_batch['rewards'][:-1].shape[0]) + # else: + # sample_batch['rewards'] = sample_batch['rewards'] / (sample_batch['rewards'].shape[0]) + + batch = compute_advantages( + sample_batch, + last_r, + policy.config["gamma"], + policy.config["lambda"], + use_gae=policy.config["use_gae"]) + return batch + + +def clip_gradients(policy, optimizer, loss): + variables = policy.model.trainable_variables() + if policy.config["grad_clip"] is not None: + grads_and_vars = optimizer.compute_gradients(loss, variables) + grads = [g for (g, v) in grads_and_vars] + policy.grads, _ = tf.clip_by_global_norm(grads, + policy.config["grad_clip"]) + clipped_grads = list(zip(policy.grads, variables)) + return clipped_grads + else: + return optimizer.compute_gradients(loss, variables) + + +class KLCoeffMixin(object): + def __init__(self, config): + # KL Coefficient + self.kl_coeff_val = config["kl_coeff"] + self.kl_target = config["kl_target"] + self.kl_coeff = tf.get_variable( + initializer=tf.constant_initializer(self.kl_coeff_val), + name="kl_coeff", + shape=(), + trainable=False, + dtype=tf.float32) + + def update_kl(self, sampled_kl): + if sampled_kl > 2.0 * self.kl_target: + self.kl_coeff_val *= 1.5 + elif sampled_kl < 0.5 * self.kl_target: + self.kl_coeff_val *= 0.5 + self.kl_coeff.load(self.kl_coeff_val, session=self.get_session()) + return self.kl_coeff_val + + +class ValueNetworkMixin(object): + def __init__(self, obs_space, action_space, config): + if config["use_gae"]: + + @make_tf_callable(self.get_session()) + def value(ob, prev_action, prev_reward, *state): + model_out, _ = self.model({ + SampleBatch.CUR_OBS: tf.convert_to_tensor([ob]), + SampleBatch.PREV_ACTIONS: tf.convert_to_tensor( + [prev_action]), + SampleBatch.PREV_REWARDS: tf.convert_to_tensor( + [prev_reward]), + "is_training": tf.convert_to_tensor(False), + }, [tf.convert_to_tensor([s]) for s in state], + tf.convert_to_tensor([1])) + return self.model.value_function()[0] + + else: + + @make_tf_callable(self.get_session()) + def value(ob, prev_action, prev_reward, *state): + return tf.constant(0.0) + + self._value = value + + +def setup_config(policy, obs_space, action_space, config): + # auto set the model option for layer sharing + config["model"]["vf_share_layers"] = config["vf_share_layers"] + + +def setup_mixins(policy, obs_space, action_space, config): + ValueNetworkMixin.__init__(policy, obs_space, action_space, config) + KLCoeffMixin.__init__(policy, config) + EntropyCoeffSchedule.__init__(policy, config["entropy_coeff"], + config["entropy_coeff_schedule"]) + LearningRateSchedule.__init__(policy, config["lr"], config["lr_schedule"]) + + +CustomPPOTFPolicy = build_tf_policy( + name="CustomPPOTFPolicy", + get_default_config=lambda: ray.rllib.agents.ppo.ppo.DEFAULT_CONFIG, + loss_fn=ppo_surrogate_loss, + stats_fn=kl_and_loss_stats, + extra_action_fetches_fn=vf_preds_and_logits_fetches, + postprocess_fn=postprocess_ppo_gae, + gradients_fn=clip_gradients, + before_init=setup_config, + before_loss_init=setup_mixins, + mixins=[ + LearningRateSchedule, EntropyCoeffSchedule, KLCoeffMixin, + ValueNetworkMixin + ]) + +def validate_config(config): + if config["entropy_coeff"] < 0: + raise DeprecationWarning("entropy_coeff must be >= 0") + if isinstance(config["entropy_coeff"], int): + config["entropy_coeff"] = float(config["entropy_coeff"]) + if config["batch_mode"] == "truncate_episodes" and not 
config["use_gae"]: + raise ValueError( + "Episode truncation is not supported without a value " + "function. Consider setting batch_mode=complete_episodes.") + if config["multiagent"]["policies"] and not config["simple_optimizer"]: + logger.info( + "In multi-agent mode, policies will be optimized sequentially " + "by the multi-GPU optimizer. Consider setting " + "simple_optimizer=True if this doesn't work for you.") + if config["simple_optimizer"]: + logger.warning( + "Using the simple minibatch optimizer. This will significantly " + "reduce performance, consider simple_optimizer=False.") + elif tf and tf.executing_eagerly(): + config["simple_optimizer"] = True # multi-gpu not supported + +from ray.rllib.agents.trainer_template import build_trainer +from ray.rllib.agents.ppo.ppo import choose_policy_optimizer, DEFAULT_CONFIG, update_kl, \ + warn_about_bad_reward_scales +CustomPPOTrainer = build_trainer( + name="CustomPPOTrainer", + default_config=DEFAULT_CONFIG, + default_policy=CustomPPOTFPolicy, + make_policy_optimizer=choose_policy_optimizer, + validate_config=validate_config, + after_optimizer_step=update_kl, + after_train_result=warn_about_bad_reward_scales) \ No newline at end of file diff --git a/flow/algorithms/maddpg/maddpg.py b/flow/algorithms/maddpg/maddpg.py index d1b4a140e..942206871 100644 --- a/flow/algorithms/maddpg/maddpg.py +++ b/flow/algorithms/maddpg/maddpg.py @@ -83,7 +83,7 @@ # Weights for feature regularization for the actor "actor_feature_reg": 0.001, # If not None, clip gradients during optimization at this value - "grad_norm_clipping": 0.5, + "grad_norm_clipping": None, # How many steps of the model to sample before learning starts. "learning_starts": 1024 * 25, # Update the replay buffer with this many samples at once. Note that this diff --git a/flow/envs/multiagent/i210.py b/flow/envs/multiagent/i210.py index d8e3bc3d6..d85a1f190 100644 --- a/flow/envs/multiagent/i210.py +++ b/flow/envs/multiagent/i210.py @@ -351,13 +351,31 @@ def compute_reward(self, rl_actions, **kwargs): # There has to be one global reward for qmix t = time() if self.mpg_reward: - reward = np.nan_to_num(miles_per_gallon(self, self.k.vehicle.get_ids())) + if self.env_params.additional_params["local_reward"]: + reward = super().compute_reward(rl_actions) + reward_dict = {idx: 0 for idx in + range(self.max_num_agents)} + reward_dict.update({self.rl_id_to_idx_map[rl_id]: reward[rl_id] for rl_id in reward.keys() + if rl_id in self.rl_id_to_idx_map.keys()}) + print(reward_dict) + else: + reward = np.nan_to_num(miles_per_gallon(self, self.k.vehicle.get_ids())) / 100.0 + reward_dict = {idx: reward for idx in + range(self.max_num_agents)} else: - reward = np.nan_to_num(np.mean(self.k.vehicle.get_speed(self.k.vehicle.get_ids()))) / (20 * self.env_params.horizon) - temp_reward_dict = {idx: reward for idx in + if self.env_params.additional_params["local_reward"]: + reward = super().compute_reward(rl_actions) + reward_dict = {idx: 0 for idx in range(self.max_num_agents)} + reward_dict.update({self.rl_id_to_idx_map[rl_id]: reward[rl_id] for rl_id in reward.keys() + if rl_id in self.rl_id_to_idx_map.keys()}) + else: + reward = np.nan_to_num(np.mean(self.k.vehicle.get_speed(self.k.vehicle.get_ids()))) / (20 * self.env_params.horizon) + reward_dict = {idx: reward for idx in + range(self.max_num_agents)} + # print('reward time is ', time() - t) - return temp_reward_dict + return reward_dict def reset(self, new_inflow_rate=None): super().reset(new_inflow_rate) diff --git a/scripts/ray_autoscale.yaml 
b/scripts/ray_autoscale.yaml index 9be491d33..f2cd447c9 100644 --- a/scripts/ray_autoscale.yaml +++ b/scripts/ray_autoscale.yaml @@ -79,9 +79,8 @@ head_setup_commands: - pip install stable-baselines - pip install pytz - pip install torch==1.3.1 - - pip install tabulate - - pip install ray==0.8.0 - - pip install tensorflow==1.14.0 + - pip install tensorflow==2.0.0 + - pip install lz4 # Custom commands that will be run on worker nodes after common setup. worker_setup_commands: [] diff --git a/scripts/run_exps.sh b/scripts/run_exps.sh index 794ee8f41..06f97d268 100755 --- a/scripts/run_exps.sh +++ b/scripts/run_exps.sh @@ -11,10 +11,10 @@ # 5/10 ray exec ray_autoscale.yaml "python flow/examples/train.py multiagent_straight_road \ straight_road_reroute_local_rew_mpg --algorithm PPO \ ---num_iterations 200 --num_cpus 8 --num_rollouts84 --rl_trainer rllib --use_s3" --start --stop \ +--num_iterations 200 --num_cpus 8 --num_rollouts 8 --rl_trainer rllib --use_s3" --start --stop \ --cluster-name=ev_i210_test1 --tmux ray exec ray_autoscale.yaml "python flow/examples/train.py multiagent_i210 \ i210_reroute_local_rew_mpg --algorithm PPO \ ---num_iterations 200 --num_cpus 8 --num_rollouts84 --rl_trainer rllib --use_s3" --start --stop \ +--num_iterations 200 --num_cpus 8 --num_rollouts 8 --rl_trainer rllib --use_s3" --start --stop \ --cluster-name=ev_i210_test2 --tmux \ No newline at end of file From 9a2a4a6b5bb7ac40096649b9fec0fde54f950436 Mon Sep 17 00:00:00 2001 From: Eugene Vinitsky Date: Sun, 10 May 2020 23:24:35 -0700 Subject: [PATCH 22/85] Add curricula over speed and headway --- .../rl/multiagent/multiagent_i210.py | 15 ++++++- .../rl/multiagent/multiagent_straight_road.py | 18 +++++++- examples/train.py | 9 +++- flow/algorithms/centralized_PPO.py | 4 +- flow/envs/multiagent/base.py | 4 -- flow/envs/multiagent/i210.py | 41 +++++++++++++++++++ 6 files changed, 82 insertions(+), 9 deletions(-) diff --git a/examples/exp_configs/rl/multiagent/multiagent_i210.py b/examples/exp_configs/rl/multiagent/multiagent_i210.py index 39aca472b..ccb808b78 100644 --- a/examples/exp_configs/rl/multiagent/multiagent_i210.py +++ b/examples/exp_configs/rl/multiagent/multiagent_i210.py @@ -51,7 +51,20 @@ "reroute_on_exit": True, 'target_velocity': 12.0, # how many AVs there can be at once (this is only for centralized critics) - "max_num_agents": 10 + "max_num_agents": 10, + # whether to add a slight reward for opening up a gap that will be annealed out N iterations in + "headway_curriculum": False, + # how many timesteps to anneal the headway curriculum over + "headway_curriculum_iters": 100, + # weight of the headway reward + "headway_reward_gain": 0.1, + + # whether to add a slight reward for traveling at a desired speed + "speed_curriculum": True, + # how many timesteps to anneal the headway curriculum over + "speed_curriculum_iters": 100, + # weight of the headway reward + "speed_reward_gain": 0.5 }) # CREATE VEHICLE TYPES AND INFLOWS diff --git a/examples/exp_configs/rl/multiagent/multiagent_straight_road.py b/examples/exp_configs/rl/multiagent/multiagent_straight_road.py index 64a16ad5a..c422cc6e7 100644 --- a/examples/exp_configs/rl/multiagent/multiagent_straight_road.py +++ b/examples/exp_configs/rl/multiagent/multiagent_straight_road.py @@ -46,7 +46,7 @@ additional_env_params.update({ 'max_accel': 2.6, 'max_decel': 4.5, - 'target_velocity': 18, + 'target_velocity': 12.0, 'local_reward': True, 'lead_obs': True, # whether to reroute vehicles once they have exited @@ -54,7 +54,21 @@ # whether to use the MPG 
reward. Otherwise, defaults to a target velocity reward "mpg_reward": True, # how many AVs there can be at once (this is only for centralized critics) - "max_num_agents": 10 + "max_num_agents": 10, + + # whether to add a slight reward for opening up a gap that will be annealed out N iterations in + "headway_curriculum": False, + # how many timesteps to anneal the headway curriculum over + "headway_curriculum_iters": 100, + # weight of the headway reward + "headway_reward_gain": 1.0, + + # whether to add a slight reward for traveling at a desired speed + "speed_curriculum": True, + # how many timesteps to anneal the headway curriculum over + "speed_curriculum_iters": 100, + # weight of the headway reward + "speed_reward_gain": 0.5 }) diff --git a/examples/train.py b/examples/train.py index 8aa4b2a4f..4d9252cd3 100644 --- a/examples/train.py +++ b/examples/train.py @@ -293,10 +293,17 @@ def on_episode_end(info): episode.custom_metrics["avg_energy_per_veh"] = np.mean(episode.user_data["avg_energy"]) episode.custom_metrics["avg_mpg_per_veh"] = np.mean(episode.user_data["avg_mpg"]) + def on_train_result(info): + """Store the mean score of the episode, and increment or decrement how many adversaries are on""" + trainer = info["trainer"] + trainer.workers.foreach_worker( + lambda ev: ev.foreach_env( + lambda env: env.set_iteration_num())) config["callbacks"] = {"on_episode_start": tune.function(on_episode_start), "on_episode_step": tune.function(on_episode_step), - "on_episode_end": tune.function(on_episode_end)} + "on_episode_end": tune.function(on_episode_end), + "on_train_result": tune.function(on_train_result)} # save the flow params for replay flow_json = json.dumps( diff --git a/flow/algorithms/centralized_PPO.py b/flow/algorithms/centralized_PPO.py index b574263e4..e9e55d5a5 100644 --- a/flow/algorithms/centralized_PPO.py +++ b/flow/algorithms/centralized_PPO.py @@ -129,8 +129,10 @@ def __init__(self, other_obs = tf.keras.layers.Input(shape=(obs_space.shape[0] * self.max_num_agents,), name="all_agent_obs") central_vf_dense = tf.keras.layers.Dense( model_config.get("central_vf_size", 64), activation=tf.nn.tanh, name="c_vf_dense")(other_obs) + central_vf_dense2 = tf.keras.layers.Dense( + model_config.get("central_vf_size", 64), activation=tf.nn.tanh, name="c_vf_dense")(central_vf_dense) central_vf_out = tf.keras.layers.Dense( - 1, activation=None, name="c_vf_out")(central_vf_dense) + 1, activation=None, name="c_vf_out")(central_vf_dense2) self.central_vf = tf.keras.Model( inputs=[other_obs], outputs=central_vf_out) self.register_variables(self.central_vf.variables) diff --git a/flow/envs/multiagent/base.py b/flow/envs/multiagent/base.py index 22152cdd1..26c5e8a15 100644 --- a/flow/envs/multiagent/base.py +++ b/flow/envs/multiagent/base.py @@ -98,10 +98,6 @@ def step(self, rl_actions): # store new observations in the vehicles and traffic lights class self.k.update(reset=False) - if self.time_counter <= self.env_params.sims_per_step * self.env_params.warmup_steps: - self.observed_ids.update(self.k.vehicle.get_ids()) - self.observed_rl_ids.update(self.k.vehicle.get_rl_ids()) - # update the colors of vehicles if self.sim_params.render: self.k.vehicle.update_vehicle_colors() diff --git a/flow/envs/multiagent/i210.py b/flow/envs/multiagent/i210.py index d85a1f190..29d83629b 100644 --- a/flow/envs/multiagent/i210.py +++ b/flow/envs/multiagent/i210.py @@ -75,6 +75,18 @@ def __init__(self, env_params, sim_params, network, simulator='traci'): self.entrance_edge = "119257914" self.exit_edge = 
"119257908#2" self.mpg_reward = env_params.additional_params["mpg_reward"] + # whether to add a slight reward for opening up a gap that will be annealed out N iterations in + self.headway_curriculum = env_params.additional_params["headway_curriculum"] + # how many timesteps to anneal the headway curriculum over + self.headway_curriculum_iters = env_params.additional_params["headway_curriculum_iters"] + self.headway_reward_gain = env_params.additional_params["headway_curriculum_iters"] + + # whether to add a slight reward for opening up a gap that will be annealed out N iterations in + self.speed_curriculum = env_params.additional_params["speed_curriculum"] + # how many timesteps to anneal the headway curriculum over + self.speed_curriculum_iters = env_params.additional_params["speed_curriculum_iters"] + self.speed_reward_gain = env_params.additional_params["speed_curriculum_iters"] + self.num_training_iters = 0 self.leader = [] @property @@ -184,8 +196,37 @@ def compute_reward(self, rl_actions, **kwargs): reward = np.nan_to_num(np.mean([(des_speed - np.abs(speed - des_speed)) ** 2 for speed in speeds]) / (des_speed ** 2)) rewards = {rl_id: reward for rl_id in self.k.vehicle.get_rl_ids()} + if self.headway_curriculum and self.num_training_iters <= self.headway_curriculum_iters: + t_min = 1 # smallest acceptable time headway + for veh_id, rew in rewards.items(): + lead_id = self.k.vehicle.get_leader(veh_id) + penalty = 0 + if lead_id not in ["", None] \ + and self.k.vehicle.get_speed(veh_id) > 0: + t_headway = max( + self.k.vehicle.get_headway(veh_id) / + self.k.vehicle.get_speed(veh_id), 0) + # print('time headway is {}, headway is {}'.format(t_headway, self.k.vehicle.get_headway(veh_id))) + scaling_factor = max(0, 1 - self.num_training_iters / self.headway_curriculum_iters) + penalty += scaling_factor * self.headway_reward_gain * min((t_headway - t_min) / t_min, 0) + # print('penalty is ', penalty) + + rewards[veh_id] += penalty + + if self.speed_curriculum and self.num_training_iters <= self.speed_curriculum_iters: + des_speed = self.env_params.additional_params["target_velocity"] + + for veh_id, rew in rewards.items(): + speed = self.k.vehicle.get_speed(veh_id) + speed_reward = 0.0 + if speed >= 0: + speed_reward = ((des_speed - np.abs(speed - des_speed)) ** 2) / (des_speed ** 2) + rewards[veh_id] += speed_reward return rewards + def set_iteration_num(self): + self.num_training_iters += 1 + def additional_command(self): """See parent class. From f1abcfedc2e619edffbf2d639acdbae7f8b0e2ac Mon Sep 17 00:00:00 2001 From: Eugene Vinitsky Date: Mon, 11 May 2020 09:45:15 -0700 Subject: [PATCH 23/85] add timing, remove adaptive KL from PPO --- .../rl/multiagent/multiagent_straight_road.py | 48 ++++++++++------ examples/train.py | 7 ++- flow/algorithms/centralized_PPO.py | 10 ++-- flow/algorithms/custom_ppo.py | 56 ++----------------- flow/envs/multiagent/i210.py | 29 +++++++--- flow/visualize/visualizer_rllib.py | 6 ++ scripts/run_exps.sh | 22 ++++++-- 7 files changed, 86 insertions(+), 92 deletions(-) diff --git a/examples/exp_configs/rl/multiagent/multiagent_straight_road.py b/examples/exp_configs/rl/multiagent/multiagent_straight_road.py index c422cc6e7..2c18414ec 100644 --- a/examples/exp_configs/rl/multiagent/multiagent_straight_road.py +++ b/examples/exp_configs/rl/multiagent/multiagent_straight_road.py @@ -3,27 +3,31 @@ Trains a non-constant number of agents, all sharing the same policy, on the highway with ramps network. 
""" -from flow.controllers import RLController, IDMController -from flow.core.params import EnvParams, NetParams, InitialConfig, InFlows, \ - VehicleParams, SumoParams, SumoLaneChangeParams -from flow.envs.ring.accel import ADDITIONAL_ENV_PARAMS +from flow.controllers import BandoFTLController, RLController +from flow.core.params import EnvParams +from flow.core.params import NetParams +from flow.core.params import InitialConfig +from flow.core.params import InFlows +from flow.core.params import VehicleParams +from flow.core.params import SumoParams +from flow.core.params import SumoLaneChangeParams from flow.networks import HighwayNetwork +from flow.envs.ring.accel import ADDITIONAL_ENV_PARAMS from flow.envs.multiagent import MultiStraightRoad from flow.networks.highway import ADDITIONAL_NET_PARAMS from flow.utils.registry import make_create_env from ray.tune.registry import register_env - # SET UP PARAMETERS FOR THE SIMULATION - -# number of steps per rollout -HORIZON = 2000 - -# inflow rate on the highway in vehicles per hour -HIGHWAY_INFLOW_RATE = 10800 / 5 # percentage of autonomous vehicles compared to human vehicles on highway PENETRATION_RATE = 10 +TRAFFIC_SPEED = 11 +END_SPEED = 16 +TRAFFIC_FLOW = 2056 +HORIZON = 2000 +INCLUDE_NOISE = False + # SET UP PARAMETERS FOR THE NETWORK @@ -36,7 +40,12 @@ # speed limit for all edges "speed_limit": 30, # number of edges to divide the highway into - "num_edges": 2 + "num_edges": 2, + # whether to include a ghost edge of length 500m. This edge is provided a + # different speed limit. + "use_ghost_edge": True, + # speed limit for the ghost edge + "ghost_speed_limit": END_SPEED }) @@ -76,15 +85,20 @@ vehicles = VehicleParams() inflows = InFlows() - -# human vehicles vehicles.add( "human", num_vehicles=0, lane_change_params=SumoLaneChangeParams( lane_change_mode="strategic", ), - acceleration_controller=(IDMController, {"a": .3, "b": 2.0, "noise": 0.5}), + acceleration_controller=(BandoFTLController, { + 'alpha': .5, + 'beta': 20.0, + 'h_st': 12.0, + 'h_go': 50.0, + 'v_max': 30.0, + 'noise': 1.0 if INCLUDE_NOISE else 0.0, + }), ) # autonomous vehicles @@ -97,7 +111,7 @@ inflows.add( veh_type="human", edge="highway_0", - vehs_per_hour=int(HIGHWAY_INFLOW_RATE * (1 - PENETRATION_RATE / 100)), + vehs_per_hour=int(TRAFFIC_FLOW * (1 - PENETRATION_RATE / 100)), depart_lane="free", depart_speed="23.0", name="idm_highway_inflow") @@ -107,7 +121,7 @@ inflows.add( veh_type="rl", edge="highway_0", - vehs_per_hour=int(HIGHWAY_INFLOW_RATE * (PENETRATION_RATE / 100)), + vehs_per_hour=int(TRAFFIC_FLOW * (PENETRATION_RATE / 100)), depart_lane="free", depart_speed="23.0", name="rl_highway_inflow") diff --git a/examples/train.py b/examples/train.py index 4d9252cd3..37d54ffc3 100644 --- a/examples/train.py +++ b/examples/train.py @@ -192,8 +192,10 @@ def setup_exps_rllib(flow_params, alg_run = flags.algorithm.upper() if alg_run == "PPO": - agent_cls = get_agent_class(alg_run) - config = deepcopy(agent_cls._default_config) + from flow.algorithms.custom_ppo import CustomPPOTrainer + from ray.rllib.agents.ppo import DEFAULT_CONFIG + alg_run = CustomPPOTrainer + config = deepcopy(DEFAULT_CONFIG) config["num_workers"] = n_cpus config["horizon"] = horizon @@ -248,6 +250,7 @@ def setup_exps_rllib(flow_params, from flow.algorithms.maddpg.maddpg import MADDPGTrainer, DEFAULT_CONFIG config = deepcopy(DEFAULT_CONFIG) config["actor_feature_reg"] = 0.0 + config["learning_starts"] = 100 alg_run = MADDPGTrainer elif alg_run == "QMIX": diff --git 
a/flow/algorithms/centralized_PPO.py b/flow/algorithms/centralized_PPO.py index e9e55d5a5..e11139640 100644 --- a/flow/algorithms/centralized_PPO.py +++ b/flow/algorithms/centralized_PPO.py @@ -10,7 +10,7 @@ from ray import tune from ray.rllib.agents.ppo.ppo import PPOTrainer -from ray.rllib.agents.ppo.ppo_policy import PPOTFPolicy, KLCoeffMixin, BEHAVIOUR_LOGITS +from ray.rllib.agents.ppo.ppo_policy import PPOTFPolicy, BEHAVIOUR_LOGITS from ray.rllib.evaluation.postprocessing import compute_advantages, \ Postprocessing from ray.rllib.policy.sample_batch import SampleBatch @@ -468,12 +468,11 @@ def reduce_mean_valid(t): vf_loss = tf.maximum(vf_loss1, vf_loss2) self.mean_vf_loss = reduce_mean_valid(vf_loss) loss = reduce_mean_valid( - -surrogate_loss + cur_kl_coeff * action_kl + + -surrogate_loss + vf_loss_coeff * vf_loss - entropy_coeff * curr_entropy) else: self.mean_vf_loss = tf.constant(0.0) - loss = reduce_mean_valid(-surrogate_loss + - cur_kl_coeff * action_kl - + loss = reduce_mean_valid(-surrogate_loss - entropy_coeff * curr_entropy) self.loss = loss @@ -485,7 +484,6 @@ def new_ppo_surrogate_loss(policy, model, dist_class, train_batch): def setup_mixins(policy, obs_space, action_space, config): # copied from PPO - KLCoeffMixin.__init__(policy, config) EntropyCoeffSchedule.__init__(policy, config["entropy_coeff"], config["entropy_coeff_schedule"]) LearningRateSchedule.__init__(policy, config["lr"], config["lr_schedule"]) @@ -528,7 +526,7 @@ def kl_and_loss_stats(policy, train_batch): before_loss_init=setup_mixins, grad_stats_fn=central_vf_stats, mixins=[ - LearningRateSchedule, EntropyCoeffSchedule, KLCoeffMixin, + LearningRateSchedule, EntropyCoeffSchedule, CentralizedValueMixin ]) diff --git a/flow/algorithms/custom_ppo.py b/flow/algorithms/custom_ppo.py index 26c94af95..da86abcc8 100644 --- a/flow/algorithms/custom_ppo.py +++ b/flow/algorithms/custom_ppo.py @@ -105,13 +105,11 @@ def reduce_mean_valid(t): vf_loss = tf.maximum(vf_loss1, vf_loss2) self.mean_vf_loss = reduce_mean_valid(vf_loss) loss = reduce_mean_valid( - -surrogate_loss + cur_kl_coeff * action_kl + + -surrogate_loss + vf_loss_coeff * vf_loss - entropy_coeff * curr_entropy) else: self.mean_vf_loss = tf.constant(0.0) - loss = reduce_mean_valid(-surrogate_loss + - cur_kl_coeff * action_kl - - entropy_coeff * curr_entropy) + loss = reduce_mean_valid(-surrogate_loss -entropy_coeff * curr_entropy) self.loss = loss @@ -184,24 +182,7 @@ def postprocess_ppo_gae(policy, episode=None): """Adds the policy logits, VF preds, and advantages to the trajectory.""" - net_outflow = 0.0 - if episode is not None: - post_exit_rew_len = policy.post_exit_rew_len - outflow = np.array(episode.user_data['outflow']) - final_time = sample_batch['t'][-1] - if final_time + post_exit_rew_len >= outflow.shape[0]: - if final_time > 0: - net_outflow = np.mean((outflow[final_time:])) - else: - net_outflow = np.mean((outflow[final_time:])) - else: - net_outflow = np.mean((outflow[final_time:final_time + post_exit_rew_len])) - # This is a hack because we are never returning done correctly so we just check if we have a time equal to the horizon - # if we do, we clearly never completed - if 't' in sample_batch.keys(): - completed = (sample_batch['t'][-1] < policy.horizon - 1) or sample_batch["dones"][-1] - else: - completed = False + completed = sample_batch["dones"][-1] if completed: last_r = 0.0 else: @@ -213,13 +194,6 @@ def postprocess_ppo_gae(policy, sample_batch[SampleBatch.REWARDS][-1], *next_state) - # now scale the rewards by the horizo so the 
cumulative reward is independent of time in the system - # TODO(@evinitsky) does this make sense? - # if policy.terminal_reward and sample_batch['rewards'].shape[0] > 1: - # sample_batch['rewards'][:-1] = sample_batch['rewards'][:-1] / (sample_batch['rewards'][:-1].shape[0]) - # else: - # sample_batch['rewards'] = sample_batch['rewards'] / (sample_batch['rewards'].shape[0]) - batch = compute_advantages( sample_batch, last_r, @@ -242,27 +216,6 @@ def clip_gradients(policy, optimizer, loss): return optimizer.compute_gradients(loss, variables) -class KLCoeffMixin(object): - def __init__(self, config): - # KL Coefficient - self.kl_coeff_val = config["kl_coeff"] - self.kl_target = config["kl_target"] - self.kl_coeff = tf.get_variable( - initializer=tf.constant_initializer(self.kl_coeff_val), - name="kl_coeff", - shape=(), - trainable=False, - dtype=tf.float32) - - def update_kl(self, sampled_kl): - if sampled_kl > 2.0 * self.kl_target: - self.kl_coeff_val *= 1.5 - elif sampled_kl < 0.5 * self.kl_target: - self.kl_coeff_val *= 0.5 - self.kl_coeff.load(self.kl_coeff_val, session=self.get_session()) - return self.kl_coeff_val - - class ValueNetworkMixin(object): def __init__(self, obs_space, action_space, config): if config["use_gae"]: @@ -296,7 +249,6 @@ def setup_config(policy, obs_space, action_space, config): def setup_mixins(policy, obs_space, action_space, config): ValueNetworkMixin.__init__(policy, obs_space, action_space, config) - KLCoeffMixin.__init__(policy, config) EntropyCoeffSchedule.__init__(policy, config["entropy_coeff"], config["entropy_coeff_schedule"]) LearningRateSchedule.__init__(policy, config["lr"], config["lr_schedule"]) @@ -313,7 +265,7 @@ def setup_mixins(policy, obs_space, action_space, config): before_init=setup_config, before_loss_init=setup_mixins, mixins=[ - LearningRateSchedule, EntropyCoeffSchedule, KLCoeffMixin, + LearningRateSchedule, EntropyCoeffSchedule, ValueNetworkMixin ]) diff --git a/flow/envs/multiagent/i210.py b/flow/envs/multiagent/i210.py index 29d83629b..cab31eb51 100644 --- a/flow/envs/multiagent/i210.py +++ b/flow/envs/multiagent/i210.py @@ -76,16 +76,16 @@ def __init__(self, env_params, sim_params, network, simulator='traci'): self.exit_edge = "119257908#2" self.mpg_reward = env_params.additional_params["mpg_reward"] # whether to add a slight reward for opening up a gap that will be annealed out N iterations in - self.headway_curriculum = env_params.additional_params["headway_curriculum"] + self.headway_curriculum = env_params.additional_params.get("headway_curriculum", False) # how many timesteps to anneal the headway curriculum over - self.headway_curriculum_iters = env_params.additional_params["headway_curriculum_iters"] - self.headway_reward_gain = env_params.additional_params["headway_curriculum_iters"] + self.headway_curriculum_iters = env_params.additional_params.get("headway_curriculum_iters", 0) + self.headway_reward_gain = env_params.additional_params.get("headway_reward_gain", 0.0) # whether to add a slight reward for opening up a gap that will be annealed out N iterations in - self.speed_curriculum = env_params.additional_params["speed_curriculum"] + self.speed_curriculum = env_params.additional_params.get("speed_curriculum", False) # how many timesteps to anneal the headway curriculum over - self.speed_curriculum_iters = env_params.additional_params["speed_curriculum_iters"] - self.speed_reward_gain = env_params.additional_params["speed_curriculum_iters"] + self.speed_curriculum_iters = 
env_params.additional_params.get("speed_curriculum_iters", 0) + self.speed_reward_gain = env_params.additional_params.get("speed_reward_gain", 0.0) self.num_training_iters = 0 self.leader = [] @@ -128,7 +128,10 @@ def action_space(self): def _apply_rl_actions(self, rl_actions): """See class definition.""" # in the warmup steps, rl_actions is None + id_list = [] + accel_list = [] if rl_actions: + t = time() for rl_id, actions in rl_actions.items(): accel = actions[0] @@ -136,12 +139,15 @@ def _apply_rl_actions(self, rl_actions): # lane_change_softmax /= np.sum(lane_change_softmax) # lane_change_action = np.random.choice([-1, 0, 1], # p=lane_change_softmax) - - self.k.vehicle.apply_acceleration(rl_id, accel) - # self.k.vehicle.apply_lane_change(rl_id, lane_change_action) + id_list.append(rl_id) + accel_list.append(accel) + self.k.vehicle.apply_acceleration(id_list, accel_list) + # self.k.vehicle.apply_lane_change(rl_id, lane_change_action) + print('time to apply actions is ', time() - t) def get_state(self): """See class definition.""" + t = time() if self.lead_obs: veh_info = {} for rl_id in self.k.vehicle.get_rl_ids(): @@ -160,6 +166,7 @@ def get_state(self): veh_info = {rl_id: np.concatenate((self.state_util(rl_id), self.veh_statistics(rl_id))) for rl_id in self.k.vehicle.get_rl_ids()} + print('time to get state is ', time() - t) return veh_info def compute_reward(self, rl_actions, **kwargs): @@ -168,6 +175,7 @@ def compute_reward(self, rl_actions, **kwargs): if rl_actions is None: return {} + t = time() rewards = {} if self.env_params.additional_params["local_reward"]: des_speed = self.env_params.additional_params["target_velocity"] @@ -222,6 +230,8 @@ def compute_reward(self, rl_actions, **kwargs): if speed >= 0: speed_reward = ((des_speed - np.abs(speed - des_speed)) ** 2) / (des_speed ** 2) rewards[veh_id] += speed_reward + print('time to get reward is ', time() - t) + return rewards def set_iteration_num(self): @@ -385,6 +395,7 @@ def get_state(self): # print('time to update copy is ', time() - t) veh_info = veh_info_copy # print('state time is ', time() - t) + print(veh_info) return veh_info diff --git a/flow/visualize/visualizer_rllib.py b/flow/visualize/visualizer_rllib.py index 7ae079de5..d6b7506fd 100644 --- a/flow/visualize/visualizer_rllib.py +++ b/flow/visualize/visualizer_rllib.py @@ -26,6 +26,7 @@ from ray.rllib.agents.registry import get_agent_class from ray.tune.registry import register_env +from flow.core.rewards import miles_per_gallon from flow.core.util import emission_to_csv from flow.utils.registry import make_create_env from flow.utils.rllib import get_flow_params @@ -200,6 +201,7 @@ def visualizer_rllib(args): # Simulate and collect metrics final_outflows = [] final_inflows = [] + mpg = [] mean_speed = [] std_speed = [] for i in range(args.num_rollouts): @@ -217,6 +219,8 @@ def visualizer_rllib(args): if speeds: vel.append(np.mean(speeds)) + mpg.append(miles_per_gallon(env.unwrapped, vehicles.get_ids())) + if multiagent: action = {} for agent_id in state.keys(): @@ -287,6 +291,8 @@ def visualizer_rllib(args): print('Average, std: {}, {}'.format(np.mean(std_speed), np.std( std_speed))) + print('Average, std miles per gallon: {}, {}'.format(np.mean(mpg), np.std(mpg))) + # Compute arrival rate of vehicles in the last 500 sec of the run print("\nOutflows (veh/hr):") print(final_outflows) diff --git a/scripts/run_exps.sh b/scripts/run_exps.sh index 06f97d268..773127ece 100755 --- a/scripts/run_exps.sh +++ b/scripts/run_exps.sh @@ -9,12 +9,22 @@ 
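For reference, a minimal standalone sketch (not part of any patch) of the annealed headway bonus computed in the I210 env's curriculum above. It assumes the attribute names used in the diffs; num_training_iters is incremented once per training iteration by the on_train_result callback, which calls set_iteration_num on every worker's copy of the env, and the speed-curriculum bonus is scaled by the same kind of factor.

    def annealed_headway_penalty(headway, speed, num_training_iters,
                                 curriculum_iters=100, gain=1.0, t_min=1.0):
        """Negative bonus for time headways below t_min that fades out over training."""
        if speed <= 0:
            return 0.0
        t_headway = max(headway / speed, 0.0)
        # weight anneals linearly from 1 to 0 over curriculum_iters training iterations
        scaling_factor = max(0.0, 1.0 - num_training_iters / curriculum_iters)
        # only headways shorter than t_min seconds are penalized
        return scaling_factor * gain * min((t_headway - t_min) / t_min, 0.0)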
##--cluster-name=ev_i210_test2 --tmux # 5/10 +#ray exec ray_autoscale.yaml "python flow/examples/train.py multiagent_straight_road \ +#straight_road_reroute_local_rew_mpg --algorithm PPO \ +#--num_iterations 200 --num_cpus 8 --num_rollouts 8 --rl_trainer rllib --use_s3" --start --stop \ +#--cluster-name=ev_i210_test1 --tmux +# +#ray exec ray_autoscale.yaml "python flow/examples/train.py multiagent_i210 \ +#i210_reroute_local_rew_mpg --algorithm PPO \ +#--num_iterations 200 --num_cpus 8 --num_rollouts 8 --rl_trainer rllib --use_s3" --start --stop \ +#--cluster-name=ev_i210_test2 --tmux + ray exec ray_autoscale.yaml "python flow/examples/train.py multiagent_straight_road \ -straight_road_reroute_local_rew_mpg --algorithm PPO \ ---num_iterations 200 --num_cpus 8 --num_rollouts 8 --rl_trainer rllib --use_s3" --start --stop \ ---cluster-name=ev_i210_test1 --tmux +straight_road_reroute_local_rew_mpg_curr --algorithm PPO \ +--num_iterations 200 --num_cpus 7 --num_rollouts 7 --rl_trainer rllib --use_s3 --grid_search" --start --stop \ +--cluster-name=ev_i210_test3 --tmux ray exec ray_autoscale.yaml "python flow/examples/train.py multiagent_i210 \ -i210_reroute_local_rew_mpg --algorithm PPO \ ---num_iterations 200 --num_cpus 8 --num_rollouts 8 --rl_trainer rllib --use_s3" --start --stop \ ---cluster-name=ev_i210_test2 --tmux \ No newline at end of file +i210_reroute_local_rew_mpg_curr --algorithm PPO \ +--num_iterations 200 --num_cpus 7 --num_rollouts 7 --rl_trainer rllib --use_s3 --grid_search" --start --stop \ +--cluster-name=ev_i210_test4 --tmux \ No newline at end of file From f9e573d11ffac5ba51b12ad9db3aadbcc6694b68 Mon Sep 17 00:00:00 2001 From: Eugene Vinitsky Date: Mon, 11 May 2020 09:54:45 -0700 Subject: [PATCH 24/85] Minor fixes to KL --- examples/exp_configs/non_rl/i210_subnetwork.py | 4 ++-- .../rl/multiagent/multiagent_i210.py | 2 +- flow/algorithms/centralized_PPO.py | 17 ++++++++++++++++- flow/algorithms/custom_ppo.py | 17 +++++++++++++++-- 4 files changed, 34 insertions(+), 6 deletions(-) diff --git a/examples/exp_configs/non_rl/i210_subnetwork.py b/examples/exp_configs/non_rl/i210_subnetwork.py index 4e640e873..de008480e 100644 --- a/examples/exp_configs/non_rl/i210_subnetwork.py +++ b/examples/exp_configs/non_rl/i210_subnetwork.py @@ -17,7 +17,7 @@ from flow.envs import TestEnv from flow.networks.i210_subnetwork import I210SubNetwork, EDGES_DISTRIBUTION -PENETRATION_RATE = 10.0 +PENETRATION_RATE = 0.0 HIGHWAY_INFLOW_RATE = 10800 # create the base vehicle type that will be used for inflows @@ -101,7 +101,7 @@ # environment related parameters (see flow.core.params.EnvParams) env=EnvParams( horizon=2000, - warmup_steps=400 + warmup_steps=600 ), # network-related parameters (see flow.core.params.NetParams and the diff --git a/examples/exp_configs/rl/multiagent/multiagent_i210.py b/examples/exp_configs/rl/multiagent/multiagent_i210.py index ccb808b78..9610a69d7 100644 --- a/examples/exp_configs/rl/multiagent/multiagent_i210.py +++ b/examples/exp_configs/rl/multiagent/multiagent_i210.py @@ -137,7 +137,7 @@ warmup_steps = 0 if additional_env_params['reroute_on_exit']: - warmup_steps = 400 + warmup_steps = 1200 flow_params = dict( # name of the experiment diff --git a/flow/algorithms/centralized_PPO.py b/flow/algorithms/centralized_PPO.py index e11139640..c1f2e75d6 100644 --- a/flow/algorithms/centralized_PPO.py +++ b/flow/algorithms/centralized_PPO.py @@ -482,8 +482,23 @@ def new_ppo_surrogate_loss(policy, model, dist_class, train_batch): return loss +class KLCoeffMixin(object): + def 
__init__(self, config): + # KL Coefficient + self.kl_coeff_val = config["kl_coeff"] + self.kl_target = config["kl_target"] + self.kl_coeff = tf.get_variable( + initializer=tf.constant_initializer(self.kl_coeff_val), + name="kl_coeff", + shape=(), + trainable=False, + dtype=tf.float32) + + def setup_mixins(policy, obs_space, action_space, config): # copied from PPO + KLCoeffMixin.__init__(policy, config) + EntropyCoeffSchedule.__init__(policy, config["entropy_coeff"], config["entropy_coeff_schedule"]) LearningRateSchedule.__init__(policy, config["lr"], config["lr_schedule"]) @@ -527,7 +542,7 @@ def kl_and_loss_stats(policy, train_batch): grad_stats_fn=central_vf_stats, mixins=[ LearningRateSchedule, EntropyCoeffSchedule, - CentralizedValueMixin + CentralizedValueMixin, KLCoeffMixin ]) CCTrainer = PPOTrainer.with_updates(name="CCPPOTrainer", default_policy=CCPPO) \ No newline at end of file diff --git a/flow/algorithms/custom_ppo.py b/flow/algorithms/custom_ppo.py index da86abcc8..050d7d1fd 100644 --- a/flow/algorithms/custom_ppo.py +++ b/flow/algorithms/custom_ppo.py @@ -248,12 +248,26 @@ def setup_config(policy, obs_space, action_space, config): def setup_mixins(policy, obs_space, action_space, config): + KLCoeffMixin.__init__(policy, config) ValueNetworkMixin.__init__(policy, obs_space, action_space, config) EntropyCoeffSchedule.__init__(policy, config["entropy_coeff"], config["entropy_coeff_schedule"]) LearningRateSchedule.__init__(policy, config["lr"], config["lr_schedule"]) +class KLCoeffMixin(object): + def __init__(self, config): + # KL Coefficient + self.kl_coeff_val = config["kl_coeff"] + self.kl_target = config["kl_target"] + self.kl_coeff = tf.get_variable( + initializer=tf.constant_initializer(self.kl_coeff_val), + name="kl_coeff", + shape=(), + trainable=False, + dtype=tf.float32) + + CustomPPOTFPolicy = build_tf_policy( name="CustomPPOTFPolicy", get_default_config=lambda: ray.rllib.agents.ppo.ppo.DEFAULT_CONFIG, @@ -266,7 +280,7 @@ def setup_mixins(policy, obs_space, action_space, config): before_loss_init=setup_mixins, mixins=[ LearningRateSchedule, EntropyCoeffSchedule, - ValueNetworkMixin + ValueNetworkMixin, KLCoeffMixin ]) def validate_config(config): @@ -299,5 +313,4 @@ def validate_config(config): default_policy=CustomPPOTFPolicy, make_policy_optimizer=choose_policy_optimizer, validate_config=validate_config, - after_optimizer_step=update_kl, after_train_result=warn_about_bad_reward_scales) \ No newline at end of file From 5605ec26b70730d1ee7ecaf30efb35cfd9f74e06 Mon Sep 17 00:00:00 2001 From: Eugene Vinitsky Date: Mon, 11 May 2020 12:42:44 -0700 Subject: [PATCH 25/85] switch back to old AMI for testing purposes --- examples/exp_configs/non_rl/highway_single.py | 38 +++++++++++++++++-- .../rl/multiagent/multiagent_i210.py | 2 +- .../rl/multiagent/multiagent_straight_road.py | 4 +- flow/envs/multiagent/i210.py | 10 +++-- scripts/ray_autoscale.yaml | 4 +- 5 files changed, 46 insertions(+), 12 deletions(-) diff --git a/examples/exp_configs/non_rl/highway_single.py b/examples/exp_configs/non_rl/highway_single.py index 46b18c0e9..51d937ac1 100644 --- a/examples/exp_configs/non_rl/highway_single.py +++ b/examples/exp_configs/non_rl/highway_single.py @@ -3,7 +3,11 @@ Trains a non-constant number of agents, all sharing the same policy, on the highway with ramps network. 
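For context only, a sketch (not part of any patch) of the adaptive-KL schedule that patch 23 removes and patch 24 only partially restores: the re-added KLCoeffMixin creates the kl_coeff variable but no update rule, and after_optimizer_step=update_kl is dropped, so the coefficient stays at its initial value (presumably kept only so code that references policy.kl_coeff keeps working) and the KL term no longer appears in the loss.

    def update_kl_coeff(kl_coeff, sampled_kl, kl_target):
        """Standard PPO adaptive-KL rule, as removed from the custom trainer:
        tighten the penalty when the sampled KL overshoots the target,
        relax it when it undershoots."""
        if sampled_kl > 2.0 * kl_target:
            kl_coeff *= 1.5
        elif sampled_kl < 0.5 * kl_target:
            kl_coeff *= 0.5
        return kl_coeff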
""" + +import numpy as np + from flow.controllers import BandoFTLController +from flow.controllers.velocity_controllers import FollowerStopper from flow.core.params import EnvParams from flow.core.params import NetParams from flow.core.params import InitialConfig @@ -11,6 +15,7 @@ from flow.core.params import VehicleParams from flow.core.params import SumoParams from flow.core.params import SumoLaneChangeParams +from flow.core.rewards import miles_per_gallon from flow.networks import HighwayNetwork from flow.envs import TestEnv from flow.networks.highway import ADDITIONAL_NET_PARAMS @@ -18,8 +23,9 @@ TRAFFIC_SPEED = 11 END_SPEED = 16 TRAFFIC_FLOW = 2056 -HORIZON = 3600 +HORIZON = 2000 INCLUDE_NOISE = False +PENETRATION_RATE = 0.10 additional_net_params = ADDITIONAL_NET_PARAMS.copy() additional_net_params.update({ @@ -55,15 +61,33 @@ }), ) + +if PENETRATION_RATE > 0.0: + vehicles.add( + "av", + color='red', + num_vehicles=0, + acceleration_controller=(FollowerStopper, {"v_des": 12.0}), + ) + inflows = InFlows() inflows.add( veh_type="human", edge="highway_0", - vehs_per_hour=TRAFFIC_FLOW, + vehs_per_hour=int(TRAFFIC_FLOW * (1-PENETRATION_RATE)), depart_lane="free", depart_speed=TRAFFIC_SPEED, name="idm_highway_inflow") +if PENETRATION_RATE > 0.0: + inflows.add( + veh_type="av", + edge="highway_0", + vehs_per_hour=int(TRAFFIC_FLOW * (PENETRATION_RATE)), + depart_lane="free", + depart_speed=TRAFFIC_SPEED, + name="idm_highway_inflow") + # SET UP FLOW PARAMETERS flow_params = dict( @@ -82,7 +106,7 @@ # environment related parameters (see flow.core.params.EnvParams) env=EnvParams( horizon=HORIZON, - warmup_steps=0, + warmup_steps=400, sims_per_step=1, ), @@ -108,3 +132,11 @@ # reset (see flow.core.params.InitialConfig) initial=InitialConfig(), ) + +custom_callables = { + "avg_merge_speed": lambda env: np.nan_to_num(np.mean( + env.k.vehicle.get_speed(env.k.vehicle.get_ids()))), + "avg_outflow": lambda env: np.nan_to_num( + env.k.vehicle.get_outflow_rate(120)), + "mpg": lambda env: miles_per_gallon(env, env.k.vehicle.get_ids(), gain=1.0) +} diff --git a/examples/exp_configs/rl/multiagent/multiagent_i210.py b/examples/exp_configs/rl/multiagent/multiagent_i210.py index 9610a69d7..ba38471bc 100644 --- a/examples/exp_configs/rl/multiagent/multiagent_i210.py +++ b/examples/exp_configs/rl/multiagent/multiagent_i210.py @@ -48,7 +48,7 @@ # whether to use the MPG reward. Otherwise, defaults to a target velocity reward "mpg_reward": True, # whether to reroute vehicles once they have exited - "reroute_on_exit": True, + "reroute_on_exit": False, 'target_velocity': 12.0, # how many AVs there can be at once (this is only for centralized critics) "max_num_agents": 10, diff --git a/examples/exp_configs/rl/multiagent/multiagent_straight_road.py b/examples/exp_configs/rl/multiagent/multiagent_straight_road.py index 2c18414ec..bb6f11a2c 100644 --- a/examples/exp_configs/rl/multiagent/multiagent_straight_road.py +++ b/examples/exp_configs/rl/multiagent/multiagent_straight_road.py @@ -59,7 +59,7 @@ 'local_reward': True, 'lead_obs': True, # whether to reroute vehicles once they have exited - "reroute_on_exit": True, + "reroute_on_exit": False, # whether to use the MPG reward. 
Otherwise, defaults to a target velocity reward "mpg_reward": True, # how many AVs there can be at once (this is only for centralized critics) @@ -77,7 +77,7 @@ # how many timesteps to anneal the headway curriculum over "speed_curriculum_iters": 100, # weight of the headway reward - "speed_reward_gain": 0.5 + "speed_reward_gain": 1.0 }) diff --git a/flow/envs/multiagent/i210.py b/flow/envs/multiagent/i210.py index cab31eb51..0aac22240 100644 --- a/flow/envs/multiagent/i210.py +++ b/flow/envs/multiagent/i210.py @@ -143,7 +143,7 @@ def _apply_rl_actions(self, rl_actions): accel_list.append(accel) self.k.vehicle.apply_acceleration(id_list, accel_list) # self.k.vehicle.apply_lane_change(rl_id, lane_change_action) - print('time to apply actions is ', time() - t) + # print('time to apply actions is ', time() - t) def get_state(self): """See class definition.""" @@ -166,7 +166,7 @@ def get_state(self): veh_info = {rl_id: np.concatenate((self.state_util(rl_id), self.veh_statistics(rl_id))) for rl_id in self.k.vehicle.get_rl_ids()} - print('time to get state is ', time() - t) + # print('time to get state is ', time() - t) return veh_info def compute_reward(self, rl_actions, **kwargs): @@ -229,8 +229,10 @@ def compute_reward(self, rl_actions, **kwargs): speed_reward = 0.0 if speed >= 0: speed_reward = ((des_speed - np.abs(speed - des_speed)) ** 2) / (des_speed ** 2) - rewards[veh_id] += speed_reward - print('time to get reward is ', time() - t) + scaling_factor = max(0, 1 - self.num_training_iters / self.headway_curriculum_iters) + + rewards[veh_id] += speed_reward * scaling_factor * self.speed_reward_gain + # print('time to get reward is ', time() - t) return rewards diff --git a/scripts/ray_autoscale.yaml b/scripts/ray_autoscale.yaml index f2cd447c9..c43685ea1 100644 --- a/scripts/ray_autoscale.yaml +++ b/scripts/ray_autoscale.yaml @@ -40,7 +40,7 @@ auth: # http://boto3.readthedocs.io/en/latest/reference/services/ec2.html#EC2.ServiceResource.create_instances head_node: InstanceType: c4.4xlarge - ImageId: ami-0c047f3ddd3939b30 # Flow AMI (Ubuntu) + ImageId: ami-0b489700e7f810707 # Flow AMI (Ubuntu) KeyName: MyKeyPair2 InstanceMarketOptions: MarketType: spot @@ -56,7 +56,7 @@ head_node: # http://boto3.readthedocs.io/en/latest/reference/services/ec2.html#EC2.ServiceResource.create_instances worker_nodes: InstanceType: c4.4xlarge - ImageId: ami-0c047f3ddd3939b30 # Flow AMI (Ubuntu) + ImageId: ami-0b489700e7f810707 # Flow AMI (Ubuntu) KeyName: MyKeyPair2 #Run workers on spot by default. Comment this out to use on-demand. From e5437d359d3a2c2c2846bb9e2b42fbc151cb693c Mon Sep 17 00:00:00 2001 From: Eugene Vinitsky Date: Mon, 11 May 2020 13:46:43 -0700 Subject: [PATCH 26/85] Disable custom PPO for a sec --- .../rl/multiagent/multiagent_straight_road.py | 4 ++-- examples/train.py | 13 +++++++++---- 2 files changed, 11 insertions(+), 6 deletions(-) diff --git a/examples/exp_configs/rl/multiagent/multiagent_straight_road.py b/examples/exp_configs/rl/multiagent/multiagent_straight_road.py index bb6f11a2c..31a1caad7 100644 --- a/examples/exp_configs/rl/multiagent/multiagent_straight_road.py +++ b/examples/exp_configs/rl/multiagent/multiagent_straight_road.py @@ -59,7 +59,7 @@ 'local_reward': True, 'lead_obs': True, # whether to reroute vehicles once they have exited - "reroute_on_exit": False, + "reroute_on_exit": True, # whether to use the MPG reward. 
Otherwise, defaults to a target velocity reward "mpg_reward": True, # how many AVs there can be at once (this is only for centralized critics) @@ -127,7 +127,7 @@ name="rl_highway_inflow") # SET UP FLOW PARAMETERS -warmup_steps = 0 +warmup_steps = 400 if additional_env_params['reroute_on_exit']: warmup_steps = 400 diff --git a/examples/train.py b/examples/train.py index 37d54ffc3..a259164da 100644 --- a/examples/train.py +++ b/examples/train.py @@ -192,10 +192,13 @@ def setup_exps_rllib(flow_params, alg_run = flags.algorithm.upper() if alg_run == "PPO": - from flow.algorithms.custom_ppo import CustomPPOTrainer - from ray.rllib.agents.ppo import DEFAULT_CONFIG - alg_run = CustomPPOTrainer - config = deepcopy(DEFAULT_CONFIG) + # from flow.algorithms.custom_ppo import CustomPPOTrainer + # from ray.rllib.agents.ppo import DEFAULT_CONFIG + # alg_run = CustomPPOTrainer + # config = deepcopy(DEFAULT_CONFIG) + agent_cls = get_agent_class(alg_run) + config = deepcopy(agent_cls._default_config) + config["num_workers"] = n_cpus config["horizon"] = horizon @@ -240,12 +243,14 @@ def setup_exps_rllib(flow_params, config["num_workers"] = n_cpus config["horizon"] = horizon + config["learning_starts"] = 0 config["buffer_size"] = 20000 # reduced to test if this is the source of memory problems if flags.grid_search: config["prioritized_replay"] = tune.grid_search(['True', 'False']) config["actor_lr"] = tune.grid_search([1e-3, 1e-4]) config["critic_lr"] = tune.grid_search([1e-3, 1e-4]) config["n_step"] = tune.grid_search([1, 10]) + elif alg_run == "MADDPG": from flow.algorithms.maddpg.maddpg import MADDPGTrainer, DEFAULT_CONFIG config = deepcopy(DEFAULT_CONFIG) From 083aaee3509422f6c7a776437fbb08b30362b515 Mon Sep 17 00:00:00 2001 From: Eugene Vinitsky Date: Mon, 11 May 2020 18:11:07 -0700 Subject: [PATCH 27/85] Minor changes to centralized PPO to upgrade it to 0.9.0 --- .../rl/multiagent/multiagent_straight_road.py | 6 +- flow/algorithms/centralized_PPO.py | 76 +++++++++---------- flow/algorithms/custom_ppo.py | 4 +- flow/envs/multiagent/i210.py | 14 ++-- flow/visualize/visualizer_rllib.py | 1 + scripts/ray_autoscale.yaml | 7 +- 6 files changed, 51 insertions(+), 57 deletions(-) diff --git a/examples/exp_configs/rl/multiagent/multiagent_straight_road.py b/examples/exp_configs/rl/multiagent/multiagent_straight_road.py index 31a1caad7..cd9c40f0d 100644 --- a/examples/exp_configs/rl/multiagent/multiagent_straight_road.py +++ b/examples/exp_configs/rl/multiagent/multiagent_straight_road.py @@ -77,7 +77,7 @@ # how many timesteps to anneal the headway curriculum over "speed_curriculum_iters": 100, # weight of the headway reward - "speed_reward_gain": 1.0 + "speed_reward_gain": 2.0 }) @@ -127,9 +127,9 @@ name="rl_highway_inflow") # SET UP FLOW PARAMETERS -warmup_steps = 400 +warmup_steps = 200 if additional_env_params['reroute_on_exit']: - warmup_steps = 400 + warmup_steps = 200 flow_params = dict( # name of the experiment diff --git a/flow/algorithms/centralized_PPO.py b/flow/algorithms/centralized_PPO.py index c1f2e75d6..1186e28fa 100644 --- a/flow/algorithms/centralized_PPO.py +++ b/flow/algorithms/centralized_PPO.py @@ -10,12 +10,12 @@ from ray import tune from ray.rllib.agents.ppo.ppo import PPOTrainer -from ray.rllib.agents.ppo.ppo_policy import PPOTFPolicy, BEHAVIOUR_LOGITS +from flow.algorithms.custom_ppo import CustomPPOTFPolicy from ray.rllib.evaluation.postprocessing import compute_advantages, \ Postprocessing from ray.rllib.policy.sample_batch import SampleBatch from ray.rllib.policy.tf_policy import 
LearningRateSchedule, \ - EntropyCoeffSchedule, ACTION_LOGP + EntropyCoeffSchedule from ray.rllib.models.modelv2 import ModelV2 from ray.rllib.models.tf.tf_modelv2 import TFModelV2 from ray.rllib.models.tf.recurrent_tf_modelv2 import RecurrentTFModelV2 @@ -184,46 +184,38 @@ def centralized_critic_postprocessing(policy, if policy.loss_initialized(): assert other_agent_batches is not None - time_span = (sample_batch['t'][0], sample_batch['t'][-1]) - # there's a new problem here, namely that a segment might not be continuous due to the rerouting - other_agent_timespans = {agent_id: - (other_agent_batches[agent_id][1]["t"][0], - other_agent_batches[agent_id][1]["t"][-1]) - for agent_id in other_agent_batches.keys()} + # time_span = (sample_batch['t'][0], sample_batch['t'][-1]) + # # there's a new problem here, namely that a segment might not be continuous due to the rerouting + # other_agent_timespans = {agent_id: + # (other_agent_batches[agent_id][1]["t"][0], + # other_agent_batches[agent_id][1]["t"][-1]) + # for agent_id in other_agent_batches.keys()} other_agent_times = {agent_id: other_agent_batches[agent_id][1]["t"] for agent_id in other_agent_batches.keys()} agent_time = sample_batch['t'] - # find agents whose time overlaps with the current agent - rel_agents = {agent_id: other_agent_time for agent_id, - other_agent_time in - other_agent_timespans.items() - if time_overlap(time_span, other_agent_time)} - if len(rel_agents) > 0: - other_obs = {agent_id: - other_agent_batches[agent_id][1]["obs"].copy() - for agent_id in rel_agents.keys()} - # padded_agent_obs = {agent_id: - # overlap_and_pad_agent( - # time_span, - # rel_agent_time, - # other_obs[agent_id]) - # for agent_id, - # rel_agent_time in rel_agents.items()} - padded_agent_obs = {agent_id: - fill_missing( - agent_time, - other_agent_times[agent_id], - other_obs[agent_id]) - for agent_id, - rel_agent_time in rel_agents.items()} - # okay, now we need to stack and sort - central_obs_list = [padded_obs for padded_obs in padded_agent_obs.values()] - try: - central_obs_batch = np.hstack((sample_batch["obs"], np.hstack(central_obs_list))) - except: - import ipdb; ipdb.set_trace() - else: - central_obs_batch = sample_batch["obs"] + # # find agents whose time overlaps with the current agent + rel_agents = {agent_id: other_agent_time for agent_id, other_agent_time in other_agent_times.items()} + # if len(rel_agents) > 0: + other_obs = {agent_id: + other_agent_batches[agent_id][1]["obs"].copy() + for agent_id in other_agent_batches.keys()} + # padded_agent_obs = {agent_id: + # overlap_and_pad_agent( + # time_span, + # rel_agent_time, + # other_obs[agent_id]) + # for agent_id, + # rel_agent_time in rel_agents.items()} + padded_agent_obs = {agent_id: + fill_missing( + agent_time, + other_agent_times[agent_id], + other_obs[agent_id]) + for agent_id, + rel_agent_time in rel_agents.items()} + # okay, now we need to stack and sort + central_obs_list = [padded_obs for padded_obs in padded_agent_obs.values()] + central_obs_batch = np.hstack((sample_batch["obs"], np.hstack(central_obs_list))) max_vf_agents = policy.model.max_num_agents num_agents = len(rel_agents) + 1 if num_agents < max_vf_agents: @@ -372,8 +364,8 @@ def loss_with_central_critic(policy, model, dist_class, train_batch): train_batch[Postprocessing.VALUE_TARGETS], train_batch[Postprocessing.ADVANTAGES], train_batch[SampleBatch.ACTIONS], - train_batch[BEHAVIOUR_LOGITS], - train_batch[ACTION_LOGP], + train_batch[SampleBatch.ACTION_DIST_INPUTS], + train_batch[SampleBatch.ACTION_LOGP], 
train_batch[SampleBatch.VF_PREDS], action_dist, policy.central_value_function, @@ -533,7 +525,7 @@ def kl_and_loss_stats(policy, train_batch): "avg_rew": train_batch["rewards"][-1] } -CCPPO = PPOTFPolicy.with_updates( +CCPPO = CustomPPOTFPolicy.with_updates( name="CCPPO", postprocess_fn=centralized_critic_postprocessing, loss_fn=new_ppo_surrogate_loss, diff --git a/flow/algorithms/custom_ppo.py b/flow/algorithms/custom_ppo.py index 050d7d1fd..dab9bed8c 100644 --- a/flow/algorithms/custom_ppo.py +++ b/flow/algorithms/custom_ppo.py @@ -12,7 +12,7 @@ Postprocessing from ray.rllib.policy.sample_batch import SampleBatch from ray.rllib.policy.tf_policy import LearningRateSchedule, \ - EntropyCoeffSchedule, ACTION_LOGP + EntropyCoeffSchedule from ray.rllib.policy.tf_policy_template import build_tf_policy from ray.rllib.utils.explained_variance import explained_variance from ray.rllib.utils.tf_ops import make_tf_callable @@ -133,7 +133,7 @@ def ppo_surrogate_loss(policy, model, dist_class, train_batch): train_batch[Postprocessing.ADVANTAGES], train_batch[SampleBatch.ACTIONS], train_batch[BEHAVIOUR_LOGITS], - train_batch[ACTION_LOGP], + train_batch[SampleBatch.ACTION_LOGP], train_batch[SampleBatch.VF_PREDS], action_dist, model.value_function(), diff --git a/flow/envs/multiagent/i210.py b/flow/envs/multiagent/i210.py index 0aac22240..f4792d51b 100644 --- a/flow/envs/multiagent/i210.py +++ b/flow/envs/multiagent/i210.py @@ -76,16 +76,16 @@ def __init__(self, env_params, sim_params, network, simulator='traci'): self.exit_edge = "119257908#2" self.mpg_reward = env_params.additional_params["mpg_reward"] # whether to add a slight reward for opening up a gap that will be annealed out N iterations in - self.headway_curriculum = env_params.additional_params.get("headway_curriculum", False) + self.headway_curriculum = env_params.additional_params["headway_curriculum"] # how many timesteps to anneal the headway curriculum over - self.headway_curriculum_iters = env_params.additional_params.get("headway_curriculum_iters", 0) - self.headway_reward_gain = env_params.additional_params.get("headway_reward_gain", 0.0) + self.headway_curriculum_iters = env_params.additional_params["headway_curriculum_iters"] + self.headway_reward_gain = env_params.additional_params["headway_reward_gain"] # whether to add a slight reward for opening up a gap that will be annealed out N iterations in - self.speed_curriculum = env_params.additional_params.get("speed_curriculum", False) + self.speed_curriculum = env_params.additional_params["speed_curriculum"] # how many timesteps to anneal the headway curriculum over - self.speed_curriculum_iters = env_params.additional_params.get("speed_curriculum_iters", 0) - self.speed_reward_gain = env_params.additional_params.get("speed_reward_gain", 0.0) + self.speed_curriculum_iters = env_params.additional_params["speed_curriculum_iters"] + self.speed_reward_gain = env_params.additional_params["speed_reward_gain"] self.num_training_iters = 0 self.leader = [] @@ -229,7 +229,7 @@ def compute_reward(self, rl_actions, **kwargs): speed_reward = 0.0 if speed >= 0: speed_reward = ((des_speed - np.abs(speed - des_speed)) ** 2) / (des_speed ** 2) - scaling_factor = max(0, 1 - self.num_training_iters / self.headway_curriculum_iters) + scaling_factor = max(0, 1 - self.num_training_iters / self.speed_curriculum_iters) rewards[veh_id] += speed_reward * scaling_factor * self.speed_reward_gain # print('time to get reward is ', time() - t) diff --git a/flow/visualize/visualizer_rllib.py 
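A hedged sketch (not part of any patch) of what the rewritten centralized_critic_postprocessing above is building, assuming fill_missing returns the other agent's observations aligned to the current agent's timesteps with zero rows where that agent was absent: the critic input is the agent's own obs, every other agent's aligned obs, zero-padded out to max_num_agents agents so it matches the obs_space.shape[0] * max_num_agents input of the central value network.

    import numpy as np

    def central_obs_batch(own_obs, aligned_other_obs, max_num_agents):
        """Stack own and other-agent observations and zero-pad to a fixed width."""
        obs_dim = own_obs.shape[1]
        stacked = np.hstack([own_obs] + list(aligned_other_obs))
        pad_width = max_num_agents * obs_dim - stacked.shape[1]
        if pad_width > 0:
            stacked = np.hstack(
                [stacked, np.zeros((stacked.shape[0], pad_width))])
        return stacked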
b/flow/visualize/visualizer_rllib.py index d6b7506fd..7ff089eeb 100644 --- a/flow/visualize/visualizer_rllib.py +++ b/flow/visualize/visualizer_rllib.py @@ -163,6 +163,7 @@ def visualizer_rllib(args): if hasattr(env, "reroute_on_exit"): env.reroute_on_exit = False + env.env_params.warmup_steps = 0 if args.render_mode == 'sumo_gui': env.sim_params.render = True # set to True after initializing agent and env diff --git a/scripts/ray_autoscale.yaml b/scripts/ray_autoscale.yaml index c43685ea1..c4f2e31a1 100644 --- a/scripts/ray_autoscale.yaml +++ b/scripts/ray_autoscale.yaml @@ -40,7 +40,7 @@ auth: # http://boto3.readthedocs.io/en/latest/reference/services/ec2.html#EC2.ServiceResource.create_instances head_node: InstanceType: c4.4xlarge - ImageId: ami-0b489700e7f810707 # Flow AMI (Ubuntu) + ImageId: ami-0c047f3ddd3939b30 # Flow AMI (Ubuntu) KeyName: MyKeyPair2 InstanceMarketOptions: MarketType: spot @@ -56,7 +56,7 @@ head_node: # http://boto3.readthedocs.io/en/latest/reference/services/ec2.html#EC2.ServiceResource.create_instances worker_nodes: InstanceType: c4.4xlarge - ImageId: ami-0b489700e7f810707 # Flow AMI (Ubuntu) + ImageId: ami-0c047f3ddd3939b30 # Flow AMI (Ubuntu) KeyName: MyKeyPair2 #Run workers on spot by default. Comment this out to use on-demand. @@ -72,7 +72,7 @@ setup_commands: - cd flow && git fetch && git checkout origin/flow_maddpg head_setup_commands: - - pip install ray==0.8.0 + - pip install -U https://s3-us-west-2.amazonaws.com/ray-wheels/latest/ray-0.9.0.dev0-cp37-cp37m-manylinux1_x86_64.whl - pip install tabulate - pip install boto3==1.10.45 # 1.4.8 adds InstanceMarketOptions - pip install awscli==1.16.309 @@ -81,6 +81,7 @@ head_setup_commands: - pip install torch==1.3.1 - pip install tensorflow==2.0.0 - pip install lz4 + - pip install dm-tree # Custom commands that will be run on worker nodes after common setup. 
worker_setup_commands: [] From ba3210f918b005dfb536c8c098323c93bce43c2e Mon Sep 17 00:00:00 2001 From: Eugene Vinitsky Date: Mon, 11 May 2020 19:48:24 -0700 Subject: [PATCH 28/85] Visuallizer rllib fixes for centralized PPO --- .../exp_configs/rl/multiagent/multiagent_straight_road.py | 4 ++-- flow/envs/multiagent/i210.py | 3 +++ flow/visualize/visualizer_rllib.py | 6 ++++++ 3 files changed, 11 insertions(+), 2 deletions(-) diff --git a/examples/exp_configs/rl/multiagent/multiagent_straight_road.py b/examples/exp_configs/rl/multiagent/multiagent_straight_road.py index cd9c40f0d..0ec9ccd64 100644 --- a/examples/exp_configs/rl/multiagent/multiagent_straight_road.py +++ b/examples/exp_configs/rl/multiagent/multiagent_straight_road.py @@ -34,7 +34,7 @@ additional_net_params = ADDITIONAL_NET_PARAMS.copy() additional_net_params.update({ # length of the highway - "length": 2000, + "length": 2500, # number of lanes "lanes": 1, # speed limit for all edges @@ -56,7 +56,7 @@ 'max_accel': 2.6, 'max_decel': 4.5, 'target_velocity': 12.0, - 'local_reward': True, + 'local_reward': False, 'lead_obs': True, # whether to reroute vehicles once they have exited "reroute_on_exit": True, diff --git a/flow/envs/multiagent/i210.py b/flow/envs/multiagent/i210.py index f4792d51b..642271479 100644 --- a/flow/envs/multiagent/i210.py +++ b/flow/envs/multiagent/i210.py @@ -183,6 +183,9 @@ def compute_reward(self, rl_actions, **kwargs): rewards[rl_id] = 0 if self.mpg_reward: rewards[rl_id] = miles_per_gallon(self, rl_id) / 100.0 + follow_id = self.k.vehicle.get_follower(rl_id) + if follow_id not in ["", None]: + rewards[rl_id] += miles_per_gallon(self, follow_id) / 100.0 else: speeds = [] follow_speed = self.k.vehicle.get_speed(self.k.vehicle.get_follower(rl_id)) diff --git a/flow/visualize/visualizer_rllib.py b/flow/visualize/visualizer_rllib.py index 7ff089eeb..c7d30947b 100644 --- a/flow/visualize/visualizer_rllib.py +++ b/flow/visualize/visualizer_rllib.py @@ -91,6 +91,11 @@ def visualizer_rllib(args): sys.exit(1) if args.run: agent_cls = get_agent_class(args.run) + elif config['env_config']['run'] == "": + from flow.algorithms.centralized_PPO import CCTrainer, CentralizedCriticModel + from ray.rllib.models import ModelCatalog + agent_cls = CCTrainer + ModelCatalog.register_custom_model("cc_model", CentralizedCriticModel) elif config_run: agent_cls = get_agent_class(config_run) else: @@ -163,6 +168,7 @@ def visualizer_rllib(args): if hasattr(env, "reroute_on_exit"): env.reroute_on_exit = False + env.env_params.horizon += env.env_params.warmup_steps env.env_params.warmup_steps = 0 if args.render_mode == 'sumo_gui': From 3f4da916690abd56e5ede602dedaa35179af8e4d Mon Sep 17 00:00:00 2001 From: Eugene Vinitsky Date: Mon, 11 May 2020 19:51:27 -0700 Subject: [PATCH 29/85] Add the ability to look back in the platoon for the MPG reward --- .../exp_configs/rl/multiagent/multiagent_i210.py | 2 ++ .../rl/multiagent/multiagent_straight_road.py | 4 +++- flow/envs/multiagent/i210.py | 12 +++++++++--- 3 files changed, 14 insertions(+), 4 deletions(-) diff --git a/examples/exp_configs/rl/multiagent/multiagent_i210.py b/examples/exp_configs/rl/multiagent/multiagent_i210.py index ba38471bc..19a093504 100644 --- a/examples/exp_configs/rl/multiagent/multiagent_i210.py +++ b/examples/exp_configs/rl/multiagent/multiagent_i210.py @@ -47,6 +47,8 @@ "local_reward": True, # whether to use the MPG reward. 
Otherwise, defaults to a target velocity reward "mpg_reward": True, + # how many vehicles to look back for the MPG reward + "look_back_length": 1, # whether to reroute vehicles once they have exited "reroute_on_exit": False, 'target_velocity': 12.0, diff --git a/examples/exp_configs/rl/multiagent/multiagent_straight_road.py b/examples/exp_configs/rl/multiagent/multiagent_straight_road.py index 0ec9ccd64..b4049700b 100644 --- a/examples/exp_configs/rl/multiagent/multiagent_straight_road.py +++ b/examples/exp_configs/rl/multiagent/multiagent_straight_road.py @@ -56,12 +56,14 @@ 'max_accel': 2.6, 'max_decel': 4.5, 'target_velocity': 12.0, - 'local_reward': False, + 'local_reward': True, 'lead_obs': True, # whether to reroute vehicles once they have exited "reroute_on_exit": True, # whether to use the MPG reward. Otherwise, defaults to a target velocity reward "mpg_reward": True, + # how many vehicles to look back for the MPG reward + "look_back_length": 3, # how many AVs there can be at once (this is only for centralized critics) "max_num_agents": 10, diff --git a/flow/envs/multiagent/i210.py b/flow/envs/multiagent/i210.py index 642271479..eadb56b32 100644 --- a/flow/envs/multiagent/i210.py +++ b/flow/envs/multiagent/i210.py @@ -75,6 +75,8 @@ def __init__(self, env_params, sim_params, network, simulator='traci'): self.entrance_edge = "119257914" self.exit_edge = "119257908#2" self.mpg_reward = env_params.additional_params["mpg_reward"] + self.look_back_length = env_params.additional_params["look_back_length"] + # whether to add a slight reward for opening up a gap that will be annealed out N iterations in self.headway_curriculum = env_params.additional_params["headway_curriculum"] # how many timesteps to anneal the headway curriculum over @@ -183,9 +185,13 @@ def compute_reward(self, rl_actions, **kwargs): rewards[rl_id] = 0 if self.mpg_reward: rewards[rl_id] = miles_per_gallon(self, rl_id) / 100.0 - follow_id = self.k.vehicle.get_follower(rl_id) - if follow_id not in ["", None]: - rewards[rl_id] += miles_per_gallon(self, follow_id) / 100.0 + follow_id = rl_id + for i in range(self.look_back_length): + follow_id = self.k.vehicle.get_follower(follow_id) + if follow_id not in ["", None]: + rewards[rl_id] += miles_per_gallon(self, follow_id) / 100.0 + else: + break else: speeds = [] follow_speed = self.k.vehicle.get_speed(self.k.vehicle.get_follower(rl_id)) From ad694c5c3b9cef7041931f280d2c74c616db41bd Mon Sep 17 00:00:00 2001 From: Eugene Vinitsky Date: Wed, 13 May 2020 12:13:48 -0400 Subject: [PATCH 30/85] Minor --- .../rl/multiagent/multiagent_straight_road.py | 11 +++++---- flow/controllers/car_following_models.py | 23 +++++++++++-------- 2 files changed, 20 insertions(+), 14 deletions(-) diff --git a/examples/exp_configs/rl/multiagent/multiagent_straight_road.py b/examples/exp_configs/rl/multiagent/multiagent_straight_road.py index b4049700b..7cf9d7a28 100644 --- a/examples/exp_configs/rl/multiagent/multiagent_straight_road.py +++ b/examples/exp_configs/rl/multiagent/multiagent_straight_road.py @@ -56,14 +56,14 @@ 'max_accel': 2.6, 'max_decel': 4.5, 'target_velocity': 12.0, - 'local_reward': True, + 'local_reward': False, 'lead_obs': True, # whether to reroute vehicles once they have exited - "reroute_on_exit": True, + "reroute_on_exit": False, # whether to use the MPG reward. 
Otherwise, defaults to a target velocity reward "mpg_reward": True, # how many vehicles to look back for the MPG reward - "look_back_length": 3, + "look_back_length": 10, # how many AVs there can be at once (this is only for centralized critics) "max_num_agents": 10, @@ -79,7 +79,7 @@ # how many timesteps to anneal the headway curriculum over "speed_curriculum_iters": 100, # weight of the headway reward - "speed_reward_gain": 2.0 + "speed_reward_gain": 10.0 }) @@ -129,7 +129,8 @@ name="rl_highway_inflow") # SET UP FLOW PARAMETERS -warmup_steps = 200 +# TODO(@evinitsky) how do we warm up the network without setting a wave in to start? +warmup_steps = 0 if additional_env_params['reroute_on_exit']: warmup_steps = 200 diff --git a/flow/controllers/car_following_models.py b/flow/controllers/car_following_models.py index 42c9b2a9b..124b4ea59 100755 --- a/flow/controllers/car_following_models.py +++ b/flow/controllers/car_following_models.py @@ -647,15 +647,20 @@ def __init__(self, def get_accel(self, env): """See parent class.""" - lead_id = env.k.vehicle.get_leader(self.veh_id) - if not lead_id: # no car ahead - if self.want_max_accel: - return self.max_accel - - v_l = env.k.vehicle.get_speed(lead_id) - v = env.k.vehicle.get_speed(self.veh_id) - s = env.k.vehicle.get_headway(self.veh_id) - return self.accel_func(v, v_l, s) + # TODO(@evinitsky) this is a hack to make rerouting works. This gets vehicles into the network + # without generating waves. + if env.time_counter < env.env_params.warmup_steps: + return None + else: + lead_id = env.k.vehicle.get_leader(self.veh_id) + if not lead_id: # no car ahead + if self.want_max_accel: + return self.max_accel + + v_l = env.k.vehicle.get_speed(lead_id) + v = env.k.vehicle.get_speed(self.veh_id) + s = env.k.vehicle.get_headway(self.veh_id) + return self.accel_func(v, v_l, s) def accel_func(self, v, v_l, s): """Compute the acceleration function.""" From 429df10ad4a1b6d5a044de826c9f659f0509816a Mon Sep 17 00:00:00 2001 From: Eugene Vinitsky Date: Wed, 13 May 2020 12:24:01 -0400 Subject: [PATCH 31/85] Add MPG reward --- flow/core/kernel/vehicle/base.py | 13 +++++ flow/core/kernel/vehicle/traci.py | 12 ++++- flow/core/rewards.py | 90 +++++++++++++++++++++++++++++++ 3 files changed, 114 insertions(+), 1 deletion(-) diff --git a/flow/core/kernel/vehicle/base.py b/flow/core/kernel/vehicle/base.py index c68d68c3a..599afe3d5 100644 --- a/flow/core/kernel/vehicle/base.py +++ b/flow/core/kernel/vehicle/base.py @@ -297,6 +297,19 @@ def get_num_not_departed(self): """ raise NotImplementedError + def get_fuel_consumption(selfself, veh_id, error=-1001): + """Return the mpg / s of the specified vehicle. + Parameters + ---------- + veh_id : str or list of str + vehicle id, or list of vehicle ids + error : any, optional + value that is returned if the vehicle is not found + Returns + ------- + float + """ + def get_speed(self, veh_id, error=-1001): """Return the speed of the specified vehicle. 
diff --git a/flow/core/kernel/vehicle/traci.py b/flow/core/kernel/vehicle/traci.py index d165dbdea..134bac49f 100644 --- a/flow/core/kernel/vehicle/traci.py +++ b/flow/core/kernel/vehicle/traci.py @@ -335,7 +335,8 @@ def _add_departed(self, veh_id, veh_type): tc.VAR_EDGES, tc.VAR_POSITION, tc.VAR_ANGLE, - tc.VAR_SPEED_WITHOUT_TRACI + tc.VAR_SPEED_WITHOUT_TRACI, + tc.VAR_FUELCONSUMPTION ]) self.kernel_api.vehicle.subscribeLeader(veh_id, 2000) @@ -370,6 +371,8 @@ def _add_departed(self, veh_id, veh_type): self.kernel_api.vehicle.getLaneIndex(veh_id) self.__sumo_obs[veh_id][tc.VAR_SPEED] = \ self.kernel_api.vehicle.getSpeed(veh_id) + self.__sumo_obs[veh_id][tc.VAR_FUELCONSUMPTION] = \ + self.kernel_api.vehicle.getFuelConsumption(veh_id) # make sure that the order of rl_ids is kept sorted self.__rl_ids.sort() @@ -533,6 +536,13 @@ def get_num_not_departed(self): """See parent class.""" return self.num_not_departed + def get_fuel_consumption(self, veh_id, error=-1001): + """Return fuel consumption in gallons/s.""" + ml_to_gallons = 0.000264172 + if isinstance(veh_id, (list, np.ndarray)): + return [self.get_fuel_consumption(vehID, error) for vehID in veh_id] + return self.__sumo_obs.get(veh_id, {}).get(tc.VAR_FUELCONSUMPTION, error) * ml_to_gallons + def get_previous_speed(self, veh_id, error=-1001): """See parent class.""" if isinstance(veh_id, (list, np.ndarray)): diff --git a/flow/core/rewards.py b/flow/core/rewards.py index 6de472af2..91f91e088 100755 --- a/flow/core/rewards.py +++ b/flow/core/rewards.py @@ -330,3 +330,93 @@ def energy_consumption(env, gain=.001): power += M * speed * accel + M * g * Cr * speed + 0.5 * rho * A * Ca * speed ** 3 return -gain * power + + +def veh_energy_consumption(env, veh_id, gain=.001): + """Calculate power consumption of a vehicle. + Assumes vehicle is an average sized vehicle. + The power calculated here is the lower bound of the actual power consumed + by a vehicle. + """ + power = 0 + + M = 1200 # mass of average sized vehicle (kg) + g = 9.81 # gravitational acceleration (m/s^2) + Cr = 0.005 # rolling resistance coefficient + Ca = 0.3 # aerodynamic drag coefficient + rho = 1.225 # air density (kg/m^3) + A = 2.6 # vehicle cross sectional area (m^2) + speed = env.k.vehicle.get_speed(veh_id) + prev_speed = env.k.vehicle.get_previous_speed(veh_id) + + accel = abs(speed - prev_speed) / env.sim_step + + power += M * speed * accel + M * g * Cr * speed + 0.5 * rho * A * Ca * speed ** 3 + + return -gain * power + + +def miles_per_megajoule(env, veh_id=None, gain=.001): + """Calculate miles per mega-joule of either a particular vehicle or the total average of all the vehilces. + Assumes vehicle is an average sized vehicle. + The power calculated here is the lower bound of the actual power consumed + by a vehicle. 
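A quick reference-only unit check for the conversions introduced above (not part of any patch): the kernel treats SUMO's getFuelConsumption as mL/s and converts it to gal/s with ml_to_gallons = 0.000264172, and miles_per_gallon then divides speed by that rate and by 1609 m per mile; note that despite the base-class docstring saying "mpg / s", what get_fuel_consumption returns is gallons per second.

    ML_TO_GALLONS = 0.000264172  # one milliliter expressed in US gallons
    METERS_PER_MILE = 1609.0

    def mpg_from_fuel_rate(speed_m_s, fuel_ml_per_s):
        """Instantaneous miles per gallon from speed (m/s) and fuel use (mL/s)."""
        gallons_per_s = fuel_ml_per_s * ML_TO_GALLONS
        return (speed_m_s / gallons_per_s) / METERS_PER_MILE

    # e.g. a vehicle burning 1.5 mL/s while cruising at 20 m/s:
    #   20 / (1.5 * 0.000264172) ~ 50,470 m per gallon, i.e. ~31.4 mpg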
+ """ + mpj = 0 + counter = 0 + if not isinstance(veh_id, list): + speed = env.k.vehicle.get_speed(veh_id) + # convert to be positive since the function called is a penalty + power = -veh_energy_consumption(env, veh_id, gain=1.0) + if power > 0 and speed >= 0.0: + # meters / joule is (v * \delta t) / (power * \delta t) + mpj = speed / power + else: + for veh_id in env.k.vehicle.get_ids(): + speed = env.k.vehicle.get_speed(veh_id) + # convert to be positive since the function called is a penalty + power = -veh_energy_consumption(env, veh_id, gain=1.0) + if power > 0 and speed >= 0.0: + counter += 1 + # meters / joule is (v * \delta t) / (power * \delta t) + mpj += speed / power + if counter > 0: + mpj /= counter + + # convert from meters per joule to miles per joule + mpj /= 1609.0 + # convert from miles per joule to miles per megajoule + mpj *= 10**6 + + return mpj * gain + + +def miles_per_gallon(env, veh_id=None, gain=.001): + """Calculate mpg of either a particular vehicle or the total average of all the vehilces. + Assumes vehicle is an average sized vehicle. + The power calculated here is the lower bound of the actual power consumed + by a vehicle. + """ + mpg = 0 + counter = 0 + if not isinstance(veh_id, list): + speed = env.k.vehicle.get_speed(veh_id) + gallons_per_s = env.k.vehicle.get_fuel_consumption(veh_id) + if gallons_per_s > 0 and speed >= 0.0: + # meters / gallon is (v * \delta t) / (gallons_s * \delta t) + mpg = speed / gallons_per_s + else: + for veh_id in env.k.vehicle.get_ids(): + speed = env.k.vehicle.get_speed(veh_id) + gallons_per_s = env.k.vehicle.get_fuel_consumption(veh_id) + if gallons_per_s > 0 and speed >= 0.0: + counter += 1 + # meters / gallon is (v * \delta t) / (gallons_per_s * \delta t) + mpg += speed / gallons_per_s + if counter > 0: + mpg /= counter + + # convert from meters per gallon to miles per gallon + mpg /= 1609.0 + + return mpg * gain From 76707c17427da7d5d0b407792a3049fbe892f4af Mon Sep 17 00:00:00 2001 From: Eugene Vinitsky Date: Wed, 13 May 2020 12:35:14 -0400 Subject: [PATCH 32/85] Flake8 --- examples/train.py | 14 ++------------ 1 file changed, 2 insertions(+), 12 deletions(-) diff --git a/examples/train.py b/examples/train.py index e34b2935c..d871beaeb 100644 --- a/examples/train.py +++ b/examples/train.py @@ -23,7 +23,9 @@ except ImportError: print("Stable-baselines not installed") +import ray from ray import tune +from ray.tune.registry import register_env from ray.rllib.env.group_agents_wrapper import _GroupAgentsWrapper try: from ray.rllib.agents.agent import get_agent_class @@ -124,8 +126,6 @@ def run_model_stablebaseline(flow_params, stable_baselines.* the trained model """ - from stable_baselines.common.vec_env import DummyVecEnv, SubprocVecEnv - from stable_baselines import PPO2 if num_cpus == 1: constructor = env_constructor(params=flow_params, version=0)() @@ -175,12 +175,6 @@ def setup_exps_rllib(flow_params, dict training configuration parameters """ - from ray import tune - from ray.tune.registry import register_env - try: - from ray.rllib.agents.agent import get_agent_class - except ImportError: - from ray.rllib.agents.registry import get_agent_class horizon = flow_params['env'].horizon @@ -263,8 +257,6 @@ def on_episode_end(info): def train_rllib(submodule, flags): """Train policies using the PPO algorithm in RLlib.""" - import ray - from ray.tune import run_experiments flow_params = submodule.flow_params flow_params['sim'].render = flags.render @@ -413,8 +405,6 @@ def train_h_baselines(flow_params, args, multiagent): def 
train_stable_baselines(submodule, flags): """Train policies using the PPO algorithm in stable-baselines.""" - from stable_baselines.common.vec_env import DummyVecEnv - from stable_baselines import PPO2 flow_params = submodule.flow_params # Path to the saved files From 4b615b467a38bf65350acef2448d47f0d8698685 Mon Sep 17 00:00:00 2001 From: Eugene Vinitsky Date: Wed, 13 May 2020 13:46:46 -0400 Subject: [PATCH 33/85] Minor --- .../rl/multiagent/multiagent_straight_road.py | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/examples/exp_configs/rl/multiagent/multiagent_straight_road.py b/examples/exp_configs/rl/multiagent/multiagent_straight_road.py index 7cf9d7a28..88cfba995 100644 --- a/examples/exp_configs/rl/multiagent/multiagent_straight_road.py +++ b/examples/exp_configs/rl/multiagent/multiagent_straight_road.py @@ -55,11 +55,11 @@ additional_env_params.update({ 'max_accel': 2.6, 'max_decel': 4.5, - 'target_velocity': 12.0, - 'local_reward': False, + 'target_velocity': 11.0, + 'local_reward': True, 'lead_obs': True, # whether to reroute vehicles once they have exited - "reroute_on_exit": False, + "reroute_on_exit": True, # whether to use the MPG reward. Otherwise, defaults to a target velocity reward "mpg_reward": True, # how many vehicles to look back for the MPG reward @@ -77,9 +77,9 @@ # whether to add a slight reward for traveling at a desired speed "speed_curriculum": True, # how many timesteps to anneal the headway curriculum over - "speed_curriculum_iters": 100, + "speed_curriculum_iters": 50, # weight of the headway reward - "speed_reward_gain": 10.0 + "speed_reward_gain": 2.0 }) @@ -132,7 +132,7 @@ # TODO(@evinitsky) how do we warm up the network without setting a wave in to start? warmup_steps = 0 if additional_env_params['reroute_on_exit']: - warmup_steps = 200 + warmup_steps = 400 flow_params = dict( # name of the experiment From ac8e206a94fe90f863aa45169a9f3d5c3b174717 Mon Sep 17 00:00:00 2001 From: Eugene Vinitsky Date: Thu, 14 May 2020 12:55:45 -0400 Subject: [PATCH 34/85] Minor --- examples/train.py | 2 +- flow/controllers/velocity_controllers.py | 75 ++++++++++++------------ 2 files changed, 40 insertions(+), 37 deletions(-) diff --git a/examples/train.py b/examples/train.py index a259164da..276ab6235 100644 --- a/examples/train.py +++ b/examples/train.py @@ -243,7 +243,7 @@ def setup_exps_rllib(flow_params, config["num_workers"] = n_cpus config["horizon"] = horizon - config["learning_starts"] = 0 + config["learning_starts"] = 10000 config["buffer_size"] = 20000 # reduced to test if this is the source of memory problems if flags.grid_search: config["prioritized_replay"] = tune.grid_search(['True', 'False']) diff --git a/flow/controllers/velocity_controllers.py b/flow/controllers/velocity_controllers.py index 2e4b7c22a..0095451be 100644 --- a/flow/controllers/velocity_controllers.py +++ b/flow/controllers/velocity_controllers.py @@ -74,46 +74,49 @@ def find_intersection_dist(self, env): def get_accel(self, env): """See parent class.""" - lead_id = env.k.vehicle.get_leader(self.veh_id) - this_vel = env.k.vehicle.get_speed(self.veh_id) - lead_vel = env.k.vehicle.get_speed(lead_id) - - if self.v_des is None: + if env.time_counter < env.env_params.warmup_steps: return None - - if lead_id is None: - v_cmd = self.v_des else: - dx = env.k.vehicle.get_headway(self.veh_id) - dv_minus = min(lead_vel - this_vel, 0) - - dx_1 = self.dx_1_0 + 1 / (2 * self.d_1) * dv_minus**2 - dx_2 = self.dx_2_0 + 1 / (2 * self.d_2) * dv_minus**2 - dx_3 = self.dx_3_0 + 1 / (2 
* self.d_3) * dv_minus**2 - v = min(max(lead_vel, 0), self.v_des) - # compute the desired velocity - if dx <= dx_1: - v_cmd = 0 - elif dx <= dx_2: - v_cmd = v * (dx - dx_1) / (dx_2 - dx_1) - elif dx <= dx_3: - v_cmd = v + (self.v_des - this_vel) * (dx - dx_2) \ - / (dx_3 - dx_2) - else: - v_cmd = self.v_des + lead_id = env.k.vehicle.get_leader(self.veh_id) + this_vel = env.k.vehicle.get_speed(self.veh_id) + lead_vel = env.k.vehicle.get_speed(lead_id) - edge = env.k.vehicle.get_edge(self.veh_id) + if self.v_des is None: + return None - if edge == "": - return None - - if self.find_intersection_dist(env) <= 10 and \ - env.k.vehicle.get_edge(self.veh_id) in self.danger_edges or \ - env.k.vehicle.get_edge(self.veh_id)[0] == ":": - return None - else: - # compute the acceleration from the desired velocity - return (v_cmd - this_vel) / env.sim_step + if lead_id is None: + v_cmd = self.v_des + else: + dx = env.k.vehicle.get_headway(self.veh_id) + dv_minus = min(lead_vel - this_vel, 0) + + dx_1 = self.dx_1_0 + 1 / (2 * self.d_1) * dv_minus**2 + dx_2 = self.dx_2_0 + 1 / (2 * self.d_2) * dv_minus**2 + dx_3 = self.dx_3_0 + 1 / (2 * self.d_3) * dv_minus**2 + v = min(max(lead_vel, 0), self.v_des) + # compute the desired velocity + if dx <= dx_1: + v_cmd = 0 + elif dx <= dx_2: + v_cmd = v * (dx - dx_1) / (dx_2 - dx_1) + elif dx <= dx_3: + v_cmd = v + (self.v_des - this_vel) * (dx - dx_2) \ + / (dx_3 - dx_2) + else: + v_cmd = self.v_des + + edge = env.k.vehicle.get_edge(self.veh_id) + + if edge == "": + return None + + if self.find_intersection_dist(env) <= 10 and \ + env.k.vehicle.get_edge(self.veh_id) in self.danger_edges or \ + env.k.vehicle.get_edge(self.veh_id)[0] == ":": + return None + else: + # compute the acceleration from the desired velocity + return (v_cmd - this_vel) / env.sim_step class NonLocalFollowerStopper(FollowerStopper): From 2f823d8a446eb5d8cf60e07253c03fbee7da4f56 Mon Sep 17 00:00:00 2001 From: Eugene Vinitsky Date: Thu, 14 May 2020 14:34:50 -0400 Subject: [PATCH 35/85] Flake8 --- flow/core/rewards.py | 69 ++++++++++++++++++++++++++------------------ 1 file changed, 41 insertions(+), 28 deletions(-) diff --git a/flow/core/rewards.py b/flow/core/rewards.py index 91f91e088..829316424 100755 --- a/flow/core/rewards.py +++ b/flow/core/rewards.py @@ -356,32 +356,38 @@ def veh_energy_consumption(env, veh_id, gain=.001): return -gain * power -def miles_per_megajoule(env, veh_id=None, gain=.001): +def miles_per_megajoule(env, veh_ids=None, gain=.001): """Calculate miles per mega-joule of either a particular vehicle or the total average of all the vehilces. Assumes vehicle is an average sized vehicle. The power calculated here is the lower bound of the actual power consumed by a vehicle. + + Parameters + ---------- + env : flow.envs.Env + the environment variable, which contains information on the current + state of the system. 
+ veh_ids : [list] + list of veh_ids to compute the reward over + gain : float + scaling factor for the reward """ mpj = 0 counter = 0 - if not isinstance(veh_id, list): + if not isinstance(veh_ids, list): + veh_ids = [veh_ids] + elif veh_ids is None: + veh_ids = env.k.vehicle.get_ids() + for veh_id in veh_ids: speed = env.k.vehicle.get_speed(veh_id) # convert to be positive since the function called is a penalty power = -veh_energy_consumption(env, veh_id, gain=1.0) if power > 0 and speed >= 0.0: + counter += 1 # meters / joule is (v * \delta t) / (power * \delta t) - mpj = speed / power - else: - for veh_id in env.k.vehicle.get_ids(): - speed = env.k.vehicle.get_speed(veh_id) - # convert to be positive since the function called is a penalty - power = -veh_energy_consumption(env, veh_id, gain=1.0) - if power > 0 and speed >= 0.0: - counter += 1 - # meters / joule is (v * \delta t) / (power * \delta t) - mpj += speed / power - if counter > 0: - mpj /= counter + mpj += speed / power + if counter > 0: + mpj /= counter # convert from meters per joule to miles per joule mpj /= 1609.0 @@ -391,30 +397,37 @@ def miles_per_megajoule(env, veh_id=None, gain=.001): return mpj * gain -def miles_per_gallon(env, veh_id=None, gain=.001): +def miles_per_gallon(env, veh_ids=None, gain=.001): """Calculate mpg of either a particular vehicle or the total average of all the vehilces. Assumes vehicle is an average sized vehicle. The power calculated here is the lower bound of the actual power consumed by a vehicle. + + Parameters + ---------- + env : flow.envs.Env + the environment variable, which contains information on the current + state of the system. + veh_ids : [list] + list of veh_ids to compute the reward over + gain : float + scaling factor for the reward """ mpg = 0 counter = 0 - if not isinstance(veh_id, list): + if not isinstance(veh_ids, list): + veh_ids = [veh_ids] + elif veh_ids is None: + veh_ids = env.k.vehicle.get_ids() + for veh_id in veh_ids: speed = env.k.vehicle.get_speed(veh_id) gallons_per_s = env.k.vehicle.get_fuel_consumption(veh_id) if gallons_per_s > 0 and speed >= 0.0: - # meters / gallon is (v * \delta t) / (gallons_s * \delta t) - mpg = speed / gallons_per_s - else: - for veh_id in env.k.vehicle.get_ids(): - speed = env.k.vehicle.get_speed(veh_id) - gallons_per_s = env.k.vehicle.get_fuel_consumption(veh_id) - if gallons_per_s > 0 and speed >= 0.0: - counter += 1 - # meters / gallon is (v * \delta t) / (gallons_per_s * \delta t) - mpg += speed / gallons_per_s - if counter > 0: - mpg /= counter + counter += 1 + # meters / gallon is (v * \delta t) / (gallons_per_s * \delta t) + mpg += speed / gallons_per_s + if counter > 0: + mpg /= counter # convert from meters per gallon to miles per gallon mpg /= 1609.0 From 91f1498e4089cbaefe3d604cbe093e9f2ab4c720 Mon Sep 17 00:00:00 2001 From: Eugene Vinitsky Date: Thu, 14 May 2020 14:37:48 -0400 Subject: [PATCH 36/85] Pydoc style --- flow/core/kernel/vehicle/base.py | 1 + flow/core/rewards.py | 7 +++++-- 2 files changed, 6 insertions(+), 2 deletions(-) diff --git a/flow/core/kernel/vehicle/base.py b/flow/core/kernel/vehicle/base.py index 599afe3d5..706504027 100644 --- a/flow/core/kernel/vehicle/base.py +++ b/flow/core/kernel/vehicle/base.py @@ -299,6 +299,7 @@ def get_num_not_departed(self): def get_fuel_consumption(selfself, veh_id, error=-1001): """Return the mpg / s of the specified vehicle. 
+ Parameters ---------- veh_id : str or list of str diff --git a/flow/core/rewards.py b/flow/core/rewards.py index 829316424..f92fb9f3c 100755 --- a/flow/core/rewards.py +++ b/flow/core/rewards.py @@ -334,6 +334,7 @@ def energy_consumption(env, gain=.001): def veh_energy_consumption(env, veh_id, gain=.001): """Calculate power consumption of a vehicle. + Assumes vehicle is an average sized vehicle. The power calculated here is the lower bound of the actual power consumed by a vehicle. @@ -357,7 +358,8 @@ def veh_energy_consumption(env, veh_id, gain=.001): def miles_per_megajoule(env, veh_ids=None, gain=.001): - """Calculate miles per mega-joule of either a particular vehicle or the total average of all the vehilces. + """Calculate miles per mega-joule of either a particular vehicle or the total average of all the vehicles. + Assumes vehicle is an average sized vehicle. The power calculated here is the lower bound of the actual power consumed by a vehicle. @@ -398,7 +400,8 @@ def miles_per_megajoule(env, veh_ids=None, gain=.001): def miles_per_gallon(env, veh_ids=None, gain=.001): - """Calculate mpg of either a particular vehicle or the total average of all the vehilces. + """Calculate mpg of either a particular vehicle or the total average of all the vehicles. + Assumes vehicle is an average sized vehicle. The power calculated here is the lower bound of the actual power consumed by a vehicle. From 44d682c82afd7424711145ef2f54c37fb476b2b9 Mon Sep 17 00:00:00 2001 From: Eugene Vinitsky Date: Thu, 14 May 2020 15:18:29 -0400 Subject: [PATCH 37/85] Minor --- scripts/ray_autoscale.yaml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/scripts/ray_autoscale.yaml b/scripts/ray_autoscale.yaml index c4f2e31a1..5d89f02f9 100644 --- a/scripts/ray_autoscale.yaml +++ b/scripts/ray_autoscale.yaml @@ -39,7 +39,7 @@ auth: # For more documentation on available fields, see: # http://boto3.readthedocs.io/en/latest/reference/services/ec2.html#EC2.ServiceResource.create_instances head_node: - InstanceType: c4.4xlarge + InstanceType: c4.8xlarge ImageId: ami-0c047f3ddd3939b30 # Flow AMI (Ubuntu) KeyName: MyKeyPair2 InstanceMarketOptions: @@ -55,7 +55,7 @@ head_node: # For more documentation on available fields, see: # http://boto3.readthedocs.io/en/latest/reference/services/ec2.html#EC2.ServiceResource.create_instances worker_nodes: - InstanceType: c4.4xlarge + InstanceType: c4.8xlarge ImageId: ami-0c047f3ddd3939b30 # Flow AMI (Ubuntu) KeyName: MyKeyPair2 From 1b3172e32f79cd34ed037956fbf2ef9aba9f0940 Mon Sep 17 00:00:00 2001 From: Eugene Vinitsky Date: Thu, 14 May 2020 15:19:23 -0400 Subject: [PATCH 38/85] Fix none condition handling --- flow/core/rewards.py | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/flow/core/rewards.py b/flow/core/rewards.py index f92fb9f3c..3cca916f5 100755 --- a/flow/core/rewards.py +++ b/flow/core/rewards.py @@ -376,10 +376,10 @@ def miles_per_megajoule(env, veh_ids=None, gain=.001): """ mpj = 0 counter = 0 - if not isinstance(veh_ids, list): - veh_ids = [veh_ids] - elif veh_ids is None: + if veh_ids is None: veh_ids = env.k.vehicle.get_ids() + elif not isinstance(veh_ids, list): + veh_ids = [veh_ids] for veh_id in veh_ids: speed = env.k.vehicle.get_speed(veh_id) # convert to be positive since the function called is a penalty @@ -418,10 +418,10 @@ def miles_per_gallon(env, veh_ids=None, gain=.001): """ mpg = 0 counter = 0 - if not isinstance(veh_ids, list): - veh_ids = [veh_ids] - elif veh_ids is None: + if veh_ids is None: veh_ids = 
env.k.vehicle.get_ids() + elif not isinstance(veh_ids, list): + veh_ids = [veh_ids] for veh_id in veh_ids: speed = env.k.vehicle.get_speed(veh_id) gallons_per_s = env.k.vehicle.get_fuel_consumption(veh_id) From 6d37f1523caa163a00a3ee75cbbae31822a930d6 Mon Sep 17 00:00:00 2001 From: Eugene Vinitsky Date: Thu, 14 May 2020 17:20:38 -0400 Subject: [PATCH 39/85] Minor --- .../exp_configs/rl/multiagent/multiagent_straight_road.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/examples/exp_configs/rl/multiagent/multiagent_straight_road.py b/examples/exp_configs/rl/multiagent/multiagent_straight_road.py index 88cfba995..29642b599 100644 --- a/examples/exp_configs/rl/multiagent/multiagent_straight_road.py +++ b/examples/exp_configs/rl/multiagent/multiagent_straight_road.py @@ -56,7 +56,7 @@ 'max_accel': 2.6, 'max_decel': 4.5, 'target_velocity': 11.0, - 'local_reward': True, + 'local_reward': False, 'lead_obs': True, # whether to reroute vehicles once they have exited "reroute_on_exit": True, @@ -75,7 +75,7 @@ "headway_reward_gain": 1.0, # whether to add a slight reward for traveling at a desired speed - "speed_curriculum": True, + "speed_curriculum": False, # how many timesteps to anneal the headway curriculum over "speed_curriculum_iters": 50, # weight of the headway reward From 12c0f36c8c4de1afbf1ad5bf83af5fa06c5212cc Mon Sep 17 00:00:00 2001 From: Eugene Vinitsky Date: Fri, 15 May 2020 00:11:51 -0400 Subject: [PATCH 40/85] Downgrade to 0.8.0 --- examples/exp_configs/non_rl/highway_single.py | 23 ++++++++++--------- .../rl/multiagent/multiagent_straight_road.py | 15 ++++-------- examples/train.py | 11 ++++----- flow/algorithms/centralized_PPO.py | 18 ++++++++++----- flow/algorithms/custom_ppo.py | 4 ++-- flow/controllers/base_controller.py | 2 ++ flow/controllers/car_following_models.py | 21 ++++++++--------- flow/envs/multiagent/i210.py | 15 ++++++++---- scripts/ray_autoscale.yaml | 2 +- 9 files changed, 57 insertions(+), 54 deletions(-) diff --git a/examples/exp_configs/non_rl/highway_single.py b/examples/exp_configs/non_rl/highway_single.py index 51d937ac1..7948dfdef 100644 --- a/examples/exp_configs/non_rl/highway_single.py +++ b/examples/exp_configs/non_rl/highway_single.py @@ -6,7 +6,7 @@ import numpy as np -from flow.controllers import BandoFTLController +from flow.controllers import BandoFTLController, IDMController from flow.controllers.velocity_controllers import FollowerStopper from flow.core.params import EnvParams from flow.core.params import NetParams @@ -15,7 +15,7 @@ from flow.core.params import VehicleParams from flow.core.params import SumoParams from flow.core.params import SumoLaneChangeParams -from flow.core.rewards import miles_per_gallon +from flow.core.rewards import miles_per_gallon, from flow.networks import HighwayNetwork from flow.envs import TestEnv from flow.networks.highway import ADDITIONAL_NET_PARAMS @@ -51,14 +51,15 @@ lane_change_params=SumoLaneChangeParams( lane_change_mode="strategic", ), - acceleration_controller=(BandoFTLController, { - 'alpha': .5, - 'beta': 20.0, - 'h_st': 12.0, - 'h_go': 50.0, - 'v_max': 30.0, - 'noise': 1.0 if INCLUDE_NOISE else 0.0, - }), + # acceleration_controller=(BandoFTLController, { + # 'alpha': .5, + # 'beta': 20.0, + # 'h_st': 12.0, + # 'h_go': 50.0, + # 'v_max': 30.0, + # 'noise': 1.0 if INCLUDE_NOISE else 0.0, + # }), + acceleration_controller=(IDMController, {}), ) @@ -86,7 +87,7 @@ vehs_per_hour=int(TRAFFIC_FLOW * (PENETRATION_RATE)), depart_lane="free", depart_speed=TRAFFIC_SPEED, - 
name="idm_highway_inflow") + name="rl_highway_inflow") # SET UP FLOW PARAMETERS diff --git a/examples/exp_configs/rl/multiagent/multiagent_straight_road.py b/examples/exp_configs/rl/multiagent/multiagent_straight_road.py index 29642b599..aef3ee521 100644 --- a/examples/exp_configs/rl/multiagent/multiagent_straight_road.py +++ b/examples/exp_configs/rl/multiagent/multiagent_straight_road.py @@ -3,7 +3,7 @@ Trains a non-constant number of agents, all sharing the same policy, on the highway with ramps network. """ -from flow.controllers import BandoFTLController, RLController +from flow.controllers import IDMController, RLController from flow.core.params import EnvParams from flow.core.params import NetParams from flow.core.params import InitialConfig @@ -75,7 +75,7 @@ "headway_reward_gain": 1.0, # whether to add a slight reward for traveling at a desired speed - "speed_curriculum": False, + "speed_curriculum": True, # how many timesteps to anneal the headway curriculum over "speed_curriculum_iters": 50, # weight of the headway reward @@ -93,14 +93,7 @@ lane_change_params=SumoLaneChangeParams( lane_change_mode="strategic", ), - acceleration_controller=(BandoFTLController, { - 'alpha': .5, - 'beta': 20.0, - 'h_st': 12.0, - 'h_go': 50.0, - 'v_max': 30.0, - 'noise': 1.0 if INCLUDE_NOISE else 0.0, - }), + acceleration_controller=(IDMController, {}), ) # autonomous vehicles @@ -160,7 +153,7 @@ sim_step=0.5, render=False, use_ballistic=True, - restart_instance=True + restart_instance=False ), # network-related parameters (see flow.core.params.NetParams and the diff --git a/examples/train.py b/examples/train.py index 276ab6235..410902fba 100644 --- a/examples/train.py +++ b/examples/train.py @@ -192,13 +192,10 @@ def setup_exps_rllib(flow_params, alg_run = flags.algorithm.upper() if alg_run == "PPO": - # from flow.algorithms.custom_ppo import CustomPPOTrainer - # from ray.rllib.agents.ppo import DEFAULT_CONFIG - # alg_run = CustomPPOTrainer - # config = deepcopy(DEFAULT_CONFIG) - agent_cls = get_agent_class(alg_run) - config = deepcopy(agent_cls._default_config) - + from flow.algorithms.custom_ppo import CustomPPOTrainer + from ray.rllib.agents.ppo import DEFAULT_CONFIG + alg_run = CustomPPOTrainer + config = deepcopy(DEFAULT_CONFIG) config["num_workers"] = n_cpus config["horizon"] = horizon diff --git a/flow/algorithms/centralized_PPO.py b/flow/algorithms/centralized_PPO.py index 1186e28fa..3affd47c4 100644 --- a/flow/algorithms/centralized_PPO.py +++ b/flow/algorithms/centralized_PPO.py @@ -15,7 +15,7 @@ Postprocessing from ray.rllib.policy.sample_batch import SampleBatch from ray.rllib.policy.tf_policy import LearningRateSchedule, \ - EntropyCoeffSchedule + EntropyCoeffSchedule, ACTION_LOGP from ray.rllib.models.modelv2 import ModelV2 from ray.rllib.models.tf.tf_modelv2 import TFModelV2 from ray.rllib.models.tf.recurrent_tf_modelv2 import RecurrentTFModelV2 @@ -28,6 +28,9 @@ tf = try_import_tf() +# Frozen logits of the policy that computed the action +BEHAVIOUR_LOGITS = "behaviour_logits" + CENTRAL_OBS = "central_obs" OPPONENT_ACTION = "opponent_action" @@ -215,7 +218,10 @@ def centralized_critic_postprocessing(policy, rel_agent_time in rel_agents.items()} # okay, now we need to stack and sort central_obs_list = [padded_obs for padded_obs in padded_agent_obs.values()] - central_obs_batch = np.hstack((sample_batch["obs"], np.hstack(central_obs_list))) + try: + central_obs_batch = np.hstack((sample_batch["obs"], np.hstack(central_obs_list))) + except: + import ipdb; ipdb.set_trace() max_vf_agents = 
policy.model.max_num_agents num_agents = len(rel_agents) + 1 if num_agents < max_vf_agents: @@ -364,8 +370,8 @@ def loss_with_central_critic(policy, model, dist_class, train_batch): train_batch[Postprocessing.VALUE_TARGETS], train_batch[Postprocessing.ADVANTAGES], train_batch[SampleBatch.ACTIONS], - train_batch[SampleBatch.ACTION_DIST_INPUTS], - train_batch[SampleBatch.ACTION_LOGP], + train_batch[BEHAVIOUR_LOGITS], + train_batch[ACTION_LOGP], train_batch[SampleBatch.VF_PREDS], action_dist, policy.central_value_function, @@ -485,6 +491,8 @@ def __init__(self, config): shape=(), trainable=False, dtype=tf.float32) + def update_kl(self, blah): + pass def setup_mixins(policy, obs_space, action_space, config): @@ -508,7 +516,6 @@ def central_vf_stats(policy, train_batch, grads): } def kl_and_loss_stats(policy, train_batch): - print(train_batch["rewards"]) return { "cur_kl_coeff": tf.cast(policy.kl_coeff, tf.float64), "cur_lr": tf.cast(policy.cur_lr, tf.float64), @@ -522,7 +529,6 @@ def kl_and_loss_stats(policy, train_batch): "kl": policy.loss_obj.mean_kl, "entropy": policy.loss_obj.mean_entropy, "entropy_coeff": tf.cast(policy.entropy_coeff, tf.float64), - "avg_rew": train_batch["rewards"][-1] } CCPPO = CustomPPOTFPolicy.with_updates( diff --git a/flow/algorithms/custom_ppo.py b/flow/algorithms/custom_ppo.py index dab9bed8c..050d7d1fd 100644 --- a/flow/algorithms/custom_ppo.py +++ b/flow/algorithms/custom_ppo.py @@ -12,7 +12,7 @@ Postprocessing from ray.rllib.policy.sample_batch import SampleBatch from ray.rllib.policy.tf_policy import LearningRateSchedule, \ - EntropyCoeffSchedule + EntropyCoeffSchedule, ACTION_LOGP from ray.rllib.policy.tf_policy_template import build_tf_policy from ray.rllib.utils.explained_variance import explained_variance from ray.rllib.utils.tf_ops import make_tf_callable @@ -133,7 +133,7 @@ def ppo_surrogate_loss(policy, model, dist_class, train_batch): train_batch[Postprocessing.ADVANTAGES], train_batch[SampleBatch.ACTIONS], train_batch[BEHAVIOUR_LOGITS], - train_batch[SampleBatch.ACTION_LOGP], + train_batch[ACTION_LOGP], train_batch[SampleBatch.VF_PREDS], action_dist, model.value_function(), diff --git a/flow/controllers/base_controller.py b/flow/controllers/base_controller.py index 4004b1c4d..9ec939257 100755 --- a/flow/controllers/base_controller.py +++ b/flow/controllers/base_controller.py @@ -104,6 +104,8 @@ def get_action(self, env): # time step if accel is None: return None + else: + accel = min(max(accel, -self.max_deaccel), self.max_accel) # add noise to the accelerations, if requested if self.accel_noise > 0: diff --git a/flow/controllers/car_following_models.py b/flow/controllers/car_following_models.py index 124b4ea59..30e77162e 100755 --- a/flow/controllers/car_following_models.py +++ b/flow/controllers/car_following_models.py @@ -649,18 +649,15 @@ def get_accel(self, env): """See parent class.""" # TODO(@evinitsky) this is a hack to make rerouting works. This gets vehicles into the network # without generating waves. 
- if env.time_counter < env.env_params.warmup_steps: - return None - else: - lead_id = env.k.vehicle.get_leader(self.veh_id) - if not lead_id: # no car ahead - if self.want_max_accel: - return self.max_accel - - v_l = env.k.vehicle.get_speed(lead_id) - v = env.k.vehicle.get_speed(self.veh_id) - s = env.k.vehicle.get_headway(self.veh_id) - return self.accel_func(v, v_l, s) + lead_id = env.k.vehicle.get_leader(self.veh_id) + if not lead_id: # no car ahead + if self.want_max_accel: + return self.max_accel + + v_l = env.k.vehicle.get_speed(lead_id) + v = env.k.vehicle.get_speed(self.veh_id) + s = env.k.vehicle.get_headway(self.veh_id) + return self.accel_func(v, v_l, s) def accel_func(self, v, v_l, s): """Compute the acceleration function.""" diff --git a/flow/envs/multiagent/i210.py b/flow/envs/multiagent/i210.py index eadb56b32..9acb6cd93 100644 --- a/flow/envs/multiagent/i210.py +++ b/flow/envs/multiagent/i210.py @@ -189,7 +189,7 @@ def compute_reward(self, rl_actions, **kwargs): for i in range(self.look_back_length): follow_id = self.k.vehicle.get_follower(follow_id) if follow_id not in ["", None]: - rewards[rl_id] += miles_per_gallon(self, follow_id) / 100.0 + rewards[rl_id] += (miles_per_gallon(self, follow_id) - 14.0) / 100.0 else: break else: @@ -210,8 +210,12 @@ def compute_reward(self, rl_actions, **kwargs): speeds = self.k.vehicle.get_speed(self.k.vehicle.get_ids()) des_speed = self.env_params.additional_params["target_velocity"] # rescale so the critic can estimate it quickly - reward = np.nan_to_num(np.mean([(des_speed - np.abs(speed - des_speed)) ** 2 - for speed in speeds]) / (des_speed ** 2)) + if self.reroute_on_exit: + reward = np.nan_to_num(np.mean([(des_speed - np.abs(speed - des_speed)) + for speed in speeds]) / (des_speed)) + else: + reward = np.nan_to_num(np.mean([(des_speed - np.abs(speed - des_speed)) ** 2 + for speed in speeds]) / (des_speed ** 2)) rewards = {rl_id: reward for rl_id in self.k.vehicle.get_rl_ids()} if self.headway_curriculum and self.num_training_iters <= self.headway_curriculum_iters: t_min = 1 # smallest acceptable time headway @@ -237,7 +241,10 @@ def compute_reward(self, rl_actions, **kwargs): speed = self.k.vehicle.get_speed(veh_id) speed_reward = 0.0 if speed >= 0: - speed_reward = ((des_speed - np.abs(speed - des_speed)) ** 2) / (des_speed ** 2) + if self.reroute_on_exit: + speed_reward = ((des_speed - np.abs(speed - des_speed))) / (des_speed) + else: + speed_reward = ((des_speed - np.abs(speed - des_speed)) ** 2) / (des_speed ** 2) scaling_factor = max(0, 1 - self.num_training_iters / self.speed_curriculum_iters) rewards[veh_id] += speed_reward * scaling_factor * self.speed_reward_gain diff --git a/scripts/ray_autoscale.yaml b/scripts/ray_autoscale.yaml index 5d89f02f9..a7e81561f 100644 --- a/scripts/ray_autoscale.yaml +++ b/scripts/ray_autoscale.yaml @@ -72,7 +72,7 @@ setup_commands: - cd flow && git fetch && git checkout origin/flow_maddpg head_setup_commands: - - pip install -U https://s3-us-west-2.amazonaws.com/ray-wheels/latest/ray-0.9.0.dev0-cp37-cp37m-manylinux1_x86_64.whl + - pip install ray==0.8.0 - pip install tabulate - pip install boto3==1.10.45 # 1.4.8 adds InstanceMarketOptions - pip install awscli==1.16.309 From de4b21df5bde5c580383d59128769caad041a432 Mon Sep 17 00:00:00 2001 From: AboudyKreidieh Date: Fri, 15 May 2020 22:23:43 -0700 Subject: [PATCH 41/85] removed sweep --- .../non_rl/i210_subnetwork_sweep.py | 151 ------------------ 1 file changed, 151 deletions(-) delete mode 100644 
examples/exp_configs/non_rl/i210_subnetwork_sweep.py diff --git a/examples/exp_configs/non_rl/i210_subnetwork_sweep.py b/examples/exp_configs/non_rl/i210_subnetwork_sweep.py deleted file mode 100644 index 28cba81ce..000000000 --- a/examples/exp_configs/non_rl/i210_subnetwork_sweep.py +++ /dev/null @@ -1,151 +0,0 @@ -"""I-210 subnetwork example. - -In this case flow_params is a list of dicts. This is to test the effects of -multiple human-driver model parameters on the flow traffic. -""" -from collections import OrderedDict -from copy import deepcopy -import itertools -import os -import numpy as np - -from flow.core.params import SumoParams -from flow.core.params import EnvParams -from flow.core.params import NetParams -from flow.core.params import SumoLaneChangeParams -from flow.core.params import VehicleParams -from flow.core.params import InitialConfig -from flow.core.params import InFlows -import flow.config as config -from flow.envs import TestEnv -from flow.networks.i210_subnetwork import I210SubNetwork, EDGES_DISTRIBUTION - -# the default parameters for all lane change parameters -default_dict = { - "lane_change_mode": "strategic", - "model": "LC2013", - "lc_strategic": 1.0, - "lc_cooperative": 1.0, - "lc_speed_gain": 1.0, - "lc_keep_right": 1.0, - "lc_look_ahead_left": 2.0, - "lc_speed_gain_right": 1.0, - "lc_sublane": 1.0, - "lc_pushy": 0, - "lc_pushy_gap": 0.6, - "lc_assertive": 1, - "lc_accel_lat": 1.0 -} - -# values to sweep through for some lane change parameters -sweep_dict = OrderedDict({ - "lc_strategic": [1.0, 2.0, 4.0, 8.0], - "lc_cooperative": [1.0, 2.0], - "lc_look_ahead_left": [2.0, 4.0] -}) - -# Create a list of possible lane change parameter combinations. -all_names = sorted(sweep_dict) -combinations = itertools.product(*(sweep_dict[name] for name in all_names)) -combination_list = list(combinations) -res = [] -for val in combination_list: - curr_dict = {} - for elem, name in zip(val, all_names): - curr_dict[name] = elem - res.append(curr_dict) - -# Create a list of all possible flow_params dictionaries to sweep through the -# different lane change parameters. -flow_params = [] - -for lane_change_dict in res: - # no vehicles in the network. The lane change parameters of inflowing - # vehicles are updated here. 
- vehicles = VehicleParams() - update_dict = deepcopy(default_dict) - update_dict.update(lane_change_dict) - vehicles.add( - "human", - num_vehicles=0, - lane_change_params=SumoLaneChangeParams(**update_dict) - ) - - inflow = InFlows() - # main highway - inflow.add( - veh_type="human", - edge="119257914", - vehs_per_hour=8378, - # probability=1.0, - departLane="random", - departSpeed=20) - # on ramp - inflow.add( - veh_type="human", - edge="27414345", - vehs_per_hour=321, - departLane="random", - departSpeed=20) - inflow.add( - veh_type="human", - edge="27414342#0", - vehs_per_hour=421, - departLane="random", - departSpeed=20) - - NET_TEMPLATE = os.path.join( - config.PROJECT_PATH, - "examples/exp_configs/templates/sumo/test2.net.xml") - - params = dict( - # name of the experiment - exp_tag='I-210_subnetwork', - - # name of the flow environment the experiment is running on - env_name=TestEnv, - - # name of the network class the experiment is running on - network=I210SubNetwork, - - # simulator that is used by the experiment - simulator='traci', - - # simulation-related parameters - sim=SumoParams( - sim_step=0.8, - render=True, - color_by_speed=True - ), - - # environment related parameters (see flow.core.params.EnvParams) - env=EnvParams( - horizon=4500, # one hour of run time - ), - - # network-related parameters (see flow.core.params.NetParams and the - # network's documentation or ADDITIONAL_NET_PARAMS component) - net=NetParams( - inflows=inflow, - template=NET_TEMPLATE - ), - - # vehicles to be placed in the network at the start of a rollout (see - # flow.core.params.VehicleParams) - veh=vehicles, - - # parameters specifying the positioning of vehicles upon - # initialization/reset (see flow.core.params.InitialConfig) - initial=InitialConfig( - edges_distribution=EDGES_DISTRIBUTION, - ), - ) - - # Store the next flow_params dict. - flow_params.append(params) - - -custom_callables = { - "avg_merge_speed": lambda env: np.mean(env.k.vehicle.get_speed( - env.k.vehicle.get_ids_by_edge("119257908#1-AddedOnRampEdge"))) -} From 62afe9c979b9b41cb4bdfb316443662c3909c8c0 Mon Sep 17 00:00:00 2001 From: AboudyKreidieh Date: Fri, 15 May 2020 22:24:11 -0700 Subject: [PATCH 42/85] added updated single lane highway network config --- examples/exp_configs/non_rl/highway_single.py | 65 ++++++++++--------- flow/networks/highway.py | 19 +++--- 2 files changed, 44 insertions(+), 40 deletions(-) diff --git a/examples/exp_configs/non_rl/highway_single.py b/examples/exp_configs/non_rl/highway_single.py index 46b18c0e9..505a8e2eb 100644 --- a/examples/exp_configs/non_rl/highway_single.py +++ b/examples/exp_configs/non_rl/highway_single.py @@ -1,9 +1,5 @@ -"""Multi-agent highway with ramps example. - -Trains a non-constant number of agents, all sharing the same policy, on the -highway with ramps network. 
-""" -from flow.controllers import BandoFTLController +"""Example of an open network with human-driven vehicles.""" +from flow.controllers import IDMController from flow.core.params import EnvParams from flow.core.params import NetParams from flow.core.params import InitialConfig @@ -11,15 +7,21 @@ from flow.core.params import VehicleParams from flow.core.params import SumoParams from flow.core.params import SumoLaneChangeParams +from flow.core.params import SumoCarFollowingParams from flow.networks import HighwayNetwork from flow.envs import TestEnv from flow.networks.highway import ADDITIONAL_NET_PARAMS -TRAFFIC_SPEED = 11 -END_SPEED = 16 -TRAFFIC_FLOW = 2056 -HORIZON = 3600 -INCLUDE_NOISE = False +# the speed of vehicles entering the network +TRAFFIC_SPEED = 24.1 +# the maximum speed at the downstream boundary edge +END_SPEED = 6.0 +# the inflow rate of vehicles +TRAFFIC_FLOW = 2215 +# the simulation time horizon (in steps) +HORIZON = 4500 +# whether to include noise in the car-following models +INCLUDE_NOISE = True additional_net_params = ADDITIONAL_NET_PARAMS.copy() additional_net_params.update({ @@ -31,28 +33,29 @@ "speed_limit": 30, # number of edges to divide the highway into "num_edges": 2, - # whether to include a ghost edge of length 500m. This edge is provided a - # different speed limit. + # whether to include a ghost edge "use_ghost_edge": True, # speed limit for the ghost edge - "ghost_speed_limit": END_SPEED + "ghost_speed_limit": END_SPEED, + # length of the cell imposing a boundary + "boundary_cell_length": 300, }) vehicles = VehicleParams() vehicles.add( "human", - num_vehicles=0, + acceleration_controller=(IDMController, { + 'a': 1.3, + 'b': 2.0, + 'noise': 0.3 if INCLUDE_NOISE else 0.0 + }), + car_following_params=SumoCarFollowingParams( + min_gap=0.5 + ), lane_change_params=SumoLaneChangeParams( - lane_change_mode="strategic", + model="SL2015", + lc_sublane=2.0, ), - acceleration_controller=(BandoFTLController, { - 'alpha': .5, - 'beta': 20.0, - 'h_st': 12.0, - 'h_go': 50.0, - 'v_max': 30.0, - 'noise': 1.0 if INCLUDE_NOISE else 0.0, - }), ) inflows = InFlows() @@ -60,15 +63,12 @@ veh_type="human", edge="highway_0", vehs_per_hour=TRAFFIC_FLOW, - depart_lane="free", - depart_speed=TRAFFIC_SPEED, - name="idm_highway_inflow") - -# SET UP FLOW PARAMETERS + departLane="free", + departSpeed=TRAFFIC_SPEED) flow_params = dict( # name of the experiment - exp_tag='highway-single', + exp_tag='highway', # name of the flow environment the experiment is running on env_name=TestEnv, @@ -88,9 +88,10 @@ # sumo-related parameters (see flow.core.params.SumoParams) sim=SumoParams( - sim_step=0.5, + sim_step=0.4, render=False, - restart_instance=False + restart_instance=False, + use_ballistic=True ), # network-related parameters (see flow.core.params.NetParams and the diff --git a/flow/networks/highway.py b/flow/networks/highway.py index 7e9c18ad5..d1d50caad 100644 --- a/flow/networks/highway.py +++ b/flow/networks/highway.py @@ -14,11 +14,13 @@ "speed_limit": 30, # number of edges to divide the highway into "num_edges": 1, - # whether to include a ghost edge of length 500m. This edge is provided a - # different speed limit. + # whether to include a ghost edge. This edge is provided a different speed + # limit. 
"use_ghost_edge": False, # speed limit for the ghost edge "ghost_speed_limit": 25, + # length of the cell imposing a boundary + "boundary_cell_length": 500 } @@ -34,9 +36,10 @@ class HighwayNetwork(Network): * **lanes** : number of lanes in the highway * **speed_limit** : max speed limit of the highway * **num_edges** : number of edges to divide the highway into - * **use_ghost_edge** : whether to include a ghost edge of length 500m. This - edge is provided a different speed limit. + * **use_ghost_edge** : whether to include a ghost edge. This edge is + provided a different speed limit. * **ghost_speed_limit** : speed limit for the ghost edge + * **boundary_cell_length** : length of the cell imposing a boundary Usage ----- @@ -70,8 +73,6 @@ def __init__(self, if p not in net_params.additional_params: raise KeyError('Network parameter "{}" not supplied'.format(p)) - self.end_length = 500 - super().__init__(name, vehicles, net_params, initial_config, traffic_lights) @@ -80,6 +81,7 @@ def specify_nodes(self, net_params): length = net_params.additional_params["length"] num_edges = net_params.additional_params.get("num_edges", 1) segment_lengths = np.linspace(0, length, num_edges+1) + end_length = net_params.additional_params["boundary_cell_length"] nodes = [] for i in range(num_edges+1): @@ -92,7 +94,7 @@ def specify_nodes(self, net_params): if self.net_params.additional_params["use_ghost_edge"]: nodes += [{ "id": "edge_{}".format(num_edges + 1), - "x": length + self.end_length, + "x": length + end_length, "y": 0 }] @@ -103,6 +105,7 @@ def specify_edges(self, net_params): length = net_params.additional_params["length"] num_edges = net_params.additional_params.get("num_edges", 1) segment_length = length/float(num_edges) + end_length = net_params.additional_params["boundary_cell_length"] edges = [] for i in range(num_edges): @@ -120,7 +123,7 @@ def specify_edges(self, net_params): "type": "highway_end", "from": "edge_{}".format(num_edges), "to": "edge_{}".format(num_edges + 1), - "length": self.end_length + "length": end_length }] return edges From b52d90a9eeb8d1a2642d0f4a2c49708a26355c96 Mon Sep 17 00:00:00 2001 From: AboudyKreidieh Date: Fri, 15 May 2020 22:25:23 -0700 Subject: [PATCH 43/85] added i210 xml files with downstream edge --- .../templates/sumo/i210_with_ghost_cell.xml | 5719 +++++++++++++++++ .../i210_with_ghost_cell_with_downstream.xml | 5719 +++++++++++++++++ 2 files changed, 11438 insertions(+) create mode 100644 examples/exp_configs/templates/sumo/i210_with_ghost_cell.xml create mode 100644 examples/exp_configs/templates/sumo/i210_with_ghost_cell_with_downstream.xml diff --git a/examples/exp_configs/templates/sumo/i210_with_ghost_cell.xml b/examples/exp_configs/templates/sumo/i210_with_ghost_cell.xml new file mode 100644 index 000000000..66e5a1131 --- /dev/null +++ b/examples/exp_configs/templates/sumo/i210_with_ghost_cell.xml @@ -0,0 +1,5719 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + 
[5,719 lines of SUMO network template XML omitted]

diff --git a/examples/exp_configs/templates/sumo/i210_with_ghost_cell_with_downstream.xml b/examples/exp_configs/templates/sumo/i210_with_ghost_cell_with_downstream.xml
new file mode 100644
index 000000000..10d4d8d45
--- /dev/null
+++ b/examples/exp_configs/templates/sumo/i210_with_ghost_cell_with_downstream.xml
@@ -0,0 +1,5719 @@
[5,719 lines of SUMO network template XML omitted]
From c37046091531f7849abde62b2759d9e0be46bb60 Mon Sep 17 00:00:00 2001
From: AboudyKreidieh
Date: Fri, 15 May 2020 22:26:14 -0700
Subject: [PATCH 44/85] added I210Router

---
 flow/controllers/__init__.py            |  5 +++--
 flow/controllers/routing_controllers.py | 26 +++++++++++++++++++++++++
 2 files changed, 29 insertions(+), 2 deletions(-)

diff --git a/flow/controllers/__init__.py b/flow/controllers/__init__.py
index 4dfcf05b7..a61d16980 100755
--- a/flow/controllers/__init__.py
+++ b/flow/controllers/__init__.py
@@ -28,7 +28,7 @@
 # routing controllers
 from flow.controllers.base_routing_controller import BaseRouter
 from flow.controllers.routing_controllers import ContinuousRouter, \
-    GridRouter, BayBridgeRouter
+    GridRouter, BayBridgeRouter, I210Router

 __all__ = [
     "RLController", "BaseController", "BaseLaneChangeController", "BaseRouter",
@@ -36,5 +36,6 @@
     "IDMController", "SimCarFollowingController", "FollowerStopper",
     "PISaturation", "StaticLaneChanger", "SimLaneChangeController",
     "ContinuousRouter", "GridRouter", "BayBridgeRouter", "LACController",
-    "GippsController", "NonLocalFollowerStopper", "BandoFTLController"
+    "GippsController", "NonLocalFollowerStopper", "BandoFTLController",
+    "I210Router"
 ]
diff --git a/flow/controllers/routing_controllers.py b/flow/controllers/routing_controllers.py
index e6ccdde78..02aa34cb4 100755
--- a/flow/controllers/routing_controllers.py
+++ b/flow/controllers/routing_controllers.py
@@ -124,3 +124,29 @@ def choose_route(self, env):
             new_route = super().choose_route(env)

         return new_route
+
+
+class I210Router(ContinuousRouter):
+    """Assists in choosing routes in select cases for the I-210 sub-network.
+
+    Extension to the Continuous Router.
+
+    Usage
+    -----
+    See base class for usage example.
+ """ + + def choose_route(self, env): + """See parent class.""" + edge = env.k.vehicle.get_edge(self.veh_id) + lane = env.k.vehicle.get_lane(self.veh_id) + + # vehicles on these edges in lanes 4 and 5 are not going to be able to + # make it out in time + if edge == "119257908#1-AddedOffRampEdge" and lane in [5, 4, 3]: + new_route = env.available_routes[ + "119257908#1-AddedOffRampEdge"][0][0] + else: + new_route = super().choose_route(env) + + return new_route From 4525a7a69f47e0ee18a65a7533d8085e51f17b4f Mon Sep 17 00:00:00 2001 From: AboudyKreidieh Date: Fri, 15 May 2020 22:50:30 -0700 Subject: [PATCH 45/85] added updated i210 features --- .../exp_configs/non_rl/i210_subnetwork.py | 110 ++++++-- flow/networks/i210_subnetwork.py | 247 +++++++++++------- 2 files changed, 234 insertions(+), 123 deletions(-) diff --git a/examples/exp_configs/non_rl/i210_subnetwork.py b/examples/exp_configs/non_rl/i210_subnetwork.py index dd85c56cf..cb590cfcc 100644 --- a/examples/exp_configs/non_rl/i210_subnetwork.py +++ b/examples/exp_configs/non_rl/i210_subnetwork.py @@ -1,9 +1,9 @@ """I-210 subnetwork example.""" import os - import numpy as np -from flow.controllers.car_following_models import IDMController +from flow.controllers import IDMController +from flow.controllers import I210Router from flow.core.params import SumoParams from flow.core.params import EnvParams from flow.core.params import NetParams @@ -15,7 +15,49 @@ from flow.envs import TestEnv from flow.networks.i210_subnetwork import I210SubNetwork, EDGES_DISTRIBUTION -# create the base vehicle type that will be used for inflows +# =========================================================================== # +# Specify some configurable constants. # +# =========================================================================== # + +# whether to include the upstream ghost edge in the network +WANT_GHOST_CELL = True +# whether to include the downstream slow-down edge in the network +WANT_DOWNSTREAM_BOUNDARY = True +# whether to include vehicles on the on-ramp +ON_RAMP = True +# the inflow rate of vehicles (in veh/hr) +INFLOW_RATE = 5 * 2215 +# the speed of inflowing vehicles from the main edge (in m/s) +INFLOW_SPEED = 24.1 + +# =========================================================================== # +# Specify the path to the network template. # +# =========================================================================== # + +if WANT_DOWNSTREAM_BOUNDARY: + net_template = os.path.join( + config.PROJECT_PATH, + "examples/exp_configs/templates/sumo/i210_with_ghost_cell_with_" + "downstream.xml") +elif WANT_GHOST_CELL: + net_template = os.path.join( + config.PROJECT_PATH, + "examples/exp_configs/templates/sumo/i210_with_ghost_cell.xml") +else: + net_template = os.path.join( + config.PROJECT_PATH, + "examples/exp_configs/templates/sumo/test2.net.xml") + +# If the ghost cell is not being used, remove it from the initial edges that +# vehicles can be placed on. +edges_distribution = EDGES_DISTRIBUTION.copy() +if not WANT_GHOST_CELL: + edges_distribution.remove("ghost0") + +# =========================================================================== # +# Specify vehicle-specific information and inflows. 
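For reference on the route lookup in I210Router.choose_route (PATCH 44 above): judging by the specify_routes tables later in this series, env.available_routes maps a starting edge to a list of (edge_list, probability) pairs, so the trailing [0][0] selects the edge list of the first route defined for that edge. A minimal sketch of the shape being indexed, reusing the off-ramp route and probability that appear in the I-210 route table further down:

    # available_routes has the form {start_edge: [(route_as_edge_list, probability), ...]},
    # so available_routes[edge][0][0] is the first route's list of edges.
    available_routes = {
        "119257908#1-AddedOffRampEdge": [
            (["119257908#1-AddedOffRampEdge", "119257908#2", "119257908#3"], 1.0),
        ],
    }
    new_route = available_routes["119257908#1-AddedOffRampEdge"][0][0]
    # new_route == ["119257908#1-AddedOffRampEdge", "119257908#2", "119257908#3"]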
# +# =========================================================================== # + vehicles = VehicleParams() vehicles.add( "human", @@ -24,35 +66,39 @@ lane_change_mode="strategic", ), acceleration_controller=(IDMController, { - "a": 0.3, "b": 2.0, "noise": 0.5 + "a": 1.3, + "b": 2.0, + "noise": 0.3, }), + routing_controller=(I210Router, {}) if ON_RAMP else None, ) inflow = InFlows() # main highway inflow.add( veh_type="human", - edge="119257914", - vehs_per_hour=8378, - departLane="random", - departSpeed=23) + edge="ghost0" if WANT_GHOST_CELL else "119257914", + vehs_per_hour=INFLOW_RATE, + departLane="best", + departSpeed=INFLOW_SPEED) # on ramp -# inflow.add( -# veh_type="human", -# edge="27414345", -# vehs_per_hour=321, -# departLane="random", -# departSpeed=20) -# inflow.add( -# veh_type="human", -# edge="27414342#0", -# vehs_per_hour=421, -# departLane="random", -# departSpeed=20) - -NET_TEMPLATE = os.path.join( - config.PROJECT_PATH, - "examples/exp_configs/templates/sumo/test2.net.xml") +if ON_RAMP: + inflow.add( + veh_type="human", + edge="27414345", + vehs_per_hour=500, + departLane="random", + departSpeed=10) + inflow.add( + veh_type="human", + edge="27414342#0", + vehs_per_hour=500, + departLane="random", + departSpeed=10) + +# =========================================================================== # +# Generate the flow_params dict with all relevant simulation information. # +# =========================================================================== # flow_params = dict( # name of the experiment @@ -69,7 +115,7 @@ # simulation-related parameters sim=SumoParams( - sim_step=0.5, + sim_step=0.4, render=False, color_by_speed=True, use_ballistic=True @@ -77,14 +123,18 @@ # environment related parameters (see flow.core.params.EnvParams) env=EnvParams( - horizon=4500, + horizon=10000, ), # network-related parameters (see flow.core.params.NetParams and the # network's documentation or ADDITIONAL_NET_PARAMS component) net=NetParams( inflows=inflow, - template=NET_TEMPLATE + template=net_template, + additional_params={ + "on_ramp": ON_RAMP, + "ghost_edge": WANT_GHOST_CELL, + } ), # vehicles to be placed in the network at the start of a rollout (see @@ -94,10 +144,14 @@ # parameters specifying the positioning of vehicles upon initialization/ # reset (see flow.core.params.InitialConfig) initial=InitialConfig( - edges_distribution=EDGES_DISTRIBUTION, + edges_distribution=edges_distribution, ), ) +# =========================================================================== # +# Specify custom callable that is logged during simulation runtime. 
# +# =========================================================================== # + edge_id = "119257908#1-AddedOnRampEdge" custom_callables = { "avg_merge_speed": lambda env: np.nan_to_num(np.mean( diff --git a/flow/networks/i210_subnetwork.py b/flow/networks/i210_subnetwork.py index d8e05efb5..b86a0dc8a 100644 --- a/flow/networks/i210_subnetwork.py +++ b/flow/networks/i210_subnetwork.py @@ -1,9 +1,18 @@ """Contains the I-210 sub-network class.""" - from flow.networks.base import Network +from flow.core.params import InitialConfig +from flow.core.params import TrafficLightParams + +ADDITIONAL_NET_PARAMS = { + # whether to include vehicle on the on-ramp + "on_ramp": False, + # whether to include the downstream slow-down edge in the network + "ghost_edge": False, +} EDGES_DISTRIBUTION = [ # Main highway + "ghost0", "119257914", "119257908#0", "119257908#1-AddedOnRampEdge", @@ -25,6 +34,12 @@ class I210SubNetwork(Network): """A network used to simulate the I-210 sub-network. + Requires from net_params: + + * **on_ramp** : whether to include vehicle on the on-ramp + * **ghost_edge** : whether to include the downstream slow-down edge in the + network + Usage ----- >>> from flow.core.params import NetParams @@ -39,103 +54,145 @@ class I210SubNetwork(Network): >>> ) """ - def specify_routes(self, net_params): - """See parent class. + def __init__(self, + name, + vehicles, + net_params, + initial_config=InitialConfig(), + traffic_lights=TrafficLightParams()): + """Initialize the I210 sub-network scenario.""" + for p in ADDITIONAL_NET_PARAMS.keys(): + if p not in net_params.additional_params: + raise KeyError('Network parameter "{}" not supplied'.format(p)) + + super(I210SubNetwork, self).__init__( + name=name, + vehicles=vehicles, + net_params=net_params, + initial_config=initial_config, + traffic_lights=traffic_lights, + ) - Routes for vehicles moving through the bay bridge from Oakland to San - Francisco. 
- """ + def specify_routes(self, net_params): + """See parent class.""" rts = { - # Main highway "119257914": [ - (["119257914", "119257908#0", "119257908#1-AddedOnRampEdge", - "119257908#1", "119257908#1-AddedOffRampEdge", "119257908#2", - "119257908#3"], - 1), # HOV: 1509 (on ramp: 57), Non HOV: 6869 (onramp: 16) - # (["119257914", "119257908#0", "119257908#1-AddedOnRampEdge", - # "119257908#1", "119257908#1-AddedOffRampEdge", "173381935"], - # 17 / 8378) - ], - # "119257908#0": [ - # (["119257908#0", "119257908#1-AddedOnRampEdge", "119257908#1", - # "119257908#1-AddedOffRampEdge", "119257908#2", - # "119257908#3"], - # 1.0), - # # (["119257908#0", "119257908#1-AddedOnRampEdge", "119257908#1", - # # "119257908#1-AddedOffRampEdge", "173381935"], - # # 0.5), - # ], - # "119257908#1-AddedOnRampEdge": [ - # (["119257908#1-AddedOnRampEdge", "119257908#1", - # "119257908#1-AddedOffRampEdge", "119257908#2", - # "119257908#3"], - # 1.0), - # # (["119257908#1-AddedOnRampEdge", "119257908#1", - # # "119257908#1-AddedOffRampEdge", "173381935"], - # # 0.5), - # ], - # "119257908#1": [ - # (["119257908#1", "119257908#1-AddedOffRampEdge", "119257908#2", - # "119257908#3"], - # 1.0), - # # (["119257908#1", "119257908#1-AddedOffRampEdge", "173381935"], - # # 0.5), - # ], - # "119257908#1-AddedOffRampEdge": [ - # (["119257908#1-AddedOffRampEdge", "119257908#2", - # "119257908#3"], - # 1.0), - # # (["119257908#1-AddedOffRampEdge", "173381935"], - # # 0.5), - # ], - # "119257908#2": [ - # (["119257908#2", "119257908#3"], 1), - # ], - # "119257908#3": [ - # (["119257908#3"], 1), - # ], - # - # # On-ramp - # "27414345": [ - # (["27414345", "27414342#1-AddedOnRampEdge", - # "27414342#1", - # "119257908#1-AddedOnRampEdge", "119257908#1", - # "119257908#1-AddedOffRampEdge", "119257908#2", - # "119257908#3"], - # 1 - 9 / 321), - # (["27414345", "27414342#1-AddedOnRampEdge", - # "27414342#1", - # "119257908#1-AddedOnRampEdge", "119257908#1", - # "119257908#1-AddedOffRampEdge", "173381935"], - # 9 / 321), - # ], - # "27414342#0": [ - # (["27414342#0", "27414342#1-AddedOnRampEdge", - # "27414342#1", - # "119257908#1-AddedOnRampEdge", "119257908#1", - # "119257908#1-AddedOffRampEdge", "119257908#2", - # "119257908#3"], - # 1 - 20 / 421), - # (["27414342#0", "27414342#1-AddedOnRampEdge", - # "27414342#1", - # "119257908#1-AddedOnRampEdge", "119257908#1", - # "119257908#1-AddedOffRampEdge", "173381935"], - # 20 / 421), - # ], - # "27414342#1-AddedOnRampEdge": [ - # (["27414342#1-AddedOnRampEdge", "27414342#1", "119257908#1-AddedOnRampEdge", - # "119257908#1", "119257908#1-AddedOffRampEdge", "119257908#2", - # "119257908#3"], - # 0.5), - # (["27414342#1-AddedOnRampEdge", "27414342#1", "119257908#1-AddedOnRampEdge", - # "119257908#1", "119257908#1-AddedOffRampEdge", "173381935"], - # 0.5), - # ], - # - # # Off-ramp - # "173381935": [ - # (["173381935"], 1), - # ], + (["119257914", + "119257908#0", + "119257908#1-AddedOnRampEdge", + "119257908#1", + "119257908#1-AddedOffRampEdge", + "119257908#2", + "119257908#3"], 1.0), + ] } + if net_params.additional_params["ghost_edge"]: + rts.update({ + "ghost0": [ + (["ghost0", + "119257914", + "119257908#0", + "119257908#1-AddedOnRampEdge", + "119257908#1", + "119257908#1-AddedOffRampEdge", + "119257908#2", + "119257908#3"], 1), + ], + }) + + if net_params.additional_params["on_ramp"]: + rts.update({ + # Main highway + "119257908#0": [ + (["119257908#0", + "119257908#1-AddedOnRampEdge", + "119257908#1", + "119257908#1-AddedOffRampEdge", + "119257908#2", + "119257908#3"], 1.0), + 
], + "119257908#1-AddedOnRampEdge": [ + (["119257908#1-AddedOnRampEdge", + "119257908#1", + "119257908#1-AddedOffRampEdge", + "119257908#2", + "119257908#3"], 1.0), + ], + "119257908#1": [ + (["119257908#1", + "119257908#1-AddedOffRampEdge", + "119257908#2", + "119257908#3"], 1.0), + ], + "119257908#1-AddedOffRampEdge": [ + (["119257908#1-AddedOffRampEdge", + "119257908#2", + "119257908#3"], 1.0), + ], + "119257908#2": [ + (["119257908#2", + "119257908#3"], 1), + ], + "119257908#3": [ + (["119257908#3"], 1), + ], + + # On-ramp + "27414345": [ + (["27414345", + "27414342#1-AddedOnRampEdge", + "27414342#1", + "119257908#1-AddedOnRampEdge", + "119257908#1", + "119257908#1-AddedOffRampEdge", + "119257908#2", + "119257908#3"], 1 - 9 / 321), + (["27414345", + "27414342#1-AddedOnRampEdge", + "27414342#1", + "119257908#1-AddedOnRampEdge", + "119257908#1", + "119257908#1-AddedOffRampEdge", + "173381935"], 9 / 321), + ], + "27414342#0": [ + (["27414342#0", + "27414342#1-AddedOnRampEdge", + "27414342#1", + "119257908#1-AddedOnRampEdge", + "119257908#1", + "119257908#1-AddedOffRampEdge", + "119257908#2", + "119257908#3"], 1 - 20 / 421), + (["27414342#0", + "27414342#1-AddedOnRampEdge", + "27414342#1", + "119257908#1-AddedOnRampEdge", + "119257908#1", + "119257908#1-AddedOffRampEdge", + "173381935"], 20 / 421), + ], + "27414342#1-AddedOnRampEdge": [ + (["27414342#1-AddedOnRampEdge", + "27414342#1", + "119257908#1-AddedOnRampEdge", + "119257908#1", + "119257908#1-AddedOffRampEdge", + "119257908#2", + "119257908#3"], 0.5), + (["27414342#1-AddedOnRampEdge", + "27414342#1", + "119257908#1-AddedOnRampEdge", + "119257908#1", + "119257908#1-AddedOffRampEdge", + "173381935"], 0.5), + ], + + # Off-ramp + "173381935": [ + (["173381935"], 1), + ], + }) + return rts From ed8506a22caf796f642f1a9b99849231a9509f86 Mon Sep 17 00:00:00 2001 From: AboudyKreidieh Date: Fri, 15 May 2020 22:50:42 -0700 Subject: [PATCH 46/85] minor cleanup --- flow/networks/ring.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/flow/networks/ring.py b/flow/networks/ring.py index de4d17503..ceef22a78 100755 --- a/flow/networks/ring.py +++ b/flow/networks/ring.py @@ -37,7 +37,7 @@ class RingNetwork(Network): >>> from flow.core.params import NetParams >>> from flow.core.params import VehicleParams >>> from flow.core.params import InitialConfig - >>> from flow.scenarios import RingNetwork + >>> from flow.networks import RingNetwork >>> >>> network = RingNetwork( >>> name='ring_road', From 6c90517c7d3cfc15ecd2256b8dbd463338d5eb71 Mon Sep 17 00:00:00 2001 From: AboudyKreidieh Date: Mon, 18 May 2020 00:50:00 -0700 Subject: [PATCH 47/85] better parameters based on when congestion propagates --- examples/exp_configs/non_rl/highway_single.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/examples/exp_configs/non_rl/highway_single.py b/examples/exp_configs/non_rl/highway_single.py index 505a8e2eb..f5ec6aa20 100644 --- a/examples/exp_configs/non_rl/highway_single.py +++ b/examples/exp_configs/non_rl/highway_single.py @@ -19,7 +19,7 @@ # the inflow rate of vehicles TRAFFIC_FLOW = 2215 # the simulation time horizon (in steps) -HORIZON = 4500 +HORIZON = 1500 # whether to include noise in the car-following models INCLUDE_NOISE = True @@ -82,8 +82,8 @@ # environment related parameters (see flow.core.params.EnvParams) env=EnvParams( horizon=HORIZON, - warmup_steps=0, - sims_per_step=1, + warmup_steps=500, + sims_per_step=3, ), # sumo-related parameters (see flow.core.params.SumoParams) From 
be0eb253911f6b4b578d40700629b86f341fed31 Mon Sep 17 00:00:00 2001 From: AboudyKreidieh Date: Mon, 18 May 2020 01:11:44 -0700 Subject: [PATCH 48/85] bug fixes --- examples/exp_configs/rl/multiagent/multiagent_i210.py | 6 +++++- tests/fast_tests/test_scenarios.py | 3 ++- tests/fast_tests/test_vehicles.py | 1 + tests/setup_scripts.py | 1 + 4 files changed, 9 insertions(+), 2 deletions(-) diff --git a/examples/exp_configs/rl/multiagent/multiagent_i210.py b/examples/exp_configs/rl/multiagent/multiagent_i210.py index 94f709ff4..b13ed1cb5 100644 --- a/examples/exp_configs/rl/multiagent/multiagent_i210.py +++ b/examples/exp_configs/rl/multiagent/multiagent_i210.py @@ -35,6 +35,10 @@ # percentage of autonomous vehicles compared to human vehicles on highway PENETRATION_RATE = 10 +# TODO: temporary fix +edges_distribution = EDGES_DISTRIBUTION.copy() +edges_distribution.remove("ghost0") + # SET UP PARAMETERS FOR THE ENVIRONMENT additional_env_params = ADDITIONAL_ENV_PARAMS.copy() additional_env_params.update({ @@ -155,7 +159,7 @@ # parameters specifying the positioning of vehicles upon initialization/ # reset (see flow.core.params.InitialConfig) initial=InitialConfig( - edges_distribution=EDGES_DISTRIBUTION, + edges_distribution=edges_distribution, ), ) diff --git a/tests/fast_tests/test_scenarios.py b/tests/fast_tests/test_scenarios.py index d72a50b17..6d051a2d0 100644 --- a/tests/fast_tests/test_scenarios.py +++ b/tests/fast_tests/test_scenarios.py @@ -97,7 +97,8 @@ def test_additional_net_params(self): "speed_limit": 30, "num_edges": 1, "use_ghost_edge": False, - "ghost_speed_limit": 25 + "ghost_speed_limit": 25, + "test_ghost_edge": 300, } ) ) diff --git a/tests/fast_tests/test_vehicles.py b/tests/fast_tests/test_vehicles.py index b791bba64..a37b235ff 100644 --- a/tests/fast_tests/test_vehicles.py +++ b/tests/fast_tests/test_vehicles.py @@ -261,6 +261,7 @@ def test_no_junctions_highway(self): "num_edges": 1, "use_ghost_edge": False, "ghost_speed_limit": 25, + "boundary_cell_length": 300, } net_params = NetParams(additional_params=additional_net_params) vehicles = VehicleParams() diff --git a/tests/setup_scripts.py b/tests/setup_scripts.py index ac88d2e42..343bad906 100644 --- a/tests/setup_scripts.py +++ b/tests/setup_scripts.py @@ -346,6 +346,7 @@ def highway_exp_setup(sim_params=None, "num_edges": 1, "use_ghost_edge": False, "ghost_speed_limit": 25, + "boundary_cell_length": 300, } net_params = NetParams(additional_params=additional_net_params) From bdcfb966b1cfd05921de40197fe27d0354d38b2a Mon Sep 17 00:00:00 2001 From: AboudyKreidieh Date: Mon, 18 May 2020 02:05:43 -0700 Subject: [PATCH 49/85] bug fixes --- tests/fast_tests/test_scenarios.py | 3 ++- tests/fast_tests/test_vehicles.py | 1 + 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/tests/fast_tests/test_scenarios.py b/tests/fast_tests/test_scenarios.py index d72a50b17..6a91b586e 100644 --- a/tests/fast_tests/test_scenarios.py +++ b/tests/fast_tests/test_scenarios.py @@ -116,7 +116,8 @@ def test_ghost_edge(self): "speed_limit": 30, "num_edges": 1, "use_ghost_edge": False, - "ghost_speed_limit": 25 + "ghost_speed_limit": 25, + "boundary_cell_length": 300, }) ) env.reset() diff --git a/tests/fast_tests/test_vehicles.py b/tests/fast_tests/test_vehicles.py index b791bba64..a37b235ff 100644 --- a/tests/fast_tests/test_vehicles.py +++ b/tests/fast_tests/test_vehicles.py @@ -261,6 +261,7 @@ def test_no_junctions_highway(self): "num_edges": 1, "use_ghost_edge": False, "ghost_speed_limit": 25, + "boundary_cell_length": 300, } net_params 
= NetParams(additional_params=additional_net_params) vehicles = VehicleParams() From 2892a805ba866f535132287f62ba915e19f141b8 Mon Sep 17 00:00:00 2001 From: AboudyKreidieh Date: Mon, 18 May 2020 02:07:38 -0700 Subject: [PATCH 50/85] more bug fixes --- examples/exp_configs/rl/multiagent/multiagent_i210.py | 6 +++++- tests/fast_tests/test_scenarios.py | 2 +- 2 files changed, 6 insertions(+), 2 deletions(-) diff --git a/examples/exp_configs/rl/multiagent/multiagent_i210.py b/examples/exp_configs/rl/multiagent/multiagent_i210.py index b13ed1cb5..a6d194708 100644 --- a/examples/exp_configs/rl/multiagent/multiagent_i210.py +++ b/examples/exp_configs/rl/multiagent/multiagent_i210.py @@ -149,7 +149,11 @@ # network's documentation or ADDITIONAL_NET_PARAMS component) net=NetParams( inflows=inflow, - template=NET_TEMPLATE + template=NET_TEMPLATE, + additional_params={ + "on_ramp": False, + "ghost_edge": False + } ), # vehicles to be placed in the network at the start of a rollout (see diff --git a/tests/fast_tests/test_scenarios.py b/tests/fast_tests/test_scenarios.py index a0a52851a..2263f3474 100644 --- a/tests/fast_tests/test_scenarios.py +++ b/tests/fast_tests/test_scenarios.py @@ -98,7 +98,7 @@ def test_additional_net_params(self): "num_edges": 1, "use_ghost_edge": False, "ghost_speed_limit": 25, - "test_ghost_edge": 300, + "boundary_cell_length": 300, } ) ) From c7567b6ba8d17e755cce4846fed15d26213dadfa Mon Sep 17 00:00:00 2001 From: Eugene Vinitsky Date: Mon, 18 May 2020 13:09:13 -0400 Subject: [PATCH 51/85] Fix gain on MPG --- examples/exp_configs/non_rl/highway_single.py | 26 +++++++++-------- .../rl/multiagent/multiagent_i210.py | 7 +++-- .../rl/multiagent/multiagent_straight_road.py | 28 +++++++++++++------ examples/train.py | 2 +- flow/algorithms/centralized_PPO.py | 3 +- flow/algorithms/custom_ppo.py | 2 ++ flow/envs/multiagent/i210.py | 26 +++++++++++------ flow/visualize/visualizer_rllib.py | 5 +++- 8 files changed, 64 insertions(+), 35 deletions(-) diff --git a/examples/exp_configs/non_rl/highway_single.py b/examples/exp_configs/non_rl/highway_single.py index 7948dfdef..7c1211ef5 100644 --- a/examples/exp_configs/non_rl/highway_single.py +++ b/examples/exp_configs/non_rl/highway_single.py @@ -15,7 +15,7 @@ from flow.core.params import VehicleParams from flow.core.params import SumoParams from flow.core.params import SumoLaneChangeParams -from flow.core.rewards import miles_per_gallon, +from flow.core.rewards import miles_per_gallon, miles_per_megajoule from flow.networks import HighwayNetwork from flow.envs import TestEnv from flow.networks.highway import ADDITIONAL_NET_PARAMS @@ -51,15 +51,15 @@ lane_change_params=SumoLaneChangeParams( lane_change_mode="strategic", ), - # acceleration_controller=(BandoFTLController, { - # 'alpha': .5, - # 'beta': 20.0, - # 'h_st': 12.0, - # 'h_go': 50.0, - # 'v_max': 30.0, - # 'noise': 1.0 if INCLUDE_NOISE else 0.0, - # }), - acceleration_controller=(IDMController, {}), + acceleration_controller=(BandoFTLController, { + 'alpha': .5, + 'beta': 20.0, + 'h_st': 12.0, + 'h_go': 50.0, + 'v_max': 30.0, + 'noise': 1.0 if INCLUDE_NOISE else 0.0, + }), + # acceleration_controller=(IDMController, {}), ) @@ -68,7 +68,7 @@ "av", color='red', num_vehicles=0, - acceleration_controller=(FollowerStopper, {"v_des": 12.0}), + acceleration_controller=(FollowerStopper, {"v_des": 11.0}), ) inflows = InFlows() @@ -139,5 +139,7 @@ env.k.vehicle.get_speed(env.k.vehicle.get_ids()))), "avg_outflow": lambda env: np.nan_to_num( env.k.vehicle.get_outflow_rate(120)), - "mpg": 
lambda env: miles_per_gallon(env, env.k.vehicle.get_ids(), gain=1.0) + "mpg": lambda env: miles_per_gallon(env, env.k.vehicle.get_ids(), gain=1.0), + "mpj": lambda env: miles_per_megajoule(env, env.k.vehicle.get_ids(), gain=1.0) + } diff --git a/examples/exp_configs/rl/multiagent/multiagent_i210.py b/examples/exp_configs/rl/multiagent/multiagent_i210.py index 19a093504..1e8886371 100644 --- a/examples/exp_configs/rl/multiagent/multiagent_i210.py +++ b/examples/exp_configs/rl/multiagent/multiagent_i210.py @@ -54,12 +54,15 @@ 'target_velocity': 12.0, # how many AVs there can be at once (this is only for centralized critics) "max_num_agents": 10, + # whether to add a slight reward for opening up a gap that will be annealed out N iterations in - "headway_curriculum": False, + "headway_curriculum": True, # how many timesteps to anneal the headway curriculum over "headway_curriculum_iters": 100, # weight of the headway reward - "headway_reward_gain": 0.1, + "headway_reward_gain": 2.0, + # desired time headway + "min_time_headway": 2.0, # whether to add a slight reward for traveling at a desired speed "speed_curriculum": True, diff --git a/examples/exp_configs/rl/multiagent/multiagent_straight_road.py b/examples/exp_configs/rl/multiagent/multiagent_straight_road.py index aef3ee521..586f708bd 100644 --- a/examples/exp_configs/rl/multiagent/multiagent_straight_road.py +++ b/examples/exp_configs/rl/multiagent/multiagent_straight_road.py @@ -3,7 +3,7 @@ Trains a non-constant number of agents, all sharing the same policy, on the highway with ramps network. """ -from flow.controllers import IDMController, RLController +from flow.controllers import IDMController, RLController, BandoFTLController from flow.core.params import EnvParams from flow.core.params import NetParams from flow.core.params import InitialConfig @@ -56,28 +56,30 @@ 'max_accel': 2.6, 'max_decel': 4.5, 'target_velocity': 11.0, - 'local_reward': False, + 'local_reward': True, 'lead_obs': True, # whether to reroute vehicles once they have exited "reroute_on_exit": True, # whether to use the MPG reward. 
Otherwise, defaults to a target velocity reward "mpg_reward": True, # how many vehicles to look back for the MPG reward - "look_back_length": 10, + "look_back_length": 3, # how many AVs there can be at once (this is only for centralized critics) "max_num_agents": 10, # whether to add a slight reward for opening up a gap that will be annealed out N iterations in - "headway_curriculum": False, + "headway_curriculum": True, # how many timesteps to anneal the headway curriculum over "headway_curriculum_iters": 100, # weight of the headway reward - "headway_reward_gain": 1.0, + "headway_reward_gain": 2.0, + # desired time headway + "min_time_headway": 2.0, # whether to add a slight reward for traveling at a desired speed - "speed_curriculum": True, + "speed_curriculum": False, # how many timesteps to anneal the headway curriculum over - "speed_curriculum_iters": 50, + "speed_curriculum_iters": 100, # weight of the headway reward "speed_reward_gain": 2.0 }) @@ -93,7 +95,15 @@ lane_change_params=SumoLaneChangeParams( lane_change_mode="strategic", ), - acceleration_controller=(IDMController, {}), + acceleration_controller=(BandoFTLController, { + 'alpha': .5, + 'beta': 20.0, + 'h_st': 12.0, + 'h_go': 50.0, + 'v_max': 30.0, + 'noise': 1.0 if INCLUDE_NOISE else 0.0, + }), + # acceleration_controller=(IDMController, {}), ) # autonomous vehicles @@ -153,7 +163,7 @@ sim_step=0.5, render=False, use_ballistic=True, - restart_instance=False + restart_instance=True ), # network-related parameters (see flow.core.params.NetParams and the diff --git a/examples/train.py b/examples/train.py index 8f241000a..57ea84f6e 100644 --- a/examples/train.py +++ b/examples/train.py @@ -275,7 +275,7 @@ def on_episode_step(info): if not np.isnan(av_speed): episode.user_data["avg_speed_avs"].append(av_speed) episode.user_data["avg_energy"].append(energy_consumption(env)) - episode.user_data["avg_mpg"].append(miles_per_gallon(env, env.k.vehicle.get_ids())) + episode.user_data["avg_mpg"].append(miles_per_gallon(env, env.k.vehicle.get_ids(), gain=1.0)) def on_episode_end(info): diff --git a/flow/algorithms/centralized_PPO.py b/flow/algorithms/centralized_PPO.py index 3affd47c4..8f3b9f261 100644 --- a/flow/algorithms/centralized_PPO.py +++ b/flow/algorithms/centralized_PPO.py @@ -221,7 +221,8 @@ def centralized_critic_postprocessing(policy, try: central_obs_batch = np.hstack((sample_batch["obs"], np.hstack(central_obs_list))) except: - import ipdb; ipdb.set_trace() + # TODO(@ev) this is a bug and needs to be fixed + central_obs_batch = sample_batch["obs"] max_vf_agents = policy.model.max_num_agents num_agents = len(rel_agents) + 1 if num_agents < max_vf_agents: diff --git a/flow/algorithms/custom_ppo.py b/flow/algorithms/custom_ppo.py index 050d7d1fd..a98af6c2d 100644 --- a/flow/algorithms/custom_ppo.py +++ b/flow/algorithms/custom_ppo.py @@ -266,6 +266,8 @@ def __init__(self, config): shape=(), trainable=False, dtype=tf.float32) + def update_kl(self, blah): + pass CustomPPOTFPolicy = build_tf_policy( diff --git a/flow/envs/multiagent/i210.py b/flow/envs/multiagent/i210.py index 9acb6cd93..6d4d66414 100644 --- a/flow/envs/multiagent/i210.py +++ b/flow/envs/multiagent/i210.py @@ -82,6 +82,7 @@ def __init__(self, env_params, sim_params, network, simulator='traci'): # how many timesteps to anneal the headway curriculum over self.headway_curriculum_iters = env_params.additional_params["headway_curriculum_iters"] self.headway_reward_gain = env_params.additional_params["headway_reward_gain"] + self.min_time_headway = 
env_params.additional_params["min_time_headway"] # whether to add a slight reward for opening up a gap that will be annealed out N iterations in self.speed_curriculum = env_params.additional_params["speed_curriculum"] @@ -184,12 +185,12 @@ def compute_reward(self, rl_actions, **kwargs): for rl_id in self.k.vehicle.get_rl_ids(): rewards[rl_id] = 0 if self.mpg_reward: - rewards[rl_id] = miles_per_gallon(self, rl_id) / 100.0 + rewards[rl_id] = miles_per_gallon(self, rl_id, gain=1.0) / 100.0 follow_id = rl_id for i in range(self.look_back_length): follow_id = self.k.vehicle.get_follower(follow_id) if follow_id not in ["", None]: - rewards[rl_id] += (miles_per_gallon(self, follow_id) - 14.0) / 100.0 + rewards[rl_id] += (miles_per_gallon(self, follow_id, gain=1.0) - 14.0) / 100.0 else: break else: @@ -205,7 +206,7 @@ def compute_reward(self, rl_actions, **kwargs): for speed in speeds]) / (des_speed ** 2) else: if self.mpg_reward: - reward = np.nan_to_num(miles_per_gallon(self, self.k.vehicle.get_ids())) / 100.0 + reward = np.nan_to_num(miles_per_gallon(self, self.k.vehicle.get_ids(), gain=1.0)) / 100.0 else: speeds = self.k.vehicle.get_speed(self.k.vehicle.get_ids()) des_speed = self.env_params.additional_params["target_velocity"] @@ -217,8 +218,10 @@ def compute_reward(self, rl_actions, **kwargs): reward = np.nan_to_num(np.mean([(des_speed - np.abs(speed - des_speed)) ** 2 for speed in speeds]) / (des_speed ** 2)) rewards = {rl_id: reward for rl_id in self.k.vehicle.get_rl_ids()} + + # curriculum over time-gaps if self.headway_curriculum and self.num_training_iters <= self.headway_curriculum_iters: - t_min = 1 # smallest acceptable time headway + t_min = self.min_time_headway # smallest acceptable time headway for veh_id, rew in rewards.items(): lead_id = self.k.vehicle.get_leader(veh_id) penalty = 0 @@ -240,11 +243,16 @@ def compute_reward(self, rl_actions, **kwargs): for veh_id, rew in rewards.items(): speed = self.k.vehicle.get_speed(veh_id) speed_reward = 0.0 - if speed >= 0: - if self.reroute_on_exit: - speed_reward = ((des_speed - np.abs(speed - des_speed))) / (des_speed) + follow_id = veh_id + for i in range(self.look_back_length): + follow_id = self.k.vehicle.get_follower(follow_id) + if follow_id not in ["", None]: + if self.reroute_on_exit: + speed_reward += ((des_speed - np.abs(speed - des_speed))) / (des_speed) + else: + speed_reward += ((des_speed - np.abs(speed - des_speed)) ** 2) / (des_speed ** 2) else: - speed_reward = ((des_speed - np.abs(speed - des_speed)) ** 2) / (des_speed ** 2) + break scaling_factor = max(0, 1 - self.num_training_iters / self.speed_curriculum_iters) rewards[veh_id] += speed_reward * scaling_factor * self.speed_reward_gain @@ -429,7 +437,7 @@ def compute_reward(self, rl_actions, **kwargs): if rl_id in self.rl_id_to_idx_map.keys()}) print(reward_dict) else: - reward = np.nan_to_num(miles_per_gallon(self, self.k.vehicle.get_ids())) / 100.0 + reward = np.nan_to_num(miles_per_gallon(self, self.k.vehicle.get_ids(), gain=1.0)) / 100.0 reward_dict = {idx: reward for idx in range(self.max_num_agents)} else: diff --git a/flow/visualize/visualizer_rllib.py b/flow/visualize/visualizer_rllib.py index c7d30947b..db25cc810 100644 --- a/flow/visualize/visualizer_rllib.py +++ b/flow/visualize/visualizer_rllib.py @@ -96,6 +96,9 @@ def visualizer_rllib(args): from ray.rllib.models import ModelCatalog agent_cls = CCTrainer ModelCatalog.register_custom_model("cc_model", CentralizedCriticModel) + elif config['env_config']['run'] == "": + from flow.algorithms.custom_ppo import 
CustomPPOTrainer + agent_cls = CustomPPOTrainer elif config_run: agent_cls = get_agent_class(config_run) else: @@ -226,7 +229,7 @@ def visualizer_rllib(args): if speeds: vel.append(np.mean(speeds)) - mpg.append(miles_per_gallon(env.unwrapped, vehicles.get_ids())) + mpg.append(miles_per_gallon(env.unwrapped, vehicles.get_ids(), gain=1.0)) if multiagent: action = {} From 00f114c9135de059b667656eb94636424a1f6ce8 Mon Sep 17 00:00:00 2001 From: Eugene Vinitsky Date: Wed, 20 May 2020 18:42:44 -0400 Subject: [PATCH 52/85] Add ability to control which lengths the control is applied on --- examples/exp_configs/non_rl/highway_single.py | 6 +- .../rl/multiagent/multiagent_straight_road.py | 23 ++-- examples/train.py | 6 +- flow/controllers/velocity_controllers.py | 12 ++- flow/core/rewards.py | 2 +- flow/envs/multiagent/i210.py | 101 +++++++++++------- flow/visualize/visualizer_rllib.py | 4 - 7 files changed, 91 insertions(+), 63 deletions(-) diff --git a/examples/exp_configs/non_rl/highway_single.py b/examples/exp_configs/non_rl/highway_single.py index 0bb49440f..f10975cd0 100644 --- a/examples/exp_configs/non_rl/highway_single.py +++ b/examples/exp_configs/non_rl/highway_single.py @@ -28,7 +28,7 @@ # whether to include noise in the car-following models INCLUDE_NOISE = True -PENETRATION_RATE = 0.0 +PENETRATION_RATE = 10.0 additional_net_params = ADDITIONAL_NET_PARAMS.copy() additional_net_params.update({ @@ -71,7 +71,7 @@ "av", color='red', num_vehicles=0, - acceleration_controller=(FollowerStopper, {"v_des": 6.0}), + acceleration_controller=(FollowerStopper, {"v_des": 6.0, "control_length": [500, 2300]}), ) inflows = InFlows() @@ -147,7 +147,7 @@ "miles_per_megajoule": lambda env: np.nan_to_num( miles_per_megajoule(env, env.k.vehicle.get_ids(), gain=1.0) ), - "miles_per_gallone": lambda env: np.nan_to_num( + "miles_per_gallon": lambda env: np.nan_to_num( miles_per_gallon(env, env.k.vehicle.get_ids(), gain=1.0) ) } diff --git a/examples/exp_configs/rl/multiagent/multiagent_straight_road.py b/examples/exp_configs/rl/multiagent/multiagent_straight_road.py index cec50d577..e2fadecd4 100644 --- a/examples/exp_configs/rl/multiagent/multiagent_straight_road.py +++ b/examples/exp_configs/rl/multiagent/multiagent_straight_road.py @@ -23,7 +23,7 @@ # the inflow rate of vehicles HIGHWAY_INFLOW_RATE = 2215 # the simulation time horizon (in steps) -HORIZON = 1500 +HORIZON = 1000 # whether to include noise in the car-following models INCLUDE_NOISE = True @@ -54,20 +54,23 @@ additional_env_params.update({ 'max_accel': 2.6, 'max_decel': 4.5, - 'target_velocity': 11.0, + 'target_velocity': 6.0, 'local_reward': True, 'lead_obs': True, + 'control_range': [500, 2300], # whether to reroute vehicles once they have exited "reroute_on_exit": True, # whether to use the MPG reward. Otherwise, defaults to a target velocity reward - "mpg_reward": True, + "mpg_reward": False, + # whether to use the joules reward. 
Otherwise, defaults to a target velocity reward + "mpj_reward": True, # how many vehicles to look back for the MPG reward "look_back_length": 3, # how many AVs there can be at once (this is only for centralized critics) "max_num_agents": 10, # whether to add a slight reward for opening up a gap that will be annealed out N iterations in - "headway_curriculum": True, + "headway_curriculum": False, # how many timesteps to anneal the headway curriculum over "headway_curriculum_iters": 100, # weight of the headway reward @@ -132,7 +135,7 @@ # SET UP FLOW PARAMETERS warmup_steps = 0 if additional_env_params['reroute_on_exit']: - warmup_steps = 400 + warmup_steps = 500 flow_params = dict( # name of the experiment @@ -151,16 +154,16 @@ env=EnvParams( horizon=HORIZON, warmup_steps=warmup_steps, - sims_per_step=1, # do not put more than one - additional_params=additional_env_params, + sims_per_step=3, + additional_params=additional_env_params ), # sumo-related parameters (see flow.core.params.SumoParams) sim=SumoParams( - sim_step=0.5, + sim_step=0.4, render=False, - use_ballistic=True, - restart_instance=True + restart_instance=True, + use_ballistic=True ), # network-related parameters (see flow.core.params.NetParams and the diff --git a/examples/train.py b/examples/train.py index 468a9d6a9..46c98c7fc 100644 --- a/examples/train.py +++ b/examples/train.py @@ -23,6 +23,7 @@ except ImportError: print("Stable-baselines not installed. Please install it if you need it.") +import ray from ray import tune from ray.tune.registry import register_env from ray.rllib.env.group_agents_wrapper import _GroupAgentsWrapper @@ -32,7 +33,7 @@ from ray.rllib.agents.registry import get_agent_class from flow.core.util import ensure_dir -from flow.core.rewards import energy_consumption, miles_per_gallon +from flow.core.rewards import energy_consumption, miles_per_gallon, miles_per_megajoule from flow.utils.registry import env_constructor from flow.utils.rllib import FlowParamsEncoder, get_flow_params from flow.utils.registry import make_create_env @@ -260,6 +261,7 @@ def on_episode_start(info): episode.user_data["avg_speed_avs"] = [] episode.user_data["avg_energy"] = [] episode.user_data["avg_mpg"] = [] + episode.user_data["avg_mpj"] = [] def on_episode_step(info): @@ -275,6 +277,7 @@ def on_episode_step(info): episode.user_data["avg_speed_avs"].append(av_speed) episode.user_data["avg_energy"].append(energy_consumption(env)) episode.user_data["avg_mpg"].append(miles_per_gallon(env, env.k.vehicle.get_ids(), gain=1.0)) + episode.user_data["avg_mpj"].append(miles_per_megajoule(env, env.k.vehicle.get_ids(), gain=1.0)) def on_episode_end(info): @@ -285,6 +288,7 @@ def on_episode_end(info): episode.custom_metrics["avg_speed_avs"] = avg_speed_avs episode.custom_metrics["avg_energy_per_veh"] = np.mean(episode.user_data["avg_energy"]) episode.custom_metrics["avg_mpg_per_veh"] = np.mean(episode.user_data["avg_mpg"]) + episode.custom_metrics["avg_mpj_per_veh"] = np.mean(episode.user_data["avg_mpj"]) def on_train_result(info): """Store the mean score of the episode, and increment or decrement how many adversaries are on""" diff --git a/flow/controllers/velocity_controllers.py b/flow/controllers/velocity_controllers.py index 05d2fb0c4..ab29a6135 100644 --- a/flow/controllers/velocity_controllers.py +++ b/flow/controllers/velocity_controllers.py @@ -26,7 +26,8 @@ def __init__(self, veh_id, car_following_params, v_des=15, - danger_edges=None): + danger_edges=None, + control_length=None): """Instantiate FollowerStopper.""" 
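The control_length argument added here makes the FollowerStopper position-gated: as the get_accel change below shows, the controller stays silent outside the given window of absolute positions (get_accel returns None, so no acceleration command is issued for that step). A minimal sketch of the corresponding configuration, reusing the v_des and 500-2300 m window from the highway_single.py change earlier in this patch:

    from flow.controllers.velocity_controllers import FollowerStopper

    # Target 6 m/s, but only act on vehicles whose absolute position lies
    # between 500 m and 2300 m; elsewhere get_accel() returns None.
    av_controller = (FollowerStopper, {
        "v_des": 6.0,
        "control_length": [500, 2300],
    })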
BaseController.__init__( self, veh_id, car_following_params, delay=0.0, @@ -47,6 +48,7 @@ def __init__(self, self.d_3 = 0.5 self.danger_edges = danger_edges if danger_edges else {} + self.control_length = control_length def find_intersection_dist(self, env): """Find distance to intersection. @@ -111,9 +113,11 @@ def get_accel(self, env): if edge == "": return None - if self.find_intersection_dist(env) <= 10 and \ - env.k.vehicle.get_edge(self.veh_id) in self.danger_edges or \ - env.k.vehicle.get_edge(self.veh_id)[0] == ":": + if (self.find_intersection_dist(env) <= 10 and \ + env.k.vehicle.get_edge(self.veh_id) in self.danger_edges) or \ + env.k.vehicle.get_edge(self.veh_id)[0] == ":"\ + or (self.control_length and (env.k.vehicle.get_x_by_id(self.veh_id) < self.control_length[0] + or env.k.vehicle.get_x_by_id(self.veh_id) > self.control_length[1])): return None else: # compute the acceleration from the desired velocity diff --git a/flow/core/rewards.py b/flow/core/rewards.py index 431f4e09f..e4a5599cd 100755 --- a/flow/core/rewards.py +++ b/flow/core/rewards.py @@ -388,7 +388,7 @@ def miles_per_megajoule(env, veh_ids=None, gain=.001): speed = env.k.vehicle.get_speed(veh_id) # convert to be positive since the function called is a penalty power = -vehicle_energy_consumption(env, veh_id, gain=1.0) - if power > 0 and speed >= 0.0: + if power > 0 and not speed < 0.1: counter += 1 # meters / joule is (v * \delta t) / (power * \delta t) mpj += speed / power diff --git a/flow/envs/multiagent/i210.py b/flow/envs/multiagent/i210.py index f9e40357b..9c7e94cf8 100644 --- a/flow/envs/multiagent/i210.py +++ b/flow/envs/multiagent/i210.py @@ -7,7 +7,7 @@ from gym.spaces import Box, Discrete, Dict import numpy as np -from flow.core.rewards import miles_per_gallon +from flow.core.rewards import miles_per_gallon, miles_per_megajoule from flow.envs.multiagent.base import MultiEnv # largest number of lanes on any given edge in the network @@ -74,7 +74,9 @@ def __init__(self, env_params, sim_params, network, simulator='traci'): self.num_enter_lanes = 5 self.entrance_edge = "119257914" self.exit_edge = "119257908#2" + self.control_range = env_params.additional_params['control_range'] self.mpg_reward = env_params.additional_params["mpg_reward"] + self.mpj_reward = env_params.additional_params["mpj_reward"] self.look_back_length = env_params.additional_params["look_back_length"] # whether to add a slight reward for opening up a gap that will be annealed out N iterations in @@ -154,17 +156,19 @@ def get_state(self): if self.lead_obs: veh_info = {} for rl_id in self.k.vehicle.get_rl_ids(): - speed = self.k.vehicle.get_speed(rl_id) - lead_id = self.k.vehicle.get_leader(rl_id) - if lead_id in ["", None]: - # in case leader is not visible - lead_speed = SPEED_SCALE - headway = HEADWAY_SCALE - else: - lead_speed = self.k.vehicle.get_speed(lead_id) - headway = self.k.vehicle.get_headway(rl_id) - veh_info.update({rl_id: np.array([speed / SPEED_SCALE, headway / HEADWAY_SCALE, - lead_speed / SPEED_SCALE])}) + if self.k.vehicle.get_x_by_id(rl_id) < self.control_range[1] \ + and self.k.vehicle.get_x_by_id(rl_id) > self.control_range[0]: + speed = self.k.vehicle.get_speed(rl_id) + lead_id = self.k.vehicle.get_leader(rl_id) + if lead_id in ["", None]: + # in case leader is not visible + lead_speed = SPEED_SCALE + headway = HEADWAY_SCALE + else: + lead_speed = self.k.vehicle.get_speed(lead_id) + headway = self.k.vehicle.get_headway(rl_id) + veh_info.update({rl_id: np.array([speed / SPEED_SCALE, headway / HEADWAY_SCALE, + 
lead_speed / SPEED_SCALE])}) else: veh_info = {rl_id: np.concatenate((self.state_util(rl_id), self.veh_statistics(rl_id))) @@ -180,30 +184,44 @@ def compute_reward(self, rl_actions, **kwargs): t = time() rewards = {} + print(self.time_counter) if self.env_params.additional_params["local_reward"]: des_speed = self.env_params.additional_params["target_velocity"] for rl_id in self.k.vehicle.get_rl_ids(): - rewards[rl_id] = 0 - if self.mpg_reward: - rewards[rl_id] = miles_per_gallon(self, rl_id, gain=1.0) / 100.0 - follow_id = rl_id - for i in range(self.look_back_length): - follow_id = self.k.vehicle.get_follower(follow_id) - if follow_id not in ["", None]: - rewards[rl_id] += (miles_per_gallon(self, follow_id, gain=1.0) - 14.0) / 100.0 - else: - break - else: - speeds = [] - follow_speed = self.k.vehicle.get_speed(self.k.vehicle.get_follower(rl_id)) - if follow_speed >= 0: - speeds.append(follow_speed) - if self.k.vehicle.get_speed(rl_id) >= 0: - speeds.append(self.k.vehicle.get_speed(rl_id)) - if len(speeds) > 0: - # rescale so the critic can estimate it quickly - rewards[rl_id] = np.mean([(des_speed - np.abs(speed - des_speed)) ** 2 - for speed in speeds]) / (des_speed ** 2) + if self.k.vehicle.get_x_by_id(rl_id) < self.control_range[1] \ + and self.k.vehicle.get_x_by_id(rl_id) > self.control_range[0]: + rewards[rl_id] = 0 + if self.mpg_reward: + rewards[rl_id] = miles_per_gallon(self, rl_id, gain=1.0) / 100.0 + follow_id = rl_id + for i in range(self.look_back_length): + follow_id = self.k.vehicle.get_follower(follow_id) + if follow_id not in ["", None]: + rewards[rl_id] += miles_per_gallon(self, follow_id, gain=1.0) / 100.0 + else: + break + elif self.mpj_reward: + rewards[rl_id] = miles_per_megajoule(self, rl_id, gain=1.0) / 100.0 + follow_id = rl_id + for i in range(self.look_back_length): + follow_id = self.k.vehicle.get_follower(follow_id) + if follow_id not in ["", None]: + # if self.time_counter > 700 and miles_per_megajoule(self, follow_id, gain=1.0) > 1.0: + # import ipdb; ipdb.set_trace() + rewards[rl_id] += miles_per_megajoule(self, follow_id, gain=1.0) / 100.0 + else: + break + else: + speeds = [] + follow_speed = self.k.vehicle.get_speed(self.k.vehicle.get_follower(rl_id)) + if follow_speed >= 0: + speeds.append(follow_speed) + if self.k.vehicle.get_speed(rl_id) >= 0: + speeds.append(self.k.vehicle.get_speed(rl_id)) + if len(speeds) > 0: + # rescale so the critic can estimate it quickly + rewards[rl_id] = np.mean([(des_speed - np.abs(speed - des_speed)) ** 2 + for speed in speeds]) / (des_speed ** 2) else: if self.mpg_reward: reward = np.nan_to_num(miles_per_gallon(self, self.k.vehicle.get_ids(), gain=1.0)) / 100.0 @@ -217,7 +235,9 @@ def compute_reward(self, rl_actions, **kwargs): else: reward = np.nan_to_num(np.mean([(des_speed - np.abs(speed - des_speed)) ** 2 for speed in speeds]) / (des_speed ** 2)) - rewards = {rl_id: reward for rl_id in self.k.vehicle.get_rl_ids()} + rewards = {rl_id: reward for rl_id in self.k.vehicle.get_rl_ids() + if self.k.vehicle.get_x_by_id(rl_id) < self.control_range[1] \ + and self.k.vehicle.get_x_by_id(rl_id) > self.control_range[0]} # curriculum over time-gaps if self.headway_curriculum and self.num_training_iters <= self.headway_curriculum_iters: @@ -381,7 +401,7 @@ def __init__(self, env_params, sim_params, network, simulator='traci'): self.idx_to_rl_id_map = OrderedDict() self.index_counter = 0 self.default_state = {idx: np.zeros(self.observation_space.shape[0]) - for idx in range(self.max_num_agents)} + for idx in 
range(self.max_num_agents)} def _apply_rl_actions(self, rl_actions): """See class definition.""" @@ -431,25 +451,26 @@ def compute_reward(self, rl_actions, **kwargs): if self.env_params.additional_params["local_reward"]: reward = super().compute_reward(rl_actions) reward_dict = {idx: 0 for idx in - range(self.max_num_agents)} + range(self.max_num_agents)} reward_dict.update({self.rl_id_to_idx_map[rl_id]: reward[rl_id] for rl_id in reward.keys() if rl_id in self.rl_id_to_idx_map.keys()}) print(reward_dict) else: reward = np.nan_to_num(miles_per_gallon(self, self.k.vehicle.get_ids(), gain=1.0)) / 100.0 reward_dict = {idx: reward for idx in - range(self.max_num_agents)} + range(self.max_num_agents)} else: if self.env_params.additional_params["local_reward"]: reward = super().compute_reward(rl_actions) reward_dict = {idx: 0 for idx in - range(self.max_num_agents)} + range(self.max_num_agents)} reward_dict.update({self.rl_id_to_idx_map[rl_id]: reward[rl_id] for rl_id in reward.keys() if rl_id in self.rl_id_to_idx_map.keys()}) else: - reward = np.nan_to_num(np.mean(self.k.vehicle.get_speed(self.k.vehicle.get_ids()))) / (20 * self.env_params.horizon) + reward = np.nan_to_num(np.mean(self.k.vehicle.get_speed(self.k.vehicle.get_ids()))) / ( + 20 * self.env_params.horizon) reward_dict = {idx: reward for idx in - range(self.max_num_agents)} + range(self.max_num_agents)} # print('reward time is ', time() - t) return reward_dict diff --git a/flow/visualize/visualizer_rllib.py b/flow/visualize/visualizer_rllib.py index db25cc810..67dd1026b 100644 --- a/flow/visualize/visualizer_rllib.py +++ b/flow/visualize/visualizer_rllib.py @@ -296,10 +296,6 @@ def visualizer_rllib(args): print(mean_speed) print('Average, std: {}, {}'.format(np.mean(mean_speed), np.std( mean_speed))) - print("\nSpeed, std (m/s):") - print(std_speed) - print('Average, std: {}, {}'.format(np.mean(std_speed), np.std( - std_speed))) print('Average, std miles per gallon: {}, {}'.format(np.mean(mpg), np.std(mpg))) From 231f04f34234ea3a1a1891c45409cb533c4974c3 Mon Sep 17 00:00:00 2001 From: Eugene Vinitsky Date: Thu, 21 May 2020 13:43:50 -0400 Subject: [PATCH 53/85] Move training iterations --- flow/envs/base.py | 2 ++ flow/envs/multiagent/base.py | 3 +++ flow/envs/multiagent/i210.py | 3 --- 3 files changed, 5 insertions(+), 3 deletions(-) diff --git a/flow/envs/base.py b/flow/envs/base.py index baf8270b5..b718d9ab6 100644 --- a/flow/envs/base.py +++ b/flow/envs/base.py @@ -148,6 +148,8 @@ def __init__(self, self.state = None self.obs_var_labels = [] + self.num_training_iters = 0 + # track IDs that have ever been observed in the system self.observed_ids = set() self.observed_rl_ids = set() diff --git a/flow/envs/multiagent/base.py b/flow/envs/multiagent/base.py index 68427d109..6d1880673 100644 --- a/flow/envs/multiagent/base.py +++ b/flow/envs/multiagent/base.py @@ -322,3 +322,6 @@ def apply_rl_actions(self, rl_actions=None): # clip according to the action space requirements clipped_actions = self.clip_actions(rl_actions) self._apply_rl_actions(clipped_actions) + + def set_iteration_num(self): + self.num_training_iters += 1 diff --git a/flow/envs/multiagent/i210.py b/flow/envs/multiagent/i210.py index 9c7e94cf8..18b83acf7 100644 --- a/flow/envs/multiagent/i210.py +++ b/flow/envs/multiagent/i210.py @@ -279,9 +279,6 @@ def compute_reward(self, rl_actions, **kwargs): # print('time to get reward is ', time() - t) return rewards - def set_iteration_num(self): - self.num_training_iters += 1 - def additional_command(self): """See parent class. 
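The speed and headway curricula above are annealed with this iteration counter, which the patch moves into the base environment; set_iteration_num() is expected to be invoked once per training iteration (for example from the on_train_result callback in examples/train.py, which is not shown in this hunk). A toy sketch of the resulting schedule, mirroring the scaling_factor expression used by the curricula; the 20-iteration horizon matches speed_curriculum_iters in the straight-road config:

    # Weight applied to a curriculum bonus at training iteration k, i.e.
    # scaling_factor = max(0, 1 - num_training_iters / curriculum_iters).
    def curriculum_weight(k, curriculum_iters=20):
        return max(0, 1 - k / curriculum_iters)

    # 1.0 at iteration 0, 0.5 halfway through, and 0 from iteration
    # curriculum_iters onward, so the shaping term eventually vanishes.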
From c5a251e005e01d97249fce715d506e2c1843569b Mon Sep 17 00:00:00 2001 From: Eugene Vinitsky Date: Thu, 21 May 2020 15:24:12 -0400 Subject: [PATCH 54/85] Turn off mpj reward --- .../exp_configs/rl/multiagent/multiagent_straight_road.py | 2 +- scripts/ray_autoscale.yaml | 6 ++++-- 2 files changed, 5 insertions(+), 3 deletions(-) diff --git a/examples/exp_configs/rl/multiagent/multiagent_straight_road.py b/examples/exp_configs/rl/multiagent/multiagent_straight_road.py index e2fadecd4..6309b66f1 100644 --- a/examples/exp_configs/rl/multiagent/multiagent_straight_road.py +++ b/examples/exp_configs/rl/multiagent/multiagent_straight_road.py @@ -63,7 +63,7 @@ # whether to use the MPG reward. Otherwise, defaults to a target velocity reward "mpg_reward": False, # whether to use the joules reward. Otherwise, defaults to a target velocity reward - "mpj_reward": True, + "mpj_reward": False, # how many vehicles to look back for the MPG reward "look_back_length": 3, # how many AVs there can be at once (this is only for centralized critics) diff --git a/scripts/ray_autoscale.yaml b/scripts/ray_autoscale.yaml index a7e81561f..6bd9c78ad 100644 --- a/scripts/ray_autoscale.yaml +++ b/scripts/ray_autoscale.yaml @@ -70,8 +70,7 @@ worker_nodes: setup_commands: - cd flow && git fetch && git checkout origin/flow_maddpg - -head_setup_commands: + - flow/scripts/setup_sumo_ubuntu1604.sh - pip install ray==0.8.0 - pip install tabulate - pip install boto3==1.10.45 # 1.4.8 adds InstanceMarketOptions @@ -82,6 +81,9 @@ head_setup_commands: - pip install tensorflow==2.0.0 - pip install lz4 - pip install dm-tree + - pip install numpy==1.18.4 + +head_setup_commands: [] # Custom commands that will be run on worker nodes after common setup. worker_setup_commands: [] From 5fb28b8b8c1c7847a55a72cfe1ae54ab81808f36 Mon Sep 17 00:00:00 2001 From: Eugene Vinitsky Date: Thu, 21 May 2020 15:28:24 -0400 Subject: [PATCH 55/85] Minorg --- flow/envs/multiagent/i210.py | 1 - 1 file changed, 1 deletion(-) diff --git a/flow/envs/multiagent/i210.py b/flow/envs/multiagent/i210.py index 18b83acf7..b7b13d3a8 100644 --- a/flow/envs/multiagent/i210.py +++ b/flow/envs/multiagent/i210.py @@ -184,7 +184,6 @@ def compute_reward(self, rl_actions, **kwargs): t = time() rewards = {} - print(self.time_counter) if self.env_params.additional_params["local_reward"]: des_speed = self.env_params.additional_params["target_velocity"] for rl_id in self.k.vehicle.get_rl_ids(): From 1895c4f45c4c411cee9560c8f18cb8d6509d27d7 Mon Sep 17 00:00:00 2001 From: Eugene Vinitsky Date: Thu, 21 May 2020 16:33:18 -0400 Subject: [PATCH 56/85] MPG w/ curriculum test --- .../rl/multiagent/multiagent_straight_road.py | 8 +++---- scripts/run_exps.sh | 21 ++++++++++++------- 2 files changed, 17 insertions(+), 12 deletions(-) diff --git a/examples/exp_configs/rl/multiagent/multiagent_straight_road.py b/examples/exp_configs/rl/multiagent/multiagent_straight_road.py index 6309b66f1..1fcb318ff 100644 --- a/examples/exp_configs/rl/multiagent/multiagent_straight_road.py +++ b/examples/exp_configs/rl/multiagent/multiagent_straight_road.py @@ -63,7 +63,7 @@ # whether to use the MPG reward. Otherwise, defaults to a target velocity reward "mpg_reward": False, # whether to use the joules reward. 
Otherwise, defaults to a target velocity reward - "mpj_reward": False, + "mpj_reward": True, # how many vehicles to look back for the MPG reward "look_back_length": 3, # how many AVs there can be at once (this is only for centralized critics) @@ -79,11 +79,11 @@ "min_time_headway": 2.0, # whether to add a slight reward for traveling at a desired speed - "speed_curriculum": False, + "speed_curriculum": True, # how many timesteps to anneal the headway curriculum over - "speed_curriculum_iters": 100, + "speed_curriculum_iters": 20, # weight of the headway reward - "speed_reward_gain": 2.0 + "speed_reward_gain": 1.0 }) diff --git a/scripts/run_exps.sh b/scripts/run_exps.sh index 773127ece..0a8be26d4 100755 --- a/scripts/run_exps.sh +++ b/scripts/run_exps.sh @@ -19,12 +19,17 @@ #--num_iterations 200 --num_cpus 8 --num_rollouts 8 --rl_trainer rllib --use_s3" --start --stop \ #--cluster-name=ev_i210_test2 --tmux -ray exec ray_autoscale.yaml "python flow/examples/train.py multiagent_straight_road \ -straight_road_reroute_local_rew_mpg_curr --algorithm PPO \ ---num_iterations 200 --num_cpus 7 --num_rollouts 7 --rl_trainer rllib --use_s3 --grid_search" --start --stop \ ---cluster-name=ev_i210_test3 --tmux +#ray exec ray_autoscale.yaml "python flow/examples/train.py multiagent_straight_road \ +#straight_road_reroute_local_rew_mpg_curr --algorithm PPO \ +#--num_iterations 200 --num_cpus 7 --num_rollouts 7 --rl_trainer rllib --use_s3 --grid_search" --start --stop \ +#--cluster-name=ev_i210_test3 --tmux +# +#ray exec ray_autoscale.yaml "python flow/examples/train.py multiagent_i210 \ +#i210_reroute_local_rew_mpg_curr --algorithm PPO \ +#--num_iterations 200 --num_cpus 7 --num_rollouts 7 --rl_trainer rllib --use_s3 --grid_search" --start --stop \ +#--cluster-name=ev_i210_test4 --tmux -ray exec ray_autoscale.yaml "python flow/examples/train.py multiagent_i210 \ -i210_reroute_local_rew_mpg_curr --algorithm PPO \ ---num_iterations 200 --num_cpus 7 --num_rollouts 7 --rl_trainer rllib --use_s3 --grid_search" --start --stop \ ---cluster-name=ev_i210_test4 --tmux \ No newline at end of file +ray exec ray_autoscale.yaml "python flow/examples/train.py multiagent_straight_road \ +straight_road_reroute_local_rew_mpj_curr --algorithm PPO \ +--num_iterations 100 --num_cpus 30 --num_rollouts 30 --rl_trainer rllib --use_s3" --start --stop \ +--cluster-name=ev_i210_test3 --tmux \ No newline at end of file From 7772dfe2d7792828c4146699c7f26407ab33466a Mon Sep 17 00:00:00 2001 From: Eugene Vinitsky Date: Thu, 21 May 2020 19:52:48 -0400 Subject: [PATCH 57/85] Add stop penalty --- .../rl/multiagent/multiagent_straight_road.py | 7 +++++-- flow/envs/multiagent/i210.py | 10 ++++++++++ 2 files changed, 15 insertions(+), 2 deletions(-) diff --git a/examples/exp_configs/rl/multiagent/multiagent_straight_road.py b/examples/exp_configs/rl/multiagent/multiagent_straight_road.py index 1fcb318ff..9e990beba 100644 --- a/examples/exp_configs/rl/multiagent/multiagent_straight_road.py +++ b/examples/exp_configs/rl/multiagent/multiagent_straight_road.py @@ -63,7 +63,7 @@ # whether to use the MPG reward. Otherwise, defaults to a target velocity reward "mpg_reward": False, # whether to use the joules reward. 
Otherwise, defaults to a target velocity reward - "mpj_reward": True, + "mpj_reward": False, # how many vehicles to look back for the MPG reward "look_back_length": 3, # how many AVs there can be at once (this is only for centralized critics) @@ -83,7 +83,10 @@ # how many timesteps to anneal the headway curriculum over "speed_curriculum_iters": 20, # weight of the headway reward - "speed_reward_gain": 1.0 + "speed_reward_gain": 1.0, + + # penalize stopped vehicles + "penalize_stops": True }) diff --git a/flow/envs/multiagent/i210.py b/flow/envs/multiagent/i210.py index b7b13d3a8..a9ff1748b 100644 --- a/flow/envs/multiagent/i210.py +++ b/flow/envs/multiagent/i210.py @@ -94,6 +94,9 @@ def __init__(self, env_params, sim_params, network, simulator='traci'): self.num_training_iters = 0 self.leader = [] + # penalize stops + self.penalize_stops = env_params.additional_params["penalize_stops"] + @property def observation_space(self): """See class definition.""" @@ -275,6 +278,13 @@ def compute_reward(self, rl_actions, **kwargs): scaling_factor = max(0, 1 - self.num_training_iters / self.speed_curriculum_iters) rewards[veh_id] += speed_reward * scaling_factor * self.speed_reward_gain + + if self.penalize_stops: + for veh_id in rewards.keys(): + speed = self.k.vehicle.get_speed(veh_id) + if speed < 1.0: + rewards[veh_id] -= .01 + # print('time to get reward is ', time() - t) return rewards From 5c79a8aafcb9e50cc122746502ce5c539595960a Mon Sep 17 00:00:00 2001 From: Eugene Vinitsky Date: Fri, 22 May 2020 11:40:20 -0400 Subject: [PATCH 58/85] Add accel_penalty --- .../rl/multiagent/multiagent_straight_road.py | 5 ++++- flow/envs/multiagent/i210.py | 13 ++++++++++--- flow/visualize/time_space_diagram.py | 17 ++++++++++++----- flow/visualize/visualizer_rllib.py | 9 +++++++-- 4 files changed, 33 insertions(+), 11 deletions(-) diff --git a/examples/exp_configs/rl/multiagent/multiagent_straight_road.py b/examples/exp_configs/rl/multiagent/multiagent_straight_road.py index 9e990beba..c7402800a 100644 --- a/examples/exp_configs/rl/multiagent/multiagent_straight_road.py +++ b/examples/exp_configs/rl/multiagent/multiagent_straight_road.py @@ -86,7 +86,10 @@ "speed_reward_gain": 1.0, # penalize stopped vehicles - "penalize_stops": True + "penalize_stops": True, + + # penalize accels + "penalize_accel": True }) diff --git a/flow/envs/multiagent/i210.py b/flow/envs/multiagent/i210.py index a9ff1748b..93151c31f 100644 --- a/flow/envs/multiagent/i210.py +++ b/flow/envs/multiagent/i210.py @@ -97,6 +97,9 @@ def __init__(self, env_params, sim_params, network, simulator='traci'): # penalize stops self.penalize_stops = env_params.additional_params["penalize_stops"] + # penalize accel + self.penalize_accel = env_params.additional_params.get("penalize_accel", False) + @property def observation_space(self): """See class definition.""" @@ -279,11 +282,15 @@ def compute_reward(self, rl_actions, **kwargs): rewards[veh_id] += speed_reward * scaling_factor * self.speed_reward_gain - if self.penalize_stops: - for veh_id in rewards.keys(): - speed = self.k.vehicle.get_speed(veh_id) + for veh_id in rewards.keys(): + speed = self.k.vehicle.get_speed(veh_id) + if self.penalize_stops: if speed < 1.0: rewards[veh_id] -= .01 + if self.penalize_accel and veh_id in self.k.vehicle.previous_speeds: + prev_speed = self.k.vehicle.get_previous_speed(veh_id) + abs_accel = abs(speed - prev_speed) / self.sim_step + rewards[veh_id] -= abs_accel / 400.0 # print('time to get reward is ', time() - t) return rewards diff --git 
a/flow/visualize/time_space_diagram.py b/flow/visualize/time_space_diagram.py index 9ac6938d4..9c6f6d2cd 100644 --- a/flow/visualize/time_space_diagram.py +++ b/flow/visualize/time_space_diagram.py @@ -256,12 +256,19 @@ def _highway(data, params, all_time): time step. Set to zero if the vehicle is not present in the network at that time step. """ - length = params['net'].additional_params['length'] - num_edges = params['net'].additional_params['num_edges'] - edge_len = length / num_edges + junction_length = 0.1 + length = params['net'].additional_params["length"] + num_edges = params['net'].additional_params.get("num_edges", 1) edge_starts = {} - for i in range(num_edges): - edge_starts.update({"highway_{}".format(i): i * edge_len, ":edge_{}_0".format(i): i * edge_len}) + # Add the main edges. + edge_starts.update({ + "highway_{}".format(i): + i * (length / num_edges + junction_length) + for i in range(num_edges) + }) + + if params['net'].additional_params["use_ghost_edge"]: + edge_starts.update({"highway_end": length + num_edges * junction_length}) # compute the absolute position for veh_id in data.keys(): diff --git a/flow/visualize/visualizer_rllib.py b/flow/visualize/visualizer_rllib.py index 67dd1026b..162e455e9 100644 --- a/flow/visualize/visualizer_rllib.py +++ b/flow/visualize/visualizer_rllib.py @@ -26,7 +26,7 @@ from ray.rllib.agents.registry import get_agent_class from ray.tune.registry import register_env -from flow.core.rewards import miles_per_gallon +from flow.core.rewards import miles_per_gallon, miles_per_megajoule from flow.core.util import emission_to_csv from flow.utils.registry import make_create_env from flow.utils.rllib import get_flow_params @@ -172,7 +172,7 @@ def visualizer_rllib(args): if hasattr(env, "reroute_on_exit"): env.reroute_on_exit = False env.env_params.horizon += env.env_params.warmup_steps - env.env_params.warmup_steps = 0 + # env.env_params.warmup_steps = 0 if args.render_mode == 'sumo_gui': env.sim_params.render = True # set to True after initializing agent and env @@ -212,6 +212,7 @@ def visualizer_rllib(args): final_outflows = [] final_inflows = [] mpg = [] + mpj = [] mean_speed = [] std_speed = [] for i in range(args.num_rollouts): @@ -230,6 +231,7 @@ def visualizer_rllib(args): vel.append(np.mean(speeds)) mpg.append(miles_per_gallon(env.unwrapped, vehicles.get_ids(), gain=1.0)) + mpj.append(miles_per_megajoule(env.unwrapped, vehicles.get_ids(), gain=1.0)) if multiagent: action = {} @@ -299,6 +301,9 @@ def visualizer_rllib(args): print('Average, std miles per gallon: {}, {}'.format(np.mean(mpg), np.std(mpg))) + print('Average, std miles per megajoule: {}, {}'.format(np.mean(mpj), np.std(mpj))) + + # Compute arrival rate of vehicles in the last 500 sec of the run print("\nOutflows (veh/hr):") print(final_outflows) From 45a8e0ae0ab22de75472545dceae54a20cfd50fe Mon Sep 17 00:00:00 2001 From: Eugene Vinitsky Date: Sun, 24 May 2020 11:52:06 -0400 Subject: [PATCH 59/85] Fix to space time diagram to update to new highway network --- examples/exp_configs/non_rl/highway_single.py | 4 ++-- .../rl/multiagent/multiagent_straight_road.py | 4 ++-- flow/controllers/velocity_controllers.py | 2 +- flow/visualize/time_space_diagram.py | 12 ++++++++++++ 4 files changed, 17 insertions(+), 5 deletions(-) diff --git a/examples/exp_configs/non_rl/highway_single.py b/examples/exp_configs/non_rl/highway_single.py index f10975cd0..0a9a6774b 100644 --- a/examples/exp_configs/non_rl/highway_single.py +++ b/examples/exp_configs/non_rl/highway_single.py @@ -24,7 +24,7 @@ # 
the inflow rate of vehicles TRAFFIC_FLOW = 2215 # the simulation time horizon (in steps) -HORIZON = 600 +HORIZON = 1000 # whether to include noise in the car-following models INCLUDE_NOISE = True @@ -71,7 +71,7 @@ "av", color='red', num_vehicles=0, - acceleration_controller=(FollowerStopper, {"v_des": 6.0, "control_length": [500, 2300]}), + acceleration_controller=(FollowerStopper, {"v_des": 5.0, "control_length": [500, 2300]}), ) inflows = InFlows() diff --git a/examples/exp_configs/rl/multiagent/multiagent_straight_road.py b/examples/exp_configs/rl/multiagent/multiagent_straight_road.py index c7402800a..fccccddeb 100644 --- a/examples/exp_configs/rl/multiagent/multiagent_straight_road.py +++ b/examples/exp_configs/rl/multiagent/multiagent_straight_road.py @@ -125,7 +125,7 @@ edge="highway_0", vehs_per_hour=int(HIGHWAY_INFLOW_RATE * (1 - PENETRATION_RATE / 100)), depart_lane="free", - depart_speed="23.0", + depart_speed=TRAFFIC_SPEED, name="idm_highway_inflow") # add autonomous vehicles on the highway @@ -135,7 +135,7 @@ edge="highway_0", vehs_per_hour=int(HIGHWAY_INFLOW_RATE * (PENETRATION_RATE / 100)), depart_lane="free", - depart_speed="23.0", + depart_speed=TRAFFIC_SPEED, name="rl_highway_inflow") # SET UP FLOW PARAMETERS diff --git a/flow/controllers/velocity_controllers.py b/flow/controllers/velocity_controllers.py index ab29a6135..d04c05f3e 100644 --- a/flow/controllers/velocity_controllers.py +++ b/flow/controllers/velocity_controllers.py @@ -77,7 +77,7 @@ def find_intersection_dist(self, env): def get_accel(self, env): """See parent class.""" - if env.time_counter < env.env_params.warmup_steps: + if env.time_counter < env.env_params.warmup_steps * env.env_params.sims_per_step: return None else: lead_id = env.k.vehicle.get_leader(self.veh_id) diff --git a/flow/visualize/time_space_diagram.py b/flow/visualize/time_space_diagram.py index 9c6f6d2cd..004172765 100644 --- a/flow/visualize/time_space_diagram.py +++ b/flow/visualize/time_space_diagram.py @@ -270,6 +270,18 @@ def _highway(data, params, all_time): if params['net'].additional_params["use_ghost_edge"]: edge_starts.update({"highway_end": length + num_edges * junction_length}) + edge_starts.update({ + ":edge_{}".format(i + 1): + (i + 1) * length / num_edges + i * junction_length + for i in range(num_edges - 1) + }) + + if params['net'].additional_params["use_ghost_edge"]: + edge_starts.update({ + ":edge_{}".format(num_edges): + length + (num_edges - 1) * junction_length + }) + # compute the absolute position for veh_id in data.keys(): data[veh_id]['abs_pos'] = _get_abs_pos_1_edge(data[veh_id]['edge'], From cf579bfaac9dba5128cbc248f44d700aa5754d86 Mon Sep 17 00:00:00 2001 From: Eugene Vinitsky Date: Sun, 24 May 2020 14:55:07 -0400 Subject: [PATCH 60/85] Import georges I210 fixes --- .../exp_configs/non_rl/i210_subnetwork.py | 240 ++++++++++++------ .../i210_with_ghost_cell_with_downstream.xml | 10 +- 2 files changed, 165 insertions(+), 85 deletions(-) diff --git a/examples/exp_configs/non_rl/i210_subnetwork.py b/examples/exp_configs/non_rl/i210_subnetwork.py index 25565bb49..a6779e535 100644 --- a/examples/exp_configs/non_rl/i210_subnetwork.py +++ b/examples/exp_configs/non_rl/i210_subnetwork.py @@ -1,9 +1,12 @@ """I-210 subnetwork example.""" import os + import numpy as np -from flow.controllers import IDMController -from flow.controllers import I210Router +from flow.controllers.car_following_models import IDMController +from flow.controllers.velocity_controllers import FollowerStopper +from 
flow.controllers.lane_change_controllers import StaticLaneChanger +from flow.controllers.routing_controllers import I210Router from flow.core.params import SumoParams from flow.core.params import EnvParams from flow.core.params import NetParams @@ -11,94 +14,176 @@ from flow.core.params import VehicleParams from flow.core.params import InitialConfig from flow.core.params import InFlows + +from flow.core.params import SumoCarFollowingParams + import flow.config as config from flow.envs import TestEnv -from flow.networks.i210_subnetwork import I210SubNetwork, EDGES_DISTRIBUTION -# =========================================================================== # -# Specify some configurable constants. # -# =========================================================================== # +# Instantiate which conditions we want to be true about the network -# whether to include the upstream ghost edge in the network WANT_GHOST_CELL = True -# whether to include the downstream slow-down edge in the network WANT_DOWNSTREAM_BOUNDARY = True -# whether to include vehicles on the on-ramp -ON_RAMP = True -# the inflow rate of vehicles (in veh/hr) -INFLOW_RATE = 5 * 2215 -# the speed of inflowing vehicles from the main edge (in m/s) -INFLOW_SPEED = 24.1 - -# =========================================================================== # -# Specify the path to the network template. # -# =========================================================================== # - -if WANT_DOWNSTREAM_BOUNDARY: - net_template = os.path.join( - config.PROJECT_PATH, - "examples/exp_configs/templates/sumo/i210_with_ghost_cell_with_" - "downstream.xml") -elif WANT_GHOST_CELL: - net_template = os.path.join( - config.PROJECT_PATH, - "examples/exp_configs/templates/sumo/i210_with_ghost_cell.xml") -else: - net_template = os.path.join( - config.PROJECT_PATH, - "examples/exp_configs/templates/sumo/test2.net.xml") +ON_RAMP = False +PENETRATION_RATE = 0.10 +V_DES = 13.0 +HORIZON = 1000 +WARMUP_STEPS = 400 + +inflow_rate = 2050 +inflow_speed = 25.5 -# If the ghost cell is not being used, remove it from the initial edges that -# vehicles can be placed on. -edges_distribution = EDGES_DISTRIBUTION.copy() -if not WANT_GHOST_CELL: - edges_distribution.remove("ghost0") -# =========================================================================== # -# Specify vehicle-specific information and inflows. 
# -# =========================================================================== # +accel_data = (IDMController,{'a':1.3,'b':2.0,'noise':0.3}) + + +highway_start_edge = '' + +if(WANT_GHOST_CELL): + from flow.networks.i210_subnetwork_ghost_cell import I210SubNetworkGhostCell, EDGES_DISTRIBUTION + highway_start_edge = 'ghost0' +else: + from flow.networks.i210_subnetwork import I210SubNetwork, EDGES_DISTRIBUTION + highway_start_edge = "119257914" + vehicles = VehicleParams() -vehicles.add( - "human", - num_vehicles=0, - lane_change_params=SumoLaneChangeParams( - lane_change_mode="strategic", - ), - acceleration_controller=(IDMController, { - "a": 1.3, - "b": 2.0, - "noise": 0.3, - }), - routing_controller=(I210Router, {}) if ON_RAMP else None, -) inflow = InFlows() -# main highway -inflow.add( - veh_type="human", - edge="ghost0" if WANT_GHOST_CELL else "119257914", - vehs_per_hour=INFLOW_RATE, - departLane="best", - departSpeed=INFLOW_SPEED) -# on ramp + if ON_RAMP: + vehicles.add( + "human", + num_vehicles=0, + color="white", + lane_change_params=SumoLaneChangeParams( + lane_change_mode="strategic", + ), + acceleration_controller=accel_data, + routing_controller=(I210Router, {}) + ) + if PENETRATION_RATE > 0.0: + vehicles.add( + "av", + num_vehicles=0, + color="red", + acceleration_controller=(FollowerStopper, {"v_des": V_DES}), + routing_controller=(I210Router, {}) + ) + + # inflow.add( + # veh_type="human", + # edge=highway_start_edge, + # vehs_per_hour=inflow_rate, + # departLane="best", + # departSpeed=inflow_speed) + + lane_list = ['0','1','2','3','4'] + + for lane in lane_list: + inflow.add( + veh_type="human", + edge=highway_start_edge, + vehs_per_hour=int(inflow_rate * (1 - PENETRATION_RATE)), + departLane=lane, + departSpeed=inflow_speed) + inflow.add( veh_type="human", edge="27414345", - vehs_per_hour=500, + vehs_per_hour=int(500 * (1 - PENETRATION_RATE)), departLane="random", departSpeed=10) inflow.add( veh_type="human", edge="27414342#0", - vehs_per_hour=500, + vehs_per_hour=int(500 * (1 - PENETRATION_RATE)), departLane="random", departSpeed=10) -# =========================================================================== # -# Generate the flow_params dict with all relevant simulation information. 
# -# =========================================================================== # + if PENETRATION_RATE > 0.0: + for lane in lane_list: + inflow.add( + veh_type="av", + edge=highway_start_edge, + vehs_per_hour=int(inflow_rate * PENETRATION_RATE), + departLane=lane, + departSpeed=inflow_speed) + + inflow.add( + veh_type="av", + edge="27414345", + vehs_per_hour=int(500 * PENETRATION_RATE), + departLane="random", + departSpeed=10) + inflow.add( + veh_type="av", + edge="27414342#0", + vehs_per_hour=int(500 * PENETRATION_RATE), + departLane="random", + departSpeed=10) + +else: + # create the base vehicle type that will be used for inflows + vehicles.add( + "human", + num_vehicles=0, + lane_change_params=SumoLaneChangeParams( + lane_change_mode="strategic", + ), + acceleration_controller=accel_data, + ) + if PENETRATION_RATE > 0.0: + vehicles.add( + "av", + color="red", + num_vehicles=0, + acceleration_controller=(FollowerStopper, {"v_des": V_DES}), + ) + + # If you want to turn off the fail safes uncomment this: + + # vehicles.add( + # 'human', + # num_vehicles=0, + # lane_change_params=SumoLaneChangeParams( + # lane_change_mode='strategic', + # ), + # acceleration_controller=accel_data, + # car_following_params=SumoCarFollowingParams(speed_mode='19') + # ) + + lane_list = ['0','1','2','3','4'] + + for lane in lane_list: + inflow.add( + veh_type="human", + edge=highway_start_edge, + vehs_per_hour=int(inflow_rate * (1 - PENETRATION_RATE)), + departLane=lane, + departSpeed=inflow_speed) + + if PENETRATION_RATE > 0.0: + for lane in lane_list: + inflow.add( + veh_type="av", + edge=highway_start_edge, + vehs_per_hour=int(inflow_rate * PENETRATION_RATE), + departLane=lane, + departSpeed=inflow_speed) + + +network_xml_file = "examples/exp_configs/templates/sumo/i210_with_ghost_cell_with_downstream.xml" + +# network_xml_file = "examples/exp_configs/templates/sumo/i210_with_congestion.xml" + +NET_TEMPLATE = os.path.join(config.PROJECT_PATH,network_xml_file) + +if WANT_GHOST_CELL: + network = I210SubNetworkGhostCell +else: + network = I210SubNetwork + flow_params = dict( # name of the experiment @@ -108,7 +193,7 @@ env_name=TestEnv, # name of the network class the experiment is running on - network=I210SubNetwork, + network=network, # simulator that is used by the experiment simulator='traci', @@ -117,24 +202,23 @@ sim=SumoParams( sim_step=0.4, render=False, - color_by_speed=True, + color_by_speed=False, use_ballistic=True ), # environment related parameters (see flow.core.params.EnvParams) env=EnvParams( - horizon=10000, + horizon=HORIZON, + warmup_steps=WARMUP_STEPS, + sims_per_step=3 ), # network-related parameters (see flow.core.params.NetParams and the # network's documentation or ADDITIONAL_NET_PARAMS component) net=NetParams( inflows=inflow, - template=net_template, - additional_params={ - "on_ramp": ON_RAMP, - "ghost_edge": WANT_GHOST_CELL, - } + template=NET_TEMPLATE, + additional_params={"use_on_ramp": ON_RAMP} ), # vehicles to be placed in the network at the start of a rollout (see @@ -144,18 +228,14 @@ # parameters specifying the positioning of vehicles upon initialization/ # reset (see flow.core.params.InitialConfig) initial=InitialConfig( - edges_distribution=edges_distribution, + edges_distribution=EDGES_DISTRIBUTION, ), ) -# =========================================================================== # -# Specify custom callable that is logged during simulation runtime. 
# -# =========================================================================== # - edge_id = "119257908#1-AddedOnRampEdge" custom_callables = { "avg_merge_speed": lambda env: np.nan_to_num(np.mean( - env.k.vehicle.get_speed(env.k.vehicle.get_ids()))), + env.k.vehicle.get_speed(env.k.vehicle.get_ids_by_edge(edge_id)))), "avg_outflow": lambda env: np.nan_to_num( env.k.vehicle.get_outflow_rate(120)), # we multiply by 5 to account for the vehicle length and by 1000 to convert diff --git a/examples/exp_configs/templates/sumo/i210_with_ghost_cell_with_downstream.xml b/examples/exp_configs/templates/sumo/i210_with_ghost_cell_with_downstream.xml index 10d4d8d45..b9b2db479 100644 --- a/examples/exp_configs/templates/sumo/i210_with_ghost_cell_with_downstream.xml +++ b/examples/exp_configs/templates/sumo/i210_with_ghost_cell_with_downstream.xml @@ -3501,11 +3501,11 @@ - - - - - + + + + + From 9a5b532e3227f7cb891d57f3535f4ca539081830 Mon Sep 17 00:00:00 2001 From: Eugene Vinitsky Date: Tue, 26 May 2020 01:49:48 -0400 Subject: [PATCH 61/85] Add ghost cell template with correct priorities --- ...0_with_ghost_cell_with_downstream_test.xml | 5719 +++++++++++++++++ 1 file changed, 5719 insertions(+) create mode 100644 examples/exp_configs/templates/sumo/i210_with_ghost_cell_with_downstream_test.xml diff --git a/examples/exp_configs/templates/sumo/i210_with_ghost_cell_with_downstream_test.xml b/examples/exp_configs/templates/sumo/i210_with_ghost_cell_with_downstream_test.xml new file mode 100644 index 000000000..ee508b730 --- /dev/null +++ b/examples/exp_configs/templates/sumo/i210_with_ghost_cell_with_downstream_test.xml @@ -0,0 +1,5719 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + 
+ [... 5,719 added lines of SUMO network template XML for i210_with_ghost_cell_with_downstream_test.xml; the element markup is not preserved in this extract, only the leading '+' diff markers remain ...]
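# A minimal, hedged sketch of how a network template like the one added above is
# wired into a Flow experiment config: the XML file is referenced by absolute
# path and handed to NetParams, and the boolean flags tell the network class
# which I-210 variant (on-ramp, ghost cell) to expect.  Every API name below
# appears elsewhere in this patch series; only their arrangement into a
# standalone snippet is illustrative.
import os

import flow.config as config
from flow.core.params import InFlows, NetParams

NET_TEMPLATE = os.path.join(
    config.PROJECT_PATH,
    "examples/exp_configs/templates/sumo/i210_with_ghost_cell_with_downstream_test.xml")

net_params = NetParams(
    inflows=InFlows(),  # normally populated per lane, as in the configs below
    template=NET_TEMPLATE,
    additional_params={"on_ramp": False, "ghost_edge": True})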
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + From e6f7ef8a7f8b24b0c3d39e97f746940b1b4e462e Mon Sep 17 00:00:00 2001 From: Eugene Vinitsky Date: Tue, 26 May 2020 17:27:26 -0400 Subject: [PATCH 62/85] Update I210 to new model --- .../exp_configs/non_rl/i210_subnetwork.py | 55 ++-- examples/exp_configs/non_rl/straight_road.py | 2 +- .../rl/multiagent/multiagent_i210.py | 258 ++++++++++++------ examples/train.py | 22 +- flow/controllers/velocity_controllers.py | 6 +- flow/envs/__init__.py | 3 +- flow/envs/multiagent/i210.py | 32 ++- flow/envs/test.py | 14 + 8 files changed, 256 insertions(+), 136 deletions(-) diff --git a/examples/exp_configs/non_rl/i210_subnetwork.py b/examples/exp_configs/non_rl/i210_subnetwork.py index e66c9ee46..9ffc7145e 100644 --- a/examples/exp_configs/non_rl/i210_subnetwork.py +++ b/examples/exp_configs/non_rl/i210_subnetwork.py @@ -4,7 +4,6 @@ from flow.controllers.car_following_models import IDMController from flow.controllers.velocity_controllers import FollowerStopper -from flow.controllers.lane_change_controllers import StaticLaneChanger from flow.controllers.routing_controllers import I210Router from flow.core.params import SumoParams from flow.core.params import EnvParams 
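# Hedged illustration of the inflow pattern these configs converge on: demand on
# the entry edge is specified per lane, and split within each lane between
# human-driven and autonomous vehicles according to the penetration rate.  The
# InFlows API and keyword names are taken from the surrounding diffs; the bare
# constants below (and the assumption that "human" and "av" vehicle types were
# added to a VehicleParams elsewhere) make this a sketch rather than a full
# experiment config.
from flow.core.params import InFlows

PENETRATION_RATE = 0.10   # fraction of vehicles that are AVs
INFLOW_RATE = 2050        # vehicles per hour injected into each lane
INFLOW_SPEED = 25.5       # departure speed in m/s
ENTRY_EDGE = "ghost0"     # upstream ghost edge when WANT_GHOST_CELL is True

inflow = InFlows()
for lane in ['0', '1', '2', '3', '4']:
    # human-driven share of the per-lane demand
    inflow.add(
        veh_type="human",
        edge=ENTRY_EDGE,
        vehs_per_hour=int(INFLOW_RATE * (1 - PENETRATION_RATE)),
        departLane=lane,
        departSpeed=INFLOW_SPEED)
    # autonomous share of the per-lane demand
    inflow.add(
        veh_type="av",
        edge=ENTRY_EDGE,
        vehs_per_hour=int(INFLOW_RATE * PENETRATION_RATE),
        departLane=lane,
        departSpeed=INFLOW_SPEED)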
@@ -13,9 +12,7 @@ from flow.core.params import VehicleParams from flow.core.params import InitialConfig from flow.core.params import InFlows -from flow.core.rewards import miles_per_gallon - -from flow.core.params import SumoCarFollowingParams +from flow.core.rewards import miles_per_gallon, miles_per_megajoule import flow.config as config from flow.envs import TestEnv @@ -23,40 +20,29 @@ # Instantiate which conditions we want to be true about the network WANT_GHOST_CELL = True -WANT_DOWNSTREAM_BOUNDARY = True +# WANT_DOWNSTREAM_BOUNDARY = True ON_RAMP = False -PENETRATION_RATE = 0.10 -V_DES = 13.0 +PENETRATION_RATE = 0.0 +V_DES = 5.0 HORIZON = 1000 -WARMUP_STEPS = 400 +WARMUP_STEPS = 600 inflow_rate = 2050 inflow_speed = 25.5 +accel_data = (IDMController, {'a': 1.3, 'b': 2.0, 'noise': 0.3}) -accel_data = (IDMController,{'a':1.3,'b':2.0,'noise':0.3}) - - -highway_start_edge = '' - -if(WANT_GHOST_CELL): +if WANT_GHOST_CELL: from flow.networks.i210_subnetwork_ghost_cell import I210SubNetworkGhostCell, EDGES_DISTRIBUTION + highway_start_edge = 'ghost0' else: from flow.networks.i210_subnetwork import I210SubNetwork, EDGES_DISTRIBUTION - highway_start_edge = "119257914" + highway_start_edge = "119257914" vehicles = VehicleParams() -if PENETRATION_RATE > 0.0: - vehicles.add( - "av", - num_vehicles=0, - acceleration_controller=(FollowerStopper, {"v_des": 12.0}), - ) - - inflow = InFlows() if ON_RAMP: @@ -75,7 +61,9 @@ "av", num_vehicles=0, color="red", - acceleration_controller=(FollowerStopper, {"v_des": V_DES}), + acceleration_controller=(FollowerStopper, {"v_des": V_DES, + "no_control_edges": ["ghost0", "119257908#3"] + }), routing_controller=(I210Router, {}) ) @@ -86,7 +74,7 @@ # departLane="best", # departSpeed=inflow_speed) - lane_list = ['0','1','2','3','4'] + lane_list = ['0', '1', '2', '3', '4'] for lane in lane_list: inflow.add( @@ -146,7 +134,9 @@ "av", color="red", num_vehicles=0, - acceleration_controller=(FollowerStopper, {"v_des": V_DES}), + acceleration_controller=(FollowerStopper, {"v_des": V_DES, + "no_control_edges": ["ghost0", "119257908#3"] + }), ) # If you want to turn off the fail safes uncomment this: @@ -161,7 +151,7 @@ # car_following_params=SumoCarFollowingParams(speed_mode='19') # ) - lane_list = ['0','1','2','3','4'] + lane_list = ['0', '1', '2', '3', '4'] for lane in lane_list: inflow.add( @@ -180,19 +170,17 @@ departLane=lane, departSpeed=inflow_speed) - -network_xml_file = "examples/exp_configs/templates/sumo/i210_with_ghost_cell_with_downstream.xml" +network_xml_file = "examples/exp_configs/templates/sumo/i210_with_ghost_cell_with_downstream_test.xml" # network_xml_file = "examples/exp_configs/templates/sumo/i210_with_congestion.xml" -NET_TEMPLATE = os.path.join(config.PROJECT_PATH,network_xml_file) +NET_TEMPLATE = os.path.join(config.PROJECT_PATH, network_xml_file) if WANT_GHOST_CELL: network = I210SubNetworkGhostCell else: network = I210SubNetwork - flow_params = dict( # name of the experiment exp_tag='I-210_subnetwork', @@ -226,7 +214,7 @@ net=NetParams( inflows=inflow, template=NET_TEMPLATE, - additional_params={"use_on_ramp": ON_RAMP} + additional_params={"on_ramp": ON_RAMP, "ghost_edge": WANT_GHOST_CELL} ), # vehicles to be placed in the network at the start of a rollout (see @@ -255,5 +243,6 @@ "avg_density": lambda env: 5 * 1000 * len(env.k.vehicle.get_ids_by_edge( edge_id)) / (env.k.network.edge_length(edge_id) * env.k.network.num_lanes(edge_id)), - "mpg": lambda env: miles_per_gallon(env, env.k.vehicle.get_ids(), gain=1.0) + "mpg": lambda env: 
miles_per_gallon(env, env.k.vehicle.get_ids(), gain=1.0), + "mpj": lambda env: miles_per_megajoule(env, env.k.vehicle.get_ids(), gain=1.0), } diff --git a/examples/exp_configs/non_rl/straight_road.py b/examples/exp_configs/non_rl/straight_road.py index ecc296d22..1669bb896 100644 --- a/examples/exp_configs/non_rl/straight_road.py +++ b/examples/exp_configs/non_rl/straight_road.py @@ -23,7 +23,7 @@ # inflow rate on the highway in vehicles per hour HIGHWAY_INFLOW_RATE = 10800 / 5 # percentage of autonomous vehicles compared to human vehicles on highway -PENETRATION_RATE = 10.0 +PENETRATION_RATE = 0.0 # SET UP PARAMETERS FOR THE NETWORK diff --git a/examples/exp_configs/rl/multiagent/multiagent_i210.py b/examples/exp_configs/rl/multiagent/multiagent_i210.py index d4b4c265f..b9da9b1b8 100644 --- a/examples/exp_configs/rl/multiagent/multiagent_i210.py +++ b/examples/exp_configs/rl/multiagent/multiagent_i210.py @@ -9,6 +9,7 @@ from ray.tune.registry import register_env from flow.controllers import RLController +from flow.controllers.routing_controllers import I210Router from flow.controllers.car_following_models import IDMController import flow.config as config from flow.core.params import EnvParams @@ -25,21 +26,32 @@ from flow.utils.registry import make_create_env # SET UP PARAMETERS FOR THE SIMULATION +WANT_GHOST_CELL = True +# WANT_DOWNSTREAM_BOUNDARY = True +ON_RAMP = False +PENETRATION_RATE = 0.10 +V_DES = 5.0 +HORIZON = 1000 +WARMUP_STEPS = 600 -# number of steps per rollout -HORIZON = 2000 +inflow_rate = 2050 +inflow_speed = 25.5 + +accel_data = (IDMController, {'a': 1.3, 'b': 2.0, 'noise': 0.3}) VEH_PER_HOUR_BASE_119257914 = 10800 VEH_PER_HOUR_BASE_27414345 = 321 VEH_PER_HOUR_BASE_27414342 = 421 +if WANT_GHOST_CELL: + from flow.networks.i210_subnetwork_ghost_cell import I210SubNetworkGhostCell, EDGES_DISTRIBUTION -# percentage of autonomous vehicles compared to human vehicles on highway -PENETRATION_RATE = 10 - -# TODO: temporary fix -edges_distribution = EDGES_DISTRIBUTION.copy() -edges_distribution.remove("ghost0") + edges_distribution = EDGES_DISTRIBUTION + highway_start_edge = 'ghost0' +else: + from flow.networks.i210_subnetwork import I210SubNetwork, EDGES_DISTRIBUTION + edges_distribution = EDGES_DISTRIBUTION + highway_start_edge = "119257914" # SET UP PARAMETERS FOR THE ENVIRONMENT additional_env_params = ADDITIONAL_ENV_PARAMS.copy() @@ -51,17 +63,21 @@ # whether to add in a reward for the speed of nearby vehicles "local_reward": True, # whether to use the MPG reward. Otherwise, defaults to a target velocity reward - "mpg_reward": True, + "mpg_reward": False, + # whether to use the MPJ reward. 
Otherwise, defaults to a target velocity reward + "mpj_reward": False, # how many vehicles to look back for the MPG reward "look_back_length": 1, # whether to reroute vehicles once they have exited - "reroute_on_exit": False, - 'target_velocity': 12.0, + "reroute_on_exit": True, + 'target_velocity': 8.0, # how many AVs there can be at once (this is only for centralized critics) "max_num_agents": 10, + # which edges we shouldn't apply control on + "invalid_control_edges": ["ghost0", "119257908#3"], # whether to add a slight reward for opening up a gap that will be annealed out N iterations in - "headway_curriculum": True, + "headway_curriculum": False, # how many timesteps to anneal the headway curriculum over "headway_curriculum_iters": 100, # weight of the headway reward @@ -72,84 +88,154 @@ # whether to add a slight reward for traveling at a desired speed "speed_curriculum": True, # how many timesteps to anneal the headway curriculum over - "speed_curriculum_iters": 100, + "speed_curriculum_iters": 20, # weight of the headway reward - "speed_reward_gain": 0.5 + "speed_reward_gain": 0.5, + # penalize stopped vehicles + "penalize_stops": True, + + # penalize accels + "penalize_accel": True }) # CREATE VEHICLE TYPES AND INFLOWS # no vehicles in the network vehicles = VehicleParams() -vehicles.add( - "human", - num_vehicles=0, - lane_change_params=SumoLaneChangeParams(lane_change_mode="strategic"), - acceleration_controller=(IDMController, {"a": .3, "b": 2.0, "noise": 0.5}), - car_following_params=SumoCarFollowingParams(speed_mode="no_collide"), -) -vehicles.add( - "av", - acceleration_controller=(RLController, {}), - num_vehicles=0, - color='red' -) inflow = InFlows() -# main highway -pen_rate = PENETRATION_RATE / 100 -assert pen_rate < 1.0, "your penetration rate is over 100%" -assert pen_rate > 0.0, "your penetration rate should be above zero" -inflow.add( - veh_type="human", - edge="119257914", - vehs_per_hour=int(VEH_PER_HOUR_BASE_119257914 * (1 - pen_rate)), - # probability=1.0, - depart_lane="random", - departSpeed=20) -# # on ramp -# inflow.add( -# veh_type="human", -# edge="27414345", -# vehs_per_hour=321 * pen_rate, -# depart_lane="random", -# depart_speed=20) -# inflow.add( -# veh_type="human", -# edge="27414342#0", -# vehs_per_hour=421 * pen_rate, -# depart_lane="random", -# depart_speed=20) - -# Now add the AVs -# main highway -inflow.add( - veh_type="av", - edge="119257914", - vehs_per_hour=int(VEH_PER_HOUR_BASE_119257914 * pen_rate), - # probability=1.0, - depart_lane="random", - depart_speed=20) -# # on ramp -# inflow.add( -# veh_type="av", -# edge="27414345", -# vehs_per_hour=int(VEH_PER_HOUR_BASE_27414345 * pen_rate), -# depart_lane="random", -# depart_speed=20) -# inflow.add( -# veh_type="av", -# edge="27414342#0", -# vehs_per_hour=int(VEH_PER_HOUR_BASE_27414342 * pen_rate), -# depart_lane="random", -# depart_speed=20) - -NET_TEMPLATE = os.path.join( - config.PROJECT_PATH, - "examples/exp_configs/templates/sumo/test2.net.xml") - -warmup_steps = 0 -if additional_env_params['reroute_on_exit']: - warmup_steps = 400 + +if ON_RAMP: + vehicles.add( + "human", + num_vehicles=0, + color="white", + lane_change_params=SumoLaneChangeParams( + lane_change_mode="strategic", + ), + acceleration_controller=accel_data, + routing_controller=(I210Router, {}) + ) + if PENETRATION_RATE > 0.0: + vehicles.add( + "av", + num_vehicles=0, + color="red", + acceleration_controller=(RLController, {}), + routing_controller=(I210Router, {}) + ) + + # inflow.add( + # veh_type="human", + # 
edge=highway_start_edge, + # vehs_per_hour=inflow_rate, + # departLane="best", + # departSpeed=inflow_speed) + + lane_list = ['0', '1', '2', '3', '4'] + + for lane in lane_list: + inflow.add( + veh_type="human", + edge=highway_start_edge, + vehs_per_hour=int(inflow_rate * (1 - PENETRATION_RATE)), + departLane=lane, + departSpeed=inflow_speed) + + inflow.add( + veh_type="human", + edge="27414345", + vehs_per_hour=int(500 * (1 - PENETRATION_RATE)), + departLane="random", + departSpeed=10) + inflow.add( + veh_type="human", + edge="27414342#0", + vehs_per_hour=int(500 * (1 - PENETRATION_RATE)), + departLane="random", + departSpeed=10) + + if PENETRATION_RATE > 0.0: + for lane in lane_list: + inflow.add( + veh_type="av", + edge=highway_start_edge, + vehs_per_hour=int(inflow_rate * PENETRATION_RATE), + departLane=lane, + departSpeed=inflow_speed) + + inflow.add( + veh_type="av", + edge="27414345", + vehs_per_hour=int(500 * PENETRATION_RATE), + departLane="random", + departSpeed=10) + inflow.add( + veh_type="av", + edge="27414342#0", + vehs_per_hour=int(500 * PENETRATION_RATE), + departLane="random", + departSpeed=10) + +else: + # create the base vehicle type that will be used for inflows + vehicles.add( + "human", + num_vehicles=0, + lane_change_params=SumoLaneChangeParams( + lane_change_mode="strategic", + ), + acceleration_controller=accel_data, + ) + if PENETRATION_RATE > 0.0: + vehicles.add( + "av", + color="red", + num_vehicles=0, + acceleration_controller=(RLController, {}), + ) + + # If you want to turn off the fail safes uncomment this: + + # vehicles.add( + # 'human', + # num_vehicles=0, + # lane_change_params=SumoLaneChangeParams( + # lane_change_mode='strategic', + # ), + # acceleration_controller=accel_data, + # car_following_params=SumoCarFollowingParams(speed_mode='19') + # ) + + lane_list = ['0', '1', '2', '3', '4'] + + for lane in lane_list: + inflow.add( + veh_type="human", + edge=highway_start_edge, + vehs_per_hour=int(inflow_rate * (1 - PENETRATION_RATE)), + departLane=lane, + departSpeed=inflow_speed) + + if PENETRATION_RATE > 0.0: + for lane in lane_list: + inflow.add( + veh_type="av", + edge=highway_start_edge, + vehs_per_hour=int(inflow_rate * PENETRATION_RATE), + departLane=lane, + departSpeed=inflow_speed) + + +network_xml_file = "examples/exp_configs/templates/sumo/i210_with_ghost_cell_with_downstream_test.xml" + +# network_xml_file = "examples/exp_configs/templates/sumo/i210_with_congestion.xml" + +NET_TEMPLATE = os.path.join(config.PROJECT_PATH, network_xml_file) + +if WANT_GHOST_CELL: + network = I210SubNetworkGhostCell +else: + network = I210SubNetwork flow_params = dict( # name of the experiment @@ -159,14 +245,14 @@ env_name=I210MultiEnv, # name of the network class the experiment is running on - network=I210SubNetwork, + network=network, # simulator that is used by the experiment simulator='traci', # simulation-related parameters sim=SumoParams( - sim_step=0.5, + sim_step=0.4, render=False, color_by_speed=False, restart_instance=True, @@ -177,8 +263,8 @@ # environment related parameters (see flow.core.params.EnvParams) env=EnvParams( horizon=HORIZON, - sims_per_step=1, - warmup_steps=warmup_steps, + sims_per_step=3, + warmup_steps=WARMUP_STEPS, additional_params=additional_env_params, done_at_exit=False ), diff --git a/examples/train.py b/examples/train.py index b0e2a164f..6da7cb4ea 100644 --- a/examples/train.py +++ b/examples/train.py @@ -23,12 +23,14 @@ except ImportError: print("Stable-baselines not installed. 
Please install it if you need it.") +import ray from ray import tune from ray.rllib.env.group_agents_wrapper import _GroupAgentsWrapper try: from ray.rllib.agents.agent import get_agent_class except ImportError: from ray.rllib.agents.registry import get_agent_class +from ray.tune.registry import register_env from flow.core.util import ensure_dir from flow.core.rewards import energy_consumption, miles_per_gallon, miles_per_megajoule @@ -267,15 +269,25 @@ def on_episode_step(info): env = info["env"].get_unwrapped()[0] if isinstance(env, _GroupAgentsWrapper): env = env.env - speed = np.mean([speed for speed in env.k.vehicle.get_speed(env.k.vehicle.get_ids()) if speed >= 0]) + if hasattr(env, 'invalid_control_edges'): + veh_ids = [veh_id for veh_id in env.k.vehicle.get_ids() if (env.k.vehicle.get_speed(veh_id) >= 0 + and env.k.vehicle.get_edge(veh_id) + not in env.invalid_control_edges)] + rl_ids = [veh_id for veh_id in env.k.vehicle.get_rl_ids() if (env.k.vehicle.get_speed(veh_id) >= 0 + and env.k.vehicle.get_edge(veh_id) + not in env.invalid_control_edges)] + else: + veh_ids = [veh_id for veh_id in env.k.vehicle.get_ids() if env.k.vehicle.get_speed(veh_id) >= 0] + rl_ids = [veh_id for veh_id in env.k.vehicle.get_rl_ids() if env.k.vehicle.get_speed(veh_id) >= 0] + + speed = np.mean([speed for speed in env.k.vehicle.get_speed(veh_ids)]) if not np.isnan(speed): episode.user_data["avg_speed"].append(speed) - av_speed = np.mean([speed for speed in env.k.vehicle.get_speed(env.k.vehicle.get_rl_ids()) if speed >= 0]) + av_speed = np.mean([speed for speed in env.k.vehicle.get_speed(rl_ids) if speed >= 0]) if not np.isnan(av_speed): episode.user_data["avg_speed_avs"].append(av_speed) - episode.user_data["avg_energy"].append(energy_consumption(env)) - episode.user_data["avg_mpg"].append(miles_per_gallon(env, env.k.vehicle.get_ids(), gain=1.0)) - episode.user_data["avg_mpj"].append(miles_per_megajoule(env, env.k.vehicle.get_ids(), gain=1.0)) + episode.user_data["avg_mpg"].append(miles_per_gallon(env, veh_ids, gain=1.0)) + episode.user_data["avg_mpj"].append(miles_per_megajoule(env, veh_ids, gain=1.0)) def on_episode_end(info): diff --git a/flow/controllers/velocity_controllers.py b/flow/controllers/velocity_controllers.py index d04c05f3e..8a9b3a06d 100644 --- a/flow/controllers/velocity_controllers.py +++ b/flow/controllers/velocity_controllers.py @@ -27,7 +27,8 @@ def __init__(self, car_following_params, v_des=15, danger_edges=None, - control_length=None): + control_length=None, + no_control_edges=None): """Instantiate FollowerStopper.""" BaseController.__init__( self, veh_id, car_following_params, delay=0.0, @@ -49,6 +50,7 @@ def __init__(self, self.danger_edges = danger_edges if danger_edges else {} self.control_length = control_length + self.no_control_edges = no_control_edges def find_intersection_dist(self, env): """Find distance to intersection. 
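# Hedged sketch (not the patched Flow code) of what the new no_control_edges
# argument is for: a controller declines to act, i.e. returns None so that the
# simulator's default car-following model takes over, whenever its vehicle is on
# an excluded edge such as the upstream ghost cell.  A later patch in this
# series enables the check inside FollowerStopper.get_accel; the class and
# method below are illustrative stand-ins, and defaulting to an empty tuple is
# an assumption that keeps the membership test safe when nothing is excluded.
class EdgeGatedAccel:
    """Toy wrapper that suppresses control on a configurable set of edges."""

    def __init__(self, no_control_edges=None):
        self.no_control_edges = tuple(no_control_edges or ())

    def get_accel(self, current_edge, desired_accel):
        if current_edge in self.no_control_edges:
            return None  # defer to the underlying human/IDM behaviour
        return desired_accel


# Usage: control is suppressed on the ghost cell and the final edge, applied elsewhere.
gate = EdgeGatedAccel(no_control_edges=["ghost0", "119257908#3"])
assert gate.get_accel("ghost0", 1.0) is None
assert gate.get_accel("119257908#0", 1.0) == 1.0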
@@ -118,6 +120,8 @@ def get_accel(self, env): env.k.vehicle.get_edge(self.veh_id)[0] == ":"\ or (self.control_length and (env.k.vehicle.get_x_by_id(self.veh_id) < self.control_length[0] or env.k.vehicle.get_x_by_id(self.veh_id) > self.control_length[1])): + # TODO(@evinitsky) put back + # or env.k.vehicle.get_edge(self.veh_id) in self.no_control_edges: return None else: # compute the acceleration from the desired velocity diff --git a/flow/envs/__init__.py b/flow/envs/__init__.py index 8bea3dd4f..31a91292d 100755 --- a/flow/envs/__init__.py +++ b/flow/envs/__init__.py @@ -12,7 +12,7 @@ WaveAttenuationPOEnv from flow.envs.merge import MergePOEnv from flow.envs.straightroad_env import SingleStraightRoad -from flow.envs.test import TestEnv +from flow.envs.test import TestEnv, TestI210Env # deprecated classes whose names have changed from flow.envs.bottleneck_env import BottleNeckAccelEnv @@ -37,6 +37,7 @@ 'TrafficLightGridBenchmarkEnv', 'BottleneckDesiredVelocityEnv', 'TestEnv', + 'TestI210Env' 'BayBridgeEnv', 'SingleStraightRoad', # deprecated classes diff --git a/flow/envs/multiagent/i210.py b/flow/envs/multiagent/i210.py index 93151c31f..bd1ae2787 100644 --- a/flow/envs/multiagent/i210.py +++ b/flow/envs/multiagent/i210.py @@ -72,9 +72,10 @@ def __init__(self, env_params, sim_params, network, simulator='traci'): self.reroute_on_exit = env_params.additional_params.get("reroute_on_exit") self.max_lanes = MAX_LANES self.num_enter_lanes = 5 - self.entrance_edge = "119257914" + self.entrance_edge = "ghost0" self.exit_edge = "119257908#2" - self.control_range = env_params.additional_params['control_range'] + self.control_range = env_params.additional_params.get('control_range', None) + self.invalid_control_edges = env_params.additional_params.get('invalid_control_edges', []) self.mpg_reward = env_params.additional_params["mpg_reward"] self.mpj_reward = env_params.additional_params["mpj_reward"] self.look_back_length = env_params.additional_params["look_back_length"] @@ -162,8 +163,10 @@ def get_state(self): if self.lead_obs: veh_info = {} for rl_id in self.k.vehicle.get_rl_ids(): - if self.k.vehicle.get_x_by_id(rl_id) < self.control_range[1] \ - and self.k.vehicle.get_x_by_id(rl_id) > self.control_range[0]: + if (self.control_range and self.k.vehicle.get_x_by_id(rl_id) < self.control_range[1] \ + and self.k.vehicle.get_x_by_id(rl_id) > self.control_range[0]) or \ + (len(self.invalid_control_edges) > 0 and self.k.vehicle.get_edge(rl_id) not in + self.invalid_control_edges): speed = self.k.vehicle.get_speed(rl_id) lead_id = self.k.vehicle.get_leader(rl_id) if lead_id in ["", None]: @@ -193,8 +196,10 @@ def compute_reward(self, rl_actions, **kwargs): if self.env_params.additional_params["local_reward"]: des_speed = self.env_params.additional_params["target_velocity"] for rl_id in self.k.vehicle.get_rl_ids(): - if self.k.vehicle.get_x_by_id(rl_id) < self.control_range[1] \ - and self.k.vehicle.get_x_by_id(rl_id) > self.control_range[0]: + if (self.control_range and self.k.vehicle.get_x_by_id(rl_id) < self.control_range[1] \ + and self.k.vehicle.get_x_by_id(rl_id) > self.control_range[0]) or \ + (len(self.invalid_control_edges) > 0 and self.k.vehicle.get_edge(rl_id) not in + self.invalid_control_edges): rewards[rl_id] = 0 if self.mpg_reward: rewards[rl_id] = miles_per_gallon(self, rl_id, gain=1.0) / 100.0 @@ -241,8 +246,10 @@ def compute_reward(self, rl_actions, **kwargs): reward = np.nan_to_num(np.mean([(des_speed - np.abs(speed - des_speed)) ** 2 for speed in speeds]) / (des_speed ** 2)) rewards = 
{rl_id: reward for rl_id in self.k.vehicle.get_rl_ids() - if self.k.vehicle.get_x_by_id(rl_id) < self.control_range[1] \ - and self.k.vehicle.get_x_by_id(rl_id) > self.control_range[0]} + if (self.control_range and self.k.vehicle.get_x_by_id(rl_id) < self.control_range[1] \ + and self.k.vehicle.get_x_by_id(rl_id) > self.control_range[0]) or \ + (len(self.invalid_control_edges) > 0 and self.k.vehicle.get_edge(rl_id) not in + self.invalid_control_edges)} # curriculum over time-gaps if self.headway_curriculum and self.num_training_iters <= self.headway_curriculum_iters: @@ -348,11 +355,18 @@ def additional_command(self): print(e) departed_ids = self.k.vehicle.get_departed_ids() - if len(departed_ids) > 0: + if isinstance(departed_ids, tuple) and len(departed_ids) > 0: for veh_id in departed_ids: if veh_id not in self.observed_ids: self.k.vehicle.remove(veh_id) + # for veh_id in self.k.vehicle.get_ids(): + # edge = self.k.vehicle.get_edge(veh_id) + # + # # disable lane changes to prevent vehicles from being on the wrong route + # if edge == "119257908#1-AddedOnRampEdge": + # self.k.vehicle.apply_lane_change([veh_id], direction=[0]) + def state_util(self, rl_id): """Return an array of headway, tailway, leader speed, follower speed. diff --git a/flow/envs/test.py b/flow/envs/test.py index 813e4621e..2fb4f6ceb 100644 --- a/flow/envs/test.py +++ b/flow/envs/test.py @@ -52,3 +52,17 @@ def compute_reward(self, rl_actions, **kwargs): def get_state(self, **kwargs): """See class definition.""" return np.array([]) + +class TestI210Env(TestEnv): + + def additional_command(self): + edge = "119257908#0" + edge_length = self.k.network.edge_length(edge) + for veh_id in self.k.vehicle.get_ids(): + edge = self.k.vehicle.get_edge(veh_id) + pos = self.k.vehicle.get_position(veh_id) + + # disable lane changes to prevent vehicles from being on the wrong route + if edge == edge and np.abs(pos - edge_length) < 20: + # import ipdb; ipdb.set_trace() + self.k.vehicle.apply_lane_change([veh_id], direction=[0]) \ No newline at end of file From 2dfffc62608021a52cff64f874148eac49c43f54 Mon Sep 17 00:00:00 2001 From: Eugene Vinitsky Date: Tue, 26 May 2020 17:55:45 -0400 Subject: [PATCH 63/85] Add missing file --- flow/networks/i210_subnetwork_ghost_cell.py | 162 ++++++++++++++++++++ 1 file changed, 162 insertions(+) create mode 100644 flow/networks/i210_subnetwork_ghost_cell.py diff --git a/flow/networks/i210_subnetwork_ghost_cell.py b/flow/networks/i210_subnetwork_ghost_cell.py new file mode 100644 index 000000000..08fee4ecd --- /dev/null +++ b/flow/networks/i210_subnetwork_ghost_cell.py @@ -0,0 +1,162 @@ +"""Contains the I-210 sub-network class.""" + +from flow.networks.base import Network + +EDGES_DISTRIBUTION = [ + # Main highway + "ghost0", + "119257914", + "119257908#0", + "119257908#1-AddedOnRampEdge", + "119257908#1", + "119257908#1-AddedOffRampEdge", + "119257908#2", + "119257908#3", + + # On-ramp + "27414345", + "27414342#0", + "27414342#1-AddedOnRampEdge", + + # Off-ramp + "173381935", +] + + +class I210SubNetworkGhostCell(Network): + """A network used to simulate the I-210 sub-network. + + Usage + ----- + >>> from flow.core.params import NetParams + >>> from flow.core.params import VehicleParams + >>> from flow.core.params import InitialConfig + >>> from flow.networks import I210SubNetwork + >>> + >>> network = I210SubNetwork( + >>> name='I-210_subnetwork', + >>> vehicles=VehicleParams(), + >>> net_params=NetParams() + >>> ) + """ + + def specify_routes(self, net_params): + """See parent class. 
+ + Routes for vehicles moving through the I210. + """ + if net_params.additional_params["on_ramp"]: + rts = { + # Main highway + "ghost0": [ + (["ghost0", "119257914", "119257908#0", "119257908#1-AddedOnRampEdge", + "119257908#1", "119257908#1-AddedOffRampEdge", "119257908#2", + "119257908#3"], + 1), # HOV: 1509 (on ramp: 57), Non HOV: 6869 (onramp: 16) + (["119257914", "119257908#0", "119257908#1-AddedOnRampEdge", + "119257908#1", "119257908#1-AddedOffRampEdge", "173381935"], + 17 / 8378) + ], + "119257914": [ + (["119257914", "119257908#0", "119257908#1-AddedOnRampEdge", + "119257908#1", "119257908#1-AddedOffRampEdge", "119257908#2", + "119257908#3"], + 1), # HOV: 1509 (on ramp: 57), Non HOV: 6869 (onramp: 16) + (["119257914", "119257908#0", "119257908#1-AddedOnRampEdge", + "119257908#1", "119257908#1-AddedOffRampEdge", "173381935"], + 17 / 8378) + ], + "119257908#0": [ + (["119257908#0", "119257908#1-AddedOnRampEdge", "119257908#1", + "119257908#1-AddedOffRampEdge", "119257908#2", + "119257908#3"], + 1.0), + # (["119257908#0", "119257908#1-AddedOnRampEdge", "119257908#1", + # "119257908#1-AddedOffRampEdge", "173381935"], + # 0.5), + ], + "119257908#1-AddedOnRampEdge": [ + (["119257908#1-AddedOnRampEdge", "119257908#1", + "119257908#1-AddedOffRampEdge", "119257908#2", + "119257908#3"], + 1.0), + # (["119257908#1-AddedOnRampEdge", "119257908#1", + # "119257908#1-AddedOffRampEdge", "173381935"], + # 0.5), + ], + "119257908#1": [ + (["119257908#1", "119257908#1-AddedOffRampEdge", "119257908#2", + "119257908#3"], + 1.0), + # (["119257908#1", "119257908#1-AddedOffRampEdge", "173381935"], + # 0.5), + ], + "119257908#1-AddedOffRampEdge": [ + (["119257908#1-AddedOffRampEdge", "119257908#2", + "119257908#3"], + 1.0), + # (["119257908#1-AddedOffRampEdge", "173381935"], + # 0.5), + ], + "119257908#2": [ + (["119257908#2", "119257908#3"], 1), + ], + "119257908#3": [ + (["119257908#3"], 1), + ], + + # On-ramp + "27414345": [ + (["27414345", "27414342#1-AddedOnRampEdge", + "27414342#1", + "119257908#1-AddedOnRampEdge", "119257908#1", + "119257908#1-AddedOffRampEdge", "119257908#2", + "119257908#3"], + 1 - 9 / 321), + (["27414345", "27414342#1-AddedOnRampEdge", + "27414342#1", + "119257908#1-AddedOnRampEdge", "119257908#1", + "119257908#1-AddedOffRampEdge", "173381935"], + 9 / 321), + ], + "27414342#0": [ + (["27414342#0", "27414342#1-AddedOnRampEdge", + "27414342#1", + "119257908#1-AddedOnRampEdge", "119257908#1", + "119257908#1-AddedOffRampEdge", "119257908#2", + "119257908#3"], + 1 - 20 / 421), + (["27414342#0", "27414342#1-AddedOnRampEdge", + "27414342#1", + "119257908#1-AddedOnRampEdge", "119257908#1", + "119257908#1-AddedOffRampEdge", "173381935"], + 20 / 421), + ], + "27414342#1-AddedOnRampEdge": [ + (["27414342#1-AddedOnRampEdge", "27414342#1", "119257908#1-AddedOnRampEdge", + "119257908#1", "119257908#1-AddedOffRampEdge", "119257908#2", + "119257908#3"], + 0.5), + (["27414342#1-AddedOnRampEdge", "27414342#1", "119257908#1-AddedOnRampEdge", + "119257908#1", "119257908#1-AddedOffRampEdge", "173381935"], + 0.5), + ], + + # Off-ramp + "173381935": [ + (["173381935"], 1), + ], + } + + else: + rts = { + # Main highway + "ghost0": [ + (["ghost0", "119257914", "119257908#0", "119257908#1-AddedOnRampEdge", + "119257908#1", "119257908#1-AddedOffRampEdge", "119257908#2", + "119257908#3"], + 1), + ], + } + + return rts From e400a38675ed845e615f135e25517ca2e95fce97 Mon Sep 17 00:00:00 2001 From: Eugene Vinitsky Date: Tue, 26 May 2020 22:20:29 -0400 Subject: [PATCH 64/85] Add handling of too many vehicles 
trying to reroute at once
---
 flow/envs/multiagent/i210.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/flow/envs/multiagent/i210.py b/flow/envs/multiagent/i210.py
index 22615cdfb..2e4b1a9e4 100644
--- a/flow/envs/multiagent/i210.py
+++ b/flow/envs/multiagent/i210.py
@@ -322,7 +322,6 @@ def additional_command(self):
             veh_ids = self.k.vehicle.get_ids()
             edges = self.k.vehicle.get_edge(veh_ids)
             valid_lanes = list(range(self.num_enter_lanes))
-            num_trials = 0
             for veh_id, edge in zip(veh_ids, edges):
                 if edge == "":
                     continue
@@ -354,6 +353,8 @@ def additional_command(self):
                             speed="23.0")
                     except Exception as e:
                         print(e)
+                    if len(valid_lanes) == 0:
+                        break

             departed_ids = self.k.vehicle.get_departed_ids()
             if isinstance(departed_ids, tuple) and len(departed_ids) > 0:

From 3d95f2d43e6ee2732e5feb7e945dc59bf58863ec Mon Sep 17 00:00:00 2001
From: Eugene Vinitsky
Date: Tue, 26 May 2020 22:25:45 -0400
Subject: [PATCH 65/85] Minor

---
 examples/exp_configs/rl/multiagent/multiagent_i210.py | 4 ++--
 flow/envs/multiagent/i210.py | 1 -
 2 files changed, 2 insertions(+), 3 deletions(-)

diff --git a/examples/exp_configs/rl/multiagent/multiagent_i210.py b/examples/exp_configs/rl/multiagent/multiagent_i210.py
index 99fe1138e..94110d3e9 100644
--- a/examples/exp_configs/rl/multiagent/multiagent_i210.py
+++ b/examples/exp_configs/rl/multiagent/multiagent_i210.py
@@ -30,9 +30,9 @@
 # WANT_DOWNSTREAM_BOUNDARY = True
 ON_RAMP = False
 PENETRATION_RATE = 0.10
-V_DES = 5.0
+V_DES = 6.0
 HORIZON = 1000
-WARMUP_STEPS = 40
+WARMUP_STEPS = 600

 inflow_rate = 2050
 inflow_speed = 25.5
diff --git a/flow/envs/multiagent/i210.py b/flow/envs/multiagent/i210.py
index 2e4b1a9e4..24df39a5a 100644
--- a/flow/envs/multiagent/i210.py
+++ b/flow/envs/multiagent/i210.py
@@ -167,7 +167,6 @@ def get_state(self):
                     and self.k.vehicle.get_x_by_id(rl_id) > self.control_range[0]) or \
                     (len(self.invalid_control_edges) > 0 and
                      self.k.vehicle.get_edge(rl_id) not in self.invalid_control_edges):
                -print('edge', self.k.vehicle.get_edge(rl_id))
                 speed = self.k.vehicle.get_speed(rl_id)
                 lead_id = self.k.vehicle.get_leader(rl_id)
                 if lead_id in ["", None]:

From d4b76b1901b6966235d517d7a66a65555d576be2 Mon Sep 17 00:00:00 2001
From: Eugene Vinitsky
Date: Wed, 27 May 2020 01:31:19 -0400
Subject: [PATCH 66/85] Shorten curriculum iters

---
 examples/exp_configs/rl/multiagent/multiagent_i210.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/examples/exp_configs/rl/multiagent/multiagent_i210.py b/examples/exp_configs/rl/multiagent/multiagent_i210.py
index 94110d3e9..ca7539d50 100644
--- a/examples/exp_configs/rl/multiagent/multiagent_i210.py
+++ b/examples/exp_configs/rl/multiagent/multiagent_i210.py
@@ -30,7 +30,7 @@
 # WANT_DOWNSTREAM_BOUNDARY = True
 ON_RAMP = False
 PENETRATION_RATE = 0.10
-V_DES = 6.0
+V_DES = 7.0
 HORIZON = 1000
 WARMUP_STEPS = 600

@@ -88,7 +88,7 @@
     # whether to add a slight reward for traveling at a desired speed
     "speed_curriculum": True,
     # how many timesteps to anneal the headway curriculum over
-    "speed_curriculum_iters": 20,
+    "speed_curriculum_iters": 10,
     # weight of the headway reward
     "speed_reward_gain": 0.5,
     # penalize stopped vehicles

From 72eda143381256e83bc4caa75b48619565a57d4f Mon Sep 17 00:00:00 2001
From: Eugene Vinitsky
Date: Wed, 27 May 2020 14:11:04 -0400
Subject: [PATCH 67/85] Put back long curriculum

---
 .../exp_configs/non_rl/i210_subnetwork.py | 20 +++++++++++--------
 .../rl/multiagent/multiagent_i210.py | 2 +-
 flow/controllers/velocity_controllers.py | 3 ++-
 flow/envs/multiagent/i210.py | 1 -
 flow/visualize/visualizer_rllib.py | 2 +-
 5 files changed, 16 insertions(+), 12 deletions(-)

diff --git a/examples/exp_configs/non_rl/i210_subnetwork.py b/examples/exp_configs/non_rl/i210_subnetwork.py
index 9ffc7145e..3083818fd 100644
--- a/examples/exp_configs/non_rl/i210_subnetwork.py
+++ b/examples/exp_configs/non_rl/i210_subnetwork.py
@@ -233,16 +233,20 @@
 # ===========================================================================

 # edge_id = "119257908#1-AddedOnRampEdge"
+
+def valid_ids(env, veh_ids):
+    return [veh_id for veh_id in veh_ids if env.k.vehicle.get_edge(veh_id) not in ["ghost0", "119257908#3"]]
+
 custom_callables = {
     "avg_merge_speed": lambda env: np.nan_to_num(np.mean(
-        env.k.vehicle.get_speed(env.k.vehicle.get_ids_by_edge(edge_id)))),
+        env.k.vehicle.get_speed(valid_ids(env, env.k.vehicle.get_ids())))),
     "avg_outflow": lambda env: np.nan_to_num(
         env.k.vehicle.get_outflow_rate(120)),
-    # we multiply by 5 to account for the vehicle length and by 1000 to convert
-    # into veh/km
-    "avg_density": lambda env: 5 * 1000 * len(env.k.vehicle.get_ids_by_edge(
-        edge_id)) / (env.k.network.edge_length(edge_id)
-                     * env.k.network.num_lanes(edge_id)),
-    "mpg": lambda env: miles_per_gallon(env, env.k.vehicle.get_ids(), gain=1.0),
-    "mpj": lambda env: miles_per_megajoule(env, env.k.vehicle.get_ids(), gain=1.0),
+    # # we multiply by 5 to account for the vehicle length and by 1000 to convert
+    # # into veh/km
+    # "avg_density": lambda env: 5 * 1000 * len(env.k.vehicle.get_ids_by_edge(
+    #     edge_id)) / (env.k.network.edge_length(edge_id)
+    #                  * env.k.network.num_lanes(edge_id)),
+    "mpg": lambda env: miles_per_gallon(env, valid_ids(env, env.k.vehicle.get_ids()), gain=1.0),
+    "mpj": lambda env: miles_per_megajoule(env, valid_ids(env, env.k.vehicle.get_ids()), gain=1.0),
 }
diff --git a/examples/exp_configs/rl/multiagent/multiagent_i210.py b/examples/exp_configs/rl/multiagent/multiagent_i210.py
index ca7539d50..3dabcdd98 100644
--- a/examples/exp_configs/rl/multiagent/multiagent_i210.py
+++ b/examples/exp_configs/rl/multiagent/multiagent_i210.py
@@ -88,7 +88,7 @@
     # whether to add a slight reward for traveling at a desired speed
     "speed_curriculum": True,
     # how many timesteps to anneal the headway curriculum over
-    "speed_curriculum_iters": 10,
+    "speed_curriculum_iters": 20,
     # weight of the headway reward
     "speed_reward_gain": 0.5,
     # penalize stopped vehicles
diff --git a/flow/controllers/velocity_controllers.py b/flow/controllers/velocity_controllers.py
index 8a9b3a06d..62ce15beb 100644
--- a/flow/controllers/velocity_controllers.py
+++ b/flow/controllers/velocity_controllers.py
@@ -119,7 +119,8 @@ def get_accel(self, env):
                 env.k.vehicle.get_edge(self.veh_id) in self.danger_edges) or \
                 env.k.vehicle.get_edge(self.veh_id)[0] == ":"\
                 or (self.control_length and (env.k.vehicle.get_x_by_id(self.veh_id) < self.control_length[0]
-                or env.k.vehicle.get_x_by_id(self.veh_id) > self.control_length[1])):
+                or env.k.vehicle.get_x_by_id(self.veh_id) > self.control_length[1]))\
+                or edge in self.no_control_edges:
             # TODO(@evinitsky) put back
             # or env.k.vehicle.get_edge(self.veh_id) in self.no_control_edges:
             return None
diff --git a/flow/envs/multiagent/i210.py b/flow/envs/multiagent/i210.py
index 24df39a5a..a0989dc8b 100644
--- a/flow/envs/multiagent/i210.py
+++ b/flow/envs/multiagent/i210.py
@@ -159,7 +159,6 @@ def _apply_rl_actions(self, rl_actions):

     def get_state(self):
         """See class definition."""
-        t = time()
         if self.lead_obs:
             veh_info = {}
             for rl_id in self.k.vehicle.get_rl_ids():
diff --git a/flow/visualize/visualizer_rllib.py b/flow/visualize/visualizer_rllib.py
index 162e455e9..1df7ed83e 100644
--- a/flow/visualize/visualizer_rllib.py
+++ b/flow/visualize/visualizer_rllib.py
@@ -171,7 +171,7 @@ def visualizer_rllib(args):
     if hasattr(env, "reroute_on_exit"):
         env.reroute_on_exit = False

-    env.env_params.horizon += env.env_params.warmup_steps
+    # env.env_params.horizon += env.env_params.warmup_steps
     # env.env_params.warmup_steps = 0

     if args.render_mode == 'sumo_gui':

From ca6a567175668745bac9dcd7c7a5d1b414d8ae0f Mon Sep 17 00:00:00 2001
From: Eugene Vinitsky
Date: Wed, 27 May 2020 18:16:34 -0400
Subject: [PATCH 68/85] Remove MADDPG, QMIX

---
 .../rl/multiagent/multiagent_i210_maddpg.py | 201 -------
 .../multiagent_straight_road_maddpg.py | 188 ------
 examples/train.py | 25 +-
 flow/algorithms/maddpg/__init__.py | 0
 flow/algorithms/maddpg/maddpg.py | 185 ------
 flow/algorithms/maddpg/maddpg_policy.py | 398 -------------
 flow/algorithms/qmix/README.md | 1 -
 flow/algorithms/qmix/__init__.py | 8 -
 flow/algorithms/qmix/apex.py | 39 --
 flow/algorithms/qmix/mixers.py | 64 --
 flow/algorithms/qmix/model.py | 91 ---
 flow/algorithms/qmix/qmix.py | 105 ----
 flow/algorithms/qmix/qmix_policy.py | 561 ------
 flow/envs/multiagent/i210.py | 117 ----
 scripts/ray_autoscale.yaml | 4 +-
 scripts/run_exps.sh | 35 --
 16 files changed, 4 insertions(+), 2018 deletions(-)
 delete mode 100644 examples/exp_configs/rl/multiagent/multiagent_i210_maddpg.py
 delete mode 100644 examples/exp_configs/rl/multiagent/multiagent_straight_road_maddpg.py
 delete mode 100644 flow/algorithms/maddpg/__init__.py
 delete mode 100644 flow/algorithms/maddpg/maddpg.py
 delete mode 100644 flow/algorithms/maddpg/maddpg_policy.py
 delete mode 100644 flow/algorithms/qmix/README.md
 delete mode 100644 flow/algorithms/qmix/__init__.py
 delete mode 100644 flow/algorithms/qmix/apex.py
 delete mode 100644 flow/algorithms/qmix/mixers.py
 delete mode 100644 flow/algorithms/qmix/model.py
 delete mode 100644 flow/algorithms/qmix/qmix.py
 delete mode 100644 flow/algorithms/qmix/qmix_policy.py
 delete mode 100755 scripts/run_exps.sh

diff --git a/examples/exp_configs/rl/multiagent/multiagent_i210_maddpg.py b/examples/exp_configs/rl/multiagent/multiagent_i210_maddpg.py
deleted file mode 100644
index 8ae041360..000000000
--- a/examples/exp_configs/rl/multiagent/multiagent_i210_maddpg.py
+++ /dev/null
@@ -1,201 +0,0 @@
-"""Multi-agent I-210 example.
-
-Trains a non-constant number of agents, all sharing the same policy, on the
-highway with ramps network.
-""" -import os - -from ray.tune.registry import register_env - -from flow.controllers import RLController -from flow.controllers.car_following_models import IDMController -import flow.config as config -from flow.core.params import EnvParams -from flow.core.params import NetParams -from flow.core.params import InitialConfig -from flow.core.params import InFlows -from flow.core.params import VehicleParams -from flow.core.params import SumoParams -from flow.core.params import SumoLaneChangeParams -from flow.networks.i210_subnetwork import I210SubNetwork, EDGES_DISTRIBUTION -from flow.envs.multiagent.i210 import I210MADDPGMultiEnv, ADDITIONAL_ENV_PARAMS -from flow.utils.registry import make_create_env - -# SET UP PARAMETERS FOR THE SIMULATION - -# number of steps per rollout -HORIZON = 2000 - -# percentage of autonomous vehicles compared to human vehicles on highway -PENETRATION_RATE = 10 - -# SET UP PARAMETERS FOR THE ENVIRONMENT -additional_env_params = ADDITIONAL_ENV_PARAMS.copy() -additional_env_params.update({ - 'max_accel': 2.6, - 'max_decel': 4.5, - # configure the observation space. Look at the I210MultiEnv class for more info. - 'lead_obs': True, - # whether to add in a reward for the speed of nearby vehicles - "local_reward": True, - # whether to use the MPG reward. Otherwise, defaults to a target velocity reward - "mpg_reward": True, - "num_actions": 5, - "max_num_agents": 200 -}) - -# CREATE VEHICLE TYPES AND INFLOWS -# no vehicles in the network -vehicles = VehicleParams() -vehicles.add( - "human", - num_vehicles=0, - lane_change_params=SumoLaneChangeParams(lane_change_mode="strategic"), - acceleration_controller=(IDMController, {"a": .3, "b": 2.0, "noise": 0.6}), -) -vehicles.add( - "av", - acceleration_controller=(RLController, {}), - num_vehicles=0, -) - -inflow = InFlows() -# main highway -pen_rate = PENETRATION_RATE / 100 -assert pen_rate < 1.0, "your penetration rate is over 100%" -assert pen_rate > 0.0, "your penetration rate should be above zero" -inflow.add( - veh_type="human", - edge="119257914", - vehs_per_hour=int(10800 * (1 - pen_rate)), - # probability=1.0, - departLane="random", - departSpeed=20) -# # on ramp -# inflow.add( -# veh_type="human", -# edge="27414345", -# vehs_per_hour=321 * pen_rate, -# departLane="random", -# departSpeed=20) -# inflow.add( -# veh_type="human", -# edge="27414342#0", -# vehs_per_hour=421 * pen_rate, -# departLane="random", -# departSpeed=20) - -# Now add the AVs -# main highway -inflow.add( - veh_type="av", - edge="119257914", - vehs_per_hour=int(10800 * pen_rate), - # probability=1.0, - departLane="random", - departSpeed=20) -# # on ramp -# inflow.add( -# veh_type="av", -# edge="27414345", -# vehs_per_hour=int(321 * pen_rate), -# departLane="random", -# departSpeed=20) -# inflow.add( -# veh_type="av", -# edge="27414342#0", -# vehs_per_hour=int(421 * pen_rate), -# departLane="random", -# departSpeed=20) - -NET_TEMPLATE = os.path.join( - config.PROJECT_PATH, - "examples/exp_configs/templates/sumo/test2.net.xml") - -flow_params = dict( - # name of the experiment - exp_tag='I_210_subnetwork', - - # name of the flow environment the experiment is running on - env_name=I210MADDPGMultiEnv, - - # name of the network class the experiment is running on - network=I210SubNetwork, - - # simulator that is used by the experiment - simulator='traci', - - # simulation-related parameters - sim=SumoParams( - sim_step=0.5, - render=False, - color_by_speed=False, - restart_instance=True, - use_ballistic=True - ), - - # environment related parameters (see 
flow.core.params.EnvParams) - env=EnvParams( - horizon=HORIZON, - sims_per_step=1, - warmup_steps=0, - additional_params=additional_env_params, - ), - - # network-related parameters (see flow.core.params.NetParams and the - # network's documentation or ADDITIONAL_NET_PARAMS component) - net=NetParams( - inflows=inflow, - template=NET_TEMPLATE - ), - - # vehicles to be placed in the network at the start of a rollout (see - # flow.core.params.VehicleParams) - veh=vehicles, - - # parameters specifying the positioning of vehicles upon initialization/ - # reset (see flow.core.params.InitialConfig) - initial=InitialConfig( - edges_distribution=EDGES_DISTRIBUTION, - ), -) - -# SET UP RLLIB MULTI-AGENT FEATURES - -create_env, env_name = make_create_env(params=flow_params, version=0) - -# register as rllib env -register_env(env_name, create_env) - -# multiagent configuration -test_env = create_env() -obs_space = test_env.observation_space -act_space = test_env.action_space - -POLICIES_TO_TRAIN = ['av'] - -observation_space_dict = {i: test_env.observation_space for i in range(additional_env_params["max_num_agents"])} -action_space_dict = {i: test_env.action_space for i in range(additional_env_params["max_num_agents"])} - - -def gen_policy(i): - return ( - None, - test_env.observation_space, - test_env.action_space, - { - "agent_id": i, - "use_local_critic": False, - "obs_space_dict": observation_space_dict, - "act_space_dict": action_space_dict, - } - ) - - -POLICY_GRAPHS = {"av": gen_policy(0)} - - -def policy_mapping_fn(_): - """Map a policy in RLlib.""" - return 'av' - diff --git a/examples/exp_configs/rl/multiagent/multiagent_straight_road_maddpg.py b/examples/exp_configs/rl/multiagent/multiagent_straight_road_maddpg.py deleted file mode 100644 index 6aa69503c..000000000 --- a/examples/exp_configs/rl/multiagent/multiagent_straight_road_maddpg.py +++ /dev/null @@ -1,188 +0,0 @@ -"""Multi-agent straight road example. - -Trains a non-constant number of agents, all sharing the same policy, on the -highway with ramps network using MADDPG. -""" -from flow.controllers import RLController, IDMController -from flow.core.params import EnvParams, NetParams, InitialConfig, InFlows, \ - VehicleParams, SumoParams, SumoLaneChangeParams -from flow.envs.ring.accel import ADDITIONAL_ENV_PARAMS -from flow.networks import HighwayNetwork -from flow.envs.multiagent import MultiStraightRoadMADDPG -from flow.networks.highway import ADDITIONAL_NET_PARAMS -from flow.utils.registry import make_create_env -from ray.tune.registry import register_env - - -# SET UP PARAMETERS FOR THE SIMULATION - -# number of steps per rollout -HORIZON = 2000 - -# inflow rate on the highway in vehicles per hour -HIGHWAY_INFLOW_RATE = 10800 / 5 -# percentage of autonomous vehicles compared to human vehicles on highway -PENETRATION_RATE = 10 - - -# SET UP PARAMETERS FOR THE NETWORK - -additional_net_params = ADDITIONAL_NET_PARAMS.copy() -additional_net_params.update({ - # length of the highway - "length": 2000, - # number of lanes - "lanes": 1, - # speed limit for all edges - "speed_limit": 30, - # number of edges to divide the highway into - "num_edges": 2 -}) - - -# SET UP PARAMETERS FOR THE ENVIRONMENT - -additional_env_params = ADDITIONAL_ENV_PARAMS.copy() -additional_env_params.update({ - 'max_accel': 2.6, - 'max_decel': 4.5, - 'target_velocity': 18, - 'local_reward': True, - 'lead_obs': True, - 'max_num_agents': 5, - # whether to reroute vehicles once they have exited - "reroute_on_exit": False, - # whether to use the MPG reward. 
Otherwise, defaults to a target velocity reward - "mpg_reward": True -}) - - -# CREATE VEHICLE TYPES AND INFLOWS - -vehicles = VehicleParams() -inflows = InFlows() - -# human vehicles -vehicles.add( - "human", - num_vehicles=0, - lane_change_params=SumoLaneChangeParams( - lane_change_mode="strategic", - ), - acceleration_controller=(IDMController, {"a": .3, "b": 2.0, "noise": 0.5}), -) - -# autonomous vehicles -vehicles.add( - color='red', - veh_id='rl', - acceleration_controller=(RLController, {})) - -# add human vehicles on the highway -inflows.add( - veh_type="human", - edge="highway_0", - vehs_per_hour=int(HIGHWAY_INFLOW_RATE * (1 - PENETRATION_RATE / 100)), - depart_lane="free", - depart_speed="23.0", - name="idm_highway_inflow") - -# add autonomous vehicles on the highway -# they will stay on the highway, i.e. they won't exit through the off-ramps -inflows.add( - veh_type="rl", - edge="highway_0", - vehs_per_hour=int(HIGHWAY_INFLOW_RATE * (PENETRATION_RATE / 100)), - depart_lane="free", - depart_speed="23.0", - name="rl_highway_inflow") - -# SET UP FLOW PARAMETERS -warmup_steps = 0 -if additional_env_params['reroute_on_exit']: - warmup_steps = 400 - -flow_params = dict( - # name of the experiment - exp_tag='multiagent_highway_maddpg', - - # name of the flow environment the experiment is running on - env_name=MultiStraightRoadMADDPG, - - # name of the network class the experiment is running on - network=HighwayNetwork, - - # simulator that is used by the experiment - simulator='traci', - - # environment related parameters (see flow.core.params.EnvParams) - env=EnvParams( - horizon=HORIZON, - warmup_steps=warmup_steps, - sims_per_step=1, # do not put more than one - additional_params=additional_env_params, - ), - - # sumo-related parameters (see flow.core.params.SumoParams) - sim=SumoParams( - sim_step=0.5, - render=False, - use_ballistic=True, - restart_instance=False - ), - - # network-related parameters (see flow.core.params.NetParams and the - # network's documentation or ADDITIONAL_NET_PARAMS component) - net=NetParams( - inflows=inflows, - additional_params=additional_net_params - ), - - # vehicles to be placed in the network at the start of a rollout (see - # flow.core.params.VehicleParams) - veh=vehicles, - - # parameters specifying the positioning of vehicles upon initialization/ - # reset (see flow.core.params.InitialConfig) - initial=InitialConfig(), -) - -# SET UP RLLIB MULTI-AGENT FEATURES - -create_env, env_name = make_create_env(params=flow_params, version=0) - -# register as rllib env -register_env(env_name, create_env) - -# multiagent configuration -test_env = create_env() -obs_space = test_env.observation_space -act_space = test_env.action_space - -POLICIES_TO_TRAIN = ['av'] - -observation_space_dict = {i: test_env.observation_space for i in range(additional_env_params["max_num_agents"])} -action_space_dict = {i: test_env.action_space for i in range(additional_env_params["max_num_agents"])} - - -def gen_policy(i): - return ( - None, - test_env.observation_space, - test_env.action_space, - { - "agent_id": i, - "use_local_critic": False, - "obs_space_dict": observation_space_dict, - "act_space_dict": action_space_dict, - } - ) - - -POLICY_GRAPHS = {"av": gen_policy(0)} - - -def policy_mapping_fn(_): - """Map a policy in RLlib.""" - return 'av' - diff --git a/examples/train.py b/examples/train.py index 6da7cb4ea..1c824c379 100644 --- a/examples/train.py +++ b/examples/train.py @@ -33,7 +33,7 @@ from ray.tune.registry import register_env from flow.core.util import 
ensure_dir -from flow.core.rewards import energy_consumption, miles_per_gallon, miles_per_megajoule +from flow.core.rewards import miles_per_gallon, miles_per_megajoule from flow.utils.registry import env_constructor from flow.utils.rllib import FlowParamsEncoder, get_flow_params from flow.utils.registry import make_create_env @@ -68,7 +68,7 @@ def parse_args(args): help='the RL trainer to use. either rllib or Stable-Baselines') parser.add_argument( '--algorithm', type=str, default="PPO", - help='RL algorithm to use. Options are PPO, TD3, MATD3 (MADDPG w/ TD3) right now.' + help='RL algorithm to use. Options are PPO, TD3 right now.' ) parser.add_argument( '--num_cpus', type=int, default=1, @@ -237,20 +237,6 @@ def setup_exps_rllib(flow_params, config["critic_lr"] = tune.grid_search([1e-3, 1e-4]) config["n_step"] = tune.grid_search([1, 10]) - elif alg_run == "MADDPG": - from flow.algorithms.maddpg.maddpg import MADDPGTrainer, DEFAULT_CONFIG - config = deepcopy(DEFAULT_CONFIG) - config["actor_feature_reg"] = 0.0 - config["learning_starts"] = 100 - alg_run = MADDPGTrainer - - elif alg_run == "QMIX": - from flow.algorithms.qmix.qmix import QMixTrainer2, DEFAULT_CONFIG - config = deepcopy(DEFAULT_CONFIG) - if flags.grid_search: - config["exploration_fraction"] = tune.grid_search([0.1, 0.3]) - config["buffer_size"] = tune.grid_search([10000, 100000]) - alg_run = QMixTrainer2 else: sys.exit("We only support PPO, TD3, right now.") @@ -328,12 +314,7 @@ def on_train_result(info): create_env, gym_name = make_create_env(params=flow_params) - if flags.algorithm.upper() == "MADDPG": - config['max_num_agents'] = flow_params['env'].additional_params['max_num_agents'] - register_env(gym_name, create_env) - else: - # Register as rllib env - register_env(gym_name, create_env) + register_env(gym_name, create_env) return alg_run, gym_name, config diff --git a/flow/algorithms/maddpg/__init__.py b/flow/algorithms/maddpg/__init__.py deleted file mode 100644 index e69de29bb..000000000 diff --git a/flow/algorithms/maddpg/maddpg.py b/flow/algorithms/maddpg/maddpg.py deleted file mode 100644 index 942206871..000000000 --- a/flow/algorithms/maddpg/maddpg.py +++ /dev/null @@ -1,185 +0,0 @@ -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function -"""Contributed port of MADDPG from OpenAI baselines. - -The implementation has a couple assumptions: -- The number of agents is fixed and known upfront. -- Each agent is bound to a policy of the same name. -- Discrete actions are sent as logits (pre-softmax). - -For a minimal example, see twostep_game.py, and the README for how to run -with the multi-agent particle envs. -""" - -import logging - -from ray.rllib.agents.trainer import with_common_config -from ray.rllib.agents.dqn.dqn import GenericOffPolicyTrainer -from flow.algorithms.maddpg.maddpg_policy import MADDPGTFPolicy -from ray.rllib.optimizers import SyncReplayOptimizer -from ray.rllib.policy.sample_batch import SampleBatch, MultiAgentBatch - -logger = logging.getLogger(__name__) -logger.setLevel(logging.INFO) - -# yapf: disable -# __sphinx_doc_begin__ -DEFAULT_CONFIG = with_common_config({ - # === Settings for each individual policy === - # ID of the agent controlled by this policy - "agent_id": None, - # Use a local critic for this policy. - "use_local_critic": False, - - # === Evaluation === - # Evaluation interval - "evaluation_interval": None, - # Number of episodes to run per evaluation period. 
- "evaluation_num_episodes": 10, - - # === Model === - # Apply a state preprocessor with spec given by the "model" config option - # (like other RL algorithms). This is mostly useful if you have a weird - # observation shape, like an image. Disabled by default. - "use_state_preprocessor": False, - # Postprocess the policy network model output with these hidden layers. If - # use_state_preprocessor is False, then these will be the *only* hidden - # layers in the network. - "actor_hiddens": [64, 64], - # Hidden layers activation of the postprocessing stage of the policy - # network - "actor_hidden_activation": "relu", - # Postprocess the critic network model output with these hidden layers; - # again, if use_state_preprocessor is True, then the state will be - # preprocessed by the model specified with the "model" config option first. - "critic_hiddens": [64, 64], - # Hidden layers activation of the postprocessing state of the critic. - "critic_hidden_activation": "relu", - # N-step Q learning - "n_step": 1, - # Algorithm for good policies - "good_policy": "maddpg", - # Algorithm for adversary policies - "adv_policy": "maddpg", - - # === Replay buffer === - # Size of the replay buffer. Note that if async_updates is set, then - # each worker will have a replay buffer of this size. - "buffer_size": int(1e6), - # Observation compression. Note that compression makes simulation slow in - # MPE. - "compress_observations": False, - - # === Optimization === - # Learning rate for the critic (Q-function) optimizer. - "critic_lr": 1e-2, - # Learning rate for the actor (policy) optimizer. - "actor_lr": 1e-2, - # Update the target network every `target_network_update_freq` steps. - "target_network_update_freq": 0, - # Update the target by \tau * policy + (1-\tau) * target_policy - "tau": 0.01, - # Weights for feature regularization for the actor - "actor_feature_reg": 0.001, - # If not None, clip gradients during optimization at this value - "grad_norm_clipping": None, - # How many steps of the model to sample before learning starts. - "learning_starts": 1024 * 25, - # Update the replay buffer with this many samples at once. Note that this - # setting applies per-worker if num_workers > 1. - "sample_batch_size": 100, - # Size of a batched sampled from replay buffer for training. Note that - # if async_updates is set, then each worker returns gradients for a - # batch of this size. - "train_batch_size": 1024, - # Number of env steps to optimize for before returning - "timesteps_per_iteration": 0, - # How many agents can be in the system in total - "max_num_agents": 1, - - # === Parallelism === - # Number of workers for collecting samples with. This only makes sense - # to increase if your environment is particularly slow to sample, or if - # you're using the Async or Ape-X optimizers. - "num_workers": 1, - # Prevent iterations from going lower than this time span - "min_iter_time_s": 0, -}) -# __sphinx_doc_end__ -# yapf: enable - - -def set_global_timestep(trainer): - global_timestep = trainer.optimizer.num_steps_sampled - trainer.train_start_timestep = global_timestep - - -def before_learn_on_batch(multi_agent_batch, policies, train_batch_size): - samples = {} - - # Modify keys. 
- for pid, p in policies.items(): - i = p.config["agent_id"] - keys = multi_agent_batch.policy_batches[pid].data.keys() - keys = ["_".join([k, str(i)]) for k in keys] - samples.update( - dict( - zip(keys, - multi_agent_batch.policy_batches[pid].data.values()))) - - # Make ops and feed_dict to get "new_obs" from target action sampler. - new_obs_ph_n = [p.new_obs_ph for p in policies.values()] - new_obs_n = list() - for k, v in samples.items(): - if "new_obs" in k: - new_obs_n.append(v) - - target_act_sampler_n = [p.target_act_sampler for p in policies.values()] - feed_dict = dict(zip(new_obs_ph_n, new_obs_n)) - - new_act_n = p.sess.run(target_act_sampler_n, feed_dict) - samples.update( - {"new_actions_%d" % i: new_act - for i, new_act in enumerate(new_act_n)}) - - # Share samples among agents. - policy_batches = {pid: SampleBatch(samples) for pid in policies.keys()} - return MultiAgentBatch(policy_batches, train_batch_size) - - -def make_optimizer(workers, config): - return SyncReplayOptimizer( - workers, - learning_starts=config["learning_starts"], - buffer_size=config["buffer_size"], - train_batch_size=config["train_batch_size"], - before_learn_on_batch=before_learn_on_batch, - synchronize_sampling=True, - prioritized_replay=False) - - -def add_trainer_metrics(trainer, result): - global_timestep = trainer.optimizer.num_steps_sampled - result.update( - timesteps_this_iter=global_timestep - trainer.train_start_timestep, - info=dict({ - "num_target_updates": trainer.state["num_target_updates"], - }, **trainer.optimizer.stats())) - - -def collect_metrics(trainer): - result = trainer.collect_metrics() - return result - - -MADDPGTrainer = GenericOffPolicyTrainer.with_updates( - name="MADDPG", - default_config=DEFAULT_CONFIG, - default_policy=MADDPGTFPolicy, - before_init=None, - before_train_step=set_global_timestep, - make_policy_optimizer=make_optimizer, - after_train_result=add_trainer_metrics, - collect_metrics_fn=collect_metrics, - before_evaluate_fn=None) diff --git a/flow/algorithms/maddpg/maddpg_policy.py b/flow/algorithms/maddpg/maddpg_policy.py deleted file mode 100644 index 707afba95..000000000 --- a/flow/algorithms/maddpg/maddpg_policy.py +++ /dev/null @@ -1,398 +0,0 @@ -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -import ray -from ray.rllib.agents.dqn.dqn_policy import minimize_and_clip, _adjust_nstep -from ray.rllib.evaluation.metrics import LEARNER_STATS_KEY -from ray.rllib.policy.sample_batch import SampleBatch -from ray.rllib.models import ModelCatalog -from ray.rllib.utils.annotations import override -from ray.rllib.utils.error import UnsupportedSpaceException -from ray.rllib.policy.policy import Policy -from ray.rllib.policy.tf_policy import TFPolicy -from ray.rllib.utils import try_import_tf, try_import_tfp - -import logging -from gym.spaces import Box, Discrete -import numpy as np - -logger = logging.getLogger(__name__) - -tf = try_import_tf() -tfp = try_import_tfp() - - -class MADDPGPostprocessing(object): - """Implements agentwise termination signal and n-step learning.""" - - @override(Policy) - def postprocess_trajectory(self, - sample_batch, - other_agent_batches=None, - episode=None): - # FIXME: Get done from info is required since agentwise done is not - # supported now. 
- sample_batch.data["dones"] = self.get_done_from_info( - sample_batch.data["infos"]) - - # N-step Q adjustments - if self.config["n_step"] > 1: - _adjust_nstep(self.config["n_step"], self.config["gamma"], - sample_batch[SampleBatch.CUR_OBS], - sample_batch[SampleBatch.ACTIONS], - sample_batch[SampleBatch.REWARDS], - sample_batch[SampleBatch.NEXT_OBS], - sample_batch[SampleBatch.DONES]) - - return sample_batch - - -class MADDPGTFPolicy(MADDPGPostprocessing, TFPolicy): - def __init__(self, obs_space, act_space, config): - # _____ Initial Configuration - self.config = config = dict(ray.rllib.contrib.maddpg.DEFAULT_CONFIG, - **config) - self.global_step = tf.train.get_or_create_global_step() - - # FIXME: Get done from info is required since agentwise done is not - # supported now. - self.get_done_from_info = np.vectorize( - lambda info: info.get("done", False)) - - agent_id = config["agent_id"] - - # _____ Environment Setting - def _make_continuous_space(space): - if isinstance(space, Box): - return space - elif isinstance(space, Discrete): - return Box( - low=np.zeros((space.n, )), high=np.ones((space.n, ))) - else: - raise UnsupportedSpaceException( - "Space {} is not supported.".format(space)) - - if len(config["multiagent"]["policies"]) > 1: - obs_space_n = [ - _make_continuous_space(space) - for _, (_, space, _, - _) in sorted(config["multiagent"]["policies"].items()) - ] - act_space_n = [ - _make_continuous_space(space) - for _, (_, _, space, - _) in sorted(config["multiagent"]["policies"].items()) - ] - else: - obs_space = config["multiagent"]["policies"][list(config["multiagent"]["policies"].keys())[0]][1] - act_space = config["multiagent"]["policies"][list(config["multiagent"]["policies"].keys())[0]][2] - num_agents = config["max_num_agents"] - obs_space_n = [ - _make_continuous_space(obs_space) - for i in range(num_agents) - ] - act_space_n = [ - _make_continuous_space(act_space) - for i in range(num_agents) - ] - - # _____ Placeholders - # Placeholders for policy evaluation and updates - def _make_ph_n(space_n, name=""): - return [ - tf.placeholder( - tf.float32, - shape=(None, ) + space.shape, - name=name + "_%d" % i) for i, space in enumerate(space_n) - ] - - obs_ph_n = _make_ph_n(obs_space_n, "obs") - act_ph_n = _make_ph_n(act_space_n, "actions") - new_obs_ph_n = _make_ph_n(obs_space_n, "new_obs") - new_act_ph_n = _make_ph_n(act_space_n, "new_actions") - rew_ph = tf.placeholder( - tf.float32, shape=None, name="rewards_{}".format(agent_id)) - done_ph = tf.placeholder( - tf.float32, shape=None, name="dones_{}".format(agent_id)) - - if config["use_local_critic"]: - if len(config["multiagent"]["policies"]) > 1: - - obs_space_n, act_space_n = [obs_space_n[agent_id]], [ - act_space_n[agent_id] - ] - obs_ph_n, act_ph_n = [obs_ph_n[agent_id]], [act_ph_n[agent_id]] - new_obs_ph_n, new_act_ph_n = [new_obs_ph_n[agent_id]], [ - new_act_ph_n[agent_id] - ] - agent_id = 0 - else: - agent_id = 0 - obs_space_n, act_space_n = [obs_space_n[agent_id]], [ - act_space_n[agent_id] - ] - obs_ph_n, act_ph_n = [obs_ph_n[agent_id]], [act_ph_n[agent_id]] - new_obs_ph_n, new_act_ph_n = [new_obs_ph_n[agent_id]], [ - new_act_ph_n[agent_id] - ] - - # _____ Value Network - # Build critic network for t. - critic, _, critic_model_n, critic_vars = self._build_critic_network( - obs_ph_n, - act_ph_n, - obs_space_n, - act_space_n, - hiddens=config["critic_hiddens"], - activation=getattr(tf.nn, config["critic_hidden_activation"]), - scope="critic") - - # Build critic network for t + 1. 
- target_critic, _, _, target_critic_vars = self._build_critic_network( - new_obs_ph_n, - new_act_ph_n, - obs_space_n, - act_space_n, - hiddens=config["critic_hiddens"], - activation=getattr(tf.nn, config["critic_hidden_activation"]), - scope="target_critic") - - # Build critic loss. - td_error = tf.subtract( - tf.stop_gradient( - rew_ph + (1.0 - done_ph) * - (config["gamma"]**config["n_step"]) * target_critic[:, 0]), - critic[:, 0]) - critic_loss = tf.reduce_mean(td_error**2) - - # _____ Policy Network - # Build actor network for t. - act_sampler, actor_feature, actor_model, actor_vars = ( - self._build_actor_network( - obs_ph_n[agent_id], - obs_space_n[agent_id], - act_space_n[agent_id], - hiddens=config["actor_hiddens"], - activation=getattr(tf.nn, config["actor_hidden_activation"]), - scope="actor")) - - # Build actor network for t + 1. - self.new_obs_ph = new_obs_ph_n[agent_id] - self.target_act_sampler, _, _, target_actor_vars = ( - self._build_actor_network( - self.new_obs_ph, - obs_space_n[agent_id], - act_space_n[agent_id], - hiddens=config["actor_hiddens"], - activation=getattr(tf.nn, config["actor_hidden_activation"]), - scope="target_actor")) - - # Build actor loss. - act_n = act_ph_n.copy() - act_n[agent_id] = act_sampler - critic, _, _, _ = self._build_critic_network( - obs_ph_n, - act_n, - obs_space_n, - act_space_n, - hiddens=config["critic_hiddens"], - activation=getattr(tf.nn, config["critic_hidden_activation"]), - scope="critic") - actor_loss = -tf.reduce_mean(critic) - if config["actor_feature_reg"] is not None: - actor_loss += config["actor_feature_reg"] * tf.reduce_mean( - actor_feature**2) - - # _____ Losses - self.losses = {"critic": critic_loss, "actor": actor_loss} - - # _____ Optimizers - self.optimizers = { - "critic": tf.train.AdamOptimizer(config["critic_lr"]), - "actor": tf.train.AdamOptimizer(config["actor_lr"]) - } - - # _____ Build variable update ops. 
- self.tau = tf.placeholder_with_default( - config["tau"], shape=(), name="tau") - - def _make_target_update_op(vs, target_vs, tau): - return [ - target_v.assign(tau * v + (1.0 - tau) * target_v) - for v, target_v in zip(vs, target_vs) - ] - - self.update_target_vars = _make_target_update_op( - critic_vars + actor_vars, target_critic_vars + target_actor_vars, - self.tau) - - def _make_set_weight_op(variables): - vs = list() - for v in variables.values(): - vs += v - phs = [ - tf.placeholder( - tf.float32, - shape=v.get_shape(), - name=v.name.split(":")[0] + "_ph") for v in vs - ] - return tf.group(*[v.assign(ph) for v, ph in zip(vs, phs)]), phs - - self.vars = { - "critic": critic_vars, - "actor": actor_vars, - "target_critic": target_critic_vars, - "target_actor": target_actor_vars - } - self.update_vars, self.vars_ph = _make_set_weight_op(self.vars) - - # _____ TensorFlow Initialization - - self.sess = tf.get_default_session() - - def _make_loss_inputs(placeholders): - return [(ph.name.split("/")[-1].split(":")[0], ph) - for ph in placeholders] - - loss_inputs = _make_loss_inputs(obs_ph_n + act_ph_n + new_obs_ph_n + - new_act_ph_n + [rew_ph, done_ph]) - - TFPolicy.__init__( - self, - obs_space, - act_space, - self.sess, - obs_input=obs_ph_n[agent_id], - action_sampler=act_sampler, - loss=actor_loss + critic_loss, - loss_inputs=loss_inputs) - - self.sess.run(tf.global_variables_initializer()) - - # Hard initial update - self.update_target(1.0) - - @override(TFPolicy) - def optimizer(self): - return None - - @override(TFPolicy) - def gradients(self, optimizer, loss): - if self.config["grad_norm_clipping"] is not None: - self.gvs = { - k: minimize_and_clip(optimizer, self.losses[k], self.vars[k], - self.config["grad_norm_clipping"]) - for k, optimizer in self.optimizers.items() - } - else: - self.gvs = { - k: optimizer.compute_gradients(self.losses[k], self.vars[k]) - for k, optimizer in self.optimizers.items() - } - return self.gvs["critic"] + self.gvs["actor"] - - @override(TFPolicy) - def build_apply_op(self, optimizer, grads_and_vars): - critic_apply_op = self.optimizers["critic"].apply_gradients( - self.gvs["critic"]) - - with tf.control_dependencies([tf.assign_add(self.global_step, 1)]): - with tf.control_dependencies([critic_apply_op]): - actor_apply_op = self.optimizers["actor"].apply_gradients( - self.gvs["actor"]) - - return actor_apply_op - - @override(TFPolicy) - def extra_compute_action_feed_dict(self): - return {} - - @override(TFPolicy) - def extra_compute_grad_fetches(self): - return {LEARNER_STATS_KEY: {}} - - @override(TFPolicy) - def get_weights(self): - var_list = [] - for var in self.vars.values(): - var_list += var - return self.sess.run(var_list) - - @override(TFPolicy) - def set_weights(self, weights): - self.sess.run( - self.update_vars, feed_dict=dict(zip(self.vars_ph, weights))) - - @override(Policy) - def get_state(self): - return TFPolicy.get_state(self) - - @override(Policy) - def set_state(self, state): - TFPolicy.set_state(self, state) - - def _build_critic_network(self, - obs_n, - act_n, - obs_space_n, - act_space_n, - hiddens, - activation=None, - scope=None): - with tf.variable_scope(scope, reuse=tf.AUTO_REUSE) as scope: - if self.config["use_state_preprocessor"]: - model_n = [ - ModelCatalog.get_model({ - "obs": obs, - "is_training": self._get_is_training_placeholder(), - }, obs_space, act_space, 1, self.config["model"]) - for obs, obs_space, act_space in zip( - obs_n, obs_space_n, act_space_n) - ] - out_n = [model.last_layer for model in model_n] - out = 
tf.concat(out_n + act_n, axis=1) - else: - model_n = [None] * len(obs_n) - out = tf.concat(obs_n + act_n, axis=1) - - for hidden in hiddens: - out = tf.layers.dense(out, units=hidden, activation=activation) - feature = out - out = tf.layers.dense(feature, units=1, activation=None) - - return out, feature, model_n, tf.global_variables(scope.name) - - def _build_actor_network(self, - obs, - obs_space, - act_space, - hiddens, - activation=None, - scope=None): - with tf.variable_scope(scope, reuse=tf.AUTO_REUSE) as scope: - if self.config["use_state_preprocessor"]: - model = ModelCatalog.get_model({ - "obs": obs, - "is_training": self._get_is_training_placeholder(), - }, obs_space, act_space, 1, self.config["model"]) - out = model.last_layer - else: - model = None - out = obs - - for hidden in hiddens: - out = tf.layers.dense(out, units=hidden, activation=activation) - feature = tf.layers.dense( - out, units=act_space.shape[0], activation=None) - # TODO(@ev) what is going on here?? Why is this here?? - # sampler = tfp.distributions.RelaxedOneHotCategorical( - # temperature=1.0, logits=feature).sample() - - return feature, feature, model, tf.global_variables(scope.name) - - def update_target(self, tau=None): - if tau is not None: - self.sess.run(self.update_target_vars, {self.tau: tau}) - else: - self.sess.run(self.update_target_vars) diff --git a/flow/algorithms/qmix/README.md b/flow/algorithms/qmix/README.md deleted file mode 100644 index e8d66616d..000000000 --- a/flow/algorithms/qmix/README.md +++ /dev/null @@ -1 +0,0 @@ -Code in this package is adapted from https://github.com/oxwhirl/pymarl. diff --git a/flow/algorithms/qmix/__init__.py b/flow/algorithms/qmix/__init__.py deleted file mode 100644 index 0de9ff272..000000000 --- a/flow/algorithms/qmix/__init__.py +++ /dev/null @@ -1,8 +0,0 @@ -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -from ray.rllib.agents.qmix.qmix import QMixTrainer, DEFAULT_CONFIG -from ray.rllib.agents.qmix.apex import ApexQMixTrainer - -__all__ = ["QMixTrainer", "ApexQMixTrainer", "DEFAULT_CONFIG"] diff --git a/flow/algorithms/qmix/apex.py b/flow/algorithms/qmix/apex.py deleted file mode 100644 index be6e66638..000000000 --- a/flow/algorithms/qmix/apex.py +++ /dev/null @@ -1,39 +0,0 @@ -"""Experimental: scalable Ape-X variant of QMIX""" - -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -from ray.rllib.agents.dqn.apex import APEX_TRAINER_PROPERTIES -from flow.algorithms.qmix import QMixTrainer, \ - DEFAULT_CONFIG as QMIX_CONFIG -from ray.rllib.utils import merge_dicts - -APEX_QMIX_DEFAULT_CONFIG = merge_dicts( - QMIX_CONFIG, # see also the options in qmix.py, which are also supported - { - "optimizer": merge_dicts( - QMIX_CONFIG["optimizer"], - { - "max_weight_sync_delay": 400, - "num_replay_buffer_shards": 4, - "batch_replay": True, # required for RNN. Disables prio. 
- "debug": False - }), - "num_gpus": 0, - "num_workers": 32, - "buffer_size": 2000000, - "learning_starts": 50000, - "train_batch_size": 512, - "sample_batch_size": 50, - "target_network_update_freq": 500000, - "timesteps_per_iteration": 25000, - "per_worker_exploration": True, - "min_iter_time_s": 30, - }, -) - -ApexQMixTrainer = QMixTrainer.with_updates( - name="APEX_QMIX", - default_config=APEX_QMIX_DEFAULT_CONFIG, - **APEX_TRAINER_PROPERTIES) diff --git a/flow/algorithms/qmix/mixers.py b/flow/algorithms/qmix/mixers.py deleted file mode 100644 index 3f8fbbce4..000000000 --- a/flow/algorithms/qmix/mixers.py +++ /dev/null @@ -1,64 +0,0 @@ -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -import torch as th -import torch.nn as nn -import torch.nn.functional as F -import numpy as np - - -class VDNMixer(nn.Module): - def __init__(self): - super(VDNMixer, self).__init__() - - def forward(self, agent_qs, batch): - return th.sum(agent_qs, dim=2, keepdim=True) - - -class QMixer(nn.Module): - def __init__(self, n_agents, state_shape, mixing_embed_dim): - super(QMixer, self).__init__() - - self.n_agents = n_agents - self.embed_dim = mixing_embed_dim - self.state_dim = int(np.prod(state_shape)) - - self.hyper_w_1 = nn.Linear(self.state_dim, - self.embed_dim * self.n_agents) - self.hyper_w_final = nn.Linear(self.state_dim, self.embed_dim) - - # State dependent bias for hidden layer - self.hyper_b_1 = nn.Linear(self.state_dim, self.embed_dim) - - # V(s) instead of a bias for the last layers - self.V = nn.Sequential( - nn.Linear(self.state_dim, self.embed_dim), nn.ReLU(), - nn.Linear(self.embed_dim, 1)) - - def forward(self, agent_qs, states): - """Forward pass for the mixer. - - Arguments: - agent_qs: Tensor of shape [B, T, n_agents, n_actions] - states: Tensor of shape [B, T, state_dim] - """ - bs = agent_qs.size(0) - states = states.reshape(-1, self.state_dim) - agent_qs = agent_qs.view(-1, 1, self.n_agents) - # First layer - w1 = th.abs(self.hyper_w_1(states)) - b1 = self.hyper_b_1(states) - w1 = w1.view(-1, self.n_agents, self.embed_dim) - b1 = b1.view(-1, 1, self.embed_dim) - hidden = F.elu(th.bmm(agent_qs, w1) + b1) - # Second layer - w_final = th.abs(self.hyper_w_final(states)) - w_final = w_final.view(-1, self.embed_dim, 1) - # State-dependent bias - v = self.V(states).view(-1, 1, 1) - # Compute final output - y = th.bmm(hidden, w_final) + v - # Reshape and return - q_tot = y.view(bs, -1, 1) - return q_tot diff --git a/flow/algorithms/qmix/model.py b/flow/algorithms/qmix/model.py deleted file mode 100644 index f94f6804e..000000000 --- a/flow/algorithms/qmix/model.py +++ /dev/null @@ -1,91 +0,0 @@ -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -from torch import nn -import numpy as np -import torch.nn.functional as F - -from ray.rllib.models.preprocessors import get_preprocessor -from ray.rllib.models.torch.torch_modelv2 import TorchModelV2 -from ray.rllib.utils.annotations import override - - -class RNNModel(TorchModelV2, nn.Module): - """The default RNN model for QMIX.""" - - def __init__(self, obs_space, action_space, num_outputs, model_config, - name): - TorchModelV2.__init__(self, obs_space, action_space, num_outputs, - model_config, name) - nn.Module.__init__(self) - self.obs_size = _get_size(obs_space) - self.rnn_hidden_dim = model_config["lstm_cell_size"] - self.fc1 = nn.Linear(self.obs_size, self.rnn_hidden_dim) - self.rnn = 
nn.GRUCell(self.rnn_hidden_dim, self.rnn_hidden_dim) - self.fc2 = nn.Linear(self.rnn_hidden_dim, num_outputs) - - @override(TorchModelV2) - def get_initial_state(self): - # make hidden states on same device as model - return [self.fc1.weight.new(1, self.rnn_hidden_dim).zero_().squeeze(0)] - - @override(TorchModelV2) - def forward(self, input_dict, hidden_state, seq_lens): - x = F.relu(self.fc1(input_dict["obs_flat"].float())) - h_in = hidden_state[0].reshape(-1, self.rnn_hidden_dim) - h = self.rnn(x, h_in) - q = self.fc2(h) - return q, [h] - - -def _get_size(obs_space): - return get_preprocessor(obs_space)(obs_space).size - - -class FeedForward(TorchModelV2, nn.Module): - """Generic fully connected network.""" - - def __init__(self, obs_space, action_space, num_outputs, model_config, - name): - TorchModelV2.__init__(self, obs_space, action_space, num_outputs, - model_config, name) - nn.Module.__init__(self) - - hiddens = model_config.get("fcnet_hiddens") - layers = [] - last_layer_size = np.product(obs_space.shape) - for size in hiddens: - layers.append(nn.Linear(in_features=last_layer_size, out_features=size)) - layers.append(nn.ReLU()) - last_layer_size = size - - self._hidden_layers = nn.Sequential(*layers) - - self._hidden_layers.apply(init_weights) - - # TODO(@ev) pick the right initialization - self._logits = nn.Linear( - in_features=last_layer_size, - out_features=num_outputs) - - self._logits.apply(large_initializer) - - @override(TorchModelV2) - def forward(self, input_dict, state, seq_lens): - obs = input_dict["obs_flat"].float() - features = self._hidden_layers(obs.reshape(obs.shape[0], -1)) - logits = self._logits(features) - return logits, state - - -def init_weights(m): - if type(m) == nn.Linear: - nn.init.xavier_uniform_(m.weight) - m.bias.data.fill_(0.01) - - -def large_initializer(m): - if type(m) == nn.Linear: - nn.init.xavier_uniform_(m.weight) - m.bias.data.fill_(0.1) diff --git a/flow/algorithms/qmix/qmix.py b/flow/algorithms/qmix/qmix.py deleted file mode 100644 index 88d2bb29b..000000000 --- a/flow/algorithms/qmix/qmix.py +++ /dev/null @@ -1,105 +0,0 @@ -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -from ray.rllib.agents.trainer import with_common_config -from ray.rllib.agents.dqn.dqn import GenericOffPolicyTrainer -from flow.algorithms.qmix.qmix_policy import QMixTorchPolicy -from ray.rllib.optimizers import SyncBatchReplayOptimizer - -# yapf: disable -# __sphinx_doc_begin__ -DEFAULT_CONFIG = with_common_config({ - # === QMix === - # Mixing network. Either "qmix", "vdn", or None - "mixer": "qmix", - # Size of the mixing network embedding - "mixing_embed_dim": 32, - # Whether to use Double_Q learning - "double_q": True, - # Optimize over complete episodes by default. - "batch_mode": "complete_episodes", - - # === Evaluation === - # Evaluate with epsilon=0 every `evaluation_interval` training iterations. - # The evaluation stats will be reported under the "evaluation" metric key. - # Note that evaluation is currently not parallelized, and that for Ape-X - # metrics are already only reported for the lowest epsilon workers. - "evaluation_interval": None, - # Number of episodes to run per evaluation period. - "evaluation_num_episodes": 10, - - # === Exploration === - # Max num timesteps for annealing schedules. 
Exploration is annealed from - # 1.0 to exploration_fraction over this number of timesteps scaled by - # exploration_fraction - "schedule_max_timesteps": 100000, - # Number of env steps to optimize for before returning - "timesteps_per_iteration": 1000, - # Fraction of entire training period over which the exploration rate is - # annealed - "exploration_fraction": 0.1, - # Final value of random action probability - "exploration_final_eps": 0.02, - # Update the target network every `target_network_update_freq` steps. - "target_network_update_freq": 500, - - # === Replay buffer === - # Size of the replay buffer in steps. - "buffer_size": 10000, - - # === Optimization === - # Learning rate for RMSProp optimizer - "lr": 0.0005, - # RMSProp alpha - "optim_alpha": 0.99, - # RMSProp epsilon - "optim_eps": 0.00001, - # If not None, clip gradients during optimization at this value - "grad_norm_clipping": 10, - # How many steps of the model to sample before learning starts. - "learning_starts": 1000, - # Update the replay buffer with this many samples at once. Note that - # this setting applies per-worker if num_workers > 1. - "sample_batch_size": 4, - # Size of a batched sampled from replay buffer for training. Note that - # if async_updates is set, then each worker returns gradients for a - # batch of this size. - "train_batch_size": 32, - - # === Parallelism === - # Number of workers for collecting samples with. This only makes sense - # to increase if your environment is particularly slow to sample, or if - # you"re using the Async or Ape-X optimizers. - "num_workers": 0, - # Whether to use a distribution of epsilons across workers for exploration. - "per_worker_exploration": False, - # Whether to compute priorities on workers. - "worker_side_prioritization": False, - # Prevent iterations from going lower than this time span - "min_iter_time_s": 1, - - # === Model === - "model": { - "lstm_cell_size": 64, - "max_seq_len": 999999, - "fcnet_hiddens": [32, 32] - }, -}) -# __sphinx_doc_end__ -# yapf: enable - - -def make_sync_batch_optimizer(workers, config): - return SyncBatchReplayOptimizer( - workers, - learning_starts=config["learning_starts"], - buffer_size=config["buffer_size"], - train_batch_size=config["train_batch_size"]) - - -QMixTrainer2 = GenericOffPolicyTrainer.with_updates( - name="QMIX", - default_config=DEFAULT_CONFIG, - default_policy=QMixTorchPolicy, - make_policy_optimizer=make_sync_batch_optimizer) diff --git a/flow/algorithms/qmix/qmix_policy.py b/flow/algorithms/qmix/qmix_policy.py deleted file mode 100644 index ad6857143..000000000 --- a/flow/algorithms/qmix/qmix_policy.py +++ /dev/null @@ -1,561 +0,0 @@ -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -from gym.spaces import Tuple, Discrete, Dict -import logging -import numpy as np -import torch as th -import torch.nn as nn -from torch.optim import RMSprop -from torch.distributions import Categorical - -import ray -from flow.algorithms.qmix.mixers import VDNMixer, QMixer -from flow.algorithms.qmix.model import RNNModel, _get_size, FeedForward -from ray.rllib.evaluation.metrics import LEARNER_STATS_KEY -from ray.rllib.policy.policy import TupleActions, Policy -from ray.rllib.policy.rnn_sequencing import chop_into_sequences -from ray.rllib.policy.sample_batch import SampleBatch -from ray.rllib.models.catalog import ModelCatalog -from ray.rllib.models.model import _unpack_obs -from ray.rllib.env.constants import GROUP_REWARDS -from ray.rllib.utils.annotations import 
override - -logger = logging.getLogger(__name__) - -# if the obs space is Dict type, look for the global state under this key -ENV_STATE = "state" - - -class QMixLoss(nn.Module): - def __init__(self, - model, - target_model, - mixer, - target_mixer, - n_agents, - n_actions, - double_q=True, - gamma=0.99): - nn.Module.__init__(self) - self.model = model - self.target_model = target_model - self.mixer = mixer - self.target_mixer = target_mixer - self.n_agents = n_agents - self.n_actions = n_actions - self.double_q = double_q - self.gamma = gamma - - def forward(self, - rewards, - actions, - terminated, - mask, - obs, - next_obs, - action_mask, - next_action_mask, - state=None, - next_state=None): - """Forward pass of the loss. - - Arguments: - rewards: Tensor of shape [B, T, n_agents] - actions: Tensor of shape [B, T, n_agents] - terminated: Tensor of shape [B, T, n_agents] - mask: Tensor of shape [B, T, n_agents] - obs: Tensor of shape [B, T, n_agents, obs_size] - next_obs: Tensor of shape [B, T, n_agents, obs_size] - action_mask: Tensor of shape [B, T, n_agents, n_actions] - next_action_mask: Tensor of shape [B, T, n_agents, n_actions] - state: Tensor of shape [B, T, state_dim] (optional) - next_state: Tensor of shape [B, T, state_dim] (optional) - """ - - # Assert either none or both of state and next_state are given - if state is None and next_state is None: - state = obs # default to state being all agents' observations - next_state = next_obs - elif (state is None) != (next_state is None): - raise ValueError("Expected either neither or both of `state` and " - "`next_state` to be given. Got: " - "\n`state` = {}\n`next_state` = {}".format( - state, next_state)) - - # Calculate estimated Q-Values - mac_out = _unroll_mac(self.model, obs) - - # Pick the Q-Values for the actions taken -> [B * n_agents, T] - chosen_action_qvals = th.gather( - mac_out, dim=3, index=actions.unsqueeze(3)).squeeze(3) - - # Calculate the Q-Values necessary for the target - target_mac_out = _unroll_mac(self.target_model, next_obs) - - # Mask out unavailable actions for the t+1 step - ignore_action_tp1 = (next_action_mask == 0) & (mask == 1).unsqueeze(-1) - target_mac_out[ignore_action_tp1] = -np.inf - - # Max over target Q-Values - if self.double_q: - # Double Q learning computes the target Q values by selecting the - # t+1 timestep action according to the "policy" neural network and - # then estimating the Q-value of that action with the "target" - # neural network - - # Compute the t+1 Q-values to be used in action selection - # using next_obs - mac_out_tp1 = _unroll_mac(self.model, next_obs) - - # mask out unallowed actions - mac_out_tp1[ignore_action_tp1] = -np.inf - - # obtain best actions at t+1 according to policy NN - cur_max_actions = mac_out_tp1.argmax(dim=3, keepdim=True) - - # use the target network to estimate the Q-values of policy - # network's selected actions - target_max_qvals = th.gather(target_mac_out, 3, - cur_max_actions).squeeze(3) - else: - target_max_qvals = target_mac_out.max(dim=3)[0] - - assert target_max_qvals.min().item() != -np.inf, \ - "target_max_qvals contains a masked action; \ - there may be a state with no valid actions." 
- - # Mix - if self.mixer is not None: - chosen_action_qvals = self.mixer(chosen_action_qvals, state) - target_max_qvals = self.target_mixer(target_max_qvals, next_state) - - # Calculate 1-step Q-Learning targets - targets = rewards + self.gamma * (1 - terminated) * target_max_qvals - - # Td-error - td_error = (chosen_action_qvals - targets.detach()) - - mask = mask.expand_as(td_error) - - # 0-out the targets that came from padded data - masked_td_error = td_error * mask - - # Normal L2 loss, take mean over actual data - loss = (masked_td_error**2).sum() / mask.sum() - return loss, mask, masked_td_error, chosen_action_qvals, targets - - -class QMixTorchPolicy(Policy): - """QMix impl. Assumes homogeneous agents for now. - - You must use MultiAgentEnv.with_agent_groups() to group agents - together for QMix. This creates the proper Tuple obs/action spaces and - populates the '_group_rewards' info field. - - Action masking: to specify an action mask for individual agents, use a - dict space with an action_mask key, e.g. {"obs": ob, "action_mask": mask}. - The mask space must be `Box(0, 1, (n_actions,))`. - """ - - def __init__(self, obs_space, action_space, config): - _validate(obs_space, action_space) - config = dict(ray.rllib.agents.qmix.qmix.DEFAULT_CONFIG, **config) - self.config = config - self.observation_space = obs_space - self.action_space = action_space - self.n_agents = len(obs_space.original_space.spaces) - self.n_actions = action_space.spaces[0].n - self.h_size = config["model"]["lstm_cell_size"] - self.has_env_global_state = False - self.has_action_mask = False - self.device = (th.device("cuda") - if th.cuda.is_available() else th.device("cpu")) - - agent_obs_space = obs_space.original_space.spaces[0] - if isinstance(agent_obs_space, Dict): - space_keys = set(agent_obs_space.spaces.keys()) - if "obs" not in space_keys: - raise ValueError( - "Dict obs space must have subspace labeled `obs`") - self.obs_size = _get_size(agent_obs_space.spaces["obs"]) - if "action_mask" in space_keys: - mask_shape = tuple(agent_obs_space.spaces["action_mask"].shape) - if mask_shape != (self.n_actions, ): - raise ValueError( - "Action mask shape must be {}, got {}".format( - (self.n_actions, ), mask_shape)) - self.has_action_mask = True - if ENV_STATE in space_keys: - self.env_global_state_shape = _get_size( - agent_obs_space.spaces[ENV_STATE]) - self.has_env_global_state = True - else: - self.env_global_state_shape = (self.obs_size, self.n_agents) - # The real agent obs space is nested inside the dict - config["model"]["full_obs_space"] = agent_obs_space - agent_obs_space = agent_obs_space.spaces["obs"] - else: - self.obs_size = _get_size(agent_obs_space) - - self.model = ModelCatalog.get_model_v2( - agent_obs_space, - action_space.spaces[0], - self.n_actions, - config["model"], - framework="torch", - name="model", - # default_model=FeedForward).to(self.device) - default_model=RNNModel).to(self.device) - - self.target_model = ModelCatalog.get_model_v2( - agent_obs_space, - action_space.spaces[0], - self.n_actions, - config["model"], - framework="torch", - name="target_model", - # default_model=FeedForward).to(self.device) - default_model=RNNModel).to(self.device) - - # Setup the mixer network. 
- if config["mixer"] is None: - self.mixer = None - self.target_mixer = None - elif config["mixer"] == "qmix": - self.mixer = QMixer(self.n_agents, self.env_global_state_shape, - config["mixing_embed_dim"]).to(self.device) - self.target_mixer = QMixer( - self.n_agents, self.env_global_state_shape, - config["mixing_embed_dim"]).to(self.device) - elif config["mixer"] == "vdn": - self.mixer = VDNMixer().to(self.device) - self.target_mixer = VDNMixer().to(self.device) - else: - raise ValueError("Unknown mixer type {}".format(config["mixer"])) - - self.cur_epsilon = 1.0 - self.update_target() # initial sync - - # Setup optimizer - self.params = list(self.model.parameters()) - if self.mixer: - self.params += list(self.mixer.parameters()) - self.loss = QMixLoss(self.model, self.target_model, self.mixer, - self.target_mixer, self.n_agents, self.n_actions, - self.config["double_q"], self.config["gamma"]) - self.optimiser = RMSprop( - params=self.params, - lr=config["lr"], - alpha=config["optim_alpha"], - eps=config["optim_eps"]) - - @override(Policy) - def compute_actions(self, - obs_batch, - state_batches=None, - prev_action_batch=None, - prev_reward_batch=None, - info_batch=None, - episodes=None, - **kwargs): - obs_batch, action_mask, _ = self._unpack_observation(obs_batch) - # We need to ensure we do not use the env global state - # to compute actions - - # Compute actions - with th.no_grad(): - q_values, hiddens = _mac( - self.model, - th.as_tensor(obs_batch, dtype=th.float, device=self.device), [ - th.as_tensor( - np.array(s), dtype=th.float, device=self.device) - for s in state_batches - ]) - avail = th.as_tensor( - action_mask, dtype=th.float, device=self.device) - masked_q_values = q_values.clone() - masked_q_values[avail == 0.0] = -float("inf") - # epsilon-greedy action selector - random_numbers = th.rand_like(q_values[:, :, 0]) - pick_random = (random_numbers < self.cur_epsilon).long() - random_actions = Categorical(avail).sample().long() - actions = (pick_random * random_actions + - (1 - pick_random) * masked_q_values.argmax(dim=2)) - actions = actions.cpu().numpy() - hiddens = [s.cpu().numpy() for s in hiddens] - - return TupleActions(list(actions.transpose([1, 0]))), hiddens, {} - - @override(Policy) - def learn_on_batch(self, samples): - obs_batch, action_mask, env_global_state = self._unpack_observation( - samples[SampleBatch.CUR_OBS]) - (next_obs_batch, next_action_mask, - next_env_global_state) = self._unpack_observation( - samples[SampleBatch.NEXT_OBS]) - group_rewards = self._get_group_rewards(samples[SampleBatch.INFOS]) - - input_list = [ - group_rewards, action_mask, next_action_mask, - samples[SampleBatch.ACTIONS], samples[SampleBatch.DONES], - obs_batch, next_obs_batch - ] - if self.has_env_global_state: - input_list.extend([env_global_state, next_env_global_state]) - - output_list, _, seq_lens = \ - chop_into_sequences( - samples[SampleBatch.EPS_ID], - samples[SampleBatch.UNROLL_ID], - samples[SampleBatch.AGENT_INDEX], - input_list, - [], # RNN states not used here - # TODO(@evinitsky) make this an option if we are using an RNN - # max_seq_len=1, - max_seq_len=self.config["model"]["max_seq_len"], - dynamic_max=True) - # These will be padded to shape [B * T, ...] 
- if self.has_env_global_state: - (rew, action_mask, next_action_mask, act, dones, obs, next_obs, - env_global_state, next_env_global_state) = output_list - else: - (rew, action_mask, next_action_mask, act, dones, obs, - next_obs) = output_list - B, T = len(seq_lens), max(seq_lens) - - def to_batches(arr, dtype): - new_shape = [B, T] + list(arr.shape[1:]) - return th.as_tensor( - np.reshape(arr, new_shape), dtype=dtype, device=self.device) - - rewards = to_batches(rew, th.float) - actions = to_batches(act, th.long) - obs = to_batches(obs, th.float).reshape( - [B, T, self.n_agents, self.obs_size]) - action_mask = to_batches(action_mask, th.float) - next_obs = to_batches(next_obs, th.float).reshape( - [B, T, self.n_agents, self.obs_size]) - next_action_mask = to_batches(next_action_mask, th.float) - if self.has_env_global_state: - env_global_state = to_batches(env_global_state, th.float) - next_env_global_state = to_batches(next_env_global_state, th.float) - - # TODO(ekl) this treats group termination as individual termination - terminated = to_batches(dones, th.float).unsqueeze(2).expand( - B, T, self.n_agents) - - # Create mask for where index is < unpadded sequence length - filled = np.reshape( - np.tile(np.arange(T, dtype=np.float32), B), - [B, T]) < np.expand_dims(seq_lens, 1) - mask = th.as_tensor( - filled, dtype=th.float, device=self.device).unsqueeze(2).expand( - B, T, self.n_agents) - - # Compute loss - loss_out, mask, masked_td_error, chosen_action_qvals, targets = ( - self.loss(rewards, actions, terminated, mask, obs, next_obs, - action_mask, next_action_mask, env_global_state, - next_env_global_state)) - - # Optimise - self.optimiser.zero_grad() - loss_out.backward() - grad_norm = th.nn.utils.clip_grad_norm_( - self.params, self.config["grad_norm_clipping"]) - self.optimiser.step() - - mask_elems = mask.sum().item() - stats = { - "loss": loss_out.item(), - "grad_norm": grad_norm - if isinstance(grad_norm, float) else grad_norm.item(), - "td_error_abs": masked_td_error.abs().sum().item() / mask_elems, - "q_taken_mean": (chosen_action_qvals * mask).sum().item() / - mask_elems, - "target_mean": (targets * mask).sum().item() / mask_elems, - } - return {LEARNER_STATS_KEY: stats} - - @override(Policy) - def get_initial_state(self): # initial RNN state - return [ - s.expand([self.n_agents, -1]).cpu().numpy() - for s in self.model.get_initial_state() - ] - - @override(Policy) - def get_weights(self): - return { - "model": self._cpu_dict(self.model.state_dict()), - "target_model": self._cpu_dict(self.target_model.state_dict()), - "mixer": self._cpu_dict(self.mixer.state_dict()) - if self.mixer else None, - "target_mixer": self._cpu_dict(self.target_mixer.state_dict()) - if self.mixer else None, - } - - @override(Policy) - def set_weights(self, weights): - self.model.load_state_dict(self._device_dict(weights["model"])) - self.target_model.load_state_dict( - self._device_dict(weights["target_model"])) - if weights["mixer"] is not None: - self.mixer.load_state_dict(self._device_dict(weights["mixer"])) - self.target_mixer.load_state_dict( - self._device_dict(weights["target_mixer"])) - - @override(Policy) - def get_state(self): - state = self.get_weights() - state["cur_epsilon"] = self.cur_epsilon - return state - - @override(Policy) - def set_state(self, state): - self.set_weights(state) - self.set_epsilon(state["cur_epsilon"]) - - def update_target(self): - self.target_model.load_state_dict(self.model.state_dict()) - if self.mixer is not None: - 
self.target_mixer.load_state_dict(self.mixer.state_dict()) - logger.debug("Updated target networks") - - def set_epsilon(self, epsilon): - self.cur_epsilon = epsilon - - def _get_group_rewards(self, info_batch): - group_rewards = np.array([ - info.get(GROUP_REWARDS, [0.0] * self.n_agents) - for info in info_batch - ]) - return group_rewards - - def _device_dict(self, state_dict): - return { - k: th.as_tensor(v, device=self.device) - for k, v in state_dict.items() - } - - @staticmethod - def _cpu_dict(state_dict): - return {k: v.cpu().detach().numpy() for k, v in state_dict.items()} - - def _unpack_observation(self, obs_batch): - """Unpacks the observation, action mask, and state (if present) - from agent grouping. - - Returns: - obs (np.ndarray): obs tensor of shape [B, n_agents, obs_size] - mask (np.ndarray): action mask, if any - state (np.ndarray or None): state tensor of shape [B, state_size] - or None if it is not in the batch - """ - unpacked = _unpack_obs( - np.array(obs_batch, dtype=np.float32), - self.observation_space.original_space, - tensorlib=np) - if self.has_action_mask: - obs = np.concatenate( - [o["obs"] for o in unpacked], - axis=1).reshape([len(obs_batch), self.n_agents, self.obs_size]) - action_mask = np.concatenate( - [o["action_mask"] for o in unpacked], axis=1).reshape( - [len(obs_batch), self.n_agents, self.n_actions]) - else: - if isinstance(unpacked[0], dict): - unpacked_obs = [u["obs"] for u in unpacked] - else: - unpacked_obs = unpacked - obs = np.concatenate( - unpacked_obs, - axis=1).reshape([len(obs_batch), self.n_agents, self.obs_size]) - action_mask = np.ones( - [len(obs_batch), self.n_agents, self.n_actions], - dtype=np.float32) - - if self.has_env_global_state: - state = unpacked[0][ENV_STATE] - else: - state = None - return obs, action_mask, state - - -def _validate(obs_space, action_space): - if not hasattr(obs_space, "original_space") or \ - not isinstance(obs_space.original_space, Tuple): - raise ValueError("Obs space must be a Tuple, got {}. Use ".format( - obs_space) + "MultiAgentEnv.with_agent_groups() to group related " - "agents for QMix.") - if not isinstance(action_space, Tuple): - raise ValueError( - "Action space must be a Tuple, got {}. ".format(action_space) + - "Use MultiAgentEnv.with_agent_groups() to group related " - "agents for QMix.") - if not isinstance(action_space.spaces[0], Discrete): - raise ValueError( - "QMix requires a discrete action space, got {}".format( - action_space.spaces[0])) - if len({str(x) for x in obs_space.original_space.spaces}) > 1: - raise ValueError( - "Implementation limitation: observations of grouped agents " - "must be homogeneous, got {}".format( - obs_space.original_space.spaces)) - if len({str(x) for x in action_space.spaces}) > 1: - raise ValueError( - "Implementation limitation: action space of grouped agents " - "must be homogeneous, got {}".format(action_space.spaces)) - - -def _mac(model, obs, h): - """Forward pass of the multi-agent controller. 
- - Arguments: - model: TorchModelV2 class - obs: Tensor of shape [B, n_agents, obs_size] - h: List of tensors of shape [B, n_agents, h_size] - - Returns: - q_vals: Tensor of shape [B, n_agents, n_actions] - h: Tensor of shape [B, n_agents, h_size] - """ - B, n_agents = obs.size(0), obs.size(1) - if not isinstance(obs, dict): - obs = {"obs": obs} - obs_agents_as_batches = {k: _drop_agent_dim(v) for k, v in obs.items()} - h_flat = [s.reshape([B * n_agents, -1]) for s in h] - q_flat, h_flat = model(obs_agents_as_batches, h_flat, None) - return q_flat.reshape( - [B, n_agents, -1]), [s.reshape([B, n_agents, -1]) for s in h_flat] - - -def _unroll_mac(model, obs_tensor): - """Computes the estimated Q values for an entire trajectory batch""" - B = obs_tensor.size(0) - T = obs_tensor.size(1) - n_agents = obs_tensor.size(2) - - mac_out = [] - h = [s.expand([B, n_agents, -1]) for s in model.get_initial_state()] - for t in range(T): - q, h = _mac(model, obs_tensor[:, t], h) - mac_out.append(q) - mac_out = th.stack(mac_out, dim=1) # Concat over time - - return mac_out - - -def _drop_agent_dim(T): - shape = list(T.shape) - B, n_agents = shape[0], shape[1] - return T.reshape([B * n_agents] + shape[2:]) - - -def _add_agent_dim(T, n_agents): - shape = list(T.shape) - B = shape[0] // n_agents - assert shape[0] % n_agents == 0 - return T.reshape([B, n_agents] + shape[1:]) diff --git a/flow/envs/multiagent/i210.py b/flow/envs/multiagent/i210.py index a0989dc8b..81eac23c3 100644 --- a/flow/envs/multiagent/i210.py +++ b/flow/envs/multiagent/i210.py @@ -420,97 +420,6 @@ def step(self, rl_actions): return state, reward, done, info -class I210MADDPGMultiEnv(I210MultiEnv): - def __init__(self, env_params, sim_params, network, simulator='traci'): - super().__init__(env_params, sim_params, network, simulator) - self.max_num_agents = env_params.additional_params["max_num_agents"] - self.rl_id_to_idx_map = OrderedDict() - self.idx_to_rl_id_map = OrderedDict() - self.index_counter = 0 - self.default_state = {idx: np.zeros(self.observation_space.shape[0]) - for idx in range(self.max_num_agents)} - - def _apply_rl_actions(self, rl_actions): - """See class definition.""" - # in the warmup steps, rl_actions is None - t = time() - if rl_actions: - # print(rl_actions) - accel_list = [] - rl_ids = [] - for rl_id in self.k.vehicle.get_rl_ids(): - if rl_id in self.rl_id_to_idx_map: - accel_list.append(rl_actions[self.rl_id_to_idx_map[rl_id]]) - rl_ids.append(rl_id) - self.k.vehicle.apply_acceleration(rl_ids, accel_list) - # print('time to apply actions is ', time() - t) - - def get_state(self): - t = time() - - # TODO(@evinitsky) clean this up - self.index_counter = 0 - self.rl_id_to_idx_map = {} - for key in self.k.vehicle.get_rl_ids(): - self.rl_id_to_idx_map[key] = self.index_counter - self.idx_to_rl_id_map[self.index_counter] = key - self.index_counter += 1 - if self.index_counter >= self.max_num_agents: - break - - veh_info = super().get_state() - # TODO(@evinitsky) think this doesn't have to be a deepcopy - veh_info_copy = deepcopy(self.default_state) - # id_list = zip(list(range(self.max_num_agents)), rl_ids) - veh_info_copy.update({self.rl_id_to_idx_map[rl_id]: veh_info[rl_id] - for rl_id in self.rl_id_to_idx_map.keys()}) - # print('time to update copy is ', time() - t) - veh_info = veh_info_copy - # print('state time is ', time() - t) - print(veh_info) - - return veh_info - - def compute_reward(self, rl_actions, **kwargs): - # There has to be one global reward for qmix - t = time() - if self.mpg_reward: - if 
self.env_params.additional_params["local_reward"]: - reward = super().compute_reward(rl_actions) - reward_dict = {idx: 0 for idx in - range(self.max_num_agents)} - reward_dict.update({self.rl_id_to_idx_map[rl_id]: reward[rl_id] for rl_id in reward.keys() - if rl_id in self.rl_id_to_idx_map.keys()}) - print(reward_dict) - else: - reward = np.nan_to_num(miles_per_gallon(self, self.k.vehicle.get_ids(), gain=1.0)) / 100.0 - reward_dict = {idx: reward for idx in - range(self.max_num_agents)} - else: - if self.env_params.additional_params["local_reward"]: - reward = super().compute_reward(rl_actions) - reward_dict = {idx: 0 for idx in - range(self.max_num_agents)} - reward_dict.update({self.rl_id_to_idx_map[rl_id]: reward[rl_id] for rl_id in reward.keys() - if rl_id in self.rl_id_to_idx_map.keys()}) - else: - reward = np.nan_to_num(np.mean(self.k.vehicle.get_speed(self.k.vehicle.get_ids()))) / ( - 20 * self.env_params.horizon) - reward_dict = {idx: reward for idx in - range(self.max_num_agents)} - - # print('reward time is ', time() - t) - return reward_dict - - def reset(self, new_inflow_rate=None): - super().reset(new_inflow_rate) - self.rl_id_to_idx_map = OrderedDict() - self.idx_to_rl_id_map = OrderedDict() - self.index_counter = 0 - - return self.get_state() - - class MultiStraightRoad(I210MultiEnv): """Partially observable multi-agent environment for a straight road. Look at superclass for more information.""" @@ -532,29 +441,3 @@ def _apply_rl_actions(self, rl_actions): # prevent the AV from blocking the entrance self.k.vehicle.apply_acceleration(rl_ids, accels) - - -class MultiStraightRoadMADDPG(I210MADDPGMultiEnv): - def __init__(self, env_params, sim_params, network, simulator): - super().__init__(env_params, sim_params, network, simulator) - self.num_enter_lanes = 1 - self.entrance_edge = self.network.routes['highway_0'][0][0][0] - self.exit_edge = self.network.routes['highway_0'][0][0][-1] - - def _apply_rl_actions(self, rl_actions): - """See class definition.""" - # in the warmup steps, rl_actions is None - if rl_actions: - # print(rl_actions) - - rl_ids = [] - accels = [] - for idx, actions in rl_actions.items(): - if idx < self.index_counter: - accels.append(actions[0]) - rl_ids.append(self.idx_to_rl_id_map[idx]) - else: - break - - # prevent the AV from blocking the entrance - self.k.vehicle.apply_acceleration(rl_ids, accels) diff --git a/scripts/ray_autoscale.yaml b/scripts/ray_autoscale.yaml index 6bd9c78ad..18e25154d 100644 --- a/scripts/ray_autoscale.yaml +++ b/scripts/ray_autoscale.yaml @@ -32,7 +32,7 @@ auth: # By default Ray creates a new private keypair, but you can also use your own. # If you do so, make sure to also set "KeyName" in the head and worker node # configurations below. - ssh_private_key: /Users/eugenevinitsky/.ssh/MyKeyPair2.pem +# ssh_private_key: # Provider-specific config for the head node, e.g. instance type. By default # Ray will auto-configure unspecified fields such as SubnetId and KeyName. @@ -41,7 +41,6 @@ auth: head_node: InstanceType: c4.8xlarge ImageId: ami-0c047f3ddd3939b30 # Flow AMI (Ubuntu) - KeyName: MyKeyPair2 InstanceMarketOptions: MarketType: spot #Additional options can be found in the boto docs, e.g. @@ -57,7 +56,6 @@ head_node: worker_nodes: InstanceType: c4.8xlarge ImageId: ami-0c047f3ddd3939b30 # Flow AMI (Ubuntu) - KeyName: MyKeyPair2 #Run workers on spot by default. Comment this out to use on-demand. 
InstanceMarketOptions: diff --git a/scripts/run_exps.sh b/scripts/run_exps.sh deleted file mode 100755 index 0a8be26d4..000000000 --- a/scripts/run_exps.sh +++ /dev/null @@ -1,35 +0,0 @@ -#!/bin/bash - -#ray exec ray_autoscale.yaml "python flow/examples/train.py multiagent_i210 i210_reroute_test --algorithm PPO \ -#--num_iterations 200 --num_cpus 12 --num_rollouts 12 --rl_trainer rllib --use_s3" --start --stop \ -#--cluster-name=ev_i210_test --tmux - -#ray exec ray_autoscale.yaml "python flow/examples/train.py multiagent_i210 i210_reroute_test2 --algorithm PPO \ -##--num_iterations 200 --num_cpus 4 --num_rollouts 4 --rl_trainer rllib --use_s3" --start --stop \ -##--cluster-name=ev_i210_test2 --tmux - -# 5/10 -#ray exec ray_autoscale.yaml "python flow/examples/train.py multiagent_straight_road \ -#straight_road_reroute_local_rew_mpg --algorithm PPO \ -#--num_iterations 200 --num_cpus 8 --num_rollouts 8 --rl_trainer rllib --use_s3" --start --stop \ -#--cluster-name=ev_i210_test1 --tmux -# -#ray exec ray_autoscale.yaml "python flow/examples/train.py multiagent_i210 \ -#i210_reroute_local_rew_mpg --algorithm PPO \ -#--num_iterations 200 --num_cpus 8 --num_rollouts 8 --rl_trainer rllib --use_s3" --start --stop \ -#--cluster-name=ev_i210_test2 --tmux - -#ray exec ray_autoscale.yaml "python flow/examples/train.py multiagent_straight_road \ -#straight_road_reroute_local_rew_mpg_curr --algorithm PPO \ -#--num_iterations 200 --num_cpus 7 --num_rollouts 7 --rl_trainer rllib --use_s3 --grid_search" --start --stop \ -#--cluster-name=ev_i210_test3 --tmux -# -#ray exec ray_autoscale.yaml "python flow/examples/train.py multiagent_i210 \ -#i210_reroute_local_rew_mpg_curr --algorithm PPO \ -#--num_iterations 200 --num_cpus 7 --num_rollouts 7 --rl_trainer rllib --use_s3 --grid_search" --start --stop \ -#--cluster-name=ev_i210_test4 --tmux - -ray exec ray_autoscale.yaml "python flow/examples/train.py multiagent_straight_road \ -straight_road_reroute_local_rew_mpj_curr --algorithm PPO \ ---num_iterations 100 --num_cpus 30 --num_rollouts 30 --rl_trainer rllib --use_s3" --start --stop \ ---cluster-name=ev_i210_test3 --tmux \ No newline at end of file From 9d468d859ecece029ec3d7905ec2dfb7d7165294 Mon Sep 17 00:00:00 2001 From: Eugene Vinitsky Date: Wed, 27 May 2020 18:22:56 -0400 Subject: [PATCH 69/85] Switch V_DES to 5.0 --- examples/exp_configs/rl/multiagent/multiagent_i210.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/examples/exp_configs/rl/multiagent/multiagent_i210.py b/examples/exp_configs/rl/multiagent/multiagent_i210.py index 3dabcdd98..b9da9b1b8 100644 --- a/examples/exp_configs/rl/multiagent/multiagent_i210.py +++ b/examples/exp_configs/rl/multiagent/multiagent_i210.py @@ -30,7 +30,7 @@ # WANT_DOWNSTREAM_BOUNDARY = True ON_RAMP = False PENETRATION_RATE = 0.10 -V_DES = 7.0 +V_DES = 5.0 HORIZON = 1000 WARMUP_STEPS = 600 From f67828677c32f2d24d6e13837181ffd13343b229 Mon Sep 17 00:00:00 2001 From: Eugene Vinitsky Date: Wed, 27 May 2020 19:29:57 -0400 Subject: [PATCH 70/85] Bug fix for lead_obs false --- .../rl/multiagent/multiagent_i210.py | 2 +- flow/envs/multiagent/i210.py | 35 ++++++++++--------- flow/networks/__init__.py | 3 +- flow/visualize/time_space_diagram.py | 6 ++-- 4 files changed, 25 insertions(+), 21 deletions(-) diff --git a/examples/exp_configs/rl/multiagent/multiagent_i210.py b/examples/exp_configs/rl/multiagent/multiagent_i210.py index b9da9b1b8..26c69b0a0 100644 --- a/examples/exp_configs/rl/multiagent/multiagent_i210.py +++ 
b/examples/exp_configs/rl/multiagent/multiagent_i210.py @@ -59,7 +59,7 @@ 'max_accel': 2.6, 'max_decel': 4.5, # configure the observation space. Look at the I210MultiEnv class for more info. - 'lead_obs': True, + 'lead_obs': False, # whether to add in a reward for the speed of nearby vehicles "local_reward": True, # whether to use the MPG reward. Otherwise, defaults to a target velocity reward diff --git a/flow/envs/multiagent/i210.py b/flow/envs/multiagent/i210.py index a0989dc8b..8efc06820 100644 --- a/flow/envs/multiagent/i210.py +++ b/flow/envs/multiagent/i210.py @@ -159,28 +159,29 @@ def _apply_rl_actions(self, rl_actions): def get_state(self): """See class definition.""" - if self.lead_obs: - veh_info = {} - for rl_id in self.k.vehicle.get_rl_ids(): - if (self.control_range and self.k.vehicle.get_x_by_id(rl_id) < self.control_range[1] \ + valid_ids = [rl_id for rl_id in self.k.vehicle.get_rl_ids() + if (self.control_range and self.k.vehicle.get_x_by_id(rl_id) < self.control_range[1] \ and self.k.vehicle.get_x_by_id(rl_id) > self.control_range[0]) or \ (len(self.invalid_control_edges) > 0 and self.k.vehicle.get_edge(rl_id) not in - self.invalid_control_edges): - speed = self.k.vehicle.get_speed(rl_id) - lead_id = self.k.vehicle.get_leader(rl_id) - if lead_id in ["", None]: - # in case leader is not visible - lead_speed = SPEED_SCALE - headway = HEADWAY_SCALE - else: - lead_speed = self.k.vehicle.get_speed(lead_id) - headway = self.k.vehicle.get_headway(rl_id) - veh_info.update({rl_id: np.array([speed / SPEED_SCALE, headway / HEADWAY_SCALE, - lead_speed / SPEED_SCALE])}) + self.invalid_control_edges)] + if self.lead_obs: + veh_info = {} + for rl_id in valid_ids: + speed = self.k.vehicle.get_speed(rl_id) + lead_id = self.k.vehicle.get_leader(rl_id) + if lead_id in ["", None]: + # in case leader is not visible + lead_speed = SPEED_SCALE + headway = HEADWAY_SCALE + else: + lead_speed = self.k.vehicle.get_speed(lead_id) + headway = self.k.vehicle.get_headway(rl_id) + veh_info.update({rl_id: np.array([speed / SPEED_SCALE, headway / HEADWAY_SCALE, + lead_speed / SPEED_SCALE])}) else: veh_info = {rl_id: np.concatenate((self.state_util(rl_id), self.veh_statistics(rl_id))) - for rl_id in self.k.vehicle.get_rl_ids()} + for rl_id in valid_ids} # print('time to get state is ', time() - t) return veh_info diff --git a/flow/networks/__init__.py b/flow/networks/__init__.py index af849031d..2b3faced8 100644 --- a/flow/networks/__init__.py +++ b/flow/networks/__init__.py @@ -16,10 +16,11 @@ from flow.networks.minicity import MiniCityNetwork from flow.networks.highway_ramps import HighwayRampsNetwork from flow.networks.i210_subnetwork import I210SubNetwork +from flow.networks.i210_subnetwork_ghost_cell import I210SubNetworkGhostCell __all__ = [ "Network", "BayBridgeNetwork", "BayBridgeTollNetwork", "BottleneckNetwork", "FigureEightNetwork", "TrafficLightGridNetwork", "HighwayNetwork", "RingNetwork", "MergeNetwork", "MultiRingNetwork", - "MiniCityNetwork", "HighwayRampsNetwork", "I210SubNetwork" + "MiniCityNetwork", "HighwayRampsNetwork", "I210SubNetwork", "I210SubNetworkGhostCell" ] diff --git a/flow/visualize/time_space_diagram.py b/flow/visualize/time_space_diagram.py index 004172765..180fe1753 100644 --- a/flow/visualize/time_space_diagram.py +++ b/flow/visualize/time_space_diagram.py @@ -17,7 +17,7 @@ python time_space_diagram.py .csv .json """ from flow.utils.rllib import get_flow_params -from flow.networks import RingNetwork, FigureEightNetwork, MergeNetwork, I210SubNetwork, HighwayNetwork +from 
flow.networks import RingNetwork, FigureEightNetwork, MergeNetwork, I210SubNetwork, HighwayNetwork, I210SubNetworkGhostCell import argparse import csv @@ -38,6 +38,7 @@ FigureEightNetwork, MergeNetwork, I210SubNetwork, + I210SubNetworkGhostCell, HighwayNetwork ] @@ -137,6 +138,7 @@ def get_time_space_data(data, params): MergeNetwork: _merge, FigureEightNetwork: _figure_eight, I210SubNetwork: _i210_subnetwork, + I210SubNetworkGhostCell: _i210_subnetwork, HighwayNetwork: _highway, } @@ -434,7 +436,7 @@ def _i210_subnetwork(data, params, all_time): # create the output variables # TODO(@ev) handle subsampling better than this low_time = int(0 / params['sim'].sim_step) - high_time = int(1600 / params['sim'].sim_step) + high_time = int(1600 * params['env'].sims_per_step / params['sim'].sim_step) all_time = all_time[low_time:high_time] # track only vehicles that were around during this time period From bbb16ffbf254cbafe206e5945d9c8305636c33d9 Mon Sep 17 00:00:00 2001 From: Eugene Vinitsky Date: Wed, 27 May 2020 20:41:48 -0400 Subject: [PATCH 71/85] Address comments --- .../exp_configs/non_rl/i210_subnetwork.py | 9 +- .../rl/multiagent/multiagent_i210.py | 2 +- .../rl/multiagent/multiagent_straight_road.py | 1 - examples/train.py | 11 +- flow/controllers/car_following_models.py | 1 - flow/core/rewards.py | 2 +- flow/envs/__init__.py | 3 +- flow/envs/multiagent/base.py | 2 +- flow/envs/multiagent/i210.py | 130 ++++++++---------- flow/envs/test.py | 14 -- flow/visualize/visualizer_rllib.py | 3 +- 11 files changed, 80 insertions(+), 98 deletions(-) diff --git a/examples/exp_configs/non_rl/i210_subnetwork.py b/examples/exp_configs/non_rl/i210_subnetwork.py index 3083818fd..65131a6bd 100644 --- a/examples/exp_configs/non_rl/i210_subnetwork.py +++ b/examples/exp_configs/non_rl/i210_subnetwork.py @@ -19,15 +19,22 @@ # Instantiate which conditions we want to be true about the network +# whether to include a ghost cell at the entrance WANT_GHOST_CELL = True -# WANT_DOWNSTREAM_BOUNDARY = True +# whether to include vehicles on the on-ramp ON_RAMP = False +# fraction of vehicles that are follower-stoppers. 
0.10 corresponds to 10% PENETRATION_RATE = 0.0 +# desired speed of the follower stopper vehicles V_DES = 5.0 +# horizon over which to run the env HORIZON = 1000 +# steps to run before follower-stopper is allowed to take control WARMUP_STEPS = 600 +# Number of vehicles/hour/lane inflow_rate = 2050 +# the speed of inflowing vehicles from the main edge (in m/s) inflow_speed = 25.5 accel_data = (IDMController, {'a': 1.3, 'b': 2.0, 'noise': 0.3}) diff --git a/examples/exp_configs/rl/multiagent/multiagent_i210.py b/examples/exp_configs/rl/multiagent/multiagent_i210.py index 3dabcdd98..c31677d24 100644 --- a/examples/exp_configs/rl/multiagent/multiagent_i210.py +++ b/examples/exp_configs/rl/multiagent/multiagent_i210.py @@ -74,7 +74,7 @@ # how many AVs there can be at once (this is only for centralized critics) "max_num_agents": 10, # which edges we shouldn't apply control on - "invalid_control_edges": ["ghost0", "119257908#3"], + "no_control_edges": ["ghost0", "119257908#3"], # whether to add a slight reward for opening up a gap that will be annealed out N iterations in "headway_curriculum": False, diff --git a/examples/exp_configs/rl/multiagent/multiagent_straight_road.py b/examples/exp_configs/rl/multiagent/multiagent_straight_road.py index fccccddeb..5816d3fe7 100644 --- a/examples/exp_configs/rl/multiagent/multiagent_straight_road.py +++ b/examples/exp_configs/rl/multiagent/multiagent_straight_road.py @@ -6,7 +6,6 @@ from flow.controllers import RLController, IDMController from flow.core.params import EnvParams, NetParams, InitialConfig, InFlows, \ VehicleParams, SumoParams, SumoLaneChangeParams, SumoCarFollowingParams -from flow.envs.ring.accel import ADDITIONAL_ENV_PARAMS from flow.networks import HighwayNetwork from flow.envs.ring.accel import ADDITIONAL_ENV_PARAMS from flow.envs.multiagent import MultiStraightRoad diff --git a/examples/train.py b/examples/train.py index 1c824c379..bae83fcf0 100644 --- a/examples/train.py +++ b/examples/train.py @@ -68,7 +68,8 @@ def parse_args(args): help='the RL trainer to use. either rllib or Stable-Baselines') parser.add_argument( '--algorithm', type=str, default="PPO", - help='RL algorithm to use. Options are PPO, TD3 right now.' + help='RL algorithm to use. Options are PPO, TD3, and CENTRALIZEDPPO (which uses a centralized value function)' + ' right now.' 
) parser.add_argument( '--num_cpus', type=int, default=1, @@ -255,13 +256,13 @@ def on_episode_step(info): env = info["env"].get_unwrapped()[0] if isinstance(env, _GroupAgentsWrapper): env = env.env - if hasattr(env, 'invalid_control_edges'): + if hasattr(env, 'no_control_edges'): veh_ids = [veh_id for veh_id in env.k.vehicle.get_ids() if (env.k.vehicle.get_speed(veh_id) >= 0 and env.k.vehicle.get_edge(veh_id) - not in env.invalid_control_edges)] + not in env.no_control_edges)] rl_ids = [veh_id for veh_id in env.k.vehicle.get_rl_ids() if (env.k.vehicle.get_speed(veh_id) >= 0 and env.k.vehicle.get_edge(veh_id) - not in env.invalid_control_edges)] + not in env.no_control_edges)] else: veh_ids = [veh_id for veh_id in env.k.vehicle.get_ids() if env.k.vehicle.get_speed(veh_id) >= 0] rl_ids = [veh_id for veh_id in env.k.vehicle.get_rl_ids() if env.k.vehicle.get_speed(veh_id) >= 0] @@ -356,7 +357,7 @@ def trial_str_creator(trial): } date = datetime.now(tz=pytz.utc) date = date.astimezone(pytz.timezone('US/Pacific')).strftime("%m-%d-%Y") - s3_string = "s3://eugene.experiments/i210/" \ + s3_string = "s3://i210.experiments/i210/" \ + date + '/' + flags.exp_title if flags.use_s3: exp_dict['upload_dir'] = s3_string diff --git a/flow/controllers/car_following_models.py b/flow/controllers/car_following_models.py index 30e77162e..280c94d37 100755 --- a/flow/controllers/car_following_models.py +++ b/flow/controllers/car_following_models.py @@ -647,7 +647,6 @@ def __init__(self, def get_accel(self, env): """See parent class.""" - # TODO(@evinitsky) this is a hack to make rerouting works. This gets vehicles into the network # without generating waves. lead_id = env.k.vehicle.get_leader(self.veh_id) if not lead_id: # no car ahead diff --git a/flow/core/rewards.py b/flow/core/rewards.py index 94de247d6..5aada2d8e 100755 --- a/flow/core/rewards.py +++ b/flow/core/rewards.py @@ -389,7 +389,7 @@ def miles_per_megajoule(env, veh_ids=None, gain=.001): speed = env.k.vehicle.get_speed(veh_id) # convert to be positive since the function called is a penalty power = -vehicle_energy_consumption(env, veh_id, gain=1.0) - if power > 0 and not speed < 0.1: + if power > 0 and speed >= 0.1: counter += 1 # meters / joule is (v * \delta t) / (power * \delta t) mpj += speed / power diff --git a/flow/envs/__init__.py b/flow/envs/__init__.py index 31a91292d..8bea3dd4f 100755 --- a/flow/envs/__init__.py +++ b/flow/envs/__init__.py @@ -12,7 +12,7 @@ WaveAttenuationPOEnv from flow.envs.merge import MergePOEnv from flow.envs.straightroad_env import SingleStraightRoad -from flow.envs.test import TestEnv, TestI210Env +from flow.envs.test import TestEnv # deprecated classes whose names have changed from flow.envs.bottleneck_env import BottleNeckAccelEnv @@ -37,7 +37,6 @@ 'TrafficLightGridBenchmarkEnv', 'BottleneckDesiredVelocityEnv', 'TestEnv', - 'TestI210Env' 'BayBridgeEnv', 'SingleStraightRoad', # deprecated classes diff --git a/flow/envs/multiagent/base.py b/flow/envs/multiagent/base.py index 6d1880673..7104138de 100644 --- a/flow/envs/multiagent/base.py +++ b/flow/envs/multiagent/base.py @@ -4,7 +4,7 @@ import numpy as np import random import traceback -from gym.spaces import Box, Dict +from gym.spaces import Box from traci.exceptions import FatalTraCIError from traci.exceptions import TraCIException diff --git a/flow/envs/multiagent/i210.py b/flow/envs/multiagent/i210.py index 81eac23c3..c9b63b23a 100644 --- a/flow/envs/multiagent/i210.py +++ b/flow/envs/multiagent/i210.py @@ -75,7 +75,7 @@ def __init__(self, env_params, sim_params, 
network, simulator='traci'): self.entrance_edge = "ghost0" self.exit_edge = "119257908#2" self.control_range = env_params.additional_params.get('control_range', None) - self.invalid_control_edges = env_params.additional_params.get('invalid_control_edges', []) + self.no_control_edges = env_params.additional_params.get('no_control_edges', []) self.mpg_reward = env_params.additional_params["mpg_reward"] self.mpj_reward = env_params.additional_params["mpj_reward"] self.look_back_length = env_params.additional_params["look_back_length"] @@ -143,7 +143,6 @@ def _apply_rl_actions(self, rl_actions): id_list = [] accel_list = [] if rl_actions: - t = time() for rl_id, actions in rl_actions.items(): accel = actions[0] @@ -157,31 +156,37 @@ def _apply_rl_actions(self, rl_actions): # self.k.vehicle.apply_lane_change(rl_id, lane_change_action) # print('time to apply actions is ', time() - t) + def in_control_range(self, veh_id): + """Return if a veh_id is on an edge that is allowed to be controlled. + + If control range is defined it uses control range, otherwise it searches over a set of edges + """ + return (self.control_range and self.k.vehicle.get_x_by_id(veh_id) < self.control_range[1] \ + and self.k.vehicle.get_x_by_id(veh_id) > self.control_range[0]) or \ + (len(self.no_control_edges) > 0 and self.k.vehicle.get_edge(veh_id) not in + self.no_control_edges) + def get_state(self): """See class definition.""" + valid_ids = [rl_id for rl_id in self.k.vehicle.get_rl_ids() if self.in_control_range(rl_id)] if self.lead_obs: veh_info = {} - for rl_id in self.k.vehicle.get_rl_ids(): - if (self.control_range and self.k.vehicle.get_x_by_id(rl_id) < self.control_range[1] \ - and self.k.vehicle.get_x_by_id(rl_id) > self.control_range[0]) or \ - (len(self.invalid_control_edges) > 0 and self.k.vehicle.get_edge(rl_id) not in - self.invalid_control_edges): - speed = self.k.vehicle.get_speed(rl_id) - lead_id = self.k.vehicle.get_leader(rl_id) - if lead_id in ["", None]: - # in case leader is not visible - lead_speed = SPEED_SCALE - headway = HEADWAY_SCALE - else: - lead_speed = self.k.vehicle.get_speed(lead_id) - headway = self.k.vehicle.get_headway(rl_id) - veh_info.update({rl_id: np.array([speed / SPEED_SCALE, headway / HEADWAY_SCALE, - lead_speed / SPEED_SCALE])}) + for rl_id in valid_ids: + speed = self.k.vehicle.get_speed(rl_id) + lead_id = self.k.vehicle.get_leader(rl_id) + if lead_id in ["", None]: + # in case leader is not visible + lead_speed = SPEED_SCALE + headway = HEADWAY_SCALE + else: + lead_speed = self.k.vehicle.get_speed(lead_id) + headway = self.k.vehicle.get_headway(rl_id) + veh_info.update({rl_id: np.array([speed / SPEED_SCALE, headway / HEADWAY_SCALE, + lead_speed / SPEED_SCALE])}) else: veh_info = {rl_id: np.concatenate((self.state_util(rl_id), self.veh_statistics(rl_id))) - for rl_id in self.k.vehicle.get_rl_ids()} - # print('time to get state is ', time() - t) + for rl_id in valid_ids} return veh_info def compute_reward(self, rl_actions, **kwargs): @@ -190,47 +195,44 @@ def compute_reward(self, rl_actions, **kwargs): if rl_actions is None: return {} - t = time() rewards = {} + valid_ids = [rl_id for rl_id in self.k.vehicle.get_rl_ids() if self.in_control_range(rl_id)] + if self.env_params.additional_params["local_reward"]: des_speed = self.env_params.additional_params["target_velocity"] - for rl_id in self.k.vehicle.get_rl_ids(): - if (self.control_range and self.k.vehicle.get_x_by_id(rl_id) < self.control_range[1] \ - and self.k.vehicle.get_x_by_id(rl_id) > self.control_range[0]) or \ - 
(len(self.invalid_control_edges) > 0 and self.k.vehicle.get_edge(rl_id) not in - self.invalid_control_edges): - rewards[rl_id] = 0 - if self.mpg_reward: - rewards[rl_id] = miles_per_gallon(self, rl_id, gain=1.0) / 100.0 - follow_id = rl_id - for i in range(self.look_back_length): - follow_id = self.k.vehicle.get_follower(follow_id) - if follow_id not in ["", None]: - rewards[rl_id] += miles_per_gallon(self, follow_id, gain=1.0) / 100.0 - else: - break - elif self.mpj_reward: - rewards[rl_id] = miles_per_megajoule(self, rl_id, gain=1.0) / 100.0 - follow_id = rl_id - for i in range(self.look_back_length): - follow_id = self.k.vehicle.get_follower(follow_id) - if follow_id not in ["", None]: - # if self.time_counter > 700 and miles_per_megajoule(self, follow_id, gain=1.0) > 1.0: - # import ipdb; ipdb.set_trace() - rewards[rl_id] += miles_per_megajoule(self, follow_id, gain=1.0) / 100.0 - else: - break - else: - speeds = [] - follow_speed = self.k.vehicle.get_speed(self.k.vehicle.get_follower(rl_id)) - if follow_speed >= 0: - speeds.append(follow_speed) - if self.k.vehicle.get_speed(rl_id) >= 0: - speeds.append(self.k.vehicle.get_speed(rl_id)) - if len(speeds) > 0: - # rescale so the critic can estimate it quickly - rewards[rl_id] = np.mean([(des_speed - np.abs(speed - des_speed)) ** 2 - for speed in speeds]) / (des_speed ** 2) + for rl_id in valid_ids: + rewards[rl_id] = 0 + if self.mpg_reward: + rewards[rl_id] = miles_per_gallon(self, rl_id, gain=1.0) / 100.0 + follow_id = rl_id + for i in range(self.look_back_length): + follow_id = self.k.vehicle.get_follower(follow_id) + if follow_id not in ["", None]: + rewards[rl_id] += miles_per_gallon(self, follow_id, gain=1.0) / 100.0 + else: + break + elif self.mpj_reward: + rewards[rl_id] = miles_per_megajoule(self, rl_id, gain=1.0) / 100.0 + follow_id = rl_id + for i in range(self.look_back_length): + follow_id = self.k.vehicle.get_follower(follow_id) + if follow_id not in ["", None]: + # if self.time_counter > 700 and miles_per_megajoule(self, follow_id, gain=1.0) > 1.0: + # import ipdb; ipdb.set_trace() + rewards[rl_id] += miles_per_megajoule(self, follow_id, gain=1.0) / 100.0 + else: + break + else: + speeds = [] + follow_speed = self.k.vehicle.get_speed(self.k.vehicle.get_follower(rl_id)) + if follow_speed >= 0: + speeds.append(follow_speed) + if self.k.vehicle.get_speed(rl_id) >= 0: + speeds.append(self.k.vehicle.get_speed(rl_id)) + if len(speeds) > 0: + # rescale so the critic can estimate it quickly + rewards[rl_id] = np.mean([(des_speed - np.abs(speed - des_speed)) ** 2 + for speed in speeds]) / (des_speed ** 2) else: if self.mpg_reward: reward = np.nan_to_num(miles_per_gallon(self, self.k.vehicle.get_ids(), gain=1.0)) / 100.0 @@ -244,11 +246,7 @@ def compute_reward(self, rl_actions, **kwargs): else: reward = np.nan_to_num(np.mean([(des_speed - np.abs(speed - des_speed)) ** 2 for speed in speeds]) / (des_speed ** 2)) - rewards = {rl_id: reward for rl_id in self.k.vehicle.get_rl_ids() - if (self.control_range and self.k.vehicle.get_x_by_id(rl_id) < self.control_range[1] \ - and self.k.vehicle.get_x_by_id(rl_id) > self.control_range[0]) or \ - (len(self.invalid_control_edges) > 0 and self.k.vehicle.get_edge(rl_id) not in - self.invalid_control_edges)} + rewards = {rl_id: reward for rl_id in valid_ids} # curriculum over time-gaps if self.headway_curriculum and self.num_training_iters <= self.headway_curriculum_iters: @@ -360,12 +358,6 @@ def additional_command(self): if veh_id not in self.observed_ids: self.k.vehicle.remove(veh_id) - # for 
veh_id in self.k.vehicle.get_ids(): - # edge = self.k.vehicle.get_edge(veh_id) - # - # # disable lane changes to prevent vehicles from being on the wrong route - # if edge == "119257908#1-AddedOnRampEdge": - # self.k.vehicle.apply_lane_change([veh_id], direction=[0]) def state_util(self, rl_id): """Return an array of headway, tailway, leader speed, follower speed. diff --git a/flow/envs/test.py b/flow/envs/test.py index 2fb4f6ceb..813e4621e 100644 --- a/flow/envs/test.py +++ b/flow/envs/test.py @@ -52,17 +52,3 @@ def compute_reward(self, rl_actions, **kwargs): def get_state(self, **kwargs): """See class definition.""" return np.array([]) - -class TestI210Env(TestEnv): - - def additional_command(self): - edge = "119257908#0" - edge_length = self.k.network.edge_length(edge) - for veh_id in self.k.vehicle.get_ids(): - edge = self.k.vehicle.get_edge(veh_id) - pos = self.k.vehicle.get_position(veh_id) - - # disable lane changes to prevent vehicles from being on the wrong route - if edge == edge and np.abs(pos - edge_length) < 20: - # import ipdb; ipdb.set_trace() - self.k.vehicle.apply_lane_change([veh_id], direction=[0]) \ No newline at end of file diff --git a/flow/visualize/visualizer_rllib.py b/flow/visualize/visualizer_rllib.py index 1df7ed83e..5c52e196f 100644 --- a/flow/visualize/visualizer_rllib.py +++ b/flow/visualize/visualizer_rllib.py @@ -169,10 +169,9 @@ def visualizer_rllib(args): else: env = gym.make(env_name) + # reroute on exit is a training hack, it should be turned off at test time. if hasattr(env, "reroute_on_exit"): env.reroute_on_exit = False - # env.env_params.horizon += env.env_params.warmup_steps - # env.env_params.warmup_steps = 0 if args.render_mode == 'sumo_gui': env.sim_params.render = True # set to True after initializing agent and env From c69b763ec9ab8895b37032c6d70ac0474fbc8729 Mon Sep 17 00:00:00 2001 From: Eugene Vinitsky Date: Thu, 28 May 2020 13:21:11 -0400 Subject: [PATCH 72/85] Add logging of number of cars --- examples/train.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/examples/train.py b/examples/train.py index 6da7cb4ea..ada58bc46 100644 --- a/examples/train.py +++ b/examples/train.py @@ -262,6 +262,7 @@ def on_episode_start(info): episode.user_data["avg_energy"] = [] episode.user_data["avg_mpg"] = [] episode.user_data["avg_mpj"] = [] + episode.user_data["num_avs"] = [] def on_episode_step(info): @@ -288,6 +289,7 @@ def on_episode_step(info): episode.user_data["avg_speed_avs"].append(av_speed) episode.user_data["avg_mpg"].append(miles_per_gallon(env, veh_ids, gain=1.0)) episode.user_data["avg_mpj"].append(miles_per_megajoule(env, veh_ids, gain=1.0)) + episode.user_data["num_cars"].append(len(env.k.vehicle.get_ids())) def on_episode_end(info): @@ -299,6 +301,7 @@ def on_episode_end(info): episode.custom_metrics["avg_energy_per_veh"] = np.mean(episode.user_data["avg_energy"]) episode.custom_metrics["avg_mpg_per_veh"] = np.mean(episode.user_data["avg_mpg"]) episode.custom_metrics["avg_mpj_per_veh"] = np.mean(episode.user_data["avg_mpj"]) + episode.custom_metrics["num_cars"] = np.mean(episode.user_data["num_cars"]) def on_train_result(info): """Store the mean score of the episode, and increment or decrement how many adversaries are on""" From bb37992141dbfe3636eca52c41e7bc57d604538d Mon Sep 17 00:00:00 2001 From: Eugene Vinitsky Date: Thu, 28 May 2020 13:22:29 -0400 Subject: [PATCH 73/85] Remove clipping so failsafes can be applied --- flow/controllers/base_controller.py | 2 -- 1 file changed, 2 deletions(-) diff --git 
a/flow/controllers/base_controller.py b/flow/controllers/base_controller.py index 0bf8e8e22..2fdb2f399 100755 --- a/flow/controllers/base_controller.py +++ b/flow/controllers/base_controller.py @@ -114,8 +114,6 @@ def get_action(self, env): # time step if accel is None: return None - else: - accel = min(max(accel, -self.max_deaccel), self.max_accel) # store the acceleration without noise to each vehicle # run fail safe if requested From 36e36192e35dba5115c729f518dbeb377469fd49 Mon Sep 17 00:00:00 2001 From: Eugene Vinitsky Date: Thu, 28 May 2020 14:08:33 -0400 Subject: [PATCH 74/85] add logging of accel variation --- examples/train.py | 12 +++++++++++- flow/core/rewards.py | 2 +- 2 files changed, 12 insertions(+), 2 deletions(-) diff --git a/examples/train.py b/examples/train.py index ada58bc46..314c3a797 100644 --- a/examples/train.py +++ b/examples/train.py @@ -262,7 +262,9 @@ def on_episode_start(info): episode.user_data["avg_energy"] = [] episode.user_data["avg_mpg"] = [] episode.user_data["avg_mpj"] = [] - episode.user_data["num_avs"] = [] + episode.user_data["num_cars"] = [] + episode.user_data["avg_accel_human"] = [] + episode.user_data["avg_accel_avs"] = [] def on_episode_step(info): @@ -290,6 +292,14 @@ def on_episode_step(info): episode.user_data["avg_mpg"].append(miles_per_gallon(env, veh_ids, gain=1.0)) episode.user_data["avg_mpj"].append(miles_per_megajoule(env, veh_ids, gain=1.0)) episode.user_data["num_cars"].append(len(env.k.vehicle.get_ids())) + episode.user_data["avg_accel_human"].append(np.nan_to_num(np.mean( + [np.abs((env.k.vehicle.get_speed(veh_id) - env.k.vehicle.get_previous_speed(veh_id))/env.sim_step) for + veh_id in veh_ids if veh_id in env.k.vehicle.previous_speeds.keys()] + ))) + episode.user_data["avg_accel_avs"].append(np.nan_to_num(np.mean( + [np.abs((env.k.vehicle.get_speed(veh_id) - env.k.vehicle.get_previous_speed(veh_id))/env.sim_step) for + veh_id in rl_ids if veh_id in env.k.vehicle.previous_speeds.keys()] + ))) def on_episode_end(info): diff --git a/flow/core/rewards.py b/flow/core/rewards.py index 94de247d6..9a18a1b77 100755 --- a/flow/core/rewards.py +++ b/flow/core/rewards.py @@ -322,7 +322,7 @@ def energy_consumption(env, gain=.001): rho = 1.225 # air density (kg/m^3) A = 2.6 # vehicle cross sectional area (m^2) for veh_id in env.k.vehicle.get_ids(): - if veh_id not in env.k.vehicle.previous_speeds: + if veh_id not in env.k.vehicle.previous_speeds.keys(): continue speed = env.k.vehicle.get_speed(veh_id) prev_speed = env.k.vehicle.get_previous_speed(veh_id) From c6949bcf4df0e1046d4d341ef43cf1f743b4aa6f Mon Sep 17 00:00:00 2001 From: Eugene Vinitsky Date: Thu, 28 May 2020 14:12:09 -0400 Subject: [PATCH 75/85] Fix imports --- flow/envs/multiagent/__init__.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/flow/envs/multiagent/__init__.py b/flow/envs/multiagent/__init__.py index f530ce568..8c5552580 100644 --- a/flow/envs/multiagent/__init__.py +++ b/flow/envs/multiagent/__init__.py @@ -10,7 +10,7 @@ from flow.envs.multiagent.traffic_light_grid import MultiTrafficLightGridPOEnv from flow.envs.multiagent.highway import MultiAgentHighwayPOEnv from flow.envs.multiagent.merge import MultiAgentMergePOEnv -from flow.envs.multiagent.i210 import I210MultiEnv, MultiStraightRoad, I210MADDPGMultiEnv, MultiStraightRoadMADDPG +from flow.envs.multiagent.i210 import I210MultiEnv, MultiStraightRoad __all__ = [ @@ -24,6 +24,4 @@ 'MultiAgentMergePOEnv', 'I210MultiEnv', 'MultiStraightRoad', - 'I210MADDPGMultiEnv', - 'MultiStraightRoadMADDPG' ] From 
3dd87d07ccbdcc4ec231cb838fce7a88804bc5a7 Mon Sep 17 00:00:00 2001 From: Eugene Vinitsky Date: Thu, 28 May 2020 15:01:29 -0400 Subject: [PATCH 76/85] Fix on-ramp code --- examples/exp_configs/rl/multiagent/multiagent_i210.py | 4 ++-- flow/networks/i210_subnetwork_ghost_cell.py | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/examples/exp_configs/rl/multiagent/multiagent_i210.py b/examples/exp_configs/rl/multiagent/multiagent_i210.py index c31677d24..f55917e49 100644 --- a/examples/exp_configs/rl/multiagent/multiagent_i210.py +++ b/examples/exp_configs/rl/multiagent/multiagent_i210.py @@ -275,8 +275,8 @@ inflows=inflow, template=NET_TEMPLATE, additional_params={ - "on_ramp": False, - "ghost_edge": False + "on_ramp": ON_RAMP, + "ghost_edge": WANT_GHOST_CELL } ), diff --git a/flow/networks/i210_subnetwork_ghost_cell.py b/flow/networks/i210_subnetwork_ghost_cell.py index 08fee4ecd..8a45b4d91 100644 --- a/flow/networks/i210_subnetwork_ghost_cell.py +++ b/flow/networks/i210_subnetwork_ghost_cell.py @@ -52,7 +52,7 @@ def specify_routes(self, net_params): (["ghost0", "119257914", "119257908#0", "119257908#1-AddedOnRampEdge", "119257908#1", "119257908#1-AddedOffRampEdge", "119257908#2", "119257908#3"], - 1), # HOV: 1509 (on ramp: 57), Non HOV: 6869 (onramp: 16) + 1 - 17 / 8378), # HOV: 1509 (on ramp: 57), Non HOV: 6869 (onramp: 16) (["119257914", "119257908#0", "119257908#1-AddedOnRampEdge", "119257908#1", "119257908#1-AddedOffRampEdge", "173381935"], 17 / 8378) @@ -61,7 +61,7 @@ def specify_routes(self, net_params): (["119257914", "119257908#0", "119257908#1-AddedOnRampEdge", "119257908#1", "119257908#1-AddedOffRampEdge", "119257908#2", "119257908#3"], - 1), # HOV: 1509 (on ramp: 57), Non HOV: 6869 (onramp: 16) + 1 - 17 / 8378), # HOV: 1509 (on ramp: 57), Non HOV: 6869 (onramp: 16) (["119257914", "119257908#0", "119257908#1-AddedOnRampEdge", "119257908#1", "119257908#1-AddedOffRampEdge", "173381935"], 17 / 8378) From 620ddd31193c2ff263e32b0ca1b0bb16f4e2d52d Mon Sep 17 00:00:00 2001 From: Eugene Vinitsky Date: Thu, 28 May 2020 16:24:57 -0400 Subject: [PATCH 77/85] Add scaling for stop and accel penalties, turn off reroute on exit --- .../rl/multiagent/multiagent_i210.py | 10 ++++--- flow/envs/multiagent/i210.py | 28 +++++++++++-------- 2 files changed, 22 insertions(+), 16 deletions(-) diff --git a/examples/exp_configs/rl/multiagent/multiagent_i210.py b/examples/exp_configs/rl/multiagent/multiagent_i210.py index 26c69b0a0..03371f7e6 100644 --- a/examples/exp_configs/rl/multiagent/multiagent_i210.py +++ b/examples/exp_configs/rl/multiagent/multiagent_i210.py @@ -59,7 +59,7 @@ 'max_accel': 2.6, 'max_decel': 4.5, # configure the observation space. Look at the I210MultiEnv class for more info. - 'lead_obs': False, + 'lead_obs': True, # whether to add in a reward for the speed of nearby vehicles "local_reward": True, # whether to use the MPG reward. 
Otherwise, defaults to a target velocity reward @@ -69,8 +69,8 @@ # how many vehicles to look back for the MPG reward "look_back_length": 1, # whether to reroute vehicles once they have exited - "reroute_on_exit": True, - 'target_velocity': 8.0, + "reroute_on_exit": False, + 'target_velocity': 5.0, # how many AVs there can be at once (this is only for centralized critics) "max_num_agents": 10, # which edges we shouldn't apply control on @@ -93,9 +93,11 @@ "speed_reward_gain": 0.5, # penalize stopped vehicles "penalize_stops": True, + "stop_penalty": 0.05, # penalize accels - "penalize_accel": True + "penalize_accel": True, + "accel_penalty": 0.05 }) # CREATE VEHICLE TYPES AND INFLOWS diff --git a/flow/envs/multiagent/i210.py b/flow/envs/multiagent/i210.py index 8efc06820..233aa42d8 100644 --- a/flow/envs/multiagent/i210.py +++ b/flow/envs/multiagent/i210.py @@ -97,9 +97,11 @@ def __init__(self, env_params, sim_params, network, simulator='traci'): # penalize stops self.penalize_stops = env_params.additional_params["penalize_stops"] + self.stop_penalty = env_params.additional_params["stop_penalty"] # penalize accel self.penalize_accel = env_params.additional_params.get("penalize_accel", False) + self.accel_penalty = env_params.additional_params["accel_penalty"] @property def observation_space(self): @@ -222,16 +224,18 @@ def compute_reward(self, rl_actions, **kwargs): else: break else: - speeds = [] - follow_speed = self.k.vehicle.get_speed(self.k.vehicle.get_follower(rl_id)) - if follow_speed >= 0: - speeds.append(follow_speed) - if self.k.vehicle.get_speed(rl_id) >= 0: - speeds.append(self.k.vehicle.get_speed(rl_id)) - if len(speeds) > 0: - # rescale so the critic can estimate it quickly - rewards[rl_id] = np.mean([(des_speed - np.abs(speed - des_speed)) ** 2 - for speed in speeds]) / (des_speed ** 2) + follow_id = rl_id + rewards[rl_id] = ((des_speed - np.abs(self.k.vehicle.get_speed(rl_id) + - des_speed))) ** 2 / ((des_speed ** 2) * self.look_back_length) + + for i in range(self.look_back_length): + follow_id = self.k.vehicle.get_follower(follow_id) + if follow_id not in ["", None]: + + follow_speed = self.k.vehicle.get_speed(self.k.vehicle.get_follower(follow_id)) + rewards[rl_id] += ((des_speed - np.abs(follow_speed + - des_speed))) ** 2 / ((des_speed ** 2) * self.look_back_length) + else: if self.mpg_reward: reward = np.nan_to_num(miles_per_gallon(self, self.k.vehicle.get_ids(), gain=1.0)) / 100.0 @@ -293,11 +297,11 @@ def compute_reward(self, rl_actions, **kwargs): speed = self.k.vehicle.get_speed(veh_id) if self.penalize_stops: if speed < 1.0: - rewards[veh_id] -= .01 + rewards[veh_id] -= self.stop_penalty if self.penalize_accel and veh_id in self.k.vehicle.previous_speeds: prev_speed = self.k.vehicle.get_previous_speed(veh_id) abs_accel = abs(speed - prev_speed) / self.sim_step - rewards[veh_id] -= abs_accel / 400.0 + rewards[veh_id] -= abs_accel * self.accel_penalty # print('time to get reward is ', time() - t) return rewards From 89e8ae2be1b9a268346ca0f28ad19a099fbc61e2 Mon Sep 17 00:00:00 2001 From: Eugene Vinitsky Date: Thu, 28 May 2020 16:30:52 -0400 Subject: [PATCH 78/85] Increase lookback length to 10 --- examples/exp_configs/rl/multiagent/multiagent_i210.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/examples/exp_configs/rl/multiagent/multiagent_i210.py b/examples/exp_configs/rl/multiagent/multiagent_i210.py index 03371f7e6..5a57fe41e 100644 --- a/examples/exp_configs/rl/multiagent/multiagent_i210.py +++ 
b/examples/exp_configs/rl/multiagent/multiagent_i210.py @@ -66,8 +66,8 @@ "mpg_reward": False, # whether to use the MPJ reward. Otherwise, defaults to a target velocity reward "mpj_reward": False, - # how many vehicles to look back for the MPG reward - "look_back_length": 1, + # how many vehicles to look back for any reward + "look_back_length": 10, # whether to reroute vehicles once they have exited "reroute_on_exit": False, 'target_velocity': 5.0, From 633309941624ff59a510375e5b25181d348bf2f1 Mon Sep 17 00:00:00 2001 From: Eugene Vinitsky Date: Thu, 28 May 2020 16:41:58 -0400 Subject: [PATCH 79/85] Exp 11 with lookback length of 5 --- examples/exp_configs/rl/multiagent/multiagent_i210.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/examples/exp_configs/rl/multiagent/multiagent_i210.py b/examples/exp_configs/rl/multiagent/multiagent_i210.py index 5a57fe41e..f9dd11ee2 100644 --- a/examples/exp_configs/rl/multiagent/multiagent_i210.py +++ b/examples/exp_configs/rl/multiagent/multiagent_i210.py @@ -67,7 +67,7 @@ # whether to use the MPJ reward. Otherwise, defaults to a target velocity reward "mpj_reward": False, # how many vehicles to look back for any reward - "look_back_length": 10, + "look_back_length": 5, # whether to reroute vehicles once they have exited "reroute_on_exit": False, 'target_velocity': 5.0, From 71e175f4513f20f52cf606042859a0ac168e07c2 Mon Sep 17 00:00:00 2001 From: Eugene Vinitsky Date: Thu, 28 May 2020 17:17:20 -0400 Subject: [PATCH 80/85] Set lb length to 10 --- examples/exp_configs/rl/multiagent/multiagent_i210.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/examples/exp_configs/rl/multiagent/multiagent_i210.py b/examples/exp_configs/rl/multiagent/multiagent_i210.py index f9dd11ee2..5a57fe41e 100644 --- a/examples/exp_configs/rl/multiagent/multiagent_i210.py +++ b/examples/exp_configs/rl/multiagent/multiagent_i210.py @@ -67,7 +67,7 @@ # whether to use the MPJ reward. 
Otherwise, defaults to a target velocity reward "mpj_reward": False, # how many vehicles to look back for any reward - "look_back_length": 5, + "look_back_length": 10, # whether to reroute vehicles once they have exited "reroute_on_exit": False, 'target_velocity': 5.0, From 659b55cec84826831249f03da3d9151c2f254ccc Mon Sep 17 00:00:00 2001 From: Eugene Vinitsky Date: Thu, 28 May 2020 18:19:20 -0400 Subject: [PATCH 81/85] Add done conditions so vehicles that exit are done if reroute on exit is false --- examples/exp_configs/rl/multiagent/multiagent_i210.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/examples/exp_configs/rl/multiagent/multiagent_i210.py b/examples/exp_configs/rl/multiagent/multiagent_i210.py index 5a57fe41e..86398c8cb 100644 --- a/examples/exp_configs/rl/multiagent/multiagent_i210.py +++ b/examples/exp_configs/rl/multiagent/multiagent_i210.py @@ -268,7 +268,7 @@ sims_per_step=3, warmup_steps=WARMUP_STEPS, additional_params=additional_env_params, - done_at_exit=False + done_at_exit=not additional_env_params["reroute_on_exit"] ), # network-related parameters (see flow.core.params.NetParams and the From 67dc1216e2f4822e9017f0f33cde45a060e810f4 Mon Sep 17 00:00:00 2001 From: Yashar Zeinali Farid <34227133+Yasharzf@users.noreply.github.com> Date: Thu, 11 Jun 2020 10:29:40 -0700 Subject: [PATCH 82/85] Update lane change mode (#948) * added new lane change modes * replaced 'no_lat_collide' with 'no_lc_safe' which is the new default lane change mode * bug fixes and PR reviews Co-authored-by: AboudyKreidieh --- examples/exp_configs/non_rl/bay_bridge.py | 2 +- .../exp_configs/non_rl/bay_bridge_toll.py | 2 +- examples/exp_configs/non_rl/minicity.py | 2 +- flow/core/params.py | 97 +++++++++++++++++-- tests/fast_tests/test_vehicles.py | 6 +- 5 files changed, 93 insertions(+), 16 deletions(-) diff --git a/examples/exp_configs/non_rl/bay_bridge.py b/examples/exp_configs/non_rl/bay_bridge.py index d7d78360f..f3e0c465f 100644 --- a/examples/exp_configs/non_rl/bay_bridge.py +++ b/examples/exp_configs/non_rl/bay_bridge.py @@ -48,7 +48,7 @@ lc_pushy=0.8, lc_speed_gain=4.0, model="LC2013", - lane_change_mode="no_lat_collide", + lane_change_mode="no_lc_safe", # lcKeepRight=0.8 ), num_vehicles=1400) diff --git a/examples/exp_configs/non_rl/bay_bridge_toll.py b/examples/exp_configs/non_rl/bay_bridge_toll.py index 1b8268aeb..0941823cb 100644 --- a/examples/exp_configs/non_rl/bay_bridge_toll.py +++ b/examples/exp_configs/non_rl/bay_bridge_toll.py @@ -46,7 +46,7 @@ model="LC2013", lcCooperative=0.2, lcSpeedGain=15, - lane_change_mode="no_lat_collide", + lane_change_mode="no_lc_safe", ), num_vehicles=50) diff --git a/examples/exp_configs/non_rl/minicity.py b/examples/exp_configs/non_rl/minicity.py index 23b232480..35d5edbce 100644 --- a/examples/exp_configs/non_rl/minicity.py +++ b/examples/exp_configs/non_rl/minicity.py @@ -18,7 +18,7 @@ speed_mode=1, ), lane_change_params=SumoLaneChangeParams( - lane_change_mode="no_lat_collide", + lane_change_mode="no_lc_safe", ), initial_speed=0, num_vehicles=90) diff --git a/flow/core/params.py b/flow/core/params.py index 5a7467580..79ad8d689 100755 --- a/flow/core/params.py +++ b/flow/core/params.py @@ -17,7 +17,27 @@ "all_checks": 31 } -LC_MODES = {"aggressive": 0, "no_lat_collide": 512, "strategic": 1621} +LC_MODES = { + "no_lc_safe": 512, + "no_lc_aggressive": 0, + "sumo_default": 1621, + "no_strategic_aggressive": 1108, + "no_strategic_safe": 1620, + "only_strategic_aggressive": 1, + "only_strategic_safe": 513, + 
"no_cooperative_aggressive": 1105, + "no_cooperative_safe": 1617, + "only_cooperative_aggressive": 4, + "only_cooperative_safe": 516, + "no_speed_gain_aggressive": 1093, + "no_speed_gain_safe": 1605, + "only_speed_gain_aggressive": 16, + "only_speed_gain_safe": 528, + "no_right_drive_aggressive": 1045, + "no_right_drive_safe": 1557, + "only_right_drive_aggressive": 64, + "only_right_drive_safe": 576 +} # Traffic light defaults PROGRAM_ID = 1 @@ -897,14 +917,71 @@ class SumoLaneChangeParams: ---------- lane_change_mode : str or int, optional may be one of the following: + * "no_lc_safe" (default): Disable all SUMO lane changing but still + handle safety checks (collision avoidance and safety-gap enforcement) + in the simulation. Binary is [001000000000] + * "no_lc_aggressive": SUMO lane changes are not executed, collision + avoidance and safety-gap enforcement are off. + Binary is [000000000000] + + * "sumo_default": Execute all changes requested by a custom controller + unless in conflict with TraCI. Binary is [011001010101]. + + * "no_strategic_aggressive": Execute all changes except strategic + (routing) lane changes unless in conflict with TraCI. Collision + avoidance and safety-gap enforcement are off. Binary is [010001010100] + * "no_strategic_safe": Execute all changes except strategic + (routing) lane changes unless in conflict with TraCI. Collision + avoidance and safety-gap enforcement are on. Binary is [011001010100] + * "only_strategic_aggressive": Execute only strategic (routing) lane + changes unless in conflict with TraCI. Collision avoidance and + safety-gap enforcement are off. Binary is [000000000001] + * "only_strategic_safe": Execute only strategic (routing) lane + changes unless in conflict with TraCI. Collision avoidance and + safety-gap enforcement are on. Binary is [001000000001] + + * "no_cooperative_aggressive": Execute all changes except cooperative + (change in order to allow others to change) lane changes unless in + conflict with TraCI. Collision avoidance and safety-gap enforcement + are off. Binary is [010001010001] + * "no_cooperative_safe": Execute all changes except cooperative + lane changes unless in conflict with TraCI. Collision avoidance and + safety-gap enforcement are on. Binary is [011001010001] + * "only_cooperative_aggressive": Execute only cooperative lane changes + unless in conflict with TraCI. Collision avoidance and safety-gap + enforcement are off. Binary is [000000000100] + * "only_cooperative_safe": Execute only cooperative lane changes + unless in conflict with TraCI. Collision avoidance and safety-gap + enforcement are on. Binary is [001000000100] + + * "no_speed_gain_aggressive": Execute all changes except speed gain (the + other lane allows for faster driving) lane changes unless in conflict + with TraCI. Collision avoidance and safety-gap enforcement are off. + Binary is [010001000101] + * "no_speed_gain_safe": Execute all changes except speed gain + lane changes unless in conflict with TraCI. Collision avoidance and + safety-gap enforcement are on. Binary is [011001000101] + * "only_speed_gain_aggressive": Execute only speed gain lane changes + unless in conflict with TraCI. Collision avoidance and safety-gap + enforcement are off. Binary is [000000010000] + * "only_speed_gain_safe": Execute only speed gain lane changes + unless in conflict with TraCI. Collision avoidance and safety-gap + enforcement are on. 
Binary is [001000010000] + + * "no_right_drive_aggressive": Execute all changes except right drive + (obligation to drive on the right) lane changes unless in conflict + with TraCI. Collision avoidance and safety-gap enforcement are off. + Binary is [010000010101] + * "no_right_drive_safe": Execute all changes except right drive + lane changes unless in conflict with TraCI. Collision avoidance and + safety-gap enforcement are on. Binary is [011000010101] + * "only_right_drive_aggressive": Execute only right drive lane changes + unless in conflict with TraCI. Collision avoidance and safety-gap + enforcement are off. Binary is [000001000000] + * "only_right_drive_safe": Execute only right drive lane changes + unless in conflict with TraCI. Collision avoidance and safety-gap + enforcement are on. Binary is [001001000000] - * "no_lat_collide" (default): Human cars will not make lane - changes, RL cars can lane change into any space, no matter how - likely it is to crash - * "strategic": Human cars make lane changes in accordance with SUMO - to provide speed boosts - * "aggressive": RL cars are not limited by sumo with regard to - their lane-change actions, and can crash longitudinally * int values may be used to define custom lane change modes for the given vehicles, specified at: http://sumo.dlr.de/wiki/TraCI/Change_Vehicle_State#lane_change_mode_.280xb6.29 @@ -943,7 +1020,7 @@ class SumoLaneChangeParams: """ def __init__(self, - lane_change_mode="no_lat_collide", + lane_change_mode="no_lc_safe", model="LC2013", lc_strategic=1.0, lc_cooperative=1.0, @@ -1051,7 +1128,7 @@ def __init__(self, elif not (isinstance(lane_change_mode, int) or isinstance(lane_change_mode, float)): logging.error("Setting lane change mode to default.") - lane_change_mode = LC_MODES["no_lat_collide"] + lane_change_mode = LC_MODES["no_lc_safe"] self.lane_change_mode = lane_change_mode diff --git a/tests/fast_tests/test_vehicles.py b/tests/fast_tests/test_vehicles.py index b791bba64..1ae2d1cf0 100644 --- a/tests/fast_tests/test_vehicles.py +++ b/tests/fast_tests/test_vehicles.py @@ -33,7 +33,7 @@ def test_speed_lane_change_modes(self): speed_mode='obey_safe_speed', ), lane_change_params=SumoLaneChangeParams( - lane_change_mode="no_lat_collide", + lane_change_mode="no_lc_safe", ) ) @@ -56,7 +56,7 @@ def test_speed_lane_change_modes(self): self.assertEqual(vehicles.type_parameters["typeB"][ "car_following_params"].speed_mode, 0) self.assertEqual(vehicles.type_parameters["typeB"][ - "lane_change_params"].lane_change_mode, 1621) + "lane_change_params"].lane_change_mode, 512) vehicles.add( "typeC", @@ -89,7 +89,7 @@ def test_controlled_id_params(self): speed_mode="obey_safe_speed", ), lane_change_params=SumoLaneChangeParams( - lane_change_mode="no_lat_collide", + lane_change_mode="no_lc_safe", )) default_mingap = SumoCarFollowingParams().controller_params["minGap"] self.assertEqual(vehicles.types[0]["type_params"]["minGap"], From eb4b4859b2d67ecf206e294c12a173a2b5672ef5 Mon Sep 17 00:00:00 2001 From: Eugene Vinitsky Date: Thu, 11 Jun 2020 11:22:53 -0700 Subject: [PATCH 83/85] Flake8 --- flow/algorithms/centralized_PPO.py | 3 ++- flow/envs/multiagent/base.py | 2 +- flow/visualize/time_space_diagram.py | 3 ++- 3 files changed, 5 insertions(+), 3 deletions(-) diff --git a/flow/algorithms/centralized_PPO.py b/flow/algorithms/centralized_PPO.py index 5461f6a7c..e14e54537 100644 --- a/flow/algorithms/centralized_PPO.py +++ b/flow/algorithms/centralized_PPO.py @@ -221,8 +221,9 @@ def centralized_critic_postprocessing(policy, try: 
central_obs_batch = np.hstack( (sample_batch["obs"], np.hstack(central_obs_list))) - except: + except Exception as e: # TODO(@ev) this is a bug and needs to be fixed + print('Error in stacking obs ', e) central_obs_batch = sample_batch["obs"] max_vf_agents = policy.model.max_num_agents num_agents = len(rel_agents) + 1 diff --git a/flow/envs/multiagent/base.py b/flow/envs/multiagent/base.py index a9404eb57..5c4795bbe 100644 --- a/flow/envs/multiagent/base.py +++ b/flow/envs/multiagent/base.py @@ -115,7 +115,7 @@ def step(self, rl_actions): states = self.get_state() done.update({key: key in self.k.vehicle.get_arrived_ids() - for key in states.keys()}) + for key in states.keys()}) if crash or (self.time_counter >= self.env_params.sims_per_step * (self.env_params.warmup_steps + self.env_params.horizon)): done['__all__'] = True diff --git a/flow/visualize/time_space_diagram.py b/flow/visualize/time_space_diagram.py index a93fa3e15..a9509aa64 100644 --- a/flow/visualize/time_space_diagram.py +++ b/flow/visualize/time_space_diagram.py @@ -17,7 +17,8 @@ python time_space_diagram.py .csv .json """ from flow.utils.rllib import get_flow_params -from flow.networks import RingNetwork, FigureEightNetwork, MergeNetwork, I210SubNetwork, HighwayNetwork, I210SubNetworkGhostCell +from flow.networks import RingNetwork, FigureEightNetwork, MergeNetwork, I210SubNetwork, \ + HighwayNetwork, I210SubNetworkGhostCell import argparse import csv From 8950d69253fc141bfff445776f37317583c5f45f Mon Sep 17 00:00:00 2001 From: Eugene Vinitsky Date: Thu, 11 Jun 2020 12:11:24 -0700 Subject: [PATCH 84/85] Pydoc fixes --- examples/train.py | 2 +- flow/algorithms/centralized_PPO.py | 44 ++++++++++++++---------------- flow/algorithms/custom_ppo.py | 13 +++++++++ 3 files changed, 34 insertions(+), 25 deletions(-) diff --git a/examples/train.py b/examples/train.py index 69ead32dd..7bf259691 100644 --- a/examples/train.py +++ b/examples/train.py @@ -295,7 +295,7 @@ def on_episode_end(info): episode.custom_metrics["num_cars"] = np.mean(episode.user_data["num_cars"]) def on_train_result(info): - """Store the mean score of the episode, and increment or decrement how many adversaries are on""" + """Store the mean score of the episode, and increment or decrement the iteration number for curriculum.""" trainer = info["trainer"] trainer.workers.foreach_worker( lambda ev: ev.foreach_env( diff --git a/flow/algorithms/centralized_PPO.py b/flow/algorithms/centralized_PPO.py index e14e54537..dca737f75 100644 --- a/flow/algorithms/centralized_PPO.py +++ b/flow/algorithms/centralized_PPO.py @@ -1,14 +1,14 @@ +"""An example of customizing PPO to leverage a centralized critic.""" + from __future__ import absolute_import from __future__ import division from __future__ import print_function -"""An example of customizing PPO to leverage a centralized critic.""" - import argparse import numpy as np from ray.rllib.agents.ppo.ppo import PPOTrainer -from flow.algorithms.custom_ppo import CustomPPOTFPolicy +from flow.algorithms.custom_ppo import CustomPPOTFPolicy, KLCoeffMixin from ray.rllib.evaluation.postprocessing import compute_advantages, \ Postprocessing from ray.rllib.policy.sample_batch import SampleBatch @@ -65,14 +65,17 @@ def __init__(self, obs_space, action_space, num_outputs, model_config, self.register_variables(self.central_vf.variables) def forward(self, input_dict, state, seq_lens): + """Run forward inference.""" return self.model.forward(input_dict, state, seq_lens) def central_value_function(self, central_obs): + """Compute the 
centralized value function.""" return tf.reshape( self.central_vf( [central_obs]), [-1]) def value_function(self): + """Compute the normal value function; this is only here to make the code run.""" return self.model.value_function() # not used @@ -145,23 +148,27 @@ def __init__(self, @override(RecurrentTFModelV2) def forward_rnn(self, inputs, state, seq_lens): + """Forward inference on the RNN.""" model_out, self._value_out, h, c = self.model( [inputs, seq_lens] + state) return model_out, [h, c] @override(ModelV2) def get_initial_state(self): + """Set up the initial RNN state.""" return [ np.zeros(self.cell_size, np.float32), np.zeros(self.cell_size, np.float32), ] def central_value_function(self, central_obs): + """Compute the central value function.""" return tf.reshape( self.central_vf( [central_obs]), [-1]) def value_function(self): + """Compute the normal value function; this is only here to make the code run.""" return tf.reshape(self._value_out, [-1]) # not used @@ -175,18 +182,18 @@ def __init__(self): ) def compute_central_vf(self, central_obs): + """Run forward inference on the model.""" feed_dict = { self.get_placeholder(CENTRAL_OBS): central_obs, } return self.get_session().run(self.central_value_function, feed_dict) -# Grabs the opponent obs/act and includes it in the experience train_batch, -# and computes GAE using the central vf predictions. def centralized_critic_postprocessing(policy, sample_batch, other_agent_batches=None, episode=None): + """Find all other agents that overlapped with you and stack their obs to be passed to the central VF.""" if policy.loss_initialized(): assert other_agent_batches is not None @@ -280,6 +287,7 @@ def time_overlap(time_span, agent_time): def fill_missing(agent_time, other_agent_time, obs): + """Pad the obs to the appropriate length for agents that don't overlap perfectly in time.""" # shortcut, the two overlap perfectly if np.sum(agent_time == other_agent_time) == agent_time.shape[0]: return obs @@ -352,8 +360,8 @@ def overlap_and_pad_agent(time_span, agent_time, obs): return overlap_obs -# Copied from PPO but optimizing the central value function def loss_with_central_critic(policy, model, dist_class, train_batch): + """Set up the PPO loss but replace the VF loss with the centralized VF loss.""" CentralizedValueMixin.__init__(policy) logits, state = model.from_batch(train_batch) @@ -384,6 +392,8 @@ def loss_with_central_critic(policy, model, dist_class, train_batch): class PPOLoss(object): + """Object containing the PPO loss function.""" + def __init__(self, action_space, dist_class, @@ -484,28 +494,13 @@ def reduce_mean_valid(t): def new_ppo_surrogate_loss(policy, model, dist_class, train_batch): + """Return the PPO loss with the centralized value function.""" loss = loss_with_central_critic(policy, model, dist_class, train_batch) return loss -class KLCoeffMixin(object): - def __init__(self, config): - # KL Coefficient - self.kl_coeff_val = config["kl_coeff"] - self.kl_target = config["kl_target"] - self.kl_coeff = tf.get_variable( - initializer=tf.constant_initializer(self.kl_coeff_val), - name="kl_coeff", - shape=(), - trainable=False, - dtype=tf.float32) - - def update_kl(self, blah): - pass - - def setup_mixins(policy, obs_space, action_space, config): - # copied from PPO + """Construct additional classes that add on to PPO.""" KLCoeffMixin.__init__(policy, config) EntropyCoeffSchedule.__init__(policy, config["entropy_coeff"], @@ -517,7 +512,7 @@ def setup_mixins(policy, obs_space, action_space, config): def central_vf_stats(policy, 
train_batch, grads): - # Report the explained variance of the central value function. + """Report the explained variance of the centralized value function.""" return { "vf_explained_var": explained_variance( train_batch[Postprocessing.VALUE_TARGETS], @@ -526,6 +521,7 @@ def central_vf_stats(policy, train_batch, grads): def kl_and_loss_stats(policy, train_batch): + """Training stats to pass to the tensorboard.""" return { "cur_kl_coeff": tf.cast(policy.kl_coeff, tf.float64), "cur_lr": tf.cast(policy.cur_lr, tf.float64), diff --git a/flow/algorithms/custom_ppo.py b/flow/algorithms/custom_ppo.py index 6c8da3324..47a4459aa 100644 --- a/flow/algorithms/custom_ppo.py +++ b/flow/algorithms/custom_ppo.py @@ -29,6 +29,8 @@ class PPOLoss(object): + """PPO Loss object.""" + def __init__(self, action_space, dist_class, @@ -128,6 +130,7 @@ def reduce_mean_valid(t): def ppo_surrogate_loss(policy, model, dist_class, train_batch): + """Construct and return the PPO loss.""" logits, state = model.from_batch(train_batch) action_dist = dist_class(logits, model) @@ -164,6 +167,7 @@ def ppo_surrogate_loss(policy, model, dist_class, train_batch): def kl_and_loss_stats(policy, train_batch): + """Return statistics for the tensorboard.""" return { "cur_kl_coeff": tf.cast(policy.kl_coeff, tf.float64), "cur_lr": tf.cast(policy.cur_lr, tf.float64), @@ -217,6 +221,7 @@ def postprocess_ppo_gae(policy, def clip_gradients(policy, optimizer, loss): + """If grad_clip is not None, clip the gradients.""" variables = policy.model.trainable_variables() if policy.config["grad_clip"] is not None: grads_and_vars = optimizer.compute_gradients(loss, variables) @@ -230,6 +235,8 @@ def clip_gradients(policy, optimizer, loss): class ValueNetworkMixin(object): + """Construct the value function.""" + def __init__(self, obs_space, action_space, config): if config["use_gae"]: @@ -256,11 +263,13 @@ def value(ob, prev_action, prev_reward, *state): def setup_config(policy, obs_space, action_space, config): + """Add additional custom options from the config.""" # auto set the model option for layer sharing config["model"]["vf_share_layers"] = config["vf_share_layers"] def setup_mixins(policy, obs_space, action_space, config): + """Construct additional classes that add on to PPO.""" KLCoeffMixin.__init__(policy, config) ValueNetworkMixin.__init__(policy, obs_space, action_space, config) EntropyCoeffSchedule.__init__(policy, config["entropy_coeff"], @@ -269,6 +278,8 @@ def setup_mixins(policy, obs_space, action_space, config): class KLCoeffMixin(object): + """Update the KL Coefficient.
This is intentionally disabled to match the PPO paper better.""" + def __init__(self, config): # KL Coefficient self.kl_coeff_val = config["kl_coeff"] @@ -281,6 +292,7 @@ def __init__(self, config): dtype=tf.float32) def update_kl(self, blah): + """Disabled to match the PPO paper better.""" pass @@ -301,6 +313,7 @@ def update_kl(self, blah): def validate_config(config): + """Check that the config is set up properly.""" if config["entropy_coeff"] < 0: raise DeprecationWarning("entropy_coeff must be >= 0") if isinstance(config["entropy_coeff"], int): From 7ce615e2f2990f7a8e67404feec065256d9e6876 Mon Sep 17 00:00:00 2001 From: akashvelu Date: Thu, 11 Jun 2020 16:45:00 -0700 Subject: [PATCH 85/85] Visualizer tests fixes --- flow/visualize/visualizer_rllib.py | 2 +- .../multi_agent/checkpoint_1/checkpoint-1 | Bin 10209 -> 20358 bytes .../checkpoint_1/checkpoint-1.tune_metadata | Bin 180 -> 210 bytes tests/data/rllib_data/multi_agent/params.json | 40 +++++++++++------- tests/data/rllib_data/multi_agent/params.pkl | Bin 17562 -> 21381 bytes .../single_agent/checkpoint_1/checkpoint-1 | Bin 582 -> 26194 bytes .../checkpoint_1/checkpoint-1.tune_metadata | Bin 180 -> 210 bytes .../data/rllib_data/single_agent/params.json | 26 ++++++++---- tests/data/rllib_data/single_agent/params.pkl | Bin 6414 -> 6687 bytes 9 files changed, 43 insertions(+), 25 deletions(-) diff --git a/flow/visualize/visualizer_rllib.py b/flow/visualize/visualizer_rllib.py index 8c38a91c1..67b9768c3 100644 --- a/flow/visualize/visualizer_rllib.py +++ b/flow/visualize/visualizer_rllib.py @@ -166,7 +166,7 @@ def visualizer_rllib(args): if multiagent: rets = {} # map the agent id to its policy - policy_map_fn = config['multiagent']['policy_mapping_fn'].func + policy_map_fn = config['multiagent']['policy_mapping_fn'] for key in config['multiagent']['policies'].keys(): rets[key] = [] else: diff --git a/tests/data/rllib_data/multi_agent/checkpoint_1/checkpoint-1 b/tests/data/rllib_data/multi_agent/checkpoint_1/checkpoint-1 index 0693ed4b62a9cabcdbecb267201ea862144f212c..d346e9dc58b39a5b511ced70927eac1d0d32579b 100644 GIT binary patch literal 20358 zcmZU)c{oKn`xww<#`NGS4uu8$eLzHzgEsBdrxPk1j+#8YJS!rC1?(e=S=!wiG{{Wq^O+#b5w zKg95VMoGB_I{Y(SiYKNQ#S;(l6d!#cBy?@4FHd4GZ;apQYscz(%Kh^eqmAo*OpHe7 z+~(`glU%=Tdq7Z_;rhT}U&Day{-K-K29M4mI#)6rF{fg%fy6 zZoKij&Z3^eqoy!CWmi|%1%F3>|NJ=%h4NI^?Y9*Y6%wkuFCvJRzCj*c`hXiJ55r%Z zI%qt?g}8;2xLcADKIQmh@ajke^W*f(&~(AO!7})`(v_~dCNJ1UW^mVE83At?C&(F8 z6im09$gC1ortzNk&~uasB;U^xoYhnk#K+7Mxb_*~M*kn+GrkWlrJcd5**wNY0r?!RumkKwT4)Zs6eyj3(CyzB8LZKiRn2f!Qb;b+>zTn@=8An zIAStP%Wre8)w~c~kZH~Rn$ro=s|>+QIg?#HMo7@`sfyGFXTf=sX;fdzQ392>bB?pY%amdS$Rlg#JFSjjS;K} zsKc$%x)>rgjQ$~Y(CQHcSt*_lmu8cZY2-Tt&(Zn7Y~Rz|QQ<2xKX zjT3;6h{7Dy@-uhj#Ro~ z$qX%lV%9(eFs_U>*A=Eu_x zVxrVb-%ve4Lsb&7`5g%}WW(XYxIJWp-U4zxPoMcH_YMo=uS5FX&!AD90*fAKbG=+2 za?aiihi=hXNNW1Y+^3~5vt&FsjXc9r%T(B2XTasvGlJ*(v$*3|D&Y`2o@=+)o-1ED zotsqhoa~jljyj(8Kmy`mV~d?Y(}EJ-gizkZb)GW+$79!RUK_%j zH{O*0c;3`Fo|@nI{dSQLrL(Q|DhfFo8?rfu>IK$+U8h;QWp3yE5tHZtP4sA}SnbCD zl$FZqXdA<+QIz3NJsZb)cQk>MXL+yjnv#pP<^HM6x!Mf=hgqtu@<171h)GWgW{8|Ml70&fKf<`xgaPxkrrQsfY43)_IQquh~1+ z`fvCB|HIAHf6dqYkNH||ylMXp9NMEF@Q>>+cFrX^p}{!KS(+=ha3N{=Jc4SnayVUj zJ&5-w!IW7Vg7S;0c&sLZ7I6dUl~5lz;$_F3^^4*Do#_C^slr&VAudp`=*9PkJ3%Z_ znj8IfJeNPa5?4B@;rY}futzZojlx{HadzjySTqZzaywz01{YH8ys+FZAB&Ytxj_mt z0?r0wZuIy*8Z9D;nPcx^`KJJGz&#N`L1GYh-}v1EvG{jT&@npi*+#@c5y6QywIFCU z0OblrVz=@dz_ZJcG|n7y-ZsMEfvwp3a-rZ{RyEx7oK6nh9uLLMaiBN4oK$TkpjTrt zxqo}J;MuJ(f$ioK;3kpLbWFgH(z->0+i`-?%)b=D_0_DgVcg>|`i>JO0zADn2bx{W}t zIs@{qW0K&}z%YF`=QR$`e~DTRE>xW% 
zVCn6TmO*O-*>2TP&6|%Bts2}LYA4XDvJdtq8*=-M4!~lcSMWXCoHcL`!s9lDIKSE# zK+{uDdRK>A?C}ok-wvZfmbSq7U;@dJaRi}l%J^Nx5qV;A)c$injNz$q=kwn}+Uv{s z)F>ABn*O53s4rM!X9_Jnc3jOJ18j$)J~wVdKbY~%u*u;Kl-QdH9t?hkw%}SElGA>+a9K|Cr<@(1@V zNF76U>7e)-@R;cf>wd`yE^#fnGrmf5`5qmRse2agIR%3D{46|oHxjq3l@qLBWVtV2 zox#`I-B>y8H09}p@}{rzRQ!K@=$`;E<6jTb{f`Ifx$*S>Hvr81Cjk7Z459Arb#z`_ z4VjpCfGqu0M9ntcpxxcxBs(R8*uGDrw_OUTnQ1(|75#|$*mQ`Dxt>oPi)zW?6M1CS zh7`(|Jx$eS=g?fkG-B(0gpAWTLJYGG(FrXf)U9#!oss$Uct|CwI$uI`>obUYS{8|0 z+CW0K9i%*iP~NO{|0mNh{WsGv{MT@!{}^uU#xwcH@c-ltrvJFUypYhfj7-?o-pkeu z{UFvh1@ur!IbAbokGF$s@F!hE+h+b?9TnHXxnmKyuK5j3N!t&F^D^lY6(_hk=NP-f zP6Jyl9%DSL%4hglL{=%PB69=^N=m(Xa`N;M|APL~PM{7Wxs3)tZrN`AJ! zvvsi8wIDyY8rIo+124jR7N^gv4m znYAg7#@;T#LH9GnqV^a`xOSWNT{T5+b19koHV%?tChS|`L$qNzS+jwMhCUBaL)aWl z1{T5$Ek53;*1-*J!Z5V>4OKle2^XtH(x1tDF;7z)rDku&Ha#QUNVeed$Gg~)5Le=0 zHVFc{9hkJxnasF~)i96SMJse7VPpSfJpa3p4roTh2ZMvqykQ(=DZ0Um<7I{9r}%X0$1p zhECynr0}jU1g);-E6_Y}&=uT&*Ce5;R)H`>KKhf~B!(G7iawxeofC5;Vu&{!uv z8~crFh#mSei(8N4%-FZ|_nSq;qVOph_wX(WnjS?KPBx@3?D~n9Rz6(asRD1_1Ot?> zL^-Tv_hxyqa=l~m+mu`6^_U|xYHtX9?8qTbODyOoAqRStza68C*22C!ui2IZjx=@X zGBhM+5c%+CqBupBRa$tQRoe8G-Vc}qCfAMd>OxgCo;DW(%m&E=8w(od5)Z0cMX*9R zh7Lcif(Gdf=>@CIaM~QSoUI+$G&4uaBl2kFd3>W6VVD#JwGtW5y=B$dr z@;~BmCUGmB+r5A>mpo2eHf$y~x$^K}{Vl40-j7~Yy+k&B6=!ZcG~geX6Ev%|j;zX_ zik7FnvCqGo*(#$3UyL?mgS-wtKGjdRpWKKFO7BU%>nXhD_K;CB%fOP-2c#=v4y@05 z4`M@obkpfG5NNOt)k;lp@LMXf*|KDe0i~OtoPq7%cEG92b3t=L2Afmk1k@@TkJ7`? zea*S?$3YPibKQvvh>D{TEs`Wfe=J_|>BfjVsYKKGJ)NA@3i^*vvtvFjfYeq^sP?HL z>jmk|d&4cX_pdeH-Nu6nUZUh~tTeM|uL1E6X6awB!{aknk$GJ=X^h%icHEd~Xb1{H zmmLPMa``GUTkj~{F0=&hUvUSHw=+Ca-ax(8OyKc}XndK$BhPn8!L6h6_)+f^>II!4 zi(eN)fJ6q}n^Zt*K3dV)W6zWKU*Ayw0TIr{(-PRKW(H?|$x!1ze~D}AQVbnBgUuIR z3C$3O^{EA9!>grm^G_=k6DcEfxj3vz$)y3`4Df@}1Jc+b4)3=+;k?y#BunWO38^S1 zt&?5Rme)rVI>lj!%VF}VI|_c?eZV-)Q^K+}Q|Mo(3^*ly4@qk`)n9dhy`11e!!IMh zeWC{#R!t+VR${2+avKTi7GN{?ImuI1rzB9zCb7#xtS$ke7X(tl?ax4~(iwm0Jex zJ-&}jKGRJ6Zk~imld_S#csx7%SU6q0wUSDGi@;+4d;Dj5NieV>k?DL~MfC!mQ8?5M zBUd(2v!8dE8^dQfGa4o_v72M*1@|PNhVjT(?jU1>Md9*YMYs{T269Gp&|z>jcuajl zDkt8C!+ok?w^bL~uRmhNR8G^D;y4m9BeAjPjtP{NtD#7BEGy+*4?>;Fz`634Qf?Wo zl}{rV1d;TJa4uPBz(t!2srV{rgzD-dO+h`m z-%&|dU2Y&wAG6`TO%c+W*r>cZ1Ex&sWl{l>(stz?330OpjT<3wb;eV2(2S39 zB^&VLnqJa7IkWL;`ET0RM;V)oAz1w3G-tL&KP^4`kqEU*GR3Pd(RyJouu6=_%bP1e z*4Q2SPQ@6L<4F|NTFBx#Uo;o7Y+T^s&I6lDbDdO#Zv{d;kS+`YHyfJ&aaDssb;#UxokBi&Jls7 zwi@)+n1hUWwHJz?n~G~T&!c8h0W{^!bbRik399eKV7^`o_3IcQkl~FL>!!h4j!`x}O%>bQNciy)a(b&LgB}lQ*PSA4kMJRK^AyngX&jc-^^-rEl_;n4ijsp%BL{%mlyhqL{f@sL`R}Ap7Lf zH>NCE9t_%fIAcr>l>K7pA^*+fQd3O*ae}nLDq?(pEnN`ZPg}p6L6^vM z%F7YM70(-q@yS{|QzJqBh0j4~odNyP`kLHa&=+)!vl;%PYgbpG8Hm( zUy=zOx#-xrjcIi{P4B;wg5!Tv*pt`d>GR*eVAwYg-_CQzj%{!GB@y2k^LTL(J2xJ_ zk68;zd=)xY?+be^!wD9?6lQd#rUO%*iwUb3V7~Sc)BJO6rp6GNl^er;OS7X%0gHeQ z6~m+RW?`|;V#xR9kON03`{7S9X1p`NvTg_Z<>O=Me0ZHWKDtVsZ&uODm6|v^MjTwn z{^oz+`LX2%J884xM|#!qEYVnQ!bC?G(s7&Sk(k6Ku&RG7{(fFd*N7F-=tG-G(vc~+ zQ6K~%dR!P0pH4U1DYCi+pUCCSn(Xq~O|-*{YR zapoW{GROq(vjXz{Q66>qr~^3}34mwU5Y@Yxu-C-{s-?Bydwv24!n+ugcfz1({)6#a z&_Ui>1;DkLk0EGy0=6eQz{I_iF{y7K{#Muvcdk{l9j{(fE7XF{dtUHiyA=4(_9dG- zVsPV!SejPRhL{~k9@?zH*?xc)BMqd{EeI`}&Vhn#HhthNjs3eVVZu-y3DkVax(Qz5 z&&_f;>#iY&L|f3oT5>z(F)@(ra>6~wqex)p2m1Zn0zA0#84||_bn?+Wa23zO&8u$m>m-m&%&MdA*FxyT z{5z=i!30WoIU(ne6-GAN&^V)0s9Jso&A*7#E7MG1Fn9tgtZ=0-)4$Q|N$R-i<|(-U zeFk{em2g%mU8cWocR;IkC2P$@!_(L=thLQ+8nyi}@t3?z1;vq^4>_H5|7LRttn`IB zoI!Hxz;e=k`z#GBh=a(#{vi4)AM3j{h%Y|@JX5o1NwyuVxy>in@<#E1Z7%G&=!$Bc zxv=OO~Rsdl&qJ$?F ziC26iFf>;K4ZjP+{nA?Y-KzxbnI;a7mHp(K-FKF2zZhKW67luemqhsQdT3Ac1pReG zbVIuz>Ra-NkL)Hi)e&Y6(;&Khl=K}C4WjsjOEr>`DPwY-YO54b>GDo`SMLyAP_hK; 
z%wCAq&hk6r&$msocf(lxK!$j)Pe|= zDEz+j8O}5nL(2;dG|9yn?tH8v_y1VadOH=;c+3U`Qasog=ZvokeewFZ2#nLUht)cD z?9|UssB-uoCU?(Yx=w2~w0j??b$5#SlIvyh;eNGrUn@kJkB|y_+kSbo*hmR>~=-xd-#S81G_yh~^ zZ9a@c9jRE_z5t4L8>51e4E8^^h2Kx7qQu8!vT)-X=HTKfxY93$tfhnW?~!~m?0bvu zJRySy4;K-WBtEVER)BL(E1<6QHt?Ueo;18#fj{arK=agHCS_OzrDh&rilncTH-Ddy z2hVpRPeBe|zAy%#D>gXcq8Fr{c)>s3w4a8$@JYdp=Y(e#$}|7J)YH`HALwE6FFUaO z4?CFc#+&oM(8KB<=y9cGHs?b^3xB}0_$Ue&W$cjgmXUZ0{?}`FOIy36vyG?9qZj|j5tQ?G&w<= zf&Xc&E%RDIi^KN)11IkWvTIXvr79_Rn1p8rK3%)jQ7 z|CkSMJnnzdht0p}!#DaWjWF-SeQIN{=}kON?kXd$fjzLzT1Fs^sknPzIlfIOz?~J= zNSbu;zEC1;la+3;%DF*&XZqqnCm!mxjs=ZTGMLB7!u|dk@KP!sidHTlkG9`|v-OF% zAt#h3*W?4|wieC5qL1xgHnD9>C-RT&6~`M|E9fno@layRp<^#PLyB`gy7eS7(Va%* zw|pNAi)_J`CTn;&{w^)-lgIkb2|>#+G;%>WZNs@m0B4sjI<)-7kJ~NU=e|i zYUF4Gs2IHV$bz!FH06$_#Eax3iC<;|%_`at$(el4S0hi@Ati~|PRU@vk`Sn$A|Q7D zE|us*m2e>=AFjcmMd+87@G;5ieMB4=jhPcvwdF(fPa4{5u=2WIB?(`4h_@G7r> z=0-(ggu_L;BlbLKlyRBn?jiiMW-NA3)4=#Yx=`N!nt7%u0kaO}!;{f<=soo>cpo1k z+BOalyF~@sDom_L%K~~cVIjFQWfo0R`#=&mJfLM&b~x!}6bW#wgq(1SVWNsCEPisd zTrfk)=}M4Owh8z{i8wbM;PuzZbdhQoQ>ZY3Y}v1Z+2b~WiiHVGnW#!7eht&|h)<2a zlP&P_Vjb$A_!N&jJ1~XIGr*vCIS76Rk@Y<0cH|-sEJDg2Bx|o_AwSDtl!T9m->=@V z8&*n^{<0iWQz!xJ7mt=amc>vtn1SDC_>q!ZreLz6m+Wbh!AXWWu(GO%bs9IAQEIS^=^5e#5%C+i}Zp6T#K?+xTdF9I4e_jH`D}!A)D$;Nr{g5LvT; zmJi54gy=&YTE3K+4+$V}j5j{@i6IAeeZq&!GN@#NEq*^34Sbb)sA()jUxO~1^DP_a zr|?l^eH9VRp9bEY_vqrB6F8&xE$LSYpl05kluuS-#a|tGDwa*(jh2xv8m74AN*e6u z2~g~;2EIwv#!IQ;DA=@}=H}04PiHm~XVoLP;mT34DUBl5<&Qas_~U8G(+0*>Ab~sD zYq991<>w4FWigisQL(gEfAf>h%-9BtQge`VQx%6D z)3M$i>9n=u(B38&@^apgiRq{LuDP1PJGGTA7F>bVpJst;j3Wu;AH|YWVc=G(jDe0j z;d5jKT`TMa$G#;r1UvN7<*CYu^UKKv!xq|8eV;kiSPGlnK2ZOspUGW^1n{1^n=rY2 zM)br+P?2n>;Tqq_n~qFU{x=X#30nR)?5R-mpe(GT|>qc5a(C z?s7}O{(g1%)$^B?6z`>f7uC`%IXQ9^e9?9H9h9ErgZ2-Dpy}Zkx>95rX>u9|F5ws1 zs>58oKClzsU6)6_!&QWvz7f3Ei_vW(5gyW z!7~DReLK>-nM=17h0}(O&9SIVSOq2Q0~aF;E_c8!pw(;BIEW)``(tbuO&s*Q8_!_-@ovWI>wB*mZO z(K5^dI4@i9Q0hLMccTMZ-rU70Lpy1w^L27Cua=g^&H;Cg<G`9#!nCL zif@A21q(2Ax|Cq#m=HGkWaG}jEbxkT!R`oEe#8ARbe-P|UQ@Vez4Ii|#&Ql#-ORqx zw}9=7*5kUP97xvmrt-eEuzd0$acvnO^SDPLjJuTF-WQA;P5yvU?HMMp;S!xUAPGWj z%$%*09>JcZl_cnK7Og3Drndez_;>IEnS6~UH#a2VxcmcvFTKef^<4U>W*Ty)8o}8G zO9`#*p|?y_L4CAwcj4|+sy-(NeeNg0?_h86UYCVUuYBP*r8KYh3kY>@WDJGZ;-yJ8 zU~KuAK3{u*JtrLr>!t$;rY|RFSKNYqyX|22zCnKbUO9B*@aXao4G@%`V%EjvF(XQr z%-Rnz(A;Q86u0^jvu9eUCp`lv?K=woTa#$?!FRwt6iWjx+7kQgU86`kj$dVNfW0f- zu~LNrulL6p-Vq17GJidLL@u6jF{ZS(V>?dooe0N#5^ze@0dNbCBxRtL z7Q9cTJ{PutmA(e%ggjx#Ot8fVMZxTsX?M{FhR9B{@8m%DTHHHlKhu<%h#swB;5nuY zuY@lEzT_pc$Tyx1c^`&z)=bB){4;3z=qTKDdI;!XOY^uz_ZleC16lm=%BRuQZz>>U05bGj^C#t@Xefe^NeQi_W zVq7#XdtwfPaed?>T!iZ%J?L^9MHsWXntT#6PJ=Fk#c7Op^=$*TYj6b`N99wXhtU5LwAFl|4JC5F@_I=xEQ(YFe9eK!JYd6DHzH6btWG-oz zAu#>TVLJBPUD_Zw9}=<`gVx|!=qnnHxgGj+$FXEweYBKTq)VgONF}3FodV>?4Z7N| z0!oi;hPMa)ux;;Fu)lV{#7ASTVNbX`S}%SHGi96zCpwXCW)w!_el@w1Xvj(9zoHH^ zPk>Rg0SY^oj-rqP?2(&|kC(q;IX`7Uw{0HYxupp!qoSba_ac6{|03$4K8rnik_*8v zf>EQ!fT1Gs81^L#?$7sxynQTrAzMd(tg(Sf50(MKD6TNX7qQi$@1@^>c8@krQ(J`M4@URPsV7KZZYX$LZ^TWvUEoe@0@E#j0oT?( zVcHG$(0RsJv9doBoYT)^6)K<~HF^*PD#mb#vtH@P#VF5-dw`WoA*>NVB zWDRGLRr!6)i;x(q(&kBh@2XJl-6E1CNTUu{GKs~POwv-sBlLL%^_0vcPMbDSIk{Bo z_$!@QgC4b#PbXa`qo`iuF_Ia3i=DaDo!Ixgl5;Wz^u(tu!gCDeIsISNhuOcXj|Kl4 z?))FaUEFvJ|BF6c|3x3hQd}~r#ETTVm7v|JSfak6g6|WQ2xHBQIgf8=;)Vx2G9+&d zI!<#zVTmr=WG#%HfvMy{;tdq4Ji~rgjtBJ)D>&jL1(&Q$!M;?5y%~^8)KpB+?{EM; z_3$jY`auu6*4|?*v!@UhE&-!F9h_IEMRO*cq6;8{CdOZ&?(apwDDwt&x0Dbhdnb}N ztqydT!%=ceRSOO$C!%CQ79H_;O?tP8GLI)k61884shHg}&J)>_V6`m_3qOb8?%^7! 
za5&Abe*T^0EqKZ-_R50w1(V2YkwCgWbc7h*s-jzOGQ`O_1F}u<$I2dh>wX z(oaFL3}xtEmJXuB;`s4-GUW-aK+cQ(Bv@S-c2_Kc!s@zX_`(Ts0JI+rJc@vk1vuDY3cb?6mU%RfSveV)a6P$2<_wiQxza|M~G zBe1utlsw)fMKl+cpz59_Y)jEDtZTkcZ%24T#^8K9@62z~m{0MT0kBTy!eBneiKBkx zE)@h%L0cUSq>nFyVRJOzdn<-1%8eY~wKrL%#me|z`!PKnXOHJAPJ;R2=XB+kE10)& zDNY+r!+ux|p?G>aYKRx&tuL00R81!vxoitbQMO`VoRXuW*3XDv@8QPHnxTY!CWDXv zYJ%FUKJvp~mMCi1(0ZR;BvQWsi}PKX&LzpTP4zm}d7y@E+ONn;Rt#QWoeZaL%R;!o zgUHLiXIII)faf(IQgJndPT`Bg{aR(~+R%4wcNH#HZ4Qs0Yp_cki~7$^*89V_Hlz;^*S6=}L6*L4DA$TsP@JXg~GGw=4*<_K(bwH%HB@N@!G~j4zf&lED4rK=QE& zs$C8y*W%iU?fJ{}T6zs_UAqNx_hmAs3J&-?zJa#>sDyagpYU*3INplLq)(eFsA6dz zEibrFPTDjRqaOpLr|Ay#3ww-er)}ZalR|26wizwD7QuO~N|JN?I1yPa3xC4ppnFX$ zy0o4{UQjc?%lIB_y4%S7I2Q^r!`;+BdInBe9mH?j&t>G-O-A4Sx9FOChv>wzaq#u& zLo%Yp;Du|KsKxa#R&j7VhS{DW`@iL*`}jlXiBFgzeiaR^&_LNmk@%wKHv9bgM|!|A zkenV{#&(P6QNODk7-N$}SAEYw)$koSWv(dm(Xg8Is^2F+Rvo4ue)iC5_kc~}7r{^4 zTHKlKh4JUKU`63nTy$$XyGIa5^Et(s*49MlN8W^c8!u439jCDImN|*g8r5Q_HWGt9 zmzj@C6Y=@IA$sR_5C&`*p^D0*ofb7mjH=j1`!B!bJpZ|e?mXuX*e^|LqggsJ`xp7} zG8y`;v{^|H8EVEkLtkFrOD5E}k^829Xw{W|`dZ>DiJEt`u}f(UOehzHmf3}v=zfft z8YRPpdB;iL@f|c=Esk|i*vd&5O{wy#*N`@|_xu=NOSBhH0X3x^^t9R`*!HOc*43Wn zteG4|ZXW+mI$lQ8Ydz(#Nca^!nO;uB1Ow!f{UuV^a0h%2?Waq!F4L>&=Qsh2{xTDi z50EP5T&z6lPR71?LEnv?jc=7pu(;74dskeb*<<5TktpK2oJ6>hR7tHz#jtZ;6STV+ z!X|5dvcA_m1I`s&(I*E&Km=1cnPOYW$i+5V*nN*)xcY`O=vYEzhNn{JIn$wZ{~NY^ z?P#f{=!*k=I>cxZ2M(`H!uvi`@t3Cv;rcB{gLt{oZrNh$eZT~=obIt=7gUBU40BXnQ-{*lEo+eNMuI zs_W@yv$yPAbvIlpHpE(uKShF%IDnZ2-s7X!zqvKIE^;Kn$B>@_V^vwc))(>8@X z%gSNp4*qO3?_3FYhHg@~q^WpnMHUtR`KeK%NexzeR+93w`=RpP37jxFoR(Id;ZGV> zvn(v}#4i&yp=FmQeo=oyhlg8nugYv-cz*N>6@jm_Iw31B0gb0_C5^gLf^Q-7FfTbA za^}1wOAmad4F?6pR%9!j+_njIA&nUAip9^yhpBH^6k9ubuA`kGm zG+O8}q#uZ;DShkFIMtfgxoeS`CkBYfQWw0JWrjm`9J-||VH8<5(0c<4P_=6d`c-Qa z+ZhBmR4bxYwIrii^v0i6L9NsrW);f^hw$f;ga#diPB=>CiXPIl z!Bg?EXa`lECJA9`CqaJ90epBj3{Kj`Nwo9}s6gXNcKw9jI6yfTZR!m@j>iocAY7XvsO6Y#M6XeS>8=NOA1KYj@gSS#5xLUMPk7_j@ zPj->7H^xziH|D?}?W~rstHVE{ABnKc1FDmz2Qz8|SQi6ZcI%g1vZU1r#~Q3Bwhq#8 zG_8hw)yzj(_cV~+w*Vsgj4}OE28o@m1Ye%0z*<#(DnBt6XDBSj8OEl#fnS1JU+UqS zSPcEM%n%Q3`%OMcB-7XhPsm%BsZ_W*(B;`@IR zn2$`)(J9~)y^;^@E_C4jJgS)-3R?S+h8@c#rU9bdzrxxiWaKB^^v)kQn@_X9jzk^x^oH4e)i!BIG31GdilJ z%xMirx?(h+%pr^5UXcr255Gy$OP$FxnNo7*+CfH4v6MJ2^Tn@U*3)&)`>@mS5SgpG z0FpCuuxR02vO!S{T#V+>*4RUYtd7O*TjiLz-4Nk;B4rx9!RXo}PGhw)(H}fS1-bc9 zrA?q2PLP2YKWW6=)41yE4=NIqL9N~^fjH+a6aD%z^hC-M8Gap`k{QW}Z*_s+l}E|5 z)MB*!(TV7w55s29*+12W%-?N0>4e+|^o2t$eNm$gvJQTjvSmE=IN(RMsu;$;BU)KAp=aevd>3y{4YL{YLi{+Hc({s6c20wU zfDyV{?-G?A=8T1VU#gz;4`7~$sLKTMMS02EEk!XhhDja=X~k)0!V+2YU@;Hos5>)mUn z9j0w`Ugu@7h|Hr}1J+RH>4Wb!3g|Ch2D8MW*;;FP1awSH1fR5A##MMb*{1c8-rM-nba>*)i}sAVP;QkIF>fLZdx7G{Y_*w;yvs)7_>JvY%luJ8Z$f!uxSV z{Q|9hcNaEna)WdC)8X10VPa25mD(jn)a%(#c6ZS_n7}SYb9Pjv>=^}p>kKg}qmk9| zo=#=^#)ALP-(VGeSIgQ_LgCur%;@i((sfJ@vV_*1q@HP2H3prUd?ZkG_E_=hO;bMIF zqYs+j?8isdC1mTC@5FWT15!756sJypOosm+N7wfWL^|d*`@T+vTsqTB`kM!d!;;JF zS1&`zsMl%?P_hMuu1`z>nS$S|mg9+N6H-ugj{f}S2G1`oz!AMvd{`(YkP4_lLCP`a zNT&f^`nQleJ=P*0^-9@A-6=%ec>-CM^Brtg&n?!=I0f*2oT;YEO2GkWG*0qzoOsQw{?d90ui(l}{q8eB{MIUp=+H+^D*n)4n zy(nvO1L?=r=;!4D8E-C=hmGHG($O)3lOJy32zP`^s-;0@=OL8!i=f;Sd-3Mi`SgwN z7fxZl7?@ca!1&4-RGk|Ni@h#nPMm%+xU56r}M0^(jD58Nru zIITAxPIe`d0znMen-zhK&;!oMJs1^H7e0Ig-U|A+9m==j1@8y^qzQdgR=7s3CjtiUC7>ynbTMn699dylP{TL=@1Z_sn29ZY9`18QHr zp7PeJ(ZgN+IHT1Yo)>1I4lGQsM28q?GA0#0sdrD_HdWZ>OqvZWi@SF@(WIrVAy z_xvBcd{>5fKX)<4CC1WhT7o~q`ia3cHQaLa0En-fIC@qv9ewSKkynyOS2swa;8_^8 zrS(LfH;D0Xlc_?}CgjiL!3FMJS~7P6-U`#;D)lO0MMNRo^|P?bTxLsrp6^3T=6GQ|UPATpiO<5d|$llQ6=EU&B=-I!SC)+5@H1C&;tF-?VFN44UpdiWl<7a7UIK;tlkIPc_Zh>241xTjxVn 
zN)vhCwS`7*m!N{b#f+g^2Bgio#@Tw+8moNXkmA}b47<7rJu{Rb+h&NW-ucbhJ9=0n z>~t;WMQj1n6Irx&Sd+Xg98adbKZ`B)mLR@26=u2hP;cH&3|dl4E-8x%nBzC8qq~gY zZ?GnKyf!7-v#$`nBXLk|ybogv4*@6q819l60jV-W+WlcHtR6E=D}J`Ko$Jd9H^7-J znQ;_XSM0!QLuouT?;fr-4Fa)pFFeS!l8&_R^zQr>z>OSY_DQ;9tP>aaoyvmd+(RVJ z{4zNr{E|-Tvc$px9jq&uirp2G+}H_1^w+%?^!ev^U_6_UpeX`2cQv5#4sXm4DyKsq zpF+vXdb%WH2K0O3z5T@qP#%5r^`9 zC+UK6We7A}20b<$X!9%r&T|8@Fue@t?l}g_o_?pp8+Az!jH<(iH5tthQxmqCp9j4#!kC(@7Br`MvM6QxO?D$yAgEqC)j+5EYdnzeJ;lfNqgY4LFiWC}gS(_l;|-aA?ti1qldS~N^Dn)9$uWITu^&G;WS4?AC7)&{}7yn^#l^!RV zVoIJ9soplGRf*y#6V)SDQUj=9alFwK-p18u!&kh zGu@=v7V62*9^F)URU|xoC+Jb?r*UkESsgdXzXbzd)nb-x7<6$nsNVq@jc_tzFUzvX zqI4R~cHT{rJEtHz?<9^Nc%2fwi&?5m4|YOh9Pkr=rAE6XY+Lh$sccsQi{*nT;<~t+ zfAautaEZ`yj3fIX;TG!d-Q@j z5;IxmelfPd!3qq{JF*G8524PsgQ)wu7cDqMq)$hzeAzmaWJTS#%huk#55R}^p)P0a;Pdx+aVFJ7@Swmx{C9!2!Pq2o~)$HR#c`x@ z*^o3$Zi+8iW}xVsfDfu$uzmTj^5zE{C~4OWN=)4f>hUtnpm=z0m`b` zQERyc_}DF?(qe18?$v>JdTLS66*rlm^&)ifc}&k8C($I?bm(y5=#^p_ED1Lvi*h5d z?e7B5bW?fTmks>FGyBnwKPI`&Z=xBijPRYuH6d)24BqA|)3n8DR1tHWep@V{-D6Ff zwRItG-K#=AHudu4z(9FJK?5FLU=5i^>fvRO20c0cPM$lsjLhd%$u)WwFoQD}S^Kwm z9J3bL?by+Ld;dJV^;8go?SpBKNOzf-f03uPJcdr|*X--NX!4S!5a->*S5+M0FZO$n z4=q3P!*1lVE6q3A>(h;xJ2DPVspl}&RtX%j_#5?I=fkqMd+CGgB6*}=6uUGx4__K@ z5^)Gacv;4Q`y)$Ao3fT-mpqbJ*Hy{2Bk%Dt*#I~hwnCoaFc#0RiG`(;{K?SANA3{B z@qVHe)aU6^sL7rMdG~*zu=NvYr{=(-e-23`ov=?-)oDq=hG}e-Lw3>f^V(o}N}!HX z6_KEc0E-Q(a5^~;)t98>=m8I@(R?Kw)9YqIzZ%fc+7x)|c^eZiC|u|}q7cGI58>m6DZ=^aHuU@|Wj&sDqG^6LiyS+Iwytr5 zqQ}J&cMWsMc%}rAQByc?jD*nP6?jTNpUoYkePQOzQcQQ%!2^0OWYH@TntQ~+ijZ~U zS&V`a2d?0YhBDH!?Z5&}Ll!skDAREa;k~dN#Cq0wsv7Bu8J{%+NyGPNAeJB zdaH}DY&{Nrz7&=8>zMwjC{Cqf5gXf;0!EX|nY}y$?7WxI0H<+K@k<`ASs98e`Ww(w zw>0Vu?tu8+9rAP1Ft%z`J}ql=$1`(ZVe8_zOtbbWd#{&-Zs#LV_nZognUlcuujbQC ztI&!QXy1=FD@ zQjmX7G_8!C4%H*L}_qs=EnWuBFJe&jrFjEqBIl8qmhz-K@Io znJ~x41rk;+q-foK;tTv9O3ygL`ivUFUcR?tlY7}fS9vUj4)LXe)WML{^osrIqX_x6v6q_OC_&8f zP?Dz{V)swDg2@+=lUR^~P9%t6(c(T60FCY@Cb zJx_dsUE*y!V}vHN)YuDIu@T&^k7X#`84iJ~6S#q0n?a(R4O45Muv?e<x$}!c3qDD|)oSgFjpFoA`fnnEe)5$DU3#B9+L=IKT1>I^1k!Ugi7c zN(aiJy)=c6{xuZV`;COT7kN7Pu9$BT@49vJN@k#6z@nB$qPG}65Td>VHjj#tpJ>|# zdb?LZtq85E`roF0&SfyB#0XkKdr^4A5`L+-BR5UBL|SD!biC`Cd|Ofy)*FSO$BjL7 zX{!b(^4Vl#JrBJWYhiCsRZuypPV=mtu)W?MhsuM)@hCGCK%tFB!Qbh|>St|-% z8(UeBaW*|vSVSvIqT!Ea|DbUwfzfluqH@szkVJJb?V?T`n(P41`Rj0*hZ$U)HypmL zF2OsVRal5b8nN@B&(3tn6LL6{lb31F^mH27JA^G(%wdN^vI`}{GAMj-xO|YOC)*g3 zjyWNrq}Xde1x$^Ho>tdzz}H#4fk72Ek~j6WuVhk3f9cBqr#}8;fH?hh5S+Um1Q%PW z>wgT8Ro@Me;F@$+I!MN1m&+Jm6u^9!YNL9-C3~~b9Cgm?F_q>Z?3|g4)eST7O_m!L z@`-G6as*q{)gte&JQyR|x1r3|8F%Nzv*L?7xWPM|Jt=g@h$nm4t3Cm&;oCmEY8=ky zeLN|TIUB}oyyIB2uN|ApB5=Zo2qsz zP26eiT<&F}nWTm(^Y`zH8=1yl!a+}-TNb{LTd$=c1WR{w1&a?CUGdn)jkU@WT+8=y zi{r)#s*A1*$sMs=$(uvM^jjN1F(JpH1XehE3vpt$vl9ZdT); zxv!Lj1ZWFBHEI$c%Q|6XSSDw6>ks~EnXb_FFiQB;->UGzWhQJ|c}XHJ=O1lTIeCW{)&TMN8C-wE0 z`nlR^{uscIsHOj!f&b4c0o_$L+DZd|%%Eq4bkp|(YQ)Fl&%al)oquGrE1}P%H#>FllfSQvhnJhL(~nTR-dm)70*w9*V#6`m literal 10209 zcmXYX2{cvT`@RSv$&g4&6b*)=TK$u^&epqmDkiULJcu??8{r^Kr zJ8if5A6zHi})gh%fPln#~qUq3f44xMZ-Vm~e|RDM;c!d!bX&v9d&VL}z1oSf|b zjg|j3_9Ee-O8yxZ1==DaeSW`D&|)fV4|xp33)XX$o^KU)HV5K9!5r?@#EkBW{ zt;51e?%WmwH{qbqAVG&{VbP0e+{U7Q407?|(vFufct}zBZ^L|;ck~Z#KxyuqIqUJG zOdOoH@(}v*`FN>=5e}@&!^Zv7&8(;B3UjPfg>#zag<#SL>Z*~jxcLe=L|>w@Rt+>f z*_*3?I{ zxqEV=xaVhGCz?zNCdWqz*Iv;PuADSZ=rX#T8}~$!D?8tWdn7!Nd*i@+h#U|H#fO_w zs@hy=+odIJ$aChVl#HNURx6IKc!6EAzd=Vo3qN+0lJ{5rxl&OsIHN5M-Sn4n?Yd*g z4~u@dbI?qQouf$Ng2*hN)xi3zy5WWfI;Ik-W?yhyV&=)g~+p_yCgsN!@ zYYGP-;iDq=m6M(Dskt3jdVeV=wf8VDS@{@e@tI=IjlvSn$fi8r*YaZCiql0L^-t!U zKG8Z(=dN;&s(&TVqNJ49o_?5j@M}HqYep$&^~ExtMOZ0MC$E~9J?$Xp^M_o{zOX!w 
znq~>-idzDY8dY&(FPHMpR+REKIv(ac-B`veytJOfP0TbozcZ7!Kg> z!CED7kU9>dZiiW~ICs3_xD$SSn~jppCR7=>kjT5#;pp*h{8q1uZ~9hY$1@L*^m;^Q ze$rv(zMW&c%Z<@Q`8>`ZT|)gM7vO_Ksjx@S2nIS^h_X=yJ+vW~OzAiZ4lfl*b(Aq| z@=8Tr=MpyS#SgkhgwpdjQb?SNH<)ZN0mnl(snX8CU#G?y6~2?68x@nn~*E9Nd;hv^@e;OvE`A#P1FtsVa!{goU++AEv% z$f!e>!hFzd3}$~T%!Xxk*O)2xv z<8vu=W!RDBrN&5SEu9b=a#rP{sC2bTSC8>&BNa&;?%R;N$@u`jpmz3WB2T5q{`Nr>>a*M zj`Nm6=m%dMihf7VTQ4KVHH`Mthk4 zk69$w>mGetD~@X)QZ!q$6p}Q{@k-DnFy4L?(^a!^8Ak~gzBos!Zi|6L^mA%`!2*l3 zwIKbaDDZAokP?k#dfHP7KW|ATF7YBz=wnXMrklO>Zwl&3CgI+;0$O@s1-?b`!75%7 zE8PEMX3D0J5WgzC>^wqb$NLh_)KYjUohZ0FO$n=y6_P_ex~Qfuju+mS!=g78aIbzl zU8l@MvT|9`@2g@;Q_S z&zcQ}Z6Z+Sq)Yx?%B0gnSFznwe-Mi~9?n?`v zRhy{Xn{KkeGZ3_!*W&Qgc_1Zn4d(@1zz-uUY1>FC`Mmuc-tJ!nB~zEcl%)W=b~=zz z^N374H5I?ioQiq!hIrFu5e9h=(z8#wRe)tY%-itE! z^jtYsJZvkoRWc9VzIM=P+i!IFHF@m4kjg}tPQ(6$QS#X08}nOK1RXuOuycnBof)En zFFdTlA|Qtr{!GFpjmg;EX3Lr$V~KtZLq*G{0vjxb6+OZ1?o&agBU?UV?&l`@`G*{8 zq_i^Z%em~!G6NjJFjAux5c{nxKW3IF$sub4j@6L?-!fjS^Zi$?W_q=7NW|6lVn@z1*)}lki@(A(4&K!!LIxg8zZ?2hwpZh2TP|@)$)82y;qe~ zemP6!t_6^Ou_~N!C=JF%?ZullE6CC`6~-l`3QcvNQa{yx;v!m#WlOwZ{?0sXjo}b$ z;(%#uW$C+JEZR&nWxBR5!gi_Sg8S35$){J_aN{)_YF;&q4BKXzWoY&uryC zd)0|o&nQ1mT_nD7AI1b{k=R%{a&Jx@E*ouS-F$P%cTN=PvD`;H zl(k7tWCL^`SO$_UGO$?lIrIFR5o%?(lXESP$RFcbpsbP%lC`>!-1?bxt<}I^4O-As z^PPHg_)sw36Sh3`#pj)IV0(5hY&Y?Q+8|5p+PfVaKP`dpKev*eNHb={IYTqCpr#Yfz3gSb*~HV!?w#a6xyi77;R&=qXoAAV zGV<#)#nlrUVeLj+^jejU;bt#b_nAnN{QX!5Ra@#+u#ZaXj;&Rzby#ONg_>R8z)W;H z0(NQzn7j8nbK<=*O5_HU?5GYB+oTKqpUyE}m-UH(+*-^?XTkZXD!q2B1bzM)z_4f! zy_N2Pg8Y04&{;&ZziPocjyC=}xCTpR8PGi|CqwMlPUymV4EXjA7gjBXx^-u%Xth6j zZ%IUcjUA4XENszG0?pq>n0&OA8tBM^ET)<92Jca|$qCejxk)svbkTulk0P3mpcQ); zQ%ddd;ss?G*P#iv=9H+}mlJn)W%3}r#q^A0li>Em2zZ{_MMd5|r|jQQ^xT{XVJ3#K zdgTQ0IPO48= z)cHT9ZvzgH#dT?%$`8rZPHZx;oHEub(FgMGH!-_5tpoFO8t}5Rz-&qQRt!J20*fj> zbC|^vwEIa4&W($~)}}jTn~N#7x#qE(d)4uCmk%8Kk^;RYE(Et{!iSh45-jk**^_Ru zp8xJro-G5eJ;P?3zb8XjTmn6IVIp#ze~^v>NxI8H7qmk2apg%3QX=Jre_W=Z{G0}i z+q#q*-ndSEzeHf;ZwqRq-AUG|)G+TXkC4sbktDq4GaZ*P8IKtmQ5)%bAZ+CkY%PTT zoEdOCX%_|(1u{uT0Tg~7!IjDa@}wl1HQKq4xqNpeabH_Ovt{H#WobRFNQ^;u6<@Y* zUL9&nDkDz#Kmx9d;O6VM=+%c7WXH(K)}_aclR1*#$ee9`cv={`18-A|13# z?*ty#CrFIlw9s4>f!I+qyP z%8)>P26m@kVg5dIrLraoRKHPpCUTX3kn|R-!;c{W3F+gIL%pW`C&Nw6FFadjH(#Te}A%f*n?4BBF*3Av+l z;klavgt;hSZ0|LCZ^t_}#kGvdcpymydNLR}s|jWMt*GAV8K9v%7b`_;NR?(VtG=m; zEIt{Iu8~)0rjj3=2$4r~;e6bud=H}NPqV1zjWGCikS^S`37R&Fv;4>@Xr;Ih-3^?O zDG-HEuQ!leKA}+1R13!wPBYfOl(Bc@BqUF`O4`RakchUKkeqCeCTr&~i`gT1iob`h zJ6ejT9Q{~hjkoN|HTAr-h=1hu$%$l9mJe-pK1R+in~dNb0e^mqVEgA0vh>zNG8&o) zDfefA#@CzlbKN&JwVpb97L2fbU&(u;5bEA?IAy>qkI#|ldeYnfiTQkG7 zQ{2ULma382sPr#>qjQ+4-`RbFQ7OhOK>dWNCog`{KV!a9i7t=ayE_ ze{$)i>C(Mog8cO&rj71SW|HzneDzBKrYAI|1rpV#`ALWVaJtv&nf+I#ZT5+C%Jh7L zoFF6e5x=_kCa*-iiJ$a#h1o(w`{n>K%8wBL%(HrH#`~{iaJt75NT(yiJRDHkdYL|K%TyIml0qlN3aIzcsDb zkTpGZ_ybQt!19e=z2SsJOc1y^Z!%Rd@a272=wKT8uGn;`hpFjX{Q&+h%W_ULM%Q`JNeEsh=j?e!D8NF#=#!U$n4%f*Lo4w}pHA)l6>IzR!5& z@Co3_Z$rf!j4;d73c4C&xH~5~awmUZhkHJS3p3`+2yI$!!=(-hs8Vqj-Zg9k_xg9V zYlZ~(rukIv6~pzyRQo{g!ryV!Li;(WW$AL={(5ky<_Iy^lPi2tsK-^?q|L4K+%3e{ zKIp1yEBx~J7;>Y>3Fmg~#bjO$)TUSovn>}0%VtFj8x<|FUBwt@$?xEbpUl9|Z4SaB z%lX1RKmTG=iX`{XZB_2KW!5e+5vJd~3V)Wr0>4^u zu6glJjD0MDTRqIV$_{^-&h@~2+c6GDZGADyOojWs-$VHJus_$MQ$^U_be5*|iNYQ3 zL0aBZ#_QZy$}9G&;B}AYbDU}lIllA|C*tr?jyPM+@sX+Gz5I~L5wEG_B>y_ZvllPs zlzk}Y`N@}YN<6AKX%>0B2ajENnro_f@6<|pG0_z~Wz{N9Xw%!~rt-1%L(6#U50>%P zDJF5$>Z&+@eD-nl=T~xK6-s$CjVd`&$J=N_-U~tNuq9pg-+snM(*j-7Z*f%3WoUF2 zLmXGw!C;Ub%4yd@-n|TgOw}=PU6F>|Gq$YNrTwU{`+?eo>?e|;j#NqeG}FO3$`r>M z;==R=G;qRZ#$QStb06E|z$FDx)LIL6nwI2(n#mVi^FpK`d=j`C;CE0Lm+Z`x1xAvJ<(A9|hJ9p(0ho*L_b9foE`r#G4 
zw=j^IwX2qX9Mk+hE?5lq-i~BOff?#_Wuu^aJ9sgZX_re98kgvSs8s>|xu=sV=xM?^ zvvfH2^cxWy+sjiO{78I=A7o7*`#xHCj~;nBM6J#&L&MRd{LrF6h&(WlEPA|;COI6y z^z~J+5fu?3xB4e>YFLBuRsm% z>Pn;Stt~i9*PYxj8`H=72s7c9Jn|rvi5kBeIy1MBkOPNl@Uwj+&1(m-t@=$*bo8)w z?th4n@)G#FkBb9)$05jNv*E7K$;F)WwEV(Tl72-FvPB;=lCNDDJ)d-BWX0jKWhy*Q zSAl`qh7i5Pn610YMRS`qQ52ogp6H>10rMkQxVS&{xaSalwn- z)PI#bsynqIZ&?s&zLP{IZ2d_MpKIU}DRo@E8KaraWN!Om;#0+EIOPk;os1cTDTspUHa}Sp zyF#LokWP*y9>kTo!$j=%ezWhl%m}~uBo$sa20>3S>=RqgPCPgOh08>#-)b9B4thj0 zpUJXQXH%HBVT8W?84nJX8{vxDT8unhOCs-2B_XQ1aO(I}`oU~7$|tyzoH2b?EOQ$y zNONQYgSsH$?MAXSCy~B;f0iv!eM0=a=i-t<9XxH8M`sj2qZ$V1iEXzrr6 z@EZdxVD!K{Z!@))%47BRF>rih1OEJY5N_Q+2t~%vSj(rkDChfq`oJNSO11=(m7_{D z_0w!vIO7mwq}GPyLx*5Ae+t|>8c(G+J*UQdIc$%fB3z`2RA?1U>(^J3xQL06*=CQK z4S6JA_B(%G`!G4;t4MWkzM!Q$jL4#q7|fG%fj`CPsdfbmi|xy>>7X?+h&}+?^M=W} zQA6nF4Zyt^eR!HAiun~DG}h0A%9t-8K3|=oqB5WT_a%c&YST6Q^yG}-y81=pk(?_K z(|u1wy6vf;znKY?I>ZiZ??cz!vrvI+0Rdkd$>2%_nEL294c?N1^PRnE&z{RP*D@cR zCYF;XpGRbQ>UsLIH^eOJ+A0E4b{MtuIPG1`AoH%5F0W94t>MKWFIS5PE9>d5%GIR# ziUAm3wx(@?3t-ZM3~YMcL9B|C+2jX~Oq{3({0UiWX6IQ+5A`&W4yDa>!`>}q>dJWX zeS6(lZe58nqMM0P*E72LS~03VRKb^bZ_}->7trSxxwPi+M5H7W!y@`=ZvSjp)T9B^ zuU%nZoaiRG4{yTMVOgA8_<-)3yB{xfCW23WIb6A{hYO8%jIbk((J68oPCSWQo% z-N6J+la~|aE=d?{K0*?&w+kL^J4DwlSB9F&8}U-@GsdJx3{zKRgZth>rX(T5w7u3D ze7`l(<2TZv|Lr7HoS%SqKi0tUj)`cx;|A0E^%MC$Qbn#jdW^jTj>yh*f}5|$oo`P_IL= zKQil4a+rZ?(NA#@V20b!90QvhHu|oDaRE+n<*}1;Brpq3eB8ziRPNjjz z$MW*13VfHEfvk!s_qby=)*lvynvA)yu3;INzOG?iTZic3sXrj+#Sst&HjxR{18kqo z7e*!X6qGEkg<$Z-JKg(LeV2gY%y}GcAj=o(D0ly9L{ApuMK6ei8*|pHWOT=itQx_3$vtsOCWnfsOEKW7L zBEYxf@kv2C8g1=|Z&OY}OGrLj`1J%{{CdN5Z>9)7d}0avYZSO63Q1(|7*{qHXo7{D z6#n)bhyFq0f{Vjtg1qww=HD#k=r7)ab55$G#cmTQoqhyk6ypWg+NRP@ zjwVEU$iUa0CgwqGAFXh1z(=Ac?1_bU;bwrrm^K*-Tl3_omMX!^(h_KS*_ZBXjDxuj zJ4v{GDt62|2#UVHsdSMMEU1%Ya;uGTO~eK~<|bf5KbEsiI-#bqK50 z2KGk~$$v2e71o`mrV){tE|zcBAC-w07Hy;#^V}d)ejSdRT|&-mQ^8@a9n>kV38pz| zU{kyq)c$)wQw;~{lSL}fV%N#Ud$zIXU-#1a-rf8!OJ&&4zieQ-`W$?xv7dCgx(k~3 zY=n-kC(J5vg=St3qxxPRwmZyX7mw>>VkT&jymDXGbj2gS=ujJxx%rL^Jn+XGzk2B= z5odaGx;^Fv9b%I@>&e|TK12=c!(DfGqViK61|p?k>gF`OA(KdJglf#xd5?(+2jF9s zJN(>x2&PE}vrz|c(&G~=Nr(O>JaTP>k>YA##79rGvO7dRU(yEpqyols`=+tS|)j*bc-~O z4F9I-XGLM@EH!A%&Z4Q2LV8^+5a#p)evD26`^jBIw!j-T?)Ss}i`8Vmv=^*Um4lWL0{5V?f0E}IT$eq>BqSW7>K3P&(`)#& zxMe;`2~WnH_MgPEHk<5xQx3ePd#T=ue?+7C3(f!Aj+67J5Uns@6*WXNr5p76g>$5@{|J45(VoZzY{9SH2^e6~i$bx@@Ig(9!u?1XI2;b0 zYfWi|$9IxaI}1l9%fpj2E&8E;2Xxm|!rCQYnV`#*e*6&+&Nli#tG;QZ4D|B z;DgUcTG*fiYvIKERRFbnLFE24FgSC8lpe_--@`R%X4EvS_*#t#i|-KK;-{>k&s`KzY(HgD(Sz!Had03yZ9HLBqkP+J}g5UGx zvHRV6`p~@>-%DI1&Fyb_9_iv;ik!Titub%qnD#PcBPxMZuGqdh#A$+&G zNW)un@Z>gEIC;MjzHVL(!VAjW$Es=6J@yxK&1;ww!i@PC%Fjd8v=?M$!?-a`{~FaW zlVc(V`Jf&AnAYvcr0YKg(qBdSu%dY<2D(iE{*G^KSr}mR!vip*Zoo`pLn8k3t0&(w zHc}uxozv~FUrB$bj^@05tn+SQPGazANMjK$QB@ANG#X1|Y) z!@MXD)c%x4e{Vg=>UbrBLCsh8@C_AU`C%wObO;1RpRmhv5}USJftj&O9*3&a;L6yZ zv!<%lOnO=t%0IAzSie{}_9`EG43ofhOC<{9lbN1d4-#<|Q6?KRNo zvw~^3918XJtLcMnRqU&h7iK;76}(MP4)AUcRdLkJOL<;trM%ihMLd7$N}hRh2`_PJ zDQ|W`C8yrHg5&?Aio^UZ<$WfVoQ=AroVT47oHuv#dDq7LJ;}zUysxe$JoC;X-o~?a zye$30oR}?zyv>=1c};2sycWqy&c(87o>$Kij)iS0Peb7lFX>-3&(JbSu=|}7uX|d& z>FTgI{F&|(1iJSY^Cv%>CeXfgo_~G83x0jlC(|mLX7=mH9@ExYjNsb^8MD*fPfT%@ zky+V?eZ1yl0#obrgZ!$?C44O};IC+!#BW!*YN{Ifm^VJ^6#r*V9?yJ0&1{d-27cay z`KH#zL;SLte|avM?tDGla?XveE;43U6A7IX9;)oWGeUpNeJ3?`ynRP_(6*qMF{hrG kcj)-Jj_ZfUYM`g=*s1U^zo3x7FyH_4+llW9m59*)f66*xxBvhE diff --git a/tests/data/rllib_data/multi_agent/checkpoint_1/checkpoint-1.tune_metadata b/tests/data/rllib_data/multi_agent/checkpoint_1/checkpoint-1.tune_metadata index 
7eef2ef15bba26f49eb7e79079714b5c7015bddd..febe7b205e46a15ce78f3248344fddfc47a3eb3e 100644 GIT binary patch delta 147 zcmdnOc!_a>yh3Vfs%f%?foYO~Ns5V?WolxwrMYoZvN4dKY+`6)IMH4^!=N;Ri-Ccm zBr`WPz9hdSF{hB#-68FY_Claw1Rsz{Eyyg+Pf0C?%CdQjM(_fKixbOIQ{oeg<5Mz| mOA6ULBe;QrMTwR1WvNBQnfZBz91*NQ5d%F7J%d8dQau2Xqbj8U delta 117 zcmcb_xP@_oyh4hlr9q;lk*PtFQId&iN}`2{shOprp-HNxMUqi!qUA(;ZLfq^5nK!m z3?-SlsqrQGC5btOtnLm)-yTdlz`zi}2b4)I$SlrJNiBxTvU!U{@B)R46U$Ol;uDMG NQ!", - "Box(3,)", + "Box(6,)", "Box(1,)", {} ] }, - "policies_to_train": [ - "av" - ], - "policy_mapping_fn": "tune.function(.policy_mapping_fn at 0x7fda132e6c80>)" + "policies_to_train": null, + "policy_mapping_fn": "" }, + "no_done_at_end": false, + "no_eager_on_workers": false, "num_cpus_for_driver": 1, "num_cpus_per_worker": 1, "num_envs_per_worker": 1, "num_gpus": 0, "num_gpus_per_worker": 0, - "num_sgd_iter": 30, + "num_sgd_iter": 10, "num_workers": 2, + "object_store_memory": 0, + "object_store_memory_per_worker": 0, "observation_filter": "NoFilter", "optimizer": {}, "output": null, @@ -110,7 +118,7 @@ "sgd_minibatch_size": 128, "shuffle_buffer_size": 0, "shuffle_sequences": true, - "simple_optimizer": true, + "simple_optimizer": false, "soft_horizon": false, "synchronize_filters": true, "tf_session_args": { @@ -126,7 +134,7 @@ "log_device_placement": false }, "timesteps_per_iteration": 0, - "train_batch_size": 60000, + "train_batch_size": 30000, "use_gae": true, "vf_clip_param": 10.0, "vf_loss_coeff": 1.0, diff --git a/tests/data/rllib_data/multi_agent/params.pkl b/tests/data/rllib_data/multi_agent/params.pkl index cd832aa1c3eb1713e608fef452dbe168746e4cfa..192cf7558830fe2e280e383cf7777e9ee669a7f0 100644 GIT binary patch delta 10700 zcmb7K2|SeF`kxv5n(X^dMD|^jEoDMQq%h5DW{eqTk$ptAOrdwQXr-jIC|a~AC0Z0M zT8Jdls#R2||9NL5{ciVv@14(Q&YW}J=RD_ppY1&7ol{nT%&9`^W{Q@kgdhZ;biNr+ zhR%t@tYI)?iA)wBje@$Btsw&y14Bu;KPH|7IuSetHWN#sV{lkDo{V8pQi%LO8&1h% zBIJpDucV0Qu-J?^3>MF(Fz6Tpg~f(uqvjiSi}7Sx3=$hdW-uu!3_8DnCr@W!2n;$A zgJolgbOMxuGC=Lnf_9=7dD&Y^>reX_#$l5%1R|bDGj!Ac=Q9?A$;KoS$rL<|$O6wv zSQ?86tq?H~v9pne%H@=xjUv7%TSurv#08QU)wCwj7;CJE^hB0b9EZlHU}K1MwiS~? zkFmny!4y`3aIY8K>jk%=xuWW7zbP>!N(_fd#1JVlWHv+;T?8E$Juc3nQ%DRZ6eC7c zU}6ar4hsV_gjZ_=jfveL(TQwWtf0eCF#TkkWi_3f)dB%)KbUjShKDSZ6I!iPZlsT0;(t2-HZU z0VPrJQ&b(2#Ym)WUGWTCB+6DADpAy`>s3%jA)V@O$R#3?_E7u`HON}pQCO?XkXD~Y zSA~MA<)k5s{Ml>&d=rNF0;wyr-LCbBwI&kCuc{3D<12B zb#QjIb+&b}cfb+sNp=nnu1-SMR*I4)Q_ybXzfb8xi9;cT33arQ(PJkiz0*%^vEf#>U|Y*eU&)k+Y!)&0^$r=(Q$KFV=cF3j|tgY%G>VP9)Me){JroB`8N3u`7ZexxgUD0r8SeyCwK59 zF&GM+!UovUG$=yH7TE_C>S%Kxk{^)=c#1JZHi(}jEQo9%GSETL;gfrL5;O)-B8ko? 
zKP5jWKLLC?bNH~f1zQ0I6OYBm0^b2Vc_6AKTQNbTz*)r$jSrrYj*Ww#k}tyz5``vc zbZ~-#%*fk$(!jD8$AU;iq|tzZ1i|)89-sUnZv@)`kYBgaUN+760VYVn)6W4e2h^i}{bDw1x8YOw5&$J_x?K`6Nj4C{Ot}Fu_K^vWkyq z@X6nRrxGUy1OfmOH~|2qh#_(4!mVwZ@l*8yRXQv-@;9nFsPkkom;?@%HYp%s z-wX@}OQ$n{!+@QU1>^>NL>_~QT{Ec}xGM-A`Kvt#?jY}`>H?0yB6(`S?HC+FJOv*M zM9t#h;ux@dv-nY9VlEV~t1CvG18AWVUA4*DAziJ>+I3w$E|DjL!NkxQAm$6_r_KcA zf-yW1AP9k|SsZ~2|0(E=JVhWEFej7AhyfmtflX&hH65Hpq2V|PE=DtC)Eu61X0c5GAI|04wr!hsIx&kicfVD z(2)7$(#gd_Mf#ee0%NGrpN;HyxLfaQEjV_o$s8AfJQ%yj@bAvGY#!!jA8ux!#rcj(@Lo&QRGc*#70ACTb zl8!)cmcbW$Br@>4Wz=Vp7%(+vX_<45{EdC^15dc>NEehZoPQgW{p|V8;Z~^qR$<{B()M6wTfdGIz$%Z^p zftlqA(EnzQuABl-O^5X&JN4`z(5czru|V(gcp4=hGetM4^69dun39F$H|pkJ!m#)K zA4MP!Z{Y+CHa?z~jA6tJ=ce~)MG*z<;E^H|04`-sMkcz2W0ply!?yhDzm3-l>{u7irNTlR$#9$ zsh7c&xLZcP%@fGqx*TH&2A*GRg0hpZMHYz|d-2vOCp|~nUp>OUC4Vg$e2$}tF zc%U&;-RS8K2wS8nut;?_f{mmgC`b|#cExNY3K~5yk~ed)A4n+umSI9dy+T7U zXuk+P^#NGfFQ{M&ofz~PA8Fv10Hh>R8YP6uQza&Wq!U9UVwrRdi6~hA-wjou(do(@ zoF;~Dffzu+^AyCP1`~N`zM0(LV$olHl-dc8l@lxjq^iQOJQ+ZFvTG=8GDdh_VDWj9 zlZhy(%EE*cd>oNYW)OhiOj-j%5XZ#O;J{{X!F`zva#cnmk;$YGfW-*n)t@ZUO+F|D zh*mSe`7ee-$I`&Yhh@gVw9_ILoOy^(?)>nsR!H7JE+%gx@zPf~yZh|-cz z4TQnTPmk^(3Q{R>@1Kz|2vX27;O6tBLE;V4cm{z;1KaLYE=2Vc44H})u%f6t1(JjN zgkpm`gZ`RU2~q)`CY*2y56A>cB85f4QD_u)GKNV^0ONo?aQ|u$V2c-;%H-*JnypZ! z;Uy^Gh~Ri9+_sdN7!zm!2ih{cog%Riv96>y3EQt+tPw|EcgLpG1Ct_ja z;%T5ChQ)#Ris`PXftuQ;sowx=;a+G0Y*54P53BiaU6T3*Z~+;zDOXtIPfIy?l314T zP{yZz0%X&y4c@kGhs-36Z-3L^{y1saHvx&l;$Y>{YqzG%=EGMc{#aGL+muYi*rj=`Z5hy)Cd z2m|{MEEl05hUILoI8yNWlK~b7;?jR&5HDEM^zl;kcnlNdu#7l9Pgy8cVbcmDOu+fz zFfd60haqs#;er;F~KOoSxY3A znEd~&Ffbl9r6$h*qb4q_ETNvf8>Iv<^)gqlE_@MX$+Wp{a4{|WAWPWf`*>mCdh^{01w7+gArpGBboO|c%w0_kwXKk508hb=CXp`cMFQ3Iv&)w^3RnQcY zN@=g~&1oR!pSE1nG3+7Yzo^w@=CzLd180xjj%(;Nc+Y*ImaG^NcYL|%(&FQEu2ez9 zrI&cC{PDXjD2hiM`ynQmwpdSMpVf?_r447lh?h!dX#EhWO<1>YLsmjx0J;7rQ+~G2 zu!`OMVfo@?>yJ_`&+i=ab=N7sRI_1?RUTxpS98Vkgxu<#?-(g*v^R-E9wAi>bd`_t zc}xBNOYilTb4w-H=83+ve7v}NSwQ(<)11}apCf2m3&i?oJ#5dZ3}@H|wUt%SwL@_O zwjR;_tL65(NcWfKCmxl@J^e7d)k7=%ldh$_RJ~^B;>g(Jda9Z~)*tjV&o1N65KU-e zH`^Y29GqxgswfjVix^LPI9O8<8D&5qmzWf-OFxyjP~$MSrEl??jL?*D?EZI%kLmj9 zhr*ABF_Fz;7xu>goHuIgbvG$^RU4IcOTJm3q*QQ{Us++^p^c4n_ZwNqZfHA{cXEfK z*-I-@_$xR6p5qqCk|pcq?PbUh@Ly^qwqwF3bQV8;>gKG$S#aIlbl^;Q(R&hG52Mmv zIZl(*H6G6!Gj-tMfOpzwxBI0%({mgsuG?=KQWtqt+`)G=4|am-^nP~8pVjK z95XJdthq?;*w%%ndw63zZg$>1b0}x=NUUk86=9&^qkq%E^AS7Z(wKw0%jdA~pQI9c zhrf*8-cw%bxp&6Nt*`t~ndlM3>uDG@>;B}n#_F>Yha)t@xDGp6XV&(8I9AdAv%Wg< zy(HbJJ4N5t!}v`Bd;TKwh^}kt$9tNJq?R3YInB3oMiH^iEa@jvtMcBfWk2|tE;fWb z>@xG?pwFD2d4oebeVI{KnwLKfC8HQ*Zm7B@x?-)zHD#;NoPdkB?k?6@PLR9Qo_fvr zv4ttK?*>-&DXom#RQO{S!cs|tF(ZGG)4&eM$U0ora`5Enn-50s9c~qWgZhH{CT2D! 
z+rDA+?%LMi^)1yyGi`NxC?Q9?GDEr;Vf|f!DE(Pgo*?x$=4XMe?nFXlB4_E)6R*`K z`e7*u8of{az{w)jLTguNB*V4Pag~In$@z-@nT4MVn_vFe&#eo1ayxwa)^%rO4Ktr@ zNFIB?`Nht`l0Zv2zm)a`?WlG4g3%{qcbv#w>3h>FeBs;cv{Z+y_0a+46Db61ufeeu z3w`Gfr+&_$UcT$LZ~1je3(v;h{`IeCWm;?L%(>bkH}XOI&C>espAgmP^EBDV+m0O6 zm){U5(pB70W*2RopT!-q4NJOHr@FF#6J77Fxv#XP=+=1|PJ2!%D1?)e>!fx4qPiBD zWNk*w-BwZ8SnjuA@4RZ34pyqS$m9b3KwjrTHBLW!(_sV0(X|RkkEh%>epgk|c<}RB zY;m6YP^vZYltHkgss*k2X&L3^(e!8aBtwm==cwcKBhMx(tIQINxO)xo$9fXG4=s&< zcIO+X-FSVY;cCShae?HVW3R(7+btJcDKF4$@LIS-J!@2^W-Xs_qQ9PL^5Ae>Us8h8 zqCoi=9zLa;f{dC~`&IPjipZ{l5k<$#p1dCq-R6iz&Sgnv*L@l;@rqE4FurNc-xC(E zqBPrn&FvnCk*h4OB|(Kd5t1%$bBBB9b(wj~^Wm&RG{&q!R%cUd1-ia-*Q1uS$Mdm- zo_pIJE0+h%Ihc>dB53&&YL)MUTDV;ok~bbX@Nj!z zxut>|ap7%IMnB?I%>MV0UTYhNK6KtYbboY9+`THZhn397erBURJ{4#B`1YeV419z<&XWEufi`#egn|(^mTF%>DP|WX||J{Km{{2GRzzxpLZPXUo z=sN*tb1gfg4-l|zAI4KY7jp~I6O`k<^_BE{KK3u9!nDg}j6_J$whuRmx~rTrPmz&a z;jG=1*j0Y*gUAR?_E~oA*2XIXJ&}%-+8arVklxCI@i_NbhxpH;?c%34Jwo6X54X5j zjr?qWhj*YtEXDz|X85sU}mtbJLt)r}B?Y1X~{omr!Uai6?t*dmCAm=`b!m}hR*7xM z>*aofuM1@pyFzhoWmg^)Zu0EM8Xqe@DbxFIVEDEEt0GL1(SoSsN0m>R7h9VuoWH7_ zcZ$45cC&^|+$X{QRt7V>b;Ig=4b(x0vOs>VbrqLfp=HFQsKdVQN%2fr~t@-kK*?2qRoV6dD zvpRwB{eps@?KmdCcoTh|xpLNX#`kp<$r@haA-alcfpc}`=j0@R| zUwSjUyNBG?_Uyz7xv+%7j}Ezq-}Jl~xfUVS{M6vMYlDmS(iiT|f%wI%F>j5^chAav zxX%CBTNi7Or5Qf>3;iFA;}^}aJ<`-vtR-4;m=ajtvC3I#@z@e{>rx%g%Ti1j%ez8Q zY&KXkL{GhPpTyp^)V|2m`#WLlId}D&`fac74Rw<^U1aeL^W|H{DvJ4sToMbm7y0et zK6BM``BwWa$h*2i98y@Y)z|5%W(cM`qR6{s?(z8U<*D|=-rzw`lOn+`Nr(H)Da8qWo7uQhqb9q9Sc6JI1{;FQ;^z< zfKx6XB!wKF6d4>pVz3;R;E!3Zn6TH<)uE{2O>P@s_@ zt6U1V{(Zgdj$5Mc_AL<;CR|0O;3 z03d`Hl?HyfR4yq0CHrN+&r zH^T98QJS&hFESWkmmWllQ*o4ReA4#*XzEH_E8SoO1n zdp)jp>#D3ubrC-?RL^di&F-xdr077E{VB3;`=4T-YHK#2{oe7KOFcW59kFh&THorO zA-kx4=g$(YgQiy)n5wECP0Eken%>I|QQcCttu1?Wy?nR+SX@?mRP2#^N8-5cs$H|t zp)a0X2}xR}P5)R>-M`GN*4Mb@5FyvD+4&_`y<=aR$Fq{R3NxTo$g!d5I>*2C zb&WqYKjV<1_Q>kAMp#THtMpN>cDl^F!n>Dlw`JDMYCT|?nkv)&e4gE%w(<2E>JPu< z8B`NS=JviwSY;D_RHk3F*KhN#Z#jiQ#s*0~p%)$2ZtpYRC_38N8@QLs{I=VDe`dg{ zivy9)3%QyXY62ew(8ywSkBG&y<1Wr;Cxo#rvIo7vrg{McrdbUEl6LDjACpSgo>`k)6Lf zIcAyOk`a7`9%<~H?H7m|U@x-5%~hss^L_A-;-l_cT$FhRk-1=NqoR>v&Xu=&0?kFw z;^!axaN6EPtlnSI0 zynUgJ06nf-=ejq8L07Xrw_Wl#x>wV1vFP@tfqVPiCysIoR*QruE$};C6`+tOYPuo1 zqNMn(-8qAH6hiAuj*a~7v$NaV4tBSUSjH~a{n3DKIOCJiQA7>J>>oW;OIU4yy@v3f zqx~{lvv1LsoTd=juU}O~Bh-~S_Y(uMvt_e2;{1AKg1J%OZ#O)9H72TtlCUj0GyB>7 zwPL4Ec$D_7tl3g*#nqx}6WLiF5}fr9(yq*lW_Xv-*V=v1KBD|0o`%P zdXt2^o{JPDR;&CmDr=d1Sy{AOWuT;MVt-a7Dfraj+)VEH^DnGX`{;z-cMf}u@L!yb zb6m9ar0CmE_18zYy$#msDk*+Ydi1m(%693xV@D4x^dA^PDSTQ=Iey~ozJ2=ooTu5C z&8>qv*8=68JC2i$5a&H*2cN}qH*e`jYhT)R^V4F_BSQoYccm4bquu?VX0`V46Nr>^ z?P=C3L7P3?aUZxRHODU*6!KT^ktFjlp4ZQ$K~8i_|=8Iffq(J>5mBUb%Bq-Rn7ptcKq3J&d6vpAzqyOmJgT zAx!y4#mb|fnW=k-FP>-xqGT_3CLWg$r<*-?a?dAN$uo`EOJv5<8o{g9wOXIq=7DYp z=laHxcbvPm>p}EO+#Av0)gn04vO1+kV{!L${_~ELYohy~$#FAnzlKXhq4)QRy}Va( zReNO18d}E%&kSGnYDu4k^ZmYP%zl6Twy~o0%$2b=yI8Ya<3d$>9Omp&D9j58PFZmB zL>IZ>)n`N1u+<@XCOgXA943gos|WKfBB{zxI27)d<^Nvb}iBzR#+%Sq5cVj+}yxO?j$>Y1$ zf}ffu?fNE{y)Q&Yr00z2&U1@zmGrPlo_o>j(*D@={=v;oD$bTQXIiTa@z{mlU1NK; z+w~r3u$`agk$)R)yw>jp_rl879+NQpt#zXEuUud2$%&#q`_EO(pD7x_DmbzJ#dw9@ z;I4^ijHG!@wdO_#nH)L8sGHBZR*r?vTCyaQYcXDn)H)1)8YpnS+@3A{Bxm_NsaLB` z;`T`fOAWtybkp-}u>E4>rM6BxUOjFgDzbO{bEK2h+OmzK+eEYu{1nJp>_>F@cdrO(_@MG8XvvyO7x{VX+Tzg|***e&Eq0TRXiIc^xAC;xyBuYo!HdNKvJy%)0nY`f$qq<|!*Ea|E zeW+>8C@_mivic#1H}K&`x!VI{@le?vArMSL6^YVI5M+ahf?ZI+f^uRjo@XJRb5F3v1#pCcKh0PoO{)|Ehg82mb(*W>- zjHra3zRvHXVM0(NxSdZ16lD`e8LRSHY*g=8p~#aGByP7ut}ObhLC6kcWfwr zJB@}93c=C{ol@-k~;&5GypHGSzlTuAb~{b8J2bn>Y$K%AW|d_ 
zJjg!}Zd$YZA06(aKw()K8R`EBRWnovw3Xd}Oa=j2CGM>gOXW%9z$^&L*-P_6STSK_ zN&uchWjdf3+;H7LI0UL7z*c#}2{|oMK^g^12*UVaX*d8wB}C$xjtAeK*riZ9EcXEk z9#{Bey&gls0};LyEDevL2H;6}8aW7qBVnmjCI~qYw@(NGvO*9QavEAhFdk}T@H#o| zcC~RLlL)v7CJ?R)90vP0?%;-RfFnROD5ogNxmp1&6b*i~$AX6yT?M!>7+*3DgJGhn ziA-lEsI8>nOmj|P9$^A$t4+||2_TdDe`$>WZMZ6M0(4i_vj}DIWBdpt62>ni2nYKP z3s0FS%1cDFMbdjy# z7d2^m9Z(OnGlc!|G$@Gtu#lHyAcB4be(mq)-{$e-ffH(dYyw&~=2#;KxCfLMV7lD3%n0r!u{ms%SJ2 z%ivR_;%OnlKUhNGp|;hgKtecdX}Ad-+q8ib4HQBO1G_b3Wpsckh9CtSp+zB)2tHa^ ze|!*)%8ZBPfu4YZno?}=RYy~fLkB1ZNs4Of-Z0PzaXv zS91u>83rsI98AVQnxK&VVJC<63qeFDX+zJ10!SEakT0tdL#1IMOdu0a6oiNZ1wqs}?0e>-$7Zd%P_<+g}*`fXn zO>kCIal=pgOb7~t$De}rC9oXnhdTN7Wau;m5eY#-MxkQa{sO?_N8Aq4g8aJyP+_pM z3<&|LWwkow|AONu;eW;P7Yw2~MDH*EVwpx43LHW(<|m1Nm<5*MAJD*h-8E7$D1LYv z@x@?+gGmt>axn83WDfrg+1|g9@x#2SEWZrJLUTI{BmCUq`cI7Dtg)!=KP>cLko|5P zB24ZF2(Vqxgyq-_eL4C++{?vs9qp$>`9YQ;vK@yU3wA11plj7E{tz{we08pZ>;YFD z%3xVh$Ti4cL;Sa>SRMPj3FOefg#b+HhnUbUV;k&7ps;}q2dryZ@VLG>9q)_rCEy_D zt0DO#Fi=onNJ4?Z2L}cm3ai@xH7HgE2!RO0J_Q9yEfKCb)U2wR8w!ba_;6?m#b8+r z2*wW&G5cF+Y+Id^e}=|CeCltShdKNTX~&S7+5fdkVFteOu<+0T+m;0c2ljn)xwR2pSnlb|o+wJmgS3g+lO!db519 z5XR915{+c(e@)7(A%7Ek1T9$b<@Y%m&K5ZTT-Tut05}T70P#Q~esyh&0-A~9u<-J5 zp$Qj;P=N+Il7NS3;0aL_ZnA?57pMSQ(7*t34HB^v0koruX>eEKIksu2AMgg%WWn{y`?O4Wi88f^svs7u4W@h-X?N zeOP%DJTE|$i5+ksl%E0g&;bdr(W(w?#BNwvG@6x$&{?Ge6dgXvmV5}r(Ul)RC=c^t zumc2tfJuA`gLnoNFc0E$=w_AHDwCH`eRVJ$8qE5!2Cs!u$RJY-s;);%kvr>FMjd=! zUyOQWSkW&LaWvm7;q4~zt)i zF+)A`jK{GzO=Ol$FFwIHW}rE$7<)diL~VQPcmi z+E~dzPR6^JSB}imLyz#8ZhI^xvZYYSA|q2q>RWpS81ztnd$82T9wnKb7mEaAj%lo{ zL*6<$*VQs9T)On~WpmV6{eb^IM6Rxl;BcwKHl|ElP@~;lH-$F zp7^w#YjcyWlKv(u$I!dHQP{Or=P624{XHt2=4Uot+^IA-q*y%nNvD2DVu@)ju{&RQ z>16K2z5!o(AjNnQd>oOR#iN?i+Zt%j>CNlSBhBcV573& z7mgKkxI6pW*(CLf&%w`{DIE#hko&`=IMIpETU^&4I*EN?-+!@Jz&)=iUvPxNSC?zO zAyVeBUQb)`VC8t=G4EqonLjbQyCyWhG%7p0=rr1YLWJZjL_3Sd3&t1+2NWH!J;p5@~T}59^ zt|LD7jPyF}Tl5i(FXb@_7d%q%$Zi=NNgwx=c<`eB8j39_`j23bQ0@zSrq8C z+ZMCNHF5j}%yZr6r09wYF__xIFVhIZ)X)x{r|hJBE)Pw+uQ!~J$)9~`Myt?%r9(fBX?n0e)~Wx6(a}YEV*vlvlR`}yMY>T1 zE?Ng?BVWExQ#fI(#h7bA`bS!he|F$Fi?JHS9Vj5}QXJzAeIjr*0C(6d&xZ&O+Pqq6 z1;@f&Yjb4cW0fq^y1`pjjb3B^>7>^=A+{CSkJDNet=@M&zb<2a>al0UCUcxRzUxax zPG+SK7<(h}UDxGy`n;vw!Ct(!CnJh1X0ld|{ej`~qEs3Od}*yXR+b<2X! 
zrV+kaIA2jmmLvN-H|jq3OmU6%Qm{Lx z=^AM!80y82IdEdF46}6Wi22(CW8dnLqgyyCT*kxVZudqE@Y78O?~>lAJL_1ncjU1L z3x5`8oKf4dhwgo5A<)q*tD)hsKyJ&&&Y4+Jrp69Q&T~HWZ~H>hzD#!LPUxA9FY79Z zR~|hu{zmGCx36T{mvH}eTN{PDQomCk9#J#eR_#G)V&bOPub^+rzNmPT&n9+bIB(ab zkrh#QFMN_uh{l_}^uBz{_#=#Yg%_6%b5QQgg>_~42KoiZ2{tXO)P4)1nlpCS+q9lx z8$EgO5^hHFT5eTWsw~&T;doDT%j_6SQy}pMSDw|V)W+&SNm1V7ZQ1H`omV7dE|iJA zX;6^!velnoIJ_@J$ibJ+XR-Oe0e$ro*pyV-8CojgyIdP3K2bFTl|y?L1^I_%7> zKg4BnQ=kvOt)Wu$exeX}o|TLGMr49fc@|BUJMPBHi-L%$TQ_bVsm)%wYJQg8%E4)> z$!9i~c(U1A{3Di>GEW@o$@6xK$Z+Z2UZR z&B?TfnK)+DzCK)@{<5s@;EC^5Z^a(4yATl1cNV-Kce=marJ6RFX(y!CVz;80&NcdV zQr9Tq)2l)f$qd0i z)#r;))He2g3aDK<##KczcFX6l-dspqEcP;9?^&HI=#_H$*a7p6C(}tsF~Dxp>-3 zM7Cr5p`f`7D~PSh&*?eL^gcF;SBb>PsYmZ$1RcNlsS7Mk)wcX_8L@Nf-5zo2(G9%K zA%_+ozwsI6SB&jFNgW8?ZPD>+EJGt|UCra~(Unt5y>a(5NFS0eIwsw`QyH>oE7)u3 zQLQ{}YZJZCcY%O9+C1%${c3tdbv{A2&ra}-=0~rHHOSVKlnZX<^tU(G6>miH72U{A z|9Z}!j?sAW6erN)Xm`}Uyu#(c^=|dq0y|25R6Hic!=iPc*hI4p*UFBh z4`0ehJg$#AvoonEcOkyGt5uyt6g<@-th8tvH`Ee4QYi7>ljIE zZVuIae`nFMF6O$L-8UwkNV}D7h1RY;b1xZ=_ zi`RW~b)Pn$U*R^?OZL8*^m70aN>I3+6dU&BEyL{fHp#E)%<%gX8gTjvrFjkr z+ttkl_~p?j#D~}d<(O*6;6EjakDyy(7hLor5p*cIhH}DwxcZ;7RJhA(s>}|h$#pOj z;vfH)Ar`D5AO1On{%b}Kvsq1skx15n&;K0oulBs60=g2}paRZ`i5zh4KcZq~ z!*n7a)G2vKJr7G?M;QhjIuv-ePqTj>Js>SdyM6MxfOt+Z-J ztU`*T#EzR6nu&A>MCimQQp>*rMC{y z)I2K2WO}VVZ=9}UKV_3e(oB>Wh?=0Enc(_(ry=JX$iJf3I<&*Y-FDgDUE9ze?ZLfp z8HJ8!+T#UR-j^z%Z;X^=uNR5CaNV8bYO8Dgqnc|58ZU+z;@#D!?dJ~_YKh1ed3z?M z;wlPTwpWdBG+T#Ja(FLp@8_2NHsW>R5zoc2R8EQfK0;Yq=`_tm_R=xla)*!%dTKX+ zv~yXaU|pq>+tV{)V(D5B@9b$x!qsJWE{YEaf=kK{{8^^|rfYV2 z9cwSBgg>H~|fc_y4~xO06z zUqxS`OqqZG(5c6EDfb_n&$nfCzq@^`4}0yeqa>k}Bvw5$8YSnM*T~2bJ4@e=c_pcKJhvRrWrRfb4 zyo8(V9~1~v3K3on?tBG_JtgH@Bj>rz7_od4r%kWx9R`KxU3G-a;@fy_b&PViHX%Kd z+Y81^a80T?)$wZM&vRU#Vm-E7RLv{Y?D}pgt5GAWgN}Rqg8xc;qW9E27X=Ud4MpF1 z{BY)qz!8C)iUJc(HG^q|Rc2+Ko!jX<9GTIKv^VX`{YgSfOZ!J{_A8V4s;fsr;sgT> zJ#6NUXQ*k?rEL!Fh{(Z)?;|iW)lm1k9MvH9HHUX6p(0vjZoPJqvmEouBjawUt3|G-gx;{ZexYLZ(ml>?ZD$zuP^)Zb=5cyJq{i3qknK;Y2i@mBc)Fr8D4}@t56isJwUrij>}S_h2FH=i{6jm#i=|z5 z%Nv|7CyV+a1wQ?8?XpL4hLU7(^h0qun=MNL($N!vDYs|08c^lHnz_t&x7p~1+D+az zr#8x$SQD+iUoBgNj`W=TzR{I5xkQ`^N&&mib=0$U{ZGOwi?48JaTiVQhsRI$w)BUF5z*-xGteP@-ATr_qHZT~|f zuHNw0#ISd?z+G=sni0=7;T79Ebn+p!N6H2KenN#xN|92Go^HGC1=YSSt*12y)+DlB zeK=r+6lgb3sh30Psow)iCT`E?OK3wG7=_L8?$?$N$$Pi%mMJKbGmk^jP7zf3g?E(R zD{vf6B0PR_-Y#BqJ>A>K6%j+Vx{#rjz^x+iV14fnpQOv`ZJyK5gMp|#PI~9J8#QO| zjrL{AFS92%Cw)4h9eO?@HE2VBTeD>Gh%Xt}y{=5?5D_<*{9kO>^rO92 zuH`RVtKogocwkAdcuBWRyz85Id7H-Gjf=f+Kg@lYn^#{^k>#5`ii;CCmbBokA>p`x zwg544t8Z~Z-5~4KNI|-})@;dfy1g&jcdos8=)!#;8#9eyfn@J)f8Ud*tN9-ZByy@m zS>R@lt4re#qNa}Ra%^ba_o;Ct`WTm8h3Up}M<0G$33IpBi=HCiy$4+0FDqYE9JiddbdWb$+Ty0ly-v5?jy;9$Iog}) zW64{N`r@CU+{ov8t*)%3A31w$u(e~^)4*_a{3}~7K#?viZRvYZW~67TuertQRG7dz zuI0l)6F2l)!nWsp(?i9Y?pjM$pwo>DT`^bcBZoSU*2mVon7gjc5t(tQ<<;U>gG^6B zq3(m%D^AA0HhbFDEo<3p3>n;}^at87@Dmy|?Gl zvqaoeojd$*N)~D~y1U-Z&+K|BIwCg>ylC0BZ%;ycoXs%;-m&R{6Q}Hb3x`5F=L7bE zizWcJB=zdn&%73T4sQT@n9mlk+@d;xPDBRYoKf#+*UTsFwRYu^O01Eu(dERCS#t8* z_MCQT4nCRs&Hg&_8P|Btf$a_txaA^7bvvwt&0F@{C(tg+d6HE3zB515QTe^w(d7=VD!YM`cLkqb*ewYPW~8zGhN!SCN46Y)P&NpGlA zIi~{(_Vl>~h>d8yvl&R8G{793!z3iVEYv;R@bYvSW2B&93w3|wKKT$2!#b0wh0nD^ zR?@*aQxCZ^ZFZ|m7rKXbkTlMI){M3hO)!YS$)6Ftx3m9=ZX9Yrb>ezXo$yv3Qs)`5 zMB#JD*j>Hmdr$6@FK+I}PE2od&r^wZKGm+YRnXBm%f zvdw%FU+GWOQm!&N#q&<1!+71=aP?YpsEf_HG~M_uQj(Iz?BmaC-4;hD4_>c&qBL`+ zZ8}WhRL?WSkY#>r&x(*{HtKUKuX@$a_PWr|9BFw0VUEV~oT`iI=?YH!;(Hr8uF&dK)b3xCfc>g;eH(5VBjFw7gQ(zc-O+8qHIv6Y$=ER+0ef5Q8xbYM2 zLgCEzR~6f*+AY(v!gDpuS8%#v?vBzPgQ}wwqm269A9wOwg=06N)jzL!{8A`Uuc)WE zWPC2N$c3_JDT_BW@ZNC6({5{~ZiJyeUAR&y@u0+xyRc)SYm-qhVyR 
[... base85-encoded GIT binary patch data omitted (not human-readable) ...]

diff --git a/tests/data/rllib_data/single_agent/checkpoint_1/checkpoint-1.tune_metadata b/tests/data/rllib_data/single_agent/checkpoint_1/checkpoint-1.tune_metadata
index e83b72aeaf2291e2f177e78504c94fde7e5a3392..55b72be28978f4b959b001b57aad8683ce0f562a 100644
GIT binary patch
delta 105
[... base85-encoded delta data omitted ...]

delta 75
[... base85-encoded delta data omitted ...]