diff --git a/examples/exp_configs/non_rl/i210_subnetwork.py b/examples/exp_configs/non_rl/i210_subnetwork.py
index dd85c56cf..aa2151bf2 100644
--- a/examples/exp_configs/non_rl/i210_subnetwork.py
+++ b/examples/exp_configs/non_rl/i210_subnetwork.py
@@ -100,8 +100,9 @@
 edge_id = "119257908#1-AddedOnRampEdge"
 custom_callables = {
-    "avg_merge_speed": lambda env: np.nan_to_num(np.mean(
-        env.k.vehicle.get_speed(env.k.vehicle.get_ids_by_edge(edge_id)))),
+    "avg_speed": lambda env: np.nan_to_num(np.mean([speed for speed
+        in env.k.vehicle.get_speed(env.k.vehicle.get_ids())
+        if speed > 0])),
     "avg_outflow": lambda env: np.nan_to_num(
         env.k.vehicle.get_outflow_rate(120)),
     # we multiply by 5 to account for the vehicle length and by 1000 to convert
diff --git a/examples/train.py b/examples/train.py
index a159c13ee..702aad43c 100644
--- a/examples/train.py
+++ b/examples/train.py
@@ -8,11 +8,13 @@
 """
 import argparse
+from copy import deepcopy
 import json
 import os
 import sys
 from time import strftime
 
+import numpy as np
 from stable_baselines.common.vec_env import DummyVecEnv, SubprocVecEnv
 from stable_baselines import PPO2
@@ -25,8 +27,8 @@
     from ray.rllib.agents.agent import get_agent_class
 except ImportError:
     from ray.rllib.agents.registry import get_agent_class
-from copy import deepcopy
 
+from flow.core.rewards import energy_consumption
 from flow.core.util import ensure_dir
 from flow.utils.registry import env_constructor
 from flow.utils.rllib import FlowParamsEncoder, get_flow_params
@@ -152,6 +154,37 @@ def setup_exps_rllib(flow_params,
     config["num_sgd_iter"] = 10
     config["horizon"] = horizon
 
+    # define some standard and useful callbacks
+    def on_episode_start(info):
+        episode = info["episode"]
+        episode.user_data["avg_speed"] = []
+        episode.user_data["energy"] = []
+        episode.user_data["outflow"] = []
+
+    def on_episode_step(info):
+        episode = info["episode"]
+        env = info["env"].get_unwrapped()[0]
+        speed = np.mean([speed for speed in env.k.vehicle.get_speed(env.k.vehicle.get_ids()) if speed > 0])
+        if not np.isnan(speed):
+            episode.user_data["avg_speed"].append(speed)
+        energy = energy_consumption(env)
+        if not np.isnan(energy):
+            episode.user_data["energy"].append(energy)
+
+    def on_episode_end(info):
+        episode = info["episode"]
+        env = info["env"].get_unwrapped()[0]
+        avg_speed = np.mean(episode.user_data["avg_speed"])
+        avg_energy = np.mean(episode.user_data["energy"])
+
+        episode.custom_metrics["avg_speed"] = avg_speed
+        episode.custom_metrics["avg_energy"] = avg_energy
+        episode.custom_metrics["outflow"] = env.k.vehicle.get_outflow_rate(120)
+
+    config["callbacks"] = {"on_episode_start": tune.function(on_episode_start),
+                           "on_episode_step": tune.function(on_episode_step),
+                           "on_episode_end": tune.function(on_episode_end)}
+
     # save the flow params for replay
     flow_json = json.dumps(
         flow_params, cls=FlowParamsEncoder, sort_keys=True, indent=4)
diff --git a/flow/core/rewards.py b/flow/core/rewards.py
index 6de472af2..02fcad25b 100755
--- a/flow/core/rewards.py
+++ b/flow/core/rewards.py
@@ -329,4 +329,4 @@ def energy_consumption(env, gain=.001):
         power += M * speed * accel + M * g * Cr * speed + 0.5 * rho * A * Ca * speed ** 3
 
-    return -gain * power
+    return -gain * power / len(env.k.vehicle.get_ids())
diff --git a/flow/utils/rllib.py b/flow/utils/rllib.py
index 80193c22b..0206f1b61 100644
--- a/flow/utils/rllib.py
+++ b/flow/utils/rllib.py
@@ -3,8 +3,8 @@
 This includes: environment generation, serialization, and visualization.
 """
-import json
 from copy import deepcopy
+import json
 import os
 import sys
diff --git a/tests/fast_tests/test_rewards.py b/tests/fast_tests/test_rewards.py
index 3f2e08cde..487bda695 100644
--- a/tests/fast_tests/test_rewards.py
+++ b/tests/fast_tests/test_rewards.py
@@ -169,13 +169,13 @@ def test_energy_consumption(self):
         # change the speed of one vehicle
         env.k.vehicle.test_set_speed("test_0", 1)
-        self.assertEqual(energy_consumption(env), -12.059337750000001)
+        self.assertEqual(energy_consumption(env), -1.205933775)
 
         # check that stepping changes the previous speeds and increases the energy consumption
         env.step(rl_actions=None)
         env.step(rl_actions=None)
         self.assertGreater(env.k.vehicle.get_previous_speed("test_0"), 0.0)
-        self.assertLess(energy_consumption(env), -12.059337750000001)
+        self.assertLess(energy_consumption(env), -1.205933775)
 
     def test_boolean_action_penalty(self):
         """Test the boolean_action_penalty method."""