Commit 4ee66fe (2 parents: 2fdc503 + 7e83f5b)

Merge pull request berkeleyflow#36 from berkeleyflow/grid_fix

Grid fix

File tree

4 files changed: +16, -8 lines


benchmarks/grid0.py

Lines changed: 4 additions & 2 deletions

@@ -70,6 +70,7 @@ def get_non_flow_params(enter_speed, additional_net_params):
 num_cars_right = 1
 num_cars_top = 1
 num_cars_bot = 1
+edge_inflow = 300
 rl_veh = 0
 tot_cars = (num_cars_left + num_cars_right) * m \
     + (num_cars_bot + num_cars_top) * n
@@ -81,7 +82,8 @@ def get_non_flow_params(enter_speed, additional_net_params):
               "rl_veh": rl_veh}

 additional_env_params = {"target_velocity": 50, "num_steps": HORIZON,
-                         "control-length": 150, "switch_time": 3.0}
+                         "control-length": 150, "switch_time": 2.0,
+                         "total_inflow": n*m*edge_inflow}

 additional_net_params = {"speed_limit": 35, "grid_array": grid_array,
                          "horizontal_lanes": 1, "vertical_lanes": 1}
@@ -98,7 +100,7 @@ def get_non_flow_params(enter_speed, additional_net_params):
              speed_mode="right_of_way")

 initial_config, net_params = \
-    get_flow_params(v_enter, 300, n, m, additional_net_params)
+    get_flow_params(v_enter, edge_inflow, n, m, additional_net_params)


 flow_params = dict(
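
The added total_inflow entry just scales the per-edge inflow by the grid dimensions and passes it through additional_env_params so the environment can normalize its reward. A minimal sketch of the arithmetic, with illustrative grid dimensions (grid0.py defines its own n and m earlier in the file):

```python
# Sketch only: n and m below are illustrative, not the benchmark's values.
n, m = 3, 3             # grid rows and columns
edge_inflow = 300       # vehicles/hour per edge, as set in grid0.py
total_inflow = n * m * edge_inflow
print(total_inflow)     # 2700 for a 3x3 grid at 300 veh/hr per edge
```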

benchmarks/grid1.py

Lines changed: 4 additions & 2 deletions

@@ -70,6 +70,7 @@ def get_non_flow_params(enter_speed, additional_net_params):
 num_cars_right = 1
 num_cars_top = 1
 num_cars_bot = 1
+edge_inflow = 600
 rl_veh = 0
 tot_cars = (num_cars_left + num_cars_right) * m \
     + (num_cars_bot + num_cars_top) * n
@@ -81,7 +82,8 @@ def get_non_flow_params(enter_speed, additional_net_params):
               "rl_veh": rl_veh}

 additional_env_params = {"target_velocity": 50, "num_steps": HORIZON,
-                         "control-length": 150, "switch_time": 3.0}
+                         "control-length": 150, "switch_time": 2.0,
+                         "total_inflow": edge_inflow*n*m}

 additional_net_params = {"speed_limit": 35, "grid_array": grid_array,
                          "horizontal_lanes": 1, "vertical_lanes": 1}
@@ -98,7 +100,7 @@ def get_non_flow_params(enter_speed, additional_net_params):
              speed_mode="right_of_way")

 initial_config, net_params = \
-    get_flow_params(v_enter, 600, n, m, additional_net_params)
+    get_flow_params(v_enter, edge_inflow, n, m, additional_net_params)


 flow_params = dict(

flow/core/rewards.py

Lines changed: 2 additions & 3 deletions

@@ -136,9 +136,8 @@ def penalize_tl_changes(env, actions, gain=1):
     :param gain: {float} - multiplicative factor on the action penalty
     :return: a penalty on vehicle delays and traffic light switches
     """
-    delay = min_delay(env)
-    action_penalty = gain * np.sum(actions)
-    return delay - action_penalty
+    action_penalty = gain * np.sum(np.round(actions))
+    return -action_penalty


 def penalize_headway_variance(vehicles, vids, normalization=1, penalty_gain=1,
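
With this change the function no longer folds in a delay term; it only penalizes switching, and the continuous RL actions are rounded so the penalty counts actual toggles rather than raw action magnitudes. A standalone sketch of that behavior (the env argument is dropped here for brevity since the revised body no longer uses it; this is an illustration, not the repository's exact signature):

```python
import numpy as np

def switch_penalty(actions, gain=1):
    # Rounding maps continuous actions to 0/1 switch decisions, so the
    # penalty is -gain times the number of lights that actually toggle.
    return -gain * np.sum(np.round(actions))

# Three intersections; only the first action crosses the 0.5 threshold.
print(switch_penalty(np.array([0.9, 0.2, 0.4]), gain=0.2))  # -0.2
```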

flow/envs/green_wave_env.py

Lines changed: 6 additions & 1 deletion

@@ -376,6 +376,7 @@ class PO_TrafficLightGridEnv(TrafficLightGridEnv):
     def __init__(self, env_params, sumo_params, scenario):
         super().__init__(env_params, sumo_params, scenario)
         self.num_observed = self.grid_array.get("num_observed", 2)
+        self.total_inflow = env_params.additional_params["total_inflow"]
         self.observed_ids = []

     @property
@@ -453,7 +454,11 @@ def get_po_state(self):
             self.last_change.flatten().tolist()]))

     def compute_reward(self, state, rl_actions, **kwargs):
-        return rewards.min_delay(self)
+        hour_frac = self.horizon*self.sim_step/3600
+        delay_reward = rewards.min_delay(self)/(self.total_inflow*hour_frac)
+        switch_penalty = rewards.penalize_tl_changes(rl_actions, gain=0.2)
+        switch_penalty_norm = switch_penalty/(self.rows*self.cols)
+        return delay_reward + switch_penalty_norm

     def additional_command(self):
         # specify observed vehicles
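
The reworked compute_reward normalizes the delay term by the number of vehicles expected over the horizon (total_inflow times the simulated fraction of an hour) and the switch penalty by the number of intersections, keeping both terms on a comparable scale across grid sizes and inflow rates. A rough numeric sketch, where every value below is illustrative rather than taken from the benchmarks:

```python
# Illustrative stand-in values; none of these numbers come from the code.
horizon, sim_step = 400, 1.0            # 400 simulation steps of 1 s each
total_inflow = 3 * 3 * 300              # n*m*edge_inflow, as in grid0.py
rows, cols = 3, 3                       # intersections in the grid

hour_frac = horizon * sim_step / 3600   # fraction of an hour simulated
raw_delay = 50.0                        # stand-in for rewards.min_delay(env)
raw_switch = -1.8                       # stand-in for the switch penalty

delay_reward = raw_delay / (total_inflow * hour_frac)
switch_penalty_norm = raw_switch / (rows * cols)
print(delay_reward + switch_penalty_norm)   # about 0.167 - 0.2 = -0.033
```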
