Skip to content

Commit 0e504bd

Browse files
author
Roei Bahumi
committed
HW2: added run_agent.py, a script that allow load/visualize the trained agent's performance.
1 parent dde95f4 commit 0e504bd

File tree

2 files changed

+164
-0
lines changed

2 files changed

+164
-0
lines changed

Diff for: hw2/README.md

+14
Original file line numberDiff line numberDiff line change
@@ -14,3 +14,17 @@ Before doing anything, first replace `gym/envs/box2d/lunar_lander.py` with the p
1414
The only file that you need to look at is `train_pg_f18.py`, which you will implement.
1515

1616
See the [HW2 PDF](http://rail.eecs.berkeley.edu/deeprlcourse/static/homeworks/hw2.pdf) for further instructions.
17+
18+
19+
## Running trained agent
20+
After running `train_pg_f18.py` with a specific setting (gym environment, hyperparameters), a new directory will
21+
be added under `data` with the following structure:
22+
```
23+
args.exp_name + '_' + args.env_name + '_' + time.strftime("%d-%m-%Y_%H-%M-%S")
24+
```
25+
Under this directory, there are multiple (exact number is set by 'n_experiments' param) trained agents.
26+
In order to visualize (render) these agents' behavior, run the `run_agent.py` script and specify the number of iterations (-n option). For example:
27+
```bash
28+
# Run 3 iterations of agent number 1 of this experiment
29+
python run_agent.py "data/hc_b4000_r0.01_RoboschoolInvertedPendulum-v1_21-07-2019_08-42-10/1" -n 3
30+
```

Diff for: hw2/run_agent.py

+150
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,150 @@
1+
"""
2+
## Running trained agent
3+
After running `train_pg_f18.py` with a specific setting (gym environment, hyperparameters), a new directory will
4+
be added under `data` with the following structure:
5+
6+
args.exp_name + '_' + args.env_name + '_' + time.strftime("%d-%m-%Y_%H-%M-%S")
7+
8+
Under this directory, there are multiple (exact number is set by 'n_experiments' param) trained agents.
9+
In order to visualize (render) these agents' behavior, run the `run_agent.py` script and specify the number of iterations (-n option). For example:
10+
11+
> python run_agent.py "data/hc_b4000_r0.01_RoboschoolInvertedPendulum-v1_21-07-2019_08-42-10/1" -n 3
12+
13+
"""
14+
import os
15+
import json
16+
import pickle
17+
import gym
18+
import numpy as np
19+
import tensorflow as tf
20+
from train_pg_f18 import Agent
21+
22+
23+
# Name of the JSON params file that load_agent_and_env() looks up inside the model directory.
PARAMS_FILE = "params.json"
24+
# Name of the pickle file with saved variable values that load_agent_and_env() looks up inside the model directory.
VARS_FILE = "vars.pkl"
25+
26+
27+
def load_params(filename):
    """
    Load the 'params.json' file of an experiment.

    The file is expected to be JSON written with a comma+newline item separator and a
    tab-colon-tab key separator. Tabs and newlines between tokens are ordinary JSON
    whitespace, so the standard parser reads the file directly and no manual
    separator clean-up is needed.

    :param filename: str (path to the params file)
    :return: dict (the decoded parameters)
    :raises OSError: if the file cannot be opened
    :raises json.JSONDecodeError: if the content is not valid JSON
    """
    # Explicit encoding keeps the result independent of the platform's default locale.
    with open(filename, 'r', encoding='utf-8') as params_file:
        return json.load(params_file)
40+
41+
42+
def load_pickle(filename, mode='rb'):
    """
    Read a single pickled object from a file.

    NOTE: unpickling can execute arbitrary code, so only use this on files you trust.

    :param filename: str (path to the pickle file)
    :param mode: str (mode passed to open(), binary read by default)
    :return: the object stored in the file
    """
    with open(filename, mode=mode) as stream:
        payload = pickle.load(stream)
    return payload
45+
46+
47+
def load_agent_and_env(model_dir):
    """
    Rebuild a trained agent and its gym environment from a saved experiment directory.

    Reads the saved params and variable values found in ``model_dir``, creates the
    environment named in the params, builds an ``Agent`` from the saved settings and
    overwrites every TensorFlow global variable with its saved value.

    The setup steps follow the train_pg_f18.py::train_PG() function.

    :param model_dir: str (full directory path to the 'params.json' and 'vars.pkl' files)
    :return: tuple (a tuple of length 2, the Agent instance and the gym env object)
    """
    # Settings the experiment was run with
    params = load_params(filename=os.path.join(model_dir, PARAMS_FILE))
    print(params)

    # Saved variable values, looked up below by variable name
    model_vars = load_pickle(filename=os.path.join(model_dir, VARS_FILE))

    env = gym.make(params['env_name'])

    # Seed TensorFlow and NumPy; the environment is not explicitly seeded here
    seed = params['seed']
    tf.set_random_seed(seed)
    np.random.seed(seed)

    # A Discrete action space reports its size through `.n`, any other space through `.shape`
    discrete = isinstance(env.action_space, gym.spaces.Discrete)
    ob_dim = env.observation_space.shape[0]
    if discrete:
        ac_dim = env.action_space.n
    else:
        ac_dim = env.action_space.shape[0]

    # The three argument groups expected by the Agent constructor
    computation_graph_args = dict(
        n_layers=params['n_layers'],
        ob_dim=ob_dim,
        ac_dim=ac_dim,
        discrete=discrete,
        size=params['size'],
        learning_rate=params['learning_rate'],
    )
    sample_trajectory_args = {
        key: params[key]
        for key in ('animate', 'max_path_length', 'min_timesteps_per_batch')
    }
    estimate_return_args = {
        key: params[key]
        for key in ('gamma', 'reward_to_go', 'nn_baseline', 'normalize_advantages')
    }

    agent = Agent(computation_graph_args, sample_trajectory_args, estimate_return_args)
    agent.build_computation_graph()

    # tensorflow: config, session, variable initialization
    agent.init_tf_sess()

    # Replace each freshly initialized variable with its saved value
    for variable in tf.global_variables(scope=None):
        agent.sess.run(variable.assign(model_vars[variable.name]))

    return agent, env
127+
128+
129+
if __name__ == "__main__":
    # Example usage (after running train_pg_f18.py and creating agent 'data' dirs):
    # - python run_agent.py "data/hc_b4000_r0.01_RoboschoolInvertedPendulum-v1_21-07-2019_08-42-10/1" -n 3
    # - python run_agent.py "data/ll_b40000_r0.005_LunarLanderContinuous-v2_21-07-2019_09-59-05/1" -n 3
    # - python run_agent.py "data/hc_b50000_r0.005_RoboschoolHalfCheetah-v1_22-07-2019_20-04-48/1" -n 3
    import argparse

    cli = argparse.ArgumentParser()
    cli.add_argument(
        'model_dir',
        type=str,
        help=(
            'A relative path to the data dir of a specific experiment. '
            'For eample: "data/ll_b40000_r0.005_LunarLanderContinuous-v2_21-07-2019_09-59-05/1"'
        ),
    )
    cli.add_argument('--n_iter', '-n', type=int, default=3)
    options = cli.parse_args()

    # Restore the trained agent together with the environment it was trained on
    agent, env = load_agent_and_env(options.model_dir)

    # Render one episode per requested iteration
    for _ in range(options.n_iter):
        agent.sample_trajectory(env, animate_this_episode=True)
        print("done")

0 commit comments

Comments
 (0)