-
Notifications
You must be signed in to change notification settings - Fork 2
/
Copy pathplay.py
30 lines (26 loc) · 1009 Bytes
/
play.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
import tensorflow as tf
from agents.human_agent import HumanAgent
from agents.simple_agent import SimpleAgent
from agents.td_agent import TDAgent
from agents.forward_agent import ForwardAgent
from agents.backward_agent import BackwardAgent
from agents.leaf_agent import LeafAgent
from model import ValueModel
from env import TicTacToeEnv
def main(log_dir='/Users/adam/Documents/projects/td_tic_tac_toe/log/leaf2'):
    """Set up a HumanAgent-vs-LeafAgent match and run it.

    Builds a ``TicTacToeEnv``, a ``ValueModel`` sized from the environment's
    ``feature_vector_size``, and the two players, then opens a TensorFlow
    monitored session and calls ``env.play`` once with verbose output.

    Args:
        log_dir: Directory passed to ``tf.train.SingularMonitoredSession``
            as ``checkpoint_dir``. The default is the path that used to be
            hard-coded in this function, so ``main()`` with no arguments
            behaves exactly as before; pass a different directory to run
            against another checkpoint location or on another machine.
    """
    env = TicTacToeEnv()
    # Second argument is the constant 100 from the original script
    # (presumably the hidden-layer size -- confirm against ValueModel).
    model = ValueModel(env.feature_vector_size, 100)

    # The other imported agent classes are constructed with the same
    # arguments and can be swapped in here:
    # agent = SimpleAgent('agent_0', model, env)
    # agent = TDAgent('agent_0', model, env)
    # agent = ForwardAgent('agent_0', model, env)
    # agent = BackwardAgent('agent_0', model, env)
    agent = LeafAgent('agent_0', model, env)
    human = HumanAgent(env)

    with tf.train.SingularMonitoredSession(checkpoint_dir=log_dir) as sess:
        # Both the agent and the environment are handed the live session
        # before play starts.
        agent.sess = sess
        env.sess = sess
        # The human is listed first; whether list order decides who moves
        # first depends on env.play -- confirm there.
        players = [human, agent]
        env.play(players, verbose=True)


if __name__ == "__main__":
    main()