-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathlab0_problem1.py
35 lines (28 loc) · 1.2 KB
/
lab0_problem1.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
from pathlib import Path
from el2805.envs import Maze
from el2805.agents.mdp import DynamicProgramming, ValueIteration
from utils import best_maze_path
def main():
    """Solve each maze map with dynamic programming and value iteration.

    For every map file, a ``Maze`` created with a fixed horizon is solved
    with ``DynamicProgramming`` and the policy returned by
    ``best_maze_path`` is rendered. A second ``Maze`` created without a
    horizon argument is then solved with ``ValueIteration`` and its policy
    is rendered as well.
    """
    horizon = 20
    # Map files live in the "data" directory two levels above this script.
    data_dir = Path(__file__).parent.parent / "data"

    for map_name in ("maze.txt", "maze_delay.txt"):
        map_filepath = data_dir / map_name
        print(f"Map file: {map_filepath}")

        # --- Dynamic programming on the horizon-limited maze ---
        finite_env = Maze(map_filepath=map_filepath, horizon=horizon)
        dp_agent = DynamicProgramming(environment=finite_env)
        dp_agent.solve()

        # Uncomment to render the policy at every time step:
        # for t in range(horizon):
        #     print(f"Dynamic programming - Policy with {horizon-t} remaining time steps")
        #     finite_env.render(mode="policy", policy=dp_agent.policy[t])
        #     print()

        print("Dynamic programming - Shortest path")
        shortest_path_policy = best_maze_path(finite_env, dp_agent)
        finite_env.render(mode="policy", policy=shortest_path_policy)

        # --- Value iteration on a maze created without a horizon ---
        print("Value iteration - Stationary policy")
        stationary_env = Maze(map_filepath=map_filepath)
        vi_agent = ValueIteration(
            environment=stationary_env,
            discount=0.99,
            precision=1e-2,
        )
        vi_agent.solve()
        stationary_env.render(mode="policy", policy=vi_agent.policy)

        # Blank line separates the output of consecutive maps.
        print()
# Run the experiments only when executed as a script, not when imported.
if __name__ == "__main__":
    main()