assignment4/test1Results.csv at master · jwennerstrum1/assignment4 · GitHub

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
Value Iteration,20168,763,3141,23,26,27,21,27,29,23,21,22,26,26,23,28,24,23,22,26,24,24,30,22,28,23,39,26,24,25,73,33,35,48,50,31,32,28,32,34,23,92,36,30,34,61,31,46,27,56,33,35,48,44,28,33,56,50,33,37,24,36,26,40,25,52,38,44,57,73,32,48,38,35,33,24,42,40,52,36,100,89,26,59,59,33,36,27,33,36,100,23,25,65,30,70,40,38,68,56
Policy Iteration,41053,12353,19750,250,1164,24,27,25,25,28,24,30,24,23,23,27,22,27,22,23,31,25,20,21,24,20,32,25,27,24,20,25,76,27,52,60,29,54,40,32,24,48,30,25,59,27,31,37,25,62,51,60,31,44,26,26,31,55,47,51,53,25,67,23,37,34,36,55,39,30,31,43,27,123,41,41,31,54,54,36,45,64,41,38,25,33,31,81,30,35,40,28,30,44,45,44,33,40,29,61
,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
The data below shows the number of steps/actions the agent required to reach ,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
the terminal state given the number of iterations the algorithm was run.,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
Iterations,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36,37,38,39,40,41,42,43,44,45,46,47,48,49,50,51,52,53,54,55,56,57,58,59,60,61,62,63,64,65,66,67,68,69,70,71,72,73,74,75,76,77,78,79,80,81,82,83,84,85,86,87,88,89,90,91,92,93,94,95,96,97,98,99,100
Value Iteration,20168,763,3141,23,26,27,21,27,29,23,21,22,26,26,23,28,24,23,22,26,24,24,30,22,28,23,39,26,24,25,73,33,35,48,50,31,32,28,32,34,23,92,36,30,34,61,31,46,27,56,33,35,48,44,28,33,56,50,33,37,24,36,26,40,25,52,38,44,57,73,32,48,38,35,33,24,42,40,52,36,100,89,26,59,59,33,36,27,33,36,100,23,25,65,30,70,40,38,68,56
Policy Iteration,41053,12353,19750,250,1164,24,27,25,25,28,24,30,24,23,23,27,22,27,22,23,31,25,20,21,24,20,32,25,27,24,20,25,76,27,52,60,29,54,40,32,24,48,30,25,59,27,31,37,25,62,51,60,31,44,26,26,31,55,47,51,53,25,67,23,37,34,36,55,39,30,31,43,27,123,41,41,31,54,54,36,45,64,41,38,25,33,31,81,30,35,40,28,30,44,45,44,33,40,29,61
Q Learning,542,303,1331,393,74,218,151,236,89,71,135,58,147,37,32,914,156,47,37,86,58,45,74,33,571,87,109,59,96,44,58,212,126,100,64,35,45,95,140,93,50,86,61,994,148,113,49,110,90,65,158,92,124,31,48,226,41,143,75,81,127,163,48,125,54,86,71,76,75,55,61,113,28,46,178,52,43,79,43,258,143,36,67,29,48,83,59,47,125,248,262,48,56,56,89,57,197,54,108,80
,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
The data below shows the number of milliseconds the algorithm required to generate ,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
the optimal policy given the number of iterations the algorithm was run.,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
Iterations,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36,37,38,39,40,41,42,43,44,45,46,47,48,49,50,51,52,53,54,55,56,57,58,59,60,61,62,63,64,65,66,67,68,69,70,71,72,73,74,75,76,77,78,79,80,81,82,83,84,85,86,87,88,89,90,91,92,93,94,95,96,97,98,99,100
Value Iteration,102,9,11,23,12,11,14,16,17,21,27,46,41,35,39,43,35,39,35,34,33,38,45,50,32,50,59,58,48,59,65,78,61,62,77,69,75,71,56,72,112,92,55,47,37,36,58,52,52,44,42,66,58,57,44,54,68,55,45,54,76,52,52,67,73,53,58,66,72,82,77,78,60,66,87,92,90,92,65,66,89,66,75,79,71,79,83,82,81,91,72,90,87,73,93,88,81,93,128,79
Policy Iteration,16,6,14,8,10,15,16,18,22,26,24,44,28,33,39,31,38,37,45,48,48,39,48,61,48,52,65,65,54,75,84,62,68,68,96,111,64,82,94,102,107,94,85,80,97,77,98,86,108,98,106,99,118,95,119,99,125,119,115,127,130,128,134,119,129,128,121,137,127,133,171,193,174,158,162,155,197,201,159,159,151,207,201,213,232,196,167,172,176,167,187,181,192,186,251,275,310,276,278,312
Q Learning,23,53,31,23,16,54,30,38,50,34,19,35,25,25,23,34,28,28,33,19,20,31,27,37,86,46,62,27,23,26,36,41,27,25,27,20,48,26,38,21,17,35,29,38,49,34,33,27,26,28,26,43,32,36,33,31,29,26,27,36,40,22,30,35,44,29,39,34,41,52,26,76,39,59,48,57,43,34,57,86,39,62,44,37,42,50,37,54,67,70,55,42,38,56,42,31,54,48,48,38
,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
The data below shows the total reward gained for ,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
the optimal policy given the number of iterations the algorithm was run.,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
Iterations,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36,37,38,39,40,41,42,43,44,45,46,47,48,49,50,51,52,53,54,55,56,57,58,59,60,61,62,63,64,65,66,67,68,69,70,71,72,73,74,75,76,77,78,79,80,81,82,83,84,85,86,87,88,89,90,91,92,93,94,95,96,97,98,99,100
Value Iteration Rewards,-504,-453,1723,79,76,75,81,75,73,79,81,80,76,76,79,74,78,79,80,76,78,78,72,80,74,79,63,76,78,77,113,91,67,74,91,81,73,82,80,86,83,134,74,80,88,93,79,84,75,82,95,91,56,100,80,85,110,88,83,85,78,90,80,84,77,94,76,89,103,123,92,100,92,75,89,82,68,57,71,88,134,121,80,95,107,87,84,81,73,82,140,79,77,107,86,79,84,86,98,98
Policy Iteration Rewards,19660,7309,11500,173,769,78,75,77,77,74,78,72,78,79,79,75,80,85,80,79,71,77,82,81,78,82,70,77,75,78,82,77,122,75,81,102,70,102,78,82,78,106,78,85,105,75,89,83,77,75,89,114,74,65,76,73,71,105,101,97,107,77,101,79,89,82,86,113,74,79,77,79,79,147,99,44,83,106,96,90,99,112,99,72,89,75,82,129,88,83,98,82,80,98,99,95,85,86,79,113
Q Learning Rewards,-534,-95,-345,-357,35,20,-64,-138,47,41,30,106,33,62,63,-258,-106,1,70,37,44,70,75,81,-123,11,63,43,41,44,64,141,20,-23,45,74,67,0,-74,13,102,74,53,-239,0,34,67,-4,12,69,-43,-113,-10,77,58,26,69,50,31,68,-21,86,51,19,45,74,54,37,101,68,75,16,78,40,-71,75,57,16,54,-96,-46,58,73,66,58,43,40,39,79,-247,-39,58,54,76,45,74,-13,68,90,-26
,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
The data below shows the total reward gained for
the optimal policy given the number of iterations the algorithm was run.
Iterations,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36,37,38,39,40,41,42,43,44,45,46,47,48,49,50,51,52,53,54,55,56,57,58,59,60,61,62,63,64,65,66,67,68,69,70,71,72,73,74,75,76,77,78,79,80,81,82,83,84,85,86,87,88,89,90,91,92,93,94,95,96,97,98,99,100
Value Iteration Rewards,-1718.0,-6391.0,-1513.0,-2979.0,76.0,79.0,78.0,71.0,80.0,78.0,80.0,73.0,63.0,77.0,73.0,76.0,72.0,71.0,75.0,74.0,66.0,77.0,66.0,77.0,81.0,70.0,79.0,73.0,76.0,78.0,74.0,79.0,71.0,72.0,69.0,79.0,79.0,77.0,76.0,72.0,74.0,79.0,74.0,78.0,81.0,77.0,73.0,79.0,80.0,78.0,79.0,70.0,73.0,79.0,75.0,75.0,73.0,80.0,73.0,76.0,80.0,76.0,68.0,83.0,73.0,81.0,75.0,69.0,76.0,80.0,81.0,72.0,80.0,77.0,68.0,73.0,78.0,81.0,66.0,74.0,80.0,76.0,81.0,68.0,77.0,66.0,76.0,78.0,76.0,77.0,71.0,78.0,69.0,80.0,76.0,79.0,80.0,72.0,72.0,75.0
Policy Iteration Rewards,-3827.0,-603.0,-851.0,-6117.0,-2768.0,-869.0,-1031.0,72.0,77.0,80.0,63.0,72.0,80.0,77.0,79.0,75.0,75.0,81.0,78.0,78.0,73.0,78.0,77.0,80.0,77.0,75.0,74.0,81.0,78.0,76.0,78.0,75.0,81.0,75.0,73.0,78.0,80.0,78.0,77.0,76.0,61.0,76.0,62.0,77.0,80.0,68.0,80.0,74.0,80.0,75.0,73.0,76.0,70.0,79.0,69.0,75.0,76.0,74.0,63.0,78.0,74.0,78.0,82.0,79.0,62.0,78.0,75.0,82.0,74.0,78.0,76.0,76.0,76.0,78.0,77.0,79.0,75.0,70.0,79.0,82.0,76.0,71.0,80.0,78.0,75.0,72.0,74.0,79.0,77.0,76.0,76.0,83.0,78.0,80.0,75.0,71.0,78.0,78.0,80.0,70.0
Q Learning Rewards,-425.0,-201.0,-125.0,-63.0,-37.0,-7.0,-393.0,34.0,-32.0,-27.0,52.0,41.0,-32.0,-62.0,53.0,-42.0,11.0,57.0,-15.0,70.0,68.0,62.0,-31.0,73.0,22.0,-21.0,38.0,8.0,-12.0,67.0,-33.0,-37.0,67.0,41.0,76.0,46.0,76.0,0.0,76.0,43.0,70.0,73.0,-29.0,25.0,-3.0,1.0,25.0,-157.0,12.0,74.0,-16.0,80.0,46.0,51.0,46.0,67.0,75.0,21.0,49.0,66.0,70.0,71.0,62.0,47.0,46.0,67.0,66.0,9.0,33.0,68.0,59.0,29.0,58.0,-147.0,67.0,64.0,79.0,54.0,73.0,76.0,77.0,28.0,64.0,-60.0,48.0,-135.0,5.0,60.0,38.0,63.0,54.0,66.0,62.0,60.0,70.0,70.0,46.0,49.0,68.0,24.0