largeGridWorld.csv
The data below shows the number of steps/actions the agent required to reach
the terminal state given the number of iterations for which the algorithm was run.
Iterations,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36,37,38,39,40,41,42,43,44,45,46,47,48,49,50,51,52,53,54,55,56,57,58,59,60,61,62,63,64,65,66,67,68,69,70,71,72,73,74,75,76,77,78,79,80,81,82,83,84,85,86,87,88,89,90,91,92,93,94,95,96,97,98,99,100
Value Iteration,43677,32110,6207,4612,6660,4622,5421,2898,4299,1291,2695,1768,1852,3811,979,411,489,256,2558,4869,1167,1244,129,69,59,65,76,65,62,70,92,76,62,74,68,67,68,62,74,71,84,64,65,61,66,68,58,65,69,74,81,79,65,72,63,77,66,75,79,83,70,60,69,61,66,73,62,63,69,74,65,75,62,63,57,71,65,62,63,74,80,60,61,68,56,88,57,67,61,71,64,65,60,73,77,65,67,64,58,65
Policy Iteration,62895,112768,124994,75354,26817,6042,14805,28118,2123,25376,23000,13408,21049,8584,9742,1292,8226,3058,5739,998,1660,200,396,499,201,943,87,71,76,78,58,71,62,64,71,74,73,57,73,60,74,64,62,62,66,69,60,65,56,58,57,69,64,68,77,63,63,65,71,70,66,76,77,78,65,60,68,80,79,79,66,71,69,63,63,80,57,63,72,65,65,78,67,65,63,64,70,78,65,67,60,70,69,78,70,57,85,65,52,52
Q Learning,4491,1833,1132,2046,1072,635,574,1358,1209,453,1060,272,1379,3210,617,735,1108,840,447,1151,1243,823,481,404,417,1102,570,425,303,520,881,386,316,647,421,619,428,326,433,418,258,390,272,577,299,221,271,335,302,270,243,884,304,379,258,393,340,262,215,135,518,533,202,315,427,484,332,232,166,607,267,366,485,282,166,220,369,179,382,344,137,255,376,379,1466,154,381,349,185,319,346,283,201,202,371,199,169,233,269,154
,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
The data below shows the number of milliseconds the algorithm required to generate
the optimal policy given the number of iterations for which the algorithm was run.
Iterations,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36,37,38,39,40,41,42,43,44,45,46,47,48,49,50,51,52,53,54,55,56,57,58,59,60,61,62,63,64,65,66,67,68,69,70,71,72,73,74,75,76,77,78,79,80,81,82,83,84,85,86,87,88,89,90,91,92,93,94,95,96,97,98,99,100
Value Iteration,197,84,68,80,77,107,125,249,356,244,183,220,218,205,236,147,193,262,172,206,221,229,235,192,251,203,264,227,280,237,267,265,258,301,270,290,297,295,360,369,387,444,418,380,339,419,381,472,432,397,428,474,452,441,456,432,470,433,475,448,508,483,561,545,533,537,535,522,583,551,546,618,616,628,605,635,577,630,657,694,645,640,656,667,706,695,668,739,764,710,737,714,756,851,814,768,773,772,796,857
Policy Iteration,70,34,83,100,119,134,121,175,188,218,216,201,210,231,264,298,331,627,901,420,376,387,399,376,475,460,498,489,517,517,561,564,509,644,665,711,650,638,703,692,697,728,773,745,800,808,805,846,891,1163,1053,871,927,1055,957,950,944,990,1096,1037,1081,1011,1144,1104,1126,1219,1216,1209,1186,1170,1256,1294,1330,1223,1289,1341,1297,1352,1314,1479,1385,1375,1870,1741,1638,1568,1434,1605,1813,1534,1570,1586,1581,1629,1669,1617,1628,1725,-1909,2089
Q Learning,172,204,92,92,215,200,147,156,128,87,82,126,90,120,114,140,132,115,119,143,140,164,122,154,146,139,191,180,174,209,181,170,183,168,183,200,173,171,179,151,170,198,154,181,183,189,176,168,170,219,179,211,181,222,255,267,216,229,276,200,251,231,234,221,218,211,228,203,198,252,198,199,299,234,209,219,278,242,249,218,268,261,236,209,247,212,226,259,224,254,294,226,265,269,237,254,221,329,334,285
,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
The data below shows the total reward gained by the resulting policy
given the number of iterations for which the algorithm was run.
Iterations,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36,37,38,39,40,41,42,43,44,45,46,47,48,49,50,51,52,53,54,55,56,57,58,59,60,61,62,63,64,65,66,67,68,69,70,71,72,73,74,75,76,77,78,79,80,81,82,83,84,85,86,87,88,89,90,91,92,93,94,95,96,97,98,99,100
Value Iteration Rewards,-43602.0,-32017.0,-6132.0,-4510.0,-6567.0,-4529.0,-5319.0,-2796.0,-4197.0,-1198.0,-2602.0,-1666.0,-1750.0,-3709.0,-877.0,-309.0,-387.0,-154.0,-2465.0,-4776.0,-1065.0,-1151.0,-27.0,33.0,43.0,37.0,17.0,37.0,40.0,32.0,10.0,26.0,40.0,28.0,34.0,35.0,34.0,40.0,28.0,31.0,18.0,38.0,37.0,41.0,36.0,34.0,44.0,37.0,33.0,28.0,21.0,23.0,37.0,30.0,39.0,16.0,36.0,27.0,23.0,19.0,32.0,42.0,33.0,41.0,36.0,29.0,40.0,39.0,33.0,28.0,37.0,27.0,40.0,39.0,45.0,31.0,37.0,40.0,39.0,28.0,22.0,42.0,41.0,34.0,46.0,5.0,45.0,35.0,41.0,31.0,38.0,37.0,42.0,20.0,25.0,37.0,35.0,38.0,44.0,37.0
Policy Iteration Rewards,-62793.0,-112729.0,-124937.0,-75423.0,-27111.0,-5958.0,-14730.0,-28025.0,-2039.0,-25301.0,-23042.0,-13306.0,-21136.0,-8689.0,-9667.0,-1190.0,-8277.0,-2965.0,-5781.0,-896.0,-1594.0,-98.0,-294.0,-406.0,-99.0,-868.0,15.0,31.0,17.0,24.0,44.0,31.0,31.0,38.0,31.0,28.0,29.0,45.0,29.0,42.0,28.0,38.0,40.0,40.0,36.0,33.0,42.0,37.0,46.0,44.0,45.0,33.0,38.0,34.0,25.0,39.0,39.0,37.0,31.0,32.0,36.0,26.0,25.0,24.0,37.0,42.0,34.0,22.0,23.0,23.0,36.0,31.0,33.0,39.0,39.0,22.0,45.0,39.0,30.0,37.0,37.0,24.0,35.0,37.0,39.0,38.0,32.0,24.0,37.0,35.0,42.0,32.0,33.0,24.0,32.0,45.0,17.0,37.0,50.0,50.0
Q Learning Rewards,-4641.0,-1749.0,-1048.0,-1944.0,-1024.0,-542.0,-517.0,-1274.0,-1134.0,-405.0,-1129.0,-170.0,-1349.0,-3261.0,-533.0,-687.0,-1033.0,-792.0,-345.0,-1067.0,-1168.0,-730.0,-379.0,-320.0,-342.0,-1018.0,-468.0,-332.0,-201.0,-418.0,-779.0,-284.0,-232.0,-554.0,-319.0,-517.0,-326.0,-251.0,-358.0,-316.0,-156.0,-288.0,-179.0,-484.0,-215.0,-119.0,-178.0,-233.0,-218.0,-168.0,-141.0,-791.0,-202.0,-277.0,-174.0,-309.0,-238.0,-160.0,-113.0,-42.0,-416.0,-440.0,-100.0,-213.0,-334.0,-382.0,-248.0,-130.0,-64.0,-505.0,-174.0,-273.0,-392.0,-216.0,-64.0,-127.0,-267.0,-77.0,-289.0,-260.0,-44.0,-153.0,-274.0,-277.0,-1364.0,-52.0,-297.0,-247.0,-92.0,-217.0,-262.0,-181.0,-126.0,-100.0,-269.0,-97.0,-67.0,-131.0,-185.0,-52.0
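A note on parsing: this file holds three metric blocks (steps, milliseconds, total reward), each consisting of a two-line description, an "Iterations" header row, and one row per algorithm, with blocks separated by an all-empty row. The sketch below shows one way to split the blocks apart with the standard `csv` module; the `SAMPLE` string is a truncated stand-in for the file (first iteration columns only), and the `split_blocks` helper name is an illustration, not part of the file.

```python
import csv
import io

# Truncated stand-in for largeGridWorld.csv: two blocks, three
# iteration columns each, separated by an all-empty row.
SAMPLE = """\
The data below shows the number of steps/actions the agent required to reach
the terminal state given the number of iterations for which the algorithm was run.
Iterations,1,2,3
Value Iteration,43677,32110,6207
,,,
The data below shows the number of milliseconds the algorithm required to generate
the optimal policy given the number of iterations for which the algorithm was run.
Iterations,1,2,3
Value Iteration,197,84,68
"""

def split_blocks(rows):
    """Group CSV rows into blocks, splitting on rows whose cells are all empty."""
    blocks, current = [], []
    for row in rows:
        if all(cell == "" for cell in row):
            if current:
                blocks.append(current)
            current = []
        else:
            current.append(row)
    if current:
        blocks.append(current)
    return blocks

blocks = split_blocks(list(csv.reader(io.StringIO(SAMPLE))))
steps_block = blocks[0]
# steps_block[0:2] are the description lines, steps_block[2] is the
# "Iterations" header, and steps_block[3] holds Value Iteration's counts.
```

To parse the real file, replace `io.StringIO(SAMPLE)` with `open("largeGridWorld.csv", newline="")`; each block then yields rows for Value Iteration, Policy Iteration, and Q Learning with 100 iteration columns.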