QPlane.py (+88 -40)
@@ -2,10 +2,15 @@
 import time
 import os
 import numpy as np
-from src.algorithms.QDoubleDeepLearn import QLearn  # can be QLearn, QDeepLearn or QDoubleDeepLearn
+import matplotlib.pyplot as plt
+from src.algorithms.QDoubleDeepLearn import QLearn  # can be QLearn, QDeepLearn, QDoubleDeepLearn or RandomAgent
 from src.environments.jsbsim.JSBSimEnv import Env  # can be jsbsim.JSBSimEnv or xplane.XPlaneEnv
+from src.scenarios.deltaAttitudeControlScene import Scene  # can be deltaAttitudeControlScene, sparseAttitudeControlScene or cheatingAttitudeControlScene
 
 experimentName = "Experiment"
+connectAttempts = 0.0  # counts every time the UDP packets are lost on a single retry
+
+notes = "This experiment was run with..."  # notes saved to the setup file to clarify the experiment setup
 
 dateTime = str(time.ctime(time.time()))
 dateTime = dateTime.replace(":", "-")
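
Worth noting for readers of this diff: the `# can be ...` comments make these imports the experiment's switches. Each algorithm, environment, and scenario module exports its class under the same local name, so changing the agent, simulator, or scenario is a one-line import edit. A sketch of the pattern (not a line from this commit):

```python
# Swap the agent by importing a different module under the same name;
# the rest of QPlane.py keeps calling it QLearn either way.
from src.algorithms.QDeepLearn import QLearn          # single deep Q-network
# from src.algorithms.QDoubleDeepLearn import QLearn  # double deep Q-network
# from src.algorithms.QLearn import QLearn            # tabular Q-learning
```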
@@ -16,25 +21,27 @@
 
 timeStart = time.time()  # used to measure time
 timeEnd = time.time()  # used to measure time
-logPeriod = 10  # every so many epochs the metrics will be printed into the console
+logPeriod = 100  # every so many epochs the metrics will be printed into the console
 savePeriod = 25  # every so many epochs the table/model will be saved to a file
-pauseDelay = 0.01  # time an action is being applied to the environment
+movingRate = 1  # multiplied with savePeriod; the rate at which the metrics are averaged, saved and plotted
+pauseDelay = 0.1  # time an action is being applied to the environment
 logDecimals = 0  # sets decimals for np.arrays to X for printing
 np.set_printoptions(precision=logDecimals)  # sets decimals for np.arrays to X for printing
 
-n_epochs = 5000  # Number of generations
-n_steps = 1000  # Number of inputs per generation
+n_epochs = 50_000  # Number of generations
+n_steps = 1_000  # Number of inputs per generation
 n_actions = 4  # Number of possible inputs to choose from
 
-n_states = 729  # Number of states for non-Deep QLearning
-gamma = 0.95  # The discount rate - between 0 and 1! If 0, there is no learning. The higher it is, the more the new q will factor into the update of the q value
+n_states = 182  # Number of states for non-Deep QLearning
+gamma = 0.75  # The discount rate - between 0 and 1! If 0, there is no learning. The higher it is, the more the new q will factor into the update of the q value
 lr = 0.0001  # Learning Rate. Deep ~0.0001 / non-Deep ~0.01 - If lr is 0, the Q value would not update. The higher the value, the quicker the agent will adopt the new Q value. If lr = 1, the updated value would be exactly the newly calculated q value, completely ignoring the previous one
 epsilon = 1.0  # Starting Epsilon Rate, affects the exploration probability. Will decay
 decayRate = 0.00001  # Rate at which epsilon will decay per step
 epsilonMin = 0.1  # Minimum value at which epsilon will stop decaying
 n_epochsBeforeDecay = 10  # number of games to be played before epsilon starts to decay
 
-numOfInputs = 8  # Number of inputs fed to the model
+numOfInputs = 7  # Number of inputs fed to the model
+stateDepth = 1  # Number of old observations kept for the current state. The state will consist of s(t) ... s(t_n)
 minReplayMemSize = 1_000  # min size determines when the replay will start being used
 replayMemSize = 100_000  # Max size for the replay buffer
 batchSize = 256  # Batch size for the model
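
The gamma and lr comments above describe the standard tabular Q-learning update, and the epsilon block describes a per-step decay with a floor. As a minimal illustration of what those comments mean, using this commit's values (the function and variable names are illustrative, and whether the repo decays epsilon linearly or multiplicatively is not visible in this hunk; linear is assumed here):

```python
import numpy as np

n_states, n_actions = 182, 4
gamma, lr = 0.75, 0.0001
epsilon, decayRate, epsilonMin = 1.0, 0.00001, 0.1

qTable = np.zeros((n_states, n_actions))

def updateQ(state, action, reward, nextState):
    # lr blends old and new: lr=0 never updates, lr=1 adopts the new target outright.
    target = reward + gamma * np.max(qTable[nextState])  # gamma discounts future value
    qTable[state, action] += lr * (target - qTable[state, action])

def decayEpsilon(epsilon):
    # Linear per-step decay, clipped at the epsilonMin floor (assumption).
    return max(epsilonMin, epsilon - decayRate)
```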
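
Likewise, minReplayMemSize, replayMemSize and batchSize are the usual experience-replay knobs: a bounded buffer that only starts serving training batches once it holds enough transitions. A minimal deque-based sketch of that mechanism (not the repo's actual implementation):

```python
import random
from collections import deque

replayMemSize = 100_000   # max size for the replay buffer
minReplayMemSize = 1_000  # training starts only after this many transitions
batchSize = 256

replayMemory = deque(maxlen=replayMemSize)  # oldest transitions fall out automatically

def remember(state, action, reward, nextState, done):
    replayMemory.append((state, action, reward, nextState, done))

def sampleBatch():
    if len(replayMemory) < minReplayMemSize:
        return None  # not enough experience yet; skip training this step
    return random.sample(replayMemory, batchSize)
```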
@@ -47,8 +54,16 @@
 jsbRender = False  # will send UDP data to FlightGear for rendering if True
 jsbRealTime = False  # will slow down the physics to portray real-time rendering
 usePredefinedSeeds = False  # Sets seeds for tf, np and random for more replicable results (not fully replicable due to stochastic environments)
+saveResultsToPlot = True  # Saves results to a png in the experiment folder at runtime
 saveForAutoReload = False  # Saves and overwrites models, results and memory to the root
 
+startingVelocity = 60
+startingPitchRange = 10
+startingRollRange = 15
+randomDesiredState = True  # Set a new state to stabilize towards every episode
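
The new startingVelocity, startingPitchRange and startingRollRange constants, together with randomDesiredState, suggest that every episode draws a fresh attitude target inside those ranges. The scene code is not part of this hunk, but the sampling it implies would look roughly like:

```python
import random

startingPitchRange = 10  # degrees, assumed symmetric around level flight
startingRollRange = 15   # degrees, assumed symmetric around level flight

def sampleDesiredAttitude():
    # Pick a new pitch/roll target to stabilize towards for this episode.
    pitch = random.uniform(-startingPitchRange, startingPitchRange)
    roll = random.uniform(-startingRollRange, startingRollRange)
    return pitch, roll
```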
setup=f"{experimentName=}\n{dateTime=}\nendTime=not yet defined - first save\n{Q.id=}\n{env.id=}\n{pauseDelay=}\n{n_epochs=}\n{n_steps=}\n{n_actions=}\n"
131
-
setup+=f"{n_states=} - states for non deep\n{gamma=}\n{lr=}\n{epsilon=}\n{decayRate=}\n{epsilonMin=}\n{n_epochsBeforeDecay=}\n"
137
+
setup=f"{experimentName=}\n{Q.numGPUs=}\n{dateTime=}\nendTime=not yet defined - first save\n{Q.id=}\n{env.id=}\n{scene.id=}\n{pauseDelay=}\n{n_epochs=}\n"
138
+
setup+=f"{n_steps=}\n{n_actions=}\n{n_states=} - states for non deep\n{gamma=}\n{lr=}\n{epsilon=}\n{decayRate=}\n{epsilonMin=}\n{n_epochsBeforeDecay=}\n"
132
139
setup+=f"{numOfInputs=} - states for deep\n{minReplayMemSize=}\n{replayMemSize=}\n{batchSize=}\n{updateRate=}\n{loadModel=}\n{movingRate=}\n"
setup+=f"{n_steps=}\n{n_actions=}\n{n_states=} - states for non deep\n{gamma=}\n{lr=}\n{epsilon=}\n{decayRate=}\n{epsilonMin=}\n{n_epochsBeforeDecay=}\n"
278
325
setup+=f"{numOfInputs=} - states for deep\n{minReplayMemSize=}\n{replayMemSize=}\n{batchSize=}\n{updateRate=}\n{loadModel=}\n{movingRate=}\n"