Commit 6c06f5b

Merge pull request #1 from JDatPNW/featureScenarios
Feature scenarios
2 parents c3554a8 + a114515 commit 6c06f5b

18 files changed: +1174 −426 lines

.gitignore (+1)
@@ -2,6 +2,7 @@
 X-Plane.lnk
 FlightGear.lnk
 Experiments/*
+TestingResults/*
 utils/*.npy
 *.h5
 *.npy

QPlane.py (+88 −40)
@@ -2,10 +2,15 @@
 import time
 import os
 import numpy as np
-from src.algorithms.QDoubleDeepLearn import QLearn # can be QLearn, QDeepLearn or QDoubleDeepLearn
+import matplotlib.pyplot as plt
+from src.algorithms.QDoubleDeepLearn import QLearn # can be QLearn, QDeepLearn, QDoubleDeepLearn or RandomAgent
 from src.environments.jsbsim.JSBSimEnv import Env # can be jsbsim.JSBSimEnv or xplane.XPlaneEnv
+from src.scenarios.deltaAttitudeControlScene import Scene # can be deltaAttitudeControlScene, sparseAttitudeControlScene or cheatingAttitudeControlScene
 
 experimentName = "Experiment"
+connectAttempts = 0.0 # counts every time the UDP packets are lost on a single retry
+
+notes = "This experiment was run with..." # notes that will be saved to the setup file to clarify the experiment setup
 
 dateTime = str(time.ctime(time.time()))
 dateTime = dateTime.replace(":", "-")
@@ -16,25 +21,27 @@
 
 timeStart = time.time() # used to measure time
 timeEnd = time.time() # used to measure time
-logPeriod = 10 # every so many epochs the metrics will be printed into the console
+logPeriod = 100 # every so many epochs the metrics will be printed into the console
 savePeriod = 25 # every so many epochs the table/model will be saved to a file
-pauseDelay = 0.01 # time an action is being applied to the environment
+movingRate = 1 # is multiplied with savePeriod; the rate at which the metrics will be averaged, saved and plotted
+pauseDelay = 0.1 # time an action is being applied to the environment
 logDecimals = 0 # sets decimals for np.arrays to X for printing
 np.set_printoptions(precision=logDecimals) # sets decimals for np.arrays to X for printing
 
-n_epochs = 5000 # Number of generations
-n_steps = 1000 # Number of inputs per generation
+n_epochs = 50_000 # Number of generations
+n_steps = 1_000 # Number of inputs per generation
 n_actions = 4 # Number of possible inputs to choose from
 
-n_states = 729 # Number of states for non-Deep QLearning
-gamma = 0.95 # The discount rate - between 0 and 1! If 0, future rewards are ignored; the higher it is, the more the new q value factors into the update
+n_states = 182 # Number of states for non-Deep QLearning
+gamma = 0.75 # The discount rate - between 0 and 1! If 0, future rewards are ignored; the higher it is, the more the new q value factors into the update
 lr = 0.0001 # Learning Rate. Deep ~0.0001 / non-Deep ~0.01 - If lr is 0 the Q value would not update. The higher the value, the quicker the agent adopts the new Q value. If lr = 1, the updated value would be exactly the newly calculated q value, completely ignoring the previous one
 epsilon = 1.0 # Starting epsilon rate, affects the exploration probability. Will decay
 decayRate = 0.00001 # Rate at which epsilon will decay per step
 epsilonMin = 0.1 # Minimum value at which epsilon will stop decaying
 n_epochsBeforeDecay = 10 # number of games to be played before epsilon starts to decay
 
-numOfInputs = 8 # Number of inputs fed to the model
+numOfInputs = 7 # Number of inputs fed to the model
+stateDepth = 1 # Number of old observations kept for the current state. State will consist of s(t) ... s(t-n)
 minReplayMemSize = 1_000 # min size determines when the replay will start being used
 replayMemSize = 100_000 # Max size for the replay buffer
 batchSize = 256 # Batch size for the model
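The comments on gamma and lr above describe the standard tabular Q update, and decayRate, epsilonMin and n_epochsBeforeDecay describe a per-step epsilon schedule. A minimal sketch of that behaviour, assuming the textbook update and a linear decay (the real logic lives in src/algorithms and may differ; q_table, state, action, reward and new_state are illustrative names only):

    # Illustrative tabular Q update matching the gamma/lr comments above
    target = reward + gamma * np.max(q_table[new_state])              # gamma = 0 ignores future value
    q_table[state, action] += lr * (target - q_table[state, action])  # lr = 1 adopts the target outright

    # Plausible per-step epsilon decay using decayRate / epsilonMin / n_epochsBeforeDecay
    if i_epoch >= n_epochsBeforeDecay:
        epsilon = max(epsilonMin, epsilon - decayRate)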
@@ -47,8 +54,16 @@
 jsbRender = False # will send UDP data to flight gear for rendering if True
 jsbRealTime = False # will slow down the physics to portray real-time rendering
 usePredefinedSeeds = False # Sets seeds for tf, np and random for more replicable results (not fully replicable due to stochastic environments)
+saveResultsToPlot = True # Saves results to png in the experiment folder at runtime
 saveForAutoReload = False # Saves and overrides models, results and memory to the root
 
+startingVelocity = 60
+startingPitchRange = 10
+startingRollRange = 15
+randomDesiredState = True # Set a new state to stabilize towards every episode
+desiredPitchRange = 5
+desiredRollRange = 5
+
 dictObservation = {
     "lat": 0,
     "long": 1,
@@ -70,33 +85,22 @@
     "update": 0,
     "step": 0}
 dictRotation = {
-    "pitch": 0,
-    "roll": 1,
-    "velocityY": 2}
+    "roll": 0,
+    "pitch": 1,
+    "yaw": 2,
+    "northVelo": 3,
+    "eastVelo": 4,
+    "verticalVelo": 5}
 
 # -998->NO CHANGE
 flightOrigin = [35.126, 126.809, 6000, 0, 0, 0, 1] # Gwangju SK
 flightDestinaion = [33.508, 126.487, 6000, -998, -998, -998, 1] # Jeju SK
-startingVelocity = -55
 # Other locations to use: Memmingen: [47.988, 10.240], Chicago: [41.976, -87.902]
 
-flightStartPitch = 10 # Will be used as -value / 0 / value
-flightStartRoll = 15 # Will be used as -value / 0 / value
-flightStartVelocityY = 10 # Will be used as -value / 0 / value
-
-flightStartRotation = [[-flightStartPitch, -flightStartRoll, -flightStartVelocityY],
-                       [-flightStartPitch, 0, -flightStartVelocityY],
-                       [-flightStartPitch, flightStartRoll, -flightStartVelocityY],
-                       [0, -flightStartRoll, -0],
-                       [0, 0, 0],
-                       [0, flightStartRoll, 0],
-                       [flightStartPitch, -flightStartRoll, flightStartVelocityY],
-                       [flightStartPitch, 0, flightStartVelocityY],
-                       [flightStartPitch, flightStartRoll, flightStartVelocityY]]
-
 epochRewards = []
 epochQs = []
-movingRate = 3 * len(flightStartRotation) # Number given in number * len(flightStartRotation)
+movingRate = savePeriod * movingRate # gives the number by which the moving average will be done, best if n * savePeriod
 movingEpRewards = {
     "epoch": [],
     "average": [],
@@ -106,6 +110,7 @@
     "epsilon": []}
 
 fallbackState = [0] * numOfInputs # Used in case of connection error to XPlane
+fallbackState = [tuple(fallbackState)]
 
 # Will load previous results in case an experiment needs to be continued
 if(loadResults):
@@ -119,17 +124,20 @@
 
 Q = QLearn(n_states, n_actions, gamma, lr, epsilon,
            decayRate, epsilonMin, n_epochsBeforeDecay, experimentName, saveForAutoReload, loadModel, usePredefinedSeeds,
-           loadMemory, numOfInputs, minReplayMemSize, replayMemSize, batchSize, updateRate)
+           loadMemory, numOfInputs, minReplayMemSize, replayMemSize, batchSize, updateRate, stateDepth)
 
-env = Env(flightOrigin, flightDestinaion, n_actions, usePredefinedSeeds,
+scene = Scene(dictObservation, dictAction, n_actions, stateDepth, startingVelocity, startingPitchRange, startingRollRange, usePredefinedSeeds, randomDesiredState, desiredPitchRange, desiredRollRange)
+
+env = Env(scene, flightOrigin, flightDestinaion, n_actions, usePredefinedSeeds,
           dictObservation, dictAction, dictRotation, startingVelocity, pauseDelay, Q.id, jsbRender, jsbRealTime)
 
 # saving setup pre run
 if not os.path.exists("./Experiments/" + experimentName):
     os.makedirs("./Experiments/" + experimentName)
-setup = f"{experimentName=}\n{dateTime=}\nendTime=not yet defined - first save\n{Q.id=}\n{env.id=}\n{pauseDelay=}\n{n_epochs=}\n{n_steps=}\n{n_actions=}\n"
-setup += f"{n_states=} - states for non deep\n{gamma=}\n{lr=}\n{epsilon=}\n{decayRate=}\n{epsilonMin=}\n{n_epochsBeforeDecay=}\n"
+setup = f"{experimentName=}\n{Q.numGPUs=}\n{dateTime=}\nendTime=not yet defined - first save\n{Q.id=}\n{env.id=}\n{scene.id=}\n{pauseDelay=}\n{n_epochs=}\n"
+setup += f"{n_steps=}\n{n_actions=}\n{n_states=} - states for non deep\n{gamma=}\n{lr=}\n{epsilon=}\n{decayRate=}\n{epsilonMin=}\n{n_epochsBeforeDecay=}\n"
 setup += f"{numOfInputs=} - states for deep\n{minReplayMemSize=}\n{replayMemSize=}\n{batchSize=}\n{updateRate=}\n{loadModel=}\n{movingRate=}\n"
+setup += f"{randomDesiredState=}\n{desiredRollRange=}\n{desiredPitchRange=}\n{startingRollRange=}\n{startingPitchRange=}\n{startingVelocity=}\n{stateDepth=}\n{Q.modelSummary=}\n{notes=}\n"
 print(setup, file=open("./Experiments/" + str(experimentName) + "/setup.out", 'w')) # saves hyperparameters to the experiment folder
 
 
@@ -138,19 +146,29 @@ def log(i_epoch, i_step, reward, logList):
     global timeStart # Used to print time elapsed between log calls
     global timeEnd # Used to print time elapsed between log calls
 
-    state = logList[1]
+    old_state = logList[0]
+    new_state = logList[1]
     actions_binary = logList[3]
     observation = logList[4]
     control = logList[5]
     explore = logList[6]
     currentEpsilon = logList[7]
+    if(Q.id == "deep" or Q.id == "doubleDeep"):
+        depth = len(old_state)
+        depth = "Depth " + str(depth)
+        old_state = old_state[-1]
+        new_state = new_state[-1]
+    else:
+        depth = ""
 
     timeEnd = time.time() # End timer here
     print("\t\tGame ", i_epoch,
           "\n\t\t\tMove ", i_step,
-          "\n\t\t\tStarting Rotation ", flightStartRotation[i_epoch % len(flightStartRotation)],
+          "\n\t\t\tStarting Rotation ", np.array(env.startingOrientation).round(logDecimals),
+          "\n\t\t\tDestination Rotation ", env.desiredState,
           "\n\t\t\tTime taken ", timeEnd - timeStart,
-          "\n\t\t\tState ", np.array(state).round(logDecimals),
+          "\n\t\t\tOld State ", np.array(old_state).round(logDecimals), depth,
+          "\n\t\t\tNew State ", np.array(new_state).round(logDecimals), depth,
          "\n\t\t\t\t\t[p+,p-,r+,r-]",
           "\n\t\t\tactions_binary = ", actions_binary,
           "\n\t\t\tCurrent Control:", control,
@@ -160,6 +178,7 @@ def log(i_epoch, i_step, reward, logList):
           "\n\t\t\tExplored (Random): ", explore,
           "\n\t\t\tCurrent Epsilon: ", currentEpsilon,
           "\n\t\t\tCurrent Reward: ", reward,
+          "\n\t\t\tReconnects Percentage & Count: ", float(connectAttempts / (i_epoch * n_steps + i_step + 1)), ",", connectAttempts,
           "\n\t\t\tError Percentage & Count: ", float(errors / (i_epoch * n_steps + i_step + 1)), ",", errors,
           "\n\t\t\tError Code: ", dictErrors, "\n")
     timeStart = time.time() # Start timer here
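The depth handling above (old_state[-1] for the deep agents) reflects the new stateDepth option: the deep agents' state is a stack of the last stateDepth observations. A minimal sketch of one common way such a stack is kept, assuming a fixed-length buffer of observation tuples (how QDoubleDeepLearn actually stores it is not shown in this diff; numOfInputs and stateDepth are the hyperparameters defined earlier):

    from collections import deque

    # Hypothetical stacked-state buffer: state = [s(t-n), ..., s(t)]
    stateBuffer = deque([tuple([0] * numOfInputs)] * stateDepth, maxlen=stateDepth)

    def push_observation(obs):
        stateBuffer.append(tuple(obs))  # oldest observation drops out automatically
        return list(stateBuffer)        # list of tuples; [-1] is the newest, as used in log() above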
@@ -168,6 +187,8 @@ def log(i_epoch, i_step, reward, logList):
 # A single step(input), this will repeat n_steps times throughout an epoch
 def step(i_step, done, reward, oldState):
     global errors
+    global connectAttempts
+
     if(Q.id == "deep" or Q.id == "doubleDeep"):
         oldState = list(oldState)
 
@@ -181,6 +202,7 @@ def step(i_step, done, reward, oldState):
                 done = True # mark done if episode is finished
         except socket.error as socketError: # the specific error for connections used by xpc
             dictErrors["step"] = socketError
+            connectAttempts += 1
             continue
         else:
             break
@@ -198,6 +220,17 @@ def step(i_step, done, reward, oldState):
     newPosition = info[0]
     actions_binary = info[1]
     control = info[2]
+    # checking if state includes a NaN (happens in JSBSim sometimes)
+    if(np.isnan(newState).any()):
+        if(Q.id == "deep" or Q.id == "doubleDeep"):
+            newState = fallbackState
+        else:
+            newState = 0
+        reward = 0
+        info = [[0, 0, 0, 0, 0, 0], [0, 0, 0, 0, 0, 0], 0]
+        dictErrors["step"] = "NaN in state"
+        errors += 1
+        done = True
 
     Q.learn(oldState, action, reward, newState, done)
     logList = [oldState, newState, action, actions_binary, newPosition, control, explore, currentEpsilon]
@@ -212,13 +245,16 @@ def step(i_step, done, reward, oldState):
 # An epoch is one full run, from respawn/reset to the final step.
 def epoch(i_epoch):
     global errors
+    global connectAttempts
+
     epochReward = 0
     epochQ = 0
     for attempt in range(25):
         try:
-            oldState = env.reset(env.startingPosition, flightStartRotation[i_epoch % len(flightStartRotation)])
+            oldState = env.reset()
         except socket.error as socketError: # the specific error for connections used by xpc
             dictErrors["reset"] = socketError
+            connectAttempts += 1
             continue
         else:
             break
@@ -249,7 +285,7 @@ def epoch(i_epoch):
 
     epochRewards.append(epochReward)
     epochQs.append(epochQ)
-    if(i_epoch % movingRate == 0):
+    if(i_epoch % movingRate == 0 and i_epoch != 0):
         movingEpRewards["epoch"].append(i_epoch)
         averageReward = sum(epochRewards[-movingRate:]) / len(epochRewards[-movingRate:])
         movingEpRewards["average"].append(averageReward)
@@ -266,16 +302,28 @@ def epoch(i_epoch):
         np.save("./Experiments/" + str(experimentName) + "/results" + str(i_epoch) + ".npy", movingEpRewards)
         if(saveForAutoReload):
             np.save("results.npy", movingEpRewards)
-
+    if(saveResultsToPlot and i_epoch % movingRate == 0):
+        plt.plot(movingEpRewards['epoch'], movingEpRewards['average'], label="average rewards")
+        plt.plot(movingEpRewards['epoch'], movingEpRewards['averageQ'], label="average Qs")
+        plt.plot(movingEpRewards['epoch'], movingEpRewards['maximum'], label="max rewards")
+        plt.plot(movingEpRewards['epoch'], movingEpRewards['minimum'], label="min rewards")
+        plt.plot(movingEpRewards['epoch'], movingEpRewards['epsilon'], label="epsilon")
+        plt.title("Results")
+        plt.xlabel("episodes")
+        plt.ylabel("reward")
+        plt.legend(loc=4)
+        plt.savefig("./Experiments/" + str(experimentName) + "/plot" + str(i_epoch) + ".png")
+        plt.clf()
 
 np.save("./Experiments/" + str(experimentName) + "/results_final.npy", movingEpRewards)
 
 endTime = str(time.ctime(time.time()))
 
 # saving setup post run
-setup = f"{experimentName=}\n{dateTime=}\n{endTime=}\n{Q.id=}\n{env.id=}\n{pauseDelay=}\n{n_epochs=}\n{n_steps=}\n{n_actions=}\n"
-setup += f"{n_states=} - states for non deep\n{gamma=}\n{lr=}\n{epsilon=}\n{decayRate=}\n{epsilonMin=}\n{n_epochsBeforeDecay=}\n"
+setup = f"{experimentName=}\n{Q.numGPUs=}\n{dateTime=}\n{endTime=}\n{Q.id=}\n{env.id=}\n{scene.id=}\n{pauseDelay=}\n{n_epochs=}\n"
+setup += f"{n_steps=}\n{n_actions=}\n{n_states=} - states for non deep\n{gamma=}\n{lr=}\n{epsilon=}\n{decayRate=}\n{epsilonMin=}\n{n_epochsBeforeDecay=}\n"
 setup += f"{numOfInputs=} - states for deep\n{minReplayMemSize=}\n{replayMemSize=}\n{batchSize=}\n{updateRate=}\n{loadModel=}\n{movingRate=}\n"
+setup += f"{randomDesiredState=}\n{desiredRollRange=}\n{desiredPitchRange=}\n{startingRollRange=}\n{startingPitchRange=}\n{startingVelocity=}\n{stateDepth=}\n{Q.modelSummary=}\n{notes=}\n"
 print(setup, file=open("./Experiments/" + str(experimentName) + "/setup.out", 'w')) # saves hyperparameters to the experiment folder
 
-print("<<<<<<<<<<<<<<<<<<<<DONE>>>>>>>>>>>>>>>>>>>>>")
+print("<<<<<<<<<<<<<<<<<<<<DONE>>>>>>>>>>>>>>>>>>>>>")
