Reinforcement-Learning-in-Blackjack/trainingWIthAllMethods.py at master · akinolawilson/Reinforcement-Learning-in-Blackjack · GitHub

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
import training as t
import numpy as np

"""
Running this script produces the Q table for each of the control methods:
Q-Learning (QL), Temporal Difference (TD) and State-Action-Reward-State-Action
(SARSA). A table is produced for every deck size from 1 to 10 decks.
"""
# Method codes handed to t.agentTraining, one training run per code.
policySearchMethods = ['QL', 'TD', 'SARSA']

# Deck counts to train on: 1 through 10 inclusive.
gameSizes = list(range(1, 11))

# Iteration counts for the two training phases; element 0 and element 1 are
# passed to t.agentTraining as its second and third arguments (presumably
# exploration first, then exploitation -- confirm against the training module).
exploreVsExploit = [1000, 5000]

# Flat accumulator filled by the training loop below.
results = []

# Train one agent for every (deck count, control method) pair, record summary
# values in `results`, and write each resulting Q table to its own file.
for gameSize in gameSizes:
    for methods in policySearchMethods:
        # The trailing 1 and 0 are positional arguments whose meaning is
        # defined by training.agentTraining -- TODO confirm against that module.
        QTable, pe, pO, pointOptimal, pointEgreedy = t.agentTraining(
            gameSize,
            exploreVsExploit[0],
            exploreVsExploit[1],
            methods,
            1,
            0,
        )

        # Total number of entries over all rows of the Q table; this is the
        # figure the script records as the number of interactions.
        numberOfinteractions = sum(len(row) for row in QTable)

        # `results` stays a flat list: each run contributes these seven
        # values consecutively, in this order.
        results.extend([
            methods,
            gameSize,
            numberOfinteractions,
            pe,
            pO,
            pointOptimal,
            pointEgreedy,
        ])

        # One output file per run, e.g. 'qTableQL1.csv'. The content is
        # tab-delimited even though the file name ends in .csv.
        np.savetxt('qTable' + str(methods) + str(gameSize) + '.csv',
                   QTable,
                   newline='\n',
                   delimiter="\t",
                   fmt='%s')