-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathPlayer.py
198 lines (156 loc) · 8.02 KB
/
Player.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
from copy import deepcopy
from math import inf
# Human Player
class Player:
    """A human player in Chopsticks.

    State is the two-element list ``hands`` holding the number of fingers
    out on each hand; a hand at 0 is "out" and cannot tap or be tapped.
    """

    def __init__(self):
        # Initial state is one finger out on each hand.
        self.hands = [1, 1]  # [left hand, right hand]

    def __str__(self):
        # Return the current number of fingers out on each hand.
        return str(self.hands)

    def getPossibleMoves(self, opponent):
        """Return all legal move strings for this player (used to help the AI).

        ``opponent`` is consulted for its current hand positions.  Moves are
        strings: "split a b" (redistribute to [a, b]) or "tap h1 h2" (tap
        opponent's hand ``h2`` with our hand ``h1``).
        """
        moves = []
        # Splits: every redistribution of the current finger total.
        points = sum(self.hands)
        # Cannot have more than four fingers up on one hand, so each side
        # ranges over [max(0, points - 4), min(points, 4)].
        for i in range(max(0, points - 5 + 1), min(points + 1, 5)):
            current = [i, points - i]
            if current != self.hands:  # cannot have same hands after split
                moves.append("split {} {}".format(current[0], current[1]))
        # Taps: any active hand of ours against any active hand of theirs.
        p1HandsAvailable = [i for i, hand in enumerate(self.hands) if hand != 0]
        p2HandsAvailable = [i for i, hand in enumerate(opponent.hands) if hand != 0]
        for hand1 in p1HandsAvailable:
            for hand2 in p2HandsAvailable:
                moves.append("tap {} {}".format(hand1, hand2))
        return moves

    def split(self, new):
        """Redistribute fingers to the position ``new``.

        Raises:
            Exception: if ``new`` equals the current position ("No change"),
                puts a hand outside 0-4 ("Hand out of bounds"), or changes
                the finger total ("Invalid split").
        """
        if new == self.hands:
            raise Exception("No change")
        elif min(new) < 0 or max(new) >= 5:
            raise Exception("Hand out of bounds")
        elif sum(new) != sum(self.hands):
            raise Exception("Invalid split")
        self.hands = new[:]  # copy so the caller's list is not aliased

    def tap(self, opponent, hand1, hand2):
        """Tap opponent's ``hand2`` with our ``hand1``.

        Both indices must be in range and both hands must be active.
        Returns True if the opponent's last hand goes out (game won),
        else False.
        """
        for hand in [hand1, hand2]:
            if not 0 <= hand < len(self.hands):
                raise Exception("Invalid hand {}".format(hand))
        # BUG FIX: raise per hand so the message names the hand that is
        # actually out.  The original formatted the leaked loop variable
        # ``hand``, which after the loop always equals hand2, so an
        # inactive hand1 was reported with the wrong index.
        if self.hands[hand1] == 0:
            raise Exception("Hand {} out".format(hand1))
        if opponent.hands[hand2] == 0:
            raise Exception("Hand {} out".format(hand2))
        return opponent.receiveTap(hand2, self.hands[hand1])

    def receiveTap(self, hand, amount):
        """Add ``amount`` fingers to ``hand`` after being tapped.

        The tapped hand must be in range and still active.  A hand that
        reaches five or more fingers goes out (reset to 0).
        Returns True if both hands are now out (this player lost), else False.
        """
        if not 0 <= hand < len(self.hands):
            raise Exception("Invalid hand {}".format(hand))
        if self.hands[hand] == 0:
            raise Exception("Hand {} out".format(hand))
        self.hands[hand] += amount
        if self.hands[hand] >= 5:  # hand is no longer active
            self.hands[hand] = 0
        return sum(self.hands) == 0
# AI Player, extends Human Player
class AI(Player):
    """Computer player that picks moves with depth-limited minimax search
    (with alpha-beta pruning) or, when ``minimax=False``, expectimax."""

    def __init__(self, idx, minimax=True):
        super().__init__()
        self.idx = idx  # index in the players list
        self.other = (idx + 1) % 2  # either 0 or 1, opposite of idx
        self.depth = 8  # depth of Minimax search
        self.minimax = minimax  # False = expectimax
        if not self.minimax:
            self.depth = 6  # slower since no alpha beta pruning

    def evaluation(self, opponent):
        # Score of the current state: prefers the opponent to have more
        # fingers out and the AI to have fewer fingers out.
        return sum(opponent.hands) - sum(self.hands)

    def getOptimalMove(self, opponent):
        """Return the optimal move string for the AI given the opponent's
        current state, using minimax with alpha-beta pruning (or expectimax
        when ``self.minimax`` is False)."""

        # Optimal move for the AI (the max player) given the players list,
        # current depth, and alpha/beta.  Returns (max value, optimal move).
        def maxValue(players, depth, alpha, beta):
            best = (-inf, None)
            moves = players[self.idx].getPossibleMoves(players[self.other])
            for move in moves:
                # Copy players; generateSuccessorState mutates the copy.
                newPlayers = deepcopy(players)
                if generateSuccessorState(newPlayers, self.idx, self.other, move):
                    # AI wins at this point, no need to travel deeper.
                    return (100, move)
                score = value(newPlayers, depth + 1, move, alpha, beta)
                alpha = max(alpha, score[0])
                # Only update best when strictly greater so alpha-beta
                # pruning works as intended.
                if score[0] > best[0]:
                    best = score
                if self.minimax and alpha >= beta:
                    break
            if len(moves) == 0:  # should always be a move possible
                best = (players[self.idx].evaluation(players[self.other]), None)
            return best

        # Optimal value for the opponent (the min player); returns a scalar
        # (the move does not matter for the opponent).  When
        # self.minimax is False, uses expectimax (average over moves) to
        # model a player who is not playing optimally.
        def minValue(players, depth, alpha, beta):
            best = inf
            if not self.minimax:  # take average for expectimax
                best = 0
            moves = players[self.other].getPossibleMoves(players[self.idx])
            for move in moves:
                # Copy players; generateSuccessorState mutates the copy.
                newPlayers = deepcopy(players)
                if generateSuccessorState(newPlayers, self.other, self.idx, move):
                    # Opponent wins at this point, no need to travel deeper.
                    return -100
                score = value(newPlayers, depth + 1, None, alpha, beta)[0]
                if self.minimax:
                    beta = min(beta, score)
                    best = min(best, score)
                else:  # expectimax
                    best += score / len(moves)
                if self.minimax and alpha >= beta:
                    break
            if len(moves) == 0:  # should always be a move possible
                # BUG FIX: minValue must return a scalar score.  The
                # original assigned an (eval, None) tuple here, which
                # value() would wrap again and maxValue would then compare
                # against numeric scores, raising a TypeError.
                best = players[self.idx].evaluation(players[self.other])
            return best

        # Value of the current state/move.
        def value(players, depth, move=None, alpha=-inf, beta=inf):
            if depth == self.depth:  # depth limit: return evaluation function
                return (players[self.idx].evaluation(players[self.other]), move)
            findMax = (depth % 2 == 0)  # even depths are max (AI) levels
            if findMax:
                return maxValue(players, depth, alpha, beta)
            else:
                # Attach the max player's move to the returned value.
                return (minValue(players, depth, alpha, beta), move)

        # Build the players list with self at its own index.
        players = [opponent, opponent]
        players[self.idx] = self
        move = value(players, 0)  # initial depth is 0
        return move[1]  # return optimal move
# Apply a move to the players list, given the index of the player whose
# turn it is (turn) and the index of the opponent (other).
# Mutates the affected entries of players in place.
# Return True if the current player wins after the move, else False.
def generateSuccessorState(players, turn, other, move):
    tokens = move.split()
    if len(tokens) != 3:
        raise Exception("Invalid move")
    action, first, second = tokens[0], int(tokens[1]), int(tokens[2])
    if action == "tap":
        # tap() already reports whether the opponent just lost.
        return players[turn].tap(players[other], first, second)
    if action == "split":
        players[turn].split([first, second])
        return False  # a split can never end the game
    raise Exception("Invalid move")