Encoder branch #7

Open — wants to merge 3 commits into base: main
134 changes: 134 additions & 0 deletions deep_q/deep_q_session.py
@@ -0,0 +1,134 @@
from __future__ import annotations
import numpy as np
import torch

# Utility function. Lives outside QSession because it needs to know which
# actions can be taken from s_prime (environment-specific knowledge).
def get_maximal_Q(session: QSession, s_prime):

    # TODO: enumerate the actions that can be taken from s_prime
    actions = []

    # Guard against the placeholder empty action list
    if not actions:
        return 0.0

    action_Qs = np.zeros(len(actions))

    for i, a in enumerate(actions):
        action_Qs[i] = session.get_Q(s_prime, a)

    return action_Qs.max()

# Placeholder network: an identity mapping until a real architecture is defined
class QModel(torch.nn.Module):

def __init__(self):
super().__init__()

def forward(self, data):
return data

class QSession:

def __init__(self):

# Initialized in either new_model or load_model
self.model: torch.nn.Module = None
self.optimizer = None

# This will likely just hold a list of tensors
self.replay_buffer = []

# The weight of future rewards. Set this to something reasonable
self.gamma = 0.9

    # Encodes a state-action pair into a tensor the network can consume
    def encode(self, state, action) -> torch.Tensor:
        pass

    # Set this to a reasonable loss function (mean squared error for now)
    def loss_function(self, x: torch.Tensor, y: torch.Tensor) -> torch.Tensor:
        return (x - y).square().mean()

    # Stores the transition in replay_buffer
    def register_in_buffer(self, state, action, reward, s_prime):
        self.replay_buffer.append(
            [state, action, reward, s_prime]
        )

    # Replays transitions seen in the past to regularize training
    def train_on_past_values(self, batch_size=32):  # default batch size is an arbitrary placeholder

        if not self.replay_buffer:
            return

        # Sample random transitions from replay_buffer
        samples = np.random.randint(0, len(self.replay_buffer), size=batch_size)

        for i in samples:
            self.train(*self.replay_buffer[i], new=False)

    # Updates the network based on a single transition (s, a, r, s').
    def train(self, state, action, reward, s_prime, new=True) -> torch.Tensor:

        if new:
            self.register_in_buffer(state, action, reward, s_prime)

        input_encoded = self.encode(state, action)

        # Forward pass of the network
        pred_Q = self.model(input_encoded)

        # Bellman target: r + gamma * max_a' Q(s', a')
        target_Q = reward + get_maximal_Q(self, s_prime) * self.gamma

        # Calculate loss
        loss = self.loss_function(pred_Q, target_Q)

        # Update weights
        self.optimizer.zero_grad()
        loss.backward()
        self.optimizer.step()

        # Replay past transitions, but only on fresh (non-replayed) steps
        if new:
            self.train_on_past_values()

        # Return loss
        return loss

def get_Q(self, state, action):

with torch.no_grad():
pred_Q = self.model(self.encode(state, action))

return pred_Q

def new_model(self):
self.model = QModel()
self.optimizer = torch.optim.Adam(
filter(lambda x: x.requires_grad, self.model.parameters()),
lr=0.001,
)

def save_model(self, save_path):
torch.save(self.model, save_path)

def load_model(self, model_filepath):
self.model = torch.load(model_filepath)
self.optimizer = torch.optim.Adam(
filter(lambda x: x.requires_grad, self.model.parameters()),
lr=0.001,
)

if __name__ == "__main__":

# Example instance
session = QSession()
session.new_model()

    # The agent will traverse the state space. As it traverses, it receives states, actions, and rewards from the environment.
    # Run this as many times as you want, exploring different parts of the state space.
state, action, reward, s_prime = None, None, None, None
session.train(state, action, reward, s_prime)

# Save model trained somewhere
save_path = ""
session.save_model(save_path)

# Load later
session.load_model(save_path)

    # Keep training, or go ahead and actually use the model by calling get_Q
pred_Q = session.get_Q(state, action)
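
The `encode` stub is the main gap between this session skeleton and the new `PokerGameEncoder`. Below is a minimal sketch of one way it could be filled in, assuming the state is the feature dict returned by `PokerGameEncoder.get_features()` and the action is one-hot encoded over a hypothetical fold/call/raise triple; the helper name `encode_state_action` and the `ACTIONS` list are illustrative only, not defined anywhere in this PR.

```python
import numpy as np
import torch

# Hypothetical action ordering; the PR does not define one.
ACTIONS = ["fold", "call", "raise"]

def encode_state_action(features: dict, action: str) -> torch.Tensor:
    """Concatenate PokerGameEncoder features with a one-hot action vector."""
    action_vec = np.zeros(len(ACTIONS), dtype=np.float32)
    action_vec[ACTIONS.index(action)] = 1.0
    parts = [
        features["hole_cards_vector"],
        features["community_cards_vector"],
        np.array([
            features["our_total_money"],
            features["opponent_total_money"],
            features["our_investment_this_round"],
            features["opponent_investment_this_round"],
        ], dtype=np.float32),
        action_vec,
    ]
    return torch.from_numpy(np.concatenate(parts))  # shape: (52 + 52 + 4 + 3,)
```

The resulting 111-dimensional vector (52 + 52 + 4 + 3) would be the input that `QModel.forward` eventually consumes.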
Empty file added game_encoder/__init__.py
Empty file.
97 changes: 97 additions & 0 deletions game_encoder/game_encoder.py
@@ -0,0 +1,97 @@
import numpy as np

class PokerGameEncoder:
_suit_offset = {'C': 0, 'D': 13, 'H': 26, 'S': 39}
_rank_value = {'2':0, '3':1, '4':2, '5':3, '6':4, '7':5, '8':6, '9':7, 'T':8, 'J':9, 'Q':10, 'K':11, 'A':12}
_pre_rounds = {'river': 'turn', 'turn': 'flop', 'flop': 'preflop'}

def __init__(self, our_uuid, our_stack, op_uuid, op_stack, game_info):
self.our_uuid = our_uuid
self.opponent_uuid = op_uuid
self.hole_cards_vector = np.zeros(52, dtype=np.float32)
self.community_cards_vector = np.zeros(52, dtype=np.float32)
self.our_total_money = our_stack
self.opponent_total_money = op_stack
self.our_investment = 0
self.opponent_investment = 0
self.round_count = 1
self.seats = game_info.get('seats')
self.prev_our_pot = our_stack
self.prev_op_pot = op_stack

@classmethod
def card_to_index(cls, card):
return cls._suit_offset[card[0]] + cls._rank_value[card[1]]

@classmethod
def encode_cards(cls, cards):
vec = np.zeros(52, dtype=np.float32)
for card in cards:
vec[cls.card_to_index(card)] = 1.0
return vec

def get_stack(self, uuid, new_game_data):
for seat in new_game_data.get('seats'):
if seat['uuid'] == uuid:
return seat['stack']
raise ValueError(f"UUID {uuid} not found in seats")

def update(self, new_game_data, new_hole_cards=None):
if new_hole_cards:
self.hole_cards_vector = self.encode_cards(new_hole_cards)
self.community_cards_vector = self.encode_cards(new_game_data.get('community_card', []))

if new_game_data['round_count'] != self.round_count:
self.round_count = new_game_data['round_count']
small_blind_pos = new_game_data['small_blind_pos']
small_blind_amount = new_game_data['small_blind_amount']

small_blind_uuid = new_game_data['seats'][small_blind_pos]['uuid']

if new_game_data.get('action_histories').get('preflop')[-1].get('action') == 'BIGBLIND':
if self.our_uuid == small_blind_uuid:
self.our_investment = small_blind_amount
self.opponent_investment = 2 * small_blind_amount
self.prev_our_pot = self.get_stack(self.our_uuid, new_game_data) + small_blind_amount
self.prev_op_pot = self.get_stack(self.opponent_uuid, new_game_data) + 2 * small_blind_amount
self.our_total_money = self.get_stack(self.our_uuid, new_game_data)
self.opponent_total_money = self.get_stack(self.opponent_uuid, new_game_data)
else:
self.our_investment = 2 * small_blind_amount
self.opponent_investment = small_blind_amount
self.prev_our_pot = self.get_stack(self.our_uuid, new_game_data) + 2 * small_blind_amount
self.prev_op_pot = self.get_stack(self.opponent_uuid, new_game_data) + small_blind_amount
self.our_total_money = self.get_stack(self.our_uuid, new_game_data)
self.opponent_total_money = self.get_stack(self.opponent_uuid, new_game_data)
else:
last_opponent_paid = new_game_data.get('action_histories').get('preflop')[-1].get('paid')
if self.our_uuid == small_blind_uuid:
self.our_investment = small_blind_amount
self.opponent_investment = 2 * small_blind_amount + last_opponent_paid
self.prev_our_pot = self.get_stack(self.our_uuid, new_game_data) + small_blind_amount
self.prev_op_pot = self.get_stack(self.opponent_uuid, new_game_data) + 2 * small_blind_amount + last_opponent_paid
self.our_total_money = self.get_stack(self.our_uuid, new_game_data)
self.opponent_total_money = self.get_stack(self.opponent_uuid, new_game_data)
else:
self.our_investment = 2 * small_blind_amount
self.opponent_investment = small_blind_amount + last_opponent_paid
self.prev_our_pot = self.get_stack(self.our_uuid, new_game_data) + 2 * small_blind_amount
self.prev_op_pot = self.get_stack(self.opponent_uuid, new_game_data) + small_blind_amount + last_opponent_paid
self.our_total_money = self.get_stack(self.our_uuid, new_game_data)
self.opponent_total_money = self.get_stack(self.opponent_uuid, new_game_data)
else:
self.round_count = new_game_data['round_count']
self.our_total_money = self.get_stack(self.our_uuid, new_game_data)
self.opponent_total_money = self.get_stack(self.opponent_uuid, new_game_data)
self.our_investment = self.prev_our_pot - self.our_total_money
self.opponent_investment = self.prev_op_pot - self.opponent_total_money

def get_features(self):
return {
'hole_cards_vector': self.hole_cards_vector,
'community_cards_vector': self.community_cards_vector,
'our_total_money': self.our_total_money,
'opponent_total_money': self.opponent_total_money,
'our_investment_this_round': self.our_investment,
'opponent_investment_this_round': self.opponent_investment,
}
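
For reference, a small sanity check of the card encoding. Cards follow pypokerengine's suit-then-rank string format (e.g. 'SA' for the ace of spades); the concrete cards below are made up for illustration.

```python
from game_encoder.game_encoder import PokerGameEncoder

# 'SA' -> suit 'S' (offset 39) + rank 'A' (value 12) = index 51
assert PokerGameEncoder.card_to_index('SA') == 51
assert PokerGameEncoder.card_to_index('HK') == 26 + 11  # index 37

# 52-dim one-hot vector with exactly the two hole cards set
hole_vec = PokerGameEncoder.encode_cards(['SA', 'HK'])
assert hole_vec.shape == (52,) and hole_vec.sum() == 2.0
```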
28 changes: 25 additions & 3 deletions raise_player.py
@@ -1,19 +1,41 @@
from pypokerengine.players import BasePokerPlayer
from time import sleep
from game_encoder.game_encoder import PokerGameEncoder
import pprint

class RaisedPlayer(BasePokerPlayer):

classencoder = None

def declare_action(self, valid_actions, hole_card, round_state):
self.classencoder.update(round_state, hole_card)
pp = pprint.PrettyPrinter(indent=2)
print("------------ROUND_STATE(RAISE)--------")
pp.pprint(round_state)
print("------------HOLE_CARD----------")
pp.pprint(hole_card)
print("------------VALID_ACTIONS----------")
pp.pprint(valid_actions)
print("------------FEATURES----------")
pp.pprint(self.classencoder.get_features())
print("-------------------------------")
for i in valid_actions:
if i["action"] == "raise":
action = i["action"]
return action # action returned here is sent to the poker engine
return action
action = valid_actions[1]["action"]
return action # action returned here is sent to the poker engine
return action

def receive_game_start_message(self, game_info):
pass
seats = game_info.get('seats')
for seat in seats:
if seat.get('name') == 'RaisedPlayer':
self_uuid = seat.get('uuid')
self_stack = seat.get('stack')
else:
op_uuid = seat.get('uuid')
op_stack = seat.get('stack')
self.classencoder = PokerGameEncoder(self_uuid, self_stack, op_uuid, op_stack, game_info)

def receive_round_start_message(self, round_count, hole_card, seats):
pass
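Both players now build a `PokerGameEncoder`, but nothing yet feeds its features into the deep_q side. A hedged sketch of how `declare_action` could eventually consult a `QSession` is below; the `self.q_session` attribute, a scalar-output `QModel`, and an `encode(features, action)` that accepts this pair are all assumptions, not something this PR implements.

```python
# Hypothetical integration sketch, not part of this PR.
def declare_action(self, valid_actions, hole_card, round_state):
    self.classencoder.update(round_state, hole_card)
    features = self.classencoder.get_features()

    # Score each currently valid action with the Q-network and pick the best.
    best_action, best_q = None, float("-inf")
    for entry in valid_actions:
        q = float(self.q_session.get_Q(features, entry["action"]))  # assumes a scalar Q output
        if q > best_q:
            best_action, best_q = entry["action"], q
    return best_action
```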
37 changes: 26 additions & 11 deletions randomplayer.py
@@ -1,19 +1,24 @@
from pypokerengine.players import BasePokerPlayer
import random as rand
from game_encoder.game_encoder import PokerGameEncoder
import pprint

class RandomPlayer(BasePokerPlayer):

classencoder = None

def declare_action(self, valid_actions, hole_card, round_state):
# valid_actions format => [raise_action_info, call_action_info, fold_action_info]
#pp = pprint.PrettyPrinter(indent=2)
#print("------------ROUND_STATE(RANDOM)--------")
#pp.pprint(round_state)
#print("------------HOLE_CARD----------")
#pp.pprint(hole_card)
#print("------------VALID_ACTIONS----------")
#pp.pprint(valid_actions)
#print("-------------------------------")
self.classencoder.update(round_state, hole_card)
pp = pprint.PrettyPrinter(indent=2)
print("------------ROUND_STATE(RANDOM)--------")
pp.pprint(round_state)
print("------------HOLE_CARD----------")
pp.pprint(hole_card)
print("------------VALID_ACTIONS----------")
pp.pprint(valid_actions)
print("------------FEATURES----------")
pp.pprint(self.classencoder.get_features())
print("-------------------------------")
r = rand.random()
if r <= 0.5:
call_action_info = valid_actions[1]
@@ -25,10 +30,20 @@ def declare_action(self, valid_actions, hole_card, round_state):
return action # action returned here is sent to the poker engine

def receive_game_start_message(self, game_info):
pass
seats = game_info.get('seats')
for seat in seats:
if seat.get('name') == 'Random Warrior 1':
self_uuid = seat.get('uuid')
self_stack = seat.get('stack')
else:
op_uuid = seat.get('uuid')
op_stack = seat.get('stack')
self.classencoder = PokerGameEncoder(self_uuid, self_stack, op_uuid, op_stack, game_info)

def receive_round_start_message(self, round_count, hole_card, seats):
pass
pp = pprint.PrettyPrinter(indent=2)
print("------------NEW_ROUND_STATE(RANDOM)--------")
pp.pprint(seats)

def receive_street_start_message(self, street, round_state):
pass