@@ -123,20 +123,13 @@ def __init__(
123
123
for player in self .players :
124
124
player .is_turn = False
125
125
self .current_player .is_turn = True
126
- # TODO add attribute of public_cards, that can be supplied by
127
- # convenience method
128
- self ._public_cards = public_cards
129
126
if public_cards :
130
127
assert len (public_cards ) in {3 , 4 , 5 }
131
128
self ._public_cards = public_cards
132
129
# only want to do these actions in real game play, as they are slow
133
130
if self .real_time_test :
134
131
# must have offline strategy loaded up
135
132
self ._starting_hand_probs = self ._initialize_starting_hands ()
136
- # TODO: We might not need this
137
- cards_in_deck = self ._table .dealer .deck ._cards_in_deck
138
- self ._evals = [c .eval_card for c in cards_in_deck ]
139
- self ._evals_to_cards = {i .eval_card : i for i in cards_in_deck }
140
133
141
134
def __repr__ (self ):
142
135
"""Return a helpful description of object in strings and debugger."""
@@ -329,34 +322,30 @@ def _increment_stage(self):
329
322
def _normalize_bayes(self):
    """Normalize each player's starting-hand weights so they sum to one.

    ``self._starting_hand_probs`` maps a player index to a dict of
    ``starting_hand -> weight``. Every inner dict is rescaled in place.

    If all of a player's weights are zero there is nothing to rescale, so
    that player's table falls back to a uniform distribution instead of
    raising ``ZeroDivisionError``. Empty tables are left untouched.
    """
    for p_i in range(len(self.players)):
        hand_probs = self._starting_hand_probs[p_i]
        if not hand_probs:
            continue
        total_prob = sum(hand_probs.values())
        if total_prob == 0:
            # Every hand was ruled out; no information is left, so treat
            # all hands as equally likely rather than dividing by zero.
            uniform_prob = 1 / len(hand_probs)
            for starting_hand in hand_probs:
                hand_probs[starting_hand] = uniform_prob
            continue
        # Only values are reassigned (no keys added or removed), so
        # updating the dict while iterating over it is safe.
        for starting_hand, prob in hand_probs.items():
            hand_probs[starting_hand] = prob / total_prob
336
329
330
+ # TODO: figure out typing for dicts..
337
331
def _update_hole_cards_bayes (self , offline_strategy : Dict ):
338
- """Get probability of reach for each pair of hole cards for each player"""
332
+ """Get probability of reach for each starting hand for each player"""
339
333
n_players = len (self ._table .players )
340
334
player_indices : List [int ] = [p_i for p_i in range (n_players )]
341
335
for p_i in player_indices :
336
+ # TODO: might make sense to put starting hands in the deck class
342
337
for starting_hand in self ._starting_hand_probs [p_i ].keys ():
338
+
339
+ starting_hand = list (
340
+ starting_hand
341
+ )
343
342
# TODO: is this bad?
344
343
if "p_reach" in locals ():
345
344
del p_reach
346
345
action_sequence : Dict [str , List [str ]] = collections .defaultdict (list )
347
- previous_betting_stage = "pre_flop"
348
- first_action_round = False
349
346
for idx , betting_stage in enumerate (self ._history .keys ()):
350
- # import ipdb;
351
- # ipdb.set_trace()
352
347
n_actions_round = len (self ._history [betting_stage ])
353
348
for i in range (n_actions_round ):
354
- # if i == 0:
355
- # betting_stage = previous_betting_stage
356
- # elif i == n_actions_round - 1:
357
- # previous_betting_stage = betting_stage
358
- # else:
359
- # betting_stage = betting_round
360
349
action = self ._history [betting_stage ][i ]
361
350
while action == 'skip' :
362
351
i += 1 # action sequences don't end in skip
@@ -368,114 +357,113 @@ def _update_hole_cards_bayes(self, offline_strategy: Dict):
368
357
ph = i % n_players
369
358
if p_i != ph :
370
359
prob_reach_all_hands = []
371
- num_hands = 0
372
360
for opp_starting_hand in self ._starting_hand_probs [
373
361
p_i
374
362
].keys ():
375
- # TODO: clean this up
376
- public_evals = [
377
- c .eval_card
378
- for c in self ._public_information [betting_stage ]
379
- ]
380
- if len (set (opp_starting_hand ).union (set (public_evals )).union (set (starting_hand ))) < \
381
- len (opp_starting_hand ) + len (starting_hand ) + len (public_evals ):
363
+ opp_starting_hand = list (
364
+ opp_starting_hand
365
+ )
366
+ publics = self ._public_information [betting_stage ]
367
+ if len (
368
+ set (opp_starting_hand ).union (
369
+ set (publics )
370
+ ).union (set (starting_hand ))
371
+ ) < len (
372
+ opp_starting_hand
373
+ ) + len (
374
+ starting_hand
375
+ ) + len (
376
+ publics
377
+ ):
382
378
prob = 0
383
- num_hands += 1
384
379
else :
385
- num_hands += 1
386
-
387
- public_cards = self ._public_information [
380
+ publics = self ._public_information [
388
381
betting_stage
389
382
]
390
- public_cards_evals = [c .eval_card for c in public_cards ]
391
383
infoset = self ._info_set_helper (
392
384
opp_starting_hand ,
393
- public_cards_evals ,
385
+ publics ,
394
386
action_sequence ,
395
387
betting_stage ,
396
388
)
397
- # check to see if the strategy exists, if not equal probability
398
- # TODO: is this hacky? problem with defaulting to 1 / 3, is that it
399
- # doesn't work for calculations that need to be made with the object's values
389
+ # check to see if the strategy exists,
390
+ # if not equal probability
391
+ # TODO: is this overly hacky?
392
+ # Problem with defaulting to 1 / 3, is that it
393
+ # doesn't work for calculations that
394
+ # need to be made with the object's values
400
395
401
396
try : # TODO: with or without keys
402
397
prob = offline_strategy [infoset ][action ]
403
398
except KeyError :
404
399
prob = 1 / len (self .legal_actions )
405
400
prob_reach_all_hands .append (prob )
406
- # import ipdb;
407
- # ipdb.set_trace()
408
- prob2 = sum (prob_reach_all_hands ) / num_hands
401
+ total_opp_prob_h = sum (prob_reach_all_hands ) / \
402
+ len (prob_reach_all_hands )
409
403
if "p_reach" not in locals ():
410
- p_reach = prob2
404
+ p_reach = total_opp_prob_h
411
405
else :
412
- p_reach *= prob2
406
+ p_reach *= total_opp_prob_h
413
407
elif p_i == ph :
414
- public_evals = [
415
- c .eval_card
416
- for c in self ._public_information [betting_stage ]
417
- ]
418
- if len (set (starting_hand ).union (set (public_evals ))) < (
419
- len (public_evals ) + 2
408
+ publics = self ._public_information [betting_stage ]
409
+ if len (
410
+ set (starting_hand ).union (
411
+ set (publics )
412
+ )
413
+ ) < (
414
+ len (publics ) + 2
420
415
):
421
- prob = 0
416
+ total_prob = 0
422
417
else :
423
- public_cards = self ._public_information [betting_stage ]
424
- public_cards_evals = [c .eval_card for c in public_cards ]
418
+ publics = self ._public_information [betting_stage ]
425
419
infoset = self ._info_set_helper (
426
420
starting_hand ,
427
- public_cards_evals ,
421
+ publics ,
428
422
action_sequence ,
429
423
betting_stage ,
430
424
)
431
425
# TODO: Check this
432
426
try :
433
- prob = offline_strategy [infoset ][action ]
427
+ total_prob = offline_strategy [infoset ][action ]
434
428
except KeyError :
435
- prob = 1 / len (self .legal_actions )
429
+ total_prob = 1 / len (self .legal_actions )
436
430
if "p_reach" not in locals ():
437
- p_reach = prob
431
+ p_reach = total_prob
438
432
else :
439
- p_reach *= prob
433
+ p_reach *= total_prob
440
434
action_sequence [betting_stage ].append (action )
441
435
self ._starting_hand_probs [p_i ][tuple (starting_hand )] = p_reach
442
436
self ._normalize_bayes ()
443
- # TODO: delete this? at least for our purposes we don't need it again
444
437
445
438
def deal_bayes(self):
    """Return a copy of this state with hole cards sampled for every player.

    The state is deep-copied (sharing ``info_set_lut`` with the original
    rather than copying it). Each player, visited in random order, is then
    given a starting hand drawn by ``_get_starting_hand``. Hands are
    redrawn until they share no card with hands already dealt or with the
    public cards. All dealt and public cards are finally removed from the
    copy's deck.

    Returns:
        The new state; ``self`` is left unchanged.
    """
    # Detach the lookup table while copying so deepcopy does not duplicate
    # it; restore it on ``self`` even if the copy fails.
    lut = self.info_set_lut
    self.info_set_lut = {}
    try:
        new_state = copy.deepcopy(self)
    finally:
        self.info_set_lut = lut
    new_state.info_set_lut = lut

    players = list(range(len(self.players)))
    random.shuffle(players)

    # ``_public_cards`` is only assigned when public cards were supplied,
    # so tolerate it being missing or empty.
    public_cards = list(getattr(new_state, "_public_cards", None) or [])
    cards_selected = []
    # Cards that may no longer be dealt: public cards plus earlier hands.
    unavailable = set(public_cards)
    # TODO: this might be made better by selecting the first player's
    # cards, then normalizing the second and third, etc..
    for p_i in players:
        # Rejection sampling: redraw until the hand uses only free cards.
        while True:
            starting_hand = new_state._get_starting_hand(p_i)
            if unavailable.isdisjoint(starting_hand):
                break
        # TODO: pull this into a helper method, maybe it should
        # be in the dealer class..
        for card in starting_hand:
            new_state.players[p_i].add_private_card(card)
        cards_selected += starting_hand
        unavailable.update(starting_hand)
    cards_selected += public_cards
    for card in cards_selected:
        new_state._table.dealer.deck.remove(card)
    return new_state
478
- # TODO add convenience method to supply public cards
466
+ # TODO add convenience method to supply public cards
479
467
480
468
def load_game_state (self , offline_strategy : Dict , action_sequence : list ):
481
469
"""
@@ -498,23 +486,31 @@ def load_game_state(self, offline_strategy: Dict, action_sequence: list):
498
486
499
487
def _get_starting_hand(self, player_idx: int):
    """Sample one starting hand for a player, weighted by probability of reach.

    Args:
        player_idx: Key into ``self._starting_hand_probs`` for the player.

    Returns:
        The sampled starting hand as a list (the dict key converted from
        its tuple form).

    Raises:
        ValueError: If the player has no starting hand with positive weight.
    """
    hand_probs = self._starting_hand_probs[player_idx]
    starting_hands = list(hand_probs)
    weights = np.fromiter(
        hand_probs.values(), dtype=float, count=len(starting_hands)
    )
    total_weight = weights.sum()
    if not total_weight > 0:
        raise ValueError(
            f"No starting hand with positive probability for player "
            f"{player_idx}."
        )
    # Sample an index rather than a hand: np.random.choice needs a 1-D
    # population, and a list of tuple keys would be read as a 2-D array.
    # Dividing by the total lets the draw work on weights that have not
    # been normalized yet (e.g. the initial all-ones table).
    starting_hand_idx = np.random.choice(
        len(starting_hands),
        1,
        p=weights / total_weight,
    )[0]
    return list(starting_hands[starting_hand_idx])
506
502
507
503
def _initialize_starting_hands(self):
    """Build the initial starting-hand weight table for every player.

    Must be called while the betting stage is ``"pre_flop"``.

    Returns:
        A dict mapping each player index (``0 .. len(self.players) - 1``)
        to its own dict of ``starting_hand -> 1``, where the starting hands
        are whatever ``self._get_card_combos(2)`` returns. The weights are
        not normalized.
    """
    assert self.betting_stage == "pre_flop"
    starting_hands = self._get_card_combos(2)
    # dict.fromkeys builds a fresh dict per player, so the per-player
    # tables can later be updated independently.
    return {
        p_i: dict.fromkeys(starting_hands, 1)
        for p_i in range(len(self.players))
    }
520
516
@@ -523,15 +519,23 @@ def _info_set_helper(
523
519
):
524
520
# didn't want to combine this with the other, as we may want to modularize soon
525
521
"""Get the information set for the current player."""
526
- cards = sorted (hole_cards , reverse = True ,)
527
- cards += sorted (public_cards , reverse = True ,)
528
- eval_cards = tuple (cards )
522
+ cards = sorted (
523
+ hole_cards ,
524
+ key = operator .attrgetter ("eval_card" ),
525
+ reverse = True ,
526
+ )
527
+ cards += sorted (
528
+ public_cards ,
529
+ key = operator .attrgetter ("eval_card" ),
530
+ reverse = True ,
531
+ )
532
+ eval_cards = tuple ([int (c ) for c in cards ])
529
533
try :
530
534
cards_cluster = self .info_set_lut [betting_stage ][eval_cards ]
531
535
except KeyError :
532
536
if not self .info_set_lut :
533
537
raise ValueError ("Pickle luts must be loaded for info set." )
534
- elif eval_cards not in self .info_set_lut [self . _betting_stage ]:
538
+ elif eval_cards not in self .info_set_lut [betting_stage ]:
535
539
raise ValueError ("Cards {cards} not in pickle files." )
536
540
else :
537
541
raise ValueError ("Unrecognised betting stage in pickle files." )
@@ -548,7 +552,7 @@ def _info_set_helper(
548
552
549
553
def _get_card_combos(self, num_cards):
    """Return a list of all ``num_cards``-sized combinations of deck cards."""
    deck_cards = self.cards_in_deck
    return [*combinations(deck_cards, num_cards)]
552
556
553
557
@property
554
558
def community_cards (self ) -> List [Card ]:
@@ -560,6 +564,11 @@ def private_hands(self) -> Dict[ShortDeckPokerPlayer, List[Card]]:
560
564
"""Return all private hands."""
561
565
return {p : p .cards for p in self .players }
562
566
567
@property
def cards_in_deck(self):
    """Return the cards currently held by the table dealer's deck."""
    deck = self._table.dealer.deck
    return deck._cards_in_deck
571
+
563
572
@property
564
573
def initial_regret (self ) -> Dict [str , float ]:
565
574
"""Returns the default regret for this state."""
@@ -590,11 +599,6 @@ def n_players_started_round(self) -> bool:
590
599
"""Return n_players that started the round."""
591
600
return self ._n_players_started_round
592
601
593
- # @property
594
- # def first_move_of_current_round(self) -> bool:
595
- # """Return boolfor first move of current round."""
596
- # return self._first_move_of_current_round
597
-
598
602
@property
599
603
def player_i (self ) -> int :
600
604
"""Get the index of the players turn it is."""
@@ -603,11 +607,11 @@ def player_i(self) -> int:
603
607
@player_i.setter
def player_i(self, _: Any):
    """Reject any attempt to assign to the read-only ``player_i`` property."""
    message = "The player_i property should not be set."
    raise ValueError(message)
607
611
608
612
@property
609
613
def betting_round (self ) -> int :
610
- """Algorithm 1 of pluribus supp. material references betting_round. """
614
+ """Return 0 indexed betting round """
611
615
try :
612
616
betting_round = self ._betting_stage_to_round [self ._betting_stage ]
613
617
except KeyError :
@@ -631,7 +635,7 @@ def info_set(self) -> str:
631
635
key = operator .attrgetter ("eval_card" ),
632
636
reverse = True ,
633
637
)
634
- eval_cards = tuple ([card . eval_card for card in cards ])
638
+ eval_cards = tuple ([int ( card ) for card in cards ])
635
639
try :
636
640
cards_cluster = self .info_set_lut [self ._betting_stage ][eval_cards ]
637
641
except KeyError :
0 commit comments