blackjack/blackjack.py at main · hhoppe/blackjack · GitHub

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
# %% [markdown]
# # Blackjack Notebook (bjnb)
#
# [Hugues Hoppe](https://hhoppe.com/)
# &nbsp;&nbsp;&mdash;&nbsp;
# &nbsp; [**[Open in Colab]**](https://colab.research.google.com/github/hhoppe/blackjack/blob/main/blackjack.ipynb)
# &nbsp; [**[in Kaggle]**](https://www.kaggle.com/notebooks/welcome?src=https://github.com/hhoppe/blackjack/blob/main/blackjack.ipynb)
# &nbsp; [**[in MyBinder]**](https://mybinder.org/v2/gh/hhoppe/blackjack/main?filepath=blackjack.ipynb)
# &nbsp; [**[in DeepNote]**](https://deepnote.com/launch?url=https%3A%2F%2Fgithub.com%2Fhhoppe%2Fblackjack%2Fblob%2Fmain%2Fblackjack.ipynb)
# &nbsp; [**[GitHub source]**](https://github.com/hhoppe/blackjack/blob/main/blackjack.ipynb)
#
# Blackjack &mdash; _"the most widely played casino banking game in the world"_.

# %% [markdown]
# **Goals**:
#
# - Solution methods for both:
#
#   1. [Probabilistic analysis](#Probabilistic-analysis)
#      of blackjack actions and outcomes under many strategies, and
#
#   2. [Monte Carlo simulation](#Monte-Carlo-simulation)
#      for cut-card effects and precise split-hand rewards.
#
# - Support for many [rule variations](#Define-Rules)
#   \(12 parameters including #decks, dealer hit soft17, cut-card, ...).
#
# - Optimal [action tables](#Tables-for-basic-strategy) for
#   [basic strategy](#Define-Action-and-Strategy)
#   under any rules, reproducing
#   [Wikipedia](https://en.wikipedia.org/wiki/Blackjack#Basic_strategy) and
#   [WizardOfOdds](https://wizardofodds.com/games/blackjack/strategy/calculator/) results.
#
# - Six [composition-dependent strategies](#Define-Action-and-Strategy)
#   based on progressively greater levels of *attention*.
#   <!--(initial cards, all hand cards, cards in *prior split hands*, ...).-->
#
# - Computation of
#   [house edge](#House-edge-results)
#   under any rules, with either basic or composition-dependent strategies.
#
# - Comparisons with results from online
#   [hand calculators](#Hand-calculator-results) and
#   [house edge calculators](#House-edge-results).
#
# - Novel analysis and visualization of the
#   [*cut-card effect*](#Effect-of-using-a-cut-card)
#   and its [surprising oscillations](#cut-card-graph).
#
# - Open-source Python, sped up with jitting (\~30x) and multiprocessing (\~10x),
#   simulating \~$10^{8}$ hands/s.
#
# - GPU implementation using `numba.cuda`, simulating \~$10^{10}$ hands/s.

# %% [markdown]
# **Versions**:
# - 1.0 (March 2022): use probabilistic analysis for basic strategy tables and
#   approximate house edge.
# - 2.0 (July 2022): add Monte Carlo simulation, hand analysis,
#   and cut-card analysis.
# - 3.0 (January 2025): add CUDA implementation of simulation.

# %% [markdown]
# **Running this Jupyter notebook**:
# - The notebook requires Python 3.10 or later.
# - We recommend starting a Jupyter server on a local machine with a fast multi-core CPU. <br/>
#   (The notebook can also be [executed on a Colab server](
#    https://colab.research.google.com/github/hhoppe/blackjack/blob/main/blackjack.ipynb),
#   where it greatly benefits from a CUDA GPU.)
# - Configure a Linux environment (e.g.,
#   [Windows Subsystem for Linux](https://docs.microsoft.com/en-us/windows/wsl/install)):
#
# ```bash
#     sudo apt install python3-pip
#     python3 -m pip install --upgrade pip
#     pip install jupyterlab jupytext
#     jupyter lab --no-browser
# ```
#
# - Open the URL (output by `jupyter lab`) using a web browser (e.g., Google Chrome on Windows).
# - Load the notebook (`blackjack.ipynb` file).
# - Evaluate all cells in `Code library` and then selectively evaluate `Results`.
# - Adjust the `EFFORT` global variable to trade off speed and accuracy.

# %%
# ** Future to-do?
# For CUDA, make action_table.nbytes=144000 somehow fit into 64K constant memory.
# Try numba.njit(nogil=True) and multithreading; https://stackoverflow.com/a/45612308.
# Using prob or sim, compute house edge over many parameters and fit a function.
# Shared lru_cache: https://stackoverflow.com/a/55816091 and https://stackoverflow.com/a/13694262.

# %% [markdown]
# # Code library

# %% [markdown]
# ## Imports

# %%
# !pip install -q hhoppe-tools matplotlib more-itertools numba numpy tqdm

# %%
# !if [ ! -f random32.py ]; then wget https://github.com/hhoppe/blackjack/raw/main/random32.py; fi

# %%
import abc
import collections
from collections.abc import Callable, Iterable, Iterator, Mapping
import contextlib
import dataclasses
import enum
import functools
import itertools
import logging
import math
import multiprocessing
import os
import pathlib
import pickle
import re
import subprocess
import sys
import textwrap
import time
import typing
from typing import Any, Literal, TypeAlias, Union
import unittest.mock
import urllib.parse
import urllib.request
import warnings

import hhoppe_tools as hh
import matplotlib.pyplot as plt
import more_itertools
import numba
from numba import cuda
import numba.cuda.random
import numpy as np
import numpy.typing
import tqdm
import random32


# %%
def omit_cell_output() -> None:
  """Return None so that the cell does not output anything."""
  return None


# %%
# To archive the notebook, run with value 2.
EFFORT = typing.cast(Literal[0, 1, 2, 3, 4], hh.get_env_int('EFFORT', 2))
"""Controls the breadth and precision of the notebook experiments:
- 0: Fast subset of experiments, at lowest precision (~50 s).
- 1: Fast subset of experiments, at low precision (~70 s).
- 2: Most experiments, at normal precision (~14 minutes).
- 3: All experiments, at high precision (~4.5 hours, including 1 hour cut-card analysis).
- 4: Run at even higher precision (>40 hours), likely on isolated experiments."""
assert 0 <= EFFORT <= 4

RECOMPUTE_CUT_CARD_ANALYSIS = EFFORT >= 3
"""If True, perform expensive recomputation of cut-card analysis for results graphs."""

USE_CUDA = cuda.is_available()

# %%
print(f'The number of CPU threads is {multiprocessing.cpu_count()}.')
print(f'Support for multiprocess "fork": {"fork" in multiprocessing.get_all_start_methods()}.')
if USE_CUDA:
  cuda.detect()
  print(f'The number of GPU SMs is {cuda.get_current_device().MULTIPROCESSOR_COUNT}.')

# %%
_F = typing.TypeVar('_F', bound=Callable[..., Any])
_T = typing.TypeVar('_T')

# %%
_NDArray: TypeAlias = numpy.typing.NDArray[Any]
_CudaArray: TypeAlias = Any  # cuda.cudadrv.devicearray.DeviceNDArray
_CudaEvent: TypeAlias = Any  # cuda.cudadrv.driver.Event

# %%
check_eq = hh.check_eq
QUICK = False  # Can be temporarily overridden.
_ORIGINAL_GLOBALS = list(globals())  # Store current set of variable names.
_ = np.seterr(all='raise')  # Let all numpy warnings raise errors.
hh.start_timing_notebook_cells()

# %%
# Disable graphics plot windows when running this notebook as a script.
if not hh.in_notebook():
  plt.show = lambda *args, **kwargs: None

# %%
Card = int
"""Card value satisfying 1 <= card <= 10, where ace is 1."""

Cards = tuple[Card, ...]
"""Sequence of cards, sometimes sorted in ascending order."""

Hand = tuple[Cards, Card]
"""Tuple `(player_cards, dealer1)` where `player_cards[:2]` and `player_cards[2:]` are sorted."""

State = tuple[Cards, Card, Cards]  # (*hand, split_cards)
"""Drawn cards `(player_cards, dealer1, split_cards) = (*hand, split_cards)` involved in a hand:
- `player_cards = (card1, card2, *sorted_other_player_cards)`: with `card1 <= card2`.
- `dealer1`: is the dealer upcard.
- `split_cards = (a, ..., a, *sorted_other_split_cards)`: contains the cards drawn in earlier
  split hands, where `a` is the card value in the original card pair `(a, a)` that was split."""

CARD_VALUES = 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 10, 10, 10  # (Ace has value 1 but can count as 11.)
CARDS_PER_SUIT = len(CARD_VALUES)
NUM_SUITS = 4
DECK_SIZE = NUM_SUITS * CARDS_PER_SUIT
UNIFORM_CARD_PROB = {card: CARD_VALUES.count(card) / CARDS_PER_SUIT for card in CARD_VALUES}
DEFAULT_NUM_DECKS = 6
SHOE_SIZE_FOR_INFINITE_DECKS_CPU = 6 * DECK_SIZE
SHOE_SIZE_FOR_INFINITE_DECKS_CUDA = 2 * DECK_SIZE
SHOE_SIZE_FOR_SINGLE_HAND_INFINITE_DECKS = 20
SPLIT_SECOND_CARDS_SIZE = 12
NUM_SHUFFLED_CARDS_FOR_SINGLE_HAND = SHOE_SIZE_FOR_SINGLE_HAND_INFINITE_DECKS
PLUS_MINUS_CHAR = '\u00b1'  # Used to indicate precision in Monte Carlo simulation results.
PLUS_MINUS_STANDARD_DEVIATIONS = 2.0  # Results precision bracket; 95% probability within 2 sdv.
WARNING_STANDARD_DEVIATIONS = 3.0  # Warning '*' in results; 99.7% probability within 3 sdv.
AVERAGE_CARDS_PER_HAND = 5.42  # Determined empirically (for one player against dealer).
DISALLOWED = -1e10  # Large negative reward indicating an illegal action.


# %%
def numba_njit(*args: Any, **kwargs: Any) -> Callable[[_F], _F]:
  """Typed replacement for non-bare `@numba.njit()` decorator."""
  assert kwargs or not (len(args) == 1 and callable(args[0]))

  def decorator(func: _F) -> _F:
    jitted_func: _F = numba.njit(*args, **kwargs)(func)
    return jitted_func

  return decorator


# %%
CUDA_KERNEL_PARAMS = tuple[int, int] | tuple[int, int, int] | tuple[int, int, int, int]


# %%
def cuda_jit(*args: Any, **kwargs: Any) -> Callable[[_F], Mapping[CUDA_KERNEL_PARAMS, _F]]:
  """Typed replacement for non-bare `@cuda.jit()` decorator."""
  assert kwargs or not (len(args) == 1 and callable(args[0]))

  def decorator(func: _F) -> Mapping[CUDA_KERNEL_PARAMS, _F]:
    jitted_func: Mapping[CUDA_KERNEL_PARAMS, _F] = cuda.jit(*args, **kwargs)(func)
    return jitted_func

  return decorator


# %%
def multiprocessing_is_available() -> bool:
  """Return True if multiprocessing may enable a performance improvement."""
  has_cpu_limit = os.environ.get('CPU_LIMIT') == '1.0'  # Kubernetes on mybinder.org
  return (
      'fork' in multiprocessing.get_all_start_methods()
      and multiprocessing.cpu_count() > 2
      and not has_cpu_limit
  )


# %%
@contextlib.contextmanager
def temporary_effort(effort: int) -> Iterator[None]:
  """Temporarily set the global `EFFORT` to `effort` and clear all memoization caches."""
  assert 0 <= effort <= 4
  hh.clear_functools_caches(globals())
  try:
    with hh.temporary_assignment(globals(), EFFORT=effort):
      yield
  finally:
    hh.clear_functools_caches(globals())


# %%
def show_kernel_memory_resident_set_size() -> None:
  """Print the memory size of the IPython kernel."""
  if sys.platform != 'linux':
    warnings.warn('show_kernel_memory_resident_set_size is only implemented for linux.')
    return
  command = f'ps -p {os.getpid()} -o rss --no-headers'
  text = subprocess.check_output(command, shell=True, text=True)
  rss_kb = int(text)
  print(f'{rss_kb*1024/1e9:.1f} GiB')


# %%
def show_cuda_memory_usage() -> None:
  """Print the amount of GPU memory allocated by CUDA."""
  if USE_CUDA:
    free, total = cuda.current_context().get_memory_info()
    used = total - free
    print(f'CUDA memory usage {used / 1024**3:.1f} GiB  ({free / 1024**3:.1f} GiB free)')
  else:
    print('CUDA is not available')


# %%
def tqdm_stdout(*args: Any, **kwargs: Any) -> Any:
  """Progress bar customized to show a short ascii text line on stdout."""
  if not hh.in_notebook():
    kwargs['disable'] = True
  kwargs |= dict(leave=False)
  return tqdm.tqdm(
      *args,
      file=sys.stdout,
      ascii=True,
      mininterval=0.2,
      smoothing=0.1,
      bar_format='{desc}:{percentage:3.0f}% [{elapsed}<{remaining}]',
      **kwargs,
  )


# Notes: tqdm uses '\r' and overwrites with spaces (annoying for copy-paste);
# jupyterlab does not correctly handle more than two backspaces ('\b');
# `tqdm.notebook.tqdm()`, which displays HTML, does not work with multiprocessing and does not
# erase completely; `progressbar2` is poorly documented; `rich` requires ipywidget.

omit_cell_output()

# %%
hh.adjust_jupyterlab_markdown_width()


# %% [markdown]
# ## Define `Rules`
# <a name="Define-Rules"></a>
#
# The `Rules` class below captures the many
# [variations in blackjack rules](https://en.wikipedia.org/wiki/Blackjack#Rule_variations_and_effects_on_house_edge).
#
# The class fields default to values used in
# the [Wikipedia page](https://en.wikipedia.org/wiki/Blackjack).
#
# Another useful reference is a
# [comparison of Las Vegas casinos](https://wizardofvegas.com/guides/blackjack-survey/)
# which uses the base rules:
# - BJ pays 3:2;
# - one bet lost to a dealer's BJ;
# - you may double down on any two cards but not after splitting;
# - you may split any pair;
# - you may resplit any pair except aces to make up to four hands;
# - split aces receive one card each;
# - insurance;
# - no surrender.
#
# For a 6-deck shoe, those base rules correspond to
# `Rules.make(double_after_split=False, late_surrender=False)`.
#
# Their table of casinos (171 rows) lists common rule variations:
# - "ds": (`double_after_split=True`): 148 out of 171 (frequent).
# - "ls": (`late_surrender=True`): 56 out of 171.
# - "rsa": (`resplit_aces=True`): 50 out of 171.
# - "s17": (`hit_soft17=False`): 28 out of 171 (opposite of "h17").
#
# The casino listed with the lowest house edge is:
# "Caesars Palace; 0.26%; 6 decks; s17,ds,ls,rsa". <br/>
# This corresponds to `Rules.make(hit_soft17=False, resplit_aces=True)`.


# %%
# "In games involving a shoe, about 3/4 of the deck is used before the cut card is reached and
# the deck is shuffled."
# "In Pitch Blackjack (i.e., single deck or double deck), the cut card is inserted closer to the
# top of the deck, about 1/2 of the way down.  In most cases, the deck is shuffled after
# each hand of play."
def default_cut_card(num_decks: float) -> int:
  """Return the default number of cards in front of the cut-card for a shoe with `num_decks`."""
  match num_decks:
    case math.inf:
      return 0  # (The implementation plays a fixed number of hands per shoe.)
    case 1:
      return DECK_SIZE // 2  # The cut-card is at the middle of the deck.
    case 2:
      return 2 * DECK_SIZE - DECK_SIZE // 2  # The cut-card is 0.5 decks from the rear of the shoe.
    case _:
      assert num_decks > 2 and num_decks == int(num_decks)
      return int(num_decks) * DECK_SIZE - 78  # The cut-card is 1.5 decks from the rear.


# %%
@dataclasses.dataclass(frozen=True, slots=True)
class Rules:
  """Blackjack rules (many variations)."""

  num_decks: float = DEFAULT_NUM_DECKS
  """Number of decks in the shoe, e.g., 1, 2, 4, 6, 8, or math.inf."""

  blackjack_payout: float = 1.5
  """Payout for player ace-ten; 6/5 or 1 greatly increase the house edge."""

  hit_soft17: bool = True
  """Whether the dealer hits (rather than stands) on a soft total of 17 (denoted "s17"/"h17").
  False is favorable to the player."""

  obo: bool = True
  """On dealer blackjack, player loses Original Bet Only (i.e., not DOUBLE or SPLIT bets).
  If True, when the dealer's upcard is 10 or ace, the dealer first peeks at their hole card
  before any player action to detect a dealer bj, and if present, collects the bets from players
  who do not themselves have bj."""

  late_surrender: bool = True
  """Allow the player to SURRENDER, i.e. forfeit 50% of the bet if the dealer does not have
  blackjack (denoted "ls").  Should likely be False if obo=False."""

  double_min_total: Literal[0, 9, 10] = 0
  """If zero, the player may DOUBLE on any two cards.  Otherwise, the player must have a hard
  total of at least this value (either 9 or 10); value 9 is the "Reno rule"; value 10 is the
  "European rule".  Because the optimal strategy tables do not suggest "DOUBLE" for a hard
  total > 11, value 9 is also equivalent to the "DOUBLE on 9-11" rule."""

  double_after_split: bool = True
  """Allow DOUBLE as the first action of splitting a pair (denoted "ds")."""

  split_to_num_hands: float = 4
  """Total number of hands that can result from splitting pairs.  Can be 0 or any value >= 2.
  Common values include 0 (no splitting), 2 (single split), 4, or math.inf."""

  resplit_aces: bool = False
  """Whether aces can be split more than once (denoted "rsa")."""

  hit_split_aces: bool = False
  """Allow hitting after splitting aces.  If False (the default), a single card is added to each
  split ace and the player must stand (which is unfavorable to the player)."""

  double_split_aces: bool = False
  """Allow doubling after splitting aces.  Rare."""

  cut_card: int = -1
  """Number of shoe cards in front of the cut-card.  The shoe is reshuffled after completion of
  the hand during which the cut-card is exposed.  The default is a positive value that depends
  on `num_decks`.  The minimum value is 0, in which case the shoe is reshuffled after every hand,
  as in a continuous shuffling machine; for improved efficiency, we simulate a fixed (>1) number
  of hands and this is still equivalent to continuous shuffling."""

  num_players: int = 1
  """The number of players affects how quickly the cut-card is reached.  Not yet implemented."""

  def __post_init__(self) -> None:
    """Adjust cut_card and check validity of constructed rules."""
    num_decks = self.num_decks
    assert num_decks == math.inf or (num_decks == int(num_decks) and num_decks >= 1)
    if self.cut_card == -1:
      object.__setattr__(self, 'cut_card', default_cut_card(num_decks))
    assert self.blackjack_payout >= 1.0
    assert self.double_min_total in (0, 9, 10)
    splitn = self.split_to_num_hands
    assert splitn in (0, math.inf) or (splitn == int(splitn) and splitn >= 2)
    assert 0 <= self.cut_card <= (0 if num_decks == math.inf else num_decks * DECK_SIZE)
    assert self.num_players == 1  # More than one player is not yet implemented.
    # Note that with split_to_num_hands == 0, we ignore resplit_aces, hit_split_aces,
    # and double_split_aces.  And with split_to_num_hands == 2, we ignore resplit_aces.

  # Same effect as @hh.terse_str class decorator but with dynamic default for `cut_card`.
  def __str__(self) -> str:
    """Return a string containing only the non-default field values."""
    parts = []
    for field in dataclasses.fields(self):
      value = getattr(self, field.name)
      default = default_cut_card(self.num_decks) if field.name == 'cut_card' else field.default
      if value != default:
        parts.append(f'{field.name}={value!r}')
    return f"{self.__class__.__name__}({', '.join(parts)})"


# %%
BEST_RULES = Rules(
    num_decks=1,
    split_to_num_hands=math.inf,
    resplit_aces=True,
    hit_split_aces=True,
    double_split_aces=True,
    cut_card=0,
)
"""Combination of rules with the lowest house edge (which is in fact negative)."""

WORST_RULES = Rules(
    num_decks=math.inf,
    blackjack_payout=1,
    hit_soft17=False,
    obo=False,
    late_surrender=False,
    double_min_total=10,
    double_after_split=False,
    split_to_num_hands=2,
)
"""Combination of rules with the highest house edge."""

omit_cell_output()

# (Although the ingenious `Rules(num_decks=1, ..., cut_card=5)` has a low house edge,
# its edge is not as low as `Rules(num_decks=math.inf)`.)

# %%
# We do not distinguish between different cards with value 10, i.e., 10, jack, queen, and king.
# A possible issue of whether the player is allowed to split any two cards with value 10,
# e.g. (king, 10).  This is usually allowed.  If it were disallowed, it would affect the
# probability of occurrence of splits and resplits.  However, the basic strategy tables show that
# is always favorable to stand on (10, 10), so the issue is moot.

# %% [markdown]
# ## Define `Action` and `Strategy`
# <a name="Define-Action-and-Strategy"></a>

# %%
# pytype fails on hh.OrderedEnum; it is ok with enum.Enum but then max((reward, action)) may fail.
if typing.TYPE_CHECKING:
  OrderedEnum = enum.Enum
else:
  OrderedEnum = hh.OrderedEnum


# %%
class Action(OrderedEnum):
  """Player actions."""

  STAND = enum.auto()
  HIT = enum.auto()
  DOUBLE = enum.auto()
  SPLIT = enum.auto()
  SURRENDER = enum.auto()


# %%
Actions = frozenset[Action]
ACTIONS = frozenset(Action)
IMPOSSIBLE_ACTION_VALUE = 0  # (action.value are numbered starting at 1 as in enum.auto().)

# One-letter abbreviation used in strategy tables.
CODE_FOR_ACTION = {
    Action.STAND: 'S',
    Action.HIT: 'H',
    Action.DOUBLE: 'D',
    Action.SPLIT: 'P',
    Action.SURRENDER: 'U',
}

COLOR_FOR_CODE = {
    'S': (1.0, 0.0, 0.0),  # Red.
    'H': (0.0, 1.0, 0.0),  # Green.
    'D': (0.0, 1.0, 1.0),  # Cyan.
    'P': (1.0, 1.0, 0.0),  # Yellow.
    'U': (1.0, 1.0, 1.0),  # White.
}


# %%
def test_action() -> None:
  """Check string representations of Action."""
  check_eq(list(Action), [Action.STAND, Action.HIT, Action.DOUBLE, Action.SPLIT, Action.SURRENDER])
  action = Action.STAND
  check_eq(action.name, 'STAND')
  check_eq(action.value, 1)
  check_eq(action.name.lower(), 'stand')
  check_eq(str(action), 'Action.STAND')
  check_eq(repr(action), '<Action.STAND: 1>')
  check_eq(CODE_FOR_ACTION[action], 'S')


# %%
test_action()


# %%
class Attention(enum.Enum):
  """Aspects of the player and dealer cards that condition the optimal action."""

  # Other ideas for class name: Scope, Inspect, Feature, Heed, Observe, Mull, Track.

  TOTAL_OF_CARDS = enum.auto()
  """Consider `(card1, card1, dealer1)` for the first action on a paired hand,
  or else `(player_total, player_soft, dealer1)`, i.e., "total-dependent basic strategy"."""

  INITIAL_CARDS_OR_TOTAL = enum.auto()
  """Consider individual cards `(card1, card2)` for the first action, and then revert back to
  TOTAL_OF_CARDS for later actions.  It is a limited form of "composition-dependent" strategy."""

  INITIAL_CARDS_AND_TOTAL = enum.auto()
  """Consider both the first two cards and the card total.  It is a limited form of
  "composition-dependent" strategy."""

  HAND_CARDS_ONLY = enum.auto()
  """Consider all card values in the player's hand (and of course `dealer1`).  However,
  after splitting, keep no memory of cards in the prior split hands."""

  HAND_AND_NUM_PRIOR_SPLITS = enum.auto()
  """Like `HAND_CARDS_ONLY` but also consider the number of split hands that have resulted so far.
  For the third hand of the split hands `[(8, 4, 7), (8, 10), (8, 2, 1)]`, we know the current
  hand `(8, 2, 1)` and the fact that the card `8` appeared twice in the prior split hands.
  We use this as the default for `COMPOSITION_DEPENDENT_STRATEGY`."""

  HAND_AND_INITIAL_CARDS_IN_PRIOR_SPLITS = enum.auto()
  """Like `HAND_AND_NUM_PRIOR_SPLITS` but also consider the initial two cards of all prior split
  hands.  For the third hand of the split hands `[(8, 4, 7), (8, 10), (8, 2, 1)]`, we know the
  current hand `(8, 2, 1)` and the cards `8, 8, 4, 10` which appeared as initial cards in the
  two prior split hands."""

  # HAND_AND_PRIOR_SPLIT_HANDS = enum.auto()
  # Not implemented: it would greatly increase the probabilistic state to track.

  def __repr__(self) -> str:
    """Hide the integer `self.value` and show only the string name."""
    return f'{self.__class__.__name__}.{self.name}'


# %%
@dataclasses.dataclass(frozen=True, slots=True)
class Strategy:
  """Player's strategy."""

  attention: Attention = Attention.TOTAL_OF_CARDS
  """Aspects of the player and dealer cards that contribute to deciding the best action.
  The default (total-dependent basic strategy) considers the dealer upcard, the player total, and
  whether this player total is hard or soft."""

  first_actions: Actions = ACTIONS
  """Subset of player first actions to consider; default is all actions.  The subsequent
  candidate actions are always {Action.STAND, Action.HIT}.  If set to `frozenset({Action.SPLIT})`,
  forces splitting of pairs but places no constraint on the first action of the resulting
  completed split hands."""

  def __post_init__(self) -> None:
    """Check validity of strategy."""
    assert self.first_actions, 'Strategy must have at least one first action.'

  def __str__(self) -> str:
    """Shorten the string description (omit "Attention.", "Action.", and "frozenset(...)")."""
    fields_s = []
    if self.attention is not Attention.TOTAL_OF_CARDS:
      fields_s.append(f'attention={self.attention}')
    if self.first_actions != ACTIONS:
      first_actions = sorted(self.first_actions, key=lambda action: action.value)
      fields_s.append(f'first_actions={{{",".join(action.name for action in first_actions)}}}')
    return f'Strategy({", ".join(fields_s)})'


# %%
BASIC_STRATEGY = Strategy()  # (attention=Attention.TOTAL_OF_CARDS)
"""The basic (or "total-dependent") strategy considers the player's total rather than the
individual card values."""

INITIAL_DEPENDENT_STRATEGY = Strategy(attention=Attention.INITIAL_CARDS_AND_TOTAL)
"""This strategy considers not just the player's total but also the values of the player's two
initial cards."""

COMPOSITION_DEPENDENT_STRATEGY = Strategy(attention=Attention.HAND_AND_NUM_PRIOR_SPLITS)
"""The composition-dependent strategy considers all the cards in the current hand plus the number
of prior split hands.  It less powerful than HAND_AND_INITIAL_CARDS_IN_PRIOR_SPLITS but only
very slightly."""

omit_cell_output()

# %%
BEST_STRATEGY = Strategy(attention=Attention.HAND_AND_INITIAL_CARDS_IN_PRIOR_SPLITS)
"""Strategy that is most favorable to the player."""

WORST_STRATEGY = Strategy(first_actions=frozenset({Action.STAND, Action.HIT}))
"""Strategy that is least favorable to the player."""

omit_cell_output()

# %% [markdown]
# ## Probabilistic analysis
# <a name="Probabilistic-analysis"></a>

# %% [markdown]
# **Graph representation**
#
# We form a graph where:
# 1) the nodes are hand states, and
# 2) the edges are player/dealer actions (`STAND`, `HIT`, `DOUBLE`, `SPLIT`, and `SURRENDER`).
#
# Ideally, the hand $\text{state}$ captures the full sequence of player cards and dealer cards
# and the set of all cards seen in prior hands.
# (We use a few pruning heuristics (based on the global parameter `EFFORT`)
# to put finite bounds on this combinatorial state.)
#
# The probabilities $\text{ProbCard}(\text{state})$ of drawing card values from the shoe
# is computed based on the card counts in the current hand $\text{state}$.
# (As an example, for a 1-deck shoe, if the player cards contain three 2's and
# the dealer upcard is a 2, the probability of drawing another 2 is zero.)
#
# We express the expected reward for an action at a given graph node
# as a weighted summation of expected rewards at other graph nodes.
# For example, the reward for a `HIT` action is:
#
# $$\text{reward}(\text{state}, \text{HIT}) =
#   \sum_{(\text{prob}, \text{card}) \in \text{ProbCard}(\text{state})}
#     \text{prob}\cdot \text{reward}\big(\text{combine}(\text{state}, \text{card})\big),$$
#
# where $\text{combine}(\text{state}, \text{card})$ returns a new state with the added card.
# The other actions have similar recurrence formulas based on the rewards of other states.
#
# The reward for a hand $\text{state}$ is the reward for the optimal action at that state:
#
# $$\text{reward}(\text{state}) = \max_{\text{action}}
#     \text{reward}\big(\text{state}, \text{action}\big).$$
#
# For an initial hand (i.e., a starting state),
# the expected reward is evaluated by recursively traversing the graph,
# computing the expected reward of each encountered hand state.
# Because the graph is a [DAG](https://en.wikipedia.org/wiki/Directed_acyclic_graph)
# rather than a tree, each hand state may be traversed more than once,
# so it is beneficial to
# [memoize](https://en.wikipedia.org/wiki/Memoization) the intermediate reward values.

# %% [markdown]
# **Player strategy/attention**
#
# The optimal action defined above is based on maximizing the reward for the *specific hand state*.
# This is a fully **composition-dependent strategy**.
#
# However, it is impractical for a human player to memorize the best action
# for every possible combination of player cards (including prior split hands).
# Practical strategies restrict the player **attention** to specific aspects of the hand,
# such as the player total and softness.
# We identify [*6 levels of attention*](#Define-Action-and-Strategy).
# The simplest one is `Attention.TOTAL_OF_CARDS`, which corresponds to
# [*basic strategy*](https://en.wikipedia.org/wiki/Blackjack#Basic_strategy).
#
# We generalize the prior formulas to include attention:
#
# $$\text{reward}(\text{state}, \text{attention}, \text{HIT}) =
#   \sum_{(\text{prob}, \text{card}) \in \text{ProbCard}(\text{state})}
#     \text{prob}\cdot \text{reward}\big(
#       \text{combine}(\text{state}, \text{card}), \text{attention}\big),$$
#
# $$\text{reward}(\text{state}, \text{attention}) =
#   \text{reward}\Big(\text{state}, \text{attention},
#     \underset{\text{action}}{\operatorname{argmax}}
#       \text{reward}\big(\text{Project}_{\text{attention}}(\text{state}),
#         \text{attention}, \text{action}\big)
#     \Big),$$
#
# where $\text{state}' = \text{Project}_{\text{attention}}(\text{state})$ maps $\text{state}$
# onto a canonical $\text{state}'$ which keeps only the attended aspects.
#
# As an example, `Attention.HAND_CARDS_ONLY` ignores the card values in any prior split hands,
# so if $\text{state}$=`(player_cards, dealer1, split_cards)`,
# $\text{Project}_{\text{HAND\_CARDS\_ONLY}}(\text{state})$ = `(player_cards, dealer1, ())`.
#
# Crucially, although we select the best action after projecting the state
# to a simpler canonical state,
# we then evaluate the reward for that best action using the original state.

# %% [markdown]
# **Card probabilities**
#
# Before each card is drawn, we create a table of card counts from the original shoe,
# then subtract all of cards in the current hand state (both the player and dealer cards),
# and thus determine the probability of each card value.
#
# A tricky aspect is that with `rules.obo` (Original Bets Only, which is True by default),
# **when the dealer's upcard is 10 or ace**,
# the dealer peeks at their hole card to check if they have blackjack before any player action.
#
# Thereafter, the fact that the dealer does not have blackjack
# **conditions the value of the dealer hole card**, and indirectly,
# the probability of all subsequent cards drawn in the hand.
#
# We model this accurately using fractional counts during the card probability computations.
# For example, if the dealer's upcard is an ace, then it is known that the dealer hole card is
# not a 10, and for each of the card values (1, 2, 3, 4, 5, 6, 7, 8, 9), we reduce
# their count by the fraction $1/9$.
# See function `card_probabilities_helper` for details.
#
# Here is a summary of the ordering of the conditional card probabilities with `obo=True`:
# - We select the dealer upcard `dealer1` with uniform distribution.
# - We select player `card1` conditional on `[dealer1]`.
# - We select player `card2` conditional on `[dealer1, card1]`.
# - We determine `prob_dealer_bj` based on `[dealer1, card1, card2]`,
# - We select all subsequent player cards conditional on `recent_cards` (which contains
#    `dealer1, card1, card2, ...`) and on `dealer1` if it is 1 or 10 due to
#    constraints on `dealer2` that dealer has no blackjack.
# - We choose `dealer2` conditional on `recent_cards` and with a non-blackjack constraint.
# - We select the subsequent dealer cards conditional on `recent_cards`.

# %% [markdown]
# **Evaluating reward for split hands**
#
# For a given hand state, we consider a SPLIT action to perform as many resplits as are allowed.
# (In other words, we do not try to identify states where a smaller (finite) number of splits
# gives a greater reward; from experiments it seems that this never arises.)
#
# For a SPLIT action on a pair, we initialize a list of two incomplete hands that need
# further cards.
# We probabilistically expand the drawing of successive cards (for a finite number of iterations);
# when the drawn card allows a resplit, we push an incomplete hand onto the list,
# else we add the card to an existing incomplete hand.
# See function `reward_for_split` for details.

# %% [markdown]
# **Sources of error in the probabilistic results**:
#
# - The current probabilistic analysis ignores any penetration in the shoe,
#   so it **does not account for a [*cut-card effect*](#Effect-of-using-a-cut-card)**.
#   This is valid if playing just the first hand of each shoe (i.e. "continuous reshuffling") or
#   playing any *fixed number* of hands from each shoe.
#   Hand calculators make this assumption.
#   If computing the house edge for a shoe with a cut card,
#   the probabilistic result underestimates the house edge;
#   the difference is greater for smaller shoes,
#   e.g. it is about 0.14% when the shoe has just one deck.
#
# - The **state for a split hand is incomplete**.
#   When splitting cards to form multiple hands,
#   we compute the reward for split hand $i$ based on `split_cards` which includes
#   the two initial cards `(card1, card2)` for split hands $1, \ldots, i-1$
#   but does not include any subsequent drawn cards (by either the player or dealer) from
#   those earlier split hands.

# %% [markdown]
# ### Hands and totals


# %%
def check_hand(hand: Hand) -> None:
  """Check validity of hand."""
  player_cards, dealer1 = hand
  assert len(player_cards) >= 2 and all(1 <= card <= 10 for card in (*player_cards, dealer1)), hand
  assert player_cards[0] <= player_cards[1], hand
  assert player_cards[2:] == tuple(sorted(player_cards[2:])), hand


# %%
def generate_hands_totaling(total: int, rules: Rules, _prefix: Cards = ()) -> Iterator[Cards]:
  """Yield all possible (non-busted) hands with specified `total`, given `rules.num_decks`."""
  prefix_total = sum(_prefix)
  min_value = _prefix[-1] if _prefix else 2 if total < 12 else 1
  for card in range(min_value, min(11, total - prefix_total + 1)):
    if _prefix.count(card) < rules.num_decks * NUM_SUITS:
      sequence = *_prefix, card
      new_total = prefix_total + card
      total_is_correct = new_total == total or (new_total + 10 == total and 1 in sequence)
      if total_is_correct and len(sequence) >= 2:
        yield sequence
      if new_total < total:
        yield from generate_hands_totaling(total, rules, sequence)


# %%
def generate_all_hands(rules: Rules) -> Iterator[Cards]:
  """Yield all possible (non-busted) hands, given `rules.num_decks`."""
  for total in range(4, 22):
    yield from generate_hands_totaling(total, rules)


# %%
def number_of_unique_hands(rules: Rules) -> int:
  """Return the number of possible (non-busted) hands, given `rules.num_decks`."""
  return sum(1 for _ in generate_all_hands(rules))


# %%
def generate_all_initial_cards() -> Iterator[Cards]:
  """Yield all possible initial two cards (such that card1 <= card2)."""
  for card1 in range(1, 11):
    for card2 in range(card1, 11):
      yield card1, card2


# %%
check_eq(
    list(generate_hands_totaling(8, Rules(num_decks=1))),
    [(2, 2, 2, 2), (2, 2, 4), (2, 3, 3), (2, 6), (3, 5), (4, 4)],
)

check_eq(
    list(generate_hands_totaling(21, Rules(num_decks=1)))[:2],
    [(1, 1, 1, 1, 2, 2, 2, 2, 3, 3, 3), (1, 1, 1, 1, 2, 2, 2, 2, 3, 6)],
)

check_eq(
    list(generate_hands_totaling(21, Rules(num_decks=1)))[-7:],
    [(5, 5, 5, 6), (5, 6, 10), (5, 7, 9), (5, 8, 8), (6, 6, 9), (6, 7, 8), (7, 7, 7)],
)

check_eq(list(generate_hands_totaling(21, Rules(num_decks=8)))[:2], [(1,) * 11, (1,) * 21])

check_eq(sum(1 for _ in generate_hands_totaling(21, Rules(num_decks=1))), 416)

check_eq(
    list(generate_all_hands(Rules(num_decks=1)))[:20],
    [(2, 2), (2, 3), (2, 2, 2), (2, 4), (3, 3), (2, 2, 3), (2, 5), (3, 4), (2, 2, 2, 2)]
    + [(2, 2, 4), (2, 3, 3), (2, 6), (3, 5), (4, 4), (2, 2, 2, 3), (2, 2, 5), (2, 3, 4), (2, 7)]
    + [(3, 3, 3), (3, 6)],
)

check_eq(
    list(generate_all_hands(Rules(num_decks=1)))[-20:],
    [(3, 5, 5, 8), (3, 5, 6, 7), (3, 6, 6, 6), (3, 8, 10), (3, 9, 9), (4, 4, 4, 4, 5)]
    + [(4, 4, 4, 9), (4, 4, 5, 8), (4, 4, 6, 7), (4, 5, 5, 7), (4, 5, 6, 6), (4, 7, 10), (4, 8, 9)]
    + [(5, 5, 5, 6), (5, 6, 10), (5, 7, 9), (5, 8, 8), (6, 6, 9), (6, 7, 8), (7, 7, 7)],
)

# %%
# Surprisingly, there are few unique hands in blackjack: 2008 for 1 deck, and 3072 maximum.
check_eq(
    {
        num_decks: number_of_unique_hands(Rules(num_decks=num_decks))
        for num_decks in [1, 2, 4, 6, 8, math.inf]
    },
    {1: 2008, 2: 2796, 4: 3060, 6: 3072, 8: 3072, math.inf: 3072},
)

# The C++ program at https://www.bjstrat.net/playerHands.html reports 3082 possible non-busted
# hands.  It includes single-card hands, which we do not.  Because there are 10 single-card hands,
# the numbers match exactly!


# %%
def order_cards(card1: Card, card2: Card) -> tuple[Card, Card]:
  """Return the two cards in non-descending order."""
  # assert 1 <= card1 <= 10 and 1 <= card2 <= 10
  return (card1, card2) if card1 <= card2 else (card2, card1)


# %%
def combine_two_cards(card1: Card, card2: Card) -> tuple[int, bool]:
  """Return `(total, soft)`; the total of the two cards and whether that total is soft."""
  # assert 1 <= card1 <= 10 and 1 <= card2 <= 10
  if card1 == 1 or card2 == 1:
    return card1 + card2 + 10, True
  return card1 + card2, False


# %%
@numba_njit(inline='always')
def combine_two_cards_numba(card1: Card, card2: Card) -> tuple[int, bool]:
  """Return `(total, soft)`; the total of the two cards and whether that total is soft."""
  int32 = numba.int32
  # assert 1 <= card1 <= 10 and 1 <= card2 <= 10
  card1, card2 = int32(card1), int32(card2)
  if card1 == 1 or card2 == 1:
    return int32(card1 + card2 + 10), True
  return int32(card1 + card2), False


# %%
def combine_cards(cards: Cards) -> tuple[int, bool]:
  """Return `(total, soft)`: the total of `cards` and whether that total is soft."""
  # assert len(cards) >= 2 and all(1 <= card <= 10 for card in cards)
  total = sum(cards)
  if total < 12 and 1 in cards:
    return total + 10, True
  return total, False


# %%
check_eq(combine_cards((2, 5)), (7, False))
check_eq(combine_cards((1, 2)), (13, True))
check_eq(combine_cards((1, 10)), (21, True))
check_eq(combine_cards((2, 1, 6)), (19, True))
check_eq(combine_cards((2, 1, 5, 2)), (20, True))
check_eq(combine_cards((2, 1, 5, 4)), (12, False))
check_eq(combine_cards((1, 1)), (12, True))
check_eq(combine_cards((1, 1, 10)), (12, False))


# %%
def add_card(total: int, soft: bool, card: Card) -> tuple[int, bool]:
  """Return new card total and softness given an additional card."""
  # assert 4 <= total <= 21 and 1 <= card <= 10
  total += card
  if total > 21:
    if soft:
      total, soft = total - 10, False
  elif total < 12 and card == 1:
    total, soft = total + 10, True
  return total, soft


# %%
@numba_njit(inline='always')
def add_card_numba(total: int, soft: bool, card: Card) -> tuple[int, bool]:
  """Return new card total and softness given an additional card."""
  # assert 4 <= total <= 21 and 1 <= card <= 10
  int32 = numba.int32
  total, card = int32(total), int32(card)
  total = int32(total + card)
  if total > 21:
    if soft:
      total, soft = int32(total - 10), False
  elif total < 12 and card == 1:
    total, soft = int32(total + 10), True
  return total, soft


# %%
def two_cards_reproducing_total(total: int, soft: bool, dealer1: Card) -> tuple[Card, Card]:
  """Return two cards to satisfy assertions (not used for probabilities)."""
  # assert (12 if soft else 4) <= total <= 21 and 1 <= dealer1 <= 10
  if total == 21 and not soft:
    soft = True