"""Classes and functions for running poker games"""
import itertools
from typing import Any, Dict, Iterator, List, Optional, Tuple, Type, Union
import numpy as np
from clubs import error, poker, render
[docs]class Dealer:
"""Runs a range of different of poker games dependent on the
given configuration. Supports limit, no limit and pot limit
bet sizing, arbitrary deck sizes, arbitrary hole and community
cards and many other options.
Parameters
----------
num_players : int
maximum number of players
num_streets : int
number of streets including preflop, e.g. for texas hold'em
num_streets=4
blinds : Union[int, List[int]]
blind distribution as a list of ints, one for each player
starting from the button e.g. [0, 1, 2] for a three player game
with a sb of 1 and bb of 2, passed ints will be expanded to
all players i.e. pass blinds=0 for no blinds
antes : Union[int, List[int]]
ante distribution as a list of ints, one for each player
starting from the button e.g. [0, 0, 5] for a three player game
with a bb ante of 5, passed ints will be expanded to all
players i.e. pass antes=0 for no antes
raise_sizes : Union[float, str, List[Union[float, str]]]
max raise sizes for each street, valid raise sizes are ints,
floats, and 'pot', e.g. for a 1-2 limit hold'em the raise sizes
should be [2, 2, 4, 4] as the small and big bet are 2 and 4.
float('inf') can be used for no limit games. pot limit raise
sizes can be set using 'pot'. if only a single int, float or
string is passed the value is expanded to a list the length
of number of streets, e.g. for a standard no limit game pass
raise_sizes=float('inf')
num_raises : Union[float, List[float]]
max number of bets for each street including preflop, valid
raise numbers are ints and floats. if only a single int or float
is passed the value is expanded to a list the length of number
of streets, e.g. for a standard limit game pass num_raises=4
num_suits : int
number of suits to use in deck, must be between 1 and 4
num_ranks : int
number of ranks to use in deck, must be between 1 and 13
num_hole_cards : int
number of hole cards per player, must be greater than 0
num_community_cards : Union[int, List[int]]
number of community cards per street including preflop, e.g.
for texas hold'em pass num_community_cards=[0, 3, 1, 1]. if only
a single int is passed, it is expanded to a list the length of
number of streets
num_cards_for_hand : int
number of cards for a valid poker hand, e.g. for texas hold'em
num_cards_for_hand=5
mandatory_num_hole_cards : int
number of hole cards which have to be used for the hand, e.g.
for pot limit omaha mandatory_num_hole_cards=2
start_stack : int
number of chips each player starts with
low_end_straight : bool, optional
toggle to include the low ace straight within valid hands, by
default True
order : Optional[List[str]], optional
optional custom order of hand ranks, must be permutation of
['sf', 'fk', 'fh', 'fl', 'st', 'tk', 'tp', 'pa', 'hc']. if
order=None, hands are ranked by rarity. by default None
Examples
--------
>>> Dealer( # 1-2 Heads Up No Limit Texas Hold'em
... num_players=2, num_streets=4, blinds=[1, 2], antes=0,
... raise_sizes=float('inf'), num_raises=float('inf'),
... num_suits=4, num_ranks=13, num_hole_cards=2,
... mandatory_num_hole_cards=0, start_stack=200
... )
>>> Dealer( # 1-2 6 Player PLO
... num_players=6, num_streets=4, blinds=[0, 1, 2, 0, 0, 0],
... antes=0, raise_sizes='pot', num_raises=float('inf'),
... num_suits=4, num_ranks=13, num_hole_cards=4,
... mandatory_num_hole_cards=2, start_stack=200
... )
>>> Dealer( # 1-2 Heads Up No Limit Short Deck
... num_players=2, num_streets=4, blinds=[1, 2], antes=0,
... raise_sizes=float('inf'), num_raises=float('inf'),
... num_suits=4, num_ranks=9, num_hole_cards=2,
... mandatory_num_hole_cards=0, start_stack=200,
... order=[
... 'sf', 'fk', 'fl', 'fh', 'st',
... 'tk', 'tp', 'pa', 'hc'
... ]
... )
"""
def __init__(
self,
num_players: int,
num_streets: int,
blinds: Union[int, List[int]],
antes: Union[int, List[int]],
raise_sizes: Union[float, str, List[Union[float, str]]],
num_raises: Union[float, List[float]],
num_suits: int,
num_ranks: int,
num_hole_cards: int,
num_community_cards: Union[int, List[int]],
num_cards_for_hand: int,
mandatory_num_hole_cards: int,
start_stack: int,
low_end_straight: bool = True,
order: Optional[List[str]] = None,
) -> None:
def check_inp(
var: Union[List[Any], Any], expect_num: int, error_msg: str
) -> List[Any]:
if isinstance(var, list):
if len(var) != expect_num:
raise error.InvalidConfigError(error_msg)
return var
return [var] * expect_num
error_msg = "incorrect {} distribution, expected list of length {}, got {}"
blinds = check_inp(
blinds, num_players, error_msg.format("blind", num_players, str(blinds)),
)
antes = check_inp(
antes, num_players, error_msg.format("ante", num_players, str(antes))
)
raise_sizes = check_inp(
raise_sizes,
num_streets,
error_msg.format("raise size", num_streets, str(raise_sizes)),
)
num_raises = check_inp(
num_raises,
num_streets,
error_msg.format("number of raises", num_streets, str(num_raises)),
)
num_community_cards = check_inp(
num_community_cards,
num_streets,
error_msg.format("community card", num_streets, str(num_community_cards)),
)
def clean_rs(raise_size):
if isinstance(raise_size, (int, float)):
return raise_size
if raise_size == "pot":
return raise_size
raise error.InvalidRaiseSizeError(
f"unknown raise size, expected one of (int, float, 'pot'),"
f" got {raise_size}"
)
# config
self.num_players = num_players
self.num_streets = num_streets
self.blinds = np.array(blinds)
self.antes = np.array(antes)
self.big_blind = blinds[1]
self.raise_sizes = [clean_rs(raise_size) for raise_size in raise_sizes]
self.num_raises = [float(raise_num) for raise_num in num_raises]
self.num_suits = num_suits
self.num_ranks = num_ranks
self.num_hole_cards = num_hole_cards
self.num_community_cards = num_community_cards
self.num_cards_for_hand = num_cards_for_hand
self.mandatory_num_hole_cards = mandatory_num_hole_cards
self.start_stack = start_stack
# dealer
self.action = -1
self.active: np.ndarray = np.zeros(self.num_players, dtype=bool)
self.button = 0
self.community_cards: List[poker.Card] = []
self.deck = poker.Deck(self.num_suits, self.num_ranks)
self.evaluator = poker.Evaluator(
self.num_suits,
self.num_ranks,
self.num_cards_for_hand,
self.mandatory_num_hole_cards,
low_end_straight=low_end_straight,
order=order,
)
self.history: List[Tuple[int, int, bool]] = []
self.hole_cards: List[List[poker.Card]] = []
self.largest_raise = 0
self.pot = 0
self.pot_commit = np.zeros(self.num_players, dtype=np.int32)
self.stacks: np.ndarray = np.full(
self.num_players, self.start_stack, dtype=np.int32
)
self.street = 0
self.street_commits: np.ndarray = np.zeros(self.num_players, dtype=np.int32)
self.street_option: np.ndarray = np.zeros(self.num_players, dtype=bool)
self.street_raises = 0
# render
self.viewer: Optional[render.PokerViewer]
self.viewer = None
self.ascii_viewer = render.ASCIIViewer(
num_players, num_hole_cards, sum(num_community_cards)
)
def __str__(self) -> str:
config = self._render_config()
return self.ascii_viewer._parse_string(config)
def __repr__(self) -> str:
string = (
f"Dealer ({id(self)}) - num players: {self.num_players}, "
f"num streets: {self.num_streets}"
)
return string
[docs] def reset(self, reset_button: bool = False, reset_stacks: bool = False) -> Dict:
"""Resets the table. Shuffles the deck, deals new hole cards
to all players, moves the button and collects blinds and antes.
Parameters
----------
reset_button : bool, optional
reset button to first position at table, by default False
reset_stacks : bool, optional
reset stack sizes to starting stack size, by default False
Returns
-------
Dict
observation dictionary
Examples
--------
>>> dealer = Dealer(**configs.LEDUC_TWO_PLAYER)
>>> dealer.reset()
... {'action': 1,
... 'active': [True, True],
... 'button': 1,
... 'call': 0,
... 'community_cards': [],
... 'hole_cards': [[Card (139879188163600): A♥], [Card (139879188163504): A♠]],
... 'max_raise': 2,
... 'min_raise': 2,
... 'pot': 2,
... 'stacks': [9, 9],
... 'street_commits': [0, 0]}
"""
if reset_stacks:
self.active.fill(1)
self.stacks = np.full(self.num_players, self.start_stack)
else:
self.active = self.stacks > 0
if self.active.sum() <= 1:
raise error.TooFewActivePlayersError(
"not enough players have chips, set reset_stacks=True"
)
if reset_button:
self.button = 0
else:
self.button = self.button + 1 % self.num_players
self.deck.shuffle()
self.community_cards = self.deck.draw(self.num_community_cards[0])
self.history = []
self.hole_cards = [
self.deck.draw(self.num_hole_cards) for _ in range(self.num_players)
]
self.largest_raise = self.big_blind
self.pot = 0
self.pot_commit.fill(0)
self.street = 0
self.street_commits.fill(0)
self.street_option.fill(0)
self.street_raises = 0
self.action = self.button
# in heads up button posts small blind
if self.num_players > 2:
self._move_action()
self._collect_multiple_bets(bets=self.antes.tolist(), street_commits=False)
self._collect_multiple_bets(bets=self.blinds.tolist(), street_commits=True)
self._move_action()
self._move_action()
return self._observation(False)
[docs] def step(self, bet: float) -> Tuple[Dict, List[int], List[int]]:
"""Advances poker game to next player. If the bet is 0, it is
either considered a check or fold, depending on the previous
action. The given bet is always rounded to the closest valid bet
size. When it is the same distance from two valid bet sizes
the smaller bet size is used, e.g. if the min raise is 10 and
the bet is 5, it is rounded down to 0.
Parameters
----------
bet : int
number of chips bet by player currently active
Returns
-------
Tuple[Dict, List[int], List[int]]
observation dictionary, payouts for every player, boolean value for every
player showing if that player is still active in the round
Examples
--------
>>> dealer = Dealer(**configs.LEDUC_TWO_PLAYER)
>>> obs = dealer.reset()
>>> dealer.step(0)
... ({'action': 0,
... 'active': [True, True],
... 'button': 1,
... 'call': 0,
... 'community_cards': [],
... 'hole_cards': [[Card (139879188163600): A♥], [Card (139879188163504): A♠]],
... 'max_raise': 2,
... 'min_raise': 2,
... 'pot': 2,
... 'stacks': [9, 9],
... 'street_commits': [0, 0]},
... [0, 0],
... [False, False])
"""
if self.action == -1:
if self.active.any():
done = self._done()
payouts = self._payouts()
observation = self._observation(all(done))
return observation, payouts, done
raise error.TableResetError("call reset() before calling first step()")
fold = bet < 0
bet = round(bet)
call, min_raise, max_raise = self._bet_sizes()
# round bet to nearest sizing
bet = self._clean_bet(bet, call, min_raise, max_raise)
# only fold if player cannot check
if call and ((bet < call) or fold):
self.active[self.action] = 0
bet = 0
# if bet is full raise record as largest raise
if bet and (bet - call) >= self.largest_raise:
self.largest_raise = bet - call
self.street_raises += 1
self._collect_bet(bet)
self.history.append((self.action, int(bet), bool(fold)))
self.street_option[self.action] = True
self._move_action()
# if all agreed go to next street
if self._all_agreed():
self.action = self.button
self._move_action()
# if at most 1 player active and not all in turn up all
# community cards and evaluate hand
while True:
self.street += 1
full_streets = self.street >= self.num_streets
all_in = self.active * (self.stacks == 0)
all_all_in = self.active.sum() - all_in.sum() <= 1
if full_streets:
break
self.community_cards += self.deck.draw(
self.num_community_cards[self.street]
)
if not all_all_in:
break
self.street_commits.fill(0)
self.street_option = np.logical_not(self.active).astype(bool)
self.street_raises = 0
done = self._done()
payouts = self._payouts()
if all(done):
self.action = -1
self.pot = 0
self.stacks += payouts + self.pot_commit
observation = self._observation(all(done))
return observation, payouts, done
def _render_config(self):
action = int(self.action)
active = self.active.tolist()
all_in = (self.active * (self.stacks == 0)).tolist()
community_cards = self.community_cards
button = int(self.button)
done = all(self._done())
hole_cards = self.hole_cards
pot = int(self.pot)
payouts = self._payouts()
street_commits = self.street_commits.tolist()
stacks = self.stacks.tolist()
config = {
"action": action,
"active": active,
"all_in": all_in,
"community_cards": community_cards,
"button": button,
"done": done,
"hole_cards": hole_cards,
"pot": pot,
"payouts": payouts,
"prev_action": None if not self.history else self.history[-1],
"street_commits": street_commits,
"stacks": stacks,
}
return config
[docs] def render(self, mode: str = "human", sleep: float = 0, **kwargs):
"""Renders poker table. Render mode options are: ascii, human
Parameters
----------
mode : str, optional
toggle for using different renderer, by default 'human'
"""
viewer: Optional[Type[render.PokerViewer]] = None
render_modes = ["ascii", "human"]
if self.viewer is None:
if mode == "ascii":
viewer = render.ASCIIViewer
elif mode == "human":
viewer = render.GraphicViewer
else:
raise error.InvalidRenderModeError(
(f"incorrect render mode {mode}," f"use one of {render_modes}")
)
self.viewer = viewer(
self.num_players,
self.num_hole_cards,
sum(self.num_community_cards),
**kwargs,
)
config = self._render_config()
self.viewer.render(config, sleep)
[docs] def win_probabilities(self, n: int = 10000) -> List[float]:
"""Computes win probabilities for each player. If the possible remaining
community card combinations are below 1000, the combinations are exhaustively
checked, otherwise, n random samples are taken and averaged to compute an
estimate of the win probabilities.
Parameters
----------
n : int, optional
max number of iterations to approximate win probabilities, by default 10000
Returns
-------
List[float]
win probabilities
"""
hand_strengths = []
num_additional_comm_cards = sum(self.num_community_cards) - len(
self.community_cards
)
num_comm_combinations = poker.evaluator._ncr(
len(self.deck.cards), num_additional_comm_cards
)
if num_comm_combinations < 1000:
comm_combinations: Iterator[
Tuple[poker.Card, ...]
] = itertools.combinations(self.deck.cards, num_additional_comm_cards)
n = num_comm_combinations
else:
comm_combinations = (
np.random.choice(
self.deck.cards, num_additional_comm_cards, replace=False
)
for _ in range(n)
)
for additional_comm_cards in comm_combinations:
community_cards = self.community_cards + list(additional_comm_cards)
hand_strengths.append(self._eval_hands(self.hole_cards, community_cards))
best_hand = np.min(hand_strengths, axis=1)
hand_won_bool = best_hand.reshape(n, 1) == np.array(hand_strengths)
hands_won = hand_won_bool.sum(axis=0)
win_probs = hands_won / hands_won.sum()
return win_probs
def _all_agreed(self) -> bool:
# not all agreed if not all players had chance to act
if not all(self.street_option):
return False
# all agreed if street commits equal to maximum street commit
# or player is all in
# or player is not active
return all(
(self.street_commits == self.street_commits.max())
| (self.stacks == 0)
| np.logical_not(self.active)
)
def _bet_sizes(self) -> Tuple[int, int, int]:
# call difference between commit and maximum commit
call = self.street_commits.max() - self.street_commits[self.action]
# min raise at least largest previous raise
# if limit game min and max raise equal to raise size
if isinstance(self.raise_sizes[self.street], int):
max_raise = min_raise = self.raise_sizes[self.street] + call
else:
min_raise = max(self.big_blind, self.largest_raise + call)
if self.raise_sizes[self.street] == "pot":
max_raise = self.pot + call * 2
elif self.raise_sizes[self.street] == float("inf"):
max_raise = self.stacks[self.action]
# if maximum number of raises in street
# was reached cap raise at 0
if self.street_raises >= self.num_raises[self.street]:
min_raise = max_raise = 0
# if last full raise was done by active player
# (another player has raised less than minimum raise amount)
# cap active players raise size to 0
if self.street_raises and call < self.largest_raise:
min_raise = max_raise = 0
# clip bets to stack size
call = min(call, self.stacks[self.action])
min_raise = min(min_raise, self.stacks[self.action])
max_raise = min(max_raise, self.stacks[self.action])
return call, min_raise, max_raise
@staticmethod
def _clean_bet(bet: int, call: int, min_raise: int, max_raise: int) -> int:
# find closest bet size to actual bet
# pessimistic approach: in ties order is fold/check -> call -> raise
idx = np.argmin(np.absolute(np.array([0, call, min_raise, max_raise]) - bet))
# if call closest
if idx == 1:
return call
# if min raise or max raise closest
if idx in (2, 3):
return round(min(max_raise, max(min_raise, bet)))
# if check/fold closest
return 0
def _collect_multiple_bets(self, bets: List[int], street_commits: bool = True):
bets_arr = np.roll(bets, self.action)
bets_arr = (self.stacks > 0) * self.active * bets_arr
if street_commits:
self.street_commits += bets_arr
self.pot_commit += bets_arr
self.pot += int(bets_arr.sum())
self.stacks -= bets_arr
def _collect_bet(self, bet: int):
# bet only as large as stack size
bet = min(self.stacks[self.action], bet)
self.pot += bet
self.pot_commit[self.action] += bet
self.street_commits[self.action] += bet
self.stacks[self.action] -= bet
def _done(self) -> List[int]:
if self.street >= self.num_streets or self.active.sum() <= 1:
# end game
out = np.full(self.num_players, 1)
return out.tolist()
return np.logical_not(self.active).tolist()
def _observation(self, done: bool) -> Dict:
if done:
call = min_raise = max_raise = 0
else:
call, min_raise, max_raise = self._bet_sizes()
observation: dict = {
"action": self.action,
"active": self.active.tolist(),
"button": self.button,
"call": call,
"community_cards": self.community_cards,
"hole_cards": self.hole_cards,
"max_raise": max_raise,
"min_raise": min_raise,
"pot": self.pot,
"stacks": self.stacks.tolist(),
"street_commits": self.street_commits.tolist(),
}
return observation
def _payouts(self) -> List[int]:
# players that have folded lose their bets
payouts = -1 * self.pot_commit * np.logical_not(self.active)
if self.active.sum() == 1:
payouts += self.active * (self.pot - self.pot_commit)
# if last street played and still multiple players active
elif self.street >= self.num_streets:
payouts = self._eval_round()
payouts -= self.pot_commit
return payouts.tolist()
def _eval_hands(
self, hole_cards: List[List[poker.Card]], community_cards: List[poker.Card]
) -> List[int]:
# grab array of hand strength and pot commits
worst_hand = self.evaluator.table.max_rank + 1
hand_strengths = []
for player in range(self.num_players):
# if not active hand strength set
# to 1 worse than worst possible rank
hand_strength = (
self.evaluator.evaluate(hole_cards[player], community_cards)
if self.active[player]
else worst_hand
)
hand_strengths.append(hand_strength)
return hand_strengths
def _eval_round(self) -> List[int]:
# grab array of hand strength and pot commits
hand_strengths = self._eval_hands(self.hole_cards, self.community_cards)
hand_list = [
[player_idx, hand_strength, self.pot_commit[player_idx]]
for player_idx, hand_strength in enumerate(hand_strengths)
]
hands = np.array(hand_list)
# sort hands by hand strength and pot commits
hands = hands[np.lexsort([hands[:, 2], hands[:, 1]])]
pot = self.pot
remainder = 0
payouts = np.zeros(self.num_players, dtype=int)
worst_hand = self.evaluator.table.max_rank + 1
# iterate over hand strength and
# pot commits from smallest to largest
for idx, (_, strength, pot_commit) in enumerate(hands):
eligible = hands[:, 0][hands[:, 1] == strength].astype(int)
# cut can only be as large as lowest player commit amount
cut = np.clip(hands[:, 2], None, pot_commit)
split_pot = cut.sum()
split = split_pot // len(eligible)
remain = split_pot % len(eligible)
payouts[eligible] += split
remainder += remain
# remove chips from players and pot
hands[:, 2] -= cut
pot -= split_pot
# remove player from next split pot
hands[idx, 1] = worst_hand
if pot == 0:
break
# give worst position player remainder chips
if remainder:
# worst player is first player after button involved in pot
involved_players = np.nonzero(payouts)[0]
button_shift = (involved_players <= self.button) * self.num_players
button_shifted_players = involved_players + button_shift
worst_idx = np.argmin(button_shifted_players)
worst_pos = involved_players[worst_idx]
payouts[worst_pos] += remainder
return payouts.tolist()
def _move_action(self):
action = self.action
for idx in range(1, self.num_players + 1):
action = (self.action + idx) % self.num_players
if self.active[action]:
break
else:
self.street_option[action] = True
self.action = action