Skip to content

Commit

Permalink
add argument max_raises_per_player_round
Browse files Browse the repository at this point in the history
  • Loading branch information
dickreuter committed Feb 17, 2024
1 parent bbfd049 commit 9e2a6cc
Show file tree
Hide file tree
Showing 4 changed files with 57 additions and 58 deletions.
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -27,5 +27,6 @@ Graph/
/EquityCalculatorMontecarlo.pdb
/EquityCalculatorMontecarlo.pyd
/python37.dll

build/*
.vscode/
65 changes: 39 additions & 26 deletions gym_env/env.py
Original file line number Diff line number Diff line change
Expand Up @@ -89,8 +89,7 @@ class HoldemTable(Env):
"""Pokergame environment"""

def __init__(self, initial_stacks=100, small_blind=1, big_blind=2, render=False, funds_plot=True,
max_raising_rounds=2, use_cpp_montecarlo=False,
max_steps_after_raiser=None):
max_raises_per_player_round=2, use_cpp_montecarlo=False, raise_illegal_moves=False):
"""
The table needs to be initialized once at the beginning
Expand All @@ -101,9 +100,7 @@ def __init__(self, initial_stacks=100, small_blind=1, big_blind=2, render=False,
big_blind (real)
render (bool): render table after each move in graphical format
funds_plot (bool): show plot of funds history at end of each episode
max_raising_rounds (int): max raises per round per player
max_steps_after_raiser (int): max steps after raiser to end round. If None it will default to 2*len(players) - 1
use_cpp_montecarlo (bool): use cpp montecarlo for equity calculation
max_raises_per_player_round (int): max raises per round per player
"""
if use_cpp_montecarlo:
Expand All @@ -128,7 +125,7 @@ def __init__(self, initial_stacks=100, small_blind=1, big_blind=2, render=False,
self.last_player_pot = None
self.viewer = None
self.player_max_win = None # used for side pots
self.round_number = 0
self.round_number_in_street = 0
self.last_caller = None
self.last_raiser = None
self.raisers = []
Expand All @@ -144,7 +141,7 @@ def __init__(self, initial_stacks=100, small_blind=1, big_blind=2, render=False,
self.initial_stacks = initial_stacks
self.acting_agent = None
self.funds_plot = funds_plot
self.max_round_raising = max_raising_rounds
self.max_raises_per_player_round = max_raises_per_player_round

# pots
self.community_pot = 0
Expand All @@ -162,7 +159,7 @@ def __init__(self, initial_stacks=100, small_blind=1, big_blind=2, render=False,
self.action_space = Discrete(len(Action) - 2)
self.first_action_for_hand = None

self.initial_max_steps_after_raiser = max_steps_after_raiser
self.raise_illegal_moves = raise_illegal_moves

def reset(self):
"""Reset after game over."""
Expand All @@ -177,16 +174,14 @@ def reset(self):
log.warning("No agents added. Add agents before resetting the environment.")
return


for player in self.players:
player.stack = self.initial_stacks

self.dealer_pos = 0
if not self.initial_max_steps_after_raiser:
self.initial_max_steps_after_raiser = len(self.players) - 1
self.player_cycle = PlayerCycle(self.players, dealer_idx=-1,
max_steps_after_raiser=self.initial_max_steps_after_raiser,
max_steps_after_big_blind=len(self.players))
max_steps_after_raiser = (self.max_raises_per_player_round - 1) * len(self.players) - 1
self.player_cycle = PlayerCycle(self.players, dealer_idx=-1, max_steps_after_raiser=max_steps_after_raiser,
max_steps_after_big_blind=len(self.players),
max_raises_per_player_round=self.max_raises_per_player_round)
self._start_new_hand()
self._get_environment()
# auto play for agents where autoplay is set
Expand Down Expand Up @@ -233,7 +228,7 @@ def step(self, action): # pylint: disable=arguments-differ
self.first_action_for_hand[self.acting_agent] = False
self._calculate_reward(action)

log.info(f"Previous action reward for seat {self.acting_agent}: {self.reward}")
log.debug(f"Previous action reward for seat {self.acting_agent}: {self.reward}")
return self.array_everything, self.reward, self.done, self.info

def _execute_step(self, action):
Expand All @@ -249,6 +244,8 @@ def _execute_step(self, action):

def _illegal_move(self, action):
    """Handle an action that is not among the currently legal moves.

    A warning naming the action and the legal moves is always logged.
    When ``self.raise_illegal_moves`` is truthy a ``ValueError`` with the
    same text is raised; otherwise ``self.reward`` is set to
    ``self.illegal_move_reward``.
    """
    message = f"{action} is an Illegal move, try again. Currently allowed: {self.legal_moves}"
    log.warning(message)
    if not self.raise_illegal_moves:
        # Lenient mode: apply the illegal-move reward and carry on.
        self.reward = self.illegal_move_reward
        return
    raise ValueError(message)

def _agent_is_autoplay(self, idx=None):
Expand Down Expand Up @@ -355,26 +352,32 @@ def _process_decision(self, action): # pylint: disable=too-many-statements
elif action == Action.RAISE_3BB:
contribution = 3 * self.big_blind - self.player_pots[self.current_player.seat]
self.raisers.append(self.current_player.seat)
self.current_player.num_raises_in_street[self.stage] += 1

elif action == Action.RAISE_HALF_POT:
contribution = (self.community_pot + self.current_round_pot) / 2
self.raisers.append(self.current_player.seat)
self.current_player.num_raises_in_street[self.stage] += 1

elif action == Action.RAISE_POT:
contribution = (self.community_pot + self.current_round_pot)
self.raisers.append(self.current_player.seat)
self.current_player.num_raises_in_street[self.stage] += 1

elif action == Action.RAISE_2POT:
contribution = (self.community_pot + self.current_round_pot) * 2
self.raisers.append(self.current_player.seat)
self.current_player.num_raises_in_street[self.stage] += 1

elif action == Action.ALL_IN:
contribution = self.current_player.stack
self.raisers.append(self.current_player.seat)
self.current_player.num_raises_in_street[self.stage] += 1

elif action == Action.SMALL_BLIND:
contribution = np.minimum(self.small_blind, self.current_player.stack)


elif action == Action.BIG_BLIND:
contribution = np.minimum(self.big_blind, self.current_player.stack)
self.player_cycle.mark_bb()
Expand All @@ -401,7 +404,7 @@ def _process_decision(self, action): # pylint: disable=too-many-statements
self.player_max_win[self.current_player.seat] += contribution # side pot

pos = self.player_cycle.idx
rnd = self.stage.value + self.round_number
rnd = self.stage.value + self.round_number_in_street
self.stage_data[rnd].calls[pos] = action == Action.CALL
self.stage_data[rnd].raises[pos] = action in [Action.RAISE_2POT, Action.RAISE_HALF_POT, Action.RAISE_POT]
self.stage_data[rnd].min_call_at_action[pos] = self.min_call / (self.big_blind * 100)
Expand Down Expand Up @@ -506,7 +509,7 @@ def _initiate_round(self):
log.info("")
log.info("===Round: Stage: PREFLOP")
# max steps total will be adjusted again at bb
self.player_cycle.max_steps_total = len(self.players) * self.max_round_raising + 2
self.player_cycle.max_steps_total = len(self.players) * self.max_raises_per_player_round

self._next_player()
self._process_decision(Action.SMALL_BLIND)
Expand All @@ -515,7 +518,7 @@ def _initiate_round(self):
self._next_player()

elif self.stage in [Stage.FLOP, Stage.TURN, Stage.RIVER]:
self.player_cycle.max_steps_total = len(self.players) * self.max_round_raising
self.player_cycle.max_steps_total = len(self.players) * self.max_raises_per_player_round

self._next_player()

Expand Down Expand Up @@ -631,8 +634,9 @@ def _get_legal_moves(self):
self.legal_moves.append(Action.CALL)
self.legal_moves.append(Action.FOLD)

if self.current_player.stack >= 3 * self.big_blind - self.player_pots[self.current_player.seat]:
self.legal_moves.append(Action.RAISE_3BB)
if self.current_player.num_raises_in_street[self.stage] < self.max_raises_per_player_round:
if self.current_player.stack >= 3 * self.big_blind - self.player_pots[self.current_player.seat]:
self.legal_moves.append(Action.RAISE_3BB)

if self.current_player.stack >= ((self.community_pot + self.current_round_pot) / 2) >= self.min_call:
self.legal_moves.append(Action.RAISE_HALF_POT)
Expand Down Expand Up @@ -746,7 +750,8 @@ class PlayerCycle:
"""Handle the circularity of the Table."""

def __init__(self, lst, start_idx=0, dealer_idx=0, max_steps_total=None,
last_raiser_step=None, max_steps_after_raiser=None, max_steps_after_big_blind=None):
last_raiser_step=None, max_steps_after_raiser=None, max_steps_after_big_blind=None,
max_raises_per_player_round=2):
"""Cycle over a list"""
self.lst = lst
self.start_idx = start_idx
Expand All @@ -758,7 +763,7 @@ def __init__(self, lst, start_idx=0, dealer_idx=0, max_steps_total=None,
self.last_raiser = None
self.step_counter = 0
self.steps_for_blind_betting = 2
self.round_number = 0
self.round_number_in_street = 0
self.idx = 0
self.dealer_idx = dealer_idx
self.can_still_make_moves_in_this_hand = [] # if the player can still play in this round
Expand All @@ -767,6 +772,7 @@ def __init__(self, lst, start_idx=0, dealer_idx=0, max_steps_total=None,
self.new_hand_reset()
self.checkers = 0
self.folder = None
self.max_raises_per_player_round = max_raises_per_player_round

def new_hand_reset(self):
"""Reset state if a new hand is dealt"""
Expand All @@ -779,7 +785,7 @@ def new_hand_reset(self):
def new_round_reset(self):
"""Reset the state for the next stage: flop, turn or river"""
self.step_counter = 0
self.round_number = 0
self.round_number_in_street = 0
self.idx = self.dealer_idx
self.last_raiser_step = len(self.lst)
self.checkers = 0
Expand All @@ -794,9 +800,9 @@ def next_player(self, step=1):
self.step_counter += step
self.idx %= len(self.lst)
if self.step_counter > len(self.lst):
self.round_number += 1
self.round_number_in_street += 1
if self.max_steps_total and (self.step_counter >= self.max_steps_total):
log.debug("Max steps total has been reached")
log.info("Max steps total has been reached")
return False

if self.last_raiser:
Expand Down Expand Up @@ -876,7 +882,7 @@ def mark_out_of_cash_but_contributed(self):
def mark_bb(self):
    """Ensure bb can raise.

    Pushes ``last_raiser_step`` one full lap of the table beyond the
    current step, and sets ``max_steps_total`` to the current step plus
    ``max_raises_per_player_round`` laps.
    """
    seats = len(self.lst)
    current_step = self.step_counter
    self.last_raiser_step = current_step + seats
    self.max_steps_total = current_step + seats * self.max_raises_per_player_round

def is_raising_allowed(self):
"""Check if raising is still allowed at this position"""
Expand Down Expand Up @@ -908,3 +914,10 @@ def __init__(self, stack_size, name):
self.temp_stack = []
self.name = name
self.agent_obj = None
self.num_raises_in_street = {Stage.PREFLOP: 0,
Stage.FLOP: 0,
Stage.TURN: 0,
Stage.RIVER: 0}

def __repr__(self):
    """Return a one-line summary with the player's name, seat, stack and cards."""
    summary = f"Player {self.name} at seat {self.seat} with stack of {self.stack} and cards {self.cards}"
    return summary
5 changes: 4 additions & 1 deletion pytest.ini
Original file line number Diff line number Diff line change
@@ -1,3 +1,6 @@
[pytest]
addopts = -s
addopts = -ra -s
log_cli = True
log_cli_level = INFO
log_cli_format = %(asctime)s %(levelname)s %(message)s
python_functions = test_*
44 changes: 13 additions & 31 deletions tests/test_gym_env.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,14 +5,13 @@


def _create_env(n_players,
initial_stacks=100, small_blind=1, big_blind=2, render=False, funds_plot=True,
max_raising_rounds=2, max_steps_after_raiser=None,
initial_stacks=100, small_blind=1, big_blind=2, render=False, funds_plot=False,
max_raises_per_player_round=2,
use_cpp_montecarlo=False):
"""Create an environment"""
env = HoldemTable(small_blind=small_blind, big_blind=big_blind, initial_stacks=initial_stacks,
max_raising_rounds=max_raising_rounds,
max_steps_after_raiser=max_steps_after_raiser,
use_cpp_montecarlo=use_cpp_montecarlo)
max_raises_per_player_round=max_raises_per_player_round,
use_cpp_montecarlo=use_cpp_montecarlo, funds_plot=funds_plot, render=render)

for _ in range(n_players):
player = PlayerForTest()
Expand Down Expand Up @@ -41,7 +40,7 @@ def test_basic_actions_with_6_player():
assert env.players[2].stack == 98
assert env.stage == Stage.PREFLOP
env.step(Action.RAISE_POT) # big blind raises
assert env.player_cycle.round_number
assert env.player_cycle.round_number_in_street
env.step(Action.FOLD) # utg
env.step(Action.CALL) # 4 only remaining player calls
assert env.stage == Stage.FLOP
Expand Down Expand Up @@ -102,8 +101,7 @@ def test_raise_to_3_times_big_blind_after_big_blind_bet():
def test_raise_to_3_times_big_blind_is_not_possible_with_not_enough_remaining_stack():
"""1. Test raise to 3 times big blind is only possible with enough chips.
See https://github.com/dickreuter/neuron_poker/issues/41"""
env = _create_env(2) # bet small blind and big blind
env.players[0].stack = 2
env = _create_env(2, initial_stacks=2) # bet small blind and big blind

env.step(Action.CALL)
assert Action.RAISE_3BB not in env.legal_moves
Expand Down Expand Up @@ -267,28 +265,12 @@ def test_call_proper_amount():

def test_unlimited_raising_preflop():
"""Test raising unlimited preflop"""
env = _create_env(2, initial_stacks=100000, max_raising_rounds=100, max_steps_after_raiser=100)
env.step(Action.CALL) # seat 3 utg
env.step(Action.RAISE_POT) # seat 4
env.step(Action.CALL) # seat 0 dealer
env.step(Action.RAISE_POT) # seat 1 small blind
env.step(Action.CALL) # seat 2 big blind
assert env.stage == Stage.PREFLOP
env.step(Action.CALL) # seat 3 utg
env.step(Action.RAISE_POT) # seat 4
env.step(Action.CALL) # seat 0 dealer
env.step(Action.RAISE_POT) # seat 1 small blind
env.step(Action.CALL) # seat 2 big blind
env = _create_env(2, initial_stacks=100000, max_raises_per_player_round=3)
env.step(Action.CALL) # sb
env.step(Action.RAISE_POT) # bb raises
env.step(Action.CALL) # sb
assert env.stage == Stage.PREFLOP
env.step(Action.CALL) # seat 3 utg
env.step(Action.RAISE_POT) # seat 4
env.step(Action.CALL) # seat 0 dealer
env.step(Action.RAISE_POT) # seat 1 small blind
env.step(Action.CALL) # seat 2 big blind
assert env.stage == Stage.PREFLOP
env.step(Action.CALL) # seat 3 utg
env.step(Action.RAISE_POT) # seat 4
env.step(Action.CALL) # seat 0 dealer
env.step(Action.RAISE_POT) # seat 1 small blind
env.step(Action.CALL) # seat 2 big blind
env.step(Action.RAISE_POT) # bb raises
assert env.stage == Stage.PREFLOP
env.step(Action.CALL) # sb calls
assert env.stage == Stage.FLOP

0 comments on commit 9e2a6cc

Please sign in to comment.