diff --git a/pluribus/games/short_deck/agent.py b/pluribus/games/short_deck/agent.py
new file mode 100644
index 00000000..c93cf4cf
--- /dev/null
+++ b/pluribus/games/short_deck/agent.py
@@ -0,0 +1,30 @@
+import collections
+import joblib
+
+
+class Agent:
+ """Agent class can hold a trained strategy and regret"""
+ def __init__(self, regret_path=None):
+ self.strategy = collections.defaultdict(
+ lambda: collections.defaultdict(lambda: 0)
+ )
+ if regret_path:
+ offline_strategy = joblib.load(regret_path)
+ self.regret = collections.defaultdict(
+ lambda: collections.defaultdict(lambda: 0),
+ offline_strategy['regret']
+ )
+ else:
+ self.regret = collections.defaultdict(
+ lambda: collections.defaultdict(lambda: 0)
+ )
+ self.tmp_regret = collections.defaultdict(
+ lambda: collections.defaultdict(lambda: 0)
+ )
+
+ def reset_new_regret(self):
+ """Remove regret from temporary storage"""
+ del self.tmp_regret
+ self.tmp_regret = collections.defaultdict(
+ lambda: collections.defaultdict(lambda: 0)
+ )
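+
+
+# A minimal usage sketch (hypothetical file path; the loaded file is assumed
+# to be a joblib dump containing a "regret" mapping):
+#
+#   agent = Agent(regret_path="agent_dump.gz")
+#   agent.tmp_regret["some_info_set"]["call"] += 1.0
+#   agent.reset_new_regret()  # clears only the temporary regret buffer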
diff --git a/pluribus/games/short_deck/state.py b/pluribus/games/short_deck/state.py
index af2e08b7..ecb2d273 100644
--- a/pluribus/games/short_deck/state.py
+++ b/pluribus/games/short_deck/state.py
@@ -7,8 +7,11 @@
import operator
import os
from typing import Any, Dict, List, Optional, Tuple
+from itertools import combinations
+import random
import dill as pickle
+import numpy as np
from pluribus import utils
from pluribus.poker.card import Card
@@ -32,7 +35,8 @@ def new_game(
]
if info_set_lut:
# Don't reload massive files, it takes ages.
- state = ShortDeckPokerState(players=players, load_pickle_files=False, **kwargs)
+ state = ShortDeckPokerState(players=players,
+ load_pickle_files=False, **kwargs)
state.info_set_lut = info_set_lut
else:
# Load massive files.
@@ -54,6 +58,8 @@ def __init__(
big_blind: int = 100,
pickle_dir: str = ".",
load_pickle_files: bool = True,
+ real_time_test: bool = False,
+ public_cards: List[Card] = []
):
"""Initialise state."""
n_players = len(players)
@@ -74,6 +80,7 @@ def __init__(
self._initial_n_chips = players[0].n_chips
self.small_blind = small_blind
self.big_blind = big_blind
+ self.real_time_test = real_time_test
self._poker_engine = PokerEngine(
table=self._table, small_blind=small_blind, big_blind=big_blind
)
@@ -81,9 +88,13 @@ def __init__(
# this), assign blinds to the players.
self._poker_engine.round_setup()
# Deal private cards to players.
- self._table.dealer.deal_private_cards(self._table.players)
+ if not self.real_time_test:
+ self._poker_engine.table.dealer.deal_private_cards(
+ self._table.players
+ )
# Store the actions as they come in here.
self._history: Dict[str, List[str]] = collections.defaultdict(list)
+ self._public_information: Dict[str, List[Card]] = collections.defaultdict(list)
self._betting_stage = "pre_flop"
self._betting_stage_to_round: Dict[str, int] = {
"pre_flop": 0,
@@ -107,11 +118,19 @@ def __init__(
"terminal": player_i_order,
}
self._skip_counter = 0
- self._first_move_of_current_round = True
+ # self._first_move_of_current_round = True
self._reset_betting_round_state()
for player in self.players:
player.is_turn = False
self.current_player.is_turn = True
+ if public_cards:
+ assert len(public_cards) in {3, 4, 5}
+ self._public_cards = public_cards
+ self._final_action = None
+ # only want to do these actions in real game play, as they are slow
+ if self.real_time_test:
+ # must have offline strategy loaded up
+ self._starting_hand_probs = self._initialize_starting_hands()
def __repr__(self):
"""Return a helpful description of object in strings and debugger."""
@@ -145,7 +164,6 @@ def apply_action(self, action_str: Optional[str]) -> ShortDeckPokerState:
new_state.info_set_lut = self.info_set_lut = lut
# An action has been made, so alas we are not in the first move of the
# current betting round.
- new_state._first_move_of_current_round = False
if action_str is None:
# Assert active player has folded already.
assert (
@@ -189,7 +207,6 @@ def apply_action(self, action_str: Optional[str]) -> ShortDeckPokerState:
# stage of the game.
new_state._increment_stage()
new_state._reset_betting_round_state()
- new_state._first_move_of_current_round = True
if not new_state.current_player.is_active:
new_state._skip_counter += 1
assert not new_state.current_player.is_active
@@ -209,6 +226,63 @@ def apply_action(self, action_str: Optional[str]) -> ShortDeckPokerState:
new_state.current_player.is_turn = True
return new_state
+ def load_game_state(self, offline_strategy: Dict[str, Dict[str, float]],
+ action_sequence: list):
+ """
+ Follow through the action sequence provided to get current node.
+ :param action_sequence: List of actions without 'skip'
+ """
+ if 'skip' in set(action_sequence):
+ action_sequence = [a for a in action_sequence if a != 'skip']
+ if len(action_sequence) == 1:
+ # TODO: Not sure if I need to deepcopy
+ betting_stage = self.betting_stage
+ public_cards = self._public_cards
+            # Must declare the appropriate number of public cards for RTS.
+ assert self._public_information[betting_stage] == public_cards
+ lut = self.info_set_lut
+ self.info_set_lut = {}
+ new_state = copy.deepcopy(self)
+ new_state.info_set_lut = self.info_set_lut = lut
+ new_state._final_action = action_sequence.pop(0)
+ new_state._update_hole_cards_bayes(offline_strategy)
+ return new_state
+ a = action_sequence.pop(0)
+ new_state = self.apply_action(a)
+ return new_state.load_game_state(offline_strategy, action_sequence)
+
+    def deal_bayes(self):
+        """Deal hole cards from the Bayes-updated starting-hand distribution,
+        then apply the stored final action."""
+        # Deep copy the parts of the state that must remain immutable from
+        # state to state.
+ lut = self.info_set_lut
+ self.info_set_lut = {}
+ new_state = copy.deepcopy(self)
+ new_state.info_set_lut = self.info_set_lut = lut
+ players = list(range(len(new_state.players)))
+ random.shuffle(players)
+ cards_selected = []
+ # TODO: This would be better by selecting the first player's
+ # cards, then normalizing the second and third, etc..
+ for p_i in players:
+ starting_hand = new_state._get_starting_hand(p_i)
+ len_union = len(set(starting_hand).union(set(cards_selected)))
+ len_individual = len(starting_hand) + len(cards_selected)
+ while len_union < len_individual:
+ starting_hand = new_state._get_starting_hand(p_i)
+ len_union = len(set(starting_hand).union(set(cards_selected)))
+ len_individual = len(starting_hand) + len(cards_selected)
+ # TODO: pull this into a helper method, maybe it should
+ # be in the dealer class..
+ for card in starting_hand:
+ new_state.players[p_i].add_private_card(card)
+ cards_selected += starting_hand
+ cards_selected += new_state._public_cards
+ for card in cards_selected:
+ new_state._table.dealer.deck.remove(card)
+ final_action = new_state._final_action
+ newest_state = new_state.apply_action(final_action)
+ return newest_state
+
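+    # A rough usage sketch for real-time search (hypothetical names;
+    # `offline_strategy` is a blueprint strategy dict and `action_sequence`
+    # excludes "skip" markers):
+    #
+    #   current = state.load_game_state(offline_strategy, action_sequence)
+    #   resolved = current.deal_bayes()  # sample hole cards, replay action
+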
@staticmethod
def load_pickle_files(pickle_dir: str) -> Dict[str, Dict[Tuple[int, ...], str]]:
"""Load pickle files into memory."""
@@ -254,15 +328,36 @@ def _increment_stage(self):
if self._betting_stage == "pre_flop":
# Progress from private cards to the flop.
self._betting_stage = "flop"
- self._poker_engine.table.dealer.deal_flop(self._table)
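+            # In real-time search the board may be predetermined: use the
+            # supplied public cards when available, otherwise deal as usual.
+            # The same pattern applies to the turn and river below.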
+ if len(self._public_cards) >= 3:
+ community_cards = self._public_cards[:3]
+ self._poker_engine.table.community_cards += community_cards
+ else:
+ self._poker_engine.table.dealer.deal_flop(self._table)
+ self._public_information[
+ self.betting_stage
+ ] = self._table.community_cards.copy()
elif self._betting_stage == "flop":
# Progress from flop to turn.
self._betting_stage = "turn"
- self._poker_engine.table.dealer.deal_turn(self._table)
+ if len(self._public_cards) >= 4:
+ community_cards = self._public_cards[3:4]
+ self._poker_engine.table.community_cards += community_cards
+ else:
+ self._poker_engine.table.dealer.deal_turn(self._table)
+ self._public_information[
+ self.betting_stage
+ ] = self._table.community_cards.copy()
elif self._betting_stage == "turn":
# Progress from turn to river.
self._betting_stage = "river"
- self._poker_engine.table.dealer.deal_river(self._table)
+ if len(self._public_cards) == 5:
+ community_cards = self._public_cards[4:]
+ self._poker_engine.table.community_cards += community_cards
+ else:
+ self._poker_engine.table.dealer.deal_river(self._table)
+ self._public_information[
+ self.betting_stage
+ ] = self._table.community_cards.copy()
elif self._betting_stage == "river":
# Progress to the showdown.
self._betting_stage = "show_down"
@@ -271,6 +366,200 @@ def _increment_stage(self):
else:
raise ValueError(f"Unknown betting_stage: {self._betting_stage}")
+    def _initialize_starting_hands(
+        self,
+    ) -> Dict[int, Dict[Tuple[Card, ...], float]]:
+        """Build a per-player dictionary of starting hands in which to store
+        reach probabilities."""
+ assert self.betting_stage == "pre_flop"
+ starting_hand_probs: Dict = {}
+ n_players = len(self.players)
+ starting_hands = self._get_card_combos(2)
+ for p_i in range(n_players):
+ starting_hand_probs[p_i] = {}
+ for starting_hand in starting_hands:
+                starting_hand_probs[p_i][starting_hand] = 1
+ return starting_hand_probs
+
+ def _get_card_combos(self, num_cards) -> List[Tuple[Any, ...]]:
+ """Get combinations of cards"""
+ return list(combinations(self.cards_in_deck, num_cards))
+
+ def _normalize_bayes(self):
+ """Normalize probability of reach for each player"""
+ n_players = len(self.players)
+ for p_i in range(n_players):
+ total_prob = sum(self._starting_hand_probs[p_i].values())
+ for starting_hand, prob in self._starting_hand_probs[p_i].items():
+ self._starting_hand_probs[p_i][starting_hand] = prob / total_prob
+
+ def _update_hole_cards_bayes(self, offline_strategy: Dict[str, Dict[str,
+ float]]):
+ """Get probability of reach for each starting hand for each player"""
+ assert self._history
+ n_players = len(self._table.players)
+ player_indices: List[int] = [p_i for p_i in range(n_players)]
+ for p_i in player_indices:
+            # TODO: Might make sense to put starting hands in the deck class.
+ for starting_hand in self._starting_hand_probs[p_i].keys():
+                starting_hand = list(starting_hand)
+ # TODO: Is this bad?
+ if "p_reach" in locals():
+ del p_reach
+ action_sequence: Dict[str, List[str]] = collections.defaultdict(list)
+ for idx, betting_stage in enumerate(self._history.keys()):
+ n_actions_round = len(self._history[betting_stage])
+ for i in range(n_actions_round):
+ action = self._history[betting_stage][i]
+ while action == 'skip':
+ i += 1 # Action sequences don't end in skip
+ action = self._history[betting_stage][i]
+ # TODO: Maybe a method already exists for this?
+ if betting_stage == "pre_flop":
+ ph = (i + 2) % n_players
+ else:
+ ph = i % n_players
+ if p_i != ph:
+ prob_reach_all_hands = []
+ for opp_starting_hand in self._starting_hand_probs[
+ p_i
+ ].keys():
+ opp_starting_hand = list(
+ opp_starting_hand
+ )
+ publics = self._public_information[betting_stage]
+ if len(
+ set(opp_starting_hand).union(
+ set(publics)
+ ).union(set(starting_hand))
+ ) < len(
+ opp_starting_hand
+ ) + len(
+ starting_hand
+ ) + len(
+ publics
+ ):
+ prob = 0
+ else:
+ publics = self._public_information[
+ betting_stage
+ ]
+ infoset = self._info_set_builder(
+ hole_cards=opp_starting_hand,
+ public_cards=publics,
+ history=action_sequence,
+ this_betting_stage=betting_stage,
+ )
+                                    # Check whether the strategy exists; if
+                                    # not, fall back to equal probability.
+                                    # TODO: is this overly hacky? The problem
+                                    # with defaulting to 1 / 3 is that it
+                                    # doesn't work for calculations that need
+                                    # the object's actual values.
+ try:
+ prob = offline_strategy[infoset][action]
+                                        # Normalize the raw offline_strategy.
+ prob /= sum(offline_strategy[infoset]\
+ .values())
+ except KeyError:
+ prob = 1 / len(self.legal_actions)
+ prob_reach_all_hands.append(prob)
+ total_opp_prob_h = sum(prob_reach_all_hands) /\
+ len(prob_reach_all_hands)
+ if "p_reach" not in locals():
+ p_reach = total_opp_prob_h
+ else:
+ p_reach *= total_opp_prob_h
+ elif p_i == ph:
+ publics = self._public_information[betting_stage]
+ if len(
+ set(starting_hand).union(
+ set(publics)
+ )
+ ) < (
+ len(publics) + 2
+ ):
+ total_prob = 0
+ else:
+ publics = self._public_information[betting_stage]
+ infoset = self._info_set_builder(
+ hole_cards=starting_hand,
+ public_cards=publics,
+ history=action_sequence,
+ this_betting_stage=betting_stage,
+ )
+ try:
+ total_prob = offline_strategy[infoset][action]
+                                # Normalize the raw offline_strategy.
+ total_prob /= sum(offline_strategy[infoset]\
+ .values())
+ except KeyError:
+ total_prob = 1 / len(self.legal_actions)
+ if "p_reach" not in locals():
+ p_reach = total_prob
+ else:
+ p_reach *= total_prob
+ action_sequence[betting_stage].append(action)
+ self._starting_hand_probs[p_i][tuple(starting_hand)] = p_reach
+ self._normalize_bayes()
+
+ def _get_starting_hand(self, player_idx: int) -> List[Card]:
+ """Get starting hand based on probability of reach"""
+ starting_hands = list(self._starting_hand_probs[player_idx].keys())
+ starting_hands_idxs = list(range(len(starting_hands)))
+ starting_hands_probs = list(self._starting_hand_probs[
+ player_idx
+ ].values())
+ starting_hand_idx = np.random.choice(
+ starting_hands_idxs,
+ 1,
+ p=starting_hands_probs
+ )[0]
+ starting_hand = list(starting_hands[starting_hand_idx])
+ return starting_hand
+
+ def _info_set_builder(self, hole_cards=None, public_cards=None,
+ history=None, this_betting_stage=None) -> str:
+ """Get the information set for the current player."""
+ if hole_cards is None:
+ hole_cards = self.current_player.cards
+ if public_cards is None:
+ public_cards = self._table.community_cards
+ if history is None:
+ history = self._history
+ if this_betting_stage is None:
+ this_betting_stage = self._betting_stage
+ cards = sorted(
+ hole_cards,
+ key=operator.attrgetter("eval_card"),
+ reverse=True,
+ )
+ cards += sorted(
+ public_cards,
+ key=operator.attrgetter("eval_card"),
+ reverse=True,
+ )
+ eval_cards = tuple([int(card) for card in cards])
+ try:
+ cards_cluster = self.info_set_lut[this_betting_stage][eval_cards]
+ except KeyError:
+ return "default info set, please ensure you load it correctly"
+ # Convert history from a dict of lists to a list of dicts as I'm
+ # paranoid about JSON's lack of care with insertion order.
+ info_set_dict = {
+ "cards_cluster": cards_cluster,
+ "history": [
+ {betting_stage: [str(action) for action in actions]}
+ for betting_stage, actions in history.items()
+ ],
+ }
+ return json.dumps(
+ info_set_dict, separators=(",", ":"), cls=utils.io.NumpyJSONEncoder
+ )
+
@property
def community_cards(self) -> List[Card]:
"""Return all shared/public cards."""
@@ -281,6 +570,11 @@ def private_hands(self) -> Dict[ShortDeckPokerPlayer, List[Card]]:
"""Return all private hands."""
return {p: p.cards for p in self.players}
+ @property
+ def cards_in_deck(self):
+ """Returns current cards in deck"""
+ return self._table.dealer.deck._cards_in_deck
+
@property
def initial_regret(self) -> Dict[str, float]:
"""Returns the default regret for this state."""
@@ -314,11 +608,11 @@ def player_i(self) -> int:
@player_i.setter
def player_i(self, _: Any):
"""Raise an error if player_i is set."""
- raise ValueError(f"The player_i property should not be set.")
+ raise ValueError("The player_i property should not be set.")
@property
def betting_round(self) -> int:
- """Algorithm 1 of pluribus supp. material references betting_round."""
+ """Return 0 indexed betting round"""
try:
betting_round = self._betting_stage_to_round[self._betting_stage]
except KeyError:
@@ -332,33 +626,7 @@ def betting_round(self) -> int:
@property
def info_set(self) -> str:
"""Get the information set for the current player."""
- cards = sorted(
- self.current_player.cards,
- key=operator.attrgetter("eval_card"),
- reverse=True,
- )
- cards += sorted(
- self._table.community_cards,
- key=operator.attrgetter("eval_card"),
- reverse=True,
- )
- eval_cards = tuple([card.eval_card for card in cards])
- try:
- cards_cluster = self.info_set_lut[self._betting_stage][eval_cards]
- except KeyError:
- return "default info set, please ensure you load it correctly"
- # Convert history from a dict of lists to a list of dicts as I'm
- # paranoid about JSON's lack of care with insertion order.
- info_set_dict = {
- "cards_cluster": cards_cluster,
- "history": [
- {betting_stage: [str(action) for action in actions]}
- for betting_stage, actions in self._history.items()
- ],
- }
- return json.dumps(
- info_set_dict, separators=(",", ":"), cls=utils.io.NumpyJSONEncoder
- )
+ return self._info_set_builder()
@property
def payout(self) -> Dict[int, int]:
diff --git a/pluribus/poker/card.py b/pluribus/poker/card.py
index 5fe30a61..a3fc6db7 100644
--- a/pluribus/poker/card.py
+++ b/pluribus/poker/card.py
@@ -74,6 +74,9 @@ def __eq__(self, other):
def __ne__(self, other):
return int(self) != int(other)
+ def __hash__(self):
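+        """Hash on the card's integer value, consistent with __eq__."""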
+ return hash(int(self))
+
@property
def eval_card(self) -> EvaluationCard:
"""Return an `EvaluationCard` for use in the `Evaluator`."""
@@ -178,4 +181,3 @@ def from_dict(x: Dict[str, Union[int, str]]):
if set(x) != {"rank", "suit"}:
raise NotImplementedError(f"Unrecognised dict {x}")
return Card(rank=x["rank"], suit=x["suit"])
-
diff --git a/pluribus/poker/deck.py b/pluribus/poker/deck.py
index efc7e5f6..c6801105 100644
--- a/pluribus/poker/deck.py
+++ b/pluribus/poker/deck.py
@@ -61,3 +61,9 @@ def pick(self, random: bool = True) -> Card:
card: Card = self._cards_in_deck.pop(index)
self._dealt_cards.append(card)
return card
+
+ def remove(self, card):
+ """Remove a specific card from the deck"""
+ if card in self._cards_in_deck:
+ self._cards_in_deck.remove(card)
+ self._dealt_cards.append(card)
diff --git a/research/blueprint_algo/blueprint_short_deck_poker.py b/research/blueprint_algo/blueprint_short_deck_poker.py
index b2deeb34..ca3f89bb 100644
--- a/research/blueprint_algo/blueprint_short_deck_poker.py
+++ b/research/blueprint_algo/blueprint_short_deck_poker.py
@@ -204,7 +204,7 @@ def cfr(agent: Agent, state: ShortDeckPokerState, i: int, t: int) -> float:
logging.debug(f"Got EV for {a}: {voa[a]}")
vo += sigma[I][a] * voa[a]
logging.debug(
- f"""Added to Node EV for ACTION: {a} INFOSET: {I}
+ f"""Added to Node EV for ACTION: {a} INFOSET: {I}
STRATEGY: {sigma[I][a]}: {sigma[I][a] * voa[a]}"""
)
logging.debug(f"Updated EV at {I}: {vo}")
@@ -346,16 +346,16 @@ def _create_dir() -> Path:
@click.command()
-@click.option("--strategy_interval", default=2, help=".")
-@click.option("--n_iterations", default=10, help=".")
-@click.option("--lcfr_threshold", default=80, help=".")
-@click.option("--discount_interval", default=1000, help=".")
-@click.option("--prune_threshold", default=4000, help=".")
+@click.option("--strategy_interval", default=400, help=".")
+@click.option("--n_iterations", default=5500, help=".")
+@click.option("--lcfr_threshold", default=400, help=".")
+@click.option("--discount_interval", default=400, help=".")
+@click.option("--prune_threshold", default=400, help=".")
@click.option("--c", default=-20000, help=".")
@click.option("--n_players", default=3, help=".")
-@click.option("--print_iteration", default=10, help=".")
-@click.option("--dump_iteration", default=10, help=".")
-@click.option("--update_threshold", default=0, help=".")
+@click.option("--print_iteration", default=100, help=".")
+@click.option("--dump_iteration", default=20, help=".")
+@click.option("--update_threshold", default=400, help=".")
def train(
strategy_interval: int,
n_iterations: int,
diff --git a/research/rts/RT.py b/research/rts/RT.py
new file mode 100644
index 00000000..457ef58d
--- /dev/null
+++ b/research/rts/RT.py
@@ -0,0 +1,30 @@
+import joblib
+
+from RT_cfr import rts
+from pluribus.poker.card import Card
+
+
+if __name__ == "__main__":
+ # We can set public cards or not
+ public_cards = [Card("ace", "diamonds"), Card("king", "clubs"),
+ Card("jack", "spades"), Card("10", "hearts"),
+ Card("10", "spades")]
+ # Action sequence must be in old form (one list, includes skips)
+ action_sequence = ["raise", "raise", "raise", "call", "call",
+ "raise", "raise", "raise", "call", "call",
+ "raise", "raise", "raise", "call", "call", "call"]
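+    # The positional arguments after the two paths correspond, per the rts()
+    # signature in RT_cfr.py, to: n_iterations=1400, lcfr_threshold=1,
+    # discount_interval=1, n_players=3, update_interval=1,
+    # update_threshold=1, dump_int=20.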
+ agent_output, offline_strategy = rts(
+ 'test_strategy2/unnormalized_output/offline_strategy_1500.gz',
+ 'test_strategy2/strategy_1500.gz', public_cards, action_sequence,
+ 1400, 1, 1, 3, 1, 1, 20
+ )
+ save_path = "test_strategy2/unnormalized_output/"
+ last_regret = {
+ info_set: dict(strategy)
+ for info_set, strategy in agent_output.regret.items()
+ }
+ joblib.dump(offline_strategy, save_path + 'rts_output.gz', compress="gzip")
+ joblib.dump(last_regret, save_path + 'last_regret.gz', compress="gzip")
+ import ipdb;
+ ipdb.set_trace()
diff --git a/research/rts/RT_cfr.py b/research/rts/RT_cfr.py
new file mode 100644
index 00000000..1e1d596e
--- /dev/null
+++ b/research/rts/RT_cfr.py
@@ -0,0 +1,203 @@
+from __future__ import annotations
+
+import collections
+from typing import Dict, List
+import joblib
+from pathlib import Path
+
+from tqdm import trange
+import numpy as np
+import datetime
+import yaml
+
+from pluribus import utils
+from pluribus.games.short_deck.state import ShortDeckPokerState, new_game
+from pluribus.games.short_deck.agent import Agent
+from pluribus.poker.card import Card
+
+
+def normalize_strategy(this_info_sets_regret: Dict[str, float]) -> Dict[str, float]:
+ """Calculate the strategy based on the current information sets regret."""
+ actions = this_info_sets_regret.keys()
+ regret_sum = sum([max(regret, 0) for regret in this_info_sets_regret.values()])
+ if regret_sum > 0:
+ strategy: Dict[str, float] = {
+ action: max(this_info_sets_regret[action], 0) / regret_sum
+ for action in actions
+ }
+    elif this_info_sets_regret == {}:
+        # No regret was accumulated for this infoset during training, so
+        # return an empty strategy.
+        strategy: Dict[str, float] = {}
+    elif regret_sum == 0:
+        # All regrets are non-positive; fall back to a uniform strategy.
+        default_probability = 1 / len(actions)
+        strategy: Dict[str, float] = {
+            action: default_probability for action in actions
+        }
+ return strategy
+
+
+def calculate_strategy(
+ regret: Dict[str, Dict[str, float]],
+ I: str,
+ state: ShortDeckPokerState,
+) -> Dict[str, Dict[str, float]]:
+ """
+ Calculate strategy based on regret
+ """
+ sigma = collections.defaultdict(lambda: collections.defaultdict(lambda: 1 / 3))
+ rsum = sum([max(x, 0) for x in regret[I].values()])
+ for a in state.legal_actions:
+ if rsum > 0:
+ sigma[I][a] = max(regret[I][a], 0) / rsum
+ else:
+ sigma[I][a] = 1 / len(state.legal_actions)
+ return sigma
+
+
+def _create_dir(folder_id: str) -> Path:
+ """Create and get a unique dir path to save to using a timestamp."""
+ time = str(datetime.datetime.now())
+ for char in ":- .":
+ time = time.replace(char, "_")
+ path: Path = Path(f"./{folder_id}_results_{time}")
+ path.mkdir(parents=True, exist_ok=True)
+ return path
+
+
+def cfr(agent: Agent, state: ShortDeckPokerState, i: int, t: int) -> float:
+ """
+    CFR with a temporary regret object for better strategy averaging.
+ """
+ ph = state.player_i
+
+ player_not_in_hand = not state.players[i].is_active
+ if state.is_terminal or player_not_in_hand:
+ return state.payout[i]
+
+ elif ph == i:
+ I = state.info_set
+ # Move regret over to temporary object and build off that
+ if agent.tmp_regret[I] == {}:
+            agent.tmp_regret[I] = agent.regret[I].copy()
+ sigma = calculate_strategy(agent.tmp_regret, I, state)
+
+ vo = 0.0
+ voa = {}
+ for a in state.legal_actions:
+ new_state: ShortDeckPokerState = state.apply_action(a)
+ voa[a] = cfr(agent, new_state, i, t)
+ vo += sigma[I][a] * voa[a]
+
+ for a in state.legal_actions:
+ agent.tmp_regret[I][a] += voa[a] - vo
+
+ return vo
+ else:
+ Iph = state.info_set
+ # Move regret over to a temporary object and build off that
+ if agent.tmp_regret[Iph] == {}:
+            agent.tmp_regret[Iph] = agent.regret[Iph].copy()
+ sigma = calculate_strategy(agent.tmp_regret, Iph, state)
+
+ try:
+ a = np.random.choice(
+ list(sigma[Iph].keys()), 1, p=list(sigma[Iph].values()),
+ )[0]
+ except KeyError:
+ p = 1 / len(state.legal_actions)
+ probabilities = np.full(len(state.legal_actions), p)
+ a = np.random.choice(state.legal_actions, p=probabilities)
+ sigma[Iph] = {action: p for action in state.legal_actions}
+
+ new_state: ShortDeckPokerState = state.apply_action(a)
+ return cfr(agent, new_state, i, t)
+
+
+def rts(
+ offline_strategy_path: str,
+ last_regret_path: str,
+ public_cards: list,
+ action_sequence: list,
+ n_iterations: int,
+ lcfr_threshold: int,
+ discount_interval: int,
+ n_players: int,
+ update_interval: int,
+ update_threshold: int,
+ dump_int: int,
+):
+ """RTS."""
+ config: Dict[str, int] = {**locals()}
+ save_path: Path = _create_dir('RTS')
+ with open(save_path / "config.yaml", "w") as steam:
+ yaml.dump(config, steam)
+ # TODO: fix the seed
+ # utils.random.seed(36)
+ agent = Agent(regret_path=last_regret_path)
+ # Load unnormalized strategy to build off
+ offline_strategy = joblib.load(offline_strategy_path)
+ state: ShortDeckPokerState = new_game(
+        n_players, real_time_test=True, public_cards=public_cards
+ )
+ # Load current game state
+ current_game_state: ShortDeckPokerState = state.load_game_state(
+ offline_strategy, action_sequence
+ )
+ for t in trange(1, n_iterations + 1, desc="train iter"):
+ for i in range(n_players): # fixed position i
+ # Deal hole cards based on bayesian updating of hole card probs
+ state: ShortDeckPokerState = current_game_state.deal_bayes()
+ cfr(agent, state, i, t)
+        if t < lcfr_threshold and t % discount_interval == 0:
+ d = (t / discount_interval) / ((t / discount_interval) + 1)
+ for I in agent.tmp_regret.keys():
+ for a in agent.tmp_regret[I].keys():
+ agent.tmp_regret[I][a] *= d
+ # Add the unnormalized strategy into the original
+        # Right now this assumes n_iterations is a multiple of dump_int.
+ if t % dump_int == 0:
+ # Adding the regret back to the regret dict, we'll build off for
+ # next RTS
+ for I in agent.tmp_regret.keys():
+                if agent.tmp_regret[I] != {}:
+ agent.regret[I] = agent.tmp_regret[I].copy()
+ for info_set, this_info_sets_regret in sorted(agent.tmp_regret.items()):
+ # If this_info_sets_regret == {}, we do nothing
+ strategy = normalize_strategy(this_info_sets_regret)
+ # Check if info_set exists..
+ no_info_set = info_set not in offline_strategy
+ if no_info_set or offline_strategy[info_set] == {}:
+ offline_strategy[info_set] = {a: 0 for a in strategy.keys()}
+ for action, probability in strategy.items():
+ offline_strategy[info_set][action] += probability
+ agent.reset_new_regret()
+
+ return agent, offline_strategy
+
+
+if __name__ == "__main__":
+ # We can set public cards or not
+ public_cards = [Card("ace", "diamonds"), Card("king", "clubs"),
+ Card("jack", "spades"), Card("10", "hearts"),
+ Card("10", "spades")]
+ # Action sequence must be in old form (one list, includes skips)
+ action_sequence = ["raise", "raise", "raise", "call", "call",
+ "raise", "raise", "raise", "call", "call",
+ "raise", "raise", "raise", "call", "call", "call"]
+ agent_output, offline_strategy = rts(
+ 'test_strategy3/unnormalized_output/offline_strategy_1500.gz',
+ 'test_strategy3/strategy.gz', public_cards, action_sequence,
+ 1400, 1, 1, 3, 1, 1, 20
+ )
+ save_path = "test_strategy3/unnormalized_output/"
+ last_regret = {
+ info_set: dict(strategy)
+ for info_set, strategy in agent_output.regret.items()
+ }
+ joblib.dump(offline_strategy, save_path + 'rts_output.gz', compress="gzip")
+ joblib.dump(last_regret, save_path + 'last_regret.gz', compress="gzip")
+ import ipdb;
+ ipdb.set_trace()
diff --git a/research/stat_tests/agent_test.py b/research/stat_tests/agent_test.py
new file mode 100644
index 00000000..6569ac6e
--- /dev/null
+++ b/research/stat_tests/agent_test.py
@@ -0,0 +1,162 @@
+from typing import List, Dict, DefaultDict, Optional, Tuple
+from pathlib import Path
+import joblib
+import collections
+
+from tqdm import trange
+import yaml
+import datetime
+import numpy as np
+from scipy import stats
+
+from pluribus.games.short_deck.state import ShortDeckPokerState, new_game
+from pluribus.poker.card import Card
+
+
+def _calculate_strategy(
+ state: ShortDeckPokerState,
+ I: str,
+ strategy: DefaultDict[str, DefaultDict[str, float]],
+    count: Optional[int] = None,
+    total_count: Optional[int] = None
+) -> Tuple[str, Optional[int], Optional[int]]:
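+    """Sample an action from the given (possibly unnormalized) strategy at
+    infoset I, falling back to a uniform random action when the infoset is
+    missing; count tracks fallback (random) moves, total_count all moves."""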
+ sigma = collections.defaultdict(
+ lambda: collections.defaultdict(lambda: 1 / 3)
+ )
+ try:
+ # If strategy is empty, go to other block
+ sigma[I] = strategy[I].copy()
+ if sigma[I] == {}:
+ raise KeyError
+ norm = sum(sigma[I].values())
+ for a in sigma[I].keys():
+ sigma[I][a] /= norm
+ a = np.random.choice(
+ list(sigma[I].keys()), 1, p=list(sigma[I].values()),
+ )[0]
+ except KeyError:
+ if count is not None:
+ count += 1
+ p = 1 / len(state.legal_actions)
+ probabilities = np.full(len(state.legal_actions), p)
+ a = np.random.choice(state.legal_actions, p=probabilities)
+ sigma[I] = {action: p for action in state.legal_actions}
+ if total_count is not None:
+ total_count += 1
+ return a, count, total_count
+
+
+def _create_dir(folder_id: str) -> Path:
+ """Create and get a unique dir path to save to using a timestamp."""
+ time = str(datetime.datetime.now())
+ for char in ":- .":
+ time = time.replace(char, "_")
+ path: Path = Path(f"./{folder_id}_results_{time}")
+ path.mkdir(parents=True, exist_ok=True)
+ return path
+
+
+def agent_test(
+ hero_strategy_path: str,
+ opponent_strategy_path: str,
+ real_time_est: bool = False,
+ action_sequence: List[str] = None,
+ public_cards: List[Card] = [],
+ n_outer_iters: int = 30,
+ n_inner_iters: int = 100,
+ n_players: int = 3,
+ hero_count=None,
+ hero_total_count=None,
+):
+ config: Dict[str, int] = {**locals()}
+ save_path: Path = _create_dir('bt')
+ with open(save_path / "config.yaml", "w") as steam:
+ yaml.dump(config, steam)
+
+ # Load unnormalized strategy for hero
+ hero_strategy = joblib.load(hero_strategy_path)
+ # Load unnormalized strategy for opponents
+ opponent_strategy = joblib.load(opponent_strategy_path)
+
+ # Loading game state we used RTS on
+ if real_time_est:
+ state: ShortDeckPokerState = new_game(
+ n_players, real_time_test=real_time_est, public_cards=public_cards
+ )
+ current_game_state: ShortDeckPokerState = state.load_game_state(
+ opponent_strategy, action_sequence
+ )
+
+ # TODO: Right now, this can only be used for loading states if the two
+ # strategies are averaged. Even averaging strategies is risky. Loading a
+ # game state should be used with caution. It will work only if the
+ # probability of reach is identical across strategies. Use the average
+ # strategy.
+
+ info_set_lut = {}
+ EVs = np.array([])
+ for _ in trange(1, n_outer_iters):
+ EV = np.array([]) # Expected value for player 0 (hero)
+ for t in trange(1, n_inner_iters + 1, desc="train iter"):
+ for p_i in range(n_players):
+ if real_time_est:
+ # Deal hole cards based on bayesian updating of hole card
+ # probabilities
+ state: ShortDeckPokerState = current_game_state.deal_bayes()
+ else:
+ state: ShortDeckPokerState = new_game(
+ n_players,
+ info_set_lut
+ )
+ info_set_lut = state.info_set_lut
+ while True:
+ player_not_in_hand = not state.players[p_i].is_active
+ if state.is_terminal or player_not_in_hand:
+ EV = np.append(EV, state.payout[p_i])
+ break
+ if state.player_i == p_i:
+ random_action, hero_count, hero_total_count = \
+ _calculate_strategy(
+ state,
+ state.info_set,
+ hero_strategy,
+ count=hero_count,
+ total_count=hero_total_count
+ )
+ else:
+ random_action, oc, otc = _calculate_strategy(
+ state,
+ state.info_set,
+ opponent_strategy,
+ )
+ state = state.apply_action(random_action)
+ EVs = np.append(EVs, EV.mean())
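+    # One-sample t-test of the mean per-iteration EV against zero:
+    # t = mean / (std / sqrt(n)), with a one-sided p-value on n - 1 degrees
+    # of freedom, where n is the number of outer-iteration EV estimates.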
+    t_stat = (EVs.mean() - 0) / (EVs.std() / np.sqrt(len(EVs)))
+    p_val = stats.t.sf(np.abs(t_stat), len(EVs) - 1)
+ results_dict = {
+ 'Expected Value': float(EVs.mean()),
+ 'T Statistic': float(t_stat),
+ 'P Value': float(p_val),
+ 'Standard Deviation': float(EVs.std()),
+ 'N': int(len(EVs)),
+ 'Random Moves Hero': hero_count,
+ 'Total Moves Hero': hero_total_count
+ }
+ with open(save_path / 'results.yaml', "w") as stream:
+ yaml.safe_dump(results_dict, stream=stream, default_flow_style=False)
+
+
+if __name__ == "__main__":
+ strat_path = "test_strategy2/unnormalized_output/"
+ agent_test(
+ hero_strategy_path=strat_path + "random_strategy.gz",
+ opponent_strategy_path=strat_path + "offline_strategy_1500.gz",
+ real_time_est=False,
+ public_cards=[],
+ action_sequence=None,
+ n_inner_iters=25,
+ n_outer_iters=75,
+ hero_count=0,
+ hero_total_count=0
+ )
diff --git a/research/stat_tests/average_unnormalized_strategy.py b/research/stat_tests/average_unnormalized_strategy.py
new file mode 100644
index 00000000..e35965ad
--- /dev/null
+++ b/research/stat_tests/average_unnormalized_strategy.py
@@ -0,0 +1,91 @@
+import collections
+import glob
+import os
+import re
+from typing import Dict, List, Union
+
+import click
+import joblib
+from tqdm import tqdm
+
+
+def calculate_strategy(this_info_sets_regret: Dict[str, float]) -> Dict[str, float]:
+ """Calculate the strategy based on the current information sets regret."""
+ actions = this_info_sets_regret.keys()
+ regret_sum = sum([max(regret, 0) for regret in this_info_sets_regret.values()])
+ if regret_sum > 0:
+ strategy: Dict[str, float] = {
+ action: max(this_info_sets_regret[action], 0) / regret_sum
+ for action in actions
+ }
+    elif this_info_sets_regret == {}:
+        # No regret was accumulated for this infoset during training, so
+        # return an empty strategy.
+        strategy: Dict[str, float] = {}
+    elif regret_sum == 0:
+        # All regrets are non-positive; fall back to a uniform strategy.
+        default_probability = 1 / len(actions)
+        strategy: Dict[str, float] = {
+            action: default_probability for action in actions
+        }
+ return strategy
+
+
+def try_to_int(text: str) -> Union[str, int]:
+ """Attempt to return int."""
+ return int(text) if text.isdigit() else text
+
+
+def natural_key(text):
+ """Sort with natural numbers."""
+ return [try_to_int(c) for c in re.split(r"(\d+)", text)]
+
+
+def average_strategy(all_file_paths: List[str]) -> Dict[str, Dict[str, float]]:
+ """Compute the mean strategy over all timesteps."""
+ # The offline strategy for all information sets.
+ offline_strategy: Dict[str, Dict[str, float]] = collections.defaultdict(
+ lambda: collections.defaultdict(lambda: 0.0)
+ )
+ # Sum up all strategies.
+ for dump_path in tqdm(all_file_paths, desc="loading dumps"):
+ # Load file.
+ try:
+ agent = joblib.load(dump_path)
+ except Exception as e:
+ tqdm.write(f"Failed to load file at {dump_path} because:{e}")
+ agent = {}
+ regret = agent.get("regret", {})
+ # Sum probabilities from computed strategy..
+ for info_set, this_info_sets_regret in sorted(regret.items()):
+ strategy = calculate_strategy(this_info_sets_regret)
+ # If strategy == {}, we do nothing
+ for action, probability in strategy.items():
+ offline_strategy[info_set][action] += probability
+ # Return regular dict, not defaultdict.
+ return {info_set: dict(strategy) for info_set, strategy in offline_strategy.items()}
+
+
+@click.command()
+@click.option(
+ "--results_dir_path", default=".", help="the location of the agent file dumps."
+)
+@click.option(
+ "--write_dir_path", default=".", help="where to save the offline strategy"
+)
+def cli(results_dir_path: str, write_dir_path: str):
+ """Compute the strategy and write to file."""
+ # Find all files to load.
+ all_file_paths = glob.glob(os.path.join(results_dir_path, "*.gz"))
+ if not all_file_paths:
+ raise ValueError(f"No agent dumps could be found at: {results_dir_path}")
+ # Sort the file paths in the order they were created.
+ all_file_paths = sorted(all_file_paths, key=natural_key)
+ offline_strategy = average_strategy(all_file_paths)
+ # Save dictionary to compressed file.
+ latest_file = os.path.basename(all_file_paths[-1])
+ latest_iteration: int = int(re.findall(r"\d+", latest_file)[0])
+ save_file: str = f"offline_strategy_{latest_iteration}.gz"
+ joblib.dump(offline_strategy, os.path.join(write_dir_path, save_file))
+
+
+if __name__ == "__main__":
+ cli()
diff --git a/research/stat_tests/rts_ab_test.py b/research/stat_tests/rts_ab_test.py
new file mode 100644
index 00000000..047d571d
--- /dev/null
+++ b/research/stat_tests/rts_ab_test.py
@@ -0,0 +1,112 @@
+import numpy as np
+import json
+import joblib
+import sys
+from typing import List
+
+import click
+
+from agent_test import agent_test
+from pluribus.poker.deck import Deck
+sys.path.append('research/rts')
+from RT_cfr import rts
+
+
+@click.command()
+@click.option("--offline_strategy_path", help=".")
+@click.option("--last_regret_path", help=".")
+@click.option("--n_iterations", default=1500, help=".")
+@click.option("--lcfr_threshold", default=400, help=".")
+@click.option("--discount_interval", default=400, help=".")
+@click.option("--n_players", default=3, help=".")
+@click.option("--update_interval", default=400, help=".")
+@click.option("--update_threshold", default=400, help=".")
+@click.option("--dump_int", default=20, help=".")
+@click.option("--save_dir", help=".")
+@click.option("--n_inner_iters", default=25, help=".")
+@click.option("--n_outer_iters", default=150, help=".")
+def rts_ab_test(
+ offline_strategy_path: str,
+ last_regret_path: str,
+ n_iterations: int,
+ lcfr_threshold: int,
+ discount_interval: int,
+ n_players: int,
+ update_interval: int,
+ update_threshold: int,
+ dump_int: int,
+ save_dir: str,
+ n_inner_iters: int,
+ n_outer_iters: int,
+ ranks: List[int] = list(range(10, 14 + 1)),
+):
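+    """Rough A/B test of real-time search: sample infosets from an offline
+    strategy, run rts() from the corresponding nodes, then evaluate the
+    resulting hero strategy against the offline strategy via agent_test()."""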
+ check = joblib.load(offline_strategy_path)
+ histories = np.random.choice(list(check.keys()), 2)
+ action_sequences = []
+ public_cards_lst = []
+ community_card_dict = {
+ "pre_flop": 0,
+ "flop": 3,
+ "turn": 4,
+ "river": 5,
+ }
+ deck = Deck(include_ranks=ranks)
+ for history in histories:
+ history_dict = json.loads(history)
+ history_lst = history_dict['history']
+ action_sequence = []
+ betting_rounds = []
+ for x in history_lst:
+ action_sequence += list(x.values())[0]
+ betting_rounds += list(x.keys())
+ action_sequences.append(action_sequence)
+        if action_sequence:
+            final_betting_round = list(betting_rounds)[-1]
+        else:
+            final_betting_round = "pre_flop"
+        n_cards = community_card_dict[final_betting_round]
+        cards_in_deck = deck._cards_in_deck
+        public_cards = list(
+            np.random.choice(cards_in_deck, n_cards, replace=False)
+        )
+        public_cards_lst.append(public_cards)
+
+ for i in range(0, len(action_sequences)):
+ public_cards = public_cards_lst[i].copy()
+ action_sequence = action_sequences[i].copy()
+ agent_output, offline_strategy = rts(
+ offline_strategy_path,
+ last_regret_path,
+ public_cards,
+ action_sequence,
+ n_iterations=n_iterations,
+ lcfr_threshold=lcfr_threshold,
+ discount_interval=discount_interval,
+ n_players=n_players,
+ update_interval=update_interval,
+ update_threshold=update_threshold,
+ dump_int=dump_int
+ )
+ last_regret = {
+ info_set: dict(strategy)
+ for info_set, strategy in agent_output.regret.items()
+ }
+ joblib.dump(offline_strategy, save_dir + f'rts_output{i}.gz', compress="gzip")
+ joblib.dump(last_regret, save_dir + f'last_regret{i}.gz', compress="gzip")
+
+ public_cards = public_cards_lst[i].copy()
+ action_sequence = action_sequences[i].copy()
+ agent_test(
+ hero_strategy_path=save_dir + f"rts_output{i}.gz",
+ opponent_strategy_path=offline_strategy_path,
+ real_time_est=True,
+ public_cards=public_cards,
+ action_sequence=action_sequence,
+ n_inner_iters=n_inner_iters,
+ n_outer_iters=n_outer_iters,
+ hero_count=0,
+ hero_total_count=0,
+ )
+
+if __name__ == "__main__":
+ rts_ab_test()
diff --git a/research/test_methodology/validating_nash_equilibriums_via_simulations.ipynb b/research/test_methodology/validating_nash_equilibriums_via_simulations.ipynb
deleted file mode 100644
index c146c559..00000000
--- a/research/test_methodology/validating_nash_equilibriums_via_simulations.ipynb
+++ /dev/null
@@ -1,224 +0,0 @@
-{
- "cells": [
- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "# Validating Nash Equilibriums Via Simulations"
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "_by Colin Manko_"
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "In an effort to validate and test possible improvements to core poker artificial intelligence algorithms, I have designed the following methodology."
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "### Goals"
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "- Validate that MCCFR offline learning strategy is approximating a Nash equilibrium\n",
- "- More generally, create a methodology that allows for rigorously testing changes made to the core AI algorithms"
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "#### Prerequisites"
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "- Need two test bot implementation strategies ($\\beta{1}$ and $\\beta{2}$) that we would like to compare\n",
- "- Need a human tester ($H_{0}$) as a quasi control. The human tester should not have access to any underlying strategies from the test bots or simulated Nash"
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "#### Step 1: Randomly Generate Test Game"
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "Given a set of $N$ game tree nodes (this is the entire game tree, as given by infoset, $I$), randomly generate $x$ test nodes without preplacement and with equal probability. Call the set of test nodes $U$.\n",
- "\n",
- "As a side note, we will account for probability of reach ($p(h)$) in another step. Equal probability across nodes allows us to find patterns across nodes where our agent underperforms. We will adjust the expected value at $I$, ($v^{\\sigma}(I)$), by $p(h)$.\n",
- "- **How to**: For Limit Texas Hold'em, the number of action sequences ($N$), is small enough that they can be found computationally rather than analytically. We can run *all_action_sequences.py* in the *size_of_problem* directory to generate this list. \n",
- "- _Something like 15-20 hours and less than 4GB??_\n",
- "- Generate $x$ integers to be indices and select them from the *all_action_sequences.py* output\n",
- "- Once $x$ action sequences are generated, randomly generate $x$ public card combos, based on the betting stage of the test node, $u$, as well as one pair of private hole cards to be used by $\\beta{1}$, $\\beta{2}$ and $H_0$. They will only get that hand at $u$. "
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "#### Step 2: Prepare Realtime Search for Finding Nash Equilibrium"
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "For each test node, $u$, in $U$, use realtime search to compute the Nash Equilibrium ($\\sigma^*$) by constraining the search algorithm to start at $u$, where $u$ is equivalent to $I$ in regard to action sequence, but does not have any set hand for the traversing player ($p_i$).\n",
- "\n",
- "Use a pooled strategy between $\\beta{1}$ and $\\beta{2}$ to estimate $p(h)$ without bias:"
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "For hand in possible combinations of real hands:\n",
- "
\n",
- " For idx, $a$ in action sequence at $u$:\n",
- "
\n",
- " if idx == 0:\n",
- "
\n",
- " $p(h)_\\beta{1}$ = $\\beta{1}$[$I$][$a$]\n",
- "
\n",
- " $p(h)_\\beta{2}$ = $\\beta{2}$[$I$][$a$]\n",
- "
\n",
- " $p(h)_\\beta{1}$ *= $\\beta{1}$[$I$][$a$]\n",
- "
\n",
- " $p(h)_\\beta{2}$ *= $\\beta{1}$[$I$][$a$]\n",
- "
\n",
- " p(h)[rs] = ($p(h)_\\beta{1}$ + $p(h)_\\beta{2}$)/2"
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "The root node of the realtime search algorithm is replaced with a chance node that represents each possible node in the public state $G$ [[Brown, Sandholm, Amos]](https://papers.nips.cc/paper/7993-depth-limited-solving-for-imperfect-information-games.pdf). From the above psuedo-code, this deal can be generated as: "
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "Generate deal order\n",
- "
\n",
- "For $i$ in $P_i$ deal order:\n",
- "
\n",
- " Generate hand for player based on normalized $p(h)[rs]$ if available, else try again"
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "_The \"if available, else try again\" part could be made better_"
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "**Other important features of the _\"Nash Bot\"_ real time search..**:\n",
- "- The _\"Nash bot\"_ is the master of this node. In order to reach full convergence, from the normal MCCFR algorithm, we must remove the sampling of actions for opponents.\n",
- "- For ease, the real time search should not use leaf nodes, but should search to the end of the game tree, where either a terminal node or a shown down is entered. In this way, we can get a truer sense of the expected value."
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "A Nash Equilibrium is found if the change in strategy on each iteration drops below some threshold $t$ for the real hand we are testing for. Charting probabilities for each action in $u$ over time for the randomly generated real hand to test should show a convergence over time."
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "_One main benefit of using this real time search to validate CFR is this search will need to be developed anyway._"
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "#### Step 3: Test and Measure Success"
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "**For the test bots:**\n",
- "For each $u$ in $U$, play each test strategy ($\\beta{1}$ and $\\beta{2}$) against the _\"Nash bot\"_ for $r$ number of simulations. The _\"Nash bot\"_ should be dealt available hands from the distribution of probabilities as determined by $p(h)[rs]$ in the pseudo-code above. Both the test bots and the human tester will be dealt the same hand in each simulation of game play on $u$, as randomly generated in step 1. \n",
- "\n",
- "If $\\beta{1}$ or $\\beta{2}$ has converged to a Nash equilibrium, then we should expect $v^\\sigma$ to be equal to 0 for our test bot, assuming that _\"Nash bot\"_ has converged to a Nash equilibrium itself. $v^{\\sigma^*}(u)$ and $v^{\\sigma}(u)$ are the estimated payouts for the _\"Nash bot\"_ opponents and the \"hero\" (test bots or human), respectively.\n",
- "\n",
- "**For the human tester:**\n",
- "We can simply create a contrived game. Based on the normalized probability of reach for $u$, $\\bar{p(h)}$, we can randomly generate which $u$ the human player is entered into, however they will always have the same hand upon entering $u$ and their opponents hands will vary based on $p(h)[rs]$.\n",
- "\n",
- "The test metric is as follows, after $p(h)$ has been normalized for space $U$, $\\bar{p(h)}$:\n",
- "$$\\sum_{i=1}^{x}(v^{\\sigma}(u_i)-v^{\\sigma^*}(u_{-i}))\\times{\\bar{p(h)}}$$"
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "The value closest to 0 (summing no testing agent goes over 0) will have best approximated the Nash equilibrium. Additionaly, $H_0$ can be used as a quasi-control, to validate that the bot is beating a human.\n",
- "\n",
- "The above metric also has some degree of simulation error. For each simulation in $r$ simulations, we create a distribution of values that has a standard deviation and follows the normal distribution. \n",
- "\n",
- "Along with calculating the expected payout per simulation, $u^{\\sigma}(u_i)-u^{\\sigma^*}(u_{-i})$, we can also calculate $\\sigma$ for this distribution in order to describe a confidence interval around the test metric. \n",
- "\n",
- "Finally, a simple difference of means can be done between each test bot to decipher a winner and if that winner had a statistically significant edge. We can then study each $u$ in $U$ to find patterns in which nodes the espspective bots did not do well with."
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
- "source": []
- }
- ],
- "metadata": {
- "kernelspec": {
- "display_name": "Python 3",
- "language": "python",
- "name": "python3"
- },
- "language_info": {
- "codemirror_mode": {
- "name": "ipython",
- "version": 3
- },
- "file_extension": ".py",
- "mimetype": "text/x-python",
- "name": "python",
- "nbconvert_exporter": "python",
- "pygments_lexer": "ipython3",
- "version": "3.7.0"
- }
- },
- "nbformat": 4,
- "nbformat_minor": 4
-}
diff --git a/research/to_do.md b/research/to_do.md
deleted file mode 100644
index e510af90..00000000
--- a/research/to_do.md
+++ /dev/null
@@ -1,49 +0,0 @@
-A Place for Next Steps in Short Deck Implementation
-
-## Abstraction
-
-#### Information Abstraction
-- hard code opening hand clusters
-- decide how to store these for lookup in blueprint/real time algo
-- run for short deck
-
-#### Action Abstraction
-- not sure how this fits into blueprint/real time yet
-
-## Blueprint Algo
-- apply to contrived short deck game
-
-## Real Time Search Algo
-- need isomorphic/lossless handling of cards?? # Non-essential maybe..
-- mock up "toy" version
- - pre-req: stateful version of short deck
-
-### Rules of Contrived Short Deck Game
-- 3 players
-- 2-9 removed
-- no adjustments to hand rankings versus no-limit
-- 10000 in stack, 50 small blind, 100 big blind
-- limited betting
-
-#### Possible Next Steps
-- fix short deck game and roll out to online hosting?
-- go right on to full game?
-
-#### Current (Concise) Papers
-- Abstraction
- - https://www.cs.cmu.edu/~sandholm/hierarchical.aamas15.pdf <- this algo
- - http://www.ifaamas.org/Proceedings/aamas2013/docs/p271.pdf <- these features
-- Blueprint
- - https://science.sciencemag.org/content/sci/suppl/2019/07/10/science.aay2400.DC1/aay2400-Brown-SM.pdf <- pseudo code
-- Real Time Algo
- - https://papers.nips.cc/paper/7993-depth-limited-solving-for-imperfect-information-games.pdf <- build off this
- - make theses changes:
- - [optimized vector-based linear cfr?](https://arxiv.org/pdf/1809.04040.pdf)
- - [only samples chance events?](http://martin.zinkevich.org/publications/ijcai2011_rgbr.pdf)
-
-#### TODO: Colin
-- Generate abstraction for 20 cards
--- Program to turn that into dictionary and store separately
-- Hard code preflop lossless
-- Write next steps in docstring of blueprint algo
-- Consider getting rid of notebooks before merging into develop..
\ No newline at end of file
diff --git a/research/size_of_problem/action_sequences.pkl b/test/data/action_sequences.pkl
similarity index 100%
rename from research/size_of_problem/action_sequences.pkl
rename to test/data/action_sequences.pkl
diff --git a/test/data/random_action_sequences.pkl b/test/data/random_action_sequences.pkl
new file mode 100644
index 00000000..41ab5a88
Binary files /dev/null and b/test/data/random_action_sequences.pkl differ
diff --git a/test/data/random_offline_strategy.gz b/test/data/random_offline_strategy.gz
new file mode 100644
index 00000000..ff4cfe7f
Binary files /dev/null and b/test/data/random_offline_strategy.gz differ
diff --git a/test/functional/test_short_deck.py b/test/functional/test_short_deck.py
index 889bc437..276082ba 100644
--- a/test/functional/test_short_deck.py
+++ b/test/functional/test_short_deck.py
@@ -1,17 +1,21 @@
import collections
+import json
import copy
import random
from typing import List, Tuple, Optional
+import joblib
import pytest
import numpy as np
import dill as pickle
-from pluribus.games.short_deck.state import ShortDeckPokerState
+from pluribus.games.short_deck.state import ShortDeckPokerState, new_game, \
+ InfoSetLookupTable
from pluribus.games.short_deck.player import ShortDeckPokerPlayer
from pluribus.poker.card import Card
from pluribus.poker.pot import Pot
from pluribus.utils.random import seed
+from pluribus.poker.deck import Deck
def _new_game(
@@ -35,10 +39,10 @@ def _new_game(
return state, pot
-def _load_action_sequences(directory):
+def _load_pkl_file(directory):
with open(directory, "rb") as file:
- action_sequences = pickle.load(file)
- return action_sequences
+ pkl_file = pickle.load(file)
+ return pkl_file
def test_short_deck_1():
@@ -203,18 +207,17 @@ def _get_flop(state: ShortDeckPokerState) -> List[Card]:
@pytest.mark.parametrize("n_players", [2, 3])
-def test_call_action_sequence(n_players):
+def test_call_action_sequence(n_players, n: int = 50):
"""
- Make sure we never see an action sequence of "raise", "call", "call" in the same
- round with only two players. There would be a similar analog for more than two players,
- but this should aid in initially finding the bug.
+ Make sure we never see an action sequence of "raise", "call", "call" when
+ down to two players
"""
# Seed the random number generation so things are procedural/reproducable.
seed(42)
- # example of a bad sequence in a two-handed game in one round
+ # Example of a bad sequence in a two-handed game in one round
bad_seq = ["raise", "call", "call"]
# Run some number of random iterations.
- for _ in range(200):
+ for _ in range(n):
state, _ = _new_game(n_players=n_players, small_blind=50, big_blind=100)
betting_round_dict = collections.defaultdict(list)
while state.betting_stage not in {"show_down", "terminal"}:
@@ -231,22 +234,22 @@ def test_call_action_sequence(n_players):
# Loop through the action history and make sure the bad
# sequence has not happened.
for i in range(len(no_fold_action_history)):
- history_slice = no_fold_action_history[i : i + len(bad_seq)]
+ history_slice = no_fold_action_history[i: i + len(bad_seq)]
assert history_slice != bad_seq
state = state.apply_action(random_action)
@pytest.mark.parametrize("n_players", [2, 3])
-def test_action_sequence(n_players: int):
- """
- Check each round against validated action sequences to ensure the state class is
- working correctly.
- """
+def test_action_sequence(
+ n_players: int,
+ n: int = 50,
+ action_sequences_path: str = "test/data/action_sequences.pkl"
+):
+ """Ensure action sequences are legal.. """
# Seed the random number generation so things are procedural/reproducable.
seed(42)
- directory = "research/size_of_problem/action_sequences.pkl"
- action_sequences = _load_action_sequences(directory)
- for i in range(200):
+ action_sequences = _load_pkl_file(action_sequences_path)
+ for i in range(n):
state, _ = _new_game(n_players=n_players, small_blind=50, big_blind=100)
betting_stage_dict = {
@@ -281,14 +284,14 @@ def test_action_sequence(n_players: int):
assert action_sequence in possible_sequences
-def test_skips(n_players: int = 3):
+def test_skips(n_players: int = 3, n: int = 50):
"""
- Check each round to make sure that skips are mod number of players and appended on
- the skipped player's turn
+ Check each round to make sure that skips are mod number of players and
+ appended on the skipped player's turn
"""
# Seed the random number generation so things are procedural/reproducable.
seed(42)
- for _ in range(500):
+ for _ in range(n):
state, _ = _new_game(n_players=n_players, small_blind=50, big_blind=100)
while True:
@@ -338,3 +341,137 @@ def test_skips(n_players: int = 3):
for i, action in enumerate(actions[fold_idx:]):
if i % n_players == 0:
assert action == "skip"
+
+
+def test_load_game_state(
+ n_players: int = 3,
+ n: int = 5,
+ random_actions_path: str = "test/data/random_action_sequences.pkl"
+):
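+    """Check that load_game_state replays an action sequence up to, but not
+    including, its final action, and that deal_bayes then applies it."""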
+ # Load a random sample of action sequences
+ action_sequences = _load_pkl_file(random_actions_path)
+ test_action_sequences = np.random.choice(action_sequences, n)
+ # Lookup table that defaults to 0 as the cluster id
+ # TODO: Not sure how to quiet the mypy typing complaint..
+ info_set_lut: InfoSetLookupTable = {
+ "pre_flop": collections.defaultdict(lambda: 0),
+ "flop": collections.defaultdict(lambda: 0),
+ "turn": collections.defaultdict(lambda: 0),
+ "river": collections.defaultdict(lambda: 0),
+ }
+ state: ShortDeckPokerState = new_game(
+ n_players,
+ info_set_lut=info_set_lut,
+ real_time_test=True,
+ public_cards=[]
+ )
+ for action_sequence in test_action_sequences:
+ game_action_sequence = action_sequence.copy()
+ # Load current game state
+ current_game_state: ShortDeckPokerState = state.load_game_state(
+ offline_strategy={}, action_sequence=game_action_sequence
+ )
+ current_history = current_game_state._history
+ check_action_seq_current = []
+ for betting_stage in current_history.keys():
+ check_action_seq_current += current_history[betting_stage]
+ check_action_sequence = [a for a in check_action_seq_current if a != "skip"]
+ assert check_action_sequence == action_sequence[:-1]
+
+ new_state = current_game_state.deal_bayes()
+ full_history = new_state._history
+ check_action_seq_full = []
+ for betting_stage in full_history.keys():
+ check_action_seq_full += full_history[betting_stage]
+ check_action_sequence = [a for a in check_action_seq_full if a != "skip"]
+ assert check_action_sequence == action_sequence
+
+
+def test_public_cards(
+ n_players: int = 3,
+ n: int = 5,
+ strategy_path: str = "test/data/random_offline_strategy.gz"
+):
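+ """
+ Sample histories from a saved offline strategy, rebuild each game with a
+ known set of public cards, and check that the public information recorded
+ for every betting stage matches the cards that were supplied.
+ """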
+ strategy = joblib.load(strategy_path)
+ histories = np.random.choice(list(strategy.keys()), n)
+ action_sequences = []
+ public_cards_lst = []
+ final_betting_round_lst: List[str] = []
+ community_card_dict = {
+ "pre_flop": 0,
+ "flop": 3,
+ "turn": 4,
+ "river": 5,
+ }
+ ranks = list(range(10, 14 + 1))
+ deck = Deck(include_ranks=ranks)
+ for history in histories:
+ history_dict = json.loads(history)
+ history_lst = history_dict["history"]
+ action_sequence = []
+ betting_rounds = []
+ for x in history_lst:
+ action_sequence += list(x.values())[0]
+ betting_rounds += list(x.keys())
+ if not action_sequence:
+ continue
+ action_sequences.append(action_sequence)
+ final_betting_round = list(betting_rounds)[-1]
+ final_betting_round_lst.append(final_betting_round)
+ n_cards = community_card_dict[final_betting_round]
+ cards_in_deck = deck._cards_in_deck
+ public_cards = list(
+ np.random.choice(cards_in_deck, n_cards, replace=False)
+ )
+ public_cards_lst.append(public_cards)
+
+ # TODO: Not sure how to quiet the mypy typing complaint here.
+ info_set_lut: InfoSetLookupTable = {
+ "pre_flop": collections.defaultdict(lambda: 0),
+ "flop": collections.defaultdict(lambda: 0),
+ "turn": collections.defaultdict(lambda: 0),
+ "river": collections.defaultdict(lambda: 0),
+ }
+ for i in range(0, len(action_sequences)):
+ public_cards = public_cards_lst[i].copy()
+ final_betting_round = final_betting_round_lst[i]
+ if not public_cards and final_betting_round == "pre_flop":
+ continue
+ action_sequence = action_sequences[i].copy()
+ state: ShortDeckPokerState = new_game(
+ n_players,
+ info_set_lut=info_set_lut,
+ real_time_test=True,
+ public_cards=public_cards,
+ )
+ current_game_state: ShortDeckPokerState = state.load_game_state(
+ offline_strategy={}, action_sequence=action_sequence
+ )
+ new_state = current_game_state.deal_bayes()
+
+ cont = True
+ if len(public_cards) == 0:
+ loaded_betting_stage = "pre_flop"
+ elif len(public_cards) == 3:
+ loaded_betting_stage = "flop"
+ elif len(public_cards) == 4:
+ loaded_betting_stage = "turn"
+ elif len(public_cards) == 5:
+ loaded_betting_stage = "river"
+
+ public_info = new_state._public_information
+ for betting_stage in public_info.keys():
+ if betting_stage == "pre_flop":
+ # No community cards are dealt in the pre_flop stage.
+ continue
+ if cont:
+ card_len = community_card_dict[betting_stage]
+ assert public_cards[:card_len] == public_info[betting_stage]
+ if betting_stage == loaded_betting_stage:
+ cont = False
+ else:
+ # We should only get here if the action sequence finished the loaded
+ # betting round, so the state has dealt exactly one more community card
+ # than was supplied.
+ state_public_card_len = len(new_state.community_cards)
+ public_card_len = len(public_cards)
+ assert state_public_card_len == public_card_len + 1
diff --git a/test/regression/check_bayes.py b/test/regression/check_bayes.py
new file mode 100644
index 00000000..d42faf5f
--- /dev/null
+++ b/test/regression/check_bayes.py
@@ -0,0 +1,157 @@
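+"""
+Manual regression check for deal_bayes(): compare the starting-hand
+probabilities it assigns when a game state is loaded against tallies from
+replaying the same action sequences under an offline strategy. Run manually;
+the script drops into ipdb at the end for inspection.
+"""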
+import joblib
+import collections
+import json
+from typing import DefaultDict
+
+import numpy as np
+from tqdm import trange
+
+from pluribus.poker.deck import Deck
+from pluribus.games.short_deck.state import ShortDeckPokerState, new_game
+
+
+def _calculate_strategy(
+ state: ShortDeckPokerState,
+ I: str,
+ strategy: DefaultDict[str, DefaultDict[str, float]],
+) -> str:
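+ """
+ Sample an action for info set `I` from `strategy`, normalising the stored
+ action probabilities; fall back to a uniform choice over the state's legal
+ actions when `I` has no stored strategy.
+ """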
+ sigma = collections.defaultdict(lambda: collections.defaultdict(lambda: 1 / 3))
+ try:
+ # If strategy is empty, go to other block
+ sigma[I] = strategy[I].copy()
+ if sigma[I] == {}:
+ raise KeyError
+ norm = sum(sigma[I].values())
+ for a in sigma[I].keys():
+ sigma[I][a] /= norm
+ a = np.random.choice(
+ list(sigma[I].keys()), 1, p=list(sigma[I].values()),
+ )[0]
+ except KeyError:
+ p = 1 / len(state.legal_actions)
+ probabilities = np.full(len(state.legal_actions), p)
+ a = np.random.choice(state.legal_actions, p=probabilities)
+ sigma[I] = {action: p for action in state.legal_actions}
+ return a
+
+
+n = 10000
+n_players = 3
+inner_iters = 1000
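+# `n` histories are sampled from the offline strategy; once `inner_iters`
+# replays of a sequence have failed, the current deal is tallied with a zero
+# count in the replay loop below.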
+
+strategy_dir = "research/test_methodology/test_strategy2/"
+strategy_path = "unnormalized_output/offline_strategy_1500.gz"
+check = joblib.load(strategy_dir + strategy_path)
+histories = np.random.choice(list(check.keys()), n)
+action_sequences = []
+public_cards_lst = []
+community_card_dict = {
+ "pre_flop": 0,
+ "flop": 3,
+ "turn": 4,
+ "river": 5,
+}
+# Use a shorter deck to keep the simulation time reasonable.
+ranks = list(range(12, 14 + 1))
+deck = Deck(include_ranks=ranks)
+found = 0
+for idx, history in enumerate(histories):
+ if idx % 100 == 0:
+ print(idx)
+ history_dict = json.loads(history)
+ history_lst = history_dict["history"]
+ if history_lst == []:
+ continue
+ action_sequence = []
+ betting_rounds = []
+ for x in history_lst:
+ action_sequence += list(x.values())[0]
+ betting_rounds += list(x.keys())
+ # `betting_rounds` should be non-empty here because empty histories were
+ # skipped above.
+ final_betting_round = betting_rounds[-1]
+ # Keep the simulation small for now: only use short action sequences.
+ if len(action_sequence) > 2:
+ continue
+ action_sequences.append(action_sequence)
+ n_cards = community_card_dict[final_betting_round]
+ cards_in_deck = deck._cards_in_deck
+ public_cards = np.random.choice(cards_in_deck, n_cards, replace=False)
+ public_cards_lst.append(list(public_cards))
+ found += 1
+ if found == 2:
+ break
+ # Assuming we find 2 action sequences out of 1000.
+
+store_hand_probs = {}
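+# For each usable action sequence: (1) rebuild the game via load_game_state()
+# and deal_bayes() and record the starting-hand probabilities it assigns, then
+# (2) deal fresh games and replay the sequence under the offline strategy,
+# tallying which hole cards can reproduce it.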
+for i in trange(0, len(action_sequences)):
+ public_cards = public_cards_lst[i].copy()
+ # TODO: There may be a bug when `public_cards` is empty; check this later.
+ action_sequence = action_sequences[i].copy()
+ state: ShortDeckPokerState = new_game(
+ n_players,
+ real_time_test=True,
+ public_cards=public_cards,
+ )
+ current_game_state: ShortDeckPokerState = state.load_game_state(
+ offline_strategy=check, action_sequence=action_sequence
+ )
+ new_state = current_game_state.deal_bayes()
+
+ # Record the probability deal_bayes assigned to each starting hand; "sim"
+ # is left as a placeholder for a simulated estimate.
+ this_hand_probs = new_state._starting_hand_probs.copy()
+ for p_i in this_hand_probs.keys():
+ for starting_hand in this_hand_probs[p_i].keys():
+ x = this_hand_probs[p_i][starting_hand]
+ this_hand_probs[p_i][starting_hand] = {"deal_bayes": x, "sim": None}
+ # Keep results per sequence for inspection at the breakpoint below.
+ store_hand_probs[i] = this_hand_probs
+
+ action_sequence = action_sequences[i].copy()
+ public_cards = public_cards_lst[i].copy()
+ info_set_lut = {}
+ cont = True
+ actions = []
+ tries = 0
+ success = 0
+ hand_dict = {0: {}, 1: {}, 2: {}}
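+ # Rejection sampling: keep dealing new games and replaying the target
+ # action sequence under the offline strategy until one deal reproduces it
+ # exactly.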
+ while cont:
+ state: ShortDeckPokerState = new_game(
+ n_players,
+ info_set_lut,
+ real_time_test=True,
+ public_cards=public_cards
+ )
+ info_set_lut = state.info_set_lut
+ # Start a fresh replay attempt for this deal.
+ actions = []
+ count = 0
+ while True:
+ if tries == inner_iters:
+ # NOTE: hack; after `inner_iters` failed replays, record the current
+ # deal with a zero count so its hole cards still show up in `hand_dict`.
+ for p_i, player in enumerate(state.players):
+ hole_cards = tuple(player.cards)
+ try:
+ hand_dict[p_i][hole_cards] += 0
+ except KeyError:
+ hand_dict[p_i][hole_cards] = 0
+ random_action = _calculate_strategy(state, state.info_set, check)
+ if random_action != action_sequence[count]:
+ # The sampled action diverged from the target sequence; re-deal.
+ tries += 1
+ break
+ state = state.apply_action(random_action)
+ actions.append(random_action)
+ if actions == action_sequence:
+ # This deal reproduced the full sequence; tally its hole cards.
+ for p_i, player in enumerate(state.players):
+ hole_cards = tuple(player.cards)
+ try:
+ hand_dict[p_i][hole_cards] += 1
+ except KeyError:
+ hand_dict[p_i][hole_cards] = 1
+ success += 1
+ break
+ count += 1
+ if success == 1:
+ break
+ # Drop into the debugger to compare `store_hand_probs` (deal_bayes output)
+ # against the simulated tallies in `hand_dict`.
+ import ipdb; ipdb.set_trace()