diff --git a/pluribus/games/short_deck/agent.py b/pluribus/games/short_deck/agent.py new file mode 100644 index 00000000..c93cf4cf --- /dev/null +++ b/pluribus/games/short_deck/agent.py @@ -0,0 +1,30 @@ +import collections +import joblib + + +class Agent: + """Agent class can hold a trained strategy and regret""" + def __init__(self, regret_path=None): + self.strategy = collections.defaultdict( + lambda: collections.defaultdict(lambda: 0) + ) + if regret_path: + offline_strategy = joblib.load(regret_path) + self.regret = collections.defaultdict( + lambda: collections.defaultdict(lambda: 0), + offline_strategy['regret'] + ) + else: + self.regret = collections.defaultdict( + lambda: collections.defaultdict(lambda: 0) + ) + self.tmp_regret = collections.defaultdict( + lambda: collections.defaultdict(lambda: 0) + ) + + def reset_new_regret(self): + """Remove regret from temporary storage""" + del self.tmp_regret + self.tmp_regret = collections.defaultdict( + lambda: collections.defaultdict(lambda: 0) + ) diff --git a/pluribus/games/short_deck/state.py b/pluribus/games/short_deck/state.py index af2e08b7..ecb2d273 100644 --- a/pluribus/games/short_deck/state.py +++ b/pluribus/games/short_deck/state.py @@ -7,8 +7,11 @@ import operator import os from typing import Any, Dict, List, Optional, Tuple +from itertools import combinations +import random import dill as pickle +import numpy as np from pluribus import utils from pluribus.poker.card import Card @@ -32,7 +35,8 @@ def new_game( ] if info_set_lut: # Don't reload massive files, it takes ages. - state = ShortDeckPokerState(players=players, load_pickle_files=False, **kwargs) + state = ShortDeckPokerState(players=players, + load_pickle_files=False, **kwargs) state.info_set_lut = info_set_lut else: # Load massive files. @@ -54,6 +58,8 @@ def __init__( big_blind: int = 100, pickle_dir: str = ".", load_pickle_files: bool = True, + real_time_test: bool = False, + public_cards: List[Card] = [] ): """Initialise state.""" n_players = len(players) @@ -74,6 +80,7 @@ def __init__( self._initial_n_chips = players[0].n_chips self.small_blind = small_blind self.big_blind = big_blind + self.real_time_test = real_time_test self._poker_engine = PokerEngine( table=self._table, small_blind=small_blind, big_blind=big_blind ) @@ -81,9 +88,13 @@ def __init__( # this), assign blinds to the players. self._poker_engine.round_setup() # Deal private cards to players. - self._table.dealer.deal_private_cards(self._table.players) + if not self.real_time_test: + self._poker_engine.table.dealer.deal_private_cards( + self._table.players + ) # Store the actions as they come in here. 
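        # `_public_information` (below) snapshots the board as it stood at each
        # betting stage; `_update_hole_cards_bayes` later reads these snapshots
        # back to rebuild the info sets opponents actually acted on.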
self._history: Dict[str, List[str]] = collections.defaultdict(list) + self._public_information: Dict[str, List[Card]] = collections.defaultdict(list) self._betting_stage = "pre_flop" self._betting_stage_to_round: Dict[str, int] = { "pre_flop": 0, @@ -107,11 +118,19 @@ def __init__( "terminal": player_i_order, } self._skip_counter = 0 - self._first_move_of_current_round = True + # self._first_move_of_current_round = True self._reset_betting_round_state() for player in self.players: player.is_turn = False self.current_player.is_turn = True + if public_cards: + assert len(public_cards) in {3, 4, 5} + self._public_cards = public_cards + self._final_action = None + # only want to do these actions in real game play, as they are slow + if self.real_time_test: + # must have offline strategy loaded up + self._starting_hand_probs = self._initialize_starting_hands() def __repr__(self): """Return a helpful description of object in strings and debugger.""" @@ -145,7 +164,6 @@ def apply_action(self, action_str: Optional[str]) -> ShortDeckPokerState: new_state.info_set_lut = self.info_set_lut = lut # An action has been made, so alas we are not in the first move of the # current betting round. - new_state._first_move_of_current_round = False if action_str is None: # Assert active player has folded already. assert ( @@ -189,7 +207,6 @@ def apply_action(self, action_str: Optional[str]) -> ShortDeckPokerState: # stage of the game. new_state._increment_stage() new_state._reset_betting_round_state() - new_state._first_move_of_current_round = True if not new_state.current_player.is_active: new_state._skip_counter += 1 assert not new_state.current_player.is_active @@ -209,6 +226,63 @@ def apply_action(self, action_str: Optional[str]) -> ShortDeckPokerState: new_state.current_player.is_turn = True return new_state + def load_game_state(self, offline_strategy: Dict[str, Dict[str, float]], + action_sequence: list): + """ + Follow through the action sequence provided to get current node. + :param action_sequence: List of actions without 'skip' + """ + if 'skip' in set(action_sequence): + action_sequence = [a for a in action_sequence if a != 'skip'] + if len(action_sequence) == 1: + # TODO: Not sure if I need to deepcopy + betting_stage = self.betting_stage + public_cards = self._public_cards + # Must declare the appropriate amount of public cards for RTS.. + assert self._public_information[betting_stage] == public_cards + lut = self.info_set_lut + self.info_set_lut = {} + new_state = copy.deepcopy(self) + new_state.info_set_lut = self.info_set_lut = lut + new_state._final_action = action_sequence.pop(0) + new_state._update_hole_cards_bayes(offline_strategy) + return new_state + a = action_sequence.pop(0) + new_state = self.apply_action(a) + return new_state.load_game_state(offline_strategy, action_sequence) + + def deal_bayes(self): + # Deep copy the parts of state that are needed that must be immutable + # from state to state. + lut = self.info_set_lut + self.info_set_lut = {} + new_state = copy.deepcopy(self) + new_state.info_set_lut = self.info_set_lut = lut + players = list(range(len(new_state.players))) + random.shuffle(players) + cards_selected = [] + # TODO: This would be better by selecting the first player's + # cards, then normalizing the second and third, etc.. 
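        # Rejection-sample each player's hole cards from the Bayes-updated
        # distribution in the loop below, re-drawing whenever the sampled hand
        # overlaps cards already assigned to another player or to the board.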
+ for p_i in players: + starting_hand = new_state._get_starting_hand(p_i) + len_union = len(set(starting_hand).union(set(cards_selected))) + len_individual = len(starting_hand) + len(cards_selected) + while len_union < len_individual: + starting_hand = new_state._get_starting_hand(p_i) + len_union = len(set(starting_hand).union(set(cards_selected))) + len_individual = len(starting_hand) + len(cards_selected) + # TODO: pull this into a helper method, maybe it should + # be in the dealer class.. + for card in starting_hand: + new_state.players[p_i].add_private_card(card) + cards_selected += starting_hand + cards_selected += new_state._public_cards + for card in cards_selected: + new_state._table.dealer.deck.remove(card) + final_action = new_state._final_action + newest_state = new_state.apply_action(final_action) + return newest_state + @staticmethod def load_pickle_files(pickle_dir: str) -> Dict[str, Dict[Tuple[int, ...], str]]: """Load pickle files into memory.""" @@ -254,15 +328,36 @@ def _increment_stage(self): if self._betting_stage == "pre_flop": # Progress from private cards to the flop. self._betting_stage = "flop" - self._poker_engine.table.dealer.deal_flop(self._table) + if len(self._public_cards) >= 3: + community_cards = self._public_cards[:3] + self._poker_engine.table.community_cards += community_cards + else: + self._poker_engine.table.dealer.deal_flop(self._table) + self._public_information[ + self.betting_stage + ] = self._table.community_cards.copy() elif self._betting_stage == "flop": # Progress from flop to turn. self._betting_stage = "turn" - self._poker_engine.table.dealer.deal_turn(self._table) + if len(self._public_cards) >= 4: + community_cards = self._public_cards[3:4] + self._poker_engine.table.community_cards += community_cards + else: + self._poker_engine.table.dealer.deal_turn(self._table) + self._public_information[ + self.betting_stage + ] = self._table.community_cards.copy() elif self._betting_stage == "turn": # Progress from turn to river. self._betting_stage = "river" - self._poker_engine.table.dealer.deal_river(self._table) + if len(self._public_cards) == 5: + community_cards = self._public_cards[4:] + self._poker_engine.table.community_cards += community_cards + else: + self._poker_engine.table.dealer.deal_river(self._table) + self._public_information[ + self.betting_stage + ] = self._table.community_cards.copy() elif self._betting_stage == "river": # Progress to the showdown. 
self._betting_stage = "show_down" @@ -271,6 +366,200 @@ def _increment_stage(self): else: raise ValueError(f"Unknown betting_stage: {self._betting_stage}") + def _initialize_starting_hands(self) -> Dict[int, Dict[List[Card], float]]: + """Dictionary of starting hands to store probabilities in""" + assert self.betting_stage == "pre_flop" + starting_hand_probs: Dict = {} + n_players = len(self.players) + starting_hands = self._get_card_combos(2) + for p_i in range(n_players): + starting_hand_probs[p_i] = {} + for starting_hand in starting_hands: + starting_hand_probs[p_i][ + starting_hand + ] = 1 + return starting_hand_probs + + def _get_card_combos(self, num_cards) -> List[Tuple[Any, ...]]: + """Get combinations of cards""" + return list(combinations(self.cards_in_deck, num_cards)) + + def _normalize_bayes(self): + """Normalize probability of reach for each player""" + n_players = len(self.players) + for p_i in range(n_players): + total_prob = sum(self._starting_hand_probs[p_i].values()) + for starting_hand, prob in self._starting_hand_probs[p_i].items(): + self._starting_hand_probs[p_i][starting_hand] = prob / total_prob + + def _update_hole_cards_bayes(self, offline_strategy: Dict[str, Dict[str, + float]]): + """Get probability of reach for each starting hand for each player""" + assert self._history + n_players = len(self._table.players) + player_indices: List[int] = [p_i for p_i in range(n_players)] + for p_i in player_indices: + # TODO: Might make since to put starting hands in the deck class + for starting_hand in self._starting_hand_probs[p_i].keys(): + starting_hand = list( + starting_hand + ) + # TODO: Is this bad? + if "p_reach" in locals(): + del p_reach + action_sequence: Dict[str, List[str]] = collections.defaultdict(list) + for idx, betting_stage in enumerate(self._history.keys()): + n_actions_round = len(self._history[betting_stage]) + for i in range(n_actions_round): + action = self._history[betting_stage][i] + while action == 'skip': + i += 1 # Action sequences don't end in skip + action = self._history[betting_stage][i] + # TODO: Maybe a method already exists for this? + if betting_stage == "pre_flop": + ph = (i + 2) % n_players + else: + ph = i % n_players + if p_i != ph: + prob_reach_all_hands = [] + for opp_starting_hand in self._starting_hand_probs[ + p_i + ].keys(): + opp_starting_hand = list( + opp_starting_hand + ) + publics = self._public_information[betting_stage] + if len( + set(opp_starting_hand).union( + set(publics) + ).union(set(starting_hand)) + ) < len( + opp_starting_hand + ) + len( + starting_hand + ) + len( + publics + ): + prob = 0 + else: + publics = self._public_information[ + betting_stage + ] + infoset = self._info_set_builder( + hole_cards=opp_starting_hand, + public_cards=publics, + history=action_sequence, + this_betting_stage=betting_stage, + ) + # Check to see if the strategy exists, + # if not equal probability + # TODO: is this overly hacky? 
+ # Problem with defaulting to 1 / 3, is that it + # it doesn't work for calculations that + # need to be made with the object's values + try: + prob = offline_strategy[infoset][action] + # Normalizing unnormalized offline_stregy + prob /= sum(offline_strategy[infoset]\ + .values()) + except KeyError: + prob = 1 / len(self.legal_actions) + prob_reach_all_hands.append(prob) + total_opp_prob_h = sum(prob_reach_all_hands) /\ + len(prob_reach_all_hands) + if "p_reach" not in locals(): + p_reach = total_opp_prob_h + else: + p_reach *= total_opp_prob_h + elif p_i == ph: + publics = self._public_information[betting_stage] + if len( + set(starting_hand).union( + set(publics) + ) + ) < ( + len(publics) + 2 + ): + total_prob = 0 + else: + publics = self._public_information[betting_stage] + infoset = self._info_set_builder( + hole_cards=starting_hand, + public_cards=publics, + history=action_sequence, + this_betting_stage=betting_stage, + ) + try: + total_prob = offline_strategy[infoset][action] + # Normalizing unnormalized offline_stregy + total_prob /= sum(offline_strategy[infoset]\ + .values()) + except KeyError: + total_prob = 1 / len(self.legal_actions) + if "p_reach" not in locals(): + p_reach = total_prob + else: + p_reach *= total_prob + action_sequence[betting_stage].append(action) + self._starting_hand_probs[p_i][tuple(starting_hand)] = p_reach + self._normalize_bayes() + + def _get_starting_hand(self, player_idx: int) -> List[Card]: + """Get starting hand based on probability of reach""" + starting_hands = list(self._starting_hand_probs[player_idx].keys()) + starting_hands_idxs = list(range(len(starting_hands))) + starting_hands_probs = list(self._starting_hand_probs[ + player_idx + ].values()) + starting_hand_idx = np.random.choice( + starting_hands_idxs, + 1, + p=starting_hands_probs + )[0] + starting_hand = list(starting_hands[starting_hand_idx]) + return starting_hand + + def _info_set_builder(self, hole_cards=None, public_cards=None, + history=None, this_betting_stage=None) -> str: + """Get the information set for the current player.""" + if hole_cards is None: + hole_cards = self.current_player.cards + if public_cards is None: + public_cards = self._table.community_cards + if history is None: + history = self._history + if this_betting_stage is None: + this_betting_stage = self._betting_stage + cards = sorted( + hole_cards, + key=operator.attrgetter("eval_card"), + reverse=True, + ) + cards += sorted( + public_cards, + key=operator.attrgetter("eval_card"), + reverse=True, + ) + eval_cards = tuple([int(card) for card in cards]) + try: + cards_cluster = self.info_set_lut[this_betting_stage][eval_cards] + except KeyError: + import ipdb; + ipdb.set_trace() + return "default info set, please ensure you load it correctly" + # Convert history from a dict of lists to a list of dicts as I'm + # paranoid about JSON's lack of care with insertion order. 
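        # For illustration only (the cluster id and actions are invented), the
        # string returned below looks something like:
        #   {"cards_cluster":6,"history":[{"pre_flop":["raise","call","call"]},{"flop":["call"]}]}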
+ info_set_dict = { + "cards_cluster": cards_cluster, + "history": [ + {betting_stage: [str(action) for action in actions]} + for betting_stage, actions in history.items() + ], + } + return json.dumps( + info_set_dict, separators=(",", ":"), cls=utils.io.NumpyJSONEncoder + ) + @property def community_cards(self) -> List[Card]: """Return all shared/public cards.""" @@ -281,6 +570,11 @@ def private_hands(self) -> Dict[ShortDeckPokerPlayer, List[Card]]: """Return all private hands.""" return {p: p.cards for p in self.players} + @property + def cards_in_deck(self): + """Returns current cards in deck""" + return self._table.dealer.deck._cards_in_deck + @property def initial_regret(self) -> Dict[str, float]: """Returns the default regret for this state.""" @@ -314,11 +608,11 @@ def player_i(self) -> int: @player_i.setter def player_i(self, _: Any): """Raise an error if player_i is set.""" - raise ValueError(f"The player_i property should not be set.") + raise ValueError("The player_i property should not be set.") @property def betting_round(self) -> int: - """Algorithm 1 of pluribus supp. material references betting_round.""" + """Return 0 indexed betting round""" try: betting_round = self._betting_stage_to_round[self._betting_stage] except KeyError: @@ -332,33 +626,7 @@ def betting_round(self) -> int: @property def info_set(self) -> str: """Get the information set for the current player.""" - cards = sorted( - self.current_player.cards, - key=operator.attrgetter("eval_card"), - reverse=True, - ) - cards += sorted( - self._table.community_cards, - key=operator.attrgetter("eval_card"), - reverse=True, - ) - eval_cards = tuple([card.eval_card for card in cards]) - try: - cards_cluster = self.info_set_lut[self._betting_stage][eval_cards] - except KeyError: - return "default info set, please ensure you load it correctly" - # Convert history from a dict of lists to a list of dicts as I'm - # paranoid about JSON's lack of care with insertion order. 
- info_set_dict = { - "cards_cluster": cards_cluster, - "history": [ - {betting_stage: [str(action) for action in actions]} - for betting_stage, actions in self._history.items() - ], - } - return json.dumps( - info_set_dict, separators=(",", ":"), cls=utils.io.NumpyJSONEncoder - ) + return self._info_set_builder() @property def payout(self) -> Dict[int, int]: diff --git a/pluribus/poker/card.py b/pluribus/poker/card.py index 5fe30a61..a3fc6db7 100644 --- a/pluribus/poker/card.py +++ b/pluribus/poker/card.py @@ -74,6 +74,9 @@ def __eq__(self, other): def __ne__(self, other): return int(self) != int(other) + def __hash__(self): + return hash(int(self)) + @property def eval_card(self) -> EvaluationCard: """Return an `EvaluationCard` for use in the `Evaluator`.""" @@ -178,4 +181,3 @@ def from_dict(x: Dict[str, Union[int, str]]): if set(x) != {"rank", "suit"}: raise NotImplementedError(f"Unrecognised dict {x}") return Card(rank=x["rank"], suit=x["suit"]) - diff --git a/pluribus/poker/deck.py b/pluribus/poker/deck.py index efc7e5f6..c6801105 100644 --- a/pluribus/poker/deck.py +++ b/pluribus/poker/deck.py @@ -61,3 +61,9 @@ def pick(self, random: bool = True) -> Card: card: Card = self._cards_in_deck.pop(index) self._dealt_cards.append(card) return card + + def remove(self, card): + """Remove a specific card from the deck""" + if card in self._cards_in_deck: + self._cards_in_deck.remove(card) + self._dealt_cards.append(card) diff --git a/research/blueprint_algo/blueprint_short_deck_poker.py b/research/blueprint_algo/blueprint_short_deck_poker.py index b2deeb34..ca3f89bb 100644 --- a/research/blueprint_algo/blueprint_short_deck_poker.py +++ b/research/blueprint_algo/blueprint_short_deck_poker.py @@ -204,7 +204,7 @@ def cfr(agent: Agent, state: ShortDeckPokerState, i: int, t: int) -> float: logging.debug(f"Got EV for {a}: {voa[a]}") vo += sigma[I][a] * voa[a] logging.debug( - f"""Added to Node EV for ACTION: {a} INFOSET: {I} + f"""Added to Node EV for ACTION: {a} INFOSET: {I} STRATEGY: {sigma[I][a]}: {sigma[I][a] * voa[a]}""" ) logging.debug(f"Updated EV at {I}: {vo}") @@ -346,16 +346,16 @@ def _create_dir() -> Path: @click.command() -@click.option("--strategy_interval", default=2, help=".") -@click.option("--n_iterations", default=10, help=".") -@click.option("--lcfr_threshold", default=80, help=".") -@click.option("--discount_interval", default=1000, help=".") -@click.option("--prune_threshold", default=4000, help=".") +@click.option("--strategy_interval", default=400, help=".") +@click.option("--n_iterations", default=5500, help=".") +@click.option("--lcfr_threshold", default=400, help=".") +@click.option("--discount_interval", default=400, help=".") +@click.option("--prune_threshold", default=400, help=".") @click.option("--c", default=-20000, help=".") @click.option("--n_players", default=3, help=".") -@click.option("--print_iteration", default=10, help=".") -@click.option("--dump_iteration", default=10, help=".") -@click.option("--update_threshold", default=0, help=".") +@click.option("--print_iteration", default=100, help=".") +@click.option("--dump_iteration", default=20, help=".") +@click.option("--update_threshold", default=400, help=".") def train( strategy_interval: int, n_iterations: int, diff --git a/research/rts/RT.py b/research/rts/RT.py new file mode 100644 index 00000000..457ef58d --- /dev/null +++ b/research/rts/RT.py @@ -0,0 +1,30 @@ +from typing import List +import joblib + +from RT_cfr import rts +from pluribus.poker.card import Card + + +if __name__ == "__main__": + # We can 
set public cards or not + public_cards = [Card("ace", "diamonds"), Card("king", "clubs"), + Card("jack", "spades"), Card("10", "hearts"), + Card("10", "spades")] + # Action sequence must be in old form (one list, includes skips) + action_sequence = ["raise", "raise", "raise", "call", "call", + "raise", "raise", "raise", "call", "call", + "raise", "raise", "raise", "call", "call", "call"] + agent_output, offline_strategy = rts( + 'test_strategy2/unnormalized_output/offline_strategy_1500.gz', + 'test_strategy2/strategy_1500.gz', public_cards, action_sequence, + 1400, 1, 1, 3, 1, 1, 20 + ) + save_path = "test_strategy2/unnormalized_output/" + last_regret = { + info_set: dict(strategy) + for info_set, strategy in agent_output.regret.items() + } + joblib.dump(offline_strategy, save_path + 'rts_output.gz', compress="gzip") + joblib.dump(last_regret, save_path + 'last_regret.gz', compress="gzip") + import ipdb; + ipdb.set_trace() diff --git a/research/rts/RT_cfr.py b/research/rts/RT_cfr.py new file mode 100644 index 00000000..1e1d596e --- /dev/null +++ b/research/rts/RT_cfr.py @@ -0,0 +1,203 @@ +from __future__ import annotations + +import collections +from typing import Dict, List +import joblib +from pathlib import Path + +from tqdm import trange +import numpy as np +import datetime +import yaml + +from pluribus import utils +from pluribus.games.short_deck.state import ShortDeckPokerState, new_game +from pluribus.games.short_deck.agent import Agent +from pluribus.poker.card import Card + + +def normalize_strategy(this_info_sets_regret: Dict[str, float]) -> Dict[str, float]: + """Calculate the strategy based on the current information sets regret.""" + actions = this_info_sets_regret.keys() + regret_sum = sum([max(regret, 0) for regret in this_info_sets_regret.values()]) + if regret_sum > 0: + strategy: Dict[str, float] = { + action: max(this_info_sets_regret[action], 0) / regret_sum + for action in actions + } + elif this_info_sets_regret == {}: + # Don't return strategy if no strategy was made + # during training + strategy: Dict[str, float] = {} + elif regret_sum == 0: + # Regret is negative, we learned something + default_probability = 1 / len(actions) + strategy: Dict[str, float] = {action: default_probability for action in actions} + return strategy + + +def calculate_strategy( + regret: Dict[str, Dict[str, float]], + I: str, + state: ShortDeckPokerState, +) -> Dict[str, Dict[str, float]]: + """ + Calculate strategy based on regret + """ + sigma = collections.defaultdict(lambda: collections.defaultdict(lambda: 1 / 3)) + rsum = sum([max(x, 0) for x in regret[I].values()]) + for a in state.legal_actions: + if rsum > 0: + sigma[I][a] = max(regret[I][a], 0) / rsum + else: + sigma[I][a] = 1 / len(state.legal_actions) + return sigma + + +def _create_dir(folder_id: str) -> Path: + """Create and get a unique dir path to save to using a timestamp.""" + time = str(datetime.datetime.now()) + for char in ":- .": + time = time.replace(char, "_") + path: Path = Path(f"./{folder_id}_results_{time}") + path.mkdir(parents=True, exist_ok=True) + return path + + +def cfr(agent: Agent, state: ShortDeckPokerState, i: int, t: int) -> float: + """ + CFR algo with the a temporary regret object for better strategy averaging + """ + ph = state.player_i + + player_not_in_hand = not state.players[i].is_active + if state.is_terminal or player_not_in_hand: + return state.payout[i] + + elif ph == i: + I = state.info_set + # Move regret over to temporary object and build off that + if agent.tmp_regret[I] == {}: + 
agent.tmp_regret[I] = agent.regret[I].copy() + sigma = calculate_strategy(agent.tmp_regret, I, state) + + vo = 0.0 + voa = {} + for a in state.legal_actions: + new_state: ShortDeckPokerState = state.apply_action(a) + voa[a] = cfr(agent, new_state, i, t) + vo += sigma[I][a] * voa[a] + + for a in state.legal_actions: + agent.tmp_regret[I][a] += voa[a] - vo + + return vo + else: + Iph = state.info_set + # Move regret over to a temporary object and build off that + if agent.tmp_regret[Iph] == {}: + agent.tmp_regret[Iph] = agent.regret[Iph].copy() + sigma = calculate_strategy(agent.tmp_regret, Iph, state) + + try: + a = np.random.choice( + list(sigma[Iph].keys()), 1, p=list(sigma[Iph].values()), + )[0] + except KeyError: + p = 1 / len(state.legal_actions) + probabilities = np.full(len(state.legal_actions), p) + a = np.random.choice(state.legal_actions, p=probabilities) + sigma[Iph] = {action: p for action in state.legal_actions} + except: + import ipdb; + ipdb.set_trace() + + new_state: ShortDeckPokerState = state.apply_action(a) + return cfr(agent, new_state, i, t) + + + def rts( + offline_strategy_path: str, + last_regret_path: str, + public_cards: list, + action_sequence: list, + n_iterations: int, + lcfr_threshold: int, + discount_interval: int, + n_players: int, + update_interval: int, + update_threshold: int, + dump_int: int, + ): + """RTS.""" + config: Dict[str, int] = {**locals()} + save_path: Path = _create_dir('RTS') + with open(save_path / "config.yaml", "w") as stream: + yaml.dump(config, stream) + # TODO: fix the seed + # utils.random.seed(36) + agent = Agent(regret_path=last_regret_path) + # Load the unnormalized strategy to build off + offline_strategy = joblib.load(offline_strategy_path) + state: ShortDeckPokerState = new_game( + 3, real_time_test=True, public_cards=public_cards + ) + # Load the current game state + current_game_state: ShortDeckPokerState = state.load_game_state( + offline_strategy, action_sequence + ) + for t in trange(1, n_iterations + 1, desc="train iter"): + for i in range(n_players): # fixed position i + # Deal hole cards based on bayesian updating of hole card probs + state: ShortDeckPokerState = current_game_state.deal_bayes() + cfr(agent, state, i, t) + if t < lcfr_threshold and t % discount_interval == 0: + d = (t / discount_interval) / ((t / discount_interval) + 1) + for I in agent.tmp_regret.keys(): + for a in agent.tmp_regret[I].keys(): + agent.tmp_regret[I][a] *= d + # Add the unnormalized strategy into the original + # Right now assumes n_iterations is a multiple of dump_int + if t % dump_int == 0: + # Add the regret back to the regret dict; we'll build off it in + # the next RTS pass + for I in agent.tmp_regret.keys(): + if agent.tmp_regret[I] != {}: + agent.regret[I] = agent.tmp_regret[I].copy() + for info_set, this_info_sets_regret in sorted(agent.tmp_regret.items()): + # If this_info_sets_regret == {}, we do nothing + strategy = normalize_strategy(this_info_sets_regret) + # Check if info_set exists.. 
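                # `offline_strategy` stays *unnormalized*: each dump adds the
                # per-info-set strategy derived from `tmp_regret` on top of what
                # has already accumulated, and consumers normalize on the fly
                # (e.g. `_calculate_strategy` in agent_test.py and
                # `_update_hole_cards_bayes`).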
+ no_info_set = info_set not in offline_strategy + if no_info_set or offline_strategy[info_set] == {}: + offline_strategy[info_set] = {a: 0 for a in strategy.keys()} + for action, probability in strategy.items(): + offline_strategy[info_set][action] += probability + agent.reset_new_regret() + + return agent, offline_strategy + + +if __name__ == "__main__": + # We can set public cards or not + public_cards = [Card("ace", "diamonds"), Card("king", "clubs"), + Card("jack", "spades"), Card("10", "hearts"), + Card("10", "spades")] + # Action sequence must be in old form (one list, includes skips) + action_sequence = ["raise", "raise", "raise", "call", "call", + "raise", "raise", "raise", "call", "call", + "raise", "raise", "raise", "call", "call", "call"] + agent_output, offline_strategy = rts( + 'test_strategy3/unnormalized_output/offline_strategy_1500.gz', + 'test_strategy3/strategy.gz', public_cards, action_sequence, + 1400, 1, 1, 3, 1, 1, 20 + ) + save_path = "test_strategy3/unnormalized_output/" + last_regret = { + info_set: dict(strategy) + for info_set, strategy in agent_output.regret.items() + } + joblib.dump(offline_strategy, save_path + 'rts_output.gz', compress="gzip") + joblib.dump(last_regret, save_path + 'last_regret.gz', compress="gzip") + import ipdb; + ipdb.set_trace() diff --git a/research/stat_tests/agent_test.py b/research/stat_tests/agent_test.py new file mode 100644 index 00000000..6569ac6e --- /dev/null +++ b/research/stat_tests/agent_test.py @@ -0,0 +1,162 @@ +from typing import List, Dict, DefaultDict +from pathlib import Path +import joblib +import collections + +import click +from tqdm import trange +import yaml +import datetime +import numpy as np +from scipy import stats + +from pluribus.games.short_deck.state import ShortDeckPokerState, new_game +from pluribus.poker.card import Card + + +def _calculate_strategy( + state: ShortDeckPokerState, + I: str, + strategy: DefaultDict[str, DefaultDict[str, float]], + count=None, + total_count=None +) -> str: + sigma = collections.defaultdict( + lambda: collections.defaultdict(lambda: 1 / 3) + ) + try: + # If strategy is empty, go to other block + sigma[I] = strategy[I].copy() + if sigma[I] == {}: + raise KeyError + norm = sum(sigma[I].values()) + for a in sigma[I].keys(): + sigma[I][a] /= norm + a = np.random.choice( + list(sigma[I].keys()), 1, p=list(sigma[I].values()), + )[0] + except KeyError: + if count is not None: + count += 1 + p = 1 / len(state.legal_actions) + probabilities = np.full(len(state.legal_actions), p) + a = np.random.choice(state.legal_actions, p=probabilities) + sigma[I] = {action: p for action in state.legal_actions} + if total_count is not None: + total_count += 1 + return a, count, total_count + + +def _create_dir(folder_id: str) -> Path: + """Create and get a unique dir path to save to using a timestamp.""" + time = str(datetime.datetime.now()) + for char in ":- .": + time = time.replace(char, "_") + path: Path = Path(f"./{folder_id}_results_{time}") + path.mkdir(parents=True, exist_ok=True) + return path + + +def agent_test( + hero_strategy_path: str, + opponent_strategy_path: str, + real_time_est: bool = False, + action_sequence: List[str] = None, + public_cards: List[Card] = [], + n_outer_iters: int = 30, + n_inner_iters: int = 100, + n_players: int = 3, + hero_count=None, + hero_total_count=None, +): + config: Dict[str, int] = {**locals()} + save_path: Path = _create_dir('bt') + with open(save_path / "config.yaml", "w") as steam: + yaml.dump(config, steam) + + # Load unnormalized strategy for 
hero + hero_strategy = joblib.load(hero_strategy_path) + # Load unnormalized strategy for opponents + opponent_strategy = joblib.load(opponent_strategy_path) + + # Loading game state we used RTS on + if real_time_est: + state: ShortDeckPokerState = new_game( + n_players, real_time_test=real_time_est, public_cards=public_cards + ) + current_game_state: ShortDeckPokerState = state.load_game_state( + opponent_strategy, action_sequence + ) + + # TODO: Right now, this can only be used for loading states if the two + # strategies are averaged. Even averaging strategies is risky. Loading a + # game state should be used with caution. It will work only if the + # probability of reach is identical across strategies. Use the average + # strategy. + + info_set_lut = {} + EVs = np.array([]) + for _ in trange(1, n_outer_iters): + EV = np.array([]) # Expected value for player 0 (hero) + for t in trange(1, n_inner_iters + 1, desc="train iter"): + for p_i in range(n_players): + if real_time_est: + # Deal hole cards based on bayesian updating of hole card + # probabilities + state: ShortDeckPokerState = current_game_state.deal_bayes() + else: + state: ShortDeckPokerState = new_game( + n_players, + info_set_lut + ) + info_set_lut = state.info_set_lut + while True: + player_not_in_hand = not state.players[p_i].is_active + if state.is_terminal or player_not_in_hand: + EV = np.append(EV, state.payout[p_i]) + break + if state.player_i == p_i: + random_action, hero_count, hero_total_count = \ + _calculate_strategy( + state, + state.info_set, + hero_strategy, + count=hero_count, + total_count=hero_total_count + ) + else: + random_action, oc, otc = _calculate_strategy( + state, + state.info_set, + opponent_strategy, + ) + state = state.apply_action(random_action) + EVs = np.append(EVs, EV.mean()) + t_stat = (EVs.mean() - 0) / (EVs.std() / np.sqrt(n_outer_iters)) + p_val = stats.t.sf(np.abs(t_stat), n_outer_iters - 1) + results_dict = { + 'Expected Value': float(EVs.mean()), + 'T Statistic': float(t_stat), + 'P Value': float(p_val), + 'Standard Deviation': float(EVs.std()), + 'N': int(len(EVs)), + 'Random Moves Hero': hero_count, + 'Total Moves Hero': hero_total_count + } + with open(save_path / 'results.yaml', "w") as stream: + yaml.safe_dump(results_dict, stream=stream, default_flow_style=False) + + +if __name__ == "__main__": + strat_path = "test_strategy2/unnormalized_output/" + agent_test( + hero_strategy_path=strat_path + "random_strategy.gz", + opponent_strategy_path=strat_path + "offline_strategy_1500.gz", + real_time_est=False, + public_cards=[], + action_sequence=None, + n_inner_iters=25, + n_outer_iters=75, + hero_count=0, + hero_total_count=0 + ) diff --git a/research/stat_tests/average_unnormalized_strategy.py b/research/stat_tests/average_unnormalized_strategy.py new file mode 100644 index 00000000..e35965ad --- /dev/null +++ b/research/stat_tests/average_unnormalized_strategy.py @@ -0,0 +1,91 @@ +import collections +import glob +import os +import re +from typing import Dict, List, Union + +import click +import joblib +from tqdm import tqdm + + +def calculate_strategy(this_info_sets_regret: Dict[str, float]) -> Dict[str, float]: + """Calculate the strategy based on the current information sets regret.""" + actions = this_info_sets_regret.keys() + regret_sum = sum([max(regret, 0) for regret in this_info_sets_regret.values()]) + if regret_sum > 0: + strategy: Dict[str, float] = { + action: max(this_info_sets_regret[action], 0) / regret_sum + for action in actions + } + elif this_info_sets_regret == {}: + 
# Don't return strategy if no strategy was made + # during training + strategy: Dict[str, float] = {} + elif regret_sum == 0: + # Regret is negative, we learned something + default_probability = 1 / len(actions) + strategy: Dict[str, float] = {action: default_probability for action in actions} + return strategy + + +def try_to_int(text: str) -> Union[str, int]: + """Attempt to return int.""" + return int(text) if text.isdigit() else text + + +def natural_key(text): + """Sort with natural numbers.""" + return [try_to_int(c) for c in re.split(r"(\d+)", text)] + + +def average_strategy(all_file_paths: List[str]) -> Dict[str, Dict[str, float]]: + """Compute the mean strategy over all timesteps.""" + # The offline strategy for all information sets. + offline_strategy: Dict[str, Dict[str, float]] = collections.defaultdict( + lambda: collections.defaultdict(lambda: 0.0) + ) + # Sum up all strategies. + for dump_path in tqdm(all_file_paths, desc="loading dumps"): + # Load file. + try: + agent = joblib.load(dump_path) + except Exception as e: + tqdm.write(f"Failed to load file at {dump_path} because:{e}") + agent = {} + regret = agent.get("regret", {}) + # Sum probabilities from computed strategy.. + for info_set, this_info_sets_regret in sorted(regret.items()): + strategy = calculate_strategy(this_info_sets_regret) + # If strategy == {}, we do nothing + for action, probability in strategy.items(): + offline_strategy[info_set][action] += probability + # Return regular dict, not defaultdict. + return {info_set: dict(strategy) for info_set, strategy in offline_strategy.items()} + + +@click.command() +@click.option( + "--results_dir_path", default=".", help="the location of the agent file dumps." +) +@click.option( + "--write_dir_path", default=".", help="where to save the offline strategy" +) +def cli(results_dir_path: str, write_dir_path: str): + """Compute the strategy and write to file.""" + # Find all files to load. + all_file_paths = glob.glob(os.path.join(results_dir_path, "*.gz")) + if not all_file_paths: + raise ValueError(f"No agent dumps could be found at: {results_dir_path}") + # Sort the file paths in the order they were created. + all_file_paths = sorted(all_file_paths, key=natural_key) + offline_strategy = average_strategy(all_file_paths) + # Save dictionary to compressed file. 
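    # Illustration (hypothetical file name): a last dump called "agent_1500.gz"
    # gives latest_iteration == 1500, so the averaged strategy is written to
    # "offline_strategy_1500.gz".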
+ latest_file = os.path.basename(all_file_paths[-1]) + latest_iteration: int = int(re.findall(r"\d+", latest_file)[0]) + save_file: str = f"offline_strategy_{latest_iteration}.gz" + joblib.dump(offline_strategy, os.path.join(write_dir_path, save_file)) + + +if __name__ == "__main__": + cli() diff --git a/research/stat_tests/rts_ab_test.py b/research/stat_tests/rts_ab_test.py new file mode 100644 index 00000000..047d571d --- /dev/null +++ b/research/stat_tests/rts_ab_test.py @@ -0,0 +1,112 @@ +import numpy as np +import json +import joblib +import sys +from typing import List + +import click + +from agent_test import agent_test +from pluribus.poker.deck import Deck +sys.path.append('research/rts') +from RT_cfr import rts + + +@click.command() +@click.option("--offline_strategy_path", help=".") +@click.option("--last_regret_path", help=".") +@click.option("--n_iterations", default=1500, help=".") +@click.option("--lcfr_threshold", default=400, help=".") +@click.option("--discount_interval", default=400, help=".") +@click.option("--n_players", default=3, help=".") +@click.option("--update_interval", default=400, help=".") +@click.option("--update_threshold", default=400, help=".") +@click.option("--dump_int", default=20, help=".") +@click.option("--save_dir", help=".") +@click.option("--n_inner_iters", default=25, help=".") +@click.option("--n_outer_iters", default=150, help=".") +def rts_ab_test( + offline_strategy_path: str, + last_regret_path: str, + n_iterations: int, + lcfr_threshold: int, + discount_interval: int, + n_players: int, + update_interval: int, + update_threshold: int, + dump_int: int, + save_dir: str, + n_inner_iters: int, + n_outer_iters: int, + ranks: List[int] = list(range(10, 14 + 1)), +): + check = joblib.load(offline_strategy_path) + histories = np.random.choice(list(check.keys()), 2) + action_sequences = [] + public_cards_lst = [] + community_card_dict = { + "pre_flop": 0, + "flop": 3, + "turn": 4, + "river": 5, + } + deck = Deck(include_ranks=ranks) + for history in histories: + history_dict = json.loads(history) + history_lst = history_dict['history'] + action_sequence = [] + betting_rounds = [] + for x in history_lst: + action_sequence += list(x.values())[0] + betting_rounds += list(x.keys()) + action_sequences.append(action_sequence) + if action_sequences: + final_betting_round = list(betting_rounds)[-1] + else: + final_betting_round = "pre_flop" + n_cards = community_card_dict[final_betting_round] + cards_in_deck = deck._cards_in_deck + public_cards = list( + np.random.choice(cards_in_deck, n_cards) + ) + public_cards_lst.append(public_cards) + + for i in range(0, len(action_sequences)): + public_cards = public_cards_lst[i].copy() + action_sequence = action_sequences[i].copy() + agent_output, offline_strategy = rts( + offline_strategy_path, + last_regret_path, + public_cards, + action_sequence, + n_iterations=n_iterations, + lcfr_threshold=lcfr_threshold, + discount_interval=discount_interval, + n_players=n_players, + update_interval=update_interval, + update_threshold=update_threshold, + dump_int=dump_int + ) + last_regret = { + info_set: dict(strategy) + for info_set, strategy in agent_output.regret.items() + } + joblib.dump(offline_strategy, save_dir + f'rts_output{i}.gz', compress="gzip") + joblib.dump(last_regret, save_dir + f'last_regret{i}.gz', compress="gzip") + + public_cards = public_cards_lst[i].copy() + action_sequence = action_sequences[i].copy() + agent_test( + hero_strategy_path=save_dir + f"rts_output{i}.gz", + 
opponent_strategy_path=offline_strategy_path, + real_time_est=True, + public_cards=public_cards, + action_sequence=action_sequence, + n_inner_iters=n_inner_iters, + n_outer_iters=n_outer_iters, + hero_count=0, + hero_total_count=0, + ) + +if __name__ == "__main__": + rts_ab_test() diff --git a/research/test_methodology/validating_nash_equilibriums_via_simulations.ipynb b/research/test_methodology/validating_nash_equilibriums_via_simulations.ipynb deleted file mode 100644 index c146c559..00000000 --- a/research/test_methodology/validating_nash_equilibriums_via_simulations.ipynb +++ /dev/null @@ -1,224 +0,0 @@ -{ - "cells": [ - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "# Validating Nash Equilibriums Via Simulations" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "_by Colin Manko_" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "In an effort to validate and test possible improvements to core poker artificial intelligence algorithms, I have designed the following methodology." - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Goals" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "- Validate that MCCFR offline learning strategy is approximating a Nash equilibrium\n", - "- More generally, create a methodology that allows for rigorously testing changes made to the core AI algorithms" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "#### Prerequisites" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "- Need two test bot implementation strategies ($\\beta{1}$ and $\\beta{2}$) that we would like to compare\n", - "- Need a human tester ($H_{0}$) as a quasi control. The human tester should not have access to any underlying strategies from the test bots or simulated Nash" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "#### Step 1: Randomly Generate Test Game" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Given a set of $N$ game tree nodes (this is the entire game tree, as given by infoset, $I$), randomly generate $x$ test nodes without preplacement and with equal probability. Call the set of test nodes $U$.\n", - "\n", - "As a side note, we will account for probability of reach ($p(h)$) in another step. Equal probability across nodes allows us to find patterns across nodes where our agent underperforms. We will adjust the expected value at $I$, ($v^{\\sigma}(I)$), by $p(h)$.\n", - "- **How to**: For Limit Texas Hold'em, the number of action sequences ($N$), is small enough that they can be found computationally rather than analytically. We can run *all_action_sequences.py* in the *size_of_problem* directory to generate this list. \n", - "- _Something like 15-20 hours and less than 4GB??_\n", - "- Generate $x$ integers to be indices and select them from the *all_action_sequences.py* output\n", - "- Once $x$ action sequences are generated, randomly generate $x$ public card combos, based on the betting stage of the test node, $u$, as well as one pair of private hole cards to be used by $\\beta{1}$, $\\beta{2}$ and $H_0$. They will only get that hand at $u$. 
" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "#### Step 2: Prepare Realtime Search for Finding Nash Equilibrium" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "For each test node, $u$, in $U$, use realtime search to compute the Nash Equilibrium ($\\sigma^*$) by constraining the search algorithm to start at $u$, where $u$ is equivalent to $I$ in regard to action sequence, but does not have any set hand for the traversing player ($p_i$).\n", - "\n", - "Use a pooled strategy between $\\beta{1}$ and $\\beta{2}$ to estimate $p(h)$ without bias:" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "For hand in possible combinations of real hands:\n", - "
\n", - "    For idx, $a$ in action sequence at $u$:\n", - "
\n", - "        if idx == 0:\n", - "
\n", - "             $p(h)_\\beta{1}$ = $\\beta{1}$[$I$][$a$]\n", - "
\n", - "             $p(h)_\\beta{2}$ = $\\beta{2}$[$I$][$a$]\n", - "
\n", - "        $p(h)_\\beta{1}$ *= $\\beta{1}$[$I$][$a$]\n", - "
\n", - "        $p(h)_\\beta{2}$ *= $\\beta{1}$[$I$][$a$]\n", - "
\n", - "    p(h)[rs] = ($p(h)_\\beta{1}$ + $p(h)_\\beta{2}$)/2" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "The root node of the realtime search algorithm is replaced with a chance node that represents each possible node in the public state $G$ [[Brown, Sandholm, Amos]](https://papers.nips.cc/paper/7993-depth-limited-solving-for-imperfect-information-games.pdf). From the above psuedo-code, this deal can be generated as: " - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Generate deal order\n", - "
\n", - "For $i$ in $P_i$ deal order:\n", - "
\n", - "    Generate hand for player based on normalized $p(h)[rs]$ if available, else try again" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "_The \"if available, else try again\" part could be made better_" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "**Other important features of the _\"Nash Bot\"_ real time search..**:\n", - "- The _\"Nash bot\"_ is the master of this node. In order to reach full convergence, from the normal MCCFR algorithm, we must remove the sampling of actions for opponents.\n", - "- For ease, the real time search should not use leaf nodes, but should search to the end of the game tree, where either a terminal node or a shown down is entered. In this way, we can get a truer sense of the expected value." - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "A Nash Equilibrium is found if the change in strategy on each iteration drops below some threshold $t$ for the real hand we are testing for. Charting probabilities for each action in $u$ over time for the randomly generated real hand to test should show a convergence over time." - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "_One main benefit of using this real time search to validate CFR is this search will need to be developed anyway._" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "#### Step 3: Test and Measure Success" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "**For the test bots:**\n", - "For each $u$ in $U$, play each test strategy ($\\beta{1}$ and $\\beta{2}$) against the _\"Nash bot\"_ for $r$ number of simulations. The _\"Nash bot\"_ should be dealt available hands from the distribution of probabilities as determined by $p(h)[rs]$ in the pseudo-code above. Both the test bots and the human tester will be dealt the same hand in each simulation of game play on $u$, as randomly generated in step 1. \n", - "\n", - "If $\\beta{1}$ or $\\beta{2}$ has converged to a Nash equilibrium, then we should expect $v^\\sigma$ to be equal to 0 for our test bot, assuming that _\"Nash bot\"_ has converged to a Nash equilibrium itself. $v^{\\sigma^*}(u)$ and $v^{\\sigma}(u)$ are the estimated payouts for the _\"Nash bot\"_ opponents and the \"hero\" (test bots or human), respectively.\n", - "\n", - "**For the human tester:**\n", - "We can simply create a contrived game. Based on the normalized probability of reach for $u$, $\\bar{p(h)}$, we can randomly generate which $u$ the human player is entered into, however they will always have the same hand upon entering $u$ and their opponents hands will vary based on $p(h)[rs]$.\n", - "\n", - "The test metric is as follows, after $p(h)$ has been normalized for space $U$, $\\bar{p(h)}$:\n", - "$$\\sum_{i=1}^{x}(v^{\\sigma}(u_i)-v^{\\sigma^*}(u_{-i}))\\times{\\bar{p(h)}}$$" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "The value closest to 0 (summing no testing agent goes over 0) will have best approximated the Nash equilibrium. Additionaly, $H_0$ can be used as a quasi-control, to validate that the bot is beating a human.\n", - "\n", - "The above metric also has some degree of simulation error. For each simulation in $r$ simulations, we create a distribution of values that has a standard deviation and follows the normal distribution. 
\n", - "\n", - "Along with calculating the expected payout per simulation, $u^{\\sigma}(u_i)-u^{\\sigma^*}(u_{-i})$, we can also calculate $\\sigma$ for this distribution in order to describe a confidence interval around the test metric. \n", - "\n", - "Finally, a simple difference of means can be done between each test bot to decipher a winner and if that winner had a statistically significant edge. We can then study each $u$ in $U$ to find patterns in which nodes the espspective bots did not do well with." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [] - } - ], - "metadata": { - "kernelspec": { - "display_name": "Python 3", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.7.0" - } - }, - "nbformat": 4, - "nbformat_minor": 4 -} diff --git a/research/to_do.md b/research/to_do.md deleted file mode 100644 index e510af90..00000000 --- a/research/to_do.md +++ /dev/null @@ -1,49 +0,0 @@ -A Place for Next Steps in Short Deck Implementation - -## Abstraction - -#### Information Abstraction -- hard code opening hand clusters -- decide how to store these for lookup in blueprint/real time algo -- run for short deck - -#### Action Abstraction -- not sure how this fits into blueprint/real time yet - -## Blueprint Algo -- apply to contrived short deck game - -## Real Time Search Algo -- need isomorphic/lossless handling of cards?? # Non-essential maybe.. -- mock up "toy" version - - pre-req: stateful version of short deck - -### Rules of Contrived Short Deck Game -- 3 players -- 2-9 removed -- no adjustments to hand rankings versus no-limit -- 10000 in stack, 50 small blind, 100 big blind -- limited betting - -#### Possible Next Steps -- fix short deck game and roll out to online hosting? -- go right on to full game? - -#### Current (Concise) Papers -- Abstraction - - https://www.cs.cmu.edu/~sandholm/hierarchical.aamas15.pdf <- this algo - - http://www.ifaamas.org/Proceedings/aamas2013/docs/p271.pdf <- these features -- Blueprint - - https://science.sciencemag.org/content/sci/suppl/2019/07/10/science.aay2400.DC1/aay2400-Brown-SM.pdf <- pseudo code -- Real Time Algo - - https://papers.nips.cc/paper/7993-depth-limited-solving-for-imperfect-information-games.pdf <- build off this - - make theses changes: - - [optimized vector-based linear cfr?](https://arxiv.org/pdf/1809.04040.pdf) - - [only samples chance events?](http://martin.zinkevich.org/publications/ijcai2011_rgbr.pdf) - -#### TODO: Colin -- Generate abstraction for 20 cards --- Program to turn that into dictionary and store separately -- Hard code preflop lossless -- Write next steps in docstring of blueprint algo -- Consider getting rid of notebooks before merging into develop.. 
\ No newline at end of file diff --git a/research/size_of_problem/action_sequences.pkl b/test/data/action_sequences.pkl similarity index 100% rename from research/size_of_problem/action_sequences.pkl rename to test/data/action_sequences.pkl diff --git a/test/data/random_action_sequences.pkl b/test/data/random_action_sequences.pkl new file mode 100644 index 00000000..41ab5a88 Binary files /dev/null and b/test/data/random_action_sequences.pkl differ diff --git a/test/data/random_offline_strategy.gz b/test/data/random_offline_strategy.gz new file mode 100644 index 00000000..ff4cfe7f Binary files /dev/null and b/test/data/random_offline_strategy.gz differ diff --git a/test/functional/test_short_deck.py b/test/functional/test_short_deck.py index 889bc437..276082ba 100644 --- a/test/functional/test_short_deck.py +++ b/test/functional/test_short_deck.py @@ -1,17 +1,21 @@ import collections +import json import copy import random from typing import List, Tuple, Optional +import joblib import pytest import numpy as np import dill as pickle -from pluribus.games.short_deck.state import ShortDeckPokerState +from pluribus.games.short_deck.state import ShortDeckPokerState, new_game, \ + InfoSetLookupTable from pluribus.games.short_deck.player import ShortDeckPokerPlayer from pluribus.poker.card import Card from pluribus.poker.pot import Pot from pluribus.utils.random import seed +from pluribus.poker.deck import Deck def _new_game( @@ -35,10 +39,10 @@ def _new_game( return state, pot -def _load_action_sequences(directory): +def _load_pkl_file(directory): with open(directory, "rb") as file: - action_sequences = pickle.load(file) - return action_sequences + pkl_file = pickle.load(file) + return pkl_file def test_short_deck_1(): @@ -203,18 +207,17 @@ def _get_flop(state: ShortDeckPokerState) -> List[Card]: @pytest.mark.parametrize("n_players", [2, 3]) -def test_call_action_sequence(n_players): +def test_call_action_sequence(n_players, n: int = 50): """ - Make sure we never see an action sequence of "raise", "call", "call" in the same - round with only two players. There would be a similar analog for more than two players, - but this should aid in initially finding the bug. + Make sure we never see an action sequence of "raise", "call", "call" when + down to two players """ # Seed the random number generation so things are procedural/reproducable. seed(42) - # example of a bad sequence in a two-handed game in one round + # Example of a bad sequence in a two-handed game in one round bad_seq = ["raise", "call", "call"] # Run some number of random iterations. - for _ in range(200): + for _ in range(n): state, _ = _new_game(n_players=n_players, small_blind=50, big_blind=100) betting_round_dict = collections.defaultdict(list) while state.betting_stage not in {"show_down", "terminal"}: @@ -231,22 +234,22 @@ def test_call_action_sequence(n_players): # Loop through the action history and make sure the bad # sequence has not happened. for i in range(len(no_fold_action_history)): - history_slice = no_fold_action_history[i : i + len(bad_seq)] + history_slice = no_fold_action_history[i: i + len(bad_seq)] assert history_slice != bad_seq state = state.apply_action(random_action) @pytest.mark.parametrize("n_players", [2, 3]) -def test_action_sequence(n_players: int): - """ - Check each round against validated action sequences to ensure the state class is - working correctly. 
- """ +def test_action_sequence( + n_players: int, + n: int = 50, + action_sequences_path: str = "test/data/action_sequences.pkl" +): + """Ensure action sequences are legal.. """ # Seed the random number generation so things are procedural/reproducable. seed(42) - directory = "research/size_of_problem/action_sequences.pkl" - action_sequences = _load_action_sequences(directory) - for i in range(200): + action_sequences = _load_pkl_file(action_sequences_path) + for i in range(n): state, _ = _new_game(n_players=n_players, small_blind=50, big_blind=100) betting_stage_dict = { @@ -281,14 +284,14 @@ def test_action_sequence(n_players: int): assert action_sequence in possible_sequences -def test_skips(n_players: int = 3): +def test_skips(n_players: int = 3, n: int = 50): """ - Check each round to make sure that skips are mod number of players and appended on - the skipped player's turn + Check each round to make sure that skips are mod number of players and + appended on the skipped player's turn """ # Seed the random number generation so things are procedural/reproducable. seed(42) - for _ in range(500): + for _ in range(n): state, _ = _new_game(n_players=n_players, small_blind=50, big_blind=100) while True: @@ -338,3 +341,137 @@ def test_skips(n_players: int = 3): for i, action in enumerate(actions[fold_idx:]): if i % n_players == 0: assert action == "skip" + + +def test_load_game_state( + n_players: int = 3, + n: int = 5, + random_actions_path: str = "test/data/random_action_sequences.pkl" +): + # Load a random sample of action sequences + action_sequences = _load_pkl_file(random_actions_path) + test_action_sequences = np.random.choice(action_sequences, n) + # Lookup table that defaults to 0 as the cluster id + # TODO: Not sure how to quiet the mypy typing complaint.. 
+ info_set_lut: InfoSetLookupTable = { + "pre_flop": collections.defaultdict(lambda: 0), + "flop": collections.defaultdict(lambda: 0), + "turn": collections.defaultdict(lambda: 0), + "river": collections.defaultdict(lambda: 0), + } + state: ShortDeckPokerState = new_game( + n_players, + info_set_lut=info_set_lut, + real_time_test=True, + public_cards=[] + ) + for action_sequence in test_action_sequences: + game_action_sequence = action_sequence.copy() + # Load current game state + current_game_state: ShortDeckPokerState = state.load_game_state( + offline_strategy={}, action_sequence=game_action_sequence + ) + current_history = current_game_state._history + check_action_seq_current = [] + for betting_stage in current_history.keys(): + check_action_seq_current += current_history[betting_stage] + check_action_sequence = [a for a in check_action_seq_current if a != "skip"] + assert check_action_sequence == action_sequence[:-1] + + new_state = current_game_state.deal_bayes() + full_history = new_state._history + check_action_seq_full = [] + for betting_stage in full_history.keys(): + check_action_seq_full += full_history[betting_stage] + check_action_sequence = [a for a in check_action_seq_full if a != "skip"] + assert check_action_sequence == action_sequence + + +def test_public_cards( + n_players: int = 3, + n: int = 5, + strategy_path: str = "test/data/random_offline_strategy.gz" +): + strategy = joblib.load(strategy_path) + histories = np.random.choice(list(strategy.keys()), n) + action_sequences = [] + public_cards_lst = [] + final_betting_round_lst: List[str] = [] + community_card_dict = { + "pre_flop": 0, + "flop": 3, + "turn": 4, + "river": 5, + } + ranks = list(range(10, 14 + 1)) + deck = Deck(include_ranks=ranks) + for history in histories: + history_dict = json.loads(history) + history_lst = history_dict["history"] + action_sequence = [] + betting_rounds = [] + for x in history_lst: + action_sequence += list(x.values())[0] + betting_rounds += list(x.keys()) + if not action_sequence: + continue + action_sequences.append(action_sequence) + final_betting_round = list(betting_rounds)[-1] + final_betting_round_lst.append(final_betting_round) + n_cards = community_card_dict[final_betting_round] + cards_in_deck = deck._cards_in_deck + public_cards = list( + np.random.choice(cards_in_deck, n_cards, replace=False) + ) + public_cards_lst.append(public_cards) + + # TODO: Not sure how to quiet mypy here for typing complaint.. 
+    info_set_lut: InfoSetLookupTable = {
+        "pre_flop": collections.defaultdict(lambda: 0),
+        "flop": collections.defaultdict(lambda: 0),
+        "turn": collections.defaultdict(lambda: 0),
+        "river": collections.defaultdict(lambda: 0),
+    }
+    for i in range(0, len(action_sequences)):
+        public_cards = public_cards_lst[i].copy()
+        final_betting_round = final_betting_round_lst[i]
+        if not public_cards and final_betting_round == "pre_flop":
+            continue
+        action_sequence = action_sequences[i].copy()
+        state: ShortDeckPokerState = new_game(
+            n_players,
+            info_set_lut=info_set_lut,
+            real_time_test=True,
+            public_cards=public_cards,
+        )
+        current_game_state: ShortDeckPokerState = state.load_game_state(
+            offline_strategy={}, action_sequence=action_sequence
+        )
+        new_state = current_game_state.deal_bayes()
+
+        cont = True
+        if len(public_cards) == 0:
+            loaded_betting_stage = "pre_flop"
+        elif len(public_cards) == 3:
+            loaded_betting_stage = "flop"
+        elif len(public_cards) == 4:
+            loaded_betting_stage = "turn"
+        elif len(public_cards) == 5:
+            loaded_betting_stage = "river"
+
+        public_info = new_state._public_information
+        for betting_stage in public_info.keys():
+            if betting_stage == "pre_flop":
+                # No public cards are dealt in the pre_flop stage.
+                continue
+            if cont:
+                card_len = community_card_dict[betting_stage]
+                assert public_cards[:card_len] == public_info[betting_stage]
+                if betting_stage == loaded_betting_stage:
+                    cont = False
+            else:
+                # We should only get here when the action sequence ended a
+                # betting round, so exactly one extra card has been dealt.
+                state_public_card_len = len(new_state.community_cards)
+                public_card_len = len(public_cards)
+                assert state_public_card_len == public_card_len + 1
diff --git a/test/regression/check_bayes.py b/test/regression/check_bayes.py
new file mode 100644
index 00000000..d42faf5f
--- /dev/null
+++ b/test/regression/check_bayes.py
@@ -0,0 +1,157 @@
+import joblib
+import collections
+import json
+from typing import DefaultDict
+
+import numpy as np
+from tqdm import trange
+
+from pluribus.poker.deck import Deck
+from pluribus.games.short_deck.state import ShortDeckPokerState, new_game
+
+
+def _calculate_strategy(
+    state: ShortDeckPokerState,
+    I: str,
+    strategy: DefaultDict[str, DefaultDict[str, float]],
+) -> str:
+    sigma = collections.defaultdict(lambda: collections.defaultdict(lambda: 1 / 3))
+    try:
+        # If the strategy at this infoset is empty, fall back to uniform.
+        sigma[I] = strategy[I].copy()
+        if sigma[I] == {}:
+            raise KeyError
+        norm = sum(sigma[I].values())
+        for a in sigma[I].keys():
+            sigma[I][a] /= norm
+        a = np.random.choice(
+            list(sigma[I].keys()), 1, p=list(sigma[I].values()),
+        )[0]
+    except KeyError:
+        p = 1 / len(state.legal_actions)
+        probabilities = np.full(len(state.legal_actions), p)
+        a = np.random.choice(state.legal_actions, p=probabilities)
+        sigma[I] = {action: p for action in state.legal_actions}
+    return a
+
+
+n = 10000
+n_players = 3
+inner_iters = 1000
+
+strategy_dir = "research/test_methodology/test_strategy2/"
+strategy_path = "unnormalized_output/offline_strategy_1500.gz"
+check = joblib.load(strategy_dir + strategy_path)
+histories = np.random.choice(list(check.keys()), n)
+action_sequences = []
+public_cards_lst = []
+community_card_dict = {
+    "pre_flop": 0,
+    "flop": 3,
+    "turn": 4,
+    "river": 5,
+}
+# Use a shorter deck to keep the simulation time reasonable.
+ranks = list(range(12, 14 + 1))
+deck = Deck(include_ranks=ranks)
+found = 0
+for idx, history in enumerate(histories):
+    if idx % 100 == 0:
+        print(idx)
+    history_dict = json.loads(history)
+    history_lst = history_dict["history"]
+    if history_lst == []:
+        continue
+    action_sequence = []
+    betting_rounds = []
+    for x in history_lst:
+        action_sequence += list(x.values())[0]
+        betting_rounds += list(x.keys())
+    if not betting_rounds:
+        # Should not happen, since empty histories are skipped above.
+        continue
+    final_betting_round = betting_rounds[-1]
+    # Hacking this for now, keeping the simulation small.
+    if len(action_sequence) > 2:
+        continue
+    action_sequences.append(action_sequence)
+    n_cards = community_card_dict[final_betting_round]
+    cards_in_deck = deck._cards_in_deck
+    public_cards = np.random.choice(cards_in_deck, n_cards, replace=False)
+    public_cards_lst.append(list(public_cards))
+    found += 1
+    if found == 2:
+        break
+    # Assuming we find 2 action sequences out of 1000.
+
+store_hand_probs = {}
+for i in trange(0, len(action_sequences)):
+    public_cards = public_cards_lst[i].copy()
+    # TODO: There may be a bug when public_cards is empty; check this later.
+    action_sequence = action_sequences[i].copy()
+    state: ShortDeckPokerState = new_game(
+        n_players,
+        real_time_test=True,
+        public_cards=public_cards,
+    )
+    current_game_state: ShortDeckPokerState = state.load_game_state(
+        offline_strategy=check, action_sequence=action_sequence
+    )
+    new_state = current_game_state.deal_bayes()
+
+    this_hand_probs = new_state._starting_hand_probs.copy()
+    for p_i in this_hand_probs.keys():
+        for starting_hand in this_hand_probs[p_i].keys():
+            x = this_hand_probs[p_i][starting_hand]
+            this_hand_probs[p_i][starting_hand] = {'deal_bayes': x, 'sim': None}
+
+    action_sequence = action_sequences[i].copy()
+    public_cards = public_cards_lst[i].copy()
+    info_set_lut = {}
+    cont = True
+    tries = 0
+    success = 0
+    hand_dict = {0: {}, 1: {}, 2: {}}
+    while cont:
+        # Rejection sampling: deal a fresh game and replay the offline
+        # strategy until it reproduces the target action sequence.
+        actions = []
+        count = 0
+        state: ShortDeckPokerState = new_game(
+            n_players,
+            info_set_lut,
+            real_time_test=True,
+            public_cards=public_cards
+        )
+        info_set_lut = state.info_set_lut
+        while True:
+            if tries == 1000:  # Definitely a hack; be careful about this value.
+                # Seed the dealt hole cards with a zero count so they show up
+                # in the tallies even if this attempt fails.
+                for p_i, player in enumerate(state.players):
+                    hole_cards = tuple(player.cards)
+                    hand_dict[p_i].setdefault(hole_cards, 0)
+            random_action = _calculate_strategy(state, state.info_set, check)
+            if random_action != action_sequence[count]:
+                tries += 1
+                break
+            state = state.apply_action(random_action)
+            actions.append(random_action)
+            if actions == action_sequence:
+                # The sampled play-out matches the target sequence, so tally
+                # the hole cards that were dealt to each player.
+                for p_i, player in enumerate(state.players):
+                    hole_cards = tuple(player.cards)
+                    hand_dict[p_i][hole_cards] = hand_dict[p_i].get(hole_cards, 0) + 1
+                success += 1
+                break
+            count += 1
+        if success == 1:
+            break
+    # Drop into the debugger to compare hand_dict with this_hand_probs.
+    import ipdb
+    ipdb.set_trace()
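The check_bayes.py script above stops at an ipdb trace so the simulated counts in hand_dict can be compared with the deal_bayes probabilities by hand; the 'sim' slots in this_hand_probs and the store_hand_probs dict are left unfilled. Below is a minimal sketch of how that comparison could be finished, assuming the starting hands in _starting_hand_probs are keyed by the same hole-card tuples the rejection sampler records; the helper name summarise_simulation is hypothetical and not part of the diff.

from typing import Any, Dict, Tuple


def summarise_simulation(
    hand_dict: Dict[int, Dict[Tuple[Any, ...], int]],
    this_hand_probs: Dict[int, Dict[Any, Dict[str, Any]]],
) -> Dict[int, Dict[Any, Dict[str, Any]]]:
    """Fill the 'sim' slots with normalised frequencies from the sampler.

    Hypothetical helper: assumes this_hand_probs is keyed per player by the
    same hole-card tuples that the rejection sampler stores in hand_dict.
    """
    for p_i, counts in hand_dict.items():
        total = sum(counts.values())
        for hole_cards, count in counts.items():
            # Empirical probability of this starting hand given the action
            # sequence; 0.0 if no successful play-outs were recorded.
            freq = count / total if total else 0.0
            if hole_cards in this_hand_probs.get(p_i, {}):
                this_hand_probs[p_i][hole_cards]["sim"] = freq
    return this_hand_probs

With a helper like this, the trailing ipdb.set_trace() could instead record store_hand_probs[i] = summarise_simulation(hand_dict, this_hand_probs), and the 'deal_bayes' and 'sim' entries could be inspected or asserted against each other once enough successful play-outs have been collected.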