diff --git a/486577184_3352016061601916_1683643685160174155_n.jpg b/486577184_3352016061601916_1683643685160174155_n.jpg
new file mode 100644
index 0000000000..bc3aca5a58
Binary files /dev/null and b/486577184_3352016061601916_1683643685160174155_n.jpg differ
diff --git a/all_metrics_plot.png b/all_metrics_plot.png
new file mode 100644
index 0000000000..eeafb661e5
Binary files /dev/null and b/all_metrics_plot.png differ
diff --git a/covered.png b/covered.png
new file mode 100644
index 0000000000..b1cb2dc446
Binary files /dev/null and b/covered.png differ
diff --git a/demos/MiniGrid-BoxingBooksUpForStorage-16x16-N2-v0/MiniGrid-BoxingBooksUpForStorage-16x16-N2-v0_0 b/demos/MiniGrid-BoxingBooksUpForStorage-16x16-N2-v0/MiniGrid-BoxingBooksUpForStorage-16x16-N2-v0_0
new file mode 100644
index 0000000000..fc13cbf17c
Binary files /dev/null and b/demos/MiniGrid-BoxingBooksUpForStorage-16x16-N2-v0/MiniGrid-BoxingBooksUpForStorage-16x16-N2-v0_0 differ
diff --git a/demos/MiniGrid-CleaningACar-16x16-N2-v0/MiniGrid-CleaningACar-16x16-N2-v0_0 b/demos/MiniGrid-CleaningACar-16x16-N2-v0/MiniGrid-CleaningACar-16x16-N2-v0_0
new file mode 100644
index 0000000000..39451f2a89
Binary files /dev/null and b/demos/MiniGrid-CleaningACar-16x16-N2-v0/MiniGrid-CleaningACar-16x16-N2-v0_0 differ
diff --git a/demos/MiniGrid-CleaningShoes-16x16-N2-v0/MiniGrid-CleaningShoes-16x16-N2-v0_0 b/demos/MiniGrid-CleaningShoes-16x16-N2-v0/MiniGrid-CleaningShoes-16x16-N2-v0_0
new file mode 100644
index 0000000000..2a68cf6c7b
Binary files /dev/null and b/demos/MiniGrid-CleaningShoes-16x16-N2-v0/MiniGrid-CleaningShoes-16x16-N2-v0_0 differ
diff --git a/demos/MiniGrid-CleaningUpTheKitchenOnly-16x16-N2-v0/MiniGrid-CleaningUpTheKitchenOnly-16x16-N2-v0_0 b/demos/MiniGrid-CleaningUpTheKitchenOnly-16x16-N2-v0/MiniGrid-CleaningUpTheKitchenOnly-16x16-N2-v0_0
new file mode 100644
index 0000000000..77c13ec731
Binary files /dev/null and b/demos/MiniGrid-CleaningUpTheKitchenOnly-16x16-N2-v0/MiniGrid-CleaningUpTheKitchenOnly-16x16-N2-v0_0 differ
diff --git a/demos/MiniGrid-CollectMisplacedItems-16x16-N2-v0/MiniGrid-CollectMisplacedItems-16x16-N2-v0_0 b/demos/MiniGrid-CollectMisplacedItems-16x16-N2-v0/MiniGrid-CollectMisplacedItems-16x16-N2-v0_0
new file mode 100644
index 0000000000..3a312b34d9
Binary files /dev/null and b/demos/MiniGrid-CollectMisplacedItems-16x16-N2-v0/MiniGrid-CollectMisplacedItems-16x16-N2-v0_0 differ
diff --git a/demos/MiniGrid-InstallingAPrinter-16x16-N2-v0/MiniGrid-InstallingAPrinter-16x16-N2-v0_0 b/demos/MiniGrid-InstallingAPrinter-16x16-N2-v0/MiniGrid-InstallingAPrinter-16x16-N2-v0_0
new file mode 100644
index 0000000000..7020128144
Binary files /dev/null and b/demos/MiniGrid-InstallingAPrinter-16x16-N2-v0/MiniGrid-InstallingAPrinter-16x16-N2-v0_0 differ
diff --git a/demos/MiniGrid-LayingWoodFloors-16x16-N2-v0/MiniGrid-LayingWoodFloors-16x16-N2-v0_0 b/demos/MiniGrid-LayingWoodFloors-16x16-N2-v0/MiniGrid-LayingWoodFloors-16x16-N2-v0_0
new file mode 100644
index 0000000000..ea8b5874a3
Binary files /dev/null and b/demos/MiniGrid-LayingWoodFloors-16x16-N2-v0/MiniGrid-LayingWoodFloors-16x16-N2-v0_0 differ
diff --git a/demos/MiniGrid-MakingTea-16x16-N2-v0/MiniGrid-MakingTea-16x16-N2-v0_0 b/demos/MiniGrid-MakingTea-16x16-N2-v0/MiniGrid-MakingTea-16x16-N2-v0_0
new file mode 100644
index 0000000000..ab28d8d64b
Binary files /dev/null and b/demos/MiniGrid-MakingTea-16x16-N2-v0/MiniGrid-MakingTea-16x16-N2-v0_0 differ
diff --git a/demos/MiniGrid-MovingBoxesToStorage-16x16-N2-v0/MiniGrid-MovingBoxesToStorage-16x16-N2-v0_0 b/demos/MiniGrid-MovingBoxesToStorage-16x16-N2-v0/MiniGrid-MovingBoxesToStorage-16x16-N2-v0_0
new file mode 100644
index 0000000000..382c43cea0
Binary files /dev/null and b/demos/MiniGrid-MovingBoxesToStorage-16x16-N2-v0/MiniGrid-MovingBoxesToStorage-16x16-N2-v0_0 differ
diff --git a/demos/MiniGrid-OpeningPackages-16x16-N2-v0/MiniGrid-OpeningPackages-16x16-N2-v0_0 b/demos/MiniGrid-OpeningPackages-16x16-N2-v0/MiniGrid-OpeningPackages-16x16-N2-v0_0
new file mode 100644
index 0000000000..96b0eecb02
Binary files /dev/null and b/demos/MiniGrid-OpeningPackages-16x16-N2-v0/MiniGrid-OpeningPackages-16x16-N2-v0_0 differ
diff --git a/demos/MiniGrid-OrganizingFileCabinet-16x16-N2-v0/MiniGrid-OrganizingFileCabinet-16x16-N2-v0_0 b/demos/MiniGrid-OrganizingFileCabinet-16x16-N2-v0/MiniGrid-OrganizingFileCabinet-16x16-N2-v0_0
new file mode 100644
index 0000000000..77701ff4b9
Binary files /dev/null and b/demos/MiniGrid-OrganizingFileCabinet-16x16-N2-v0/MiniGrid-OrganizingFileCabinet-16x16-N2-v0_0 differ
diff --git a/demos/MiniGrid-PuttingAwayDishesAfterCleaning-16x16-N2-v0/MiniGrid-PuttingAwayDishesAfterCleaning-16x16-N2-v0_0 b/demos/MiniGrid-PuttingAwayDishesAfterCleaning-16x16-N2-v0/MiniGrid-PuttingAwayDishesAfterCleaning-16x16-N2-v0_0
new file mode 100644
index 0000000000..bf751d78b9
Binary files /dev/null and b/demos/MiniGrid-PuttingAwayDishesAfterCleaning-16x16-N2-v0/MiniGrid-PuttingAwayDishesAfterCleaning-16x16-N2-v0_0 differ
diff --git a/demos/MiniGrid-SettingUpCandles-16x16-N2-v0/MiniGrid-SettingUpCandles-16x16-N2-v0_0 b/demos/MiniGrid-SettingUpCandles-16x16-N2-v0/MiniGrid-SettingUpCandles-16x16-N2-v0_0
new file mode 100644
index 0000000000..26a152ccdb
Binary files /dev/null and b/demos/MiniGrid-SettingUpCandles-16x16-N2-v0/MiniGrid-SettingUpCandles-16x16-N2-v0_0 differ
diff --git a/demos/MiniGrid-SortingBooks-16x16-N2-v0/MiniGrid-SortingBooks-16x16-N2-v0_0 b/demos/MiniGrid-SortingBooks-16x16-N2-v0/MiniGrid-SortingBooks-16x16-N2-v0_0
new file mode 100644
index 0000000000..77ae2a429d
Binary files /dev/null and b/demos/MiniGrid-SortingBooks-16x16-N2-v0/MiniGrid-SortingBooks-16x16-N2-v0_0 differ
diff --git a/demos/MiniGrid-SortingBooks-16x16-N2-v0/MiniGrid-SortingBooks-16x16-N2-v0_1 b/demos/MiniGrid-SortingBooks-16x16-N2-v0/MiniGrid-SortingBooks-16x16-N2-v0_1
new file mode 100644
index 0000000000..e8ae981608
Binary files /dev/null and b/demos/MiniGrid-SortingBooks-16x16-N2-v0/MiniGrid-SortingBooks-16x16-N2-v0_1 differ
diff --git a/demos/MiniGrid-SortingBooks-16x16-N2-v0/MiniGrid-SortingBooks-16x16-N2-v0_2 b/demos/MiniGrid-SortingBooks-16x16-N2-v0/MiniGrid-SortingBooks-16x16-N2-v0_2
new file mode 100644
index 0000000000..b160d06854
Binary files /dev/null and b/demos/MiniGrid-SortingBooks-16x16-N2-v0/MiniGrid-SortingBooks-16x16-N2-v0_2 differ
diff --git a/demos/MiniGrid-StoringFood-16x16-N2-v0/MiniGrid-StoringFood-16x16-N2-v0_0 b/demos/MiniGrid-StoringFood-16x16-N2-v0/MiniGrid-StoringFood-16x16-N2-v0_0
new file mode 100644
index 0000000000..90acc25406
Binary files /dev/null and b/demos/MiniGrid-StoringFood-16x16-N2-v0/MiniGrid-StoringFood-16x16-N2-v0_0 differ
diff --git a/demos/MiniGrid-ThrowingAwayLeftovers-16x16-N2-v0/MiniGrid-ThrowingAwayLeftovers-16x16-N2-v0_0 b/demos/MiniGrid-ThrowingAwayLeftovers-16x16-N2-v0/MiniGrid-ThrowingAwayLeftovers-16x16-N2-v0_0
new file mode 100644
index 0000000000..6a0bb3279a
Binary files /dev/null and b/demos/MiniGrid-ThrowingAwayLeftovers-16x16-N2-v0/MiniGrid-ThrowingAwayLeftovers-16x16-N2-v0_0 differ
diff --git a/demos/MiniGrid-WashingPotsAndPans-16x16-N2-v0/MiniGrid-WashingPotsAndPans-16x16-N2-v0_0 b/demos/MiniGrid-WashingPotsAndPans-16x16-N2-v0/MiniGrid-WashingPotsAndPans-16x16-N2-v0_0
new file mode 100644
index 0000000000..7884ebdaf6
Binary files /dev/null and b/demos/MiniGrid-WashingPotsAndPans-16x16-N2-v0/MiniGrid-WashingPotsAndPans-16x16-N2-v0_0 differ
diff --git a/demos/MiniGrid-WateringHouseplants-16x16-N2-v0/MiniGrid-WateringHouseplants-16x16-N2-v0_0 b/demos/MiniGrid-WateringHouseplants-16x16-N2-v0/MiniGrid-WateringHouseplants-16x16-N2-v0_0
new file mode 100644
index 0000000000..966f1a9653
Binary files /dev/null and b/demos/MiniGrid-WateringHouseplants-16x16-N2-v0/MiniGrid-WateringHouseplants-16x16-N2-v0_0 differ
diff --git a/equivalent.png b/equivalent.png
new file mode 100644
index 0000000000..afde84eeca
Binary files /dev/null and b/equivalent.png differ
diff --git a/extra/MiniGrid-CleaningACar-16x16-N2-v0_0 b/extra/MiniGrid-CleaningACar-16x16-N2-v0_0
new file mode 100644
index 0000000000..0cda03cceb
Binary files /dev/null and b/extra/MiniGrid-CleaningACar-16x16-N2-v0_0 differ
diff --git a/extra/MiniGrid-CollectMisplacedItems-16x16-N2-v0_0 b/extra/MiniGrid-CollectMisplacedItems-16x16-N2-v0_0
new file mode 100644
index 0000000000..525d1b37cb
Binary files /dev/null and b/extra/MiniGrid-CollectMisplacedItems-16x16-N2-v0_0 differ
diff --git a/extra/MiniGrid-OpeningPackages-16x16-N2-v0_0 b/extra/MiniGrid-OpeningPackages-16x16-N2-v0_0
new file mode 100644
index 0000000000..70a54c0248
Binary files /dev/null and b/extra/MiniGrid-OpeningPackages-16x16-N2-v0_0 differ
diff --git a/extra/MiniGrid-SortingBooks-16x16-N2-v0_0 b/extra/MiniGrid-SortingBooks-16x16-N2-v0_0
new file mode 100644
index 0000000000..40139ef049
Binary files /dev/null and b/extra/MiniGrid-SortingBooks-16x16-N2-v0_0 differ
diff --git a/extra/MiniGrid-ThrowingAwayLeftovers-16x16-N2-v0_0 b/extra/MiniGrid-ThrowingAwayLeftovers-16x16-N2-v0_0
new file mode 100644
index 0000000000..3deaefae46
Binary files /dev/null and b/extra/MiniGrid-ThrowingAwayLeftovers-16x16-N2-v0_0 differ
diff --git a/hitl_1_covered.png b/hitl_1_covered.png
new file mode 100644
index 0000000000..c666a4d288
Binary files /dev/null and b/hitl_1_covered.png differ
diff --git a/hitl_1_equivalent.png b/hitl_1_equivalent.png
new file mode 100644
index 0000000000..6a81282c35
Binary files /dev/null and b/hitl_1_equivalent.png differ
diff --git a/hitl_1_missed.png b/hitl_1_missed.png
new file mode 100644
index 0000000000..0fa9c7d24e
Binary files /dev/null and b/hitl_1_missed.png differ
diff --git a/hitl_1_overfit.png b/hitl_1_overfit.png
new file mode 100644
index 0000000000..e7e1e13103
Binary files /dev/null and b/hitl_1_overfit.png differ
diff --git a/hitl_5_covered.png b/hitl_5_covered.png
new file mode 100644
index 0000000000..7a09ebb99c
Binary files /dev/null and b/hitl_5_covered.png differ
diff --git a/hitl_5_equivalent.png b/hitl_5_equivalent.png
new file mode 100644
index 0000000000..56221d0766
Binary files /dev/null and b/hitl_5_equivalent.png differ
diff --git a/hitl_5_missed.png b/hitl_5_missed.png
new file mode 100644
index 0000000000..1b4ae6b216
Binary files /dev/null and b/hitl_5_missed.png differ
diff --git a/hitl_5_overfit.png b/hitl_5_overfit.png
new file mode 100644
index 0000000000..38cf7c75c9
Binary files /dev/null and b/hitl_5_overfit.png differ
diff --git a/learning_curve_all_metrics.png b/learning_curve_all_metrics.png
new file mode 100644
index 0000000000..18fd3848c9
Binary files /dev/null and b/learning_curve_all_metrics.png differ
diff --git
a/lifelong_learning_success.png b/lifelong_learning_success.png
new file mode 100644
index 0000000000..746cb6de2c
Binary files /dev/null and b/lifelong_learning_success.png differ
diff --git a/llx MITSCx21 Certificate _ LLX.pdf b/llx MITSCx21 Certificate _ LLX.pdf
new file mode 100644
index 0000000000..3b78769f00
Binary files /dev/null and b/llx MITSCx21 Certificate _ LLX.pdf differ
diff --git a/missed.png b/missed.png
new file mode 100644
index 0000000000..429e101873
Binary files /dev/null and b/missed.png differ
diff --git a/operator_learning_summary.png b/operator_learning_summary.png
new file mode 100644
index 0000000000..8fbc0574c2
Binary files /dev/null and b/operator_learning_summary.png differ
diff --git a/output_image.jpeg b/output_image.jpeg
new file mode 100644
index 0000000000..36f4f458f4
Binary files /dev/null and b/output_image.jpeg differ
diff --git a/overfit.png b/overfit.png
new file mode 100644
index 0000000000..1ac31c44df
Binary files /dev/null and b/overfit.png differ
diff --git a/predicators/approaches/minigrid_controller_approach.py b/predicators/approaches/minigrid_controller_approach.py
new file mode 100644
index 0000000000..de2748db29
--- /dev/null
+++ b/predicators/approaches/minigrid_controller_approach.py
@@ -0,0 +1,30 @@
+"""An approach that queries a human for low-level actions via the console."""
+
+from typing import Callable
+
+from predicators.approaches import BaseApproach
+from predicators.structs import Action, State, Task
+
+
+class MinigridControllerApproach(BaseApproach):
+    """Prompts the user for a discrete action index at every step."""
+
+    @classmethod
+    def get_name(cls) -> str:
+        return "minigrid_controller"
+
+    @property
+    def is_learning_based(self) -> bool:
+        return False
+
+    def _solve(self, task: Task, timeout: int) -> Callable[[State], Action]:
+        zero_vec = self._action_space.low
+
+        def _policy(_: State) -> Action:
+            action_vec = zero_vec.copy()
+            print(task.goal)
+            action_vec[int(input("Action: "))] = 1.0
+            print(action_vec)
+            return Action(action_vec)
+
+        return _policy
diff --git a/predicators/envs/mini_behavior_env.py b/predicators/envs/mini_behavior_env.py
new file mode 100644
index 0000000000..56467cf84f
--- /dev/null
+++ b/predicators/envs/mini_behavior_env.py
@@ -0,0 +1,386 @@
+"""A MiniBehavior environment wrapping https://github.com/StanfordVL/mini_behavior."""
+import sys
+from typing import ClassVar, Dict, List, Optional, Sequence, Set
+
+import gymnasium as gym
+import matplotlib
+import numpy as np
+from gym.spaces import Box
+
+from predicators import utils
+from predicators.envs import BaseEnv
+from predicators.settings import CFG
+from predicators.structs import Action, EnvironmentTask, Image, Object, \
+    Observation, Predicate, State, Type, Video
+
+from minigrid.wrappers import *
+from mini_behavior.window import Window
+from mini_behavior.utils.save import get_step, save_demo
+from mini_behavior.grid import GridDimension
+from mini_behavior.utils.wrappers import MiniBHFullyObsWrapper
+from mini_behavior.utils.save import all_state_values
+
+class MiniBehavior(BaseEnv):
+    """MiniBehavior environment wrapping the mini_behavior gym environments."""
+
+    name_to_enum: ClassVar[Dict[str, int]] = OBJECT_TO_IDX
+
+    object_type = Type("obj", ["row", "column", "type", "state", "color"])
+
+    def __init__(self, use_gui: bool = True) -> None:
+        super().__init__(use_gui)
+
+        # Predicates
+        self._IsLoc = Predicate("IsLoc", [self.object_type], self._IsLoc_holds)
+        self._Above = Predicate("Above", [self.object_type, self.object_type],
+                                self._Above_holds)
+        self._Below = Predicate("Below",
[self.object_type, self.object_type], + self._Below_holds) + self._RightOf = Predicate("RightOf", + [self.object_type, self.object_type], + self._RightOf_holds) + self._LeftOf = Predicate("LeftOf", + [self.object_type, self.object_type], + self._LeftOf_holds) + self._IsFacingUp = Predicate("IsFacingUp", [self.object_type], + self._IsFacingUp_holds) + self._IsFacingDown = Predicate("IsFacingDown", [self.object_type], + self._IsFacingDown_holds) + self._IsFacingLeft = Predicate("IsFacingLeft", [self.object_type], + self._IsFacingLeft_holds) + self._IsFacingRight = Predicate("IsFacingRight", [self.object_type], + self._IsFacingRight_holds) + self._IsNonGoalLoc = Predicate("IsNonGoalLoc", [self.object_type], + self._IsNonGoalLoc_holds) + self._Unknown = Predicate("Unknown", [self.object_type], + self._Unknown_holds) + self._Found = Predicate("Found", [self.object_type], + self._Found_holds) + self._IsAgent, self._At, self._IsGoal, self._IsBall, \ + self._IsKey, self._IsBox, self._IsRed, self._IsGreen, \ + self._IsBlue, self._IsPurple, self._IsYellow, self._IsGrey, \ + self._Holding, self._Near = self.get_goal_predicates() + + self.last_action = None + + # NOTE: we can change the level by modifying what we pass + + # into gym.make here. + if CFG.mini_behavior_env_fully_observable: + self._gym_env = MiniBHFullyObsWrapper(gym.make(CFG.mini_behavior_env_name)) + else: + NotImplementedError("Partial Observability Not implemented yet") + + @classmethod + def get_goal_predicates(cls) -> list[Predicate]: + """Defined public so that the perceiver can use it.""" + return [Predicate("IsAgent", [cls.object_type], cls._IsAgent_holds), + Predicate("At", [cls.object_type, cls.object_type], cls._At_holds), + Predicate("IsGoal", [cls.object_type], cls._IsGoal_holds), + Predicate("IsBall", [cls.object_type], cls._IsBall_holds), + Predicate("IsKey", [cls.object_type], cls._IsKey_holds), + Predicate("IsBox", [cls.object_type], cls._IsBox_holds), + Predicate("IsRed", [cls.object_type], cls._IsRed_holds), + Predicate("IsGreen", [cls.object_type], cls._IsGreen_holds), + Predicate("IsBlue", [cls.object_type], cls._IsBlue_holds), + Predicate("IsPurple", [cls.object_type], cls._IsPurple_holds), + Predicate("IsYellow", [cls.object_type], cls._IsYellow_holds), + Predicate("IsGrey", [cls.object_type], cls._IsGrey_holds), + Predicate("Holding", [cls.object_type], cls._Holding_holds), + Predicate("Near", [cls.object_type, cls.object_type], cls._Near_holds)] + + + def _generate_train_tasks(self) -> List[EnvironmentTask]: + return self._get_tasks(num=CFG.num_train_tasks, train_or_test="train") + + def _generate_test_tasks(self) -> List[EnvironmentTask]: + return self._get_tasks(num=CFG.num_test_tasks, train_or_test="test") + + @classmethod + def get_name(cls) -> str: + return "mini_behavior_env" + + def get_observation(self) -> Observation: + return self._copy_observation(self._current_observation) + + def render_state_plt( + self, + state: State, + task: EnvironmentTask, + action: Optional[Action] = None, + caption: Optional[str] = None) -> matplotlib.figure.Figure: + raise NotImplementedError("This env does not use Matplotlib") + + def render_state(self, + state: State, + task: EnvironmentTask, + action: Optional[Action] = None, + caption: Optional[str] = None) -> Video: + raise NotImplementedError("A gym environment cannot render " + "arbitrary states.") + + def render(self, + action: Optional[Action] = None, + caption: Optional[str] = None) -> Video: + assert caption is None + arr: Image = self._gym_env.get_frame() + 
return [arr] + + @property + def predicates(self) -> Set[Predicate]: + return { + self._At, self._IsLoc, self._Above, self._Below, + self._RightOf, self._LeftOf, self._IsAgent, self._IsGoal, self._IsNonGoalLoc, + self._IsFacingUp, self._IsFacingDown, self._IsFacingLeft, self._IsFacingRight, + self._Unknown, self._Found, self._IsBall, self._IsKey, self._IsBox, self._IsRed, + self._IsGreen, self._IsBlue, self._IsPurple, self._IsYellow, self._IsGrey, + self._Holding, self._Near + } + + @property + def goal_predicates(self) -> Set[Predicate]: + return {self._IsAgent, self._At, self._IsGoal} + + @property + def types(self) -> Set[Type]: + return {self.object_type} + + @property + def action_space(self) -> Box: + # One-hot encoding of discrete action space. + num_actions = 15 + assert self._gym_env.action_space.n == num_actions # type: ignore + lowers = np.zeros(num_actions, dtype=np.float32) + uppers = np.ones(num_actions, dtype=np.float32) + return Box(lowers, uppers) + + def reset(self, train_or_test: str, task_idx: int) -> Observation: + """Resets the current state to the train or test task initial state.""" + self._current_task = self.get_task(train_or_test, task_idx) + self._current_observation = self._current_task.init_obs + # We now need to reset the underlying gym environment to the correct + # state. + seed = utils.get_task_seed(train_or_test, task_idx) + self._reset_initial_state_from_seed(seed) + return self._copy_observation(self._current_observation) + + def simulate(self, state: State, action: Action) -> State: + raise NotImplementedError("Simulate not implemented for gym envs. " + + "Try using --bilevel_plan_without_sim True") + + def step(self, action: Action) -> Observation: + # Convert our actions to their discrete action space. + discrete_action = np.argmax(action.arr) + + goal_position = [ + y.cur_pos for x, y in enumerate(self._gym_env.grid.grid) if isinstance(y, Goal) + ] + self._current_observation = self._gym_env.step(discrete_action) + self._gym_env.render() + self.last_action = discrete_action + self._current_observation[4]['last_action'] = self.last_action + + if CFG.mini_behavior_gym_render: + # save frame to png + visual = self._gym_env.get_frame() + import matplotlib.pyplot as plt + plt.imsave('render.png', visual.astype('uint8')) + + + return self._copy_observation(self._current_observation) + + def goal_reached(self) -> bool: + if len(self._current_observation) == 5: + return self._current_observation[2] + return False + + def _get_tasks(self, num: int, + train_or_test: str) -> List[EnvironmentTask]: + tasks = [] + for task_idx in range(num): + seed = utils.get_task_seed(train_or_test, task_idx) + init_obs = self._reset_initial_state_from_seed(seed) + goal_description = self._gym_env.mission + task = EnvironmentTask(init_obs, goal_description) + tasks.append(task) + return tasks + + def _reset_initial_state_from_seed(self, seed: int) -> Observation: + self._gym_env.reset(seed=seed) + return self._gym_env.gen_full_obs() + + @classmethod + def _IsLoc_holds(cls, state: State, objects: Sequence[Object]) -> bool: + # Free spaces and goals are locations. 
+ loc, = objects + obj_type = int(state.get(loc, "type")) + return obj_type in {cls.name_to_enum["empty"], cls.name_to_enum["goal"]} + + @classmethod + def _IsGoal_holds(cls, state: State, objects: Sequence[Object]) -> bool: + return cls._check_enum(state, objects, "goal") + + @classmethod + def _IsAgent_holds(cls, state: State, objects: Sequence[Object]) -> bool: + return cls._check_enum(state, objects, "agent") + + @classmethod + def _IsBall_holds(cls, state: State, objects: Sequence[Object]) -> bool: + return cls._check_enum(state, objects, "ball") + + @classmethod + def _IsKey_holds(cls, state: State, objects: Sequence[Object]) -> bool: + return cls._check_enum(state, objects, "key") + + @classmethod + def _IsBox_holds(cls, state: State, objects: Sequence[Object]) -> bool: + return cls._check_enum(state, objects, "box") + + @classmethod + def _IsRed_holds(cls, state: State, objects: Sequence[Object]) -> bool: + obj, = objects + return state.get(obj, "color") == 'red' + + @classmethod + def _IsGreen_holds(cls, state: State, objects: Sequence[Object]) -> bool: + obj, = objects + return state.get(obj, "color") == 'green' + + @classmethod + def _IsBlue_holds(cls, state: State, objects: Sequence[Object]) -> bool: + obj, = objects + return state.get(obj, "color") == 'blue' + + @classmethod + def _IsPurple_holds(cls, state: State, objects: Sequence[Object]) -> bool: + obj, = objects + return state.get(obj, "color") == 'purple' + + @classmethod + def _IsYellow_holds(cls, state: State, objects: Sequence[Object]) -> bool: + obj, = objects + return state.get(obj, "color") == 'yellow' + + @classmethod + def _IsGrey_holds(cls, state: State, objects: Sequence[Object]) -> bool: + obj, = objects + return state.get(obj, "color") == 'grey' + + @classmethod + def _IsNonGoalLoc_holds(cls, state: State, + objects: Sequence[Object]) -> bool: + return cls._check_enum(state, objects, "empty") + + @classmethod + def _At_holds(cls, state: State, objects: Sequence[Object]) -> bool: + obj1, _ = objects + if cls._check_enum(state, [obj1], "agent"): + return cls._check_spatial_relation(state, objects, 0, 0) + return False + + @classmethod + def _Above_holds(cls, state: State, objects: Sequence[Object]) -> bool: + return cls._check_spatial_relation(state, objects, 1, 0) + + @classmethod + def _Below_holds(cls, state: State, objects: Sequence[Object]) -> bool: + return cls._check_spatial_relation(state, objects, -1, 0) + + @classmethod + def _RightOf_holds(cls, state: State, objects: Sequence[Object]) -> bool: + return cls._check_spatial_relation(state, objects, 0, -1) + + @classmethod + def _LeftOf_holds(cls, state: State, objects: Sequence[Object]) -> bool: + return cls._check_spatial_relation(state, objects, 0, 1) + + @classmethod + def _IsFacingRight_holds(cls, state: State, objects: Sequence[Object]) -> bool: + obj, = objects + if cls._check_enum(state, [obj], "agent"): + return state.get(obj, "state") == 0 + return False + + @classmethod + def _IsFacingDown_holds(cls, state: State, objects: Sequence[Object]) -> bool: + obj, = objects + if cls._check_enum(state, [obj], "agent"): + return state.get(obj, "state") == 1 + return False + + @classmethod + def _IsFacingLeft_holds(cls, state: State, objects: Sequence[Object]) -> bool: + obj, = objects + if cls._check_enum(state, [obj], "agent"): + return state.get(obj, "state") == 2 + return False + + @classmethod + def _IsFacingUp_holds(cls, state: State, objects: Sequence[Object]) -> bool: + obj, = objects + if cls._check_enum(state, [obj], "agent"): + return 
state.get(obj, "state") == 3 + return False + + @classmethod + def _Holding_holds(cls, state: State, objects: Sequence[Object]) -> bool: + obj, = objects + return int(state.get(obj, "state")) == 3 + + @classmethod + def _Near_holds(cls, state: State, objects: Sequence[Object]) -> bool: + obj1, ob2 = objects + return cls._Above_holds(state, [obj1, ob2]) or \ + cls._Below_holds(state, [obj1, ob2]) or \ + cls._RightOf_holds(state, [obj1, ob2]) or \ + cls._LeftOf_holds(state, [obj1, ob2]) + + @classmethod + def _Unknown_holds(cls, state: State, objects: Sequence[Object]) -> bool: + obj, = objects + return int(state.get(obj, "state")) == -1 + + @classmethod + def _Found_holds(cls, state: State, objects: Sequence[Object]) -> bool: + obj, = objects + return int(state.get(obj, "state")) != -1 + + @classmethod + def get_objects_of_enum(cls, state: State, enum_name: str) -> Set[Object]: + """Made public for use by perceiver.""" + return { + o + for o in state + if int(state.get(o, "type")) == int(cls.name_to_enum[enum_name]) + } + + @classmethod + def _check_spatial_relation(cls, state: State, objects: Sequence[Object], + dr: int, dc: int) -> bool: + obj1, obj2 = objects + obj1_r = int(state.get(obj1, "row")) + obj1_c = int(state.get(obj1, "column")) + obj2_r = int(state.get(obj2, "row")) + obj2_c = int(state.get(obj2, "column")) + if obj1_r == sys.maxsize or obj2_r == sys.maxsize or obj1_c == sys.maxsize or obj2_c == sys.maxsize: + return False + return ((obj1_r + dr) == obj2_r) and ((obj1_c + dc) == obj2_c) + + @classmethod + def _check_enum(cls, state: State, objects: Sequence[Object], + enum_name: str) -> bool: + obj, = objects + obj_type = state.get(obj, "type") + return int(obj_type) == int(cls.name_to_enum[enum_name]) + + @classmethod + def _is_static(cls, obj: Object, state: State) -> bool: + return cls._IsGoal_holds(state, [obj]) or \ + cls._IsNonGoalLoc_holds(state, [obj]) + + @classmethod + def _is_dynamic(cls, obj: Object, state: State) -> bool: + return not cls._is_static(obj, state) + + def _copy_observation(self, obs: Observation) -> Observation: + return tuple(m.copy() if type(m) not in [bool, int, float] else m for m in obs) diff --git a/predicators/envs/minigrid_env.py b/predicators/envs/minigrid_env.py new file mode 100644 index 0000000000..9f86008e15 --- /dev/null +++ b/predicators/envs/minigrid_env.py @@ -0,0 +1,386 @@ +"""A MiniGrid environment wrapping https://github.com/mpSchrader/gym-sokoban.""" +import sys +from typing import ClassVar, Dict, List, Optional, Sequence, Set + +import gymnasium as gym +import matplotlib +import numpy as np +from gym.spaces import Box + +from predicators import utils +from predicators.envs import BaseEnv +from predicators.settings import CFG +from predicators.structs import Action, EnvironmentTask, Image, Object, \ + Observation, Predicate, State, Type, Video + +from minigrid.core.constants import ( + OBJECT_TO_IDX, +) +from minigrid.core.world_object import Ball as BallObj, Goal, Key as KeyObj, Box as BoxObj +from minigrid.wrappers import FullyObsWrapper + +class MiniGridEnv(BaseEnv): + """MiniGrid environment wrapping gym-sokoban.""" + + name_to_enum: ClassVar[Dict[str, int]] = OBJECT_TO_IDX + + object_type = Type("obj", ["row", "column", "type", "state", "color"]) + + def __init__(self, use_gui: bool = True) -> None: + super().__init__(use_gui) + + # Predicates + self._IsLoc = Predicate("IsLoc", [self.object_type], self._IsLoc_holds) + self._Above = Predicate("Above", [self.object_type, self.object_type], + self._Above_holds) + self._Below = 
Predicate("Below", [self.object_type, self.object_type], + self._Below_holds) + self._RightOf = Predicate("RightOf", + [self.object_type, self.object_type], + self._RightOf_holds) + self._LeftOf = Predicate("LeftOf", + [self.object_type, self.object_type], + self._LeftOf_holds) + self._IsFacingUp = Predicate("IsFacingUp", [self.object_type], + self._IsFacingUp_holds) + self._IsFacingDown = Predicate("IsFacingDown", [self.object_type], + self._IsFacingDown_holds) + self._IsFacingLeft = Predicate("IsFacingLeft", [self.object_type], + self._IsFacingLeft_holds) + self._IsFacingRight = Predicate("IsFacingRight", [self.object_type], + self._IsFacingRight_holds) + self._IsNonGoalLoc = Predicate("IsNonGoalLoc", [self.object_type], + self._IsNonGoalLoc_holds) + self._Unknown = Predicate("Unknown", [self.object_type], + self._Unknown_holds) + self._Found = Predicate("Found", [self.object_type], + self._Found_holds) + self._IsAgent, self._At, self._IsGoal, self._IsBall, \ + self._IsKey, self._IsBox, self._IsRed, self._IsGreen, \ + self._IsBlue, self._IsPurple, self._IsYellow, self._IsGrey, \ + self._Holding, self._Near = self.get_goal_predicates() + + self.last_action = None + + # NOTE: we can change the level by modifying what we pass + + # into gym.make here. + if CFG.minigrid_gym_fully_observable: + self._gym_env = FullyObsWrapper(gym.make(CFG.minigrid_gym_name)) + else: + self._gym_env = gym.make(CFG.minigrid_gym_name) + + @classmethod + def get_goal_predicates(cls) -> list[Predicate]: + """Defined public so that the perceiver can use it.""" + return [Predicate("IsAgent", [cls.object_type], cls._IsAgent_holds), + Predicate("At", [cls.object_type, cls.object_type], cls._At_holds), + Predicate("IsGoal", [cls.object_type], cls._IsGoal_holds), + Predicate("IsBall", [cls.object_type], cls._IsBall_holds), + Predicate("IsKey", [cls.object_type], cls._IsKey_holds), + Predicate("IsBox", [cls.object_type], cls._IsBox_holds), + Predicate("IsRed", [cls.object_type], cls._IsRed_holds), + Predicate("IsGreen", [cls.object_type], cls._IsGreen_holds), + Predicate("IsBlue", [cls.object_type], cls._IsBlue_holds), + Predicate("IsPurple", [cls.object_type], cls._IsPurple_holds), + Predicate("IsYellow", [cls.object_type], cls._IsYellow_holds), + Predicate("IsGrey", [cls.object_type], cls._IsGrey_holds), + Predicate("Holding", [cls.object_type], cls._Holding_holds), + Predicate("Near", [cls.object_type, cls.object_type], cls._Near_holds)] + + + def _generate_train_tasks(self) -> List[EnvironmentTask]: + return self._get_tasks(num=CFG.num_train_tasks, train_or_test="train") + + def _generate_test_tasks(self) -> List[EnvironmentTask]: + return self._get_tasks(num=CFG.num_test_tasks, train_or_test="test") + + @classmethod + def get_name(cls) -> str: + return "minigrid_env" + + def get_observation(self) -> Observation: + return self._copy_observation(self._current_observation) + + def render_state_plt( + self, + state: State, + task: EnvironmentTask, + action: Optional[Action] = None, + caption: Optional[str] = None) -> matplotlib.figure.Figure: + raise NotImplementedError("This env does not use Matplotlib") + + def render_state(self, + state: State, + task: EnvironmentTask, + action: Optional[Action] = None, + caption: Optional[str] = None) -> Video: + raise NotImplementedError("A gym environment cannot render " + "arbitrary states.") + + def render(self, + action: Optional[Action] = None, + caption: Optional[str] = None) -> Video: + assert caption is None + arr: Image = self._gym_env.get_frame() + import 
matplotlib.pyplot as plt + plt.imsave('visual_image.png', arr.astype('uint8')) + return [arr] + + @property + def predicates(self) -> Set[Predicate]: + return { + self._At, self._IsLoc, self._Above, self._Below, + self._RightOf, self._LeftOf, self._IsAgent, self._IsGoal, self._IsNonGoalLoc, + self._IsFacingUp, self._IsFacingDown, self._IsFacingLeft, self._IsFacingRight, + self._Unknown, self._Found, self._IsBall, self._IsKey, self._IsBox, self._IsRed, + self._IsGreen, self._IsBlue, self._IsPurple, self._IsYellow, self._IsGrey, + self._Holding, self._Near + } + + @property + def goal_predicates(self) -> Set[Predicate]: + return {self._IsAgent, self._At, self._IsGoal} + + @property + def types(self) -> Set[Type]: + return {self.object_type} + + @property + def action_space(self) -> Box: + # One-hot encoding of discrete action space. + num_actions = 7 + assert self._gym_env.action_space.n == num_actions # type: ignore + lowers = np.zeros(num_actions, dtype=np.float32) + uppers = np.ones(num_actions, dtype=np.float32) + return Box(lowers, uppers) + + def reset(self, train_or_test: str, task_idx: int) -> Observation: + """Resets the current state to the train or test task initial state.""" + self._current_task = self.get_task(train_or_test, task_idx) + self._current_observation = self._current_task.init_obs + # We now need to reset the underlying gym environment to the correct + # state. + seed = utils.get_task_seed(train_or_test, task_idx) + self._reset_initial_state_from_seed(seed) + return self._copy_observation(self._current_observation) + + def simulate(self, state: State, action: Action) -> State: + raise NotImplementedError("Simulate not implemented for gym envs. " + + "Try using --bilevel_plan_without_sim True") + + def step(self, action: Action) -> Observation: + # Convert our actions to their discrete action space. + discrete_action = np.argmax(action.arr) + + goal_position = [ + y.cur_pos for x, y in enumerate(self._gym_env.grid.grid) if isinstance(y, Goal) + ] + self._current_observation = self._gym_env.step(discrete_action) + self._gym_env.render() + self.last_action = discrete_action + self._current_observation[4]['last_action'] = self.last_action + + if CFG.minigrid_gym_render: + # save frame to png + visual = self._gym_env.get_frame() + import matplotlib.pyplot as plt + plt.imsave('render.png', visual.astype('uint8')) + + + return self._copy_observation(self._current_observation) + + def goal_reached(self) -> bool: + if len(self._current_observation) == 5: + return self._current_observation[2] + return False + + def _get_tasks(self, num: int, + train_or_test: str) -> List[EnvironmentTask]: + tasks = [] + for task_idx in range(num): + seed = utils.get_task_seed(train_or_test, task_idx) + init_obs = self._reset_initial_state_from_seed(seed) + goal_description = self._gym_env.mission + task = EnvironmentTask(init_obs, goal_description) + tasks.append(task) + return tasks + + def _reset_initial_state_from_seed(self, seed: int) -> Observation: + return self._gym_env.reset(seed=seed) + + @classmethod + def _IsLoc_holds(cls, state: State, objects: Sequence[Object]) -> bool: + # Free spaces and goals are locations. 
+ loc, = objects + obj_type = int(state.get(loc, "type")) + return obj_type in {cls.name_to_enum["empty"], cls.name_to_enum["goal"]} + + @classmethod + def _IsGoal_holds(cls, state: State, objects: Sequence[Object]) -> bool: + return cls._check_enum(state, objects, "goal") + + @classmethod + def _IsAgent_holds(cls, state: State, objects: Sequence[Object]) -> bool: + return cls._check_enum(state, objects, "agent") + + @classmethod + def _IsBall_holds(cls, state: State, objects: Sequence[Object]) -> bool: + return cls._check_enum(state, objects, "ball") + + @classmethod + def _IsKey_holds(cls, state: State, objects: Sequence[Object]) -> bool: + return cls._check_enum(state, objects, "key") + + @classmethod + def _IsBox_holds(cls, state: State, objects: Sequence[Object]) -> bool: + return cls._check_enum(state, objects, "box") + + @classmethod + def _IsRed_holds(cls, state: State, objects: Sequence[Object]) -> bool: + obj, = objects + return state.get(obj, "color") == 'red' + + @classmethod + def _IsGreen_holds(cls, state: State, objects: Sequence[Object]) -> bool: + obj, = objects + return state.get(obj, "color") == 'green' + + @classmethod + def _IsBlue_holds(cls, state: State, objects: Sequence[Object]) -> bool: + obj, = objects + return state.get(obj, "color") == 'blue' + + @classmethod + def _IsPurple_holds(cls, state: State, objects: Sequence[Object]) -> bool: + obj, = objects + return state.get(obj, "color") == 'purple' + + @classmethod + def _IsYellow_holds(cls, state: State, objects: Sequence[Object]) -> bool: + obj, = objects + return state.get(obj, "color") == 'yellow' + + @classmethod + def _IsGrey_holds(cls, state: State, objects: Sequence[Object]) -> bool: + obj, = objects + return state.get(obj, "color") == 'grey' + + @classmethod + def _IsNonGoalLoc_holds(cls, state: State, + objects: Sequence[Object]) -> bool: + return cls._check_enum(state, objects, "empty") + + @classmethod + def _At_holds(cls, state: State, objects: Sequence[Object]) -> bool: + obj1, _ = objects + if cls._check_enum(state, [obj1], "agent"): + return cls._check_spatial_relation(state, objects, 0, 0) + return False + + @classmethod + def _Above_holds(cls, state: State, objects: Sequence[Object]) -> bool: + return cls._check_spatial_relation(state, objects, 1, 0) + + @classmethod + def _Below_holds(cls, state: State, objects: Sequence[Object]) -> bool: + return cls._check_spatial_relation(state, objects, -1, 0) + + @classmethod + def _RightOf_holds(cls, state: State, objects: Sequence[Object]) -> bool: + return cls._check_spatial_relation(state, objects, 0, -1) + + @classmethod + def _LeftOf_holds(cls, state: State, objects: Sequence[Object]) -> bool: + return cls._check_spatial_relation(state, objects, 0, 1) + + @classmethod + def _IsFacingRight_holds(cls, state: State, objects: Sequence[Object]) -> bool: + obj, = objects + if cls._check_enum(state, [obj], "agent"): + return state.get(obj, "state") == 0 + return False + + @classmethod + def _IsFacingDown_holds(cls, state: State, objects: Sequence[Object]) -> bool: + obj, = objects + if cls._check_enum(state, [obj], "agent"): + return state.get(obj, "state") == 1 + return False + + @classmethod + def _IsFacingLeft_holds(cls, state: State, objects: Sequence[Object]) -> bool: + obj, = objects + if cls._check_enum(state, [obj], "agent"): + return state.get(obj, "state") == 2 + return False + + @classmethod + def _IsFacingUp_holds(cls, state: State, objects: Sequence[Object]) -> bool: + obj, = objects + if cls._check_enum(state, [obj], "agent"): + return 
state.get(obj, "state") == 3 + return False + + @classmethod + def _Holding_holds(cls, state: State, objects: Sequence[Object]) -> bool: + obj, = objects + return int(state.get(obj, "state")) == 3 + + @classmethod + def _Near_holds(cls, state: State, objects: Sequence[Object]) -> bool: + obj1, ob2 = objects + return cls._Above_holds(state, [obj1, ob2]) or \ + cls._Below_holds(state, [obj1, ob2]) or \ + cls._RightOf_holds(state, [obj1, ob2]) or \ + cls._LeftOf_holds(state, [obj1, ob2]) + + @classmethod + def _Unknown_holds(cls, state: State, objects: Sequence[Object]) -> bool: + obj, = objects + return int(state.get(obj, "state")) == -1 + + @classmethod + def _Found_holds(cls, state: State, objects: Sequence[Object]) -> bool: + obj, = objects + return int(state.get(obj, "state")) != -1 + + @classmethod + def get_objects_of_enum(cls, state: State, enum_name: str) -> Set[Object]: + """Made public for use by perceiver.""" + return { + o + for o in state + if int(state.get(o, "type")) == int(cls.name_to_enum[enum_name]) + } + + @classmethod + def _check_spatial_relation(cls, state: State, objects: Sequence[Object], + dr: int, dc: int) -> bool: + obj1, obj2 = objects + obj1_r = int(state.get(obj1, "row")) + obj1_c = int(state.get(obj1, "column")) + obj2_r = int(state.get(obj2, "row")) + obj2_c = int(state.get(obj2, "column")) + if obj1_r == sys.maxsize or obj2_r == sys.maxsize or obj1_c == sys.maxsize or obj2_c == sys.maxsize: + return False + return ((obj1_r + dr) == obj2_r) and ((obj1_c + dc) == obj2_c) + + @classmethod + def _check_enum(cls, state: State, objects: Sequence[Object], + enum_name: str) -> bool: + obj, = objects + obj_type = state.get(obj, "type") + return int(obj_type) == int(cls.name_to_enum[enum_name]) + + @classmethod + def _is_static(cls, obj: Object, state: State) -> bool: + return cls._IsGoal_holds(state, [obj]) or \ + cls._IsNonGoalLoc_holds(state, [obj]) + + @classmethod + def _is_dynamic(cls, obj: Object, state: State) -> bool: + return not cls._is_static(obj, state) + + def _copy_observation(self, obs: Observation) -> Observation: + return tuple(m.copy() if type(m) not in [bool, int, float] else m for m in obs) diff --git a/predicators/ground_truth_models/mini_behavior_env/__init__.py b/predicators/ground_truth_models/mini_behavior_env/__init__.py new file mode 100644 index 0000000000..2a8f9dca73 --- /dev/null +++ b/predicators/ground_truth_models/mini_behavior_env/__init__.py @@ -0,0 +1,6 @@ +"""Ground truth models for MiniBehavior gym environment.""" + +from .nsrts import MiniBehaviorGroundTruthNSRTFactory +from .options import MiniBehaviorGroundTruthOptionFactory + +__all__ = ["MiniBehaviorGroundTruthOptionFactory", "MiniBehaviorGroundTruthNSRTFactory"] diff --git a/predicators/ground_truth_models/mini_behavior_env/nsrts.py b/predicators/ground_truth_models/mini_behavior_env/nsrts.py new file mode 100644 index 0000000000..24f6825af4 --- /dev/null +++ b/predicators/ground_truth_models/mini_behavior_env/nsrts.py @@ -0,0 +1,319 @@ +"""Ground-truth NSRTs for the cover environment.""" + +from typing import Dict, List, Set + +from predicators.ground_truth_models import GroundTruthNSRTFactory +from predicators.structs import NSRT, LiftedAtom, ParameterizedOption, \ + Predicate, Type, Variable +from predicators.utils import null_sampler + + +class MiniBehaviorGroundTruthNSRTFactory(GroundTruthNSRTFactory): + """Ground-truth NSRTs for the MiniBehavior environment.""" + + @classmethod + def get_env_names(cls) -> Set[str]: + return {"mini_behavior_env"} + + @staticmethod + def 
get_nsrts(env_name: str, types: Dict[str, Type], + predicates: Dict[str, Predicate], + options: Dict[str, ParameterizedOption]) -> Set[NSRT]: + # Types + object_type = types["obj"] + + # Objects + obj1 = Variable("?obj1", object_type) + obj2 = Variable("?obj2", object_type) + obj3 = Variable("?obj3", object_type) + + # Predicates + At = predicates["At"] + IsLoc = predicates["IsLoc"] + Above = predicates["Above"] + Below = predicates["Below"] + RightOf = predicates["RightOf"] + LeftOf = predicates["LeftOf"] + IsAgent = predicates["IsAgent"] + IsGoal = predicates["IsGoal"] + IsFacingUp = predicates["IsFacingUp"] + IsFacingDown = predicates["IsFacingDown"] + IsFacingLeft = predicates["IsFacingLeft"] + IsFacingRight = predicates["IsFacingRight"] + Unknown = predicates["Unknown"] + Found = predicates["Found"] + Holding = predicates["Holding"] + Near = predicates["Near"] + + # Options + MoveForward = options["Forward"] + TurnLeft = options["Left"] + TurnRight = options["Right"] + Pickup = options["Pickup_0"] + Drop = options["Drop_0"] + Toggle = options["Toggle"] + FindObj = options["FindObj"] + ReplanToObj = options["ReplanToObj"] + + nsrts = set() + + # MoveUp + # Agent, from_loc, to_loc + parameters = [obj1, obj2, obj3] + preconditions = { + LiftedAtom(IsAgent, [obj1]), + LiftedAtom(IsLoc, [obj3]), + LiftedAtom(IsLoc, [obj2]), + LiftedAtom(Above, [obj3, obj2]), + LiftedAtom(At, [obj1, obj2]), + LiftedAtom(IsFacingUp, [obj1]), + } + add_effects = {LiftedAtom(At, [obj1, obj3])} + delete_effects = {LiftedAtom(At, [obj1, obj2])} + option = MoveForward + option_vars: List[Variable] = [] # dummy - not used + move_up_nsrt = NSRT("MoveUp", parameters, preconditions, add_effects, + delete_effects, set(), option, option_vars, + null_sampler) + nsrts.add(move_up_nsrt) + + # MoveDown + # Agent, from_loc, to_loc + parameters = [obj1, obj2, obj3] + preconditions = { + LiftedAtom(IsAgent, [obj1]), + LiftedAtom(IsLoc, [obj3]), + LiftedAtom(IsLoc, [obj2]), + LiftedAtom(Below, [obj3, obj2]), + LiftedAtom(At, [obj1, obj2]), + LiftedAtom(IsFacingDown, [obj1]), + } + add_effects = {LiftedAtom(At, [obj1, obj3])} + delete_effects = {LiftedAtom(At, [obj1, obj2])} + option = MoveForward + option_vars = [] # dummy - not used + move_down_nsrt = NSRT("MoveDown", parameters, preconditions, + add_effects, delete_effects, set(), option, + option_vars, null_sampler) + nsrts.add(move_down_nsrt) + + # MoveRight + # Agent, from_loc, to_loc + parameters = [obj1, obj2, obj3] + preconditions = { + LiftedAtom(IsAgent, [obj1]), + LiftedAtom(IsLoc, [obj3]), + LiftedAtom(IsLoc, [obj2]), + LiftedAtom(RightOf, [obj3, obj2]), + LiftedAtom(At, [obj1, obj2]), + LiftedAtom(IsFacingRight, [obj1]), + } + add_effects = {LiftedAtom(At, [obj1, obj3])} + delete_effects = {LiftedAtom(At, [obj1, obj2])} + option = MoveForward + option_vars = [] # dummy - not used + move_right_nsrt = NSRT("MoveRight", parameters, preconditions, + add_effects, delete_effects, set(), option, + option_vars, null_sampler) + nsrts.add(move_right_nsrt) + + # MoveLeft + # Agent, from_loc, to_loc + parameters = [obj1, obj2, obj3] + preconditions = { + LiftedAtom(IsAgent, [obj1]), + LiftedAtom(IsLoc, [obj3]), + LiftedAtom(IsLoc, [obj2]), + LiftedAtom(LeftOf, [obj3, obj2]), + LiftedAtom(At, [obj1, obj2]), + LiftedAtom(IsFacingLeft, [obj1]), + } + add_effects = {LiftedAtom(At, [obj1, obj3])} + delete_effects = {LiftedAtom(At, [obj1, obj2])} + option = MoveForward + option_vars = [] # dummy - not used + move_left_nsrt = NSRT("MoveLeft", parameters, preconditions, + 
add_effects, delete_effects, set(), option, + option_vars, null_sampler) + nsrts.add(move_left_nsrt) + + # TurnRight + turn_right_from_up_nsrt = NSRT("TurnRightFromUp", [obj1], + {LiftedAtom(IsFacingUp, [obj1])}, + {LiftedAtom(IsFacingRight, [obj1])}, + {LiftedAtom(IsFacingUp, [obj1])}, + set(), + TurnRight, [], null_sampler) + nsrts.add(turn_right_from_up_nsrt) + + turn_right_from_down_nsrt = NSRT("TurnRightFromDown", [obj1], + {LiftedAtom(IsFacingDown, [obj1])}, + {LiftedAtom(IsFacingLeft, [obj1])}, + {LiftedAtom(IsFacingDown, [obj1])}, + set(), + TurnRight, [], null_sampler) + nsrts.add(turn_right_from_down_nsrt) + + turn_right_from_left_nsrt = NSRT("TurnRightFromLeft", [obj1], + {LiftedAtom(IsFacingLeft, [obj1])}, + {LiftedAtom(IsFacingUp, [obj1])}, + {LiftedAtom(IsFacingLeft, [obj1])}, + set(), + TurnRight, [], null_sampler) + nsrts.add(turn_right_from_left_nsrt) + + turn_right_from_right_nsrt = NSRT("TurnRightFromRight", [obj1], + {LiftedAtom(IsFacingRight, [obj1])}, + {LiftedAtom(IsFacingDown, [obj1])}, + {LiftedAtom(IsFacingRight, [obj1])}, + set(), + TurnRight, [], null_sampler) + nsrts.add(turn_right_from_right_nsrt) + + # TurnLeft + turn_left_from_up_nsrt = NSRT("TurnLeftFromUp", [obj1], + {LiftedAtom(IsFacingUp, [obj1])}, + {LiftedAtom(IsFacingLeft, [obj1])}, + {LiftedAtom(IsFacingUp, [obj1])}, + set(), + TurnLeft, [], null_sampler) + nsrts.add(turn_left_from_up_nsrt) + + turn_left_from_down_nsrt = NSRT("TurnLeftFromDown", [obj1], + {LiftedAtom(IsFacingDown, [obj1])}, + {LiftedAtom(IsFacingRight, [obj1])}, + {LiftedAtom(IsFacingDown, [obj1])}, + set(), + TurnLeft, [], null_sampler) + nsrts.add(turn_left_from_down_nsrt) + + turn_left_from_left_nsrt = NSRT("TurnLeftFromLeft", [obj1], + {LiftedAtom(IsFacingLeft, [obj1])}, + {LiftedAtom(IsFacingDown, [obj1])}, + {LiftedAtom(IsFacingLeft, [obj1])}, + set(), + TurnLeft, [], null_sampler) + nsrts.add(turn_left_from_left_nsrt) + + turn_left_from_right_nsrt = NSRT("TurnLeftFromRight", [obj1], + {LiftedAtom(IsFacingRight, [obj1])}, + {LiftedAtom(IsFacingUp, [obj1])}, + {LiftedAtom(IsFacingRight, [obj1])}, + set(), + TurnLeft, [], null_sampler) + nsrts.add(turn_left_from_right_nsrt) + + # Pickup Left + # Agent, obj, agent_loc + parameters = [obj1, obj2, obj3] + preconditions = { + LiftedAtom(IsAgent, [obj1]), + LiftedAtom(IsLoc, [obj3]), + LiftedAtom(At, [obj1, obj3]), + LiftedAtom(LeftOf, [obj2, obj3]), + LiftedAtom(IsFacingLeft, [obj1]), + LiftedAtom(Found, [obj2]) + } + add_effects = {LiftedAtom(Holding, [obj2])} + delete_effects = {LiftedAtom(LeftOf, [obj2, obj1])} + option = Pickup + option_vars: List[Variable] = [] + pickup_left_nsrt = NSRT("Pickup_Left", parameters, preconditions, + add_effects, delete_effects, set(), option, + option_vars, null_sampler) + nsrts.add(pickup_left_nsrt) + + # Pickup Right + # Agent, obj, agent_loc + parameters = [obj1, obj2, obj3] + preconditions = { + LiftedAtom(IsAgent, [obj1]), + LiftedAtom(IsLoc, [obj3]), + LiftedAtom(At, [obj1, obj3]), + LiftedAtom(RightOf, [obj2, obj3]), + LiftedAtom(IsFacingRight, [obj1]), + LiftedAtom(Found, [obj2]) + } + add_effects = {LiftedAtom(Holding, [obj2])} + delete_effects = {LiftedAtom(RightOf, [obj2, obj1])} + option = Pickup + option_vars: List[Variable] = [] + pickup_right_nsrt = NSRT("Pickup_Right", parameters, preconditions, + add_effects, delete_effects, set(), option, + option_vars, null_sampler) + nsrts.add(pickup_right_nsrt) + + # Pickup Up + # Agent, obj, agent_loc + parameters = [obj1, obj2, obj3] + preconditions = { + LiftedAtom(IsAgent, [obj1]), + 
LiftedAtom(IsLoc, [obj3]), + LiftedAtom(At, [obj1, obj3]), + LiftedAtom(Above, [obj2, obj3]), + LiftedAtom(IsFacingUp, [obj1]), + LiftedAtom(Found, [obj2]) + } + add_effects = {LiftedAtom(Holding, [obj2])} + delete_effects = {LiftedAtom(Above, [obj2, obj1])} + option = Pickup + option_vars: List[Variable] = [] + pickup_up_nsrt = NSRT("Pickup_Up", parameters, preconditions, + add_effects, delete_effects, set(), option, + option_vars, null_sampler) + nsrts.add(pickup_up_nsrt) + + # Pickup Down + # Agent, obj, agent_loc + parameters = [obj1, obj2, obj3] + preconditions = { + LiftedAtom(IsAgent, [obj1]), + LiftedAtom(IsLoc, [obj3]), + LiftedAtom(At, [obj1, obj3]), + LiftedAtom(Below, [obj2, obj3]), + LiftedAtom(IsFacingDown, [obj1]), + LiftedAtom(Found, [obj2]) + } + add_effects = {LiftedAtom(Holding, [obj2])} + delete_effects = {LiftedAtom(Below, [obj2, obj1])} + option = Pickup + option_vars: List[Variable] = [] + pickup_down_nsrt = NSRT("Pickup_Down", parameters, preconditions, + add_effects, delete_effects, set(), option, + option_vars, null_sampler) + nsrts.add(pickup_down_nsrt) + + # Drop + # TODO + + # Toggle + # TODO + + # For Partial Observability + # Find Object + find_obj_nsrt = NSRT("FindObj", [obj1], + {LiftedAtom(Unknown, [obj1])}, + {LiftedAtom(Found, [obj1])}, + set(), + {LeftOf, RightOf, Above, Below}, + FindObj, [obj1], null_sampler) + nsrts.add(find_obj_nsrt) + + # Replan With Obj Known + replan_to_obj_nsrt = NSRT("ReplanToObj", [obj1, obj2], + {LiftedAtom(IsAgent, [obj1]), LiftedAtom(IsLoc, [obj2]), LiftedAtom(Unknown, [obj2]), LiftedAtom(Found, [obj2])}, + {LiftedAtom(At, [obj1, obj2])}, + set(), + {LeftOf, RightOf, Above, Below}, + ReplanToObj, [], null_sampler) + nsrts.add(replan_to_obj_nsrt) + + replan_to_pickable_obj_nsrt = NSRT("ReplanToPickableObj", [obj1, obj2], + {LiftedAtom(IsAgent, [obj1]), LiftedAtom(Unknown, [obj2]), LiftedAtom(Found, [obj2])}, + {LiftedAtom(Holding, [obj2])}, + set(), + {LeftOf, RightOf, Above, Below}, + ReplanToObj, [], null_sampler) + nsrts.add(replan_to_pickable_obj_nsrt) + + return nsrts diff --git a/predicators/ground_truth_models/mini_behavior_env/options.py b/predicators/ground_truth_models/mini_behavior_env/options.py new file mode 100644 index 0000000000..e664a4b6ff --- /dev/null +++ b/predicators/ground_truth_models/mini_behavior_env/options.py @@ -0,0 +1,92 @@ +"""Ground-truth options for the sokoban environment.""" + +from typing import Dict, Sequence, Set + +import numpy as np +from gym.spaces import Box + +from enum import IntEnum +from predicators import utils +from predicators.ground_truth_models import GroundTruthOptionFactory +from predicators.structs import Action, Array, Object, ParameterizedOption, \ + ParameterizedPolicy, Predicate, State, Type + +class Actions(IntEnum): + left = 0 + right = 1 + forward = 2 + toggle = 3 + open = 4 + close = 5 + slice = 6 + cook = 7 + drop_in = 8 + pickup_0 = 9 + pickup_1 = 10 + pickup_2 = 11 + drop_0 = 12 + drop_1 = 13 + drop_2 = 14 + +class MiniBehaviorGroundTruthOptionFactory(GroundTruthOptionFactory): + """Ground-truth options for the MiniBehavior environment.""" + + @classmethod + def get_env_names(cls) -> Set[str]: + return {"mini_behavior_env"} + + @classmethod + def get_options(cls, env_name: str, types: Dict[str, Type], + predicates: Dict[str, Predicate], + action_space: Box) -> Set[ParameterizedOption]: + + # Reformat names for consistency with other option naming. 
+ def _format_name(name: str) -> str: + return "".join([n.capitalize() for n in name.split(" ")]) + + options: Set[ParameterizedOption] = { + utils.SingletonParameterizedOption( + _format_name(name), cls._create_policy(discrete_action=i)) + for i, name in {value: key for key, value in Actions.__members__.items()}.items() + } + + # FindObj option. + object_type = types["obj"] + FindObjOption = ParameterizedOption( + "FindObj", + [object_type], + Box(low=np.array([]), high=np.array([]), shape=(0, )), + policy=cls._create_find_obj_policy(), + initiable=lambda s, m, o, p: True, + terminal=lambda s, m, o, p: s.get(o[0], "type") == 8 and s.get(o[0], "state") != -1) # 8 is the goal enum type + options.add(FindObjOption) + + # ReplanToObj option. + ReplanToObj = utils.SingletonParameterizedOption("ReplanToObj", cls._create_policy(discrete_action=6)) + options.add(ReplanToObj) + + return options + + @classmethod + def _create_policy(cls, discrete_action: int) -> ParameterizedPolicy: + + def policy(state: State, memory: Dict, objects: Sequence[Object], + params: Array) -> Action: + del state, memory, objects, params # unused. + arr = np.zeros(7, dtype=np.float32) + arr[discrete_action] = 1 + return Action(arr) + + return policy + + @classmethod + def _create_find_obj_policy(cls) -> ParameterizedPolicy: + + def policy(state: State, memory: Dict, objects: Sequence[Object], + params: Array) -> Action: + del state, memory, objects, params # unused. + arr = np.zeros(7, dtype=np.float32) + arr[np.random.choice([0, 1, 2], 1, p=[0.2, 0.2, 0.6])[0]] = 1 + return Action(arr) + + return policy diff --git a/predicators/ground_truth_models/minigrid_env/__init__.py b/predicators/ground_truth_models/minigrid_env/__init__.py new file mode 100644 index 0000000000..63ddf1fab2 --- /dev/null +++ b/predicators/ground_truth_models/minigrid_env/__init__.py @@ -0,0 +1,6 @@ +"""Ground truth models for MiniGrid gym environment.""" + +from .nsrts import MiniGridGroundTruthNSRTFactory +from .options import MiniGridGroundTruthOptionFactory + +__all__ = ["MiniGridGroundTruthOptionFactory", "MiniGridGroundTruthNSRTFactory"] diff --git a/predicators/ground_truth_models/minigrid_env/nsrts.py b/predicators/ground_truth_models/minigrid_env/nsrts.py new file mode 100644 index 0000000000..31304111de --- /dev/null +++ b/predicators/ground_truth_models/minigrid_env/nsrts.py @@ -0,0 +1,320 @@ +"""Ground-truth NSRTs for the cover environment.""" + +from typing import Dict, List, Set + +from predicators.ground_truth_models import GroundTruthNSRTFactory +from predicators.structs import NSRT, LiftedAtom, ParameterizedOption, \ + Predicate, Type, Variable +from predicators.utils import null_sampler + + +class MiniGridGroundTruthNSRTFactory(GroundTruthNSRTFactory): + """Ground-truth NSRTs for the MiniGrid environment.""" + + @classmethod + def get_env_names(cls) -> Set[str]: + return {"minigrid_env"} + + @staticmethod + def get_nsrts(env_name: str, types: Dict[str, Type], + predicates: Dict[str, Predicate], + options: Dict[str, ParameterizedOption]) -> Set[NSRT]: + # Types + object_type = types["obj"] + + # Objects + obj1 = Variable("?obj1", object_type) + obj2 = Variable("?obj2", object_type) + obj3 = Variable("?obj3", object_type) + + # Predicates + At = predicates["At"] + IsLoc = predicates["IsLoc"] + Above = predicates["Above"] + Below = predicates["Below"] + RightOf = predicates["RightOf"] + LeftOf = predicates["LeftOf"] + IsAgent = predicates["IsAgent"] + IsGoal = predicates["IsGoal"] + IsFacingUp = predicates["IsFacingUp"] + IsFacingDown 
= predicates["IsFacingDown"] + IsFacingLeft = predicates["IsFacingLeft"] + IsFacingRight = predicates["IsFacingRight"] + Unknown = predicates["Unknown"] + Found = predicates["Found"] + Holding = predicates["Holding"] + Near = predicates["Near"] + + # Options + MoveForward = options["Forward"] + TurnLeft = options["Left"] + TurnRight = options["Right"] + Pickup = options["Pickup"] + Drop = options["Drop"] + Toggle = options["Toggle"] + Done = options["Done"] + FindObj = options["FindObj"] + ReplanToObj = options["ReplanToObj"] + + nsrts = set() + + # MoveUp + # Agent, from_loc, to_loc + parameters = [obj1, obj2, obj3] + preconditions = { + LiftedAtom(IsAgent, [obj1]), + LiftedAtom(IsLoc, [obj3]), + LiftedAtom(IsLoc, [obj2]), + LiftedAtom(Above, [obj3, obj2]), + LiftedAtom(At, [obj1, obj2]), + LiftedAtom(IsFacingUp, [obj1]), + } + add_effects = {LiftedAtom(At, [obj1, obj3])} + delete_effects = {LiftedAtom(At, [obj1, obj2])} + option = MoveForward + option_vars: List[Variable] = [] # dummy - not used + move_up_nsrt = NSRT("MoveUp", parameters, preconditions, add_effects, + delete_effects, set(), option, option_vars, + null_sampler) + nsrts.add(move_up_nsrt) + + # MoveDown + # Agent, from_loc, to_loc + parameters = [obj1, obj2, obj3] + preconditions = { + LiftedAtom(IsAgent, [obj1]), + LiftedAtom(IsLoc, [obj3]), + LiftedAtom(IsLoc, [obj2]), + LiftedAtom(Below, [obj3, obj2]), + LiftedAtom(At, [obj1, obj2]), + LiftedAtom(IsFacingDown, [obj1]), + } + add_effects = {LiftedAtom(At, [obj1, obj3])} + delete_effects = {LiftedAtom(At, [obj1, obj2])} + option = MoveForward + option_vars = [] # dummy - not used + move_down_nsrt = NSRT("MoveDown", parameters, preconditions, + add_effects, delete_effects, set(), option, + option_vars, null_sampler) + nsrts.add(move_down_nsrt) + + # MoveRight + # Agent, from_loc, to_loc + parameters = [obj1, obj2, obj3] + preconditions = { + LiftedAtom(IsAgent, [obj1]), + LiftedAtom(IsLoc, [obj3]), + LiftedAtom(IsLoc, [obj2]), + LiftedAtom(RightOf, [obj3, obj2]), + LiftedAtom(At, [obj1, obj2]), + LiftedAtom(IsFacingRight, [obj1]), + } + add_effects = {LiftedAtom(At, [obj1, obj3])} + delete_effects = {LiftedAtom(At, [obj1, obj2])} + option = MoveForward + option_vars = [] # dummy - not used + move_right_nsrt = NSRT("MoveRight", parameters, preconditions, + add_effects, delete_effects, set(), option, + option_vars, null_sampler) + nsrts.add(move_right_nsrt) + + # MoveLeft + # Agent, from_loc, to_loc + parameters = [obj1, obj2, obj3] + preconditions = { + LiftedAtom(IsAgent, [obj1]), + LiftedAtom(IsLoc, [obj3]), + LiftedAtom(IsLoc, [obj2]), + LiftedAtom(LeftOf, [obj3, obj2]), + LiftedAtom(At, [obj1, obj2]), + LiftedAtom(IsFacingLeft, [obj1]), + } + add_effects = {LiftedAtom(At, [obj1, obj3])} + delete_effects = {LiftedAtom(At, [obj1, obj2])} + option = MoveForward + option_vars = [] # dummy - not used + move_left_nsrt = NSRT("MoveLeft", parameters, preconditions, + add_effects, delete_effects, set(), option, + option_vars, null_sampler) + nsrts.add(move_left_nsrt) + + # TurnRight + turn_right_from_up_nsrt = NSRT("TurnRightFromUp", [obj1], + {LiftedAtom(IsFacingUp, [obj1])}, + {LiftedAtom(IsFacingRight, [obj1])}, + {LiftedAtom(IsFacingUp, [obj1])}, + set(), + TurnRight, [], null_sampler) + nsrts.add(turn_right_from_up_nsrt) + + turn_right_from_down_nsrt = NSRT("TurnRightFromDown", [obj1], + {LiftedAtom(IsFacingDown, [obj1])}, + {LiftedAtom(IsFacingLeft, [obj1])}, + {LiftedAtom(IsFacingDown, [obj1])}, + set(), + TurnRight, [], null_sampler) + nsrts.add(turn_right_from_down_nsrt) 
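+        # Each Turn* NSRT swaps a single IsFacing* atom: TurnRight steps
+        # through the facing cycle up -> right -> down -> left, while
+        # TurnLeft traverses the same cycle in reverse.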
+ + turn_right_from_left_nsrt = NSRT("TurnRightFromLeft", [obj1], + {LiftedAtom(IsFacingLeft, [obj1])}, + {LiftedAtom(IsFacingUp, [obj1])}, + {LiftedAtom(IsFacingLeft, [obj1])}, + set(), + TurnRight, [], null_sampler) + nsrts.add(turn_right_from_left_nsrt) + + turn_right_from_right_nsrt = NSRT("TurnRightFromRight", [obj1], + {LiftedAtom(IsFacingRight, [obj1])}, + {LiftedAtom(IsFacingDown, [obj1])}, + {LiftedAtom(IsFacingRight, [obj1])}, + set(), + TurnRight, [], null_sampler) + nsrts.add(turn_right_from_right_nsrt) + + # TurnLeft + turn_left_from_up_nsrt = NSRT("TurnLeftFromUp", [obj1], + {LiftedAtom(IsFacingUp, [obj1])}, + {LiftedAtom(IsFacingLeft, [obj1])}, + {LiftedAtom(IsFacingUp, [obj1])}, + set(), + TurnLeft, [], null_sampler) + nsrts.add(turn_left_from_up_nsrt) + + turn_left_from_down_nsrt = NSRT("TurnLeftFromDown", [obj1], + {LiftedAtom(IsFacingDown, [obj1])}, + {LiftedAtom(IsFacingRight, [obj1])}, + {LiftedAtom(IsFacingDown, [obj1])}, + set(), + TurnLeft, [], null_sampler) + nsrts.add(turn_left_from_down_nsrt) + + turn_left_from_left_nsrt = NSRT("TurnLeftFromLeft", [obj1], + {LiftedAtom(IsFacingLeft, [obj1])}, + {LiftedAtom(IsFacingDown, [obj1])}, + {LiftedAtom(IsFacingLeft, [obj1])}, + set(), + TurnLeft, [], null_sampler) + nsrts.add(turn_left_from_left_nsrt) + + turn_left_from_right_nsrt = NSRT("TurnLeftFromRight", [obj1], + {LiftedAtom(IsFacingRight, [obj1])}, + {LiftedAtom(IsFacingUp, [obj1])}, + {LiftedAtom(IsFacingRight, [obj1])}, + set(), + TurnLeft, [], null_sampler) + nsrts.add(turn_left_from_right_nsrt) + + # Pickup Left + # Agent, obj, agent_loc + parameters = [obj1, obj2, obj3] + preconditions = { + LiftedAtom(IsAgent, [obj1]), + LiftedAtom(IsLoc, [obj3]), + LiftedAtom(At, [obj1, obj3]), + LiftedAtom(LeftOf, [obj2, obj3]), + LiftedAtom(IsFacingLeft, [obj1]), + LiftedAtom(Found, [obj2]) + } + add_effects = {LiftedAtom(Holding, [obj2])} + delete_effects = {LiftedAtom(LeftOf, [obj2, obj1])} + option = Pickup + option_vars: List[Variable] = [] + pickup_left_nsrt = NSRT("Pickup_Left", parameters, preconditions, + add_effects, delete_effects, set(), option, + option_vars, null_sampler) + nsrts.add(pickup_left_nsrt) + + # Pickup Right + # Agent, obj, agent_loc + parameters = [obj1, obj2, obj3] + preconditions = { + LiftedAtom(IsAgent, [obj1]), + LiftedAtom(IsLoc, [obj3]), + LiftedAtom(At, [obj1, obj3]), + LiftedAtom(RightOf, [obj2, obj3]), + LiftedAtom(IsFacingRight, [obj1]), + LiftedAtom(Found, [obj2]) + } + add_effects = {LiftedAtom(Holding, [obj2])} + delete_effects = {LiftedAtom(RightOf, [obj2, obj1])} + option = Pickup + option_vars: List[Variable] = [] + pickup_right_nsrt = NSRT("Pickup_Right", parameters, preconditions, + add_effects, delete_effects, set(), option, + option_vars, null_sampler) + nsrts.add(pickup_right_nsrt) + + # Pickup Up + # Agent, obj, agent_loc + parameters = [obj1, obj2, obj3] + preconditions = { + LiftedAtom(IsAgent, [obj1]), + LiftedAtom(IsLoc, [obj3]), + LiftedAtom(At, [obj1, obj3]), + LiftedAtom(Above, [obj2, obj3]), + LiftedAtom(IsFacingUp, [obj1]), + LiftedAtom(Found, [obj2]) + } + add_effects = {LiftedAtom(Holding, [obj2])} + delete_effects = {LiftedAtom(Above, [obj2, obj1])} + option = Pickup + option_vars: List[Variable] = [] + pickup_up_nsrt = NSRT("Pickup_Up", parameters, preconditions, + add_effects, delete_effects, set(), option, + option_vars, null_sampler) + nsrts.add(pickup_up_nsrt) + + # Pickup Down + # Agent, obj, agent_loc + parameters = [obj1, obj2, obj3] + preconditions = { + LiftedAtom(IsAgent, [obj1]), + LiftedAtom(IsLoc, 
[obj3]), + LiftedAtom(At, [obj1, obj3]), + LiftedAtom(Below, [obj2, obj3]), + LiftedAtom(IsFacingDown, [obj1]), + LiftedAtom(Found, [obj2]) + } + add_effects = {LiftedAtom(Holding, [obj2])} + delete_effects = {LiftedAtom(Below, [obj2, obj1])} + option = Pickup + option_vars: List[Variable] = [] + pickup_down_nsrt = NSRT("Pickup_Down", parameters, preconditions, + add_effects, delete_effects, set(), option, + option_vars, null_sampler) + nsrts.add(pickup_down_nsrt) + + # Drop + # TODO + + # Toggle + # TODO + + # For Partial Observability + # Find Object + find_obj_nsrt = NSRT("FindObj", [obj1], + {LiftedAtom(Unknown, [obj1])}, + {LiftedAtom(Found, [obj1])}, + set(), + {LeftOf, RightOf, Above, Below}, + FindObj, [obj1], null_sampler) + nsrts.add(find_obj_nsrt) + + # Replan With Obj Known + replan_to_obj_nsrt = NSRT("ReplanToObj", [obj1, obj2], + {LiftedAtom(IsAgent, [obj1]), LiftedAtom(IsLoc, [obj2]), LiftedAtom(Unknown, [obj2]), LiftedAtom(Found, [obj2])}, + {LiftedAtom(At, [obj1, obj2])}, + set(), + {LeftOf, RightOf, Above, Below}, + ReplanToObj, [], null_sampler) + nsrts.add(replan_to_obj_nsrt) + + replan_to_pickable_obj_nsrt = NSRT("ReplanToPickableObj", [obj1, obj2], + {LiftedAtom(IsAgent, [obj1]), LiftedAtom(Unknown, [obj2]), LiftedAtom(Found, [obj2])}, + {LiftedAtom(Holding, [obj2])}, + set(), + {LeftOf, RightOf, Above, Below}, + ReplanToObj, [], null_sampler) + nsrts.add(replan_to_pickable_obj_nsrt) + + return nsrts diff --git a/predicators/ground_truth_models/minigrid_env/options.py b/predicators/ground_truth_models/minigrid_env/options.py new file mode 100644 index 0000000000..64b0a43291 --- /dev/null +++ b/predicators/ground_truth_models/minigrid_env/options.py @@ -0,0 +1,76 @@ +"""Ground-truth options for the sokoban environment.""" + +from typing import Dict, Sequence, Set + +import numpy as np +from gym.spaces import Box +from minigrid.core.actions import Actions + +from predicators import utils +from predicators.ground_truth_models import GroundTruthOptionFactory +from predicators.structs import Action, Array, Object, ParameterizedOption, \ + ParameterizedPolicy, Predicate, State, Type + + +class MiniGridGroundTruthOptionFactory(GroundTruthOptionFactory): + """Ground-truth options for the minigrid environment.""" + + @classmethod + def get_env_names(cls) -> Set[str]: + return {"minigrid_env"} + + @classmethod + def get_options(cls, env_name: str, types: Dict[str, Type], + predicates: Dict[str, Predicate], + action_space: Box) -> Set[ParameterizedOption]: + + # Reformat names for consistency with other option naming. + def _format_name(name: str) -> str: + return "".join([n.capitalize() for n in name.split(" ")]) + + options: Set[ParameterizedOption] = { + utils.SingletonParameterizedOption( + _format_name(name), cls._create_policy(discrete_action=i)) + for i, name in {value: key for key, value in Actions.__members__.items()}.items() + } + + # FindObj option. + object_type = types["obj"] + FindObjOption = ParameterizedOption( + "FindObj", + [object_type], + Box(low=np.array([]), high=np.array([]), shape=(0, )), + policy=cls._create_find_obj_policy(), + initiable=lambda s, m, o, p: True, + terminal=lambda s, m, o, p: s.get(o[0], "type") == 8 and s.get(o[0], "state") != -1) # 8 is the goal enum type + options.add(FindObjOption) + + # ReplanToObj option. 
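One singleton option is created per primitive MiniGrid action by inverting Actions.__members__ into an index-to-name map. The sketch below reproduces that inversion with a plain dict standing in for the enum; the dict contents assume the standard MiniGrid action ordering and the variable names are illustrative, not part of the diff:

members = {"left": 0, "right": 1, "forward": 2, "pickup": 3,
           "drop": 4, "toggle": 5, "done": 6}   # assumed MiniGrid ordering
index_to_name = {value: key for key, value in members.items()}
option_names = {i: name.capitalize() for i, name in index_to_name.items()}
assert option_names[2] == "Forward"   # matches options["Forward"] used by the NSRTs

The ReplanToObj option declared next reuses the same one-hot policy with discrete_action=6, i.e. the "Done" slot under this assumed ordering.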
+ ReplanToObj = utils.SingletonParameterizedOption("ReplanToObj", cls._create_policy(discrete_action=6)) + options.add(ReplanToObj) + + return options + + @classmethod + def _create_policy(cls, discrete_action: int) -> ParameterizedPolicy: + + def policy(state: State, memory: Dict, objects: Sequence[Object], + params: Array) -> Action: + del state, memory, objects, params # unused. + arr = np.zeros(7, dtype=np.float32) + arr[discrete_action] = 1 + return Action(arr) + + return policy + + @classmethod + def _create_find_obj_policy(cls) -> ParameterizedPolicy: + + def policy(state: State, memory: Dict, objects: Sequence[Object], + params: Array) -> Action: + del state, memory, objects, params # unused. + arr = np.zeros(7, dtype=np.float32) + arr[np.random.choice([0, 1, 2], 1, p=[0.2, 0.2, 0.6])[0]] = 1 + return Action(arr) + + return policy diff --git a/predicators/nsrt_learning/nsrt_learning_main.py b/predicators/nsrt_learning/nsrt_learning_main.py index d13ef054b6..72abeb4ac2 100644 --- a/predicators/nsrt_learning/nsrt_learning_main.py +++ b/predicators/nsrt_learning/nsrt_learning_main.py @@ -119,7 +119,8 @@ def learn_nsrts_from_data( if CFG.strips_learner != "oracle" or CFG.sampler_learner != "oracle" or \ CFG.option_learner != "no_learning": # Updates the PNADs in-place. - _learn_pnad_options(pnads, known_options, action_space) + if CFG.option_learner != "no_learning": + _learn_pnad_options(pnads, known_options, action_space) # STEP 4: Learn samplers (sampler_learning.py) and update PNADs. _learn_pnad_samplers(pnads, sampler_learner) # in-place update diff --git a/predicators/nsrt_learning/strips_learning/base_strips_learner.py b/predicators/nsrt_learning/strips_learning/base_strips_learner.py index 5d3aa998ac..c64665d584 100644 --- a/predicators/nsrt_learning/strips_learning/base_strips_learner.py +++ b/predicators/nsrt_learning/strips_learning/base_strips_learner.py @@ -162,7 +162,11 @@ def _check_single_demo_preservation( traj_goal, option_plan, atoms_seq) return ground_nsrt_plan is not None - def _recompute_datastores_from_segments(self, pnads: List[PNAD]) -> None: + def _recompute_datastores_from_segments(self, + pnads: List[PNAD], + check_only_preconditions: bool = False, + check_assertion: bool = True, + any_matching: bool = False) -> None: """For the given PNADs, wipe and recompute the datastores. 
Uses a "rationality" heuristic, where for each segment, we @@ -182,7 +186,7 @@ def _recompute_datastores_from_segments(self, pnads: List[PNAD]) -> None: objects = set(seg_traj[0].states[0]) for segment in seg_traj: best_pnad, best_sub = self._find_best_matching_pnad_and_sub( - segment, objects, pnads) + segment, objects, pnads, check_only_preconditions, check_assertion, any_matching) if best_pnad is not None: assert best_sub is not None best_pnad.add_to_datastore((segment, best_sub), @@ -193,7 +197,9 @@ def _find_best_matching_pnad_and_sub( segment: Segment, objects: Set[Object], pnads: List[PNAD], - check_only_preconditions: bool = False + check_only_preconditions: bool = False, + check_assertion: bool = True, + any_matching: bool = False, ) -> Tuple[Optional[PNAD], Optional[Dict[Variable, Object]]]: """Find the best matching PNAD (if any) given our rationality-based score function, and return the PNAD and substitution necessary to @@ -226,7 +232,8 @@ def _find_best_matching_pnad_and_sub( for pnad in pnads: param_opt, opt_vars = pnad.option_spec if param_opt != segment_param_option: - continue + if not any_matching: + continue isub = dict(zip(opt_vars, segment_option_objs)) if segment in pnad.seg_to_keep_effects_sub: # If there are any variables only in the keep effects, @@ -244,7 +251,8 @@ def _find_best_matching_pnad_and_sub( # If the preconditions don't hold in the segment's # initial atoms, skip. if not ground_op.preconditions.issubset(segment.init_atoms): - continue + if not any_matching: + continue next_atoms = utils.apply_operator(ground_op, segment.init_atoms) if not check_only_preconditions: @@ -265,7 +273,8 @@ def _find_best_matching_pnad_and_sub( # with a most-general PNAD that has no add effects # and all other predicates sidelined, and thus this # assertion must hold. - assert next_atoms.issubset(segment.final_atoms) + if check_assertion: + assert next_atoms.issubset(segment.final_atoms) # This ground PNAD covers this segment. Score it! score = self._score_segment_ground_op_match(segment, ground_op) if score < best_score: # we want a closer match diff --git a/predicators/nsrt_learning/strips_learning/gen_to_spec_learner.py b/predicators/nsrt_learning/strips_learning/gen_to_spec_learner.py index 59906cd49a..f66cdd2425 100644 --- a/predicators/nsrt_learning/strips_learning/gen_to_spec_learner.py +++ b/predicators/nsrt_learning/strips_learning/gen_to_spec_learner.py @@ -10,8 +10,25 @@ from predicators.settings import CFG from predicators.structs import PNAD, GroundAtom, Object, \ ParameterizedOption, Segment, STRIPSOperator, Variable, \ - _GroundSTRIPSOperator + _GroundSTRIPSOperator, _Atom, LowLevelTrajectory, Predicate, Type, Action, LiftedAtom, NSRT +from predicators.planning import task_plan, task_plan_grounding, _SkeletonSearchTimeout +import re +name_to_actions = { + "Move": 0, + "Actions.pickup_0": 3, + "Actions.pickup_1": 4, + "Actions.pickup_2": 5, + "Actions.drop_0": 6, + "Actions.drop_1": 7, + "Actions.drop_2": 8, + "Actions.drop_in": 9, + "Actions.toggle": 10, + "Actions.close": 11, + "Actions.open": 12, + "Actions.cook": 13, + "Actions.slice": 14 +} class GeneralToSpecificSTRIPSLearner(BaseSTRIPSLearner): """Base class for a general-to-specific STRIPS learner.""" @@ -138,7 +155,6 @@ def get_pnads_with_keep_effects(pnad: PNAD) -> Set[PNAD]: # Remember to copy seg_to_keep_effects_sub into the new_pnad! 
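The new keyword flags relax segment-to-PNAD matching in stages: any_matching keeps candidates that the option or precondition filters would otherwise reject, and check_assertion=False skips the final subset assertion. A standalone sketch of the guarded-continue pattern behind any_matching, with illustrative names only:

def filter_candidates(candidates, segment_option, any_matching=False):
    # Strict mode drops mismatching candidates; any_matching keeps them.
    kept = []
    for cand in candidates:
        if cand["option"] != segment_option:
            if not any_matching:
                continue
        kept.append(cand)
    return kept

pnads = [{"name": "A", "option": "Pickup"}, {"name": "B", "option": "Drop"}]
assert [p["name"] for p in filter_candidates(pnads, "Pickup")] == ["A"]
assert [p["name"] for p in filter_candidates(pnads, "Pickup", any_matching=True)] == ["A", "B"]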
new_pnad.seg_to_keep_effects_sub = pnad.seg_to_keep_effects_sub new_pnads_with_keep_effects.add(new_pnad) - return new_pnads_with_keep_effects def _reset_all_segment_necessary_add_effs(self) -> None: @@ -499,3 +515,591 @@ def _assert_all_data_in_exactly_one_datastore(self, continue for segment in seg_traj: assert segment in all_segs_in_data + +class BackwardForwardSTRIPSLearner(GeneralToSpecificSTRIPSLearner): + """Learn STRIPS operators by backchaining and forward search.""" + + def _learn(self) -> List[PNAD]: + # Initialize the most general PNADs by merging self._initial_pnads. + # As a result, we will have one very general PNAD per option. + param_opt_to_nec_pnads: Dict[ParameterizedOption, List[PNAD]] = {} + # Extract all parameterized options from the data. + parameterized_options = set() + for ll_traj, seg_traj in zip(self._trajectories, + self._segmented_trajs): + if not ll_traj.is_demo: + continue + for segment in seg_traj: + parameterized_options.add(segment.get_option().parent) + + # Set up the param_opt_to_nec_pnads dictionary. + for param_opt in parameterized_options: + param_opt_to_nec_pnads[param_opt] = [] + + prev_itr_ops: Set[STRIPSOperator] = set() + + # Load initial pnad set + if CFG.backward_forward_load_initial: + with open("test_saved.NSRTs.txt", "r") as file: + content = file.read() + nsrt_strs = ["NSRT-" + nsrt_str for nsrt_str in content.split("NSRT-") if nsrt_str != ''] + pnads = [self.parse_nsrt_block(nsrt_str) for nsrt_str in nsrt_strs] + self._recompute_datastores_from_segments(pnads) + for pnad in pnads: + param_opt_to_nec_pnads[pnad.option_spec[0]].append(pnad) + ### + + # We loop until the harmless PNADs induced by our procedure + # converge to a fixed point (i.e, they don't change after two + # subsequent iterations). + for _ in range(10): + # Run multiple passes of backchaining over the data until + # convergence to a fixed point. Note that this process creates + # operators with only parameters, preconditions, and add effects. + + # Step 1: Run backchaining + self._backchain_multipass(param_opt_to_nec_pnads) + + # Step 2: Strip preconditions (optional) + for pnads in param_opt_to_nec_pnads.values(): + for pnad in pnads: + pnad.op = pnad.op.copy_with(preconditions=set(), ignore_effects=set()) + + # Step 3: Forward refinement + self._forward_one_pass(param_opt_to_nec_pnads) + + # Recompute datastores. + cur_itr_pnads_unfiltered = [ + pnad for pnads in param_opt_to_nec_pnads.values() + for pnad in pnads + ] + self._recompute_datastores_from_segments(cur_itr_pnads_unfiltered, check_only_preconditions=True, check_assertion=False) + + # Induce delete effects, ignore effects and potentially + # keep effects. + self._induce_delete_side_keep(param_opt_to_nec_pnads) + + # Harmlessness should now hold, but it's slow to check. + if CFG.backchaining_check_intermediate_harmlessness: + assert self._check_harmlessness( + self._get_uniquely_named_nec_pnads(param_opt_to_nec_pnads)) + + # Recompute datastores and filter out PNADs that don't have datastores. + cur_itr_pnads_unfiltered = [ + pnad for pnads in param_opt_to_nec_pnads.values() + for pnad in pnads + ] + self._recompute_datastores_from_segments(cur_itr_pnads_unfiltered) + cur_itr_pnads_filtered = [] + for pnad in cur_itr_pnads_unfiltered: + if len(pnad.datastore) > 0: + # new_pre = self._induce_preconditions_via_intersection(pnad) + # NOTE: this implicitly changes param_opt_to_nec_pnads + # as well, since we're directly modifying the PNAD objects. 
+ # nad.op = pnad.op.copy_with(preconditions=new_pre) + cur_itr_pnads_filtered.append(pnad) + else: + param_opt_to_nec_pnads[pnad.option_spec[0]].remove(pnad) + del cur_itr_pnads_unfiltered # should be unused after this + + # Check if the PNAD set has converged. If so, break. + if {pnad.op for pnad in cur_itr_pnads_filtered} == prev_itr_ops: + break + + prev_itr_ops = {pnad.op for pnad in cur_itr_pnads_filtered} + + # Assign a unique name to each PNAD. + final_pnads = self._get_uniquely_named_nec_pnads( + param_opt_to_nec_pnads) + # Assert data has been correctly partitioned amongst PNADs. + # self._assert_all_data_in_exactly_one_datastore(final_pnads) + return final_pnads + + def parse_nsrt_block(self, block: str) -> PNAD: + """Parses a single NSRT block into an PNAD object.""" + lines = block.strip().split("\n") + + name_match = re.match(r"(\S+):", lines[0]) + name = name_match.group(1) if name_match else "" + + parameters = re.findall(r"\?x\d+:\w+", lines[1]) + + def extract_effects(label: str) -> Set[str]: + """Extracts a list of predicates from labeled sections.""" + for line in lines: + if line.strip().startswith(label): + return set(re.findall(r"\w+\(.*?\)", line)) + return set() + + preconditions = extract_effects("Preconditions") + add_effects = extract_effects("Add Effects") + delete_effects = extract_effects("Delete Effects") + ignore_effects = extract_effects("Ignore Effects") + + option_spec_match = re.search(r"Option Spec:\s*(.*)", block) + option_spec = option_spec_match.group(1) if option_spec_match else "" + + objects = set() + atoms = set() + option_specs = {} + for traj in self._segmented_trajs: + for segment in traj: + for state in segment.states: + for k, v in state.items(): + objects.add(k) + atoms |= segment.init_atoms | segment.final_atoms + option_specs[segment.get_option().parent.name] = segment.get_option().parent + all_predicates_list = [(atom.predicate.name,atom.predicate) for atom in atoms] + def get_predicate(name, entities): + for pred_name, pred in all_predicates_list: + if pred_name == pred_name and pred.arity == len(entities): + valid_types = True + for i, ent in enumerate(entities): + if ent.type != pred.types[i]: + valid_types = False + if valid_types: + return pred + raise NotImplementedError + + types = {obj.type.name:obj.type for obj in objects} + + def extract_parameters(predicate: str) -> Set[str]: + parameter_pattern = re.compile(r"\?x\d+:\w+") # Matches variables like ?x0:obj_type + matches = parameter_pattern.findall(predicate) + return matches + + parameters = [Variable(param.split(":")[0], types[param.split(":")[1]]) for param in parameters] + preconditions = set([LiftedAtom(get_predicate(pre.split("(")[0], [Variable(param.split(":")[0], types[param.split(":")[1]]) for param in extract_parameters(pre)]), [Variable(param.split(":")[0], types[param.split(":")[1]]) for param in extract_parameters(pre)]) for pre in preconditions]) + add_effects = set([LiftedAtom(get_predicate(add.split("(")[0], [Variable(param.split(":")[0], types[param.split(":")[1]]) for param in extract_parameters(add)]), [Variable(param.split(":")[0], types[param.split(":")[1]]) for param in extract_parameters(add)]) for add in add_effects]) + delete_effects = set([LiftedAtom(get_predicate(dle.split("(")[0], [Variable(param.split(":")[0], types[param.split(":")[1]]) for param in extract_parameters(dle)]), [Variable(param.split(":")[0], types[param.split(":")[1]]) for param in extract_parameters(dle)]) for dle in delete_effects]) + ignore_effects = set([get_predicate(ige, None) 
for ige in ignore_effects]) + option_spec = (option_specs[option_spec.split("(")[0]], []) + + nsrt = NSRT(name, parameters, preconditions, add_effects, delete_effects, ignore_effects, option_spec, [], None) + return PNAD(nsrt.op, [], option_spec) + + def _forward_one_pass( + self, param_opt_to_nec_pnads: Dict[ParameterizedOption, List[PNAD]] + ) -> None: + """Perform one forward search passes to refine PNAD preconditions + """ + + for ll_traj, seg_traj in zip(self._trajectories, self._segmented_trajs): + if not ll_traj.is_demo: + continue + task = self._train_tasks[ll_traj.train_task_idx] + + # Get initial atoms and object list + objects, _, _, ground_atoms_traj, _ = parse_objs_preds_and_options( + ll_traj, train_task_idx=ll_traj.train_task_idx) + init_atoms = ground_atoms_traj[1][0] + + prev_op_set: Set[STRIPSOperator] = set() + + nsrts = [pnad.op for pnads in param_opt_to_nec_pnads.values() + for pnad in pnads] + nsrt_to_option = {pnad.op:pnad.option_spec for pnads in param_opt_to_nec_pnads.values() for pnad in pnads} + predicates = self._predicates + + # Plan using current operators + ground_nsrts, reachable_atoms = task_plan_grounding( + init_atoms, objects, nsrts, allow_noops=True) + heuristic = utils.create_task_planning_heuristic( + "hadd", init_atoms, task.goal, ground_nsrts, + predicates, objects) + task_plan_generator = task_plan( + init_atoms, task.goal, ground_nsrts, + reachable_atoms, heuristic, + timeout=100, seed=123, max_skeletons_optimized=3) + + skeleton, _, _ = next(task_plan_generator) + + # Check if plan matches the actual low-level trajectory + planned_options = [] + for step in skeleton: + planned_options.append(nsrt_to_option[step.parent][0]) + + for i, planned_option in enumerate(planned_options): + if seg_traj[i].get_option().name != planned_option.name: + # TODO should not just be the first + pnad = None + for option_pnad in param_opt_to_nec_pnads[planned_option]: + if pnad is None: + pnad = option_pnad + if len(option_pnad.op.preconditions) < len(pnad.op.preconditions): + pnad = option_pnad + positive_data = pnad.datastore + diff_atoms = [] + diff_preds = [] + necessary_effects = set.union(*[seg.necessary_add_effects for seg in seg_traj]) + + for pos_seg in positive_data: + curr_diff_atoms = (pos_seg[0].init_atoms - seg_traj[i].init_atoms) & necessary_effects + diff_atoms.append(curr_diff_atoms) + diff_preds.append(set([atom.predicate for atom in curr_diff_atoms])) + + # if diff_preds == [] or set.intersection(*[s for s in diff_preds]) == set(): + # diff_atoms = [] + # diff_preds = [] + # for pos_seg in positive_data: + # curr_diff_atoms = (pos_seg[0].init_atoms - seg_traj[i].init_atoms) + # diff_atoms.append(curr_diff_atoms) + # diff_preds.append(set([atom.predicate for atom in curr_diff_atoms])) + + new_pre = set() + new_params = [] + print() + print(planned_option, set.intersection(*[s for s in diff_preds])) + if diff_preds != []: + new_preds = set.intersection(*[s for s in diff_preds]) + if new_preds != set(): + for pred in new_preds: + best_pnad, best_sub = self._find_best_matching_pnad_and_sub(positive_data[0][0], objects, param_opt_to_nec_pnads[planned_option], check_only_preconditions=True, check_assertion=False, any_matching=True) + pred_objs = [atom.objects for atom in positive_data[0][0].init_atoms if atom.predicate == pred][0] + print(pred_objs) + obj_vars = {v:k for k,v in best_sub.items()} + if best_pnad is not None: + params = [] + for obj in pred_objs: + if obj in obj_vars: + params.append(obj_vars[obj]) + else: + params.append(Variable("?x" + 
str(len(obj_vars.keys())), obj.type)) + new_pre.add(LiftedAtom(pred, params)) + new_params += params + print(params) + print(pnad) + if len(new_pre) > len(pnad.op.preconditions): + new_params += pnad.op.parameters + pnad.op = pnad.op.copy_with(parameters=list(set(new_params)),preconditions=new_pre) + + + # # Check for convergence + # cur_op_set = {pnad.op for pnads in param_opt_to_nec_pnads.values() + # for pnad in pnads} + # if cur_op_set == prev_op_set: + # break + # prev_op_set = cur_op_set + + def _backchain_multipass( + self, param_opt_to_nec_pnads: Dict[ParameterizedOption, + List[PNAD]]) -> None: + """Take multiple passes through the demonstrations, running + self._backchain_one_pass() each time. + + Keep going until the PNADs reach a fixed point. Note that this + process creates operators with only parameters, preconditions, + and add effects. + """ + while True: + # Before each pass, clear the poss_keep_effects + # of all the PNADs. We do this because we only want the + # poss_keep_effects of the final pass, where the PNADs did + # not change. However, we cannot simply clear the + # pnad.seg_to_keep_effects_sub because some of these + # substitutions might be necessary if this happens to be + # a PNAD that already has keep effects. Thus, we call a + # method that handles this correctly. + for pnads in param_opt_to_nec_pnads.values(): + for pnad in pnads: + self.clear_unnecessary_keep_effs(pnad) + # Run one pass of backchaining. + nec_pnad_set_changed = self._backchain_one_pass( + param_opt_to_nec_pnads) + if not nec_pnad_set_changed: + break + + def _backchain_one_pass( + self, param_opt_to_nec_pnads: Dict[ParameterizedOption, + List[PNAD]]) -> bool: + """Take one pass through the demonstrations in the given order. + + Go through each one from the end back to the start, making the + PNADs more specific whenever needed. Return whether any PNAD was + changed. + """ + # Reset all segments' necessary_add_effects so that they aren't + # accidentally used from a previous iteration of backchaining. + self._reset_all_segment_necessary_add_effs() + nec_pnad_set_changed = False + for ll_traj, seg_traj in zip(self._trajectories, + self._segmented_trajs): + if not ll_traj.is_demo: + continue + traj_goal = self._train_tasks[ll_traj.train_task_idx].goal + atoms_seq = utils.segment_trajectory_to_atoms_sequence(seg_traj) + assert traj_goal.issubset(atoms_seq[-1]) + # This variable, necessary_image, gets updated as we + # backchain. It always holds the set of ground atoms that + # are necessary for the remainder of the plan to reach the + # goal. At the start, necessary_image is simply the goal. + necessary_image = set(traj_goal) + for t in range(len(atoms_seq) - 2, -1, -1): + segment = seg_traj[t] + option = segment.get_option() + # Find the necessary PNADs associated with this option. If + # there are none, then use the general PNAD associated with + # this option. (But make sure to use a copy of it, because we + # don't want the general PNAD to get mutated when we mutate + # necessary PNADs!) + if len(param_opt_to_nec_pnads[option.parent]) == 0: + general_pnad = self._create_general_pnad_for_option( + option.parent) + pnads_for_option = [ + PNAD(general_pnad.op, list(general_pnad.datastore), + general_pnad.option_spec) + ] + else: + pnads_for_option = param_opt_to_nec_pnads[option.parent] + + # Compute the ground atoms that must be added on this timestep. + # They must be a subset of the current PNAD's add effects. 
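The set arithmetic behind this step is small but central: the atoms in necessary_image that are missing from the current state are exactly what the chosen operator must add, and the image is then rolled back through the operator's add effects and preconditions. A worked example with plain string atoms (all atom names are made up for illustration):

necessary_image = {"At(agent, goal)", "Holding(key)"}
atoms_before_step = {"Holding(key)", "At(agent, loc3)"}
necessary_add_effects = necessary_image - atoms_before_step
assert necessary_add_effects == {"At(agent, goal)"}

add_effects = {"At(agent, goal)"}
preconditions = {"Holding(key)", "At(agent, loc3)"}
necessary_image = (necessary_image - add_effects) | preconditions
assert necessary_image == {"Holding(key)", "At(agent, loc3)"}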
+ necessary_add_effects = necessary_image - atoms_seq[t] + assert necessary_add_effects.issubset(segment.add_effects) + # Update the segment's necessary_add_effects. + segment.necessary_add_effects = necessary_add_effects + + # We start by checking if any of the PNADs associated with the + # demonstrated option are able to match this transition. + objects = set(segment.states[0]) + pnad, var_to_obj = self._find_best_matching_pnad_and_sub( + segment, objects, pnads_for_option) + if pnad is not None: + assert var_to_obj is not None + obj_to_var = {v: k for k, v in var_to_obj.items()} + assert len(var_to_obj) == len(obj_to_var) + ground_op = pnad.op.ground( + tuple(var_to_obj[var] for var in pnad.op.parameters)) + if len(param_opt_to_nec_pnads[option.parent]) == 0: + param_opt_to_nec_pnads[option.parent].append(pnad) + segs_in_pnad = { + datapoint[0] + for datapoint in pnad.datastore + } + # In this case, we want to move the segment from + # another PNAD into the current PNAD. Note that + # we don't have to recompute the PNAD's add + # effects or preconditions because of the fact that + # this PNAD was found by the _find_best_matching + # function (which internally checks that the + # preconditions and add effects are all correct). + if segment not in segs_in_pnad: + # Find PNAD that the segment is currently in. + for seg_pnad in pnads_for_option: + segs_in_seg_pnad = [ + datapoint[0] + for datapoint in seg_pnad.datastore + ] + if segment in set(segs_in_seg_pnad): + seg_idx = segs_in_seg_pnad.index(segment) + seg_pnad.datastore.pop(seg_idx) + break + pnad.datastore.append((segment, var_to_obj)) + self._remove_empty_datastore_pnads( + param_opt_to_nec_pnads, option.parent) + + # If we weren't able to find a substitution (i.e, the above + # _find_best_matching call didn't yield a PNAD), we need to + # spawn a new PNAD from the most general PNAD to cover + # these necessary add effects. + else: + nec_pnad_set_changed = True + pnad = self.spawn_new_pnad(segment) + param_opt_to_nec_pnads[option.parent].append(pnad) + + # Recompute datastores for ALL PNADs associated with this + # option. We need to do this because the new PNAD may now + # be a better match for some transition that we previously + # matched to another PNAD. + self._recompute_datastores_from_segments( + param_opt_to_nec_pnads[option.parent]) + # Now that we have done this, certain PNADs may be + # left with empty datastores. Remove these. + self._remove_empty_datastore_pnads(param_opt_to_nec_pnads, + option.parent) + + # Recompute all preconditions, now that we have recomputed + # the datastores. + for nec_pnad in param_opt_to_nec_pnads[option.parent]: + if len(nec_pnad.datastore) > 0: + pre = self._induce_preconditions_via_intersection( + nec_pnad) + nec_pnad.op = nec_pnad.op.copy_with( + preconditions=pre) + + # After all this, the unification call that failed earlier + # (leading us into the current else statement) should work. + best_score_pnad, var_to_obj = \ + self._find_best_matching_pnad_and_sub( + segment, objects, + param_opt_to_nec_pnads[option.parent]) + assert var_to_obj is not None + assert best_score_pnad == pnad + # Also, since this segment caused us to induce the new + # PNAD, it should appear in this new PNAD's datastore. 
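The precondition recomputation above relies on _induce_preconditions_via_intersection, which (after lifting) intersects the initial atoms of every segment in a PNAD's datastore. The same idea over plain ground-atom sets, with made-up atoms for illustration:

segment_init_atoms = [
    {"IsAgent(agent)", "At(agent, loc1)", "Found(key)"},
    {"IsAgent(agent)", "At(agent, loc2)", "Found(key)"},
    {"IsAgent(agent)", "At(agent, loc2)", "Found(ball)"},
]
induced_preconditions = set.intersection(*segment_init_atoms)
assert induced_preconditions == {"IsAgent(agent)"}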
+ segs_in_pnad = { + datapoint[0] + for datapoint in pnad.datastore + } + assert segment in segs_in_pnad + obj_to_var = {v: k for k, v in var_to_obj.items()} + assert len(var_to_obj) == len(obj_to_var) + ground_op = pnad.op.ground( + tuple(var_to_obj[var] for var in pnad.op.parameters)) + + self._update_pnad_seg_to_keep_effs(pnad, necessary_image, + ground_op, obj_to_var, + segment) + + # Update necessary_image for this timestep. It no longer + # needs to include the ground add effects of this PNAD, but + # must now include its ground preconditions. + necessary_image -= { + a.ground(var_to_obj) + for a in pnad.op.add_effects + } + necessary_image |= { + a.ground(var_to_obj) + for a in pnad.op.preconditions + } + return nec_pnad_set_changed + + @staticmethod + def _remove_empty_datastore_pnads(param_opt_to_nec_pnads: Dict[ + ParameterizedOption, List[PNAD]], + param_opt: ParameterizedOption) -> None: + """Removes all PNADs associated with the given param_opt that have + empty datastores from the input param_opt_to_nec_pnads dict.""" + pnads_to_rm = [] + for pnad in param_opt_to_nec_pnads[param_opt]: + if len(pnad.datastore) == 0: + pnads_to_rm.append(pnad) + for rm_pnad in pnads_to_rm: + param_opt_to_nec_pnads[param_opt].remove(rm_pnad) + + def _induce_delete_side_keep( + self, param_opt_to_nec_pnads: Dict[ParameterizedOption, + List[PNAD]]) -> None: + """Given the current PNADs where add effects and preconditions are + correct, learn the remaining components: delete effects, side + predicates, and keep_effects. + + Note that this may require spawning new PNADs with keep effects. + """ + for option, nec_pnad_list in sorted(param_opt_to_nec_pnads.items(), + key=str): + pnads_with_keep_effects = set() + for pnad in nec_pnad_list: + self._compute_pnad_delete_effects(pnad) + self._compute_pnad_ignore_effects(pnad) + pnads_with_keep_effects |= self.get_pnads_with_keep_effects( + pnad) + param_opt_to_nec_pnads[option].extend( + list(pnads_with_keep_effects)) + + @classmethod + def get_name(cls) -> str: + return "backward-forward" + + def _assert_all_data_in_exactly_one_datastore(self, + pnads: List[PNAD]) -> None: + """Assert that every demo datapoint appears in exactly one datastore + among the given PNADs' datastores.""" + all_segs_in_data_lst = [ + seg for pnad in pnads for seg, _ in pnad.datastore + ] + all_segs_in_data = set(all_segs_in_data_lst) + assert len(all_segs_in_data_lst) == len(all_segs_in_data) + for ll_traj, seg_traj in zip(self._trajectories, + self._segmented_trajs): + if not ll_traj.is_demo: # ignore non-demo data + continue + for segment in seg_traj: + assert segment in all_segs_in_data + +def parse_objs_preds_and_options(trajectory, train_task_idx=0, all_atoms=None): + objs = set() + preds = set() + options = set() + state = None + states = [] + actions = [] + ground_atoms_traj = [] + obj_types = {"obj_type": Type("obj_type", ["is_obj"]), "surface_type": Type("surface_type", ["is_obj"])} + + for i, s in enumerate(trajectory.states): + ground_atoms = set() + for pred_str in s: + pred = None + choice = [] + pattern = re.compile(r"(\w+)\((.*?)\)") + match = pattern.match(pred_str) + if match: + func_name = match.group(1) + args = match.group(2).split(',') if match.group(2) else [] + for arg in args: + base_name = arg.strip().split("_")[0] + if base_name in ['box','cabinet','table','sink','bucket', 'ashcan']: + obj_types[base_name] = Type("surface_type", ["is_obj"]) + else: + obj_types[base_name] = Type("obj_type", ["is_obj"]) #Type(base_name, ["is_obj"]) + obj = 
obj_types[base_name](arg.strip()) + choice.append(obj) + objs.add(obj) + if len(args) == 1: + base_name = args[0].strip().split("_")[0] + pred = Predicate(func_name, [obj_types[base_name]], lambda s, o: True) + preds.add(pred) + elif len(args) == 2: + base_name1 = args[0].strip().split("_")[0] + base_name2 = args[1].strip().split("_")[0] + pred = Predicate(func_name, [obj_types[base_name1], obj_types[base_name2]], lambda s, o: True) + preds.add(pred) + else: + NotImplementedError("") + ground_atoms.add(GroundAtom(pred, choice)) + states.append(state) + ground_atoms_traj.append(ground_atoms) + + if i < len(trajectory.actions): + a_name = trajectory.actions[i] + + param_option = utils.SingletonParameterizedOption( + a_name, lambda s, m, o, p: Action(name_to_actions[a_name])) + options.add(param_option) + option = param_option.ground([], []) + action = option.policy(state) + action.set_option(option) + actions.append(action) + + def get_all_atoms_in_traj(ground_atoms_traj): + all_atoms = set() + for timestep_atoms in ground_atoms_traj: + all_atoms.update(timestep_atoms) + return all_atoms + + def add_neg_atoms(preds, lltraj, all_atoms): + ground_atoms = [] + neg_pred_table = {str(atom):GroundAtom(Predicate("~" + atom.predicate.name, atom.predicate.types, lambda s, o: True), atom.objects) for atom in all_atoms} + neg_pred_table["HandEmpty"] = GroundAtom(Predicate("handempty", [], lambda s, o: True), []) + for timestep_atoms in lltraj[1]: + missing_atoms = all_atoms - timestep_atoms + neg_atoms = set([neg_pred_table[str(atom)] for atom in missing_atoms]) + handempty = True + for atom in timestep_atoms: + if "inhandofrobot" in str(atom): + handempty = False + if handempty: + neg_atoms |= set([neg_pred_table["HandEmpty"]]) + ground_atoms.append(timestep_atoms | neg_atoms) + lltraj = (lltraj[0], ground_atoms) + return preds | set([v.predicate for v in neg_pred_table.values()]) | set([atom.predicate for atom in all_atoms]), lltraj + + lltraj = (LowLevelTrajectory([{obj:[0.0] for obj in objs} for _ in states], actions, _is_demo=True, _train_task_idx=train_task_idx), ground_atoms_traj) + if all_atoms is None: + all_atoms = get_all_atoms_in_traj(ground_atoms_traj) + preds, lltraj = add_neg_atoms(preds, lltraj, all_atoms) + else: + preds, lltraj = add_neg_atoms(preds, lltraj, all_atoms) + + return objs, preds, options, lltraj, all_atoms diff --git a/predicators/nsrt_learning/strips_learning/pnad_search_learner.py b/predicators/nsrt_learning/strips_learning/pnad_search_learner.py index f7ebf9bd21..6c9b795a12 100644 --- a/predicators/nsrt_learning/strips_learning/pnad_search_learner.py +++ b/predicators/nsrt_learning/strips_learning/pnad_search_learner.py @@ -11,7 +11,8 @@ GeneralToSpecificSTRIPSLearner from predicators.settings import CFG from predicators.structs import PNAD, GroundAtom, LowLevelTrajectory, \ - ParameterizedOption, Predicate, Segment, Task, _GroundSTRIPSOperator + ParameterizedOption, Predicate, Segment, Task, _GroundSTRIPSOperator, NSRT, Variable, LiftedAtom +import re class _PNADSearchOperator(abc.ABC): @@ -101,6 +102,7 @@ def _append_new_pnad_and_keep_effects( # that are unnecessary. 
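parse_objs_preds_and_options above recovers predicate names and typed objects from ground-atom strings with a single regex. The standalone sketch below walks through one such parse; the atom string and variable names are made up for illustration:

import re

pattern = re.compile(r"(\w+)\((.*?)\)")        # same pattern as above
match = pattern.match("inside(book_1,shelf_0)")
pred_name = match.group(1)                      # "inside"
args = [a.strip() for a in match.group(2).split(",")]
base_names = [a.split("_")[0] for a in args]    # used to pick obj vs. surface types
assert pred_name == "inside"
assert base_names == ["book", "shelf"]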
new_pnads = self._learner.recompute_pnads_from_effects( sorted(new_pnads)) + print(len(new_pnads)) return new_pnads def _get_backchaining_results( @@ -267,6 +269,70 @@ def recompute_pnads_from_effects(self, pnads: List[PNAD]) -> List[PNAD]: pnad_map[p.option_spec[0]].append(p) new_pnads = self._get_uniquely_named_nec_pnads(pnad_map) return new_pnads + + + def parse_nsrt_block(self, block: str) -> PNAD: + """Parses a single NSRT block into an PNAD object.""" + lines = block.strip().split("\n") + + name_match = re.match(r"(\S+):", lines[0]) + name = name_match.group(1) if name_match else "" + + parameters = re.findall(r"\?x\d+:\w+", lines[1]) + + def extract_effects(label: str) -> Set[str]: + """Extracts a list of predicates from labeled sections.""" + for line in lines: + if line.strip().startswith(label): + return set(re.findall(r"\w+\(.*?\)", line)) + return set() + + preconditions = extract_effects("Preconditions") + add_effects = extract_effects("Add Effects") + delete_effects = extract_effects("Delete Effects") + ignore_effects = extract_effects("Ignore Effects") + + option_spec_match = re.search(r"Option Spec:\s*(.*)", block) + option_spec = option_spec_match.group(1) if option_spec_match else "" + + objects = set() + atoms = set() + option_specs = {} + for traj in self._segmented_trajs: + for segment in traj: + for state in segment.states: + for k, v in state.items(): + objects.add(k) + atoms |= segment.init_atoms | segment.final_atoms + option_specs[segment.get_option().parent.name] = segment.get_option().parent + all_predicates_list = [(atom.predicate.name,atom.predicate) for atom in atoms] + def get_predicate(name, entities): + for pred_name, pred in all_predicates_list: + if pred_name == pred_name and pred.arity == len(entities): + valid_types = True + for i, ent in enumerate(entities): + if ent.type != pred.types[i]: + valid_types = False + if valid_types: + return pred + raise NotImplementedError + + types = {obj.type.name:obj.type for obj in objects} + + def extract_parameters(predicate: str) -> Set[str]: + parameter_pattern = re.compile(r"\?x\d+:\w+") # Matches variables like ?x0:obj_type + matches = parameter_pattern.findall(predicate) + return matches + + parameters = [Variable(param.split(":")[0], types[param.split(":")[1]]) for param in parameters] + preconditions = set([LiftedAtom(get_predicate(pre.split("(")[0], [Variable(param.split(":")[0], types[param.split(":")[1]]) for param in extract_parameters(pre)]), [Variable(param.split(":")[0], types[param.split(":")[1]]) for param in extract_parameters(pre)]) for pre in preconditions]) + add_effects = set([LiftedAtom(get_predicate(add.split("(")[0], [Variable(param.split(":")[0], types[param.split(":")[1]]) for param in extract_parameters(add)]), [Variable(param.split(":")[0], types[param.split(":")[1]]) for param in extract_parameters(add)]) for add in add_effects]) + delete_effects = set([LiftedAtom(get_predicate(dle.split("(")[0], [Variable(param.split(":")[0], types[param.split(":")[1]]) for param in extract_parameters(dle)]), [Variable(param.split(":")[0], types[param.split(":")[1]]) for param in extract_parameters(dle)]) for dle in delete_effects]) + ignore_effects = set([get_predicate(ige, None) for ige in ignore_effects]) + option_spec = (option_specs[option_spec.split("(")[0]], []) + + nsrt = NSRT(name, parameters, preconditions, add_effects, delete_effects, ignore_effects, option_spec, [], None) + return PNAD(nsrt.op, [], option_spec) def _learn(self) -> List[PNAD]: # Set up hill-climbing search over PNAD sets. 
@@ -285,6 +351,16 @@ def get_successors( for i, child in enumerate(op.get_successors(pnads)): yield (op, i), child, 1.0 # cost always 1 + # Load initial pnad set + if CFG.pnad_search_load_initial: + initial_state = None + with open("test_saved.NSRTs.txt", "r") as file: + content = file.read() + nsrt_strs = ["NSRT-" + nsrt_str for nsrt_str in content.split("NSRT-") if nsrt_str != ''] + pnads = [self.parse_nsrt_block(nsrt_str) for nsrt_str in nsrt_strs] + self._recompute_datastores_from_segments(pnads) + initial_state = frozenset(pnads) + # Run hill-climbing search. path, _, _ = utils.run_hill_climbing(initial_state=initial_state, check_goal=lambda _: False, @@ -296,6 +372,7 @@ def get_successors( # Extract the best PNADs set. final_pnads = path[-1] sorted_final_pnads = sorted(final_pnads) + # Fix naming. pnad_map: Dict[ParameterizedOption, List[PNAD]] = { p.option_spec[0]: [] diff --git a/predicators/perception/mini_behavior_env_perceiver.py b/predicators/perception/mini_behavior_env_perceiver.py new file mode 100644 index 0000000000..eb43c38a23 --- /dev/null +++ b/predicators/perception/mini_behavior_env_perceiver.py @@ -0,0 +1,227 @@ +"""A mini_behavior-specific perceiver.""" + +import sys +from typing import Dict, Tuple + +import numpy as np + +from predicators import utils +from predicators.settings import CFG +from predicators.envs.mini_behavior_env import MiniBehavior +from predicators.perception.base_perceiver import BasePerceiver +from predicators.structs import EnvironmentTask, GroundAtom, Object, \ + Observation, State, Task, Video +from mini_behavior.grid import BehaviorGrid +from mini_bddl import DEFAULT_STATES, STATE_FUNC_MAPPING, DEFAULT_ACTIONS, OBJECT_TO_IDX, IDX_TO_OBJECT, OBJECTS, ABILITIES + +class MiniBehaviorPerceiver(BasePerceiver): + """A mini_behavior-specific perceiver.""" + + def __init__(self) -> None: + super().__init__() + self.grid = BehaviorGrid(16, 16) + + @classmethod + def get_name(cls) -> str: + return "mini_behavior_env" + + def parse_mini_behavior_task(self, env_task: EnvironmentTask) -> Task: + state = self._observation_to_state(env_task.init_obs) + if env_task.goal_description == "Get to the goal": + IsAgent, At, IsGoal, IsBall, IsKey, IsBox, \ + IsRed, IsGreen, IsBlue, IsPurple, IsYellow, IsGrey, \ + Holding, Near = MiniBehavior.get_goal_predicates() + assert len(MiniBehavior.get_objects_of_enum(state, "agent")) == 1 + assert len(MiniBehavior.get_objects_of_enum(state, "goal")) == 1 + agent_obj = list(MiniBehavior.get_objects_of_enum(state, "agent"))[0] + goal_obj = list(MiniBehavior.get_objects_of_enum(state, "goal"))[0] + goal = {GroundAtom(IsAgent, [agent_obj]), + GroundAtom(At, [agent_obj, goal_obj]), + GroundAtom(IsGoal, [goal_obj])} + elif "go to the " in env_task.goal_description: + color, obj_type = env_task.goal_description.split("go to the ")[1].split(" ")[0:2] + obj_name = f"{color}_{obj_type}" + IsAgent, At, IsGoal, IsBall, IsKey, IsBox, \ + IsRed, IsGreen, IsBlue, IsPurple, IsYellow, IsGrey, \ + Holding, Near = MiniBehavior.get_goal_predicates() + assert len(MiniBehavior.get_objects_of_enum(state, "agent")) == 1 + assert len(MiniBehavior.get_objects_of_enum(state, obj_type)) > 1 + agent_obj = list(MiniBehavior.get_objects_of_enum(state, "agent"))[0] + for obj in MiniBehavior.get_objects_of_enum(state, obj_type): + if obj.name == obj_name: + goal_obj = obj + obj_type_to_predicate = { + "ball": IsBall, + "key": IsKey, + "box": IsBox + } + color_to_predicate = { + "red": IsRed, + "green": IsGreen, + "blue": IsBlue, + "purple": IsPurple, + 
"yellow": IsYellow, + "grey": IsGrey + } + goal = {GroundAtom(IsAgent, [agent_obj]), + GroundAtom(At, [agent_obj, goal_obj]), + GroundAtom(obj_type_to_predicate[obj_type], [goal_obj]), + GroundAtom(color_to_predicate[color], [goal_obj]), + } + elif env_task.goal_description == "get to the green goal square": + IsAgent, At, IsGoal, IsBall, IsKey, IsBox, \ + IsRed, IsGreen, IsBlue, IsPurple, IsYellow, IsGrey, \ + Holding, Near = MiniBehavior.get_goal_predicates() + assert len(MiniBehavior.get_objects_of_enum(state, "agent")) == 1 + assert len(MiniBehavior.get_objects_of_enum(state, "goal")) == 1 + agent_obj = list(MiniBehavior.get_objects_of_enum(state, "agent"))[0] + goal_obj = list(MiniBehavior.get_objects_of_enum(state, "goal"))[0] + goal = {GroundAtom(IsAgent, [agent_obj]), + GroundAtom(At, [agent_obj, goal_obj]), + GroundAtom(IsGoal, [goal_obj])} + elif env_task.goal_description.startswith("get a") or \ + env_task.goal_description.startswith("go get a") or \ + env_task.goal_description.startswith("fetch a") or \ + env_task.goal_description.startswith("go fetch a") or \ + env_task.goal_description.startswith("you must fetch a") or \ + env_task.goal_description.startswith("pick up the"): + color, obj_type = env_task.goal_description.split(" ")[-2:] + obj_name = f"{color}_{obj_type}" + IsAgent, At, IsGoal, IsBall, IsKey, IsBox, \ + IsRed, IsGreen, IsBlue, IsPurple, IsYellow, IsGrey, \ + Holding, Near = MiniBehavior.get_goal_predicates() + assert len(MiniBehavior.get_objects_of_enum(state, "agent")) == 1 + assert len(MiniBehavior.get_objects_of_enum(state, obj_type)) > 1 + agent_obj = list(MiniBehavior.get_objects_of_enum(state, "agent"))[0] + for obj in MiniBehavior.get_objects_of_enum(state, obj_type): + if obj.name == obj_name: + goal_obj = obj + obj_type_to_predicate = { + "ball": IsBall, + "key": IsKey, + "box": IsBox + } + color_to_predicate = { + "red": IsRed, + "green": IsGreen, + "blue": IsBlue, + "purple": IsPurple, + "yellow": IsYellow, + "grey": IsGrey + } + goal = {GroundAtom(Holding, [goal_obj]), + GroundAtom(obj_type_to_predicate[obj_type], [goal_obj]), + GroundAtom(color_to_predicate[color], [goal_obj])} + else: + raise NotImplementedError(f"Goal description {env_task.goal_description} not supported") + return Task(state, goal) + + def reset(self, env_task: EnvironmentTask) -> Task: + return self.parse_mini_behavior_task(env_task) + + def step(self, observation: Observation) -> State: + return self._observation_to_state(observation) + + def _observation_to_objects(self, obs: Observation) -> Dict[str, Tuple[int, int]]: + objs = [] + import ipdb; ipdb.set_trace() + visual = obs['image'] + self.grid.decode(visual) + for r in range(visual.shape[0]): + for c in range(visual.shape[1]): + obj = [IDX_TO_OBJECT[visual[r, c][0]], IDX_TO_COLOR[visual[r, c][1]], visual[r, c][2], r - self.agent_pov_pos[0], c - self.agent_pov_pos[1]] + if obj[0] == 'empty': + obj[1] = 'black' + objs.append(tuple(obj)) + return objs + + def transform_point(self, x1, y1, o1, x2, y2): + # Compute global coordinates directly + x_prime = x1 + x2 * np.cos(o1) - y2 * np.sin(o1) + y_prime = y1 + x2 * np.sin(o1) + y2 * np.cos(o1) + return x_prime, y_prime + + def _globalize_coords(self, r: int, c: int) -> Tuple[int, int]: + # Adjusted direction-to-radian mapping + direction_to_radian = { + 0: 0, # right + 1: -np.pi / 2, # down + 2: np.pi, # left + 3: np.pi / 2 # up + } + o1 = direction_to_radian[self.direction] + x1, y1 = self.agent_pos[0], self.agent_pos[1] + x2, y2 = r, -c # Use c directly + x_prime, y_prime = 
self.transform_point(x1, y1, o1, x2, y2) + return int(round(x_prime)), int(round(y_prime)) + + def _observation_to_state(self, obs: Observation) -> State: + import numpy as np + self.last_obs = obs + self.agent_pos = None + + objs = self._observation_to_objects(obs) + + def _get_object_name(r: int, c: int, type_name: str, color: str) -> str: + # Put the location of the static objects in their names for easier + # debugging. + if type_name == "agent": + return "agent" + if type_name in ["empty", "wall"]: + return f"{type_name}_{r}_{c}" + else: + return f"{color}_{type_name}" + + for type_name, color, obj_state, r, c in objs: + enum = MiniBehavior.name_to_enum[type_name] + if CFG.mini_behavior_gym_fully_observable: + global_r, global_c = r, c + else: + global_r, global_c = self._globalize_coords(r, c) + if type_name in ["goal", "agent"]: + object_name = type_name + if type_name == "agent" and not CFG.mini_behavior_gym_fully_observable: + assert (global_r, global_c) == self.agent_pos + else: + object_name = _get_object_name(global_r, global_c, type_name, color) + obj = Object(object_name, MiniBehavior.object_type) + self.state_dict[obj] = { + "row": global_r, + "column": global_c, + "type": enum, + "state": obj_state, + "color": color, + } + + if all([val["type"] != MiniBehavior.name_to_enum['goal'] for key, val in self.state_dict.items()]): + enum = MiniBehavior.name_to_enum["goal"] + object_name = "goal" + obj = Object(object_name, MiniBehavior.object_type) + self.state_dict[obj] = { + "row": sys.maxsize, + "column": sys.maxsize, + "type": enum, + "state": -1, + "color": 'green', + } + + for color in ['blue', 'green', 'grey', 'purple', 'red', 'yellow']: + for obj_type in ['key', 'ball', 'box']: + if all([not (val["type"] == MiniBehavior.name_to_enum[obj_type] and val["color"] == color) for key, val in self.state_dict.items()]): + enum = MiniBehavior.name_to_enum[obj_type] + object_name = f"{color}_{obj_type}" + obj = Object(object_name, MiniBehavior.object_type) + self.state_dict[obj] = { + "row": sys.maxsize, + "column": sys.maxsize, + "type": enum, + "state": -1, + "color": color, + } + + state = utils.create_state_from_dict(self.state_dict) + return state + + def render_mental_images(self, observation: Observation, + env_task: EnvironmentTask) -> Video: + raise NotImplementedError("Mental images not implemented for mini_behavior") diff --git a/predicators/perception/minigrid_env_perceiver.py b/predicators/perception/minigrid_env_perceiver.py new file mode 100644 index 0000000000..8be4309d88 --- /dev/null +++ b/predicators/perception/minigrid_env_perceiver.py @@ -0,0 +1,258 @@ +"""A minigrid-specific perceiver.""" + +import sys +from typing import Dict, Tuple + +import numpy as np + +from predicators import utils +from predicators.settings import CFG +from predicators.envs.minigrid_env import MiniGridEnv +from predicators.perception.base_perceiver import BasePerceiver +from predicators.structs import EnvironmentTask, GroundAtom, Object, \ + Observation, State, Task, Video + +from minigrid.core.constants import ( + COLORS, + IDX_TO_COLOR, + IDX_TO_OBJECT, +) + +class MiniGridPerceiver(BasePerceiver): + """A minigrid-specific perceiver.""" + + def __init__(self) -> None: + super().__init__() + self.state_dict = {} + self.agent_pov_pos = (3,6) # agent's point of view is always at (3,6) + self.agent_pos = (0,0) # starts at origin + self.direction = 0 # directions (right, down, left, up) + self.last_obs = None + + @classmethod + def get_name(cls) -> str: + return "minigrid_env" + + def 
parse_minigrid_task(self, env_task: EnvironmentTask) -> Task: + state = self._observation_to_state(env_task.init_obs) + if env_task.goal_description == "Get to the goal": + IsAgent, At, IsGoal, IsBall, IsKey, IsBox, \ + IsRed, IsGreen, IsBlue, IsPurple, IsYellow, IsGrey, \ + Holding, Near = MiniGridEnv.get_goal_predicates() + assert len(MiniGridEnv.get_objects_of_enum(state, "agent")) == 1 + assert len(MiniGridEnv.get_objects_of_enum(state, "goal")) == 1 + agent_obj = list(MiniGridEnv.get_objects_of_enum(state, "agent"))[0] + goal_obj = list(MiniGridEnv.get_objects_of_enum(state, "goal"))[0] + goal = {GroundAtom(IsAgent, [agent_obj]), + GroundAtom(At, [agent_obj, goal_obj]), + GroundAtom(IsGoal, [goal_obj])} + elif "go to the " in env_task.goal_description: + color, obj_type = env_task.goal_description.split("go to the ")[1].split(" ")[0:2] + obj_name = f"{color}_{obj_type}" + IsAgent, At, IsGoal, IsBall, IsKey, IsBox, \ + IsRed, IsGreen, IsBlue, IsPurple, IsYellow, IsGrey, \ + Holding, Near = MiniGridEnv.get_goal_predicates() + assert len(MiniGridEnv.get_objects_of_enum(state, "agent")) == 1 + assert len(MiniGridEnv.get_objects_of_enum(state, obj_type)) > 1 + agent_obj = list(MiniGridEnv.get_objects_of_enum(state, "agent"))[0] + for obj in MiniGridEnv.get_objects_of_enum(state, obj_type): + if obj.name == obj_name: + goal_obj = obj + obj_type_to_predicate = { + "ball": IsBall, + "key": IsKey, + "box": IsBox + } + color_to_predicate = { + "red": IsRed, + "green": IsGreen, + "blue": IsBlue, + "purple": IsPurple, + "yellow": IsYellow, + "grey": IsGrey + } + goal = {GroundAtom(IsAgent, [agent_obj]), + GroundAtom(At, [agent_obj, goal_obj]), + GroundAtom(obj_type_to_predicate[obj_type], [goal_obj]), + GroundAtom(color_to_predicate[color], [goal_obj]), + } + elif env_task.goal_description == "get to the green goal square": + IsAgent, At, IsGoal, IsBall, IsKey, IsBox, \ + IsRed, IsGreen, IsBlue, IsPurple, IsYellow, IsGrey, \ + Holding, Near = MiniGridEnv.get_goal_predicates() + assert len(MiniGridEnv.get_objects_of_enum(state, "agent")) == 1 + assert len(MiniGridEnv.get_objects_of_enum(state, "goal")) == 1 + agent_obj = list(MiniGridEnv.get_objects_of_enum(state, "agent"))[0] + goal_obj = list(MiniGridEnv.get_objects_of_enum(state, "goal"))[0] + goal = {GroundAtom(IsAgent, [agent_obj]), + GroundAtom(At, [agent_obj, goal_obj]), + GroundAtom(IsGoal, [goal_obj])} + elif env_task.goal_description.startswith("get a") or \ + env_task.goal_description.startswith("go get a") or \ + env_task.goal_description.startswith("fetch a") or \ + env_task.goal_description.startswith("go fetch a") or \ + env_task.goal_description.startswith("you must fetch a") or \ + env_task.goal_description.startswith("pick up the"): + color, obj_type = env_task.goal_description.split(" ")[-2:] + obj_name = f"{color}_{obj_type}" + IsAgent, At, IsGoal, IsBall, IsKey, IsBox, \ + IsRed, IsGreen, IsBlue, IsPurple, IsYellow, IsGrey, \ + Holding, Near = MiniGridEnv.get_goal_predicates() + assert len(MiniGridEnv.get_objects_of_enum(state, "agent")) == 1 + assert len(MiniGridEnv.get_objects_of_enum(state, obj_type)) > 1 + agent_obj = list(MiniGridEnv.get_objects_of_enum(state, "agent"))[0] + for obj in MiniGridEnv.get_objects_of_enum(state, obj_type): + if obj.name == obj_name: + goal_obj = obj + obj_type_to_predicate = { + "ball": IsBall, + "key": IsKey, + "box": IsBox + } + color_to_predicate = { + "red": IsRed, + "green": IsGreen, + "blue": IsBlue, + "purple": IsPurple, + "yellow": IsYellow, + "grey": IsGrey + } + goal = 
{GroundAtom(Holding, [goal_obj]), + GroundAtom(obj_type_to_predicate[obj_type], [goal_obj]), + GroundAtom(color_to_predicate[color], [goal_obj])} + else: + raise NotImplementedError(f"Goal description {env_task.goal_description} not supported") + return Task(state, goal) + + def reset(self, env_task: EnvironmentTask) -> Task: + self.state_dict.clear() + return self.parse_minigrid_task(env_task) + + def step(self, observation: Observation) -> State: + return self._observation_to_state(observation) + + def _observation_to_objects(self, obs: Observation) -> Dict[str, Tuple[int, int]]: + objs = [] + visual = obs[0]['image'] + direction = obs[0]['direction'] + objs.append(('agent', + None, + direction, + 0, + 0)) + objs.append(('empty', + 'black', + 0, + 0, + 0)) + for r in range(visual.shape[0]): + for c in range(visual.shape[1]): + obj = [IDX_TO_OBJECT[visual[r, c][0]], IDX_TO_COLOR[visual[r, c][1]], visual[r, c][2], r - self.agent_pov_pos[0], c - self.agent_pov_pos[1]] + if obj[0] == 'empty': + obj[1] = 'black' + objs.append(tuple(obj)) + return objs + + def transform_point(self, x1, y1, o1, x2, y2): + # Compute global coordinates directly + x_prime = x1 + x2 * np.cos(o1) - y2 * np.sin(o1) + y_prime = y1 + x2 * np.sin(o1) + y2 * np.cos(o1) + return x_prime, y_prime + + def _globalize_coords(self, r: int, c: int) -> Tuple[int, int]: + # Adjusted direction-to-radian mapping + direction_to_radian = { + 0: 0, # right + 1: -np.pi / 2, # down + 2: np.pi, # left + 3: np.pi / 2 # up + } + o1 = direction_to_radian[self.direction] + x1, y1 = self.agent_pos[0], self.agent_pos[1] + x2, y2 = r, -c # Use c directly + x_prime, y_prime = self.transform_point(x1, y1, o1, x2, y2) + return int(round(x_prime)), int(round(y_prime)) + + def _observation_to_state(self, obs: Observation) -> State: + import numpy as np + + self.direction = obs[0]['direction'] + if len(obs) == 5: + if obs[4]['last_action'] == 2: # Moved Forward + if (not np.array_equal(self.last_obs[0]['image'], obs[0]['image'])) or \ + not np.array_equal(obs[0]['image'][self.agent_pov_pos[0], self.agent_pov_pos[1]-1], np.array([2, 5, 0], dtype=np.uint8)): + if self.direction == 0: # right (0, 1) + self.agent_pos = (self.agent_pos[0], self.agent_pos[1] + 1) + elif self.direction == 1: # down (1, 0) + self.agent_pos = (self.agent_pos[0] + 1, self.agent_pos[1]) + elif self.direction == 2: # left (0, -1) + self.agent_pos = (self.agent_pos[0], self.agent_pos[1] - 1) + elif self.direction == 3: # up (-1, 0) + self.agent_pos = (self.agent_pos[0] - 1, self.agent_pos[1]) + self.last_obs = obs + + objs = self._observation_to_objects(obs) + + def _get_object_name(r: int, c: int, type_name: str, color: str) -> str: + # Put the location of the static objects in their names for easier + # debugging. 
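The egocentric-to-global conversion in transform_point is a standard 2D rotation about the agent's pose. A small worked check of that formula under the same direction-to-radian mapping; this is a standalone sketch, with the example pose chosen arbitrarily:

import numpy as np

def transform_point(x1, y1, o1, x2, y2):
    # Rotate the egocentric offset (x2, y2) by heading o1, then translate
    # by the agent's global position (x1, y1).
    x_prime = x1 + x2 * np.cos(o1) - y2 * np.sin(o1)
    y_prime = y1 + x2 * np.sin(o1) + y2 * np.cos(o1)
    return x_prime, y_prime

# Agent at (2, 3) with heading np.pi / 2 ("up" in the mapping above): an
# offset of one cell along the egocentric x-axis rotates onto the global
# y-axis.
x, y = transform_point(2, 3, np.pi / 2, 1, 0)
assert (round(x), round(y)) == (2, 4)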
+ if type_name == "agent": + return "agent" + if type_name in ["empty", "wall"]: + return f"{type_name}_{r}_{c}" + else: + return f"{color}_{type_name}" + + for type_name, color, obj_state, r, c in objs: + enum = MiniGridEnv.name_to_enum[type_name] + if CFG.minigrid_gym_fully_observable: + global_r, global_c = r, c + else: + global_r, global_c = self._globalize_coords(r, c) + if type_name in ["goal", "agent"]: + object_name = type_name + if type_name == "agent" and not CFG.minigrid_gym_fully_observable: + assert (global_r, global_c) == self.agent_pos + else: + object_name = _get_object_name(global_r, global_c, type_name, color) + obj = Object(object_name, MiniGridEnv.object_type) + self.state_dict[obj] = { + "row": global_r, + "column": global_c, + "type": enum, + "state": obj_state, + "color": color, + } + + if all([val["type"] != MiniGridEnv.name_to_enum['goal'] for key, val in self.state_dict.items()]): + enum = MiniGridEnv.name_to_enum["goal"] + object_name = "goal" + obj = Object(object_name, MiniGridEnv.object_type) + self.state_dict[obj] = { + "row": sys.maxsize, + "column": sys.maxsize, + "type": enum, + "state": -1, + "color": 'green', + } + + for color in ['blue', 'green', 'grey', 'purple', 'red', 'yellow']: + for obj_type in ['key', 'ball', 'box']: + if all([not (val["type"] == MiniGridEnv.name_to_enum[obj_type] and val["color"] == color) for key, val in self.state_dict.items()]): + enum = MiniGridEnv.name_to_enum[obj_type] + object_name = f"{color}_{obj_type}" + obj = Object(object_name, MiniGridEnv.object_type) + self.state_dict[obj] = { + "row": sys.maxsize, + "column": sys.maxsize, + "type": enum, + "state": -1, + "color": color, + } + + state = utils.create_state_from_dict(self.state_dict) + return state + + def render_mental_images(self, observation: Observation, + env_task: EnvironmentTask) -> Video: + raise NotImplementedError("Mental images not implemented for minigrid") diff --git a/predicators/planning.py b/predicators/planning.py index 123323ff0a..74f4603ef3 100644 --- a/predicators/planning.py +++ b/predicators/planning.py @@ -284,8 +284,10 @@ def task_plan_grounding( for nsrt in sorted(nsrts): for ground_nsrt in utils.all_ground_nsrts(nsrt, objects): if allow_noops or (ground_nsrt.add_effects - | ground_nsrt.delete_effects): + | ground_nsrt.delete_effects): ground_nsrts.append(ground_nsrt) + if CFG.single_grounding: + break reachable_atoms = utils.get_reachable_atoms(ground_nsrts, init_atoms) reachable_nsrts = [ nsrt for nsrt in ground_nsrts @@ -1208,15 +1210,15 @@ def run_task_plan_once( timeout -= duration plan, atoms_seq, metrics = next( task_plan(init_atoms, - goal, - ground_nsrts, - reachable_atoms, - heuristic, - seed, - timeout, - max_skeletons_optimized=1, - use_visited_state_set=True, - **kwargs)) + goal, + ground_nsrts, + reachable_atoms, + heuristic, + seed, + timeout, + max_skeletons_optimized=1, + use_visited_state_set=True, + **kwargs)) if len(plan) > max_horizon: raise PlanningFailure( "Skeleton produced by A-star exceeds horizon!") @@ -1243,7 +1245,7 @@ def run_task_plan_once( alias_flag = "--alias lama-first" else: raise ValueError("Unrecognized sesame_task_planner: " - f"{CFG.sesame_task_planner}") + f"{CFG.sesame_task_planner}") sas_file = generate_sas_file_for_fd(task, nsrts, preds, types, timeout, timeout_cmd, alias_flag, exec_str, @@ -1262,11 +1264,10 @@ def run_task_plan_once( list(objects), init_atoms, nsrts, float(max_horizon)) else: raise ValueError("Unrecognized sesame_task_planner: " - f"{CFG.sesame_task_planner}") + 
f"{CFG.sesame_task_planner}") necessary_atoms_seq = utils.compute_necessary_atoms_seq( plan, atoms_seq, goal) - return plan, necessary_atoms_seq, metrics diff --git a/predicators/settings.py b/predicators/settings.py index 4dc482e377..cd4642a59f 100644 --- a/predicators/settings.py +++ b/predicators/settings.py @@ -331,6 +331,22 @@ class GlobalSettings: # initialization and resetting. use Sokoban-small-v0 for tests sokoban_gym_name = "Sokoban-v0" + # minigrid env parameters + # Currently tested envs: + # "MiniGrid-Empty-5x5-v0" + # "MiniGrid-Empty-8x8-v0" + # "MiniGrid-Empty-16x16-v0" + # "MiniGrid-GoToObject-8x8-N2-v0" + # "MiniGrid-Fetch-8x8-N3-v0" + minigrid_gym_name = "MiniGrid-Fetch-8x8-N3-v0" + minigrid_gym_render = False + minigrid_gym_fully_observable = False + + # mini_behavior env parameters + mini_behavior_env_name = "MiniGrid-SortingBooks-16x16-N2-v0" + mini_behavior_env_render = False + mini_behavior_env_fully_observable = True + # kitchen env parameters kitchen_use_perfect_samplers = False kitchen_goals = "all" @@ -496,6 +512,9 @@ class GlobalSettings: enable_harmless_op_pruning = False # some methods may want this to be True precondition_soft_intersection_threshold_percent = 0.8 # between 0 and 1 backchaining_check_intermediate_harmlessness = False + backward_forward_load_initial = False + single_grounding = False + pnad_search_load_initial = False pnad_search_without_del = False pnad_search_timeout = 10.0 compute_sidelining_objective_value = False @@ -714,7 +733,9 @@ def get_arg_specific_settings(cls, args: Dict[str, Any]) -> Dict[str, Any]: # The method used for perception: now only "trivial" or "sokoban". perceiver=defaultdict(lambda: "trivial", { "sokoban": "sokoban", - "kitchen": "kitchen", + "minigrid_env": "minigrid_env", + "mini_behavior_env": "mini_behavior_env", + "kitchen": "kitchen" })[args.get("env", "")], # Horizon for each environment. When checking if a policy solves a # task, we run the policy for at most this many steps. @@ -728,6 +749,7 @@ def get_arg_specific_settings(cls, args: Dict[str, Any]) -> Dict[str, Any]: "doors": 1000, "coffee": 1000, "kitchen": 1000, + "minigrid_env": 1000, # For the very simple touch point environment, restrict # the horizon to be shorter. 
"touch_point": 15, diff --git a/predicators/utils.py b/predicators/utils.py index 3500562b56..e2f3fddaac 100644 --- a/predicators/utils.py +++ b/predicators/utils.py @@ -1473,7 +1473,8 @@ def _get_entity_combinations( this_choices.append(ent) choices.append(this_choices) for choice in itertools.product(*choices): - yield list(choice) + if len(set(choice)) == len(choice): + yield list(choice) def get_object_combinations(objects: Collection[Object], diff --git a/test_backward_forward copy.py b/test_backward_forward copy.py new file mode 100644 index 0000000000..b4595c8dcc --- /dev/null +++ b/test_backward_forward copy.py @@ -0,0 +1,509 @@ +import random +from typing import List, Set, Tuple +import numpy as np +from collections import deque, defaultdict + +np.random.seed(0) +random.seed(0) + +# Parameters +NUM_PREDICATES = 10 +NUM_OPERATORS = 10 +TRAJ_MAX = 10 +TRAJ_LEN = 5 +NUM_TRAJS = 50 # Fewer for readability +ACTION_SPACE = list(range(NUM_OPERATORS)) + +# --- Operator Representation --- +class Operator: + def __init__(self, pre: Set[int], add: Set[int], delete: Set[int], action: int): + self.pre = pre + self.add = add + self.delete = delete + self.action = action + + def is_applicable(self, state: Set[int]) -> bool: + return self.pre.issubset(state) + + def apply(self, state: Set[int]) -> Set[int]: + if not self.is_applicable(state): + return state # no-op if not applicable + new_state = state.copy() + new_state.difference_update(self.delete) + new_state.update(self.add) + return new_state + + def __repr__(self): + return f"Op(action={self.action}, pre={self.pre}, add={self.add}, del={self.delete})" + + +def plan(start: Set[int], goal: Set[int], operators: List[Operator], max_depth=10): + visited = set() + queue = deque() + queue.append((start.copy(), [])) + + while queue: + state, path = queue.popleft() + state_key = frozenset(state) + if state_key in visited: + continue + visited.add(state_key) + + if goal.issubset(state): + return path + + if len(path) >= max_depth: + continue + + for op in operators: + if op.is_applicable(state): + next_state = op.apply(state) + if next_state != state: + queue.append((next_state, path + [(state.copy(), op.action, next_state.copy())])) + + return None + + +# --- Generate Random Operators --- +def generate_random_operator(pred_pool: List[int], action_id: int) -> Operator: + pre = set(random.sample(pred_pool, random.randint(1, 3))) + effects = list(set(pred_pool) - pre) + add = set(random.sample(effects, random.randint(1, min(2, len(effects))))) + delete = set(random.sample(list(pre), random.randint(0, len(pre)))) + return Operator(pre, add, delete, action_id) + + +def compute_reachable_states(init_state: Set[int], operators: List[Operator], max_iters: int = 100) -> List[Set[int]]: + reached_states = set() + reachable = [] + frontier = [init_state.copy()] + + for _ in range(max_iters): + new_frontier = [] + + for state in frontier: + state_key = frozenset(state) + if state_key in reached_states: + continue + + reached_states.add(state_key) + reachable.append(frozenset(state)) + + for op in operators: + if op.is_applicable(state): + next_state = op.apply(state) + next_key = frozenset(next_state) + if next_key not in reached_states: + new_frontier.append(next_state) + + if not new_frontier: + break + frontier = new_frontier + + return set(reachable) + + +# --- Generate Demo Data --- +def generate_planned_demo_trajectories(operators: List[Operator], num_trajs: int, max_depth: int) -> List[Tuple[List[Tuple[Set[int], int, Set[int]]], Set[int]]]: + demos = [] 
+ attempts = 0 + + while len(demos) < num_trajs and attempts < 10000000: + attempts += 1 + init_state = set(random.sample(range(NUM_PREDICATES), random.randint(2, NUM_PREDICATES))) + reachable = compute_reachable_states(init_state, operators) - init_state + + if len(reachable) == 0: + continue + + plan_traj = [] + goals = reachable + while len(goals) > 0 and len(plan_traj) < TRAJ_LEN: + goal_state = random.choice(list(goals)) + goal = goal_state - init_state + + if not goal: + goals.remove(goal_state) + continue + + plan_traj = plan(init_state, goal, operators, max_depth) + if plan_traj is None or len(plan_traj) < TRAJ_LEN: + goals.remove(goal_state) + plan_traj = [] + + if plan_traj and len(plan_traj) >= TRAJ_LEN: + demos.append((plan_traj, goal)) + + return demos + + +# --- Backwards-Forwards Operator Learning --- +def backward_infer_minimal_effects(demo_data, current_operators=None): + candidate_ops = defaultdict(lambda: {'demos': []}) + op_index = {} + if current_operators: + op_index = {(op.action, frozenset(op.add)): op for op in current_operators} + + for traj, goal in sorted(demo_data, key=lambda x: len(x[0])): #sorted(demo_data, key=lambda x: len(x[0])*(1+len(x[1]))): # order by smallest demo + current_goal = goal.copy() + + for (s, action, s_prime) in reversed(traj): + effect = s_prime - s + if len(effect) == 0: + raise Exception("No effect") + elif len(effect) == 1: + necessary_effect = effect + else: + necessary_effect = effect & current_goal + + key = (action, frozenset(necessary_effect)) + candidate_ops[key]['demos'].append((s, action, s_prime)) + + preconditions = set() + if key in op_index: + preconditions = op_index[key].pre + current_goal = (current_goal - necessary_effect) | preconditions + + return candidate_ops + + +def refine_by_plan_divergence(demos, learned_operators): + op_index = {(op.action, frozenset(op.add)): op for op in learned_operators} + support_sets = {key: [] for key in op_index} + + for traj, goal in demos: + current_goal = goal.copy() + for s, a, s_prime in traj: + effect = s_prime - s + necessary_effect = effect if len(effect) == 1 else effect & current_goal + key = (a, frozenset(necessary_effect)) + if key in support_sets: + support_sets[key].append(s) + + preconditions = set() + if key in op_index: + preconditions = op_index[key].pre + current_goal = (current_goal - necessary_effect) | preconditions + + for traj, goal in demos: + state = traj[0][0] + for (s_true, a_true, s_next_true) in traj: + applicable = [op for op in learned_operators if op.is_applicable(state)] + if not applicable: + break + op_planner = random.choice(applicable) + + key_true = (a_true, frozenset(s_next_true - s_true)) + op_true = op_index.get(key_true, None) + if op_true is None: + continue + + if op_planner is op_true: + state = op_true.apply(state) + continue + + key_planner = (op_planner.action, frozenset(op_planner.add)) + support = support_sets.get(key_planner, []) + if not support: + continue + + common_preds = set.intersection(*support) + potential_preds_to_add = common_preds - state + if not potential_preds_to_add: + continue + # if len(potential_preds_to_add - goal) > 0: + # preds_to_add = {random.choice(list(potential_preds_to_add - goal))} + # else: + # preds_to_add = {random.choice(list(potential_preds_to_add))} + preds_to_add = {random.choice(list(potential_preds_to_add))} + op_planner.pre.update(preds_to_add) + + state = op_true.apply(state) + + return list(op_index.values()) + + +def learn_operators_from_demos(demo_data, max_iters=100, verbose=True): + 
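# --- Editor's illustrative sketch (not part of the file): a concrete trace of
# backward_infer_minimal_effects above on a two-step trajectory. States and
# the goal are sets of predicate indices; the numbers are made up.
traj = [
    ({0}, 7, {0, 1}),           # action 7 adds {1}
    ({0, 1}, 8, {0, 1, 2, 3}),  # action 8 adds {2, 3}, but only 2 serves the goal
]
goal = {1, 2}

current_goal = set(goal)
credited = []
for s, action, s_prime in reversed(traj):
    effect = s_prime - s
    necessary_effect = effect if len(effect) == 1 else effect & current_goal
    credited.append((action, frozenset(necessary_effect)))
    current_goal = current_goal - necessary_effect  # no learned preconditions yet

# Action 8 is credited only with {2} (3 was irrelevant to the goal), and
# action 7 with its single effect {1}; the regressed goal ends up empty.
assert credited == [(8, frozenset({2})), (7, frozenset({1}))]
assert current_goal == set()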
learned_ops = [] + last_preconds = None + + for iteration in range(max_iters): + if verbose: + print(f"\n--- Iteration {iteration + 1} (Backward + Forward) ---") + + candidate_ops = backward_infer_minimal_effects(demo_data, current_operators=learned_ops or None) + + if learned_ops == []: + learned_ops = [ + Operator(pre=set(), add=set(effect_frozen), delete=set(), action=action) + for (action, effect_frozen), entry in candidate_ops.items() + ] + else: + op_index = {(op.action, frozenset(op.add)): op for op in learned_ops} + learned_ops = [] + for (action, effect_frozen), entry in candidate_ops.items(): + if (action, effect_frozen) not in op_index: + learned_ops.append(Operator(pre=set(), add=set(effect_frozen), delete=set(), action=action)) + else: + learned_ops.append(Operator( + pre=op_index[(action, effect_frozen)].pre, + add=set(effect_frozen), + delete=set(), + action=action + )) + + if verbose: + print("Backward Learned Operators:") + for op in sorted(learned_ops, key=lambda x: x.action): + print(op) + + learned_ops = refine_by_plan_divergence(demo_data, learned_ops) + + if verbose: + print("Forward Learned Operators:") + for op in sorted(learned_ops, key=lambda x: x.action): + print(op) + + return learned_ops + + +# --- Run Learning --- +def run_operator_learning_trials(num_trials=10, verbose=True) -> int: + invalid_count = 0 + valid_count = 0 + + for _ in range(num_trials): + pred_pool = [i for i in range(NUM_PREDICATES)] + operators = [generate_random_operator(pred_pool, i) for i in range(NUM_OPERATORS)] + demo_data = generate_planned_demo_trajectories(operators, NUM_TRAJS, max_depth=TRAJ_MAX) + + if verbose: + print("\n--- Ground Truth Operators ---") + for op in sorted(operators, key=lambda x: x.action): + print(op) + print() + + print("Demos:") + for traj in demo_data: + print("Goal:", traj[1], "Length:", len(traj[0])) + + refined_ops = learn_operators_from_demos(demo_data, max_iters=1000, verbose=verbose) + + if verbose: + print("\n--- Final Learned Operators ---") + for op in sorted(refined_ops, key=lambda x: x.action): + print(op) + print() + + # Add delete effects + op_index = {(op.action, frozenset(op.add)): op for op in refined_ops} + for (action, effect_frozen), entry in backward_infer_minimal_effects(demo_data, current_operators=refined_ops).items(): + delete = op_index[(action, effect_frozen)].pre & set.intersection( + *[set(entry['demos'][i][0] - entry['demos'][i][2]) for i in range(len(entry['demos']))] + ) + op_index[(action, effect_frozen)].delete = delete + + for op in op_index.values(): + for actual_op in operators: + if op.action == actual_op.action: + if op.pre <= actual_op.pre and op.add <= actual_op.add and op.delete <= actual_op.delete: + valid_count += 1 + if verbose: + print("VALID\n\tLEARNED |", op, "\n\tORIGINAL|", actual_op) + else: + invalid_count += 1 + if verbose: + print("INVALID\n\tLEARNED |", op, "\n\tORIGINAL|", actual_op) + + return invalid_count, valid_count + +def deduplicate_predicates_by_equivalence(demos, operators, num_preds): + from collections import defaultdict + + # Step 1: Build truth vectors for each predicate + pred_vectors = defaultdict(list) + + for traj, goal in demos: + for s, _, s_prime in traj: + for i in range(num_preds): + pred_vectors[i].append(int(i in s)) + pred_vectors[i].append(int(i in s_prime)) + for i in range(num_preds): + pred_vectors[i].append(int(i in goal)) + + # Step 2: Group predicates with identical truth vectors + vector_to_preds = defaultdict(list) + for pred, vec in pred_vectors.items(): + 
vector_to_preds[tuple(vec)].append(pred) + + # Step 3: Build a mapping from redundant predicate -> representative + replace_map = {} + for group in vector_to_preds.values(): + representative = min(group) # pick smallest index as canonical + for pred in group: + replace_map[pred] = representative + + # Step 4: Replace predicates in demos + new_demos = [] + for traj, goal in demos: + new_traj = [] + for s, a, s_prime in traj: + s_new = {replace_map[p] for p in s} + s_prime_new = {replace_map[p] for p in s_prime} + new_traj.append((s_new, a, s_prime_new)) + new_goal = {replace_map[p] for p in goal} + new_demos.append((new_traj, new_goal)) + + # Step 5: Replace predicates in operators + new_operators = [] + for op in operators: + pre = {replace_map[p] for p in op.pre} + add = {replace_map[p] for p in op.add} + delete = {replace_map[p] for p in op.delete} + new_operators.append(Operator(pre, add, delete, op.action)) + + return new_demos, new_operators, replace_map + + +# invalids, valids = run_operator_learning_trials(num_trials=50, verbose=False) +# print(f"Number of invalid learned operators: {invalids} / {invalids+valids}") + +pred_pool = [i for i in range(NUM_PREDICATES)] +operators = [generate_random_operator(pred_pool, i) for i in range(NUM_OPERATORS)] +demo_data = generate_planned_demo_trajectories(operators, NUM_TRAJS, max_depth=TRAJ_MAX) + +demo_data, operators, pred_replace_map = deduplicate_predicates_by_equivalence(demo_data, operators, NUM_PREDICATES) +print("Predicate replacement map:", pred_replace_map) + +print("\n--- Ground Truth Operators ---") +for op in sorted(operators, key=lambda x: x.action): + print(op) +print() + +print("Demos:") +# for traj in demo_data: +# print("Goal:", traj[1], "Length:", len(traj[0])) +print(len(demo_data)) + +x = [] +y = [] +op_index = None + +for run_i in range(1, len(demo_data), 5): + refined_ops = learn_operators_from_demos(demo_data[:run_i], max_iters=1000, verbose=False) + op_index = {(op.action, frozenset(op.add)): op for op in refined_ops} + for (action, effect_frozen), entry in backward_infer_minimal_effects(demo_data[:run_i], current_operators=refined_ops).items(): + delete = op_index[(action, effect_frozen)].pre & set.intersection( + *[set(entry['demos'][i][0] - entry['demos'][i][2]) for i in range(len(entry['demos']))] + ) + op_index[(action, effect_frozen)].delete = delete + + print("\n--- Final Learned Operators ---") + for op in sorted(op_index.values(), key=lambda x: x.action): + print(op) + print() + + num_match = 0 + actions = set() + for actual_op in operators: + is_match = False + for op in op_index.values(): + actions.add(op.action) + if op.action == actual_op.action: + if op.pre == actual_op.pre and op.add == actual_op.add and op.delete == actual_op.delete: + is_match = True + if is_match: + num_match += 1 + num_actions = len(actions) + x.append(run_i) + y.append(num_match) + actions = set() + for traj in demo_data[:run_i]: + for (s, a, s_prime) in traj[0]: + actions.add(a) + print(len(actions)) + print(actions) + print(run_i, num_match, num_actions) + + +print("\n--- Ground Truth Operators ---") +for op in sorted(operators, key=lambda x: x.action): + print(op) +print() + +def augment_demos_with_missing_ground_truth_ops(demos, learned_ops, true_ops, num_preds, num_augments=1): + from collections import defaultdict + + # Index learned ops by (action, add, delete, pre) + learned_op_keys = set( + (op.action, frozenset(op.add), frozenset(op.delete), frozenset(op.pre)) + for op in learned_ops + ) + + augmented = [] + + for true_op 
in true_ops: + key = (true_op.action, frozenset(true_op.add), frozenset(true_op.delete), frozenset(true_op.pre)) + if key in learned_op_keys: + continue # already learned correctly + + # Add demos for this missing operator + for _ in range(num_augments): + possible_goals = None + while not possible_goals: + base_state = set(random.sample(range(num_preds), random.randint(2, num_preds))) + false_pre = set() + for op in learned_ops: + if op.action == true_op.action and op.add == true_op.add: + false_pre |= op.pre - true_op.pre + base_state -= false_pre # remove wrong precondition + base_state |= true_op.pre # ensure it's applicable + next_state = true_op.apply(base_state) + + # Choose a goal that is newly added by the operator + possible_goals = true_op.add - base_state + + goal = possible_goals + demo = [(base_state.copy(), true_op.action, next_state.copy())] + augmented.append((demo, goal)) + + return demos + augmented + +for _ in range(10): + demo_data = augment_demos_with_missing_ground_truth_ops( + demo_data, list(op_index.values()), operators, NUM_PREDICATES, num_augments=1 + ) + + refined_ops = learn_operators_from_demos(demo_data, max_iters=1000, verbose=False) + op_index = {(op.action, frozenset(op.add)): op for op in refined_ops} + for (action, effect_frozen), entry in backward_infer_minimal_effects(demo_data[:run_i], current_operators=refined_ops).items(): + delete = op_index[(action, effect_frozen)].pre & set.intersection( + *[set(entry['demos'][i][0] - entry['demos'][i][2]) for i in range(len(entry['demos']))] + ) + op_index[(action, effect_frozen)].delete = delete + +print("\n--- Final Learned Operators ---") +for op in sorted(op_index.values(), key=lambda x: x.action): + print(op) +print() + +num_match = 0 +actions = set() +for actual_op in operators: + is_match = False + for op in op_index.values(): + actions.add(op.action) + if op.action == actual_op.action: + if op.pre == actual_op.pre and op.add == actual_op.add and op.delete == actual_op.delete: + is_match = True + if is_match: + num_match += 1 + else: + print(actual_op) +num_actions = len(actions) +print("final", num_match, num_actions) + + +# TODO Fix Delete Effects + +import ipdb; ipdb.set_trace() \ No newline at end of file diff --git a/test_backward_forward.py b/test_backward_forward.py new file mode 100644 index 0000000000..c707b666b4 --- /dev/null +++ b/test_backward_forward.py @@ -0,0 +1,561 @@ +import random +from typing import List, Set, Tuple +import numpy as np +from collections import deque, defaultdict + +np.random.seed(0) +random.seed(0) + +# Parameters +NUM_PREDICATES = 10 +NUM_OPERATORS = 10 +TRAJ_MAX = 10 +TRAJ_LEN = 5 +NUM_TRAJS = 50 # Fewer for readability +ACTION_SPACE = list(range(NUM_OPERATORS)) + +# --- Operator Representation --- +class Operator: + def __init__(self, pre: Set[int], add: Set[int], delete: Set[int], action: int): + self.pre = pre + self.add = add + self.delete = delete + self.action = action + + def is_applicable(self, state: Set[int]) -> bool: + return self.pre.issubset(state) + + def apply(self, state: Set[int]) -> Set[int]: + if not self.is_applicable(state): + return state # no-op if not applicable + new_state = state.copy() + new_state.difference_update(self.delete) + new_state.update(self.add) + return new_state + + def __repr__(self): + return f"Op(action={self.action}, pre={self.pre}, add={self.add}, del={self.delete})" + + +def plan(start: Set[int], goal: Set[int], operators: List[Operator], max_depth=10): + visited = set() + queue = deque() + queue.append((start.copy(), 
[])) + + while queue: + state, path = queue.popleft() + state_key = frozenset(state) + if state_key in visited: + continue + visited.add(state_key) + + if goal.issubset(state): + return path + + if len(path) >= max_depth: + continue + + for op in operators: + if op.is_applicable(state): + next_state = op.apply(state) + if next_state != state: + queue.append((next_state, path + [(state.copy(), op.action, next_state.copy())])) + + return None + + +# --- Generate Random Operators --- +def generate_random_operator(pred_pool: List[int], action_id: int) -> Operator: + pre = set(random.sample(pred_pool, random.randint(1, 3))) + effects = list(set(pred_pool) - pre) + add = set(random.sample(effects, random.randint(1, min(2, len(effects))))) + delete = set(random.sample(list(pre), random.randint(0, len(pre)))) + return Operator(pre, add, delete, action_id) + + +def compute_reachable_states(init_state: Set[int], operators: List[Operator], max_iters: int = 100) -> List[Set[int]]: + reached_states = set() + reachable = [] + frontier = [init_state.copy()] + + for _ in range(max_iters): + new_frontier = [] + + for state in frontier: + state_key = frozenset(state) + if state_key in reached_states: + continue + + reached_states.add(state_key) + reachable.append(frozenset(state)) + + for op in operators: + if op.is_applicable(state): + next_state = op.apply(state) + next_key = frozenset(next_state) + if next_key not in reached_states: + new_frontier.append(next_state) + + if not new_frontier: + break + frontier = new_frontier + + return set(reachable) + + +# --- Generate Demo Data --- +def generate_planned_demo_trajectories(operators: List[Operator], num_trajs: int, max_depth: int) -> List[Tuple[List[Tuple[Set[int], int, Set[int]]], Set[int]]]: + demos = [] + attempts = 0 + + while len(demos) < num_trajs and attempts < 10000000: + attempts += 1 + init_state = set(random.sample(range(NUM_PREDICATES), random.randint(2, NUM_PREDICATES))) + reachable = compute_reachable_states(init_state, operators) - init_state + + if len(reachable) == 0: + continue + + plan_traj = [] + goals = reachable + while len(goals) > 0 and len(plan_traj) < TRAJ_LEN: + goal_state = random.choice(list(goals)) + goal = goal_state - init_state + + if not goal: + goals.remove(goal_state) + continue + + plan_traj = plan(init_state, goal, operators, max_depth) + if plan_traj is None or len(plan_traj) < TRAJ_LEN: + goals.remove(goal_state) + plan_traj = [] + + if plan_traj and len(plan_traj) >= TRAJ_LEN: + demos.append((plan_traj, goal)) + + return demos + + +# --- Backwards-Forwards Operator Learning --- +def backward_infer_minimal_effects(demo_data, current_operators=None): + candidate_ops = defaultdict(lambda: {'demos': []}) + op_index = {} + if current_operators: + op_index = {(op.action, frozenset(op.add)): op for op in current_operators} + + for traj, goal in sorted(demo_data, key=lambda x: len(x[0])): #sorted(demo_data, key=lambda x: len(x[0])*(1+len(x[1]))): # order by smallest demo + current_goal = goal.copy() + + for (s, action, s_prime) in reversed(traj): + effect = s_prime - s + if len(effect) == 0: + raise Exception("No effect") + elif len(effect) == 1: + necessary_effect = effect + else: + necessary_effect = effect & current_goal + + key = (action, frozenset(necessary_effect)) + candidate_ops[key]['demos'].append((s, action, s_prime)) + + preconditions = set() + if key in op_index: + preconditions = op_index[key].pre + current_goal = (current_goal - necessary_effect) | preconditions + + return candidate_ops + + +def 
refine_by_plan_divergence(demos, learned_operators): + op_index = {(op.action, frozenset(op.add)): op for op in learned_operators} + support_sets = {key: [] for key in op_index} + + for traj, goal in demos: + current_goal = goal.copy() + for s, a, s_prime in traj: + effect = s_prime - s + necessary_effect = effect if len(effect) == 1 else effect & current_goal + key = (a, frozenset(necessary_effect)) + if key in support_sets: + support_sets[key].append(s) + + preconditions = set() + if key in op_index: + preconditions = op_index[key].pre + current_goal = (current_goal - necessary_effect) | preconditions + + for traj, goal in demos: + state = traj[0][0] + for (s_true, a_true, s_next_true) in traj: + applicable = [op for op in learned_operators if op.is_applicable(state)] + if not applicable: + break + op_planner = random.choice(applicable) + + key_true = (a_true, frozenset(s_next_true - s_true)) + op_true = op_index.get(key_true, None) + if op_true is None: + continue + + if op_planner is op_true: + state = op_true.apply(state) + continue + + key_planner = (op_planner.action, frozenset(op_planner.add)) + support = support_sets.get(key_planner, []) + if not support: + continue + + common_preds = set.intersection(*support) + potential_preds_to_add = common_preds - state + if not potential_preds_to_add: + continue + # if len(potential_preds_to_add - goal) > 0: + # preds_to_add = {random.choice(list(potential_preds_to_add - goal))} + # else: + # preds_to_add = {random.choice(list(potential_preds_to_add))} + preds_to_add = {random.choice(list(potential_preds_to_add))} + op_planner.pre.update(preds_to_add) + + state = op_true.apply(state) + + return list(op_index.values()) + + +def learn_operators_from_demos(demo_data, max_iters=100, verbose=True): + learned_ops = [] + last_preconds = None + + for iteration in range(max_iters): + if verbose: + print(f"\n--- Iteration {iteration + 1} (Backward + Forward) ---") + + candidate_ops = backward_infer_minimal_effects(demo_data, current_operators=learned_ops or None) + + if learned_ops == []: + learned_ops = [ + Operator(pre=set(), add=set(effect_frozen), delete=set(), action=action) + for (action, effect_frozen), entry in candidate_ops.items() + ] + else: + op_index = {(op.action, frozenset(op.add)): op for op in learned_ops} + learned_ops = [] + for (action, effect_frozen), entry in candidate_ops.items(): + if (action, effect_frozen) not in op_index: + learned_ops.append(Operator(pre=set(), add=set(effect_frozen), delete=set(), action=action)) + else: + learned_ops.append(Operator( + pre=op_index[(action, effect_frozen)].pre, + add=set(effect_frozen), + delete=set(), + action=action + )) + + if verbose: + print("Backward Learned Operators:") + for op in sorted(learned_ops, key=lambda x: x.action): + print(op) + + learned_ops = refine_by_plan_divergence(demo_data, learned_ops) + + if verbose: + print("Forward Learned Operators:") + for op in sorted(learned_ops, key=lambda x: x.action): + print(op) + + return learned_ops + + +# --- Run Learning --- +def run_operator_learning_trials(num_trials=10, verbose=True) -> int: + invalid_count = 0 + valid_count = 0 + + for _ in range(num_trials): + pred_pool = [i for i in range(NUM_PREDICATES)] + operators = [generate_random_operator(pred_pool, i) for i in range(NUM_OPERATORS)] + demo_data = generate_planned_demo_trajectories(operators, NUM_TRAJS, max_depth=TRAJ_MAX) + + if verbose: + print("\n--- Ground Truth Operators ---") + for op in sorted(operators, key=lambda x: x.action): + print(op) + print() + + 
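# --- Editor's illustrative sketch (not part of the file): the delete-effect
# recovery applied after learning (just below): an operator's delete effects
# are taken to be its preconditions that vanish in every supporting
# transition. Toy numbers only.
pre = {2, 5}
supporting_transitions = [
    ({2, 5, 7}, {5, 7, 9}),  # 2 vanishes
    ({1, 2, 5}, {1, 5, 9}),  # 2 vanishes, 5 survives
]
vanished_everywhere = set.intersection(
    *[s - s_prime for s, s_prime in supporting_transitions])
delete = pre & vanished_everywhere
assert delete == {2}  # 5 is a precondition but is never deleted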
print("Demos:") + for traj in demo_data: + print("Goal:", traj[1], "Length:", len(traj[0])) + + refined_ops = learn_operators_from_demos(demo_data, max_iters=1000, verbose=verbose) + + if verbose: + print("\n--- Final Learned Operators ---") + for op in sorted(refined_ops, key=lambda x: x.action): + print(op) + print() + + # Add delete effects + op_index = {(op.action, frozenset(op.add)): op for op in refined_ops} + for (action, effect_frozen), entry in backward_infer_minimal_effects(demo_data, current_operators=refined_ops).items(): + delete = op_index[(action, effect_frozen)].pre & set.intersection( + *[set(entry['demos'][i][0] - entry['demos'][i][2]) for i in range(len(entry['demos']))] + ) + op_index[(action, effect_frozen)].delete = delete + + for op in op_index.values(): + for actual_op in operators: + if op.action == actual_op.action: + if op.pre <= actual_op.pre and op.add <= actual_op.add and op.delete <= actual_op.delete: + valid_count += 1 + if verbose: + print("VALID\n\tLEARNED |", op, "\n\tORIGINAL|", actual_op) + else: + invalid_count += 1 + if verbose: + print("INVALID\n\tLEARNED |", op, "\n\tORIGINAL|", actual_op) + + return invalid_count, valid_count + +def deduplicate_predicates_by_equivalence(demos, operators, num_preds): + from collections import defaultdict + + # Step 1: Build truth vectors for each predicate + pred_vectors = defaultdict(list) + + for traj, goal in demos: + for s, _, s_prime in traj: + for i in range(num_preds): + pred_vectors[i].append(int(i in s)) + pred_vectors[i].append(int(i in s_prime)) + for i in range(num_preds): + pred_vectors[i].append(int(i in goal)) + + # Step 2: Group predicates with identical truth vectors + vector_to_preds = defaultdict(list) + for pred, vec in pred_vectors.items(): + vector_to_preds[tuple(vec)].append(pred) + + # Step 3: Build a mapping from redundant predicate -> representative + replace_map = {} + for group in vector_to_preds.values(): + representative = min(group) # pick smallest index as canonical + for pred in group: + replace_map[pred] = representative + + # Step 4: Replace predicates in demos + new_demos = [] + for traj, goal in demos: + new_traj = [] + for s, a, s_prime in traj: + s_new = {replace_map[p] for p in s} + s_prime_new = {replace_map[p] for p in s_prime} + new_traj.append((s_new, a, s_prime_new)) + new_goal = {replace_map[p] for p in goal} + new_demos.append((new_traj, new_goal)) + + # Step 5: Replace predicates in operators + new_operators = [] + for op in operators: + pre = {replace_map[p] for p in op.pre} + add = {replace_map[p] for p in op.add} + delete = {replace_map[p] for p in op.delete} + new_operators.append(Operator(pre, add, delete, op.action)) + + return new_demos, new_operators, replace_map + +def augment_demos_with_missing_ground_truth_ops(demos, learned_ops, true_ops, num_preds, num_augments=1): + from collections import defaultdict + + # Index learned ops by (action, add, delete, pre) + learned_op_keys = set( + (op.action, frozenset(op.add), frozenset(op.delete), frozenset(op.pre)) + for op in learned_ops + ) + + augmented = [] + + for true_op in true_ops: + key = (true_op.action, frozenset(true_op.add), frozenset(true_op.delete), frozenset(true_op.pre)) + if key in learned_op_keys: + continue # already learned correctly + + # Add demos for this missing operator + for _ in range(num_augments): + possible_goals = None + while not possible_goals: + base_state = set(random.sample(range(num_preds), random.randint(2, num_preds))) + false_pre = set() + for op in learned_ops: + if op.action 
== true_op.action and op.add == true_op.add: + false_pre |= op.pre - true_op.pre + base_state -= false_pre # remove wrong precondition + base_state |= true_op.pre # ensure it's applicable + next_state = true_op.apply(base_state) + + # Choose a goal that is newly added by the operator + possible_goals = true_op.add - base_state + + goal = possible_goals + demo = [(base_state.copy(), true_op.action, next_state.copy())] + augmented.append((demo, goal)) + + return demos + augmented + +# invalids, valids = run_operator_learning_trials(num_trials=50, verbose=False) +# print(f"Number of invalid learned operators: {invalids} / {invalids+valids}") + +# +results = {"tot_matches":[], "tot_soft_matches":[],"tot_exsoft_matches":[], "tot_num_ops":[]} +# +pred_pool = [i for i in range(NUM_PREDICATES)] +operators = [generate_random_operator(pred_pool, i) for i in range(NUM_OPERATORS)] +all_demo_data = generate_planned_demo_trajectories(operators, NUM_TRAJS, max_depth=TRAJ_MAX) + +# all_demo_data, operators, pred_replace_map = deduplicate_predicates_by_equivalence(all_demo_data, operators, NUM_PREDICATES) +# print("Predicate replacement map:", pred_replace_map) + +for num_trajs in range(1, 52, 10): + + print("\n--- Ground Truth Operators ---") + for op in sorted(operators, key=lambda x: x.action): + print(op) + print() + + print("Demos:") + # for traj in demo_data: + # print("Goal:", traj[1], "Length:", len(traj[0])) + demo_data = all_demo_data[:num_trajs] + print(len(demo_data)) + + op_index = None + + potential_op_sets = {} + for run_i in range(100): + refined_ops = learn_operators_from_demos(demo_data, max_iters=1000, verbose=False) + op_index = {(op.action, frozenset(op.add)): op for op in refined_ops} + for (action, effect_frozen), entry in backward_infer_minimal_effects(demo_data, current_operators=refined_ops).items(): + delete = op_index[(action, effect_frozen)].pre & set.intersection( + *[set(entry['demos'][i][0] - entry['demos'][i][2]) for i in range(len(entry['demos']))] + ) + op_index[(action, effect_frozen)].delete = delete + + augmented_demo_data = augment_demos_with_missing_ground_truth_ops( + demo_data, list(op_index.values()), operators, NUM_PREDICATES, num_augments=5 + ) + + refined_ops = learn_operators_from_demos(augmented_demo_data, max_iters=1000, verbose=False) + op_index = {(op.action, frozenset(op.add)): op for op in refined_ops} + for (action, effect_frozen), entry in backward_infer_minimal_effects(augmented_demo_data, current_operators=refined_ops).items(): + delete = op_index[(action, effect_frozen)].pre & set.intersection( + *[set(entry['demos'][i][0] - entry['demos'][i][2]) for i in range(len(entry['demos']))] + ) + op_index[(action, effect_frozen)].delete = delete + + print("\n--- Final Learned Operators ---") + for op in sorted(op_index.values(), key=lambda x: x.action): + print(op) + print() + + num_match = 0 + num_soft_match = 0 + num_exsoft_match = 0 + actions = set() + for actual_op in operators: + is_match = False + is_soft_match = False + is_exsoft_match = False + for op in op_index.values(): + actions.add(op.action) + if op.action == actual_op.action: + if len(op.pre - actual_op.pre) <= 2 and op.add == actual_op.add and op.delete == actual_op.delete: + is_exsoft_match = True + if len(op.pre - actual_op.pre) <= 1 and op.add == actual_op.add and op.delete == actual_op.delete: + is_soft_match = True + if op.pre == actual_op.pre and op.add == actual_op.add and op.delete == actual_op.delete: + is_match = True + if is_exsoft_match: + num_exsoft_match += 1 + if 
is_soft_match: + num_soft_match += 1 + if is_match: + num_match += 1 + num_actions = len(actions) + actions = set() + for traj in demo_data[:run_i]: + for (s, a, s_prime) in traj[0]: + actions.add(a) + new_op_set_str = str([op for op in sorted(op_index.values(), key=lambda x: x.action)]) + if new_op_set_str in potential_op_sets: + potential_op_sets[new_op_set_str] += 1 + else: + potential_op_sets[new_op_set_str] = 0 + results["tot_matches"].append((num_trajs, run_i, num_match)) + results["tot_soft_matches"].append((num_trajs, run_i, num_soft_match)) + results["tot_exsoft_matches"].append((num_trajs, run_i, num_exsoft_match)) + results["tot_num_ops"].append((num_trajs, run_i, len(potential_op_sets.keys()), num_actions)) + print(num_trajs, run_i, num_match, num_soft_match, num_exsoft_match, len(potential_op_sets.keys()), num_actions) + + print("\n--- Ground Truth Operators ---") + for op in sorted(operators, key=lambda x: x.action): + print(op) + print() + +import pickle + +filename = 'HITL_more_results.pkl' + +# Open the file in binary write mode ('wb') +with open(filename, 'wb') as file: + pickle.dump(results, file) + +import ipdb; ipdb.set_trace() + + + + + + + + + +quit() + +for _ in range(10): + demo_data = augment_demos_with_missing_ground_truth_ops( + demo_data, list(op_index.values()), operators, NUM_PREDICATES, num_augments=1 + ) + + refined_ops = learn_operators_from_demos(demo_data, max_iters=1000, verbose=False) + op_index = {(op.action, frozenset(op.add)): op for op in refined_ops} + for (action, effect_frozen), entry in backward_infer_minimal_effects(demo_data[:run_i], current_operators=refined_ops).items(): + delete = op_index[(action, effect_frozen)].pre & set.intersection( + *[set(entry['demos'][i][0] - entry['demos'][i][2]) for i in range(len(entry['demos']))] + ) + op_index[(action, effect_frozen)].delete = delete + +print("\n--- Final Learned Operators ---") +for op in sorted(op_index.values(), key=lambda x: x.action): + print(op) +print() + +num_match = 0 +actions = set() +for actual_op in operators: + is_match = False + for op in op_index.values(): + actions.add(op.action) + if op.action == actual_op.action: + if op.pre == actual_op.pre and op.add == actual_op.add and op.delete == actual_op.delete: + is_match = True + if is_match: + num_match += 1 + else: + print(actual_op) +num_actions = len(actions) +print("final", num_match, num_actions) + + +# TODO Fix Delete Effects + +import ipdb; ipdb.set_trace() \ No newline at end of file diff --git a/test_backward_forward_pick_and_place.py b/test_backward_forward_pick_and_place.py new file mode 100644 index 0000000000..c49286cf94 --- /dev/null +++ b/test_backward_forward_pick_and_place.py @@ -0,0 +1,509 @@ +import random +from typing import List, Set, Tuple +import numpy as np +from collections import deque, defaultdict + +np.random.seed(0) +random.seed(0) + +# Parameters +NUM_PREDICATES = 10 +NUM_OPERATORS = 10 +TRAJ_MAX = 10 +TRAJ_LEN = 5 +NUM_TRAJS = 50 # Fewer for readability +ACTION_SPACE = list(range(NUM_OPERATORS)) + +# --- Operator Representation --- +class Operator: + def __init__(self, pre: Set[int], add: Set[int], delete: Set[int], action: int): + self.pre = pre + self.add = add + self.delete = delete + self.action = action + + def is_applicable(self, state: Set[int]) -> bool: + return self.pre.issubset(state) + + def apply(self, state: Set[int]) -> Set[int]: + if not self.is_applicable(state): + return state # no-op if not applicable + new_state = state.copy() + new_state.difference_update(self.delete) + 
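# Note (editor comment): delete effects are removed before add effects are
# unioned in, and when the preconditions do not hold the state is returned
# unchanged. For example, Operator(pre={0}, add={2}, delete={0}, action=0)
# gives .apply({0, 1}) == {1, 2}, while .apply({1}) just returns {1}.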
new_state.update(self.add) + return new_state + + def __repr__(self): + return f"Op(action={self.action}, pre={self.pre}, add={self.add}, del={self.delete})" + + +def plan(start: Set[int], goal: Set[int], operators: List[Operator], max_depth=10): + visited = set() + queue = deque() + queue.append((start.copy(), [])) + + while queue: + state, path = queue.popleft() + state_key = frozenset(state) + if state_key in visited: + continue + visited.add(state_key) + + if goal.issubset(state): + return path + + if len(path) >= max_depth: + continue + + for op in operators: + if op.is_applicable(state): + next_state = op.apply(state) + if next_state != state: + queue.append((next_state, path + [(state.copy(), op.action, next_state.copy())])) + + return None + + +# --- Generate Random Operators --- +def generate_random_operator(pred_pool: List[int], action_id: int) -> Operator: + pre = set(random.sample(pred_pool, random.randint(1, 3))) + effects = list(set(pred_pool) - pre) + add = set(random.sample(effects, random.randint(1, min(2, len(effects))))) + delete = set(random.sample(list(pre), random.randint(0, len(pre)))) + return Operator(pre, add, delete, action_id) + + +def compute_reachable_states(init_state: Set[int], operators: List[Operator], max_iters: int = 100) -> List[Set[int]]: + reached_states = set() + reachable = [] + frontier = [init_state.copy()] + + for _ in range(max_iters): + new_frontier = [] + + for state in frontier: + state_key = frozenset(state) + if state_key in reached_states: + continue + + reached_states.add(state_key) + reachable.append(frozenset(state)) + + for op in operators: + if op.is_applicable(state): + next_state = op.apply(state) + next_key = frozenset(next_state) + if next_key not in reached_states: + new_frontier.append(next_state) + + if not new_frontier: + break + frontier = new_frontier + + return set(reachable) + + +# --- Generate Demo Data --- +def generate_planned_demo_trajectories(operators: List[Operator], num_trajs: int, max_depth: int) -> List[Tuple[List[Tuple[Set[int], int, Set[int]]], Set[int]]]: + demos = [] + attempts = 0 + + while len(demos) < num_trajs and attempts < 10000000: + attempts += 1 + init_state = set(random.sample(range(NUM_PREDICATES), random.randint(2, NUM_PREDICATES))) + reachable = compute_reachable_states(init_state, operators) - init_state + + if len(reachable) == 0: + continue + + plan_traj = [] + goals = reachable + while len(goals) > 0 and len(plan_traj) < TRAJ_LEN: + goal_state = random.choice(list(goals)) + goal = goal_state - init_state + + if not goal: + goals.remove(goal_state) + continue + + plan_traj = plan(init_state, goal, operators, max_depth) + if plan_traj is None or len(plan_traj) < TRAJ_LEN: + goals.remove(goal_state) + plan_traj = [] + + if plan_traj and len(plan_traj) >= TRAJ_LEN: + demos.append((plan_traj, goal)) + + return demos + + +# --- Backwards-Forwards Operator Learning --- +def backward_infer_minimal_effects(demo_data, current_operators=None): + candidate_ops = defaultdict(lambda: {'demos': []}) + op_index = {} + if current_operators: + op_index = {(op.action, frozenset(op.add)): op for op in current_operators} + + for traj, goal in sorted(demo_data, key=lambda x: len(x[0])): #sorted(demo_data, key=lambda x: len(x[0])*(1+len(x[1]))): # order by smallest demo + current_goal = goal.copy() + + for (s, action, s_prime) in reversed(traj): + effect = s_prime - s + if len(effect) == 0: + raise Exception("No effect") + elif len(effect) == 1: + necessary_effect = effect + else: + necessary_effect = effect & 
current_goal + + key = (action, frozenset(necessary_effect)) + candidate_ops[key]['demos'].append((s, action, s_prime)) + + preconditions = set() + if key in op_index: + preconditions = op_index[key].pre + current_goal = (current_goal - necessary_effect) | preconditions + + return candidate_ops + + +def refine_by_plan_divergence(demos, learned_operators): + op_index = {(op.action, frozenset(op.add)): op for op in learned_operators} + support_sets = {key: [] for key in op_index} + + for traj, goal in demos: + current_goal = goal.copy() + for s, a, s_prime in traj: + effect = s_prime - s + necessary_effect = effect if len(effect) == 1 else effect & current_goal + key = (a, frozenset(necessary_effect)) + if key in support_sets: + support_sets[key].append(s) + + preconditions = set() + if key in op_index: + preconditions = op_index[key].pre + current_goal = (current_goal - necessary_effect) | preconditions + + for traj, goal in demos: + state = traj[0][0] + for (s_true, a_true, s_next_true) in traj: + applicable = [op for op in learned_operators if op.is_applicable(state)] + if not applicable: + break + op_planner = random.choice(applicable) + + key_true = (a_true, frozenset(s_next_true - s_true)) + op_true = op_index.get(key_true, None) + if op_true is None: + continue + + if op_planner is op_true: + state = op_true.apply(state) + continue + + key_planner = (op_planner.action, frozenset(op_planner.add)) + support = support_sets.get(key_planner, []) + if not support: + continue + + common_preds = set.intersection(*support) + potential_preds_to_add = common_preds - state + if not potential_preds_to_add: + continue + # if len(potential_preds_to_add - goal) > 0: + # preds_to_add = {random.choice(list(potential_preds_to_add - goal))} + # else: + # preds_to_add = {random.choice(list(potential_preds_to_add))} + preds_to_add = {random.choice(list(potential_preds_to_add))} + op_planner.pre.update(preds_to_add) + + state = op_true.apply(state) + + return list(op_index.values()) + + +def learn_operators_from_demos(demo_data, max_iters=100, verbose=True): + learned_ops = [] + last_preconds = None + + for iteration in range(max_iters): + if verbose: + print(f"\n--- Iteration {iteration + 1} (Backward + Forward) ---") + + candidate_ops = backward_infer_minimal_effects(demo_data, current_operators=learned_ops or None) + + if learned_ops == []: + learned_ops = [ + Operator(pre=set(), add=set(effect_frozen), delete=set(), action=action) + for (action, effect_frozen), entry in candidate_ops.items() + ] + else: + op_index = {(op.action, frozenset(op.add)): op for op in learned_ops} + learned_ops = [] + for (action, effect_frozen), entry in candidate_ops.items(): + if (action, effect_frozen) not in op_index: + learned_ops.append(Operator(pre=set(), add=set(effect_frozen), delete=set(), action=action)) + else: + learned_ops.append(Operator( + pre=op_index[(action, effect_frozen)].pre, + add=set(effect_frozen), + delete=set(), + action=action + )) + + if verbose: + print("Backward Learned Operators:") + for op in sorted(learned_ops, key=lambda x: x.action): + print(op) + + learned_ops = refine_by_plan_divergence(demo_data, learned_ops) + + if verbose: + print("Forward Learned Operators:") + for op in sorted(learned_ops, key=lambda x: x.action): + print(op) + + return learned_ops + + +# --- Run Learning --- +def run_operator_learning_trials(num_trials=10, verbose=True) -> int: + invalid_count = 0 + valid_count = 0 + + for _ in range(num_trials): + pred_pool = [i for i in range(NUM_PREDICATES)] + operators = 
[generate_random_operator(pred_pool, i) for i in range(NUM_OPERATORS)] + demo_data = generate_planned_demo_trajectories(operators, NUM_TRAJS, max_depth=TRAJ_MAX) + + if verbose: + print("\n--- Ground Truth Operators ---") + for op in sorted(operators, key=lambda x: x.action): + print(op) + print() + + print("Demos:") + for traj in demo_data: + print("Goal:", traj[1], "Length:", len(traj[0])) + + refined_ops = learn_operators_from_demos(demo_data, max_iters=1000, verbose=verbose) + + if verbose: + print("\n--- Final Learned Operators ---") + for op in sorted(refined_ops, key=lambda x: x.action): + print(op) + print() + + # Add delete effects + op_index = {(op.action, frozenset(op.add)): op for op in refined_ops} + for (action, effect_frozen), entry in backward_infer_minimal_effects(demo_data, current_operators=refined_ops).items(): + delete = op_index[(action, effect_frozen)].pre & set.intersection( + *[set(entry['demos'][i][0] - entry['demos'][i][2]) for i in range(len(entry['demos']))] + ) + op_index[(action, effect_frozen)].delete = delete + + for op in op_index.values(): + for actual_op in operators: + if op.action == actual_op.action: + if op.pre <= actual_op.pre and op.add <= actual_op.add and op.delete <= actual_op.delete: + valid_count += 1 + if verbose: + print("VALID\n\tLEARNED |", op, "\n\tORIGINAL|", actual_op) + else: + invalid_count += 1 + if verbose: + print("INVALID\n\tLEARNED |", op, "\n\tORIGINAL|", actual_op) + + return invalid_count, valid_count + +def deduplicate_predicates_by_equivalence(demos, operators, num_preds): + from collections import defaultdict + + # Step 1: Build truth vectors for each predicate + pred_vectors = defaultdict(list) + + for traj, goal in demos: + for s, _, s_prime in traj: + for i in range(num_preds): + pred_vectors[i].append(int(i in s)) + pred_vectors[i].append(int(i in s_prime)) + for i in range(num_preds): + pred_vectors[i].append(int(i in goal)) + + # Step 2: Group predicates with identical truth vectors + vector_to_preds = defaultdict(list) + for pred, vec in pred_vectors.items(): + vector_to_preds[tuple(vec)].append(pred) + + # Step 3: Build a mapping from redundant predicate -> representative + replace_map = {} + for group in vector_to_preds.values(): + representative = min(group) # pick smallest index as canonical + for pred in group: + replace_map[pred] = representative + + # Step 4: Replace predicates in demos + new_demos = [] + for traj, goal in demos: + new_traj = [] + for s, a, s_prime in traj: + s_new = {replace_map[p] for p in s} + s_prime_new = {replace_map[p] for p in s_prime} + new_traj.append((s_new, a, s_prime_new)) + new_goal = {replace_map[p] for p in goal} + new_demos.append((new_traj, new_goal)) + + # Step 5: Replace predicates in operators + new_operators = [] + for op in operators: + pre = {replace_map[p] for p in op.pre} + add = {replace_map[p] for p in op.add} + delete = {replace_map[p] for p in op.delete} + new_operators.append(Operator(pre, add, delete, op.action)) + + return new_demos, new_operators, replace_map + + +# invalids, valids = run_operator_learning_trials(num_trials=50, verbose=False) +# print(f"Number of invalid learned operators: {invalids} / {invalids+valids}") + + +PREDICATES = { + "at_A": 0, + "at_B": 1, + "handempty": 2, + "holding_block1": 3, + "holding_block2": 4, + "clear_block1": 5, + "clear_block2": 6, + "inside_block1": 7, + "inside_block2": 8, +} + +OPERATORS = [ + # move from B to A + Operator(pre={PREDICATES["at_B"]}, add={PREDICATES["at_A"]}, delete={PREDICATES["at_B"]}, action=0), + # 
move from A to B + Operator(pre={PREDICATES["at_A"]}, add={PREDICATES["at_B"]}, delete={PREDICATES["at_A"]}, action=1), + + # pick block1 + Operator(pre={PREDICATES["at_A"], PREDICATES["clear_block1"], PREDICATES["handempty"]}, + add={PREDICATES["holding_block1"]}, + delete={PREDICATES["clear_block1"], PREDICATES["handempty"]}, + action=2), + + # pick block2 + Operator(pre={PREDICATES["at_A"], PREDICATES["clear_block2"], PREDICATES["handempty"]}, + add={PREDICATES["holding_block2"]}, + delete={PREDICATES["clear_block2"], PREDICATES["handempty"]}, + action=3), + + # place block1 in box (at B) + Operator(pre={PREDICATES["at_B"], PREDICATES["holding_block1"]}, + add={PREDICATES["inside_block1"], PREDICATES["handempty"]}, + delete={PREDICATES["holding_block1"]}, + action=4), + + # place block2 in box (at B) + Operator(pre={PREDICATES["at_B"], PREDICATES["holding_block2"]}, + add={PREDICATES["inside_block2"], PREDICATES["handempty"]}, + delete={PREDICATES["holding_block2"]}, + action=5), +] + + +init_state = { + PREDICATES["at_B"], PREDICATES["handempty"], + PREDICATES["clear_block1"], PREDICATES["clear_block2"] +} + +actions = [0, 2, 1, 4, 0, 3, 1, 5] # move→pick→move→place (block1), move→pick→move→place (block2) + +state = init_state.copy() +traj1 = [] + +for action_id in actions: + op = OPERATORS[action_id] + next_state = op.apply(state) + traj1.append((state.copy(), action_id, next_state.copy())) + state = next_state.copy() + +goal1 = {PREDICATES["inside_block1"], PREDICATES["inside_block2"]} + +actions = [0, 3, 1, 5, 0, 2, 1, 4] # move→pick→move→place (block1), move→pick→move→place (block2) + +state = init_state.copy() +traj2 = [] + +for action_id in actions: + op = OPERATORS[action_id] + next_state = op.apply(state) + traj2.append((state.copy(), action_id, next_state.copy())) + state = next_state.copy() + +goal2 = {PREDICATES["inside_block1"], PREDICATES["inside_block2"]} +demo_data = [(traj1, goal1), (traj2, goal2)] + +demo_data, operators, pred_replace_map = deduplicate_predicates_by_equivalence(demo_data, OPERATORS, NUM_PREDICATES) +print("Predicate replacement map:", pred_replace_map) + +print("\n--- Ground Truth Operators ---") +for op in sorted(operators, key=lambda x: x.action): + print(op) +print() + +print("Demos:") +# for traj in demo_data: +# print("Goal:", traj[1], "Length:", len(traj[0])) +print(len(demo_data)) + +learned = learn_operators_from_demos(demo_data, max_iters=20, verbose=True) +op_index = {(op.action, frozenset(op.add)): op for op in learned} +for (action, effect_frozen), entry in backward_infer_minimal_effects(demo_data, current_operators=learned).items(): + delete = op_index[(action, effect_frozen)].pre & set.intersection( + *[set(entry['demos'][i][0] - entry['demos'][i][2]) for i in range(len(entry['demos']))] + ) + op_index[(action, effect_frozen)].delete = delete + + +print("\n--- Final Learned Operators ---") +for op in sorted(learned, key=lambda x: x.action): + print(op) +print() + +print("\n--- Ground Truth Operators ---") +for op in sorted(operators, key=lambda x: x.action): + print(op) +print() + +# PREDICATES.update({ +# "reachable_block1": 9, +# "reachable_block2": 10, +# }) + +# demo_data = [([step if i not in (0, 4) else (step[0], step[1], step[2] | {9, 10}) for i, step in enumerate(traj)], goal) for traj, goal in demo_data] +# demo_data = [([step if i not in (1, 5) else (step[0] | {9, 10}, step[1], step[2] | {9, 10}) for i, step in enumerate(traj)], goal) for traj, goal in demo_data] +# demo_data = [([step if i not in (2, 6) else (step[0] | {9, 10}, 
step[1], step[2]) for i, step in enumerate(traj)], goal) for traj, goal in demo_data] + +# demo_data, operators, pred_replace_map = deduplicate_predicates_by_equivalence(demo_data, OPERATORS, NUM_PREDICATES + 2) +# print("Predicate replacement map:", pred_replace_map) + +# print("Demos:") +# # for traj in demo_data: +# # print("Goal:", traj[1], "Length:", len(traj[0])) +# print(len(demo_data)) + +# learned = learn_operators_from_demos(demo_data, max_iters=20, verbose=True) + +# print("\n--- Final Learned Operators ---") +# for op in sorted(learned, key=lambda x: x.action): +# print(op) +# print() + +# print("\n--- Ground Truth Operators ---") +# for op in sorted(operators, key=lambda x: x.action): +# print(op) +# print() + +import ipdb; ipdb.set_trace() \ No newline at end of file diff --git a/test_colla_env.py b/test_colla_env.py new file mode 100644 index 0000000000..326866f4e6 --- /dev/null +++ b/test_colla_env.py @@ -0,0 +1,354 @@ +import gym +import numpy as np +from PIL import Image +from minigrid.wrappers import * +from mini_behavior.window import Window +from mini_behavior.utils.save import get_step, save_demo +from mini_behavior.grid import GridDimension +from mini_behavior.states import * +from collections import deque +import random + +TILE_PIXELS = 32 + +class MiniBehaviorEnv: + def __init__(self, env_id='MiniGrid-InstallingAPrinter-8x8-N2-v0', seed=-1, tile_size=32, + agent_view=False, save_demo_flag=False, load_path=None): + + self.env_id = env_id + self.seed = seed + self.tile_size = tile_size + self.agent_view = agent_view + self.save_demo_flag = save_demo_flag + self.load_path = load_path + self.show_furniture = False + self.all_steps = {} + + self.env = gym.make(env_id) + self.env.teleop_mode() + self.key_to_action = { + '0': self.env.actions.pickup_0, + '1': self.env.actions.pickup_1, + '2': self.env.actions.pickup_2, + '3': self.env.actions.drop_0, + '4': self.env.actions.drop_1, + '5': self.env.actions.drop_2, + 't': self.env.actions.toggle, + 'o': self.env.actions.open, + 'c': self.env.actions.close, + 'k': self.env.actions.cook, + '6': self.env.actions.slice, + 'i': self.env.actions.drop_in, + } + for obj_type, obj_list in self.env.objs.items(): + for obj in obj_list: + self.key_to_action["moveto-" + obj.name] = "moveto-" + obj.name + + + if self.agent_view: + self.env = RGBImgPartialObsWrapper(self.env) + self.env = ImgObsWrapper(self.env) + + self.window = Window('mini_behavior - ' + env_id) + self.window.no_closeup() + + if self.load_path is not None: + self._load_state() + + self.nav_sampler_cache = {} + self.short_task = True + + def redraw(self, img): + if not self.agent_view: + img = self.env.render() + self.window.set_inventory(self.env) + self.window.show_img(img) + self.window.save_img("output_image.jpeg") + + def render_furniture(self): + self.show_furniture = not self.show_furniture + if self.show_furniture: + img = np.copy(self.env.furniture_view) + i, j = self.env.agent_pos + ymin = j * TILE_PIXELS + ymax = (j + 1) * TILE_PIXELS + xmin = i * TILE_PIXELS + xmax = (i + 1) * TILE_PIXELS + img[ymin:ymax, xmin:xmax, :] = GridDimension.render_agent( + img[ymin:ymax, xmin:xmax, :], self.env.agent_dir) + img = self.env.render_furniture_states(img) + self.window.show_img(img) + else: + obs = self.env.gen_obs() + self.redraw(obs) + + def show_states(self): + imgs = self.env.render_states() + self.window.show_closeup(imgs) + + def switch_dim(self, dim): + self.env.switch_dim(dim) + print(f'switching to dim: {self.env.render_dim}') + obs = self.env.gen_obs() + 
self.redraw(obs) + + def _load_state(self): + if self.seed != -1: + self.env.seed(self.seed) + self.env.reset() + obs = self.env.load_state(self.load_path) + if hasattr(self.env, 'mission'): + print('Mission: %s' % self.env.mission) + self.window.set_caption(self.env.mission) + self.redraw(obs) + + def reset(self): + if self.seed != -1: + self.env.seed(self.seed) + obs = self.env.reset() + if hasattr(self.env, 'mission'): + print('Mission: %s' % self.env.mission) + self.window.set_caption(self.env.mission) + self.redraw(obs) + return obs + + def get_lifted_state(self): + objs = self.env.objs + obj_instances = {} + for obj_type, obj_list in objs.items(): + for obj in obj_list: + obj_instances[obj.name] = obj + + ground_atoms = [] + for k, o in obj_instances.items(): + for pred_name, pred in o.states.items(): + if isinstance(pred, (AbsoluteObjectState, AbilityState, ObjectProperty)): + if pred.get_value(self.env): + ground_atoms.append(f"{pred_name}({k})") + elif isinstance(pred, RelativeObjectState): + for k2, o2 in obj_instances.items(): + if o.check_rel_state(self.env, o2, pred_name): + ground_atoms.append(f"{pred_name}({k},{k2})") + return ground_atoms + + def step(self, action): + prev_obs = self.env.gen_obs() + prev_state = self.get_lifted_state() + if isinstance(action, str) and action.startswith("moveto-"): + self.move_in_front_of(action.replace("moveto-","")) + obs = self.env.gen_obs() + reward = 0.0 + done = False + terminated = False + info = {} + else: + obs, reward, done, terminated, info = self.env.step(action) + if self.short_task: + if self.env_id == 'MiniGrid-SortingBooks-16x16-N2-v0': + book = self.env.objs['book'] + hardback = self.env.objs['hardback'] + shelf = self.env.objs['shelf'][0] + for obj in book + hardback: + if obj.check_rel_state(self.env, shelf, 'onTop'): + reward = 1.0 + done = 1.0 + elif self.env_id == 'MiniGrid-WateringHouseplants-16x16-N2-v0': + pot_plants = self.env.objs['pot_plant'] + for plant in pot_plants: + if plant.check_abs_state(self.env, 'soakable'): + reward = 1.0 + done = 1.0 + elif self.env_id == 'MiniGrid-ThrowingAwayLeftovers-16x16-N2-v0': + for hamburger in self.env.objs['hamburger']: + is_inside = [hamburger.check_rel_state(self.env, ashcan, 'inside') for ashcan in self.env.objs['ashcan']] + if True in is_inside: + reward = 1.0 + done = 1.0 + elif self.env_id == 'MiniGrid-BoxingBooksUpForStorage-16x16-N2-v0': + book = self.env.objs['book'] + box = self.env.objs['box'][0] + for obj in book: + if obj.check_rel_state(self.env, box, 'inside'): + reward = 1.0 + done = 1.0 + state = self.get_lifted_state() + + print(f'env_id={self.env_id}, step={self.env.step_count}, reward={reward:.2f}') + # for atom in state: + # print(atom) + + if self.save_demo_flag: + self.all_steps[self.env.step_count] = (prev_obs, prev_state, action, obs, state) + + if done: + print('done!') + if self.save_demo_flag: + save_demo(self.all_steps, self.env_id, self.env.episode) + self.reset() + else: + self.redraw(obs) + return obs, reward, done, terminated, info + + def show(self): + self.window.show(block=False) + + def key_handler_primitive(self, event): + print('pressed', event.key) + action_map = { + 'left': self.env.actions.left, + 'right': self.env.actions.right, + 'up': self.env.actions.forward, + '0': self.env.actions.pickup_0, + '1': self.env.actions.pickup_1, + '2': self.env.actions.pickup_2, + '3': self.env.actions.drop_0, + '4': self.env.actions.drop_1, + '5': self.env.actions.drop_2, + 't': self.env.actions.toggle, + 'o': self.env.actions.open, + 'c': 
self.env.actions.close, + 'k': self.env.actions.cook, + '6': self.env.actions.slice, + 'i': self.env.actions.drop_in + } + + if event.key == 'escape': + self.window.close() + elif event.key in action_map: + self.step(action_map[event.key]) + elif event.key == 'pagedown': + self.show_states() + + def bfs_path(self, start, goal): + grid = self.env.grid + width, height = grid.width, grid.height + visited = set() + queue = deque([(start, [])]) + + while queue: + current_pos, path = queue.popleft() + if current_pos == goal: + return path + + for dx, dy in [(-1,0), (1,0), (0,-1), (0,1)]: + nx, ny = current_pos[0] + dx, current_pos[1] + dy + next_pos = (nx, ny) + + if not (0 <= nx < width and 0 <= ny < height): + continue + if next_pos in visited: + continue + if grid.get(nx, ny) != [[None, None], [None, None], [None, None]]: + if grid.get(nx, ny)[0][0] is None or grid.get(nx, ny)[0][0].name != "door": + continue # Obstacle + + visited.add(next_pos) + queue.append((next_pos, path + [next_pos])) + + return None # No path found + + def move_in_front_of(self, target_name): + # Find the target object + target_obj = None + for obj_list in self.env.objs.values(): + for obj in obj_list: + if obj.name == target_name: + target_obj = obj + break + if target_obj: + break + + if target_obj is None: + print(f"[Error] Object '{target_name}' not found.") + return + + reachable = [] + start_pos = tuple(self.env.agent_pos) + target_pos = target_obj.cur_pos + adjacents = [ + (target_pos[0] + 1, target_pos[1]), + (target_pos[0] - 1, target_pos[1]), + (target_pos[0], target_pos[1] + 1), + (target_pos[0], target_pos[1] - 1) + ] + pos_to_target = {} + for adj in adjacents: + pos_to_target[adj] = target_pos + if hasattr(target_obj, 'all_pos'): + adjacents = [] + for target_pos in target_obj.all_pos: + if 'cabinet' not in target_name: + if target_name in self.nav_sampler_cache: + if target_pos in self.nav_sampler_cache[target_name]: + continue + new_adjacents = [ + (target_pos[0] + 1, target_pos[1]), + (target_pos[0] - 1, target_pos[1]), + (target_pos[0], target_pos[1] + 1), + (target_pos[0], target_pos[1] - 1) + ] + for adj in new_adjacents: + pos_to_target[adj] = target_pos + adjacents += new_adjacents + # Choose a reachable adjacent position + random.shuffle(adjacents) + for pos in adjacents: + if (0 <= pos[0] < self.env.grid.width and 0 <= pos[1] < self.env.grid.height): + if self.env.grid.get(*pos) == [[None, None], [None, None], [None, None]]: + path = self.bfs_path(start_pos, pos) + if path: + reachable.append((pos, path)) + elif self.env.grid.get(*pos)[0][0] is not None: + if self.env.grid.get(*pos)[0][0].name == "door": + path = self.bfs_path(start_pos, pos) + if path: + reachable.append((pos, path)) + else: + pass + + if not reachable: + print(f"[Error] No accessible position next to '{target_name}'") + return + + # Choose shortest reachable + goal_pos, path = min(reachable, key=lambda x: len(x[1])) + + # Follow path + for next_pos in path: + dx = next_pos[0] - self.env.agent_pos[0] + dy = next_pos[1] - self.env.agent_pos[1] + + desired_dir = { + (1, 0): 0, + (0, 1): 1, + (-1, 0): 2, + (0, -1): 3 + }.get((dx, dy)) + + if desired_dir is None: + continue + + while self.env.agent_dir != desired_dir: + self.step(self.env.actions.right) + self.step(self.env.actions.forward) + + # Face the object + target_pos = pos_to_target[tuple(self.env.agent_pos)] + face_dir = (target_pos[0] - self.env.agent_pos[0], target_pos[1] - self.env.agent_pos[1]) + target_dir = { + (1, 0): 0, + (0, 1): 1, + (-1, 0): 2, + (0, -1): 3 + 
}.get(face_dir) + + if target_dir is not None: + while self.env.agent_dir != target_dir: + self.step(self.env.actions.right) + + print(f"[Success] Reached position in front of '{target_name}', facing it.") + if target_name in self.nav_sampler_cache: + self.nav_sampler_cache[target_name].append(target_pos) + else: + self.nav_sampler_cache[target_name] = [target_pos] + diff --git a/test_colla_final.py b/test_colla_final.py new file mode 100644 index 0000000000..6859df2105 --- /dev/null +++ b/test_colla_final.py @@ -0,0 +1,798 @@ +# (1) implement the evaluation function evaluate(agent) returns dictionary of results +# (2) implment evaluation visualization visualize(results) +# (3) do whatever it takes to make results better (CI, BC, FF+BC, FF+BC+LLMs) + +from test_colla_env import MiniBehaviorEnv +from test_colla_helpers import Box, LowLevelTrajectory, State, Task, \ + demo_files, get_demo_traj, learn_nsrts_from_data, parse_nsrt_block +import matplotlib.pyplot as plt +import seaborn as sns +import pandas as pd +import random +from predicators.planning import task_plan, task_plan_grounding, _SkeletonSearchTimeout, PlanningFailure +from predicators import utils +from predicators.structs import Action, LowLevelTrajectory, Predicate, State, \ + Type, GroundAtom, Task, STRIPSOperator +import numpy as np +from collections import Counter + +from predicators.nsrt_learning.strips_learning.gen_to_spec_learner import parse_objs_preds_and_options + +import pickle as pkl +import numpy as np +from predicators.structs import Action, LowLevelTrajectory, Predicate, State, \ + Type +from test_operator_learning_all import get_demo_traj, demo_files +from predicators.nsrt_learning.segmentation import segment_trajectory + +opname_to_key = { + 'Actions.pickup_0': '0', + 'Actions.pickup_1': '1', + 'Actions.pickup_2': '2', + 'Actions.drop_0': '3', + 'Actions.drop_1': '4', + 'Actions.drop_2': '5', + 'Actions.drop_in': 'i', + 'Actions.toggle': 't', + 'Actions.close': 'c', + 'Actions.open': 'o', + 'Actions.cook': 'k', + 'Actions.slice': '6' +} + +class RandomAgent(): + def __init__(self, name): + self.name = name + self.actions = None + + def reset(self, task_name, learn=False): + pass + + def policy(self, obs, env): + #print(env.get_lifted_state()) + return env.key_to_action[random.choice(list(env.key_to_action.keys()))] + +def evaluation(agents, tasks, num_iterations=10, start_seed=100, short_task=True, learn=True): + results = {} + task_i = 0 + for i in range(num_iterations): + for task in tasks: + for agent in agents: + try: + env = MiniBehaviorEnv(env_id=task, seed=i+start_seed) + env.short_task = short_task + observation, _ = env.reset() + agent.short_task = short_task + agent.reset(env.env_id, learn=learn) + steps = 0 + for _ in range(50): + action = agent.policy(observation, env) + observation, reward, terminated, truncated, _ = env.step(action) + steps += 1 + env.show() + + if reward != 0: + break + + if terminated or truncated: + break + found_plan = 0 + plan_diff = -1 + if agent.actions is not None and agent.actions != []: + found_plan = 1 + + key_to_opname = {v:k for k,v in opname_to_key.items()} + plan = [key_to_opname[action] if not action.startswith("moveto") else "Move" for action in agent.actions] + dataset_plan = agent.dataset[0].actions + + def differing_reoccurring_counts(list1, list2): + count1 = Counter(list1) + count2 = Counter(list2) + all_keys = set(count1.keys()) | set(count2.keys()) + result = {} + total_diff = 0 + for key in all_keys: + c1 = count1.get(key, 0) + c2 = count2.get(key, 0) + if 
(c1 > 1 or c2 > 1) and c1 != c2: + diff = abs(c1 - c2) + result[key] = diff + total_diff += diff + result['total'] = total_diff + return result + plan_diff = differing_reoccurring_counts(plan, dataset_plan)['total'] + results[str(task_i) + "_" + task + "_" + agent.name] = (steps, reward, i, found_plan, plan_diff) + except _SkeletonSearchTimeout: + print("did not find skeleton - timeout") + results[str(task_i) + "_" + task + "_" + agent.name] = (-1,0,0,0,-1) + except PlanningFailure: + print("did not find skeleton - plan failure") + results[str(task_i) + "_" + task + "_" + agent.name] = (-1,0,0,0,-1) + task_i += 1 + return results + +def structure_results(results_dict): + data = [] + for key, (steps, reward, iteration, found_plan, plan_diff) in results_dict.items(): + task_idx, task_name, agent_name = key.split("_", 2) + data.append({ + "task_name": task_name, + "task_idx": int(task_idx), + "iteration": int(iteration), + "found_plan": int(found_plan), + "plan_diff": int(plan_diff), + "steps": steps, + "reward": reward, + "success": 1 if reward > 0 else 0, + "agent": agent_name + }) + df = pd.DataFrame(data) + df["task_order"] = df["task_idx"] + return df.sort_values(["agent", "iteration", "task_order"]) + +def plot_lifelong_success(df): + plt.figure(figsize=(14, 5)) + + sns.lineplot( + data=df, + x="task_order", + y="success", + hue="agent", + marker="o" + ) + + # Set up x-ticks with task names, spaced across iterations + xticks = df["task_order"] + xticklabels = df["task_name"] + plt.xticks(ticks=xticks, labels=xticklabels, rotation=45, ha='right') + + plt.ylim(-0.1, 1.1) + plt.yticks([0, 1], ["Fail", "Success"]) + plt.ylabel("Success") + plt.xlabel("Tasks over Lifelong Iterations") + plt.title("Lifelong Learning Success per Task") + plt.tight_layout() + plt.grid(True, linestyle='--', alpha=0.3) + plt.savefig("lifelong_learning_success.png", dpi=200) + +class OperatorLearningAgent(): + def __init__(self, name, strips_learner, single_grounding=False): + self.name = name + self.num_demos = 1 + + # Initialized once; populated in get_data() + self.dataset = [] + self.ground_atom_dataset = [] + self.tasks = [] + self.action_space = Box(0, 7, (1,)) + self.objs = set() + self.preds = set() + self.options = set() + self.ground_atoms_traj = [] + self.goal = None + + # Runtime variables + self.nsrts = None + self.actions = None + self.i = 0 + self.seed_i = 0 + self.short_task = True + + # Learning Params + self.strips_learner = strips_learner + self.single_grounding = single_grounding + utils.reset_config({ + "strips_learner": self.strips_learner, + "segmenter": "every_step", + "disable_harmlessness_check": True, + "pnad_search_load_initial": True, + "backward_forward_load_initial": True, + "min_data_for_nsrt": 0, + "min_perc_data_for_nsrt": 0, + "pnad_search_timeout":1000.0, + "single_grounding": self.single_grounding, + "option_learner": "no_learning" + }) + + def reset(self, task_name, learn=True): + if False: + self.dataset = [] + self.ground_atom_dataset = [] + self.tasks = [] + self.objs = set() + self.preds = set() + self.options = set() + self.ground_atoms_traj = [] + self.action_space = Box(0, 7, (1,)) + self.seed_i = 0 + + # Learning Params + utils.reset_config({ + "strips_learner": self.strips_learner, + "segmenter": "every_step", + "disable_harmlessness_check": True, + "pnad_search_load_initial": True, + "backward_forward_load_initial": True, + "min_data_for_nsrt": 0, + "min_perc_data_for_nsrt": 0, + "pnad_search_timeout":1000.0, + "single_grounding": self.single_grounding, + 
"option_learner": "no_learning" + }) + + if learn: + self.nsrts = self.learn_nsrts(task_name) + else: + dataset, tasks, preds, options, action_space, ground_atom_dataset = self.get_data(task_name) + with open("test_saved.NSRTs.txt", "r") as file: + content = file.read() + nsrt_strs = ["NSRT-" + nsrt_str for nsrt_str in content.split("NSRT-") if nsrt_str != ''] + segmented_trajs = [segment_trajectory(traj, self.preds, atom_seq=atom_seq) for traj, atom_seq in self.ground_atom_dataset] + self.nsrts = [parse_nsrt_block(nsrt_str, segmented_trajs) for nsrt_str in nsrt_strs] + self.goal = self.parse_goal(task_name, self.ground_atoms_traj[1][-1]) + self.actions = None + self.i = 0 + + def parse_goal(self, task_name, ground_atoms_state): + if task_name == "MiniGrid-BoxingBooksUpForStorage-16x16-N2-v0": + return set([atom for atom in ground_atoms_state if str(atom).startswith("inside(")]) + + elif task_name == "MiniGrid-OpeningPackages-16x16-N2-v0": + return set([atom for atom in ground_atoms_state if str(atom).startswith("openable(")]) + + elif task_name == "MiniGrid-CleaningACar-16x16-N2-v0": + return set([atom for atom in ground_atoms_state if str(atom).startswith("inside(")]) | set([atom for atom in ground_atoms_state if str(atom).startswith("~dustyable(")]) + + elif task_name == "MiniGrid-CleaningShoes-16x16-N2-v0": + return set([atom for atom in ground_atoms_state if str(atom).startswith("~stainable(") and "shoe" in str(atom)]) | \ + set([atom for atom in ground_atoms_state if str(atom).startswith("~dustyable(") and "shoe" in str(atom)]) | \ + set([atom for atom in ground_atoms_state if str(atom).startswith("onfloor(") and "towel" in str(atom)]) + + elif task_name == "MiniGrid-CleaningUpTheKitchenOnly-16x16-N2-v0": + return set([ + atom for atom in ground_atoms_state + if ( + str(atom).startswith("onTop(") and "blender" in str(atom) and "countertop" in str(atom) + ) or ( + str(atom).startswith("nextto(") and "soap" in str(atom) and "sink" in str(atom) + ) or ( + str(atom).startswith("inside(") and "vegetable_oil" in str(atom) and "cabinet" in str(atom) + ) or ( + str(atom).startswith("inside(") and "plate" in str(atom) and "cabinet" in str(atom) + ) or ( + str(atom).startswith("inside(") and "casserole" in str(atom) and "electric_refrigerator" in str(atom) + ) or ( + str(atom).startswith("inside(") and "apple" in str(atom) and "electric_refrigerator" in str(atom) + ) or ( + str(atom).startswith("inside(") and "rag" in str(atom) and "sink" in str(atom) + ) or ( + str(atom).startswith("nextto(") and "rag" in str(atom) and "sink" in str(atom) + ) or ( + str(atom).startswith("~dustyable(") and "cabinet" in str(atom) + ) or ( + str(atom).startswith("~stainable(") and "plate" in str(atom) + ) + ]) + + elif task_name == "MiniGrid-CollectMisplacedItems-16x16-N2-v0": + return set([ + atom for atom in ground_atoms_state + if str(atom).startswith("onTop(") and "table" in str(atom) and ( + "gym_shoe" in str(atom) or + "necklace" in str(atom) or + "notebook" in str(atom) or + "sock" in str(atom) + ) and not str(atom).startswith("onTop(table") + ]) + + elif task_name == "MiniGrid-InstallingAPrinter-16x16-N2-v0": + return set([atom for atom in ground_atoms_state if str(atom).startswith("onTop(")]) | \ + set([atom for atom in ground_atoms_state if str(atom).startswith("toggleable(")]) + + elif task_name == "MiniGrid-LayingWoodFloors-16x16-N2-v0": + return set([atom for atom in ground_atoms_state if str(atom).startswith("nextto(")]) + + elif task_name == "MiniGrid-MakingTea-16x16-N2-v0": + return set([ + atom 
for atom in ground_atoms_state + if str(atom).startswith("sliceable(") and "lemon" in str(atom) + ]) | set([ + atom for atom in ground_atoms_state + if str(atom).startswith("onTop(") and "teapot" in str(atom) and "stove" in str(atom) + ]) | set([ + atom for atom in ground_atoms_state + if str(atom).startswith("atsamelocation(") and "tea_bag" in str(atom) and "teapot" in str(atom) + ]) | set([ + atom for atom in ground_atoms_state + if str(atom).startswith("soakable(") and "teapot" in str(atom) + ]) | set([ + atom for atom in ground_atoms_state + if str(atom).startswith("toggleable(") and "stove" in str(atom) + ]) + + elif task_name == "MiniGrid-MovingBoxesToStorage-16x16-N2-v0": + return set([atom for atom in ground_atoms_state if str(atom).startswith("onTop(")]) + + elif task_name == "MiniGrid-OrganizingFileCabinet-16x16-N2-v0": + return set([ + atom for atom in ground_atoms_state + if str(atom).startswith("onTop(") and "marker" in str(atom) and "table" in str(atom) + ]) | set([ + atom for atom in ground_atoms_state + if str(atom).startswith("inside(") and "document" in str(atom) and "cabinet" in str(atom) + ]) | set([ + atom for atom in ground_atoms_state + if str(atom).startswith("inside(") and "folder" in str(atom) and "cabinet" in str(atom) + ]) + + elif task_name == "MiniGrid-PreparingSalad-16x16-N2-v0": + import ipdb; ipdb.set_trace() + raise NotImplementedError("parse_goal not implemented for PreparingSalad") + + elif task_name == "MiniGrid-PuttingAwayDishesAfterCleaning-16x16-N2-v0": + return set([ + atom for atom in ground_atoms_state + if str(atom).startswith("inside(") and "plate" in str(atom) and "cabinet" in str(atom) + ]) + + + elif task_name == "MiniGrid-SettingUpCandles-16x16-N2-v0": + return set([atom for atom in ground_atoms_state if str(atom).startswith("onTop(")]) + + elif task_name == "MiniGrid-SortingBooks-16x16-N2-v0": + return set([atom for atom in ground_atoms_state if str(atom).startswith("onTop(") and "shelf" in str(atom) and ("book" in str(atom) or "hardback" in str(atom))]) + + elif task_name == "MiniGrid-StoringFood-16x16-N2-v0": + return set([ + atom for atom in ground_atoms_state + if str(atom).startswith("inside(") and "cabinet" in str(atom) and ( + "oatmeal" in str(atom) or "chip" in str(atom) or "vegetable_oil" in str(atom) or "sugar" in str(atom) + ) + ]) + + elif task_name == "MiniGrid-ThawingFrozenFood-16x16-N2-v0": + return set([ + atom for atom in ground_atoms_state + if str(atom).startswith("nextto(") and ( + ("date" in str(atom) and "fish" in str(atom)) or + ("fish" in str(atom) and "sink" in str(atom)) or + ("olive" in str(atom) and "sink" in str(atom)) + ) + ]) + + elif task_name == "MiniGrid-ThrowingAwayLeftovers-16x16-N2-v0": + return set([atom for atom in ground_atoms_state if str(atom).startswith("inside(") and "hamburger" in str(atom) and "ashcan" in str(atom)]) + + elif task_name == "MiniGrid-WashingPotsAndPans-16x16-N2-v0": + return set([ + atom for atom in ground_atoms_state + if str(atom).startswith("~stainable(") and ( + "pan" in str(atom) or "kettle" in str(atom) or "teapot" in str(atom) + ) + ]) | set([ + atom for atom in ground_atoms_state + if str(atom).startswith("inside(") and "cabinet" in str(atom) and ( + "pan" in str(atom) or "kettle" in str(atom) or "teapot" in str(atom) + ) + ]) + + elif task_name == "MiniGrid-WateringHouseplants-16x16-N2-v0": + return set([atom for atom in ground_atoms_state if str(atom).startswith("soakable(") and "pot_plant" in str(atom)]) + else: + import ipdb; ipdb.set_trace() + + + def get_plan(self, 
state, seed): + objs, _, _, ground_atoms_traj, all_atoms = parse_objs_preds_and_options(self.dataset[-1], train_task_idx=len(self.dataset)) + task = Task(State({}, None), self.goal) + + _, _, _, ground_atoms_traj, _ = parse_objs_preds_and_options(LowLevelTrajectory([state], [], _is_demo=True, _train_task_idx=0), train_task_idx=0, all_atoms=all_atoms) + init_atoms = ground_atoms_traj[1][0] + plan = self.plan(init_atoms, objs, self.preds, self.nsrts, task, seed) + return plan + + def policy(self, obs, env): + if self.actions is None: + seed = self.seed_i + self.seed_i += 1 + num_remove_pre = 0 + while self.actions is None or self.actions == []: + self.actions = self.get_plan(env.get_lifted_state(), seed) + self.i = 0 + # num_remove_pre += 1 + # new_nsrts = set() + # for nsrt in self.nsrts: + # pre = set() + # tot_pre = len(nsrt.op.preconditions) - num_remove_pre + # if tot_pre > 0: + # pre = random.sample(nsrt.op.preconditions, tot_pre) + # ignore_effects = nsrt.op.ignore_effects + # del_effs = nsrt.op.delete_effects + # # if num_remove_pre > 10: + # # ignore_effects = set() + # # del_effs = set() + # new_nsrts.add( + # nsrt.op.copy_with(preconditions=pre, + # ignore_effects=ignore_effects, + # delete_effects=del_effs).make_nsrt( + # nsrt.option, + # [], # dummy sampler + # lambda s, g, rng, o: np.zeros(1, dtype=np.float32))) + # self.nsrts = new_nsrts + # with open("test_saved.NSRTs.txt", "w") as file: + # for nsrt in self.nsrts: + # if nsrt.op.add_effects != set(): + # file.write(str(nsrt)+"\n") + + self.i += 1 + if self.i-1 < len(self.actions): + return env.key_to_action[self.actions[self.i-1]] + else: + self.actions = None + self.i = 0 + return env.key_to_action["0"] + + def clean_action_plan(self, action_plan): + plan = [] + for step in action_plan: + name = step[0] + objs = step[1] + if len(objs) > 0: + obj_name = objs[0].name + if name.startswith("Move"): + plan.append(f"moveto-{obj_name}") + else: + for opname, key in opname_to_key.items(): + if opname in name: + plan.append(key) + break + return plan + + def plan(self, init_atoms, objects, predicates, nsrts, task, seed): + ground_nsrts, reachable_atoms = task_plan_grounding(init_atoms, objects, nsrts, allow_noops=True) + heuristic = utils.create_task_planning_heuristic("hadd", init_atoms, + task.goal, ground_nsrts, + predicates, objects) + task_plan_generator = task_plan(init_atoms, + task.goal, + ground_nsrts, + reachable_atoms, + heuristic, + timeout=1, + seed=seed, + max_skeletons_optimized=3) + skeleton, _, _ = next(task_plan_generator) + + action_plan = [] + for step in skeleton: + action_plan.append((step.option.name, step.objects)) + return self.clean_action_plan(action_plan) + + def get_data(self, task_name): + for demo_file in demo_files: + if task_name in demo_file: + demo_traj = get_demo_traj(demo_file=demo_file, verbose=False) + + if self.short_task: + if task_name == 'MiniGrid-SortingBooks-16x16-N2-v0': + demo_traj = LowLevelTrajectory(demo_traj.states[:5], demo_traj.actions[:4], _is_demo=True, _train_task_idx=0) + elif task_name == 'MiniGrid-WateringHouseplants-16x16-N2-v0': + demo_traj = LowLevelTrajectory(demo_traj.states[:7], demo_traj.actions[:6], _is_demo=True, _train_task_idx=0) + elif task_name == 'MiniGrid-ThrowingAwayLeftovers-16x16-N2-v0': + demo_traj = LowLevelTrajectory(demo_traj.states[:5], demo_traj.actions[:4], _is_demo=True, _train_task_idx=0) + elif task_name == 'MiniGrid-BoxingBooksUpForStorage-16x16-N2-v0': + demo_traj = LowLevelTrajectory(demo_traj.states[:5], demo_traj.actions[:4], _is_demo=True, 
_train_task_idx=0) + + idx = len(self.dataset) + demo_traj = LowLevelTrajectory(demo_traj.states, demo_traj.actions, _is_demo=True, _train_task_idx=idx) + + self.dataset.append(demo_traj) + new_objs, new_preds, new_options, self.ground_atoms_traj, _ = parse_objs_preds_and_options(demo_traj, train_task_idx=idx) + self.objs |= new_objs + self.preds |= new_preds + self.options |= new_options + self.ground_atom_dataset.append(self.ground_atoms_traj) + goal = self.parse_goal(task_name, self.ground_atoms_traj[1][-1]) + self.tasks.append(Task(State({}, None), goal)) + # if len(self.dataset) >= self.num_demos: + # break + # assert len(self.dataset) == self.num_demos + return self.dataset, self.tasks, self.preds, self.options, self.action_space, self.ground_atom_dataset + + def learn_nsrts(self, task_name): + dataset, tasks, preds, options, action_space, ground_atom_dataset = self.get_data(task_name) + nsrts, _, _ = learn_nsrts_from_data(dataset, + tasks, + preds, + options, + action_space, + ground_atom_dataset, + sampler_learner="neural", + annotations=None) + with open("test_saved.NSRTs.txt", "w") as file: + for nsrt in nsrts: + if nsrt.op.add_effects != set(): + file.write(str(nsrt)+"\n") + return nsrts + +class DummyAgent(OperatorLearningAgent): + def __init__(self, name="dummy", strips_learner="dummy"): + super().__init__(name=name, strips_learner=strips_learner, single_grounding=True) + + def learn_nsrts(self, task_name): + dataset, tasks, preds, options, action_space, ground_atom_dataset = self.get_data(task_name) + goal = self.parse_goal(task_name, self.ground_atoms_traj[1][-1]) + obj_to_var = {obj:obj.type("?" + obj.name) for obj in self.objs} + lifted_goal = {atom.lift(obj_to_var) for atom in goal} + + nsrts = set() + name_i = 0 + for option in options: + op = STRIPSOperator( + name="Dummy" + str(name_i), + parameters=[], + preconditions=set(), + add_effects=set(), + delete_effects=set(), + ignore_effects=set() + ) + dummy_nsrt = op.make_nsrt( + option, + [], # dummy sampler + lambda s, g, rng, o: np.zeros(1, dtype=np.float32)) + nsrts.add(dummy_nsrt) + name_i += 1 + + params = [] + for sublist in [lifted_atom.variables for lifted_atom in lifted_goal]: + params += sublist + params = [x for x in set(params)] + op = STRIPSOperator( + name="Dummy" + str(name_i), + parameters=params, + preconditions=set(), + add_effects=lifted_goal, + delete_effects=set(), + ignore_effects=set() + ) + dummy_nsrt = op.make_nsrt( + option, + [], # dummy sampler + lambda s, g, rng, o: np.zeros(1, dtype=np.float32)) + nsrts.add(dummy_nsrt) + name_i += 1 + return nsrts + +class GroundTruthAgent(OperatorLearningAgent): + def __init__(self, name): + super().__init__(name=name, strips_learner="NONE") + self.name = name + self.ground_truth_trajs = {} + self.i = 0 + self.actions = None + + def reset(self, task_name, learn=False): + self.dataset = [] + self.ground_atom_dataset = [] + self.tasks = [] + self.action_space = Box(0, 7, (1,)) + self.objs = set() + self.preds = set() + self.options = set() + self.ground_atoms_traj = [] + + dataset, tasks, preds, options, action_space, ground_atom_dataset = self.get_data(task_name) + + self.goal = self.parse_goal(task_name, self.ground_atoms_traj[1][-1]) + self.actions = None + self.i = 0 + + action_plan = [] + for i, step in enumerate(self.ground_atoms_traj[0].actions): + curr_state = self.ground_atoms_traj[1][i] + next_state = self.ground_atoms_traj[1][i+1] + def count_object_occurrences(atom_set): + counter = Counter() + for atom in atom_set: + for obj in atom.objects: + if 
not atom.predicate.name.startswith("~inreachofrobot"): + counter[obj] += 1 + return counter + counter = count_object_occurrences(next_state - curr_state) + def get_max_count_object(counter, exclude_types=("table", "shelf")): + max_count = max(counter.values()) + candidates = [ + obj for obj, count in counter.items() + if count == max_count and all(ex_type not in str(obj) for ex_type in exclude_types) + ] + + if candidates: + return candidates[0] + else: + return None + + try: + if get_max_count_object(counter) is None: + objs = [max(counter, key=counter.get)] + else: + objs = [get_max_count_object(counter)] + except: + objs = random.sample(self.objs, 1) + action_plan.append((step._option.name, objs)) + self.ground_truth_trajs[task_name] = self.clean_action_plan(action_plan) + + def policy(self, obs, env): + #print(env.get_lifted_state()) + try: + assert env.env_id in self.ground_truth_trajs.keys() + except: + import ipdb; ipdb.set_trace() + self.i += 1 + if self.i-1 < len(self.ground_truth_trajs[env.env_id]): + return env.key_to_action[self.ground_truth_trajs[env.env_id][self.i-1]] + else: + return env.key_to_action["0"] + + def learn_nsrts(self, task_name): + return None + +# tasks = ["MiniGrid-BoxingBooksUpForStorage-16x16-N2-v0", +# "MiniGrid-CollectMisplacedItems-16x16-N2-v0", +# "MiniGrid-ThrowingAwayLeftovers-16x16-N2-v0", +# "MiniGrid-OpeningPackages-16x16-N2-v0", +# "MiniGrid-WateringHouseplants-16x16-N2-v0", +# "MiniGrid-PuttingAwayDishesAfterCleaning-16x16-N2-v0"] + +# agents = [RandomAgent("random"), GroundTruthAgent("ground-truth")] +# results = evaluation(agents, tasks, num_iterations=3) +# df = structure_results(results) +# plot_lifelong_success(df) + +##### + +# tasks = ["MiniGrid-BoxingBooksUpForStorage-16x16-N2-v0"] +# agents = [GroundTruthAgent("ground-truth")] +# results = evaluation(agents, tasks, num_iterations=1) +# df = structure_results(results) +# #plot_lifelong_success(df) +# import ipdb; ipdb.set_trace() + +##### + +############ +# tasks = ["MiniGrid-BoxingBooksUpForStorage-16x16-N2-v0"] +# agents = [ +# DummyAgent("dummy", strips_learner="dummy"), +# OperatorLearningAgent("cluster-intersect", strips_learner="cluster_and_intersect"), +# OperatorLearningAgent("backchaining", strips_learner="backchaining"), +# OperatorLearningAgent("hill-climbing", strips_learner="pnad_search"), +# # OperatorLearningAgent("llm", strips_learner="llm") +# ] +# results = evaluation(agents, tasks, num_iterations=1) +# df = structure_results(results) +# #plot_lifelong_success(df) +# for agent in agents: +# print(agent.name, len(agent.nsrts), agent.actions) +# print() +# import ipdb; ipdb.set_trace() + +# Note: grounding should only be for operators based on the goal.... 
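+# One possible reading of that note, as a minimal sketch (hypothetical helper, not wired
+# into the pipeline): ground only over the objects that appear in the goal, so
+# task_plan_grounding does not enumerate ground NSRTs over irrelevant objects.
+# def goal_directed_grounding(init_atoms, nsrts, goal):
+#     goal_objects = {obj for atom in goal for obj in atom.objects}
+#     return task_plan_grounding(init_atoms, goal_objects, nsrts, allow_noops=True)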
+# Maybe LLM can help with grounding too + +# TODO Finally - Collect Demos, Increment Num_Demos + +# TODO Try Run 3-5 Env Eval on all 5 Baselines (Dummy, CI, Pnad_Search, Back_Chaining) + +# TODO Fix LLM Agents + +# TODO Make BC+FS+LLM Agent +# see other code + +# TODO Make Version-Space Agent +# Note: from CI it should fall back to BC+FS+LLM then to BC then to Dummy + +# TODO Try Run Full Eval on all 7 Agents + +# tasks = [ +# 'MiniGrid-BoxingBooksUpForStorage-16x16-N2-v0', +# 'MiniGrid-CleaningACar-16x16-N2-v0', +# 'MiniGrid-CleaningShoes-16x16-N2-v0', #1 +# 'MiniGrid-CleaningUpTheKitchenOnly-16x16-N2-v0', +# 'MiniGrid-CollectMisplacedItems-16x16-N2-v0', +# 'MiniGrid-InstallingAPrinter-16x16-N2-v0', +# 'MiniGrid-LayingWoodFloors-16x16-N2-v0', +# 'MiniGrid-MakingTea-16x16-N2-v0', +# 'MiniGrid-MovingBoxesToStorage-16x16-N2-v0', +# 'MiniGrid-OpeningPackages-16x16-N2-v0', +# 'MiniGrid-OrganizingFileCabinet-16x16-N2-v0', +# #[DEBUG]'MiniGrid-PreparingSalad-16x16-N2-v0', +# 'MiniGrid-PuttingAwayDishesAfterCleaning-16x16-N2-v0', +# 'MiniGrid-SettingUpCandles-16x16-N2-v0', #1 +# 'MiniGrid-SortingBooks-16x16-N2-v0', +# 'MiniGrid-StoringFood-16x16-N2-v0', +# #[DEBUG]'MiniGrid-ThawingFrozenFood-16x16-N2-v0', +# 'MiniGrid-ThrowingAwayLeftovers-16x16-N2-v0', +# 'MiniGrid-WashingPotsAndPans-16x16-N2-v0', +# 'MiniGrid-WateringHouseplants-16x16-N2-v0' +# ] + +# task_info = {} + +# for demo_file in demo_files: +# traj = get_demo_traj(demo_file, verbose=False) +# add_count = 0 +# for i, action in enumerate(traj.actions): +# curr_state = set(traj.states[i]) +# next_state = set(traj.states[i+1]) +# del_effs = curr_state - next_state +# add_effs = next_state - curr_state +# # print(action) +# # print("DEL:", del_effs) +# # print("ADD:", add_effs) +# # print() +# add_count += len(add_effs) +# assert len(add_effs) != 0 or str(action) == "Move" +# task_name = demo_file.split("/")[-1].split("_")[0] +# agent = OperatorLearningAgent("cluster-intersect", strips_learner="cluster_and_intersect") +# agent.get_data(task_name=task_name) +# goal = agent.parse_goal(task_name=task_name, ground_atoms_state=agent.ground_atoms_traj[1][-1]) +# task_info[demo_file.split("/")[-1]] = (len(traj.actions), len(goal), add_count) + +# i = 0 +# curriculum = [] +# for k,v in sorted([(k,v) for k,v in task_info.items()], key=lambda x: x[1][2]): # by add effects +# i+=1 +# print("|", v[0], "| goal length:", v[1], "| add count:", v[2], "|", k.split("_")[0], i) +# curriculum.append(k.split("_")[0]) + +import time +start_time = time.time() +tasks = ['MiniGrid-OpeningPackages-16x16-N2-v0', + 'MiniGrid-InstallingAPrinter-16x16-N2-v0', + 'MiniGrid-MovingBoxesToStorage-16x16-N2-v0', + 'MiniGrid-SortingBooks-16x16-N2-v0',# + 'MiniGrid-WateringHouseplants-16x16-N2-v0',# + #'MiniGrid-ThrowingAwayLeftovers-16x16-N2-v0',# + 'MiniGrid-BoxingBooksUpForStorage-16x16-N2-v0'# + ] +# tasks = curriculum +# tasks.remove('MiniGrid-LayingWoodFloors-16x16-N2-v0') +# tasks.remove('MiniGrid-CleaningACar-16x16-N2-v0') +# tasks.remove('MiniGrid-SortingBooks-16x16-N2-v0') +# tasks.remove('MiniGrid-CleaningUpTheKitchenOnly-16x16-N2-v0') +print("#"*30) +print(tasks) + +all_agents = [ + GroundTruthAgent("ground-truth"), + #DummyAgent("dummy", strips_learner="dummy"), + #OperatorLearningAgent("cluster-intersect", strips_learner="cluster_and_intersect"), + #OperatorLearningAgent("backchaining", strips_learner="backchaining"), + #OperatorLearningAgent("hill-climbing", strips_learner="pnad_search"), + #OperatorLearningAgent("llm", strips_learner="llm"), + 
#OperatorLearningAgent("backward-forward", strips_learner="backward-forward"), + ] + +for agent in all_agents: + with open("test_saved.NSRTs.txt", "w") as file: + file.write("""NSRT-Move0: + Parameters: [?x0:obj_type] + Preconditions: [] + Add Effects: [inreachofrobot(?x0:obj_type)] + Delete Effects: [~inreachofrobot(?x0:obj_type)] + Ignore Effects: [inreachofrobot, ~inreachofrobot] + Option Spec: Move()""") + results = evaluation([agent], tasks, num_iterations=1, start_seed=100) + df = structure_results(results) + plot_lifelong_success(df) + end_time = time.time() + print("time elasped", end_time - start_time) + df.to_csv('test_results/' + agent.name + '_output.csv') + + results = evaluation([agent], tasks, num_iterations=1, start_seed=100, short_task=False, learn=False) + df2 = structure_results(results) + plot_lifelong_success(df2) + end_time = time.time() + print("time elasped", end_time - start_time) + df2.to_csv('test_results/' + agent.name + '_long_output.csv') + diff --git a/test_colla_helpers.py b/test_colla_helpers.py new file mode 100644 index 0000000000..36b8873e9f --- /dev/null +++ b/test_colla_helpers.py @@ -0,0 +1,160 @@ +import numpy as np +from gym.spaces import Box +import re +import pickle as pkl + +from predicators import utils +from predicators.nsrt_learning.nsrt_learning_main import learn_nsrts_from_data +from predicators.structs import Action, LowLevelTrajectory, Predicate, State, \ + Type, GroundAtom, Task, Variable, LiftedAtom, NSRT, Set +import glob + +name_to_actions = { + "Move": 0, + "Actions.pickup_0": 3, + "Actions.pickup_1": 4, + "Actions.pickup_2": 5, + "Actions.drop_0": 6, + "Actions.drop_1": 7, + "Actions.drop_2": 8, + "Actions.drop_in": 9, + "Actions.toggle": 10, + "Actions.close": 11, + "Actions.open": 12, + "Actions.cook": 13, + "Actions.slice": 14 +} + +demo_files = sorted([filename for filename in glob.glob("/Users/shashlik/Documents/GitHub/predicators/demos/*/*")]) +demo_tasks = set([demo_file.split("/")[-1].split("_")[0] for demo_file in demo_files]) + +# Load and do this from MiniBeahvior Demo + +def get_demo_traj(demo_file, verbose=True): + with open(demo_file, 'rb') as f: + data = pkl.load(f) + + last_skill = "Move" + state = [a for a in data[1][1] if "infovofrobot" not in a] + states = [state] + actions = [] + for step in data.keys(): + obs = data[step][0]['image'] + direction = data[step][0]['direction'] + action = data[step][2] + skill = None + + if "forward" in str(action) or \ + "left" in str(action) or \ + "right" in str(action): + + skill = "Move" + else: + skill = str(action) + + has_effect = True + try: + next_obs = data[step][3]['image'] + next_direction = data[step][3]['direction'] + if np.allclose(obs, next_obs) and (direction == next_direction): + has_effect = False + except: + pass + + if has_effect: + if last_skill != skill: + if verbose: + print("#") + print(last_skill) + try: + next_state = [a for a in data[step][1] if "infovofrobot" not in a] + if verbose: + print("PREV:", set(state)) + print("ADD:", set(next_state) - set(state)) + print("DEL:", set(state) - set(next_state)) + state = next_state + actions.append(last_skill) + states.append(state) + except: + pass + last_skill = skill + else: + if verbose: + print("#") + print(last_skill) + next_state = [a for a in data[step][4] if "infovofrobot" not in a] + if verbose: + print("PREV:", set(state)) + print("ADD:", set(next_state) - set(state)) + print("DEL:", set(state) - set(next_state)) + state = next_state + if verbose: + print("#") + actions.append(last_skill) + 
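+    # append the final state so the trajectory stays one state longer than the action list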
states.append(state) + + return LowLevelTrajectory(states, actions, _is_demo=True, _train_task_idx=0) + +def parse_nsrt_block(block, segmented_trajs) -> NSRT: + """Parses a single NSRT block into an PNAD object.""" + lines = block.strip().split("\n") + + name_match = re.match(r"(\S+):", lines[0]) + name = name_match.group(1) if name_match else "" + + parameters = re.findall(r"\?x\d+:\w+", lines[1]) + + def extract_effects(label: str) -> Set[str]: + """Extracts a list of predicates from labeled sections.""" + for line in lines: + if line.strip().startswith(label): + return set(re.findall(r"\w+\(.*?\)", line)) + return set() + + preconditions = extract_effects("Preconditions") + add_effects = extract_effects("Add Effects") + delete_effects = extract_effects("Delete Effects") + ignore_effects = extract_effects("Ignore Effects") + + option_spec_match = re.search(r"Option Spec:\s*(.*)", block) + option_spec = option_spec_match.group(1) if option_spec_match else "" + + objects = set() + atoms = set() + option_specs = {} + for traj in segmented_trajs: + for segment in traj: + for state in segment.states: + for k, v in state.items(): + objects.add(k) + atoms |= segment.init_atoms | segment.final_atoms + option_specs[segment.get_option().parent.name] = segment.get_option().parent + all_predicates_list = [(atom.predicate.name,atom.predicate) for atom in atoms] + def get_predicate(name, entities): + for pred_name, pred in all_predicates_list: + if pred_name == pred_name and pred.arity == len(entities): + valid_types = True + for i, ent in enumerate(entities): + if ent.type != pred.types[i]: + valid_types = False + if valid_types: + return pred + raise NotImplementedError + + types = {obj.type.name:obj.type for obj in objects} + + def extract_parameters(predicate: str) -> Set[str]: + parameter_pattern = re.compile(r"\?x\d+:\w+") # Matches variables like ?x0:obj_type + matches = parameter_pattern.findall(predicate) + return matches + + parameters = [Variable(param.split(":")[0], types[param.split(":")[1]]) for param in parameters] + preconditions = set([LiftedAtom(get_predicate(pre.split("(")[0], [Variable(param.split(":")[0], types[param.split(":")[1]]) for param in extract_parameters(pre)]), [Variable(param.split(":")[0], types[param.split(":")[1]]) for param in extract_parameters(pre)]) for pre in preconditions]) + add_effects = set([LiftedAtom(get_predicate(add.split("(")[0], [Variable(param.split(":")[0], types[param.split(":")[1]]) for param in extract_parameters(add)]), [Variable(param.split(":")[0], types[param.split(":")[1]]) for param in extract_parameters(add)]) for add in add_effects]) + delete_effects = set([LiftedAtom(get_predicate(dle.split("(")[0], [Variable(param.split(":")[0], types[param.split(":")[1]]) for param in extract_parameters(dle)]), [Variable(param.split(":")[0], types[param.split(":")[1]]) for param in extract_parameters(dle)]) for dle in delete_effects]) + ignore_effects = set([get_predicate(ige, None) for ige in ignore_effects]) + a_name = option_spec.split("(")[0] + option_spec = utils.SingletonParameterizedOption( + a_name, lambda s, m, o, p: Action(name_to_actions[a_name])) + + return NSRT(name, parameters, preconditions, add_effects, delete_effects, ignore_effects, option_spec, [], None) \ No newline at end of file diff --git a/test_colla_results copy.py b/test_colla_results copy.py new file mode 100644 index 0000000000..34976f9143 --- /dev/null +++ b/test_colla_results copy.py @@ -0,0 +1,760 @@ +# (1) implement the evaluation function evaluate(agent) returns 
dictionary of results +# (2) implment evaluation visualization visualize(results) +# (3) do whatever it takes to make results better (CI, BC, FF+BC, FF+BC+LLMs) + +from test_colla_env import MiniBehaviorEnv +from test_colla_helpers import Box, LowLevelTrajectory, State, Task, \ + demo_files, get_demo_traj, learn_nsrts_from_data +import matplotlib.pyplot as plt +import seaborn as sns +import pandas as pd +import random +from predicators.planning import task_plan, task_plan_grounding, _SkeletonSearchTimeout, PlanningFailure +from predicators import utils +from predicators.structs import Action, LowLevelTrajectory, Predicate, State, \ + Type, GroundAtom, Task, STRIPSOperator +import numpy as np +from collections import Counter + +from predicators.nsrt_learning.strips_learning.gen_to_spec_learner import parse_objs_preds_and_options + +import pickle as pkl +import numpy as np +from predicators.structs import Action, LowLevelTrajectory, Predicate, State, \ + Type +from test_operator_learning_all import get_demo_traj, demo_files + +opname_to_key = { + 'Actions.pickup_0': '0', + 'Actions.pickup_1': '1', + 'Actions.pickup_2': '2', + 'Actions.drop_0': '3', + 'Actions.drop_1': '4', + 'Actions.drop_2': '5', + 'Actions.drop_in': 'i', + 'Actions.toggle': 't', + 'Actions.close': 'c', + 'Actions.open': 'o', + 'Actions.cook': 'k', + 'Actions.slice': '6' +} + +class RandomAgent(): + def __init__(self, name): + self.name = name + self.actions = None + + def reset(self, task_name): + pass + + def policy(self, obs, env): + #print(env.get_lifted_state()) + return env.key_to_action[random.choice(list(env.key_to_action.keys()))] + +def evaluation(agents, tasks, num_iterations=10, start_seed=100): + results = {} + task_i = 0 + for i in range(num_iterations): + for task in tasks: + for agent in agents: + env = MiniBehaviorEnv(env_id=task, seed=i+start_seed) + observation, _ = env.reset() + agent.reset(env.env_id) + steps = 0 + for _ in range(50): + action = agent.policy(observation, env) + observation, reward, terminated, truncated, _ = env.step(action) + steps += 1 + env.show() + + if reward != 0: + break + + if terminated or truncated: + break + found_plan = 0 + plan_diff = -1 + if agent.actions is not None and agent.actions != []: + found_plan = 1 + + key_to_opname = {v:k for k,v in opname_to_key.items()} + plan = [key_to_opname[action] if not action.startswith("moveto") else "Move" for action in agent.actions] + dataset_plan = agent.dataset[0].actions + + def differing_reoccurring_counts(list1, list2): + count1 = Counter(list1) + count2 = Counter(list2) + all_keys = set(count1.keys()) | set(count2.keys()) + result = {} + total_diff = 0 + for key in all_keys: + c1 = count1.get(key, 0) + c2 = count2.get(key, 0) + if (c1 > 1 or c2 > 1) and c1 != c2: + diff = abs(c1 - c2) + result[key] = diff + total_diff += diff + result['total'] = total_diff + return result + plan_diff = differing_reoccurring_counts(plan, dataset_plan)['total'] + results[str(task_i) + "_" + task + "_" + agent.name] = (steps, reward, i, found_plan, plan_diff) + task_i += 1 + return results + +def structure_results(results_dict): + data = [] + for key, (steps, reward, iteration, found_plan, plan_diff) in results_dict.items(): + task_idx, task_name, agent_name = key.split("_", 2) + data.append({ + "task_name": task_name, + "task_idx": int(task_idx), + "iteration": int(iteration), + "found_plan": int(found_plan), + "plan_diff": int(plan_diff), + "steps": steps, + "reward": reward, + "success": 1 if reward > 0 else 0, + "agent": agent_name + }) + df = 
pd.DataFrame(data) + df["task_order"] = df["task_idx"] + return df.sort_values(["agent", "iteration", "task_order"]) + +def plot_lifelong_success(df): + plt.figure(figsize=(14, 5)) + + sns.lineplot( + data=df, + x="task_order", + y="success", + hue="agent", + marker="o" + ) + + # Set up x-ticks with task names, spaced across iterations + xticks = df["task_order"] + xticklabels = df["task_name"] + plt.xticks(ticks=xticks, labels=xticklabels, rotation=45, ha='right') + + plt.ylim(-0.1, 1.1) + plt.yticks([0, 1], ["Fail", "Success"]) + plt.ylabel("Success") + plt.xlabel("Tasks over Lifelong Iterations") + plt.title("Lifelong Learning Success per Task") + plt.tight_layout() + plt.grid(True, linestyle='--', alpha=0.3) + plt.savefig("lifelong_learning_success.png", dpi=200) + +class OperatorLearningAgent(): + def __init__(self, name, strips_learner, single_grounding=False): + self.name = name + self.num_demos = 1 + + # Initialized once; populated in get_data() + self.dataset = [] + self.ground_atom_dataset = [] + self.tasks = [] + self.action_space = Box(0, 7, (1,)) + self.objs = set() + self.preds = set() + self.options = set() + self.ground_atoms_traj = [] + self.goal = None + + # Runtime variables + self.nsrts = None + self.actions = None + self.i = 0 + self.seed_i = 0 + + # Learning Params + self.strips_learner = strips_learner + self.single_grounding = single_grounding + utils.reset_config({ + "strips_learner": self.strips_learner, + "segmenter": "every_step", + "disable_harmlessness_check": True, + "pnad_search_load_initial": True, + "backward_forward_load_initial": True, + "min_data_for_nsrt": 0, + "min_perc_data_for_nsrt": 0, + "pnad_search_timeout":100.0, + "single_grounding": self.single_grounding, + "option_learner": "no_learning" + }) + + def reset(self, task_name): + if False: + self.dataset = [] + self.ground_atom_dataset = [] + self.tasks = [] + self.objs = set() + self.preds = set() + self.options = set() + self.ground_atoms_traj = [] + self.action_space = Box(0, 7, (1,)) + self.seed_i = 0 + + # Learning Params + utils.reset_config({ + "strips_learner": self.strips_learner, + "segmenter": "every_step", + "disable_harmlessness_check": True, + "pnad_search_load_initial": True, + "backward_forward_load_initial": True, + "min_data_for_nsrt": 0, + "min_perc_data_for_nsrt": 0, + "pnad_search_timeout":100.0, + "single_grounding": self.single_grounding, + "option_learner": "no_learning" + }) + + self.nsrts = self.learn_nsrts(task_name) + self.goal = self.parse_goal(task_name, self.ground_atoms_traj[1][-1]) + self.actions = None + self.i = 0 + + def parse_goal(self, task_name, ground_atoms_state): + if task_name == "MiniGrid-BoxingBooksUpForStorage-16x16-N2-v0": + return set([atom for atom in ground_atoms_state if str(atom).startswith("inside(")]) + + elif task_name == "MiniGrid-OpeningPackages-16x16-N2-v0": + return set([atom for atom in ground_atoms_state if str(atom).startswith("openable(")]) + + elif task_name == "MiniGrid-CleaningACar-16x16-N2-v0": + return set([atom for atom in ground_atoms_state if str(atom).startswith("inside(")]) | set([atom for atom in ground_atoms_state if str(atom).startswith("~dustyable(")]) + + elif task_name == "MiniGrid-CleaningShoes-16x16-N2-v0": + return set([atom for atom in ground_atoms_state if str(atom).startswith("~stainable(") and "shoe" in str(atom)]) | \ + set([atom for atom in ground_atoms_state if str(atom).startswith("~dustyable(") and "shoe" in str(atom)]) | \ + set([atom for atom in ground_atoms_state if str(atom).startswith("onfloor(") and 
"towel" in str(atom)]) + + + elif task_name == "MiniGrid-CleaningUpTheKitchenOnly-16x16-N2-v0": + return set([ + atom for atom in ground_atoms_state + if ( + str(atom).startswith("onTop(") and "blender" in str(atom) and "countertop" in str(atom) + ) or ( + str(atom).startswith("nextto(") and "soap" in str(atom) and "sink" in str(atom) + ) or ( + str(atom).startswith("inside(") and "vegetable_oil" in str(atom) and "cabinet" in str(atom) + ) or ( + str(atom).startswith("inside(") and "plate" in str(atom) and "cabinet" in str(atom) + ) or ( + str(atom).startswith("inside(") and "casserole" in str(atom) and "electric_refrigerator" in str(atom) + ) or ( + str(atom).startswith("inside(") and "apple" in str(atom) and "electric_refrigerator" in str(atom) + ) or ( + str(atom).startswith("inside(") and "rag" in str(atom) and "sink" in str(atom) + ) or ( + str(atom).startswith("nextto(") and "rag" in str(atom) and "sink" in str(atom) + ) or ( + str(atom).startswith("~dustyable(") and "cabinet" in str(atom) + ) or ( + str(atom).startswith("~stainable(") and "plate" in str(atom) + ) + ]) + + elif task_name == "MiniGrid-CollectMisplacedItems-16x16-N2-v0": + return set([ + atom for atom in ground_atoms_state + if str(atom).startswith("onTop(") and "table" in str(atom) and ( + "gym_shoe" in str(atom) or + "necklace" in str(atom) or + "notebook" in str(atom) or + "sock" in str(atom) + ) and not str(atom).startswith("onTop(table") + ]) + + elif task_name == "MiniGrid-InstallingAPrinter-16x16-N2-v0": + return set([atom for atom in ground_atoms_state if str(atom).startswith("onTop(")]) | \ + set([atom for atom in ground_atoms_state if str(atom).startswith("toggleable(")]) + + elif task_name == "MiniGrid-LayingWoodFloors-16x16-N2-v0": + return set([atom for atom in ground_atoms_state if str(atom).startswith("nextto(")]) + + elif task_name == "MiniGrid-MakingTea-16x16-N2-v0": + return set([ + atom for atom in ground_atoms_state + if str(atom).startswith("sliceable(") and "lemon" in str(atom) + ]) | set([ + atom for atom in ground_atoms_state + if str(atom).startswith("onTop(") and "teapot" in str(atom) and "stove" in str(atom) + ]) | set([ + atom for atom in ground_atoms_state + if str(atom).startswith("atsamelocation(") and "tea_bag" in str(atom) and "teapot" in str(atom) + ]) | set([ + atom for atom in ground_atoms_state + if str(atom).startswith("soakable(") and "teapot" in str(atom) + ]) | set([ + atom for atom in ground_atoms_state + if str(atom).startswith("toggleable(") and "stove" in str(atom) + ]) + + elif task_name == "MiniGrid-MovingBoxesToStorage-16x16-N2-v0": + return set([atom for atom in ground_atoms_state if str(atom).startswith("onTop(")]) + + elif task_name == "MiniGrid-OrganizingFileCabinet-16x16-N2-v0": + return set([ + atom for atom in ground_atoms_state + if str(atom).startswith("onTop(") and "marker" in str(atom) and "table" in str(atom) + ]) | set([ + atom for atom in ground_atoms_state + if str(atom).startswith("inside(") and "document" in str(atom) and "cabinet" in str(atom) + ]) | set([ + atom for atom in ground_atoms_state + if str(atom).startswith("inside(") and "folder" in str(atom) and "cabinet" in str(atom) + ]) + + elif task_name == "MiniGrid-PreparingSalad-16x16-N2-v0": + import ipdb; ipdb.set_trace() + raise NotImplementedError("parse_goal not implemented for PreparingSalad") + + elif task_name == "MiniGrid-PuttingAwayDishesAfterCleaning-16x16-N2-v0": + return set([ + atom for atom in ground_atoms_state + if str(atom).startswith("inside(") and "plate" in str(atom) and 
"cabinet" in str(atom) + ]) + + + elif task_name == "MiniGrid-SettingUpCandles-16x16-N2-v0": + return set([atom for atom in ground_atoms_state if str(atom).startswith("onTop(")]) + + elif task_name == "MiniGrid-SortingBooks-16x16-N2-v0": + return set([atom for atom in ground_atoms_state if str(atom).startswith("onTop(") and "shelf" in str(atom) and ("book" in str(atom) or "hardback" in str(atom))]) + + elif task_name == "MiniGrid-StoringFood-16x16-N2-v0": + return set([ + atom for atom in ground_atoms_state + if str(atom).startswith("inside(") and "cabinet" in str(atom) and ( + "oatmeal" in str(atom) or "chip" in str(atom) or "vegetable_oil" in str(atom) or "sugar" in str(atom) + ) + ]) + + elif task_name == "MiniGrid-ThawingFrozenFood-16x16-N2-v0": + return set([ + atom for atom in ground_atoms_state + if str(atom).startswith("nextto(") and ( + ("date" in str(atom) and "fish" in str(atom)) or + ("fish" in str(atom) and "sink" in str(atom)) or + ("olive" in str(atom) and "sink" in str(atom)) + ) + ]) + + elif task_name == "MiniGrid-ThrowingAwayLeftovers-16x16-N2-v0": + return set([atom for atom in ground_atoms_state if str(atom).startswith("inside(") and "hamburger" in str(atom) and "ashcan" in str(atom)]) + + elif task_name == "MiniGrid-WashingPotsAndPans-16x16-N2-v0": + return set([ + atom for atom in ground_atoms_state + if str(atom).startswith("~stainable(") and ( + "pan" in str(atom) or "kettle" in str(atom) or "teapot" in str(atom) + ) + ]) | set([ + atom for atom in ground_atoms_state + if str(atom).startswith("inside(") and "cabinet" in str(atom) and ( + "pan" in str(atom) or "kettle" in str(atom) or "teapot" in str(atom) + ) + ]) + + elif task_name == "MiniGrid-WateringHouseplants-16x16-N2-v0": + return set([atom for atom in ground_atoms_state if str(atom).startswith("soakable(") and "pot_plant" in str(atom)]) + else: + import ipdb; ipdb.set_trace() + + + def get_plan(self, state, seed): + objs, _, _, ground_atoms_traj, all_atoms = parse_objs_preds_and_options(self.dataset[-1], train_task_idx=0) + task = Task(State({}, None), self.goal) + + _, _, _, ground_atoms_traj, _ = parse_objs_preds_and_options(LowLevelTrajectory([state], [], _is_demo=True, _train_task_idx=0), train_task_idx=0, all_atoms=all_atoms) + init_atoms = ground_atoms_traj[1][0] + plan = self.plan(init_atoms, objs, self.preds, self.nsrts, task, seed) + return plan + + def policy(self, obs, env): + if self.actions is None: + seed = self.seed_i + self.seed_i += 1 + num_remove_pre = 0 + while self.actions is None or self.actions == []: + try: + self.actions = self.get_plan(env.get_lifted_state(), seed) + break + except _SkeletonSearchTimeout: + print("did not find skeleton - timeout") + except PlanningFailure: + print("did not find skeleton - plan failure") + num_remove_pre += 1 + new_nsrts = set() + for nsrt in self.nsrts: + pre = set() + tot_pre = len(nsrt.op.preconditions) - num_remove_pre + if tot_pre > 0: + pre = random.sample(nsrt.op.preconditions, tot_pre) + ignore_effects = nsrt.op.ignore_effects + del_effs = nsrt.op.delete_effects + if num_remove_pre > 10: + ignore_effects = set() + del_effs = set() + new_nsrts.add( + nsrt.op.copy_with(preconditions=pre, + ignore_effects=ignore_effects, + delete_effects=del_effs)).make_nsrt( + nsrt.option, + [], # dummy sampler + lambda s, g, rng, o: np.zeros(1, dtype=np.float32)) + self.nsrts = new_nsrts + with open("test_saved.NSRTs.txt", "w") as file: + for nsrt in self.nsrts: + if nsrt.op.add_effects != set(): + file.write(str(nsrt)+"\n") + + self.i += 1 + if self.i-1 < 
len(self.actions): + return env.key_to_action[self.actions[self.i-1]] + else: + self.actions = None + self.i = 0 + return env.key_to_action["0"] + + def clean_action_plan(self, action_plan): + plan = [] + for step in action_plan: + name = step[0] + objs = step[1] + if len(objs) > 0: + obj_name = objs[0].name + if name.startswith("Move"): + plan.append(f"moveto-{obj_name}") + else: + for opname, key in opname_to_key.items(): + if opname in name: + plan.append(key) + break + return plan + + def plan(self, init_atoms, objects, predicates, nsrts, task, seed): + ground_nsrts, reachable_atoms = task_plan_grounding(init_atoms, objects, nsrts, allow_noops=True) + heuristic = utils.create_task_planning_heuristic("hadd", init_atoms, + task.goal, ground_nsrts, + predicates, objects) + task_plan_generator = task_plan(init_atoms, + task.goal, + ground_nsrts, + reachable_atoms, + heuristic, + timeout=1, + seed=seed, + max_skeletons_optimized=3) + skeleton, _, _ = next(task_plan_generator) + + action_plan = [] + for step in skeleton: + action_plan.append((step.option.name, step.objects)) + return self.clean_action_plan(action_plan) + + def get_data(self, task_name): + for demo_file in demo_files: + if task_name in demo_file: + demo_traj = get_demo_traj(demo_file=demo_file, verbose=False) + + idx = len(self.dataset) + demo_traj = LowLevelTrajectory(demo_traj.states, demo_traj.actions, _is_demo=True, _train_task_idx=idx) + + self.dataset.append(demo_traj) + new_objs, new_preds, new_options, self.ground_atoms_traj, _ = parse_objs_preds_and_options(demo_traj, train_task_idx=idx) + self.objs |= new_objs + self.preds |= new_preds + self.options |= new_options + self.ground_atom_dataset.append(self.ground_atoms_traj) + goal = self.parse_goal(task_name, self.ground_atoms_traj[1][-1]) + self.tasks.append(Task(State({}, None), goal)) + # if len(self.dataset) >= self.num_demos: + # break + # assert len(self.dataset) == self.num_demos + return self.dataset, self.tasks, self.preds, self.options, self.action_space, self.ground_atom_dataset + + def learn_nsrts(self, task_name): + dataset, tasks, preds, options, action_space, ground_atom_dataset = self.get_data(task_name) + nsrts, _, _ = learn_nsrts_from_data(dataset, + tasks, + preds, + options, + action_space, + ground_atom_dataset, + sampler_learner="neural", + annotations=None) + with open("test_saved.NSRTs.txt", "w") as file: + for nsrt in nsrts: + if nsrt.op.add_effects != set(): + file.write(str(nsrt)+"\n") + return nsrts + +class DummyAgent(OperatorLearningAgent): + def __init__(self, name="dummy", strips_learner="dummy"): + super().__init__(name=name, strips_learner=strips_learner, single_grounding=True) + + def learn_nsrts(self, task_name): + dataset, tasks, preds, options, action_space, ground_atom_dataset = self.get_data(task_name) + goal = self.parse_goal(task_name, self.ground_atoms_traj[1][-1]) + obj_to_var = {obj:obj.type("?" 
+ obj.name) for obj in self.objs} + lifted_goal = {atom.lift(obj_to_var) for atom in goal} + + nsrts = set() + name_i = 0 + for option in options: + op = STRIPSOperator( + name="Dummy" + str(name_i), + parameters=[], + preconditions=set(), + add_effects=set(), + delete_effects=set(), + ignore_effects=set() + ) + dummy_nsrt = op.make_nsrt( + option, + [], # dummy sampler + lambda s, g, rng, o: np.zeros(1, dtype=np.float32)) + nsrts.add(dummy_nsrt) + name_i += 1 + + params = [] + for sublist in [lifted_atom.variables for lifted_atom in lifted_goal]: + params += sublist + params = [x for x in set(params)] + op = STRIPSOperator( + name="Dummy" + str(name_i), + parameters=params, + preconditions=set(), + add_effects=lifted_goal, + delete_effects=set(), + ignore_effects=set() + ) + dummy_nsrt = op.make_nsrt( + option, + [], # dummy sampler + lambda s, g, rng, o: np.zeros(1, dtype=np.float32)) + nsrts.add(dummy_nsrt) + name_i += 1 + return nsrts + +class GroundTruthAgent(OperatorLearningAgent): + def __init__(self, name): + super().__init__(name=name, strips_learner="NONE") + self.name = name + self.ground_truth_trajs = {} + self.i = 0 + self.actions = None + + def reset(self, task_name): + self.dataset = [] + self.ground_atom_dataset = [] + self.tasks = [] + self.action_space = Box(0, 7, (1,)) + self.objs = set() + self.preds = set() + self.options = set() + self.ground_atoms_traj = [] + + dataset, tasks, preds, options, action_space, ground_atom_dataset = self.get_data(task_name) + + self.goal = self.parse_goal(task_name, self.ground_atoms_traj[1][-1]) + self.actions = None + self.i = 0 + + action_plan = [] + for i, step in enumerate(self.ground_atoms_traj[0].actions): + curr_state = self.ground_atoms_traj[1][i] + next_state = self.ground_atoms_traj[1][i+1] + def count_object_occurrences(atom_set): + counter = Counter() + for atom in atom_set: + for obj in atom.objects: + if not atom.predicate.name.startswith("~inreachofrobot"): + counter[obj] += 1 + return counter + counter = count_object_occurrences(next_state - curr_state) + def get_max_count_object(counter, exclude_types=("table", "shelf")): + max_count = max(counter.values()) + candidates = [ + obj for obj, count in counter.items() + if count == max_count and all(ex_type not in str(obj) for ex_type in exclude_types) + ] + + if candidates: + return candidates[0] + else: + return None + + try: + if get_max_count_object(counter) is None: + objs = [max(counter, key=counter.get)] + else: + objs = [get_max_count_object(counter)] + except: + objs = random.sample(self.objs, 1) + action_plan.append((step._option.name, objs)) + self.ground_truth_trajs[task_name] = self.clean_action_plan(action_plan) + + def policy(self, obs, env): + #print(env.get_lifted_state()) + try: + assert env.env_id in self.ground_truth_trajs.keys() + except: + import ipdb; ipdb.set_trace() + self.i += 1 + if self.i-1 < len(self.ground_truth_trajs[env.env_id]): + return env.key_to_action[self.ground_truth_trajs[env.env_id][self.i-1]] + else: + return env.key_to_action["0"] + + def learn_nsrts(self, task_name): + return None + +# tasks = ["MiniGrid-BoxingBooksUpForStorage-16x16-N2-v0", +# "MiniGrid-CollectMisplacedItems-16x16-N2-v0", +# "MiniGrid-ThrowingAwayLeftovers-16x16-N2-v0", +# "MiniGrid-OpeningPackages-16x16-N2-v0", +# "MiniGrid-WateringHouseplants-16x16-N2-v0", +# "MiniGrid-PuttingAwayDishesAfterCleaning-16x16-N2-v0"] + +# agents = [RandomAgent("random"), GroundTruthAgent("ground-truth")] +# results = evaluation(agents, tasks, num_iterations=3) +# df = 
structure_results(results) +# plot_lifelong_success(df) + +##### + +# tasks = ["MiniGrid-BoxingBooksUpForStorage-16x16-N2-v0"] +# agents = [GroundTruthAgent("ground-truth")] +# results = evaluation(agents, tasks, num_iterations=1) +# df = structure_results(results) +# #plot_lifelong_success(df) +# import ipdb; ipdb.set_trace() + +##### + +############ +# tasks = ["MiniGrid-BoxingBooksUpForStorage-16x16-N2-v0"] +# agents = [ +# DummyAgent("dummy", strips_learner="dummy"), +# OperatorLearningAgent("cluster-intersect", strips_learner="cluster_and_intersect"), +# OperatorLearningAgent("backchaining", strips_learner="backchaining"), +# OperatorLearningAgent("hill-climbing", strips_learner="pnad_search"), +# # OperatorLearningAgent("llm", strips_learner="llm") +# ] +# results = evaluation(agents, tasks, num_iterations=1) +# df = structure_results(results) +# #plot_lifelong_success(df) +# for agent in agents: +# print(agent.name, len(agent.nsrts), agent.actions) +# print() +# import ipdb; ipdb.set_trace() + +# Note: grounding should only be for operators based on the goal.... +# Maybe LLM can help with grounding too + +# TODO Finally - Collect Demos, Increment Num_Demos + +# TODO Try Run 3-5 Env Eval on all 5 Baselines (Dummy, CI, Pnad_Search, Back_Chaining) + +# TODO Fix LLM Agents + +# TODO Make BC+FS+LLM Agent +# see other code + +# TODO Make Version-Space Agent +# Note: from CI it should fall back to BC+FS+LLM then to BC then to Dummy + +# TODO Try Run Full Eval on all 7 Agents + +tasks = [ + 'MiniGrid-BoxingBooksUpForStorage-16x16-N2-v0', + 'MiniGrid-CleaningACar-16x16-N2-v0', + 'MiniGrid-CleaningShoes-16x16-N2-v0', #1 + 'MiniGrid-CleaningUpTheKitchenOnly-16x16-N2-v0', + 'MiniGrid-CollectMisplacedItems-16x16-N2-v0', + 'MiniGrid-InstallingAPrinter-16x16-N2-v0', + 'MiniGrid-LayingWoodFloors-16x16-N2-v0', + 'MiniGrid-MakingTea-16x16-N2-v0', + 'MiniGrid-MovingBoxesToStorage-16x16-N2-v0', + 'MiniGrid-OpeningPackages-16x16-N2-v0', + 'MiniGrid-OrganizingFileCabinet-16x16-N2-v0', + #[DEBUG]'MiniGrid-PreparingSalad-16x16-N2-v0', + 'MiniGrid-PuttingAwayDishesAfterCleaning-16x16-N2-v0', + 'MiniGrid-SettingUpCandles-16x16-N2-v0', #1 + 'MiniGrid-SortingBooks-16x16-N2-v0', + 'MiniGrid-StoringFood-16x16-N2-v0', + #[DEBUG]'MiniGrid-ThawingFrozenFood-16x16-N2-v0', + 'MiniGrid-ThrowingAwayLeftovers-16x16-N2-v0', + 'MiniGrid-WashingPotsAndPans-16x16-N2-v0', + 'MiniGrid-WateringHouseplants-16x16-N2-v0' +] + +task_info = {} + +for demo_file in demo_files: + traj = get_demo_traj(demo_file, verbose=False) + add_count = 0 + for i, action in enumerate(traj.actions): + curr_state = set(traj.states[i]) + next_state = set(traj.states[i+1]) + del_effs = curr_state - next_state + add_effs = next_state - curr_state + # print(action) + # print("DEL:", del_effs) + # print("ADD:", add_effs) + # print() + add_count += len(add_effs) + assert len(add_effs) != 0 or str(action) == "Move" + task_name = demo_file.split("/")[-1].split("_")[0] + agent = OperatorLearningAgent("cluster-intersect", strips_learner="cluster_and_intersect") + agent.get_data(task_name=task_name) + goal = agent.parse_goal(task_name=task_name, ground_atoms_state=agent.ground_atoms_traj[1][-1]) + task_info[demo_file.split("/")[-1]] = (len(traj.actions), len(goal), add_count) + +i = 0 +curriculum = [] +for k,v in sorted([(k,v) for k,v in task_info.items()], key=lambda x: x[1][2]): # by add effects + i+=1 + print("|", v[0], "| goal length:", v[1], "| add count:", v[2], "|", k.split("_")[0], i) + curriculum.append(k.split("_")[0]) + +import time +start_time = 
time.time() +tasks = ['MiniGrid-OpeningPackages-16x16-N2-v0', + 'MiniGrid-InstallingAPrinter-16x16-N2-v0', + 'MiniGrid-MovingBoxesToStorage-16x16-N2-v0', + # 'MiniGrid-SortingBooks-16x16-N2-v0',# + 'MiniGrid-WateringHouseplants-16x16-N2-v0',# + # 'MiniGrid-MakingTea-16x16-N2-v0',# + # 'MiniGrid-ThrowingAwayLeftovers-16x16-N2-v0',# + # 'MiniGrid-BoxingBooksUpForStorage-16x16-N2-v0'# + ] +# tasks = curriculum +# tasks.remove('MiniGrid-LayingWoodFloors-16x16-N2-v0') +# tasks.remove('MiniGrid-CleaningACar-16x16-N2-v0') +# tasks.remove('MiniGrid-SortingBooks-16x16-N2-v0') +# tasks.remove('MiniGrid-CleaningUpTheKitchenOnly-16x16-N2-v0') +print("#"*30) +print(tasks) +agents = [ + # GroundTruthAgent("ground-truth"), + # DummyAgent("dummy", strips_learner="dummy"), + # OperatorLearningAgent("cluster-intersect", strips_learner="cluster_and_intersect"), + # OperatorLearningAgent("backchaining", strips_learner="backchaining"), + # OperatorLearningAgent("hill-climbing", strips_learner="pnad_search"), + # OperatorLearningAgent("llm", strips_learner="llm"), + OperatorLearningAgent("backward-forward", strips_learner="backward-forward"), + ] +with open("test_saved.NSRTs.txt", "w") as file: + file.write("") +results = evaluation(agents, tasks, num_iterations=1, start_seed=100) +df = structure_results(results) +plot_lifelong_success(df) +end_time = time.time() +print("time elasped", end_time - start_time) + +import ipdb; ipdb.set_trace() diff --git a/test_colla_results.py b/test_colla_results.py new file mode 100644 index 0000000000..69f9d7c818 --- /dev/null +++ b/test_colla_results.py @@ -0,0 +1,788 @@ +# (1) implement the evaluation function evaluate(agent) returns dictionary of results +# (2) implment evaluation visualization visualize(results) +# (3) do whatever it takes to make results better (CI, BC, FF+BC, FF+BC+LLMs) + +from test_colla_env import MiniBehaviorEnv +from test_colla_helpers import Box, LowLevelTrajectory, State, Task, \ + demo_files, get_demo_traj, learn_nsrts_from_data +import matplotlib.pyplot as plt +import seaborn as sns +import pandas as pd +import random +from predicators.planning import task_plan, task_plan_grounding, _SkeletonSearchTimeout, PlanningFailure +from predicators import utils +from predicators.structs import Action, LowLevelTrajectory, Predicate, State, \ + Type, GroundAtom, Task, STRIPSOperator +import numpy as np +from collections import Counter + +from predicators.nsrt_learning.strips_learning.gen_to_spec_learner import parse_objs_preds_and_options + +import pickle as pkl +import numpy as np +from predicators.structs import Action, LowLevelTrajectory, Predicate, State, \ + Type +from test_operator_learning_all import get_demo_traj, demo_files + +opname_to_key = { + 'Actions.pickup_0': '0', + 'Actions.pickup_1': '1', + 'Actions.pickup_2': '2', + 'Actions.drop_0': '3', + 'Actions.drop_1': '4', + 'Actions.drop_2': '5', + 'Actions.drop_in': 'i', + 'Actions.toggle': 't', + 'Actions.close': 'c', + 'Actions.open': 'o', + 'Actions.cook': 'k', + 'Actions.slice': '6' +} + +class RandomAgent(): + def __init__(self, name): + self.name = name + self.actions = None + + def reset(self, task_name): + pass + + def policy(self, obs, env): + #print(env.get_lifted_state()) + return env.key_to_action[random.choice(list(env.key_to_action.keys()))] + +def evaluation(agents, tasks, num_iterations=10, start_seed=100, short_task=True): + results = {} + task_i = 0 + for i in range(num_iterations): + for task in tasks: + for agent in agents: + env = MiniBehaviorEnv(env_id=task, 
seed=i+start_seed) + env.short_task = short_task + observation, _ = env.reset() + agent.short_task = short_task + agent.reset(env.env_id) + steps = 0 + for _ in range(50): + action = agent.policy(observation, env) + observation, reward, terminated, truncated, _ = env.step(action) + steps += 1 + env.show() + + if reward != 0: + break + + if terminated or truncated: + break + found_plan = 0 + plan_diff = -1 + if agent.actions is not None and agent.actions != []: + found_plan = 1 + + key_to_opname = {v:k for k,v in opname_to_key.items()} + plan = [key_to_opname[action] if not action.startswith("moveto") else "Move" for action in agent.actions] + dataset_plan = agent.dataset[0].actions + + def differing_reoccurring_counts(list1, list2): + count1 = Counter(list1) + count2 = Counter(list2) + all_keys = set(count1.keys()) | set(count2.keys()) + result = {} + total_diff = 0 + for key in all_keys: + c1 = count1.get(key, 0) + c2 = count2.get(key, 0) + if (c1 > 1 or c2 > 1) and c1 != c2: + diff = abs(c1 - c2) + result[key] = diff + total_diff += diff + result['total'] = total_diff + return result + plan_diff = differing_reoccurring_counts(plan, dataset_plan)['total'] + results[str(task_i) + "_" + task + "_" + agent.name] = (steps, reward, i, found_plan, plan_diff) + task_i += 1 + return results + +def structure_results(results_dict): + data = [] + for key, (steps, reward, iteration, found_plan, plan_diff) in results_dict.items(): + task_idx, task_name, agent_name = key.split("_", 2) + data.append({ + "task_name": task_name, + "task_idx": int(task_idx), + "iteration": int(iteration), + "found_plan": int(found_plan), + "plan_diff": int(plan_diff), + "steps": steps, + "reward": reward, + "success": 1 if reward > 0 else 0, + "agent": agent_name + }) + df = pd.DataFrame(data) + df["task_order"] = df["task_idx"] + return df.sort_values(["agent", "iteration", "task_order"]) + +def plot_lifelong_success(df): + plt.figure(figsize=(14, 5)) + + sns.lineplot( + data=df, + x="task_order", + y="success", + hue="agent", + marker="o" + ) + + # Set up x-ticks with task names, spaced across iterations + xticks = df["task_order"] + xticklabels = df["task_name"] + plt.xticks(ticks=xticks, labels=xticklabels, rotation=45, ha='right') + + plt.ylim(-0.1, 1.1) + plt.yticks([0, 1], ["Fail", "Success"]) + plt.ylabel("Success") + plt.xlabel("Tasks over Lifelong Iterations") + plt.title("Lifelong Learning Success per Task") + plt.tight_layout() + plt.grid(True, linestyle='--', alpha=0.3) + plt.savefig("lifelong_learning_success.png", dpi=200) + +class OperatorLearningAgent(): + def __init__(self, name, strips_learner, single_grounding=False): + self.name = name + self.num_demos = 1 + + # Initialized once; populated in get_data() + self.dataset = [] + self.ground_atom_dataset = [] + self.tasks = [] + self.action_space = Box(0, 7, (1,)) + self.objs = set() + self.preds = set() + self.options = set() + self.ground_atoms_traj = [] + self.goal = None + + # Runtime variables + self.nsrts = None + self.actions = None + self.i = 0 + self.seed_i = 0 + self.short_task = True + + # Learning Params + self.strips_learner = strips_learner + self.single_grounding = single_grounding + utils.reset_config({ + "strips_learner": self.strips_learner, + "segmenter": "every_step", + "disable_harmlessness_check": True, + "pnad_search_load_initial": True, + "backward_forward_load_initial": True, + "min_data_for_nsrt": 0, + "min_perc_data_for_nsrt": 0, + "pnad_search_timeout":1000.0, + "single_grounding": self.single_grounding, + "option_learner": 
"no_learning" + }) + + def reset(self, task_name): + if False: + self.dataset = [] + self.ground_atom_dataset = [] + self.tasks = [] + self.objs = set() + self.preds = set() + self.options = set() + self.ground_atoms_traj = [] + self.action_space = Box(0, 7, (1,)) + self.seed_i = 0 + + # Learning Params + utils.reset_config({ + "strips_learner": self.strips_learner, + "segmenter": "every_step", + "disable_harmlessness_check": True, + "pnad_search_load_initial": True, + "backward_forward_load_initial": True, + "min_data_for_nsrt": 0, + "min_perc_data_for_nsrt": 0, + "pnad_search_timeout":1000.0, + "single_grounding": self.single_grounding, + "option_learner": "no_learning" + }) + + self.nsrts = self.learn_nsrts(task_name) + self.goal = self.parse_goal(task_name, self.ground_atoms_traj[1][-1]) + self.actions = None + self.i = 0 + + def parse_goal(self, task_name, ground_atoms_state): + if task_name == "MiniGrid-BoxingBooksUpForStorage-16x16-N2-v0": + return set([atom for atom in ground_atoms_state if str(atom).startswith("inside(")]) + + elif task_name == "MiniGrid-OpeningPackages-16x16-N2-v0": + return set([atom for atom in ground_atoms_state if str(atom).startswith("openable(")]) + + elif task_name == "MiniGrid-CleaningACar-16x16-N2-v0": + return set([atom for atom in ground_atoms_state if str(atom).startswith("inside(")]) | set([atom for atom in ground_atoms_state if str(atom).startswith("~dustyable(")]) + + elif task_name == "MiniGrid-CleaningShoes-16x16-N2-v0": + return set([atom for atom in ground_atoms_state if str(atom).startswith("~stainable(") and "shoe" in str(atom)]) | \ + set([atom for atom in ground_atoms_state if str(atom).startswith("~dustyable(") and "shoe" in str(atom)]) | \ + set([atom for atom in ground_atoms_state if str(atom).startswith("onfloor(") and "towel" in str(atom)]) + + elif task_name == "MiniGrid-CleaningUpTheKitchenOnly-16x16-N2-v0": + return set([ + atom for atom in ground_atoms_state + if ( + str(atom).startswith("onTop(") and "blender" in str(atom) and "countertop" in str(atom) + ) or ( + str(atom).startswith("nextto(") and "soap" in str(atom) and "sink" in str(atom) + ) or ( + str(atom).startswith("inside(") and "vegetable_oil" in str(atom) and "cabinet" in str(atom) + ) or ( + str(atom).startswith("inside(") and "plate" in str(atom) and "cabinet" in str(atom) + ) or ( + str(atom).startswith("inside(") and "casserole" in str(atom) and "electric_refrigerator" in str(atom) + ) or ( + str(atom).startswith("inside(") and "apple" in str(atom) and "electric_refrigerator" in str(atom) + ) or ( + str(atom).startswith("inside(") and "rag" in str(atom) and "sink" in str(atom) + ) or ( + str(atom).startswith("nextto(") and "rag" in str(atom) and "sink" in str(atom) + ) or ( + str(atom).startswith("~dustyable(") and "cabinet" in str(atom) + ) or ( + str(atom).startswith("~stainable(") and "plate" in str(atom) + ) + ]) + + elif task_name == "MiniGrid-CollectMisplacedItems-16x16-N2-v0": + return set([ + atom for atom in ground_atoms_state + if str(atom).startswith("onTop(") and "table" in str(atom) and ( + "gym_shoe" in str(atom) or + "necklace" in str(atom) or + "notebook" in str(atom) or + "sock" in str(atom) + ) and not str(atom).startswith("onTop(table") + ]) + + elif task_name == "MiniGrid-InstallingAPrinter-16x16-N2-v0": + return set([atom for atom in ground_atoms_state if str(atom).startswith("onTop(")]) | \ + set([atom for atom in ground_atoms_state if str(atom).startswith("toggleable(")]) + + elif task_name == "MiniGrid-LayingWoodFloors-16x16-N2-v0": + return 
set([atom for atom in ground_atoms_state if str(atom).startswith("nextto(")]) + + elif task_name == "MiniGrid-MakingTea-16x16-N2-v0": + return set([ + atom for atom in ground_atoms_state + if str(atom).startswith("sliceable(") and "lemon" in str(atom) + ]) | set([ + atom for atom in ground_atoms_state + if str(atom).startswith("onTop(") and "teapot" in str(atom) and "stove" in str(atom) + ]) | set([ + atom for atom in ground_atoms_state + if str(atom).startswith("atsamelocation(") and "tea_bag" in str(atom) and "teapot" in str(atom) + ]) | set([ + atom for atom in ground_atoms_state + if str(atom).startswith("soakable(") and "teapot" in str(atom) + ]) | set([ + atom for atom in ground_atoms_state + if str(atom).startswith("toggleable(") and "stove" in str(atom) + ]) + + elif task_name == "MiniGrid-MovingBoxesToStorage-16x16-N2-v0": + return set([atom for atom in ground_atoms_state if str(atom).startswith("onTop(")]) + + elif task_name == "MiniGrid-OrganizingFileCabinet-16x16-N2-v0": + return set([ + atom for atom in ground_atoms_state + if str(atom).startswith("onTop(") and "marker" in str(atom) and "table" in str(atom) + ]) | set([ + atom for atom in ground_atoms_state + if str(atom).startswith("inside(") and "document" in str(atom) and "cabinet" in str(atom) + ]) | set([ + atom for atom in ground_atoms_state + if str(atom).startswith("inside(") and "folder" in str(atom) and "cabinet" in str(atom) + ]) + + elif task_name == "MiniGrid-PreparingSalad-16x16-N2-v0": + import ipdb; ipdb.set_trace() + raise NotImplementedError("parse_goal not implemented for PreparingSalad") + + elif task_name == "MiniGrid-PuttingAwayDishesAfterCleaning-16x16-N2-v0": + return set([ + atom for atom in ground_atoms_state + if str(atom).startswith("inside(") and "plate" in str(atom) and "cabinet" in str(atom) + ]) + + + elif task_name == "MiniGrid-SettingUpCandles-16x16-N2-v0": + return set([atom for atom in ground_atoms_state if str(atom).startswith("onTop(")]) + + elif task_name == "MiniGrid-SortingBooks-16x16-N2-v0": + return set([atom for atom in ground_atoms_state if str(atom).startswith("onTop(") and "shelf" in str(atom) and ("book" in str(atom) or "hardback" in str(atom))]) + + elif task_name == "MiniGrid-StoringFood-16x16-N2-v0": + return set([ + atom for atom in ground_atoms_state + if str(atom).startswith("inside(") and "cabinet" in str(atom) and ( + "oatmeal" in str(atom) or "chip" in str(atom) or "vegetable_oil" in str(atom) or "sugar" in str(atom) + ) + ]) + + elif task_name == "MiniGrid-ThawingFrozenFood-16x16-N2-v0": + return set([ + atom for atom in ground_atoms_state + if str(atom).startswith("nextto(") and ( + ("date" in str(atom) and "fish" in str(atom)) or + ("fish" in str(atom) and "sink" in str(atom)) or + ("olive" in str(atom) and "sink" in str(atom)) + ) + ]) + + elif task_name == "MiniGrid-ThrowingAwayLeftovers-16x16-N2-v0": + return set([atom for atom in ground_atoms_state if str(atom).startswith("inside(") and "hamburger" in str(atom) and "ashcan" in str(atom)]) + + elif task_name == "MiniGrid-WashingPotsAndPans-16x16-N2-v0": + return set([ + atom for atom in ground_atoms_state + if str(atom).startswith("~stainable(") and ( + "pan" in str(atom) or "kettle" in str(atom) or "teapot" in str(atom) + ) + ]) | set([ + atom for atom in ground_atoms_state + if str(atom).startswith("inside(") and "cabinet" in str(atom) and ( + "pan" in str(atom) or "kettle" in str(atom) or "teapot" in str(atom) + ) + ]) + + elif task_name == "MiniGrid-WateringHouseplants-16x16-N2-v0": + return set([atom for atom 
in ground_atoms_state if str(atom).startswith("soakable(") and "pot_plant" in str(atom)]) + else: + import ipdb; ipdb.set_trace() + + + def get_plan(self, state, seed): + objs, _, _, ground_atoms_traj, all_atoms = parse_objs_preds_and_options(self.dataset[-1], train_task_idx=len(self.dataset)) + task = Task(State({}, None), self.goal) + + _, _, _, ground_atoms_traj, _ = parse_objs_preds_and_options(LowLevelTrajectory([state], [], _is_demo=True, _train_task_idx=0), train_task_idx=0, all_atoms=all_atoms) + init_atoms = ground_atoms_traj[1][0] + plan = self.plan(init_atoms, objs, self.preds, self.nsrts, task, seed) + return plan + + def policy(self, obs, env): + if self.actions is None: + seed = self.seed_i + self.seed_i += 1 + num_remove_pre = 0 + while self.actions is None or self.actions == []: + try: + self.actions = self.get_plan(env.get_lifted_state(), seed) + break + except _SkeletonSearchTimeout: + print("did not find skeleton - timeout") + except PlanningFailure: + print("did not find skeleton - plan failure") + self.actions = [] + # num_remove_pre += 1 + # new_nsrts = set() + # for nsrt in self.nsrts: + # pre = set() + # tot_pre = len(nsrt.op.preconditions) - num_remove_pre + # if tot_pre > 0: + # pre = random.sample(nsrt.op.preconditions, tot_pre) + # ignore_effects = nsrt.op.ignore_effects + # del_effs = nsrt.op.delete_effects + # # if num_remove_pre > 10: + # # ignore_effects = set() + # # del_effs = set() + # new_nsrts.add( + # nsrt.op.copy_with(preconditions=pre, + # ignore_effects=ignore_effects, + # delete_effects=del_effs).make_nsrt( + # nsrt.option, + # [], # dummy sampler + # lambda s, g, rng, o: np.zeros(1, dtype=np.float32))) + # self.nsrts = new_nsrts + # with open("test_saved.NSRTs.txt", "w") as file: + # for nsrt in self.nsrts: + # if nsrt.op.add_effects != set(): + # file.write(str(nsrt)+"\n") + + self.i += 1 + if self.i-1 < len(self.actions): + return env.key_to_action[self.actions[self.i-1]] + else: + self.actions = None + self.i = 0 + return env.key_to_action["0"] + + def clean_action_plan(self, action_plan): + plan = [] + for step in action_plan: + name = step[0] + objs = step[1] + if len(objs) > 0: + obj_name = objs[0].name + if name.startswith("Move"): + plan.append(f"moveto-{obj_name}") + else: + for opname, key in opname_to_key.items(): + if opname in name: + plan.append(key) + break + return plan + + def plan(self, init_atoms, objects, predicates, nsrts, task, seed): + ground_nsrts, reachable_atoms = task_plan_grounding(init_atoms, objects, nsrts, allow_noops=True) + heuristic = utils.create_task_planning_heuristic("hadd", init_atoms, + task.goal, ground_nsrts, + predicates, objects) + task_plan_generator = task_plan(init_atoms, + task.goal, + ground_nsrts, + reachable_atoms, + heuristic, + timeout=1, + seed=seed, + max_skeletons_optimized=3) + skeleton, _, _ = next(task_plan_generator) + + action_plan = [] + for step in skeleton: + action_plan.append((step.option.name, step.objects)) + return self.clean_action_plan(action_plan) + + def get_data(self, task_name): + for demo_file in demo_files: + if task_name in demo_file: + demo_traj = get_demo_traj(demo_file=demo_file, verbose=False) + + if self.short_task: + if task_name == 'MiniGrid-SortingBooks-16x16-N2-v0': + demo_traj = LowLevelTrajectory(demo_traj.states[:5], demo_traj.actions[:4], _is_demo=True, _train_task_idx=0) + elif task_name == 'MiniGrid-WateringHouseplants-16x16-N2-v0': + demo_traj = LowLevelTrajectory(demo_traj.states[:7], demo_traj.actions[:6], _is_demo=True, _train_task_idx=0) + elif 
task_name == 'MiniGrid-ThrowingAwayLeftovers-16x16-N2-v0': + demo_traj = LowLevelTrajectory(demo_traj.states[:5], demo_traj.actions[:4], _is_demo=True, _train_task_idx=0) + elif task_name == 'MiniGrid-BoxingBooksUpForStorage-16x16-N2-v0': + demo_traj = LowLevelTrajectory(demo_traj.states[:5], demo_traj.actions[:4], _is_demo=True, _train_task_idx=0) + + idx = len(self.dataset) + demo_traj = LowLevelTrajectory(demo_traj.states, demo_traj.actions, _is_demo=True, _train_task_idx=idx) + + self.dataset.append(demo_traj) + new_objs, new_preds, new_options, self.ground_atoms_traj, _ = parse_objs_preds_and_options(demo_traj, train_task_idx=idx) + self.objs |= new_objs + self.preds |= new_preds + self.options |= new_options + self.ground_atom_dataset.append(self.ground_atoms_traj) + goal = self.parse_goal(task_name, self.ground_atoms_traj[1][-1]) + self.tasks.append(Task(State({}, None), goal)) + # if len(self.dataset) >= self.num_demos: + # break + # assert len(self.dataset) == self.num_demos + return self.dataset, self.tasks, self.preds, self.options, self.action_space, self.ground_atom_dataset + + def learn_nsrts(self, task_name): + dataset, tasks, preds, options, action_space, ground_atom_dataset = self.get_data(task_name) + nsrts, _, _ = learn_nsrts_from_data(dataset, + tasks, + preds, + options, + action_space, + ground_atom_dataset, + sampler_learner="neural", + annotations=None) + with open("test_saved.NSRTs.txt", "w") as file: + for nsrt in nsrts: + if nsrt.op.add_effects != set(): + file.write(str(nsrt)+"\n") + return nsrts + +class DummyAgent(OperatorLearningAgent): + def __init__(self, name="dummy", strips_learner="dummy"): + super().__init__(name=name, strips_learner=strips_learner, single_grounding=True) + + def learn_nsrts(self, task_name): + dataset, tasks, preds, options, action_space, ground_atom_dataset = self.get_data(task_name) + goal = self.parse_goal(task_name, self.ground_atoms_traj[1][-1]) + obj_to_var = {obj:obj.type("?" 
+ obj.name) for obj in self.objs} + lifted_goal = {atom.lift(obj_to_var) for atom in goal} + + nsrts = set() + name_i = 0 + for option in options: + op = STRIPSOperator( + name="Dummy" + str(name_i), + parameters=[], + preconditions=set(), + add_effects=set(), + delete_effects=set(), + ignore_effects=set() + ) + dummy_nsrt = op.make_nsrt( + option, + [], # dummy sampler + lambda s, g, rng, o: np.zeros(1, dtype=np.float32)) + nsrts.add(dummy_nsrt) + name_i += 1 + + params = [] + for sublist in [lifted_atom.variables for lifted_atom in lifted_goal]: + params += sublist + params = [x for x in set(params)] + op = STRIPSOperator( + name="Dummy" + str(name_i), + parameters=params, + preconditions=set(), + add_effects=lifted_goal, + delete_effects=set(), + ignore_effects=set() + ) + dummy_nsrt = op.make_nsrt( + option, + [], # dummy sampler + lambda s, g, rng, o: np.zeros(1, dtype=np.float32)) + nsrts.add(dummy_nsrt) + name_i += 1 + return nsrts + +class GroundTruthAgent(OperatorLearningAgent): + def __init__(self, name): + super().__init__(name=name, strips_learner="NONE") + self.name = name + self.ground_truth_trajs = {} + self.i = 0 + self.actions = None + + def reset(self, task_name): + self.dataset = [] + self.ground_atom_dataset = [] + self.tasks = [] + self.action_space = Box(0, 7, (1,)) + self.objs = set() + self.preds = set() + self.options = set() + self.ground_atoms_traj = [] + + dataset, tasks, preds, options, action_space, ground_atom_dataset = self.get_data(task_name) + + self.goal = self.parse_goal(task_name, self.ground_atoms_traj[1][-1]) + self.actions = None + self.i = 0 + + action_plan = [] + for i, step in enumerate(self.ground_atoms_traj[0].actions): + curr_state = self.ground_atoms_traj[1][i] + next_state = self.ground_atoms_traj[1][i+1] + def count_object_occurrences(atom_set): + counter = Counter() + for atom in atom_set: + for obj in atom.objects: + if not atom.predicate.name.startswith("~inreachofrobot"): + counter[obj] += 1 + return counter + counter = count_object_occurrences(next_state - curr_state) + def get_max_count_object(counter, exclude_types=("table", "shelf")): + max_count = max(counter.values()) + candidates = [ + obj for obj, count in counter.items() + if count == max_count and all(ex_type not in str(obj) for ex_type in exclude_types) + ] + + if candidates: + return candidates[0] + else: + return None + + try: + if get_max_count_object(counter) is None: + objs = [max(counter, key=counter.get)] + else: + objs = [get_max_count_object(counter)] + except: + objs = random.sample(self.objs, 1) + action_plan.append((step._option.name, objs)) + self.ground_truth_trajs[task_name] = self.clean_action_plan(action_plan) + + def policy(self, obs, env): + #print(env.get_lifted_state()) + try: + assert env.env_id in self.ground_truth_trajs.keys() + except: + import ipdb; ipdb.set_trace() + self.i += 1 + if self.i-1 < len(self.ground_truth_trajs[env.env_id]): + return env.key_to_action[self.ground_truth_trajs[env.env_id][self.i-1]] + else: + return env.key_to_action["0"] + + def learn_nsrts(self, task_name): + return None + +# tasks = ["MiniGrid-BoxingBooksUpForStorage-16x16-N2-v0", +# "MiniGrid-CollectMisplacedItems-16x16-N2-v0", +# "MiniGrid-ThrowingAwayLeftovers-16x16-N2-v0", +# "MiniGrid-OpeningPackages-16x16-N2-v0", +# "MiniGrid-WateringHouseplants-16x16-N2-v0", +# "MiniGrid-PuttingAwayDishesAfterCleaning-16x16-N2-v0"] + +# agents = [RandomAgent("random"), GroundTruthAgent("ground-truth")] +# results = evaluation(agents, tasks, num_iterations=3) +# df = 
structure_results(results) +# plot_lifelong_success(df) + +##### + +# tasks = ["MiniGrid-BoxingBooksUpForStorage-16x16-N2-v0"] +# agents = [GroundTruthAgent("ground-truth")] +# results = evaluation(agents, tasks, num_iterations=1) +# df = structure_results(results) +# #plot_lifelong_success(df) +# import ipdb; ipdb.set_trace() + +##### + +############ +# tasks = ["MiniGrid-BoxingBooksUpForStorage-16x16-N2-v0"] +# agents = [ +# DummyAgent("dummy", strips_learner="dummy"), +# OperatorLearningAgent("cluster-intersect", strips_learner="cluster_and_intersect"), +# OperatorLearningAgent("backchaining", strips_learner="backchaining"), +# OperatorLearningAgent("hill-climbing", strips_learner="pnad_search"), +# # OperatorLearningAgent("llm", strips_learner="llm") +# ] +# results = evaluation(agents, tasks, num_iterations=1) +# df = structure_results(results) +# #plot_lifelong_success(df) +# for agent in agents: +# print(agent.name, len(agent.nsrts), agent.actions) +# print() +# import ipdb; ipdb.set_trace() + +# Note: grounding should only be for operators based on the goal.... +# Maybe LLM can help with grounding too + +# TODO Finally - Collect Demos, Increment Num_Demos + +# TODO Try Run 3-5 Env Eval on all 5 Baselines (Dummy, CI, Pnad_Search, Back_Chaining) + +# TODO Fix LLM Agents + +# TODO Make BC+FS+LLM Agent +# see other code + +# TODO Make Version-Space Agent +# Note: from CI it should fall back to BC+FS+LLM then to BC then to Dummy + +# TODO Try Run Full Eval on all 7 Agents + +# tasks = [ +# 'MiniGrid-BoxingBooksUpForStorage-16x16-N2-v0', +# 'MiniGrid-CleaningACar-16x16-N2-v0', +# 'MiniGrid-CleaningShoes-16x16-N2-v0', #1 +# 'MiniGrid-CleaningUpTheKitchenOnly-16x16-N2-v0', +# 'MiniGrid-CollectMisplacedItems-16x16-N2-v0', +# 'MiniGrid-InstallingAPrinter-16x16-N2-v0', +# 'MiniGrid-LayingWoodFloors-16x16-N2-v0', +# 'MiniGrid-MakingTea-16x16-N2-v0', +# 'MiniGrid-MovingBoxesToStorage-16x16-N2-v0', +# 'MiniGrid-OpeningPackages-16x16-N2-v0', +# 'MiniGrid-OrganizingFileCabinet-16x16-N2-v0', +# #[DEBUG]'MiniGrid-PreparingSalad-16x16-N2-v0', +# 'MiniGrid-PuttingAwayDishesAfterCleaning-16x16-N2-v0', +# 'MiniGrid-SettingUpCandles-16x16-N2-v0', #1 +# 'MiniGrid-SortingBooks-16x16-N2-v0', +# 'MiniGrid-StoringFood-16x16-N2-v0', +# #[DEBUG]'MiniGrid-ThawingFrozenFood-16x16-N2-v0', +# 'MiniGrid-ThrowingAwayLeftovers-16x16-N2-v0', +# 'MiniGrid-WashingPotsAndPans-16x16-N2-v0', +# 'MiniGrid-WateringHouseplants-16x16-N2-v0' +# ] + +# task_info = {} + +# for demo_file in demo_files: +# traj = get_demo_traj(demo_file, verbose=False) +# add_count = 0 +# for i, action in enumerate(traj.actions): +# curr_state = set(traj.states[i]) +# next_state = set(traj.states[i+1]) +# del_effs = curr_state - next_state +# add_effs = next_state - curr_state +# # print(action) +# # print("DEL:", del_effs) +# # print("ADD:", add_effs) +# # print() +# add_count += len(add_effs) +# assert len(add_effs) != 0 or str(action) == "Move" +# task_name = demo_file.split("/")[-1].split("_")[0] +# agent = OperatorLearningAgent("cluster-intersect", strips_learner="cluster_and_intersect") +# agent.get_data(task_name=task_name) +# goal = agent.parse_goal(task_name=task_name, ground_atoms_state=agent.ground_atoms_traj[1][-1]) +# task_info[demo_file.split("/")[-1]] = (len(traj.actions), len(goal), add_count) + +# i = 0 +# curriculum = [] +# for k,v in sorted([(k,v) for k,v in task_info.items()], key=lambda x: x[1][2]): # by add effects +# i+=1 +# print("|", v[0], "| goal length:", v[1], "| add count:", v[2], "|", k.split("_")[0], i) +# 
curriculum.append(k.split("_")[0]) + +import time +start_time = time.time() +tasks = ['MiniGrid-OpeningPackages-16x16-N2-v0', + 'MiniGrid-InstallingAPrinter-16x16-N2-v0', + 'MiniGrid-MovingBoxesToStorage-16x16-N2-v0', + 'MiniGrid-SortingBooks-16x16-N2-v0',# + 'MiniGrid-WateringHouseplants-16x16-N2-v0',# + #'MiniGrid-ThrowingAwayLeftovers-16x16-N2-v0',# + 'MiniGrid-BoxingBooksUpForStorage-16x16-N2-v0'# + ] +# tasks = curriculum +# tasks.remove('MiniGrid-LayingWoodFloors-16x16-N2-v0') +# tasks.remove('MiniGrid-CleaningACar-16x16-N2-v0') +# tasks.remove('MiniGrid-SortingBooks-16x16-N2-v0') +# tasks.remove('MiniGrid-CleaningUpTheKitchenOnly-16x16-N2-v0') +print("#"*30) +print(tasks) + +all_agents = [ + #GroundTruthAgent("ground-truth"), + #DummyAgent("dummy", strips_learner="dummy"), + #OperatorLearningAgent("cluster-intersect", strips_learner="cluster_and_intersect"), + #OperatorLearningAgent("backchaining", strips_learner="backchaining"), + #OperatorLearningAgent("hill-climbing", strips_learner="pnad_search"), + #OperatorLearningAgent("llm", strips_learner="llm"), + #OperatorLearningAgent("backward-forward", strips_learner="backward-forward"), + ] + +for agent in all_agents: + with open("test_saved.NSRTs.txt", "w") as file: + file.write("""NSRT-Move0: + Parameters: [?x0:obj_type] + Preconditions: [] + Add Effects: [inreachofrobot(?x0:obj_type)] + Delete Effects: [~inreachofrobot(?x0:obj_type)] + Ignore Effects: [inreachofrobot, ~inreachofrobot] + Option Spec: Move()""") + results = evaluation([agent], tasks, num_iterations=1, start_seed=100) + df = structure_results(results) + plot_lifelong_success(df) + end_time = time.time() + print("time elasped", end_time - start_time) + df.to_csv('test_results/' + agent.name + '_output.csv') + + # results = evaluation([agent], tasks, num_iterations=1, start_seed=100, short_task=False) + # df2 = structure_results(results) + # plot_lifelong_success(df2) + # end_time = time.time() + # print("time elasped", end_time - start_time) + # df2.to_csv('test_results/' + agent.name + '_long_output.csv') + diff --git a/test_colla_visualize_results.py b/test_colla_visualize_results.py new file mode 100644 index 0000000000..e69de29bb2 diff --git a/test_curriculum.py b/test_curriculum.py new file mode 100644 index 0000000000..6eb33454fe --- /dev/null +++ b/test_curriculum.py @@ -0,0 +1,377 @@ +import numpy as np +from gym.spaces import Box +import re +import pickle as pkl + +from predicators import utils +from predicators.nsrt_learning.nsrt_learning_main import learn_nsrts_from_data +from predicators.structs import Action, LowLevelTrajectory, Predicate, State, \ + Type, GroundAtom, Task +import glob + +demo_files = sorted([filename for filename in glob.glob("/Users/shashlik/Documents/GitHub/predicators/demos/*/*")]) +demo_tasks = set([demo_file.split("/")[-1].split("_")[0] for demo_file in demo_files]) + +utils.reset_config({ + "strips_learner": "pnad_search", + "segmenter": "every_step", + "disable_harmlessness_check": True, + "pnad_search_load_initial": True, + "min_data_for_nsrt": 0, + "min_perc_data_for_nsrt": 0, + "pnad_search_timeout":1000.0 + }) + +# Load and do this from MiniBeahvior Demo + +def get_demo_traj(demo_file, verbose=True): + with open(demo_file, 'rb') as f: + data = pkl.load(f) + + last_skill = "Move" + state = [a for a in data[1][1] if "infovofrobot" not in a] + states = [state] + actions = [] + for step in data.keys(): + obs = data[step][0]['image'] + direction = data[step][0]['direction'] + action = data[step][2] + skill = None + + if "forward" in 
str(action) or \ + "left" in str(action) or \ + "right" in str(action): + + skill = "Move" + else: + skill = str(action) + + has_effect = True + try: + next_obs = data[step][3]['image'] + next_direction = data[step][3]['direction'] + if np.allclose(obs, next_obs) and (direction == next_direction): + has_effect = False + except: + pass + + if has_effect: + if last_skill != skill: + if verbose: + print("#") + print(last_skill) + try: + next_state = [a for a in data[step][1] if "infovofrobot" not in a] + if verbose: + print("PREV:", set(state)) + print("ADD:", set(next_state) - set(state)) + print("DEL:", set(state) - set(next_state)) + state = next_state + actions.append(last_skill) + states.append(state) + except: + pass + last_skill = skill + else: + if verbose: + print("#") + print(last_skill) + next_state = [a for a in data[step][4] if "infovofrobot" not in a] + if verbose: + print("PREV:", set(state)) + print("ADD:", set(next_state) - set(state)) + print("DEL:", set(state) - set(next_state)) + state = next_state + if verbose: + print("#") + actions.append(last_skill) + states.append(state) + + return LowLevelTrajectory(states, actions, _is_demo=True, _train_task_idx=0) + +def parse_objs_preds_and_options(trajectory, train_task_idx=0): + objs = set() + preds = set() + options = set() + state = None + states = [] + actions = [] + ground_atoms_traj = [] + obj_type = Type("obj_type", ["is_obj"]) + + for i, s in enumerate(trajectory.states): + ground_atoms = set() + for pred_str in s: + pred = None + choice = [] + pattern = re.compile(r"(\w+)\((.*?)\)") + match = pattern.match(pred_str) + if match: + func_name = match.group(1) + args = match.group(2).split(',') if match.group(2) else [] + for arg in args: + obj = obj_type(arg.strip()) + choice.append(obj) + objs.add(obj) + if len(args) == 1: + pred = Predicate(func_name, [obj_type], lambda s, o: True) + preds.add(pred) + elif len(args) == 2: + pred = Predicate(func_name, [obj_type, obj_type], lambda s, o: True) + preds.add(pred) + else: + NotImplementedError("") + ground_atoms.add(GroundAtom(pred, choice)) + states.append(state) + ground_atoms_traj.append(ground_atoms) + + if i < len(trajectory.actions): + a_name = trajectory.actions[i] + name_to_actions = actions_dict = { + "Move": 0, + "Actions.pickup_0": 3, + "Actions.pickup_1": 4, + "Actions.pickup_2": 5, + "Actions.drop_0": 6, + "Actions.drop_1": 7, + "Actions.drop_2": 8, + "Actions.drop_in": 9, + "Actions.toggle": 10, + "Actions.close": 11, + "Actions.open": 12, + "Actions.cook": 13, + "Actions.slice": 14 + } + + param_option = utils.SingletonParameterizedOption( + a_name, lambda s, m, o, p: Action(name_to_actions[a_name])) + options.add(param_option) + option = param_option.ground([], []) + action = option.policy(state) + action.set_option(option) + actions.append(action) + + return objs, preds, options, (LowLevelTrajectory([{obj:[0.0] for obj in objs} for _ in states], actions, _is_demo=True, _train_task_idx=train_task_idx), ground_atoms_traj) + +dataset = [] +ground_atom_dataset = [] +tasks = [] +action_space = Box(0, 7, (1, )) +all_preds = set() +all_options = set() + +demo_traj = get_demo_traj("demos/MiniGrid-OpeningPackages-16x16-N2-v0/MiniGrid-OpeningPackages-16x16-N2-v0_0", verbose=False) + +idx = len(dataset) +demo_traj = LowLevelTrajectory(demo_traj.states, demo_traj.actions, _is_demo=True, _train_task_idx=idx) + +dataset += [demo_traj] +objs, preds, options, ground_atoms_traj = parse_objs_preds_and_options(demo_traj, train_task_idx=idx) +all_preds = preds | all_preds 
+all_options = options | all_options +ground_atom_dataset += [ground_atoms_traj] +goal = set([atom for atom in ground_atoms_traj[1][-1] if "openable(" in str(atom)]) +tasks += [Task(State({}, None), goal)] + +nsrts, _, _ = learn_nsrts_from_data(dataset, tasks, + preds, + options, + action_space, + ground_atom_dataset, + sampler_learner="neural", + annotations=None) + +for nsrt in nsrts: + print(nsrt) + print() + +with open("test_saved.NSRTs.txt", "w") as file: + for nsrt in nsrts: + file.write(str(nsrt)+"\n") + +demo_traj = get_demo_traj("demos/MiniGrid-SortingBooks-16x16-N2-v0/MiniGrid-SortingBooks-16x16-N2-v0_0", verbose=False) + +idx = len(dataset) +demo_traj = LowLevelTrajectory(demo_traj.states[0:5], demo_traj.actions[0:4], _is_demo=True, _train_task_idx=idx) + +dataset += [demo_traj] +objs, preds, options, ground_atoms_traj = parse_objs_preds_and_options(demo_traj, train_task_idx=idx) +all_preds = preds | all_preds +all_options = options | all_options +ground_atom_dataset += [ground_atoms_traj] +goal = set([atom for atom in ground_atoms_traj[1][-1] if "onTop(" in str(atom) and "shelf" in str(atom)]) +tasks += [Task(State({}, None), goal)] + +nsrts, _, _ = learn_nsrts_from_data(dataset, tasks, + all_preds, + all_options, + action_space, + ground_atom_dataset, + sampler_learner="neural", + annotations=None) + +print("#"*60) +for nsrt in nsrts: + print(nsrt) + print() + +with open("test_saved.NSRTs.txt", "w") as file: + for nsrt in nsrts: + file.write(str(nsrt)+"\n") + + +demo_traj = get_demo_traj("demos/MiniGrid-BoxingBooksUpForStorage-16x16-N2-v0/MiniGrid-BoxingBooksUpForStorage-16x16-N2-v0_0", verbose=False) + +idx = len(dataset) +demo_traj = LowLevelTrajectory(demo_traj.states[0:5], demo_traj.actions[0:4], _is_demo=True, _train_task_idx=idx) + +dataset += [demo_traj] +objs, preds, options, ground_atoms_traj = parse_objs_preds_and_options(demo_traj, train_task_idx=idx) +all_preds = preds | all_preds +all_options = options | all_options +ground_atom_dataset += [ground_atoms_traj] +goal = set([atom for atom in ground_atoms_traj[1][-1] if "inside(book_3" in str(atom)]) +tasks += [Task(State({}, None), goal)] + +nsrts, _, _ = learn_nsrts_from_data(dataset, tasks, + all_preds, + all_options, + action_space, + ground_atom_dataset, + sampler_learner="neural", + annotations=None) + +print("#"*60) +for nsrt in nsrts: + print(nsrt) + print() + +with open("test_saved.NSRTs.txt", "w") as file: + for nsrt in nsrts: + file.write(str(nsrt)+"\n") + + +demo_traj = get_demo_traj("demos/MiniGrid-PuttingAwayDishesAfterCleaning-16x16-N2-v0/MiniGrid-PuttingAwayDishesAfterCleaning-16x16-N2-v0_0", verbose=False) + +idx = len(dataset) +demo_traj = LowLevelTrajectory(demo_traj.states[0:7], demo_traj.actions[0:6], _is_demo=True, _train_task_idx=idx) + +dataset += [demo_traj] +objs, preds, options, ground_atoms_traj = parse_objs_preds_and_options(demo_traj, train_task_idx=idx) +all_preds = preds | all_preds +all_options = options | all_options +ground_atom_dataset += [ground_atoms_traj] +goal = set([atom for atom in ground_atoms_traj[1][-1] if "inside(" in str(atom)]) +tasks += [Task(State({}, None), goal)] + +nsrts, _, _ = learn_nsrts_from_data(dataset, tasks, + all_preds, + all_options, + action_space, + ground_atom_dataset, + sampler_learner="neural", + annotations=None) + +print("#"*60) +for nsrt in nsrts: + print(nsrt) + print() + +with open("test_saved.NSRTs.txt", "w") as file: + for nsrt in nsrts: + file.write(str(nsrt)+"\n") + + + +demo_traj = 
get_demo_traj("demos/MiniGrid-ThrowingAwayLeftovers-16x16-N2-v0/MiniGrid-ThrowingAwayLeftovers-16x16-N2-v0_0", verbose=False) + +idx = len(dataset) +demo_traj = LowLevelTrajectory(demo_traj.states[0:5], demo_traj.actions[0:4], _is_demo=True, _train_task_idx=idx) + +dataset += [demo_traj] +objs, preds, options, ground_atoms_traj = parse_objs_preds_and_options(demo_traj, train_task_idx=idx) +all_preds = preds | all_preds +all_options = options | all_options +ground_atom_dataset += [ground_atoms_traj] +goal = set([atom for atom in ground_atoms_traj[1][-1] if "inside(" in str(atom)]) +tasks += [Task(State({}, None), goal)] + +nsrts, _, _ = learn_nsrts_from_data(dataset, tasks, + all_preds, + all_options, + action_space, + ground_atom_dataset, + sampler_learner="neural", + annotations=None) + +print("#"*60) +for nsrt in nsrts: + print(nsrt) + print() + +with open("test_saved.NSRTs.txt", "w") as file: + for nsrt in nsrts: + file.write(str(nsrt)+"\n") + +demo_traj = get_demo_traj("demos/MiniGrid-CollectMisplacedItems-16x16-N2-v0/MiniGrid-CollectMisplacedItems-16x16-N2-v0_0", verbose=False) + +idx = len(dataset) +demo_traj = LowLevelTrajectory(demo_traj.states[0:5], demo_traj.actions[0:4], _is_demo=True, _train_task_idx=idx) + +dataset += [demo_traj] +objs, preds, options, ground_atoms_traj = parse_objs_preds_and_options(demo_traj, train_task_idx=idx) +all_preds = preds | all_preds +all_options = options | all_options +ground_atom_dataset += [ground_atoms_traj] +goal = set([atom for atom in ground_atoms_traj[1][-1] if "onTop(" in str(atom) and "table_1" in str(atom)]) +tasks += [Task(State({}, None), goal)] + +nsrts, _, _ = learn_nsrts_from_data(dataset, tasks, + all_preds, + all_options, + action_space, + ground_atom_dataset, + sampler_learner="neural", + annotations=None) + +print("#"*60) +for nsrt in nsrts: + print(nsrt) + print() + +with open("test_saved.NSRTs.txt", "w") as file: + for nsrt in nsrts: + file.write(str(nsrt)+"\n") + +quit() + +demo_traj = get_demo_traj("demos/MiniGrid-WateringHouseplants-16x16-N2-v0/MiniGrid-WateringHouseplants-16x16-N2-v0_0", verbose=False) + +idx = len(dataset) +demo_traj = LowLevelTrajectory(demo_traj.states[0:7], demo_traj.actions[0:6], _is_demo=True, _train_task_idx=idx) + +dataset += [demo_traj] +objs, preds, options, ground_atoms_traj = parse_objs_preds_and_options(demo_traj, train_task_idx=idx) +all_preds = preds | all_preds +all_options = options | all_options +ground_atom_dataset += [ground_atoms_traj] +goal = set([atom for atom in ground_atoms_traj[1][-1] if "inside(" in str(atom) or "soakable(" in str(atom)]) +tasks += [Task(State({}, None), goal)] + +nsrts, _, _ = learn_nsrts_from_data(dataset, tasks, + all_preds, + all_options, + action_space, + ground_atom_dataset, + sampler_learner="neural", + annotations=None) + +print("#"*60) +for nsrt in nsrts: + print(nsrt) + print() + +with open("test_saved.NSRTs.txt", "w") as file: + for nsrt in nsrts: + file.write(str(nsrt)+"\n") + diff --git a/test_graph_results.py b/test_graph_results.py new file mode 100644 index 0000000000..b1dcdfbd27 --- /dev/null +++ b/test_graph_results.py @@ -0,0 +1,89 @@ +import pickle + +filename = 'results.pkl' + +with open(filename, 'rb') as file: + results = pickle.load(file) + +filename = 'HITL_results.pkl' + +with open(filename, 'rb') as file: + results_hitl = pickle.load(file) + +filename = 'HITL_more_results.pkl' + +with open(filename, 'rb') as file: + results_more_hitl = pickle.load(file) + + +import pandas as pd + +# Assuming `results` is your dictionary of lists of 
tuples +df_matches = pd.DataFrame(results["tot_matches"], columns=["num_trajs", "run_i", "num_match"]) +df_soft_matches = pd.DataFrame(results["tot_soft_matches"], columns=["num_trajs", "run_i", "num_soft_match"]) +df_exsoft_matches = pd.DataFrame(results["tot_exsoft_matches"], columns=["num_trajs", "run_i", "num_exsoft_match"]) +df_num_ops = pd.DataFrame(results["tot_num_ops"], columns=["num_trajs", "run_i", "num_op_sets", "num_actions"]) + +agg_matches = df_matches.groupby("num_trajs")["num_match"].agg(["mean", "std"]).reset_index() +agg_soft = df_soft_matches.groupby("num_trajs")["num_soft_match"].agg(["mean", "std"]).reset_index() +agg_exsoft = df_exsoft_matches.groupby("num_trajs")["num_exsoft_match"].agg(["mean", "std"]).reset_index() +agg_ops = df_num_ops.groupby("num_trajs")[["num_op_sets", "num_actions"]].agg(["mean", "std"]).reset_index() + +df_matches_hitl = pd.DataFrame(results_hitl["tot_matches"], columns=["num_trajs", "run_i", "num_match"]) +df_soft_matches_hitl = pd.DataFrame(results_hitl["tot_soft_matches"], columns=["num_trajs", "run_i", "num_soft_match"]) +df_exsoft_matches_hitl = pd.DataFrame(results_hitl["tot_exsoft_matches"], columns=["num_trajs", "run_i", "num_exsoft_match"]) +df_num_ops_hitl = pd.DataFrame(results_hitl["tot_num_ops"], columns=["num_trajs", "run_i", "num_op_sets", "num_actions"]) + +agg_matches_hitl = df_matches_hitl.groupby("num_trajs")["num_match"].agg(["mean", "std"]).reset_index() +agg_soft_hitl = df_soft_matches_hitl.groupby("num_trajs")["num_soft_match"].agg(["mean", "std"]).reset_index() +agg_exsoft_hitl = df_exsoft_matches_hitl.groupby("num_trajs")["num_exsoft_match"].agg(["mean", "std"]).reset_index() +agg_ops_hitl = df_num_ops_hitl.groupby("num_trajs")[["num_op_sets", "num_actions"]].agg(["mean", "std"]).reset_index() + +df_matches_more_hitl = pd.DataFrame(results_more_hitl["tot_matches"], columns=["num_trajs", "run_i", "num_match"]) +df_soft_matches_more_hitl = pd.DataFrame(results_more_hitl["tot_soft_matches"], columns=["num_trajs", "run_i", "num_soft_match"]) +df_exsoft_matches_more_hitl = pd.DataFrame(results_more_hitl["tot_exsoft_matches"], columns=["num_trajs", "run_i", "num_exsoft_match"]) +df_num_ops_more_hitl = pd.DataFrame(results_more_hitl["tot_num_ops"], columns=["num_trajs", "run_i", "num_op_sets", "num_actions"]) + +agg_matches_more_hitl = df_matches_more_hitl.groupby("num_trajs")["num_match"].agg(["mean", "std"]).reset_index() +agg_soft_more_hitl = df_soft_matches_more_hitl.groupby("num_trajs")["num_soft_match"].agg(["mean", "std"]).reset_index() +agg_exsoft_more_hitl = df_exsoft_matches_more_hitl.groupby("num_trajs")["num_exsoft_match"].agg(["mean", "std"]).reset_index() +agg_ops_more_hitl = df_num_ops_more_hitl.groupby("num_trajs")[["num_op_sets", "num_actions"]].agg(["mean", "std"]).reset_index() + +import matplotlib.pyplot as plt + +plt.figure() +# No-HITL +plt.errorbar(agg_matches["num_trajs"], agg_matches["mean"], yerr=agg_matches["std"], label="Match (no-HITL)") +plt.errorbar(agg_soft["num_trajs"], agg_soft["mean"], yerr=agg_soft["std"], label="Soft Match (no-HITL)") +plt.errorbar(agg_exsoft["num_trajs"], agg_exsoft["mean"], yerr=agg_exsoft["std"], label="ExSoft Match (no-HITL)") + +# HITL +plt.errorbar(agg_matches_hitl["num_trajs"], agg_matches_hitl["mean"], yerr=agg_matches_hitl["std"], linestyle='--', label="Match (HITL)") +plt.errorbar(agg_soft_hitl["num_trajs"], agg_soft_hitl["mean"], yerr=agg_soft_hitl["std"], linestyle='--', label="Soft Match (HITL)") +plt.errorbar(agg_exsoft_hitl["num_trajs"], 
agg_exsoft_hitl["mean"], yerr=agg_exsoft_hitl["std"], linestyle='--', label="ExSoft Match (HITL)") + +# More HITL +plt.errorbar(agg_matches_more_hitl["num_trajs"], agg_matches_more_hitl["mean"], yerr=agg_matches_more_hitl["std"], linestyle=':', label="Match (HITL+)") +plt.errorbar(agg_soft_more_hitl["num_trajs"], agg_soft_more_hitl["mean"], yerr=agg_soft_more_hitl["std"], linestyle=':', label="Soft Match (HITL+)") +plt.errorbar(agg_exsoft_more_hitl["num_trajs"], agg_exsoft_more_hitl["mean"], yerr=agg_exsoft_more_hitl["std"], linestyle=':', label="ExSoft Match (HITL+)") + +plt.xlabel("Number of Trajectories") +plt.ylabel("Matches") +plt.title("Match Types vs Number of Trajectories (HITL vs No-HITL vs. More-HITL)") +plt.legend(loc='center left', bbox_to_anchor=(1, 0.5)) +plt.tight_layout() # Adjust layout to make space for legend +plt.grid(True) +plt.show() + +plt.figure() +# No-HITL +plt.errorbar(agg_ops["num_trajs"], agg_ops[("num_op_sets", "mean")], yerr=agg_ops[("num_op_sets", "std")], label="# Operators (no-HITL)") +plt.errorbar(agg_ops["num_trajs"], agg_ops[("num_actions", "mean")], yerr=agg_ops[("num_actions", "std")], label="# Actions (no-HITL)") + +plt.xlabel("Number of Trajectories") +plt.ylabel("Count") +plt.title("Operator and Action Counts vs Number of Trajectories (No-HITL)") +plt.legend(loc='center left', bbox_to_anchor=(1, 0.5)) +plt.tight_layout() # Adjust layout to make space for legend +plt.grid(True) +plt.show() diff --git a/test_lbf_final.py b/test_lbf_final.py new file mode 100644 index 0000000000..7f917a8e1f --- /dev/null +++ b/test_lbf_final.py @@ -0,0 +1,662 @@ +# Cleaned-up and organized version of your operator learning code +# - Uses dataclasses +# - Removes duplication +# - Adds helpers +# - Keeps everything in one file + +from dataclasses import dataclass, field +from typing import List, Set, Tuple, Dict +import random +import numpy as np +from collections import deque, defaultdict + +# --- Config --- + +np.random.seed(1) +random.seed(1) + +NUM_PREDICATES = 10 +NUM_OPERATORS = 10 +TRAJ_MAX = 10 +TRAJ_LEN = 5 +NUM_TRAJS = 100 + +# --- Operator Representation --- + +@dataclass +class Operator: + action: int + pre: Set[int] = field(default_factory=set) + add: Set[int] = field(default_factory=set) + delete: Set[int] = field(default_factory=set) + + def is_applicable(self, state: Set[int]) -> bool: + return self.pre.issubset(state) + + def apply(self, state: Set[int]) -> Set[int]: + if not self.is_applicable(state): + return state + return (state - self.delete) | self.add + +# Utility + +def op_key(op: Operator) -> Tuple[int, frozenset]: + return (op.action, frozenset(op.add)) + +def is_equivalent(op1: Operator, op2: Operator) -> bool: + return op1.pre == op2.pre and op1.add == op2.add and op1.delete == op2.delete + +def is_covered_by(op1: Operator, op2: Operator) -> bool: + return op1.pre >= op2.pre and op1.add == op2.add and op1.delete >= op2.delete + +# Planning + +def plan(start: Set[int], goal: Set[int], operators: List[Operator], max_depth=10): + visited, queue = set(), deque([(start.copy(), [])]) + + while queue: + state, path = queue.popleft() + state_key = frozenset(state) + if state_key in visited: continue + visited.add(state_key) + + if goal.issubset(state): return path + if len(path) >= max_depth: continue + + for op in operators: + if op.is_applicable(state): + next_state = op.apply(state) + if next_state != state: + queue.append((next_state, path + [(state.copy(), op.action, next_state.copy())])) + return None + +# Random Generator + +def 
generate_random_operator(pred_pool: List[int], action_id: int) -> Operator: + pre = set(random.sample(pred_pool, random.randint(1, 3))) + effects = list(set(pred_pool) - pre) + add = set(random.sample(effects, random.randint(1, min(2, len(effects))))) + delete = set(random.sample(list(pre), random.randint(0, len(pre)))) + return Operator(action=action_id, pre=pre, add=add, delete=delete) + +# Reachability + +def compute_reachable_states(init_state: Set[int], operators: List[Operator], max_iters=100) -> Set[frozenset]: + reached_states, frontier = set(), [init_state.copy()] + reachable = set() + + for _ in range(max_iters): + new_frontier = [] + for state in frontier: + key = frozenset(state) + if key in reached_states: continue + reached_states.add(key) + reachable.add(key) + + for op in operators: + if op.is_applicable(state): + next_state = op.apply(state) + if frozenset(next_state) not in reached_states: + new_frontier.append(next_state) + + if not new_frontier: break + frontier = new_frontier + + return reachable + +# Demo Generation + +def generate_planned_demo_trajectories(operators: List[Operator], num_trajs: int, max_depth: int) -> List[Tuple[List[Tuple[Set[int], int, Set[int]]], Set[int]]]: + demos, attempts = [], 0 + + while len(demos) < num_trajs and attempts < 100000: + attempts += 1 + init_state = set(random.sample(range(NUM_PREDICATES), random.randint(2, NUM_PREDICATES))) + reachable = compute_reachable_states(init_state, operators) - {frozenset(init_state)} + if not reachable: continue + + plan_traj = [] + goals = list(reachable) + + while goals and len(plan_traj) < TRAJ_LEN: + goal_state = random.choice(goals) + goal = set(goal_state) - init_state + if not goal: + goals.remove(goal_state) + continue + + plan_traj = plan(init_state, goal, operators, max_depth) + if plan_traj is None or len(plan_traj) < TRAJ_LEN: + goals.remove(goal_state) + plan_traj = [] + + if plan_traj and len(plan_traj) >= TRAJ_LEN: + demos.append((plan_traj, goal)) + + return demos + +# Backward Pass + +def backward_infer_minimal_effects(demo_data, current_operators=None): + candidate_ops = defaultdict(lambda: {'demos': []}) + op_index = {(op.action, frozenset(op.add)): op for op in current_operators} if current_operators else {} + + for traj, goal in sorted(demo_data, key=lambda x: len(x[0])): + current_goal = goal.copy() + + for (s, action, s_prime) in reversed(traj): + effect = s_prime - s + if not effect: + raise Exception("No effect") + necessary_effect = effect if len(effect) == 1 else effect & current_goal + key = (action, frozenset(necessary_effect)) + candidate_ops[key]['demos'].append((s, action, s_prime)) + + preconditions = op_index.get(key, Operator(action)).pre + current_goal = (current_goal - necessary_effect) | preconditions + + return candidate_ops + +# Forward Refinement + +def refine_by_plan_divergence(demos, learned_operators): + op_index = {(op.action, frozenset(op.add)): op for op in learned_operators} + support_sets = {key: [] for key in op_index} + + for traj, goal in demos: + current_goal = goal.copy() + for s, a, s_prime in traj: + effect = s_prime - s + necessary_effect = effect if len(effect) == 1 else effect & current_goal + key = (a, frozenset(necessary_effect)) + if key in support_sets: + support_sets[key].append(s) + preconditions = op_index.get(key, Operator(a)).pre + current_goal = (current_goal - necessary_effect) | preconditions + + for traj, _ in demos: + state = traj[0][0] + for (s_true, a_true, s_next_true) in traj: + applicable = [op for op in learned_operators if 
op.is_applicable(state)] + if not applicable: break + op_planner = random.choice(applicable) + + key_true = (a_true, frozenset(s_next_true - s_true)) + op_true = op_index.get(key_true) + if op_true is None: continue + + if op_planner is op_true: + state = op_true.apply(state) + continue + + key_planner = (op_planner.action, frozenset(op_planner.add)) + support = support_sets.get(key_planner, []) + if not support: continue + + common_preds = set.intersection(*support) + potential_preds = common_preds - state + if not potential_preds: continue + + preds_to_add = set(random.sample(list(potential_preds), random.randint(1, len(potential_preds)))) + op_planner.pre.update(preds_to_add) + state = op_true.apply(state) + + return list(op_index.values()) + +# Learning Loop + +def learn_operators_from_demos(demo_data, max_iters=100, verbose=True): + learned_ops = [] + + for iteration in range(max_iters): + if verbose: + print(f"\n--- Iteration {iteration + 1} ---") + + # Backward pass + candidate_ops = backward_infer_minimal_effects(demo_data, current_operators=learned_ops or None) + + # Create new operators from candidate effects + op_index = {} + for (action, effect_frozen), entry in candidate_ops.items(): + op = Operator(action=action, add=set(effect_frozen), pre=set(), delete=set()) + op_index[(action, frozenset(op.add))] = op + learned_ops = list(op_index.values()) + + # Assign each transition to at most one operator + demo_assignments = defaultdict(list) + assigned_transitions = {} + for traj, _ in demo_data: + for s, a, s_prime in traj: + effect = s_prime - s + matching_keys = [(key, op) for key, op in op_index.items() if key[0] == a and key[1] <= set(effect)] + matching_vals = [len(set(effect) - key[1]) for key, op in op_index.items() if key[0] == a and key[1] <= set(effect)] + if matching_keys: + best_key, _ = matching_keys[np.argmin(matching_vals)] # choose the match that leaves the fewest unexplained effects + demo_assignments[best_key].append((s, a, s_prime)) + if (frozenset(s), a, frozenset(s_prime)) in assigned_transitions: + assigned_transitions[(frozenset(s), a, frozenset(s_prime))] += 1 + else: + assigned_transitions[(frozenset(s), a, frozenset(s_prime))] = 1 + + # Assert total assignments match demo transitions + total_transitions = sum(len(traj) for traj, _ in demo_data) + assert sum(assigned_transitions.values()) == total_transitions, ( + f"Assigned transitions ({sum(assigned_transitions.values())}) != total demo transitions ({total_transitions})") + used_keys = set(demo_assignments.keys()) + learned_ops = [op for key, op in op_index.items() if key in used_keys] + + if verbose: + print("Backward Pass Result:") + for op in sorted(learned_ops, key=lambda x: x.action): + print(op) + + # Forward refinement + learned_ops = refine_by_plan_divergence(demo_data, learned_ops) + + if verbose: + print("Forward Pass Result:") + for op in sorted(learned_ops, key=lambda x: x.action): + print(op) + + return learned_ops + +# Evaluation + +def evaluate_learned_operators(learned_ops: List[Operator], true_ops: List[Operator], verbose=True, is_equal=True) -> Tuple[int, int]: + valid, invalid = 0, 0 + for true_op in true_ops: + match_found = False + for learned_op in learned_ops: + if is_equal: + if learned_op.action == true_op.action and is_equivalent(true_op, learned_op): + match_found = True + break + else: + if learned_op.action == true_op.action and is_covered_by(true_op, learned_op): + match_found = True + break + if match_found: + valid += 1 + if verbose: + print(f"VALID\n\tLEARNED | {learned_op}\n\tTRUE | {true_op}") + else: + invalid += 1
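+ # report ground-truth operators for which no learned operator matched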
+ if verbose: + print(f"INVALID\n\tLEARNED | MISSING\n\tTRUE | {true_op}") + return valid, invalid + +def augment_demos_with_missing_ground_truth_ops(demos, learned_ops, true_ops, num_preds, num_augments=1): + from collections import defaultdict + + learned_op_keys = set( + (op.action, frozenset(op.add), frozenset(op.delete), frozenset(op.pre)) + for op in learned_ops + ) + + augmented = [] + + for true_op in true_ops: + key = (true_op.action, frozenset(true_op.add), frozenset(true_op.delete), frozenset(true_op.pre)) + if key in learned_op_keys: + continue + + for _ in range(num_augments): + possible_goals = None + while not possible_goals: + base_state = set(random.sample(range(num_preds), random.randint(2, num_preds))) + false_pre = set() + for op in learned_ops: + if op.action == true_op.action and op.add == true_op.add: + false_pre |= op.pre - true_op.pre + base_state -= false_pre + base_state |= true_op.pre + next_state = true_op.apply(base_state) + possible_goals = true_op.add - base_state + + goal = possible_goals + demo = [(base_state.copy(), true_op.action, next_state.copy())] + augmented.append((demo, goal)) + + return demos + augmented + +def deduplicate_predicates_by_equivalence(demos, operators, num_preds): + from collections import defaultdict + + # Step 1: Build truth vectors for each predicate + pred_vectors = defaultdict(list) + + for traj, goal in demos: + for s, _, s_prime in traj: + for i in range(num_preds): + pred_vectors[i].append(int(i in s)) + pred_vectors[i].append(int(i in s_prime)) + for i in range(num_preds): + pred_vectors[i].append(int(i in goal)) + + # Step 2: Group predicates with identical truth vectors + vector_to_preds = defaultdict(list) + for pred, vec in pred_vectors.items(): + vector_to_preds[tuple(vec)].append(pred) + + # Step 3: Build a mapping from redundant predicate -> representative + replace_map = {} + for group in vector_to_preds.values(): + representative = min(group) # pick smallest index as canonical + for pred in group: + replace_map[pred] = representative + + # Step 4: Replace predicates in demos + new_demos = [] + for traj, goal in demos: + new_traj = [] + for s, a, s_prime in traj: + s_new = {replace_map[p] for p in s} + s_prime_new = {replace_map[p] for p in s_prime} + new_traj.append((s_new, a, s_prime_new)) + new_goal = {replace_map[p] for p in goal} + new_demos.append((new_traj, new_goal)) + + # Step 5: Replace predicates in operators + new_operators = [] + for op in operators: + pre = {replace_map[p] for p in op.pre} + add = {replace_map[p] for p in op.add} + delete = {replace_map[p] for p in op.delete} + new_operators.append(Operator(op.action, pre, add, delete)) + + return new_demos, new_operators, replace_map + +# Main Execution + +def main(): + pred_pool = list(range(NUM_PREDICATES)) + operators = [generate_random_operator(pred_pool, i) for i in range(NUM_OPERATORS)] + print("\n--- Ground Truth Operators ---") + for op in sorted(operators, key=lambda x: x.action): + print(op) + + demo_data = generate_planned_demo_trajectories(operators, NUM_TRAJS, max_depth=TRAJ_MAX) + print(f"\nGenerated {len(demo_data)} demo trajectories.") + + op_nums = {i: 0 for i in range(NUM_OPERATORS)} + for traj, goal in demo_data: + for t in traj: + op_nums[t[1]] += 1 + + print("\nOPERATOR DEMO COUNT:", op_nums,"\n") + + + results = {} + for op_set_idx in range(100): + learned_ops = learn_operators_from_demos(demo_data, max_iters=10, verbose=False) + op_index = {(op.action, frozenset(op.add)): op for op in learned_ops} + for (action, effect_frozen), 
entry in backward_infer_minimal_effects(demo_data, current_operators=learned_ops).items(): + if (action, effect_frozen) in op_index: + delete = op_index[(action, effect_frozen)].pre & set.intersection( + *[set(entry['demos'][i][0] - entry['demos'][i][2]) for i in range(len(entry['demos']))] + ) + op_index[(action, effect_frozen)].delete = delete + print(op_set_idx, sum([len(op.pre) for op in learned_ops])) + valid, invalid = evaluate_learned_operators(learned_ops, operators, verbose=False) + print(f"Summary: {valid} valid / {valid + invalid} total operators correctly learned.\n") + val = sum([len(op.pre) for op in learned_ops]) + if val in results: + results[val] += [float(valid) / float(valid + invalid)] + else: + results[val] = [float(valid) / float(valid + invalid)] + + print([(k, np.mean(v)) for k,v in sorted(results.items(), key=lambda x: np.mean(x[1]))]) + + print("\n--- Final Learned Operators ---") + for op in sorted(learned_ops, key=lambda x: x.action): + print(op) + + valid, invalid = evaluate_learned_operators(learned_ops, operators, verbose=True) + print(f"\nSummary: {valid} valid / {valid + invalid} total operators correctly learned.") + + valid, invalid = evaluate_learned_operators(learned_ops, operators, verbose=True, is_equal=False) + print(f"\n(Coverage) Summary: {valid} valid / {valid + invalid} total operators correctly learned.") + + + # Augment and re-evaluate + for round in range(1, 10): + demo_data = augment_demos_with_missing_ground_truth_ops(demo_data, learned_ops, operators, NUM_PREDICATES, num_augments=1) + learned_ops = learn_operators_from_demos(demo_data, max_iters=5, verbose=False) + op_index = {(op.action, frozenset(op.add)): op for op in learned_ops} + for (action, effect_frozen), entry in backward_infer_minimal_effects(demo_data, current_operators=learned_ops).items(): + if (action, effect_frozen) in op_index: + delete = op_index[(action, effect_frozen)].pre & set.intersection( + *[set(entry['demos'][i][0] - entry['demos'][i][2]) for i in range(len(entry['demos']))] + ) + op_index[(action, effect_frozen)].delete = delete + + print(f"\n--- After Augmentation Round {round} ---") + # for op in sorted(learned_ops, key=lambda x: x.action): + # print(op) + + valid, invalid = evaluate_learned_operators(learned_ops, operators, verbose=False) + print(f"Round {round} Summary: {valid} valid / {valid + invalid} total operators correctly learned.") + + valid, invalid = evaluate_learned_operators(learned_ops, operators, verbose=False, is_equal=False) + print(f"\n(Coverage) Summary: {valid} valid / {valid + invalid} total operators correctly learned.") + + print(f"\n--- After Augmentation Round {round} ---") + for op in sorted(learned_ops, key=lambda x: x.action): + print(op) + + ################## + + # PREDICATES = { + # "at_A": 0, + # "at_B": 1, + # "handempty": 2, + # "holding_block1": 3, + # "holding_block2": 4, + # "inside_block1": 7, + # "inside_block2": 8, + # } + + # OPERATORS = [ + # # move from B to A + # Operator(pre={PREDICATES["at_B"]}, add={PREDICATES["at_A"]}, delete={PREDICATES["at_B"]}, action=0), + # # move from A to B + # Operator(pre={PREDICATES["at_A"]}, add={PREDICATES["at_B"]}, delete={PREDICATES["at_A"]}, action=1), + + # # pick block1 + # Operator(pre={PREDICATES["at_A"], PREDICATES["handempty"]}, + # add={PREDICATES["holding_block1"]}, + # delete={PREDICATES["handempty"]}, + # action=2), + + # # pick block2 + # Operator(pre={PREDICATES["at_A"], PREDICATES["handempty"]}, + # add={PREDICATES["holding_block2"]}, + # 
delete={PREDICATES["handempty"]}, + # action=3), + + # # place block1 in box (at B) + # Operator(pre={PREDICATES["at_B"], PREDICATES["holding_block1"]}, + # add={PREDICATES["inside_block1"], PREDICATES["handempty"]}, + # delete={PREDICATES["holding_block1"]}, + # action=4), + + # # place block2 in box (at B) + # Operator(pre={PREDICATES["at_B"], PREDICATES["holding_block2"]}, + # add={PREDICATES["inside_block2"], PREDICATES["handempty"]}, + # delete={PREDICATES["holding_block2"]}, + # action=5), + # ] + + + # init_state = { + # PREDICATES["at_B"], PREDICATES["handempty"] + # } + + # actions = [0, 2, 1, 4, 0, 3, 1, 5] # move→pick→move→place (block1), move→pick→move→place (block2) + + # state = init_state.copy() + # traj1 = [] + + # for action_id in actions: + # op = OPERATORS[action_id] + # next_state = op.apply(state) + # traj1.append((state.copy(), action_id, next_state.copy())) + # state = next_state.copy() + + # goal1 = {PREDICATES["inside_block1"], PREDICATES["inside_block2"], PREDICATES["handempty"]} + + # actions = [0, 3, 1, 5, 0, 2, 1, 4] # move→pick→move→place (block1), move→pick→move→place (block2) + + # state = init_state.copy() + # traj2 = [] + + # for action_id in actions: + # op = OPERATORS[action_id] + # next_state = op.apply(state) + # traj2.append((state.copy(), action_id, next_state.copy())) + # state = next_state.copy() + + # goal2 = {PREDICATES["inside_block1"], PREDICATES["inside_block2"], PREDICATES["handempty"]} + # demo_data = [(traj1, goal1), (traj2, goal2)] + + # demo_data, operators, pred_replace_map = deduplicate_predicates_by_equivalence(demo_data, OPERATORS, NUM_PREDICATES) + # print("Predicate replacement map:", pred_replace_map) + + # print("\n--- Ground Truth Operators ---") + # for op in sorted(operators, key=lambda x: x.action): + # print(op) + # print() + + # print("Demos:") + # # for traj in demo_data: + # # print("Goal:", traj[1], "Length:", len(traj[0])) + # print(len(demo_data)) + + # learned_ops = learn_operators_from_demos(demo_data, max_iters=1000, verbose=True) + # op_index = {(op.action, frozenset(op.add)): op for op in learned_ops} + # for (action, effect_frozen), entry in backward_infer_minimal_effects(demo_data, current_operators=learned_ops).items(): + # if (action, effect_frozen) in op_index: + # delete = op_index[(action, effect_frozen)].pre & set.intersection( + # *[set(entry['demos'][i][0] - entry['demos'][i][2]) for i in range(len(entry['demos']))] + # ) + # op_index[(action, effect_frozen)].delete = delete + # valid, invalid = evaluate_learned_operators(learned_ops, operators, verbose=False) + # print(f"Summary: {valid} valid / {valid + invalid} total operators correctly learned.\n") + + + # print("\n--- Final Learned Operators ---") + # for op in sorted(learned_ops, key=lambda x: x.action): + # print(op) + + # valid, invalid = evaluate_learned_operators(learned_ops, operators, verbose=True) + # print(f"\nSummary: {valid} valid / {valid + invalid} total operators correctly learned.") + + # # Augment and re-evaluate + # demo_data = augment_demos_with_missing_ground_truth_ops(demo_data, learned_ops, operators, NUM_PREDICATES, num_augments=1) + # learned_ops = learn_operators_from_demos(demo_data, max_iters=5, verbose=False) + # op_index = {(op.action, frozenset(op.add)): op for op in learned_ops} + # for (action, effect_frozen), entry in backward_infer_minimal_effects(demo_data, current_operators=learned_ops).items(): + # if (action, effect_frozen) in op_index: + # delete = op_index[(action, effect_frozen)].pre & set.intersection( + # 
*[set(entry['demos'][i][0] - entry['demos'][i][2]) for i in range(len(entry['demos']))] + # ) + # op_index[(action, effect_frozen)].delete = delete + + # print(f"\n--- After Augmentation Round {round} ---") + # for op in sorted(learned_ops, key=lambda x: x.action): + # print(op) + + # valid, invalid = evaluate_learned_operators(learned_ops, operators, verbose=True) + # print(f"HITL Summary: {valid} valid / {valid + invalid} total operators correctly learned.") + +# Batch Evaluation Experiment + +def main_experiment(): + results = { + "num_actions": [], + "equivalent": [], "covered": [], "overfit": [], "missed": [], + "hitl_1_equivalent": [], "hitl_1_covered": [], "hitl_1_overfit": [], "hitl_1_missed": [], + "hitl_5_equivalent": [], "hitl_5_covered": [], "hitl_5_overfit": [], "hitl_5_missed": [] + } + + for num_trajs in range(1, 102, 10): + for run_i in range(100): + pred_pool = list(range(NUM_PREDICATES)) + operators = [generate_random_operator(pred_pool, i) for i in range(NUM_OPERATORS)] + all_demo_data = generate_planned_demo_trajectories(operators, 110, max_depth=TRAJ_MAX) + # all_demo_data, operators, pred_replace_map = deduplicate_predicates_by_equivalence(all_demo_data, operators, NUM_PREDICATES) + # print("Predicate replacement map:", pred_replace_map) + + demo_data = all_demo_data[:num_trajs] + unique_actions = set() + for traj, _ in demo_data: + for (s, a, s_prime) in traj: + unique_actions.add(a) + results["num_actions"].append((num_trajs, len(unique_actions))) + + refined_ops = learn_operators_from_demos(demo_data, max_iters=1000, verbose=False) + op_index = {(op.action, frozenset(op.add)): op for op in refined_ops} + for (action, effect_frozen), entry in backward_infer_minimal_effects(demo_data, current_operators=refined_ops).items(): + if (action, effect_frozen) in op_index: + delete = op_index[(action, effect_frozen)].pre & set.intersection( + *[set(entry['demos'][i][0] - entry['demos'][i][2]) for i in range(len(entry['demos']))] + ) + op_index[(action, effect_frozen)].delete = delete + + refined_ops_hitl = augment_demos_with_missing_ground_truth_ops( + demo_data, list(op_index.values()), operators, NUM_PREDICATES, num_augments=1 + ) + refined_ops_hitl5 = augment_demos_with_missing_ground_truth_ops( + demo_data, list(op_index.values()), operators, NUM_PREDICATES, num_augments=5 + ) + + def count_matches(learned_ops): + eq, cov, ofit = 0, 0, 0 + done_ops = set() + for actual_op in operators: + for op in learned_ops: + if op.action == actual_op.action: + if (actual_op.action, frozenset(actual_op.add)) not in done_ops: + if op.pre == actual_op.pre and op.add == actual_op.add and op.delete == actual_op.delete: + eq += 1 + done_ops.add((actual_op.action, frozenset(actual_op.add))) + elif is_covered_by(actual_op, op): + cov += 1 + done_ops.add((actual_op.action, frozenset(actual_op.add))) + elif len(op.pre - actual_op.pre) <= 2 and op.add == actual_op.add and op.delete == actual_op.delete: + ofit += 1 + done_ops.add((actual_op.action, frozenset(actual_op.add))) + return eq, cov, ofit + + eq, cov, ofit = count_matches(list(op_index.values())) + miss = len(unique_actions) - (eq+cov+ofit) + results["equivalent"].append((num_trajs, run_i, eq)) + results["covered"].append((num_trajs, run_i, cov)) + results["overfit"].append((num_trajs, run_i, ofit)) + results["missed"].append((num_trajs, run_i, miss)) + + ops_hitl1 = learn_operators_from_demos(refined_ops_hitl, max_iters=1000, verbose=False) + eq1, cov1, ofit1 = count_matches(ops_hitl1) + miss1 = NUM_OPERATORS - (eq1+cov1+ofit1) + 
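# record HITL-1 match counts for this number of demos and run +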
results["hitl_1_equivalent"].append((num_trajs, run_i, eq1)) + results["hitl_1_covered"].append((num_trajs, run_i, cov1)) + results["hitl_1_overfit"].append((num_trajs, run_i, ofit1)) + results["hitl_1_missed"].append((num_trajs, run_i, miss1)) + + ops_hitl5 = learn_operators_from_demos(refined_ops_hitl5, max_iters=1000, verbose=False) + eq5, cov5, ofit5 = count_matches(ops_hitl5) + miss5 = NUM_OPERATORS - (eq5+cov5+ofit5) + results["hitl_5_equivalent"].append((num_trajs, run_i, eq5)) + results["hitl_5_covered"].append((num_trajs, run_i, cov5)) + results["hitl_5_overfit"].append((num_trajs, run_i, ofit5)) + results["hitl_5_missed"].append((num_trajs, run_i, miss5)) + + print(f"Trajs: {num_trajs}, Actions: {len(unique_actions)}, Run: {run_i}, Eq: {eq}, Cov: {cov}, Ofit: {ofit}, Missed: {miss}, HITL1: Eq={eq1}, Cov={cov1}, Ofit={ofit1}, Missed={miss1}, HITL5: Eq={eq5}, Cov={cov5}, Ofit={ofit5}, Missed={miss5}") + + import pickle + with open('HITL_experiment_results_random.pkl', 'wb') as f: + pickle.dump(results, f) + +if __name__ == "__main__": + main() + main_experiment() \ No newline at end of file diff --git a/test_lbf_plot.py b/test_lbf_plot.py new file mode 100644 index 0000000000..0b0adc0b60 --- /dev/null +++ b/test_lbf_plot.py @@ -0,0 +1,83 @@ +import pickle +import matplotlib.pyplot as plt +from collections import defaultdict +import numpy as np + +# Load results +with open("HITL_experiment_results_random.pkl", "rb") as f: + results = pickle.load(f) + +NUM_OPERATORS = 10 # ensure consistency if changed in experiment + +# Helper: aggregate to mean/std +def aggregate(metric): + agg = defaultdict(list) + for num_trajs, _, score in results[metric]: + agg[num_trajs].append(score) + means = {k: np.mean(v) for k, v in agg.items()} + stds = {k: np.std(v) for k, v in agg.items()} + return means, stds + +# Compute cumulative scores +def compute_total(eq, cov, ofit): + cov_total = {k: eq[k] + cov[k] for k in eq} + ofit_total = {k: cov_total[k] + ofit[k] for k in eq} + return cov_total, ofit_total + +# Get all metrics +eq, eq_std = aggregate("equivalent") +cov, cov_std = aggregate("covered") +ofit, ofit_std = aggregate("overfit") +miss, miss_std = aggregate("missed") + +eq1, eq1_std = aggregate("hitl_1_equivalent") +cov1, cov1_std = aggregate("hitl_1_covered") +ofit1, ofit1_std = aggregate("hitl_1_overfit") +miss1, miss1_std = aggregate("hitl_1_missed") + +eq5, eq5_std = aggregate("hitl_5_equivalent") +cov5, cov5_std = aggregate("hitl_5_covered") +ofit5, ofit5_std = aggregate("hitl_5_overfit") +miss5, miss5_std = aggregate("hitl_5_missed") + +# Compute cumulative +cov_total, ofit_total = compute_total(eq, cov, ofit) +cov1_total, ofit1_total = compute_total(eq1, cov1, ofit1) +cov5_total, ofit5_total = compute_total(eq5, cov5, ofit5) + +# Plot +plt.figure(figsize=(14, 8)) +x_vals = sorted(eq) + +def plot_with_error(x, y_mean, y_std, label, color, linestyle): + y = [y_mean[k] for k in x] + err = [y_std.get(k, 0) for k in x] + plt.errorbar(x, y, yerr=err, label=label, fmt=linestyle, color=color, capsize=4) + +# Baseline +# plot_with_error(x_vals, eq, eq_std, "Exact", "black", "o-") +# plot_with_error(x_vals, cov_total, cov_std, "Covered (incl. exact)", "black", "--") +# plot_with_error(x_vals, ofit_total, ofit_std, "Overfit (incl. 
cov)", "black", ":") +# plot_with_error(x_vals, miss, miss_std, "Missed", "black", "-.") + +# # HITL-1 +plot_with_error(x_vals, eq1, eq1_std, "HITL-1 Exact", "blue", "o-") +plot_with_error(x_vals, cov1_total, cov1_std, "HITL-1 Covered", "blue", "--") +plot_with_error(x_vals, ofit1_total, ofit1_std, "HITL-1 Overfit", "blue", ":") +# plot_with_error(x_vals, miss1, miss1_std, "HITL-1 Missed", "blue", "-.") + +# HITL-5 +# plot_with_error(x_vals, eq5, eq5_std, "HITL-5 Exact", "green", "o-") +# plot_with_error(x_vals, cov5_total, cov5_std, "HITL-5 Covered", "green", "--") +# plot_with_error(x_vals, ofit5_total, ofit5_std, "HITL-5 Overfit", "green", ":") +# plot_with_error(x_vals, miss5, miss5_std, "HITL-5 Missed", "green", "-.") + +plt.xlabel("Number of Demonstrations") +plt.ylabel("Operators") +plt.title("Operator Learning Comparison: Exact, Covered, Overfit, Missed") +plt.legend(loc="upper left", fontsize="small", ncol=2) +plt.grid(True) +plt.ylim(0, 10) +plt.tight_layout() +plt.savefig("operator_learning_summary.png") +plt.show() diff --git a/test_minibehavior_envs.txt b/test_minibehavior_envs.txt new file mode 100644 index 0000000000..c03a2ccdb3 --- /dev/null +++ b/test_minibehavior_envs.txt @@ -0,0 +1,24 @@ +python3 test_solve_task.py --env "MiniGrid-CollectMisplacedItems-16x16-N2-v0" --save True --seed 0 + +[ + 'MiniGrid-BoxingBooksUpForStorage-16x16-N2-v0', + 'MiniGrid-CleaningACar-16x16-N2-v0', + 'MiniGrid-CleaningShoes-16x16-N2-v0', #1 + 'MiniGrid-CleaningUpTheKitchenOnly-16x16-N2-v0', + 'MiniGrid-CollectMisplacedItems-16x16-N2-v0', + 'MiniGrid-InstallingAPrinter-16x16-N2-v0', + 'MiniGrid-LayingWoodFloors-16x16-N2-v0', + 'MiniGrid-MakingTea-16x16-N2-v0', + 'MiniGrid-MovingBoxesToStorage-16x16-N2-v0', + 'MiniGrid-OpeningPackages-16x16-N2-v0', + 'MiniGrid-OrganizingFileCabinet-16x16-N2-v0', + [DEBUG]'MiniGrid-PreparingSalad-16x16-N2-v0', + 'MiniGrid-PuttingAwayDishesAfterCleaning-16x16-N2-v0', + 'MiniGrid-SettingUpCandles-16x16-N2-v0', #1 + 'MiniGrid-SortingBooks-16x16-N2-v0', + 'MiniGrid-StoringFood-16x16-N2-v0', + [DEBUG]'MiniGrid-ThawingFrozenFood-16x16-N2-v0', + 'MiniGrid-ThrowingAwayLeftovers-16x16-N2-v0', + 'MiniGrid-WashingPotsAndPans-16x16-N2-v0', + 'MiniGrid-WateringHouseplants-16x16-N2-v0' +] \ No newline at end of file diff --git a/test_minigrid.py b/test_minigrid.py new file mode 100644 index 0000000000..51bae0683b --- /dev/null +++ b/test_minigrid.py @@ -0,0 +1,14 @@ +import gymnasium as gym +env = gym.make("MiniGrid-Fetch-8x8-N3-v0", render_mode="human") +observation, info = env.reset(seed=42) +import ipdb; ipdb.set_trace() +for _ in range(1000): + action = int(input("Action: ")) # action entered manually by the user + observation, reward, terminated, truncated, info = env.step(action) + + if terminated or truncated: + observation, info = env.reset() +env.close() + +# Need a look at new region operator + diff --git a/test_operator_learning_all.py b/test_operator_learning_all.py new file mode 100644 index 0000000000..4cebb643f1 --- /dev/null +++ b/test_operator_learning_all.py @@ -0,0 +1,445 @@ +import numpy as np +from gym.spaces import Box +import re +import pickle as pkl + +from predicators import utils +from predicators.nsrt_learning.nsrt_learning_main import learn_nsrts_from_data +from predicators.structs import Action, LowLevelTrajectory, Predicate, State, \ + Type, GroundAtom, Task +import glob + +demo_files = sorted([filename for filename in glob.glob("/Users/shashlik/Documents/GitHub/predicators/demos/*/*")]) +demo_tasks = set([demo_file.split("/")[-1].split("_")[0] for
demo_file in demo_files]) + +utils.reset_config({ + "strips_learner": "pnad_search", + "segmenter": "every_step", + "disable_harmlessness_check": True, + "pnad_search_load_initial": False, + "min_data_for_nsrt": 0, + "min_perc_data_for_nsrt": 0, + "pnad_search_timeout":1000.0 + }) + +# Load and do this from MiniBehavior Demo + +def get_demo_traj(demo_file, verbose=True): + with open(demo_file, 'rb') as f: + data = pkl.load(f) + + last_skill = "Move" + state = [a for a in data[1][1] if "infovofrobot" not in a] + states = [state] + actions = [] + for step in data.keys(): + obs = data[step][0]['image'] + direction = data[step][0]['direction'] + action = data[step][2] + skill = None + + if "forward" in str(action) or \ + "left" in str(action) or \ + "right" in str(action): + + skill = "Move" + else: + skill = str(action) + + has_effect = True + try: + next_obs = data[step][3]['image'] + next_direction = data[step][3]['direction'] + if np.allclose(obs, next_obs) and (direction == next_direction): + has_effect = False + except: + pass + + if has_effect: + if last_skill != skill: + if verbose: + print("#") + print(last_skill) + try: + next_state = [a for a in data[step][1] if "infovofrobot" not in a] + if verbose: + print("PREV:", set(state)) + print("ADD:", set(next_state) - set(state)) + print("DEL:", set(state) - set(next_state)) + state = next_state + actions.append(last_skill) + states.append(state) + except: + pass + last_skill = skill + else: + if verbose: + print("#") + print(last_skill) + next_state = [a for a in data[step][4] if "infovofrobot" not in a] + if verbose: + print("PREV:", set(state)) + print("ADD:", set(next_state) - set(state)) + print("DEL:", set(state) - set(next_state)) + state = next_state + if verbose: + print("#") + actions.append(last_skill) + states.append(state) + + return LowLevelTrajectory(states, actions, _is_demo=True, _train_task_idx=0) + +def parse_objs_preds_and_options(trajectory, train_task_idx=0): + objs = set() + preds = set() + options = set() + state = None + states = [] + actions = [] + ground_atoms_traj = [] + obj_type = Type("obj_type", ["is_obj"]) + + for i, s in enumerate(trajectory.states): + ground_atoms = set() + for pred_str in s: + pred = None + choice = [] + pattern = re.compile(r"(\w+)\((.*?)\)") + match = pattern.match(pred_str) + if match: + func_name = match.group(1) + args = match.group(2).split(',') if match.group(2) else [] + for arg in args: + obj = obj_type(arg.strip()) + choice.append(obj) + objs.add(obj) + if len(args) == 1: + pred = Predicate(func_name, [obj_type], lambda s, o: True) + preds.add(pred) + elif len(args) == 2: + pred = Predicate(func_name, [obj_type, obj_type], lambda s, o: True) + preds.add(pred) + else: + raise NotImplementedError(pred_str) + ground_atoms.add(GroundAtom(pred, choice)) + states.append(state) + ground_atoms_traj.append(ground_atoms) + + if i < len(trajectory.actions): + a_name = trajectory.actions[i] + name_to_actions = { + "Move": 0, + "Actions.pickup_0": 3, + "Actions.pickup_1": 4, + "Actions.pickup_2": 5, + "Actions.drop_0": 6, + "Actions.drop_1": 7, + "Actions.drop_2": 8, + "Actions.drop_in": 9, + "Actions.toggle": 10, + "Actions.close": 11, + "Actions.open": 12, + "Actions.cook": 13, + "Actions.slice": 14 + } + + param_option = utils.SingletonParameterizedOption( + a_name, lambda s, m, o, p, _name=a_name: Action(name_to_actions[_name])) # bind a_name at definition time to avoid late-binding in the closure + options.add(param_option) + option = param_option.ground([], []) + action = option.policy(state) + action.set_option(option) + actions.append(action) + + return objs,
preds, options, (LowLevelTrajectory([{obj:[0.0] for obj in objs} for _ in states], actions, _is_demo=True, _train_task_idx=train_task_idx), ground_atoms_traj) + + +# dataset = [] +# ground_atom_dataset = [] +# tasks = [] +# action_space = Box(0, 7, (1, )) + +# task_name = "MiniGrid-BoxingBooksUpForStorage-16x16-N2-v0" +# for demo_file in demo_files: +# if task_name in demo_file: +# demo_traj = get_demo_traj(demo_file=demo_file, verbose=False) + +# idx = len(dataset) +# demo_traj = LowLevelTrajectory(demo_traj.states[0:5], demo_traj.actions[0:4], _is_demo=True, _train_task_idx=idx) + +# dataset += [demo_traj] +# objs, preds, options, ground_atoms_traj = parse_objs_preds_and_options(demo_traj, train_task_idx=idx) +# ground_atom_dataset += [ground_atoms_traj] +# goal = set([atom for atom in ground_atoms_traj[1][-1] if "inside(" in str(atom)]) +# tasks += [Task(State({}, None), goal)] + +# print("#"*30) +# print(task_name) +# print("#"*30) + +# nsrts, _, _ = learn_nsrts_from_data(dataset, tasks, +# preds, +# options, +# action_space, +# ground_atom_dataset, +# sampler_learner="neural", +# annotations=None) + +# assert len(nsrts) == 3 +# import ipdb; ipdb.set_trace() +# quit() + + +# dataset = [] +# ground_atom_dataset = [] +# tasks = [] +# action_space = Box(0, 7, (1, )) + +# task_name = "MiniGrid-CollectMisplacedItems-16x16-N2-v0" +# for demo_file in demo_files: +# if task_name in demo_file: +# demo_traj = get_demo_traj(demo_file=demo_file, verbose=False) + +# idx = len(dataset) +# demo_traj = LowLevelTrajectory(demo_traj.states[0:5], demo_traj.actions[0:4], _is_demo=True, _train_task_idx=idx) + +# dataset += [demo_traj] +# objs, preds, options, ground_atoms_traj = parse_objs_preds_and_options(demo_traj, train_task_idx=idx) +# ground_atom_dataset += [ground_atoms_traj] +# goal = set([atom for atom in ground_atoms_traj[1][-1] if "onTop(" in str(atom) and "table_1" in str(atom)]) +# tasks += [Task(State({}, None), goal)] + +# print("#"*30) +# print(task_name) +# print("#"*30) + +# nsrts, _, _ = learn_nsrts_from_data(dataset, tasks, +# preds, +# options, +# action_space, +# ground_atom_dataset, +# sampler_learner="neural", +# annotations=None) + +# assert len(nsrts) == 3 + +# import ipdb; ipdb.set_trace() + +# dataset = [] +# ground_atom_dataset = [] +# tasks = [] +# action_space = Box(0, 7, (1, )) +# all_options = set() + +# task_name = "MiniGrid-SortingBooks-16x16-N2-v0" +# for demo_file in demo_files: +# if task_name in demo_file: +# demo_traj = get_demo_traj(demo_file=demo_file, verbose=False) + +# idx = len(dataset) +# demo_traj = LowLevelTrajectory(demo_traj.states[0:5], demo_traj.actions[0:4], _is_demo=True, _train_task_idx=idx) + +# dataset += [demo_traj] +# objs, preds, options, ground_atoms_traj = parse_objs_preds_and_options(demo_traj, train_task_idx=idx) +# all_options = all_options | options +# ground_atom_dataset += [ground_atoms_traj] +# goal = set([atom for atom in ground_atoms_traj[1][-1] if "onTop(" in str(atom) and "shelf" in str(atom)]) +# tasks += [Task(State({}, None), goal)] + +# print("#"*30) +# print(task_name) +# print("#"*30) + +# nsrts, _, _ = learn_nsrts_from_data(dataset, tasks, +# preds, +# all_options, +# action_space, +# ground_atom_dataset, +# sampler_learner="neural", +# annotations=None) + +# import ipdb; ipdb.set_trace() +# # assert len(nsrts) == 3 + +# dataset = [] +# ground_atom_dataset = [] +# tasks = [] +# action_space = Box(0, 7, (1, )) + +# task_name = "MiniGrid-ThrowingAwayLeftovers-16x16-N2-v0" +# for demo_file in demo_files: +# if task_name in 
demo_file: +# demo_traj = get_demo_traj(demo_file=demo_file, verbose=False) + +# idx = len(dataset) +# demo_traj = LowLevelTrajectory(demo_traj.states[0:5], demo_traj.actions[0:4], _is_demo=True, _train_task_idx=idx) + +# dataset += [demo_traj] +# objs, preds, options, ground_atoms_traj = parse_objs_preds_and_options(demo_traj, train_task_idx=idx) +# ground_atom_dataset += [ground_atoms_traj] +# goal = set([atom for atom in ground_atoms_traj[1][-1] if "inside(" in str(atom)]) +# tasks += [Task(State({}, None), goal)] + +# print("#"*30) +# print(task_name) +# print("#"*30) + +# nsrts, _, _ = learn_nsrts_from_data(dataset, tasks, +# preds, +# options, +# action_space, +# ground_atom_dataset, +# sampler_learner="neural", +# annotations=None) + +# # assert len(nsrts) == 3 + + +# dataset = [] +# ground_atom_dataset = [] +# tasks = [] +# action_space = Box(0, 7, (1, )) + +# task_name = "MiniGrid-PuttingAwayDishesAfterCleaning-16x16-N2-v0" +# for demo_file in demo_files: +# if task_name in demo_file: +# demo_traj = get_demo_traj(demo_file=demo_file, verbose=False) + +# idx = len(dataset) +# demo_traj = LowLevelTrajectory(demo_traj.states, demo_traj.actions, _is_demo=True, _train_task_idx=idx) + +# dataset += [demo_traj] +# objs, preds, options, ground_atoms_traj = parse_objs_preds_and_options(demo_traj, train_task_idx=idx) +# ground_atom_dataset += [ground_atoms_traj] +# goal = set([atom for atom in ground_atoms_traj[1][-1] if "inside(" in str(atom)]) +# tasks += [Task(State({}, None), goal)] + +# print("#"*30) +# print(task_name) +# print("#"*30) + +# print("Skipped") + +# # nsrts, _, _ = learn_nsrts_from_data(dataset, tasks, +# # preds, +# # options, +# # action_space, +# # ground_atom_dataset, +# # sampler_learner="neural", +# # annotations=None) + +# # assert len(nsrts) == 2 + + +# #### BROKEN ##### +# dataset = [] +# ground_atom_dataset = [] +# tasks = [] +# action_space = Box(0, 7, (1, )) + +# task_name = "MiniGrid-CleaningACar-16x16-N2-v0" +# for demo_file in demo_files: +# if task_name in demo_file: +# demo_traj = get_demo_traj(demo_file=demo_file, verbose=False) + +# idx = len(dataset) +# demo_traj = LowLevelTrajectory(demo_traj.states, demo_traj.actions, _is_demo=True, _train_task_idx=idx) + +# dataset += [demo_traj] +# objs, preds, options, ground_atoms_traj = parse_objs_preds_and_options(demo_traj, train_task_idx=idx) +# ground_atom_dataset += [ground_atoms_traj] +# goal = set([atom for atom in ground_atoms_traj[1][-1] if "inside(" in str(atom) or "dustyable(" in str(atom)]) +# tasks += [Task(State({}, None), goal)] + +# print("#"*30) +# print(task_name) +# print("#"*30) + +# print("Broken - No dustyable") + +# # import ipdb; ipdb.set_trace() + +# # nsrts, _, _ = learn_nsrts_from_data(dataset, tasks, +# # preds, +# # options, +# # action_space, +# # ground_atom_dataset, +# # sampler_learner="neural", +# # annotations=None) + +# # assert len(nsrts) == 2 + +# dataset = [] +# ground_atom_dataset = [] +# tasks = [] +# action_space = Box(0, 7, (1, )) + +# task_name = "MiniGrid-WateringHouseplants-16x16-N2-v0" +# for demo_file in demo_files: +# if task_name in demo_file: +# demo_traj = get_demo_traj(demo_file=demo_file, verbose=False) + +# idx = len(dataset) +# demo_traj = LowLevelTrajectory(demo_traj.states, demo_traj.actions, _is_demo=True, _train_task_idx=idx) + +# dataset += [demo_traj] +# objs, preds, options, ground_atoms_traj = parse_objs_preds_and_options(demo_traj, train_task_idx=idx) +# ground_atom_dataset += [ground_atoms_traj] +# goal = set([atom for atom in 
ground_atoms_traj[1][-1] if "soakable(" in str(atom)]) +# tasks += [Task(State({}, None), goal)] + +# print("#"*30) +# print(task_name) +# print("#"*30) + +# print("Skipped") + +# # nsrts, _, _ = learn_nsrts_from_data(dataset, tasks, +# # preds, +# # options, +# # action_space, +# # ground_atom_dataset, +# # sampler_learner="neural", +# # annotations=None) + +# # assert len(nsrts) == 2 + +# dataset = [] +# ground_atom_dataset = [] +# tasks = [] +# action_space = Box(0, 7, (1, )) + +# task_name = "MiniGrid-OpeningPackages-16x16-N2-v0" +# for demo_file in demo_files: +# if task_name in demo_file: +# demo_traj = get_demo_traj(demo_file=demo_file, verbose=False) + +# idx = len(dataset) +# demo_traj = LowLevelTrajectory(demo_traj.states, demo_traj.actions, _is_demo=True, _train_task_idx=idx) + +# dataset += [demo_traj] +# objs, preds, options, ground_atoms_traj = parse_objs_preds_and_options(demo_traj, train_task_idx=idx) +# ground_atom_dataset += [ground_atoms_traj] +# goal = set([atom for atom in ground_atoms_traj[1][-1] if "openable(" in str(atom)]) +# tasks += [Task(State({}, None), goal)] + +# print("#"*30) +# print(task_name) +# print("#"*30) + +# nsrts, _, _ = learn_nsrts_from_data(dataset, tasks, +# preds, +# options, +# action_space, +# ground_atom_dataset, +# sampler_learner="neural", +# annotations=None) + +# assert len(nsrts) == 2 + + +# ########################################## +# # Generate Random Operator Demos +# ########################################## + + diff --git a/test_random_operator_learning.py b/test_random_operator_learning.py new file mode 100644 index 0000000000..5749f0525e --- /dev/null +++ b/test_random_operator_learning.py @@ -0,0 +1,20 @@ + + + + +# TODO # +# 1. Generate Random Operators +# - Parameters +# - Precondions +# - Add Effects +# - Del Effects +# - Option +# 2. Generate Random Tasks +# - Current Atoms +# - Goal Atoms +# 3. Use those Operators and Task to generate trajectory data +# - Search (Output Plan) +# - Step by Step get states and actions +# 4. Learn NSRTs +# 5. 
Assert these are the same as the Random Operators + diff --git a/test_saved.NSRTs.txt b/test_saved.NSRTs.txt new file mode 100644 index 0000000000..ffd890216c --- /dev/null +++ b/test_saved.NSRTs.txt @@ -0,0 +1,7 @@ +NSRT-Move0: + Parameters: [?x0:obj_type] + Preconditions: [] + Add Effects: [inreachofrobot(?x0:obj_type)] + Delete Effects: [~inreachofrobot(?x0:obj_type)] + Ignore Effects: [inreachofrobot, ~inreachofrobot] + Option Spec: Move() \ No newline at end of file diff --git a/test_saved.NSRTs_copy.txt b/test_saved.NSRTs_copy.txt new file mode 100644 index 0000000000..ae9e5c8116 --- /dev/null +++ b/test_saved.NSRTs_copy.txt @@ -0,0 +1,14 @@ +NSRT-Move0: + Parameters: [?x0:obj_type] + Preconditions: [atsamelocation(?x0:obj_type, ?x0:obj_type), insameroomasrobot(?x0:obj_type), onfloor(?x0:obj_type)] + Add Effects: [inreachofrobot(?x0:obj_type)] + Delete Effects: [] + Ignore Effects: [inreachofrobot] + Option Spec: Move() +NSRT-Actions.open0: + Parameters: [?x0:obj_type] + Preconditions: [atsamelocation(?x0:obj_type, ?x0:obj_type), inreachofrobot(?x0:obj_type), insameroomasrobot(?x0:obj_type), onfloor(?x0:obj_type)] + Add Effects: [openable(?x0:obj_type)] + Delete Effects: [] + Ignore Effects: [] + Option Spec: Actions.open() diff --git a/test_segment_traj.py b/test_segment_traj.py new file mode 100644 index 0000000000..07f6452bda --- /dev/null +++ b/test_segment_traj.py @@ -0,0 +1,101 @@ +import pickle as pkl +import numpy as np +from predicators.structs import Action, LowLevelTrajectory, Predicate, State, \ + Type +from test_operator_learning_all import get_demo_traj, demo_files +from test_colla_results import OperatorLearningAgent + +completed = [ + 'MiniGrid-BoxingBooksUpForStorage-16x16-N2-v0', + 'MiniGrid-CleaningACar-16x16-N2-v0', + 'MiniGrid-CleaningShoes-16x16-N2-v0', #1 + 'MiniGrid-CleaningUpTheKitchenOnly-16x16-N2-v0', + 'MiniGrid-CollectMisplacedItems-16x16-N2-v0', + 'MiniGrid-InstallingAPrinter-16x16-N2-v0', + 'MiniGrid-LayingWoodFloors-16x16-N2-v0', + 'MiniGrid-MakingTea-16x16-N2-v0', + 'MiniGrid-MovingBoxesToStorage-16x16-N2-v0', + 'MiniGrid-OpeningPackages-16x16-N2-v0', + 'MiniGrid-OrganizingFileCabinet-16x16-N2-v0', + #[DEBUG]'MiniGrid-PreparingSalad-16x16-N2-v0', + 'MiniGrid-PuttingAwayDishesAfterCleaning-16x16-N2-v0', + 'MiniGrid-SettingUpCandles-16x16-N2-v0', #1 + 'MiniGrid-SortingBooks-16x16-N2-v0', + 'MiniGrid-StoringFood-16x16-N2-v0', + #[DEBUG]'MiniGrid-ThawingFrozenFood-16x16-N2-v0', + 'MiniGrid-ThrowingAwayLeftovers-16x16-N2-v0', + 'MiniGrid-WashingPotsAndPans-16x16-N2-v0', + 'MiniGrid-WateringHouseplants-16x16-N2-v0' +] + +task_info = {} + +for demo_file in demo_files: + # print("#"*60) + # print(demo_file.split("/")[-1]) + # print("#"*60) + # print("# PLAN #") + traj = get_demo_traj(demo_file, verbose=False) + add_count = 0 + for i, action in enumerate(traj.actions): + curr_state = set(traj.states[i]) + next_state = set(traj.states[i+1]) + del_effs = curr_state - next_state + add_effs = next_state - curr_state + # print(action) + # print("DEL:", del_effs) + # print("ADD:", add_effs) + # print() + add_count += len(add_effs) + assert len(add_effs) != 0 or str(action) == "Move" + task_name = demo_file.split("/")[-1].split("_")[0] + agent = OperatorLearningAgent("cluster-intersect", strips_learner="cluster_and_intersect") + agent.get_data(task_name=task_name) + goal = agent.parse_goal(task_name=task_name, ground_atoms_state=agent.ground_atoms_traj[1][-1]) + task_info[demo_file.split("/")[-1]] = (len(traj.actions), len(goal), add_count) + +i = 0 +for k,v in 
sorted([(k,v) for k,v in task_info.items()], key=lambda x: x[1][2]): # by add effects + i+=1 + print("|", v[0], "| goal length:", v[1], "| add count:", v[2], "|", k.split("_")[0], i) + # for atom in agent.parse_goal(task_name=task_name, ground_atoms_state=agent.ground_atoms_traj[1][-1]): + # print(atom) + +######################################### + +# ### NEED To Turn Images into Objects or Save Object Centric State +# from minigrid.wrappers import * +# from mini_behavior.states import * + +# env = gym.make('MiniGrid-SortingBooks-16x16-N2-v0') +# env.reset() + +# # AbilityState +# # AbsoluteObjectState +# # RelativeObjectState +# # ObjectProperty + +# def get_lifted_state(env): +# mb_state = env.get_state() +# grid = mb_state['grid'] +# agent_pos = mb_state['agent_pos'] +# agent_dir = mb_state['agent_dir'] +# objs = mb_state['objs'] +# obj_instances = mb_state['obj_instances'] +# ground_atoms = [] +# for k, o in obj_instances.items(): +# for pred_name, pred in o.states.items(): +# if isinstance(o.states[pred_name], AbsoluteObjectState): +# if o.states[pred_name].get_value(env): +# ground_atoms.append(pred_name+'('+k+')') +# elif isinstance(o.states[pred_name], AbilityState): +# if o.states[pred_name].get_value(env): +# ground_atoms.append(pred_name+'('+k+')') +# elif isinstance(o.states[pred_name], ObjectProperty): +# if o.states[pred_name].get_value(env): +# ground_atoms.append(pred_name+'('+k+')') +# elif isinstance(o.states[pred_name], RelativeObjectState): +# for k2, o2 in obj_instances.items(): +# if o.states[pred_name].get_value(o2, env=env): +# ground_atoms.append(pred_name+'('+k+','+k2+')') +# return ground_atoms \ No newline at end of file diff --git a/test_solve_task.py b/test_solve_task.py new file mode 100644 index 0000000000..d419502719 --- /dev/null +++ b/test_solve_task.py @@ -0,0 +1,307 @@ +#!/usr/bin/env python3 + +import argparse +from minigrid.wrappers import * +from mini_behavior.window import Window +from mini_behavior.utils.save import get_step, save_demo +from mini_behavior.grid import GridDimension +import numpy as np +from PIL import Image +from mini_behavior.states import * + +# Size in pixels of a tile in the full-scale human view +TILE_PIXELS = 32 +show_furniture = False + + +def redraw(img): + if not args.agent_view: + img = env.render() + window.no_closeup() + window.set_inventory(env) + window.show_img(img) + image_path = "output_image.jpeg" + window.save_img(image_path) + +def render_furniture(): + global show_furniture + show_furniture = not show_furniture + + if show_furniture: + img = np.copy(env.furniture_view) + + # i, j = env.agent.cur_pos + i, j = env.agent_pos + ymin = j * TILE_PIXELS + ymax = (j + 1) * TILE_PIXELS + xmin = i * TILE_PIXELS + xmax = (i + 1) * TILE_PIXELS + + img[ymin:ymax, xmin:xmax, :] = GridDimension.render_agent( + img[ymin:ymax, xmin:xmax, :], env.agent_dir) + img = env.render_furniture_states(img) + + window.show_img(img) + else: + obs = env.gen_obs() + redraw(obs) + + +def show_states(): + imgs = env.render_states() + window.show_closeup(imgs) + + +def reset(): + if args.seed != -1: + env.seed(args.seed) + + obs = env.reset() + + if hasattr(env, 'mission'): + print('Mission: %s' % env.mission) + window.set_caption(env.mission) + + redraw(obs) + + +def load(): + if args.seed != -1: + env.seed(args.seed) + + env.reset() + obs = env.load_state(args.load) + + if hasattr(env, 'mission'): + print('Mission: %s' % env.mission) + window.set_caption(env.mission) + + redraw(obs) + +def get_lifted_state(env): + objs = env.objs + 
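# collect every object instance into a name -> object map +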
obj_instances = {} + for obj_type, obj_list in objs.items(): + for obj in obj_list: + obj_instances[obj.name] = obj + ground_atoms = [] + try: + for k, o in obj_instances.items(): + for pred_name, pred in o.states.items(): + if isinstance(o.states[pred_name], AbsoluteObjectState): + if o.states[pred_name].get_value(env): + ground_atoms.append(pred_name+'('+k+')') + elif isinstance(o.states[pred_name], AbilityState): + if o.states[pred_name].get_value(env): + ground_atoms.append(pred_name+'('+k+')') + elif isinstance(o.states[pred_name], ObjectProperty): + if o.states[pred_name].get_value(env): + ground_atoms.append(pred_name+'('+k+')') + elif isinstance(o.states[pred_name], RelativeObjectState): + for k2, o2 in obj_instances.items(): + if o.check_rel_state(env, o2, pred_name): + ground_atoms.append(pred_name+'('+k+','+k2+')') + except: + import ipdb; ipdb.set_trace() + return ground_atoms + + + +def step(action): + prev_obs = env.gen_obs() + + prev_state = get_lifted_state(env) + obs, reward, done, terminated, info = env.step(action) + state = get_lifted_state(env) + + print('step=%s, reward=%.2f' % (env.step_count, reward)) + for atom in state: + print(atom) + + if args.save: + all_steps[env.step_count] = (prev_obs, prev_state, action, obs, state) + + if done: + print('done!') + if args.save: + save_demo(all_steps, args.env, env.episode) + reset() + else: + redraw(obs) + + +def switch_dim(dim): + env.switch_dim(dim) + print(f'switching to dim: {env.render_dim}') + obs = env.gen_obs() + redraw(obs) + + +def key_handler_cartesian(event): + print('pressed', event.key) + if event.key == 'escape': + window.close() + return + if event.key == 'backspace': + reset() + return + if event.key == 'left': + step(env.actions.left) + return + if event.key == 'right': + step(env.actions.right) + return + if event.key == 'up': + step(env.actions.forward) + return + # Spacebar + if event.key == ' ': + render_furniture() + return + if event.key == 'pageup': + step('choose') + return + if event.key == 'enter': + env.save_state() + return + if event.key == 'pagedown': + show_states() + return + if event.key == '0': + switch_dim(None) + return + if event.key == '1': + switch_dim(0) + return + if event.key == '2': + switch_dim(1) + return + if event.key == '3': + switch_dim(2) + return + +def key_handler_primitive(event): + print('pressed', event.key) + if event.key == 'escape': + window.close() + return + if event.key == 'left': + step(env.actions.left) + return + if event.key == 'right': + step(env.actions.right) + return + if event.key == 'up': + step(env.actions.forward) + return + if event.key == '0': + step(env.actions.pickup_0) + return + if event.key == '1': + step(env.actions.pickup_1) + return + if event.key == '2': + step(env.actions.pickup_2) + return + if event.key == '3': + step(env.actions.drop_0) + return + if event.key == '4': + step(env.actions.drop_1) + return + if event.key == '5': + step(env.actions.drop_2) + return + if event.key == 't': + step(env.actions.toggle) + return + if event.key == 'o': + step(env.actions.open) + return + if event.key == 'c': + step(env.actions.close) + return + if event.key == 'k': + step(env.actions.cook) + return + if event.key == '6': + step(env.actions.slice) + return + if event.key == 'i': + step(env.actions.drop_in) + return + if event.key == 'pagedown': + show_states() + return + + +parser = argparse.ArgumentParser() +parser.add_argument( + "--env", + help="gym environment to load", + default='MiniGrid-InstallingAPrinter-8x8-N2-v0' +) +parser.add_argument( 
+ "--seed", + type=int, + help="random seed to generate the environment with", + default=-1 +) +parser.add_argument( + "--tile_size", + type=int, + help="size at which to render tiles", + default=32 +) +parser.add_argument( + '--agent_view', + default=False, + help="draw the agent sees (partially observable view)", + action='store_true' +) +# NEW +parser.add_argument( + "--save", + default=False, + help="whether or not to save the demo_16" +) +# NEW +parser.add_argument( + "--load", + default=None, + help="path to load state from" +) + +args = parser.parse_args() +# ### +# all_envs = [env_id for env_id in gym.envs.registry.keys() if "MiniGrid-" in env_id] +# print(args) +# print(all_envs) +# quit() +# ### + +env = gym.make(args.env) +env.teleop_mode() +if args.save: + # We do not support save for cartesian action space + assert env.mode == "primitive" + +all_steps = {} + +if args.agent_view: + env = RGBImgPartialObsWrapper(env) + env = ImgObsWrapper(env) + +window = Window('mini_behavior - ' + args.env) +if env.mode == "cartesian": + window.reg_key_handler(key_handler_cartesian) +elif env.mode == "primitive": + window.reg_key_handler(key_handler_primitive) + +if args.load is None: + reset() +else: + load() + +# Blocking event loop +window.show(block=True) \ No newline at end of file diff --git a/willie_req.txt b/willie_req.txt new file mode 100644 index 0000000000..30051c5c13 --- /dev/null +++ b/willie_req.txt @@ -0,0 +1,180 @@ +aiodns==3.2.0 +aiohappyeyeballs==2.4.4 +aiohttp==3.10.11 +aiosignal==1.3.2 +annotated-types==0.7.0 +anyio==4.9.0 +apriltag==0.0.16 +astroid==2.11.7 +asttokens==3.0.0 +async-timeout==5.0.1 +attrs==24.3.0 +beautifulsoup4==4.12.3 +bosdyn-api==4.1.1 +bosdyn-client==4.1.1 +bosdyn-core==4.1.1 +cachetools==5.5.2 +ccxt==4.4.42 +certifi==2024.12.14 +cffi==1.17.1 +charset-normalizer==3.4.0 +click==8.1.8 +cloudpickle==3.1.1 +-e git+https://github.com/concepts-ai/Concepts.git@44ecfd7aff3d48f967a92e455d78cc87f6f97971#egg=concepts +contourpy==1.3.1 +cryptography==44.0.0 +cycler==0.12.1 +Cython==3.0.12 +decorator==4.4.2 +Deprecated==1.2.18 +dill==0.3.5.1 +distro==1.9.0 +exceptiongroup==1.2.2 +executing==2.1.0 +Farama-Notifications==0.0.4 +filelock==3.17.0 +fonttools==4.56.0 +frozendict==2.4.6 +frozenlist==1.5.0 +fsspec==2025.2.0 +google-ai-generativelanguage==0.6.15 +google-api-core==2.24.2 +google-api-python-client==2.167.0 +google-auth==2.39.0 +google-auth-httplib2==0.2.0 +google-generativeai==0.8.5 +googleapis-common-protos==1.70.0 +graphlib_backport==1.1.0 +grpcio==1.71.0 +grpcio-status==1.62.3 +gym==0.26.2 +gym-minigrid==1.0.3 +gym-notices==0.0.8 +gym-sokoban @ git+https://github.com/Learning-and-Intelligent-Systems/gym-sokoban.git@0ff1758c3cade36339a9ff1c766daceadc65bb6a +gymnasium==0.29.1 +h11==0.14.0 +h5py==3.13.0 +html5lib==1.1 +httpcore==1.0.8 +httplib2==0.22.0 +httpx==0.27.0 +idna==3.10 +ImageHash==4.3.2 +imageio==2.22.2 +imageio-ffmpeg==0.6.0 +iniconfig==2.1.0 +ipdb==0.13.13 +ipython==8.31.0 +isort==5.13.2 +jedi==0.19.2 +Jinja2==3.1.6 +joblib==1.4.2 +kiwisolver==1.4.8 +lark==1.2.2 +lazy-object-proxy==1.11.0 +lisdf==0.1.1 +lxml==5.3.0 +markdown-it-py==3.0.0 +MarkupSafe==3.0.2 +matplotlib==3.6.2 +matplotlib-inline==0.1.7 +mccabe==0.7.0 +mdurl==0.1.2 +-e git+https://github.com/StanfordVL/mini_behavior.git@66155f42b1f273cab5a6b82e5a007461125e0d26#egg=mini_behavior +minigrid==3.0.0 +moviepy==1.0.3 +mpmath==1.3.0 +multidict==6.1.0 +multiprocess==0.70.13 +multitasking==0.0.11 +mypy==1.8.0 +mypy_extensions==1.1.0 +mysql-connector-python==9.1.0 +networkx==3.4.2 +nltk==3.9.1 
+numpy==1.23.5 +openai==1.19.0 +opencv-python==4.7.0.72 +packaging==24.2 +pandas==1.5.1 +pandasql==0.7.3 +parso==0.8.4 +pathos==0.2.9 +pbrspot @ git+https://github.com/NishanthJKumar/pbrspot.git@0ad581da646523325bfb808625a87e6a898fd2bc +peewee==3.17.8 +pexpect==4.9.0 +pg3 @ git+https://github.com/tomsilver/pg3.git@d93fd9f5037b58fa1e10f65555558474415adadc +pillow==10.3.0 +pkgconfig==1.5.5 +platformdirs==4.3.6 +pluggy==1.5.0 +pox==0.3.6 +ppft==1.7.7 +-e git+https://github.com/bdaiinstitute/predicators.git@a40f8cebd099ad8546532ca5b1feaa7cdd7d2240#egg=predicators +proglog==0.1.11 +prompt_toolkit==3.0.48 +propcache==0.2.1 +proto-plus==1.26.1 +protobuf==4.22.0 +ptyprocess==0.7.0 +pure_eval==0.2.3 +py==1.11.0 +pyasn1==0.6.1 +pyasn1_modules==0.4.2 +pybullet==3.2.7 +pycares==4.5.0 +pycparser==2.22 +pydantic==2.11.3 +pydantic_core==2.33.1 +pygame==2.6.1 +Pygments==2.18.0 +PyJWT==2.10.1 +pylint==2.14.5 +pynmea2==1.19.0 +pyparsing==3.2.1 +pyperplan==2.1 +pytest==7.1.3 +python-dateutil==2.9.0.post0 +python-dotenv==1.0.1 +pytz==2024.2 +PyWavelets==1.8.0 +PyYAML==6.0 +recordclass==0.23.1 +regex==2024.11.6 +requests==2.32.3 +rich==14.0.0 +rsa==4.9.1 +scikit-image==0.19.3 +scikit-learn==1.1.2 +scipy==1.9.3 +seaborn==0.12.1 +six==1.17.0 +slack_bolt==1.23.0 +slack_sdk==3.35.0 +smepy @ git+https://github.com/sebdumancic/structure_mapping.git@df2553a1e07cedebf4ceb01992b8d275d15dc22c +sniffio==1.3.1 +soupsieve==2.6 +SQLAlchemy==2.0.36 +stack-data==0.6.3 +sympy==1.13.1 +tabulate==0.9.0 +tenacity==9.1.2 +threadpoolctl==3.6.0 +tifffile==2025.3.30 +tomli==2.2.1 +tomlkit==0.13.2 +torch==2.0.1 +torchvision==0.21.0 +tqdm==4.67.1 +traitlets==5.14.3 +types-PyYAML==6.0.12.20250402 +typing-inspection==0.4.0 +typing_extensions==4.12.2 +tzdata==2024.2 +uritemplate==4.1.1 +urllib3==2.2.3 +wcwidth==0.2.13 +webencodings==0.5.1 +wrapt==1.17.2 +yarl==1.18.3 +yfinance==0.2.51