diff --git a/486577184_3352016061601916_1683643685160174155_n.jpg b/486577184_3352016061601916_1683643685160174155_n.jpg
new file mode 100644
index 0000000000..bc3aca5a58
Binary files /dev/null and b/486577184_3352016061601916_1683643685160174155_n.jpg differ
diff --git a/all_metrics_plot.png b/all_metrics_plot.png
new file mode 100644
index 0000000000..eeafb661e5
Binary files /dev/null and b/all_metrics_plot.png differ
diff --git a/covered.png b/covered.png
new file mode 100644
index 0000000000..b1cb2dc446
Binary files /dev/null and b/covered.png differ
diff --git a/demos/MiniGrid-BoxingBooksUpForStorage-16x16-N2-v0/MiniGrid-BoxingBooksUpForStorage-16x16-N2-v0_0 b/demos/MiniGrid-BoxingBooksUpForStorage-16x16-N2-v0/MiniGrid-BoxingBooksUpForStorage-16x16-N2-v0_0
new file mode 100644
index 0000000000..fc13cbf17c
Binary files /dev/null and b/demos/MiniGrid-BoxingBooksUpForStorage-16x16-N2-v0/MiniGrid-BoxingBooksUpForStorage-16x16-N2-v0_0 differ
diff --git a/demos/MiniGrid-CleaningACar-16x16-N2-v0/MiniGrid-CleaningACar-16x16-N2-v0_0 b/demos/MiniGrid-CleaningACar-16x16-N2-v0/MiniGrid-CleaningACar-16x16-N2-v0_0
new file mode 100644
index 0000000000..39451f2a89
Binary files /dev/null and b/demos/MiniGrid-CleaningACar-16x16-N2-v0/MiniGrid-CleaningACar-16x16-N2-v0_0 differ
diff --git a/demos/MiniGrid-CleaningShoes-16x16-N2-v0/MiniGrid-CleaningShoes-16x16-N2-v0_0 b/demos/MiniGrid-CleaningShoes-16x16-N2-v0/MiniGrid-CleaningShoes-16x16-N2-v0_0
new file mode 100644
index 0000000000..2a68cf6c7b
Binary files /dev/null and b/demos/MiniGrid-CleaningShoes-16x16-N2-v0/MiniGrid-CleaningShoes-16x16-N2-v0_0 differ
diff --git a/demos/MiniGrid-CleaningUpTheKitchenOnly-16x16-N2-v0/MiniGrid-CleaningUpTheKitchenOnly-16x16-N2-v0_0 b/demos/MiniGrid-CleaningUpTheKitchenOnly-16x16-N2-v0/MiniGrid-CleaningUpTheKitchenOnly-16x16-N2-v0_0
new file mode 100644
index 0000000000..77c13ec731
Binary files /dev/null and b/demos/MiniGrid-CleaningUpTheKitchenOnly-16x16-N2-v0/MiniGrid-CleaningUpTheKitchenOnly-16x16-N2-v0_0 differ
diff --git a/demos/MiniGrid-CollectMisplacedItems-16x16-N2-v0/MiniGrid-CollectMisplacedItems-16x16-N2-v0_0 b/demos/MiniGrid-CollectMisplacedItems-16x16-N2-v0/MiniGrid-CollectMisplacedItems-16x16-N2-v0_0
new file mode 100644
index 0000000000..3a312b34d9
Binary files /dev/null and b/demos/MiniGrid-CollectMisplacedItems-16x16-N2-v0/MiniGrid-CollectMisplacedItems-16x16-N2-v0_0 differ
diff --git a/demos/MiniGrid-InstallingAPrinter-16x16-N2-v0/MiniGrid-InstallingAPrinter-16x16-N2-v0_0 b/demos/MiniGrid-InstallingAPrinter-16x16-N2-v0/MiniGrid-InstallingAPrinter-16x16-N2-v0_0
new file mode 100644
index 0000000000..7020128144
Binary files /dev/null and b/demos/MiniGrid-InstallingAPrinter-16x16-N2-v0/MiniGrid-InstallingAPrinter-16x16-N2-v0_0 differ
diff --git a/demos/MiniGrid-LayingWoodFloors-16x16-N2-v0/MiniGrid-LayingWoodFloors-16x16-N2-v0_0 b/demos/MiniGrid-LayingWoodFloors-16x16-N2-v0/MiniGrid-LayingWoodFloors-16x16-N2-v0_0
new file mode 100644
index 0000000000..ea8b5874a3
Binary files /dev/null and b/demos/MiniGrid-LayingWoodFloors-16x16-N2-v0/MiniGrid-LayingWoodFloors-16x16-N2-v0_0 differ
diff --git a/demos/MiniGrid-MakingTea-16x16-N2-v0/MiniGrid-MakingTea-16x16-N2-v0_0 b/demos/MiniGrid-MakingTea-16x16-N2-v0/MiniGrid-MakingTea-16x16-N2-v0_0
new file mode 100644
index 0000000000..ab28d8d64b
Binary files /dev/null and b/demos/MiniGrid-MakingTea-16x16-N2-v0/MiniGrid-MakingTea-16x16-N2-v0_0 differ
diff --git a/demos/MiniGrid-MovingBoxesToStorage-16x16-N2-v0/MiniGrid-MovingBoxesToStorage-16x16-N2-v0_0 b/demos/MiniGrid-MovingBoxesToStorage-16x16-N2-v0/MiniGrid-MovingBoxesToStorage-16x16-N2-v0_0
new file mode 100644
index 0000000000..382c43cea0
Binary files /dev/null and b/demos/MiniGrid-MovingBoxesToStorage-16x16-N2-v0/MiniGrid-MovingBoxesToStorage-16x16-N2-v0_0 differ
diff --git a/demos/MiniGrid-OpeningPackages-16x16-N2-v0/MiniGrid-OpeningPackages-16x16-N2-v0_0 b/demos/MiniGrid-OpeningPackages-16x16-N2-v0/MiniGrid-OpeningPackages-16x16-N2-v0_0
new file mode 100644
index 0000000000..96b0eecb02
Binary files /dev/null and b/demos/MiniGrid-OpeningPackages-16x16-N2-v0/MiniGrid-OpeningPackages-16x16-N2-v0_0 differ
diff --git a/demos/MiniGrid-OrganizingFileCabinet-16x16-N2-v0/MiniGrid-OrganizingFileCabinet-16x16-N2-v0_0 b/demos/MiniGrid-OrganizingFileCabinet-16x16-N2-v0/MiniGrid-OrganizingFileCabinet-16x16-N2-v0_0
new file mode 100644
index 0000000000..77701ff4b9
Binary files /dev/null and b/demos/MiniGrid-OrganizingFileCabinet-16x16-N2-v0/MiniGrid-OrganizingFileCabinet-16x16-N2-v0_0 differ
diff --git a/demos/MiniGrid-PuttingAwayDishesAfterCleaning-16x16-N2-v0/MiniGrid-PuttingAwayDishesAfterCleaning-16x16-N2-v0_0 b/demos/MiniGrid-PuttingAwayDishesAfterCleaning-16x16-N2-v0/MiniGrid-PuttingAwayDishesAfterCleaning-16x16-N2-v0_0
new file mode 100644
index 0000000000..bf751d78b9
Binary files /dev/null and b/demos/MiniGrid-PuttingAwayDishesAfterCleaning-16x16-N2-v0/MiniGrid-PuttingAwayDishesAfterCleaning-16x16-N2-v0_0 differ
diff --git a/demos/MiniGrid-SettingUpCandles-16x16-N2-v0/MiniGrid-SettingUpCandles-16x16-N2-v0_0 b/demos/MiniGrid-SettingUpCandles-16x16-N2-v0/MiniGrid-SettingUpCandles-16x16-N2-v0_0
new file mode 100644
index 0000000000..26a152ccdb
Binary files /dev/null and b/demos/MiniGrid-SettingUpCandles-16x16-N2-v0/MiniGrid-SettingUpCandles-16x16-N2-v0_0 differ
diff --git a/demos/MiniGrid-SortingBooks-16x16-N2-v0/MiniGrid-SortingBooks-16x16-N2-v0_0 b/demos/MiniGrid-SortingBooks-16x16-N2-v0/MiniGrid-SortingBooks-16x16-N2-v0_0
new file mode 100644
index 0000000000..77ae2a429d
Binary files /dev/null and b/demos/MiniGrid-SortingBooks-16x16-N2-v0/MiniGrid-SortingBooks-16x16-N2-v0_0 differ
diff --git a/demos/MiniGrid-SortingBooks-16x16-N2-v0/MiniGrid-SortingBooks-16x16-N2-v0_1 b/demos/MiniGrid-SortingBooks-16x16-N2-v0/MiniGrid-SortingBooks-16x16-N2-v0_1
new file mode 100644
index 0000000000..e8ae981608
Binary files /dev/null and b/demos/MiniGrid-SortingBooks-16x16-N2-v0/MiniGrid-SortingBooks-16x16-N2-v0_1 differ
diff --git a/demos/MiniGrid-SortingBooks-16x16-N2-v0/MiniGrid-SortingBooks-16x16-N2-v0_2 b/demos/MiniGrid-SortingBooks-16x16-N2-v0/MiniGrid-SortingBooks-16x16-N2-v0_2
new file mode 100644
index 0000000000..b160d06854
Binary files /dev/null and b/demos/MiniGrid-SortingBooks-16x16-N2-v0/MiniGrid-SortingBooks-16x16-N2-v0_2 differ
diff --git a/demos/MiniGrid-StoringFood-16x16-N2-v0/MiniGrid-StoringFood-16x16-N2-v0_0 b/demos/MiniGrid-StoringFood-16x16-N2-v0/MiniGrid-StoringFood-16x16-N2-v0_0
new file mode 100644
index 0000000000..90acc25406
Binary files /dev/null and b/demos/MiniGrid-StoringFood-16x16-N2-v0/MiniGrid-StoringFood-16x16-N2-v0_0 differ
diff --git a/demos/MiniGrid-ThrowingAwayLeftovers-16x16-N2-v0/MiniGrid-ThrowingAwayLeftovers-16x16-N2-v0_0 b/demos/MiniGrid-ThrowingAwayLeftovers-16x16-N2-v0/MiniGrid-ThrowingAwayLeftovers-16x16-N2-v0_0
new file mode 100644
index 0000000000..6a0bb3279a
Binary files /dev/null and b/demos/MiniGrid-ThrowingAwayLeftovers-16x16-N2-v0/MiniGrid-ThrowingAwayLeftovers-16x16-N2-v0_0 differ
diff --git a/demos/MiniGrid-WashingPotsAndPans-16x16-N2-v0/MiniGrid-WashingPotsAndPans-16x16-N2-v0_0 b/demos/MiniGrid-WashingPotsAndPans-16x16-N2-v0/MiniGrid-WashingPotsAndPans-16x16-N2-v0_0
new file mode 100644
index 0000000000..7884ebdaf6
Binary files /dev/null and b/demos/MiniGrid-WashingPotsAndPans-16x16-N2-v0/MiniGrid-WashingPotsAndPans-16x16-N2-v0_0 differ
diff --git a/demos/MiniGrid-WateringHouseplants-16x16-N2-v0/MiniGrid-WateringHouseplants-16x16-N2-v0_0 b/demos/MiniGrid-WateringHouseplants-16x16-N2-v0/MiniGrid-WateringHouseplants-16x16-N2-v0_0
new file mode 100644
index 0000000000..966f1a9653
Binary files /dev/null and b/demos/MiniGrid-WateringHouseplants-16x16-N2-v0/MiniGrid-WateringHouseplants-16x16-N2-v0_0 differ
diff --git a/equivalent.png b/equivalent.png
new file mode 100644
index 0000000000..afde84eeca
Binary files /dev/null and b/equivalent.png differ
diff --git a/extra/MiniGrid-CleaningACar-16x16-N2-v0_0 b/extra/MiniGrid-CleaningACar-16x16-N2-v0_0
new file mode 100644
index 0000000000..0cda03cceb
Binary files /dev/null and b/extra/MiniGrid-CleaningACar-16x16-N2-v0_0 differ
diff --git a/extra/MiniGrid-CollectMisplacedItems-16x16-N2-v0_0 b/extra/MiniGrid-CollectMisplacedItems-16x16-N2-v0_0
new file mode 100644
index 0000000000..525d1b37cb
Binary files /dev/null and b/extra/MiniGrid-CollectMisplacedItems-16x16-N2-v0_0 differ
diff --git a/extra/MiniGrid-OpeningPackages-16x16-N2-v0_0 b/extra/MiniGrid-OpeningPackages-16x16-N2-v0_0
new file mode 100644
index 0000000000..70a54c0248
Binary files /dev/null and b/extra/MiniGrid-OpeningPackages-16x16-N2-v0_0 differ
diff --git a/extra/MiniGrid-SortingBooks-16x16-N2-v0_0 b/extra/MiniGrid-SortingBooks-16x16-N2-v0_0
new file mode 100644
index 0000000000..40139ef049
Binary files /dev/null and b/extra/MiniGrid-SortingBooks-16x16-N2-v0_0 differ
diff --git a/extra/MiniGrid-ThrowingAwayLeftovers-16x16-N2-v0_0 b/extra/MiniGrid-ThrowingAwayLeftovers-16x16-N2-v0_0
new file mode 100644
index 0000000000..3deaefae46
Binary files /dev/null and b/extra/MiniGrid-ThrowingAwayLeftovers-16x16-N2-v0_0 differ
diff --git a/hitl_1_covered.png b/hitl_1_covered.png
new file mode 100644
index 0000000000..c666a4d288
Binary files /dev/null and b/hitl_1_covered.png differ
diff --git a/hitl_1_equivalent.png b/hitl_1_equivalent.png
new file mode 100644
index 0000000000..6a81282c35
Binary files /dev/null and b/hitl_1_equivalent.png differ
diff --git a/hitl_1_missed.png b/hitl_1_missed.png
new file mode 100644
index 0000000000..0fa9c7d24e
Binary files /dev/null and b/hitl_1_missed.png differ
diff --git a/hitl_1_overfit.png b/hitl_1_overfit.png
new file mode 100644
index 0000000000..e7e1e13103
Binary files /dev/null and b/hitl_1_overfit.png differ
diff --git a/hitl_5_covered.png b/hitl_5_covered.png
new file mode 100644
index 0000000000..7a09ebb99c
Binary files /dev/null and b/hitl_5_covered.png differ
diff --git a/hitl_5_equivalent.png b/hitl_5_equivalent.png
new file mode 100644
index 0000000000..56221d0766
Binary files /dev/null and b/hitl_5_equivalent.png differ
diff --git a/hitl_5_missed.png b/hitl_5_missed.png
new file mode 100644
index 0000000000..1b4ae6b216
Binary files /dev/null and b/hitl_5_missed.png differ
diff --git a/hitl_5_overfit.png b/hitl_5_overfit.png
new file mode 100644
index 0000000000..38cf7c75c9
Binary files /dev/null and b/hitl_5_overfit.png differ
diff --git a/learning_curve_all_metrics.png b/learning_curve_all_metrics.png
new file mode 100644
index 0000000000..18fd3848c9
Binary files /dev/null and b/learning_curve_all_metrics.png differ
diff --git
a/lifelong_learning_success.png b/lifelong_learning_success.png
new file mode 100644
index 0000000000..746cb6de2c
Binary files /dev/null and b/lifelong_learning_success.png differ
diff --git a/llx MITSCx21 Certificate _ LLX.pdf b/llx MITSCx21 Certificate _ LLX.pdf
new file mode 100644
index 0000000000..3b78769f00
Binary files /dev/null and b/llx MITSCx21 Certificate _ LLX.pdf differ
diff --git a/missed.png b/missed.png
new file mode 100644
index 0000000000..429e101873
Binary files /dev/null and b/missed.png differ
diff --git a/operator_learning_summary.png b/operator_learning_summary.png
new file mode 100644
index 0000000000..8fbc0574c2
Binary files /dev/null and b/operator_learning_summary.png differ
diff --git a/output_image.jpeg b/output_image.jpeg
new file mode 100644
index 0000000000..36f4f458f4
Binary files /dev/null and b/output_image.jpeg differ
diff --git a/overfit.png b/overfit.png
new file mode 100644
index 0000000000..1ac31c44df
Binary files /dev/null and b/overfit.png differ
diff --git a/predicators/approaches/minigrid_controller_approach.py b/predicators/approaches/minigrid_controller_approach.py
new file mode 100644
index 0000000000..de2748db29
--- /dev/null
+++ b/predicators/approaches/minigrid_controller_approach.py
@@ -0,0 +1,30 @@
+"""An approach that queries a human for low-level actions via the console."""
+
+from typing import Callable
+
+from predicators.approaches import BaseApproach
+from predicators.structs import Action, State, Task
+
+
+class MinigridControllerApproach(BaseApproach):
+    """Prompts the user for a discrete action index at every step."""
+
+    @classmethod
+    def get_name(cls) -> str:
+        return "minigrid_controller"
+
+    @property
+    def is_learning_based(self) -> bool:
+        return False
+
+    def _solve(self, task: Task, timeout: int) -> Callable[[State], Action]:
+        zero_vec = self._action_space.low
+
+        def _policy(_: State) -> Action:
+            action_vec = zero_vec.copy()
+            print(task.goal)
+            action_vec[int(input("Action: "))] = 1.0
+            print(action_vec)
+            return Action(action_vec)
+
+        return _policy
diff --git a/predicators/envs/mini_behavior_env.py b/predicators/envs/mini_behavior_env.py
new file mode 100644
index 0000000000..56467cf84f
--- /dev/null
+++ b/predicators/envs/mini_behavior_env.py
@@ -0,0 +1,386 @@
+"""A MiniBehavior environment wrapping https://github.com/StanfordVL/mini_behavior."""
+import sys
+from typing import ClassVar, Dict, List, Optional, Sequence, Set
+
+import gymnasium as gym
+import matplotlib
+import numpy as np
+from gym.spaces import Box
+
+from predicators import utils
+from predicators.envs import BaseEnv
+from predicators.settings import CFG
+from predicators.structs import Action, EnvironmentTask, Image, Object, \
+    Observation, Predicate, State, Type, Video
+
+from minigrid.wrappers import *
+from mini_behavior.window import Window
+from mini_behavior.utils.save import get_step, save_demo
+from mini_behavior.grid import GridDimension
+from mini_behavior.utils.wrappers import MiniBHFullyObsWrapper
+from mini_behavior.utils.save import all_state_values
+
+class MiniBehavior(BaseEnv):
+    """MiniBehavior environment wrapping the mini_behavior gym environments."""
+
+    name_to_enum: ClassVar[Dict[str, int]] = OBJECT_TO_IDX
+
+    object_type = Type("obj", ["row", "column", "type", "state", "color"])
+
+    def __init__(self, use_gui: bool = True) -> None:
+        super().__init__(use_gui)
+
+        # Predicates
+        self._IsLoc = Predicate("IsLoc", [self.object_type], self._IsLoc_holds)
+        self._Above = Predicate("Above", [self.object_type, self.object_type],
+                                self._Above_holds)
+        self._Below = Predicate("Below",
[self.object_type, self.object_type], + self._Below_holds) + self._RightOf = Predicate("RightOf", + [self.object_type, self.object_type], + self._RightOf_holds) + self._LeftOf = Predicate("LeftOf", + [self.object_type, self.object_type], + self._LeftOf_holds) + self._IsFacingUp = Predicate("IsFacingUp", [self.object_type], + self._IsFacingUp_holds) + self._IsFacingDown = Predicate("IsFacingDown", [self.object_type], + self._IsFacingDown_holds) + self._IsFacingLeft = Predicate("IsFacingLeft", [self.object_type], + self._IsFacingLeft_holds) + self._IsFacingRight = Predicate("IsFacingRight", [self.object_type], + self._IsFacingRight_holds) + self._IsNonGoalLoc = Predicate("IsNonGoalLoc", [self.object_type], + self._IsNonGoalLoc_holds) + self._Unknown = Predicate("Unknown", [self.object_type], + self._Unknown_holds) + self._Found = Predicate("Found", [self.object_type], + self._Found_holds) + self._IsAgent, self._At, self._IsGoal, self._IsBall, \ + self._IsKey, self._IsBox, self._IsRed, self._IsGreen, \ + self._IsBlue, self._IsPurple, self._IsYellow, self._IsGrey, \ + self._Holding, self._Near = self.get_goal_predicates() + + self.last_action = None + + # NOTE: we can change the level by modifying what we pass + + # into gym.make here. + if CFG.mini_behavior_env_fully_observable: + self._gym_env = MiniBHFullyObsWrapper(gym.make(CFG.mini_behavior_env_name)) + else: + NotImplementedError("Partial Observability Not implemented yet") + + @classmethod + def get_goal_predicates(cls) -> list[Predicate]: + """Defined public so that the perceiver can use it.""" + return [Predicate("IsAgent", [cls.object_type], cls._IsAgent_holds), + Predicate("At", [cls.object_type, cls.object_type], cls._At_holds), + Predicate("IsGoal", [cls.object_type], cls._IsGoal_holds), + Predicate("IsBall", [cls.object_type], cls._IsBall_holds), + Predicate("IsKey", [cls.object_type], cls._IsKey_holds), + Predicate("IsBox", [cls.object_type], cls._IsBox_holds), + Predicate("IsRed", [cls.object_type], cls._IsRed_holds), + Predicate("IsGreen", [cls.object_type], cls._IsGreen_holds), + Predicate("IsBlue", [cls.object_type], cls._IsBlue_holds), + Predicate("IsPurple", [cls.object_type], cls._IsPurple_holds), + Predicate("IsYellow", [cls.object_type], cls._IsYellow_holds), + Predicate("IsGrey", [cls.object_type], cls._IsGrey_holds), + Predicate("Holding", [cls.object_type], cls._Holding_holds), + Predicate("Near", [cls.object_type, cls.object_type], cls._Near_holds)] + + + def _generate_train_tasks(self) -> List[EnvironmentTask]: + return self._get_tasks(num=CFG.num_train_tasks, train_or_test="train") + + def _generate_test_tasks(self) -> List[EnvironmentTask]: + return self._get_tasks(num=CFG.num_test_tasks, train_or_test="test") + + @classmethod + def get_name(cls) -> str: + return "mini_behavior_env" + + def get_observation(self) -> Observation: + return self._copy_observation(self._current_observation) + + def render_state_plt( + self, + state: State, + task: EnvironmentTask, + action: Optional[Action] = None, + caption: Optional[str] = None) -> matplotlib.figure.Figure: + raise NotImplementedError("This env does not use Matplotlib") + + def render_state(self, + state: State, + task: EnvironmentTask, + action: Optional[Action] = None, + caption: Optional[str] = None) -> Video: + raise NotImplementedError("A gym environment cannot render " + "arbitrary states.") + + def render(self, + action: Optional[Action] = None, + caption: Optional[str] = None) -> Video: + assert caption is None + arr: Image = self._gym_env.get_frame() + 
return [arr] + + @property + def predicates(self) -> Set[Predicate]: + return { + self._At, self._IsLoc, self._Above, self._Below, + self._RightOf, self._LeftOf, self._IsAgent, self._IsGoal, self._IsNonGoalLoc, + self._IsFacingUp, self._IsFacingDown, self._IsFacingLeft, self._IsFacingRight, + self._Unknown, self._Found, self._IsBall, self._IsKey, self._IsBox, self._IsRed, + self._IsGreen, self._IsBlue, self._IsPurple, self._IsYellow, self._IsGrey, + self._Holding, self._Near + } + + @property + def goal_predicates(self) -> Set[Predicate]: + return {self._IsAgent, self._At, self._IsGoal} + + @property + def types(self) -> Set[Type]: + return {self.object_type} + + @property + def action_space(self) -> Box: + # One-hot encoding of discrete action space. + num_actions = 15 + assert self._gym_env.action_space.n == num_actions # type: ignore + lowers = np.zeros(num_actions, dtype=np.float32) + uppers = np.ones(num_actions, dtype=np.float32) + return Box(lowers, uppers) + + def reset(self, train_or_test: str, task_idx: int) -> Observation: + """Resets the current state to the train or test task initial state.""" + self._current_task = self.get_task(train_or_test, task_idx) + self._current_observation = self._current_task.init_obs + # We now need to reset the underlying gym environment to the correct + # state. + seed = utils.get_task_seed(train_or_test, task_idx) + self._reset_initial_state_from_seed(seed) + return self._copy_observation(self._current_observation) + + def simulate(self, state: State, action: Action) -> State: + raise NotImplementedError("Simulate not implemented for gym envs. " + + "Try using --bilevel_plan_without_sim True") + + def step(self, action: Action) -> Observation: + # Convert our actions to their discrete action space. + discrete_action = np.argmax(action.arr) + + goal_position = [ + y.cur_pos for x, y in enumerate(self._gym_env.grid.grid) if isinstance(y, Goal) + ] + self._current_observation = self._gym_env.step(discrete_action) + self._gym_env.render() + self.last_action = discrete_action + self._current_observation[4]['last_action'] = self.last_action + + if CFG.mini_behavior_gym_render: + # save frame to png + visual = self._gym_env.get_frame() + import matplotlib.pyplot as plt + plt.imsave('render.png', visual.astype('uint8')) + + + return self._copy_observation(self._current_observation) + + def goal_reached(self) -> bool: + if len(self._current_observation) == 5: + return self._current_observation[2] + return False + + def _get_tasks(self, num: int, + train_or_test: str) -> List[EnvironmentTask]: + tasks = [] + for task_idx in range(num): + seed = utils.get_task_seed(train_or_test, task_idx) + init_obs = self._reset_initial_state_from_seed(seed) + goal_description = self._gym_env.mission + task = EnvironmentTask(init_obs, goal_description) + tasks.append(task) + return tasks + + def _reset_initial_state_from_seed(self, seed: int) -> Observation: + self._gym_env.reset(seed=seed) + return self._gym_env.gen_full_obs() + + @classmethod + def _IsLoc_holds(cls, state: State, objects: Sequence[Object]) -> bool: + # Free spaces and goals are locations. 
+ loc, = objects + obj_type = int(state.get(loc, "type")) + return obj_type in {cls.name_to_enum["empty"], cls.name_to_enum["goal"]} + + @classmethod + def _IsGoal_holds(cls, state: State, objects: Sequence[Object]) -> bool: + return cls._check_enum(state, objects, "goal") + + @classmethod + def _IsAgent_holds(cls, state: State, objects: Sequence[Object]) -> bool: + return cls._check_enum(state, objects, "agent") + + @classmethod + def _IsBall_holds(cls, state: State, objects: Sequence[Object]) -> bool: + return cls._check_enum(state, objects, "ball") + + @classmethod + def _IsKey_holds(cls, state: State, objects: Sequence[Object]) -> bool: + return cls._check_enum(state, objects, "key") + + @classmethod + def _IsBox_holds(cls, state: State, objects: Sequence[Object]) -> bool: + return cls._check_enum(state, objects, "box") + + @classmethod + def _IsRed_holds(cls, state: State, objects: Sequence[Object]) -> bool: + obj, = objects + return state.get(obj, "color") == 'red' + + @classmethod + def _IsGreen_holds(cls, state: State, objects: Sequence[Object]) -> bool: + obj, = objects + return state.get(obj, "color") == 'green' + + @classmethod + def _IsBlue_holds(cls, state: State, objects: Sequence[Object]) -> bool: + obj, = objects + return state.get(obj, "color") == 'blue' + + @classmethod + def _IsPurple_holds(cls, state: State, objects: Sequence[Object]) -> bool: + obj, = objects + return state.get(obj, "color") == 'purple' + + @classmethod + def _IsYellow_holds(cls, state: State, objects: Sequence[Object]) -> bool: + obj, = objects + return state.get(obj, "color") == 'yellow' + + @classmethod + def _IsGrey_holds(cls, state: State, objects: Sequence[Object]) -> bool: + obj, = objects + return state.get(obj, "color") == 'grey' + + @classmethod + def _IsNonGoalLoc_holds(cls, state: State, + objects: Sequence[Object]) -> bool: + return cls._check_enum(state, objects, "empty") + + @classmethod + def _At_holds(cls, state: State, objects: Sequence[Object]) -> bool: + obj1, _ = objects + if cls._check_enum(state, [obj1], "agent"): + return cls._check_spatial_relation(state, objects, 0, 0) + return False + + @classmethod + def _Above_holds(cls, state: State, objects: Sequence[Object]) -> bool: + return cls._check_spatial_relation(state, objects, 1, 0) + + @classmethod + def _Below_holds(cls, state: State, objects: Sequence[Object]) -> bool: + return cls._check_spatial_relation(state, objects, -1, 0) + + @classmethod + def _RightOf_holds(cls, state: State, objects: Sequence[Object]) -> bool: + return cls._check_spatial_relation(state, objects, 0, -1) + + @classmethod + def _LeftOf_holds(cls, state: State, objects: Sequence[Object]) -> bool: + return cls._check_spatial_relation(state, objects, 0, 1) + + @classmethod + def _IsFacingRight_holds(cls, state: State, objects: Sequence[Object]) -> bool: + obj, = objects + if cls._check_enum(state, [obj], "agent"): + return state.get(obj, "state") == 0 + return False + + @classmethod + def _IsFacingDown_holds(cls, state: State, objects: Sequence[Object]) -> bool: + obj, = objects + if cls._check_enum(state, [obj], "agent"): + return state.get(obj, "state") == 1 + return False + + @classmethod + def _IsFacingLeft_holds(cls, state: State, objects: Sequence[Object]) -> bool: + obj, = objects + if cls._check_enum(state, [obj], "agent"): + return state.get(obj, "state") == 2 + return False + + @classmethod + def _IsFacingUp_holds(cls, state: State, objects: Sequence[Object]) -> bool: + obj, = objects + if cls._check_enum(state, [obj], "agent"): + return 
state.get(obj, "state") == 3 + return False + + @classmethod + def _Holding_holds(cls, state: State, objects: Sequence[Object]) -> bool: + obj, = objects + return int(state.get(obj, "state")) == 3 + + @classmethod + def _Near_holds(cls, state: State, objects: Sequence[Object]) -> bool: + obj1, ob2 = objects + return cls._Above_holds(state, [obj1, ob2]) or \ + cls._Below_holds(state, [obj1, ob2]) or \ + cls._RightOf_holds(state, [obj1, ob2]) or \ + cls._LeftOf_holds(state, [obj1, ob2]) + + @classmethod + def _Unknown_holds(cls, state: State, objects: Sequence[Object]) -> bool: + obj, = objects + return int(state.get(obj, "state")) == -1 + + @classmethod + def _Found_holds(cls, state: State, objects: Sequence[Object]) -> bool: + obj, = objects + return int(state.get(obj, "state")) != -1 + + @classmethod + def get_objects_of_enum(cls, state: State, enum_name: str) -> Set[Object]: + """Made public for use by perceiver.""" + return { + o + for o in state + if int(state.get(o, "type")) == int(cls.name_to_enum[enum_name]) + } + + @classmethod + def _check_spatial_relation(cls, state: State, objects: Sequence[Object], + dr: int, dc: int) -> bool: + obj1, obj2 = objects + obj1_r = int(state.get(obj1, "row")) + obj1_c = int(state.get(obj1, "column")) + obj2_r = int(state.get(obj2, "row")) + obj2_c = int(state.get(obj2, "column")) + if obj1_r == sys.maxsize or obj2_r == sys.maxsize or obj1_c == sys.maxsize or obj2_c == sys.maxsize: + return False + return ((obj1_r + dr) == obj2_r) and ((obj1_c + dc) == obj2_c) + + @classmethod + def _check_enum(cls, state: State, objects: Sequence[Object], + enum_name: str) -> bool: + obj, = objects + obj_type = state.get(obj, "type") + return int(obj_type) == int(cls.name_to_enum[enum_name]) + + @classmethod + def _is_static(cls, obj: Object, state: State) -> bool: + return cls._IsGoal_holds(state, [obj]) or \ + cls._IsNonGoalLoc_holds(state, [obj]) + + @classmethod + def _is_dynamic(cls, obj: Object, state: State) -> bool: + return not cls._is_static(obj, state) + + def _copy_observation(self, obs: Observation) -> Observation: + return tuple(m.copy() if type(m) not in [bool, int, float] else m for m in obs) diff --git a/predicators/envs/minigrid_env.py b/predicators/envs/minigrid_env.py new file mode 100644 index 0000000000..9f86008e15 --- /dev/null +++ b/predicators/envs/minigrid_env.py @@ -0,0 +1,386 @@ +"""A MiniGrid environment wrapping https://github.com/mpSchrader/gym-sokoban.""" +import sys +from typing import ClassVar, Dict, List, Optional, Sequence, Set + +import gymnasium as gym +import matplotlib +import numpy as np +from gym.spaces import Box + +from predicators import utils +from predicators.envs import BaseEnv +from predicators.settings import CFG +from predicators.structs import Action, EnvironmentTask, Image, Object, \ + Observation, Predicate, State, Type, Video + +from minigrid.core.constants import ( + OBJECT_TO_IDX, +) +from minigrid.core.world_object import Ball as BallObj, Goal, Key as KeyObj, Box as BoxObj +from minigrid.wrappers import FullyObsWrapper + +class MiniGridEnv(BaseEnv): + """MiniGrid environment wrapping gym-sokoban.""" + + name_to_enum: ClassVar[Dict[str, int]] = OBJECT_TO_IDX + + object_type = Type("obj", ["row", "column", "type", "state", "color"]) + + def __init__(self, use_gui: bool = True) -> None: + super().__init__(use_gui) + + # Predicates + self._IsLoc = Predicate("IsLoc", [self.object_type], self._IsLoc_holds) + self._Above = Predicate("Above", [self.object_type, self.object_type], + self._Above_holds) + self._Below = 
Predicate("Below", [self.object_type, self.object_type], + self._Below_holds) + self._RightOf = Predicate("RightOf", + [self.object_type, self.object_type], + self._RightOf_holds) + self._LeftOf = Predicate("LeftOf", + [self.object_type, self.object_type], + self._LeftOf_holds) + self._IsFacingUp = Predicate("IsFacingUp", [self.object_type], + self._IsFacingUp_holds) + self._IsFacingDown = Predicate("IsFacingDown", [self.object_type], + self._IsFacingDown_holds) + self._IsFacingLeft = Predicate("IsFacingLeft", [self.object_type], + self._IsFacingLeft_holds) + self._IsFacingRight = Predicate("IsFacingRight", [self.object_type], + self._IsFacingRight_holds) + self._IsNonGoalLoc = Predicate("IsNonGoalLoc", [self.object_type], + self._IsNonGoalLoc_holds) + self._Unknown = Predicate("Unknown", [self.object_type], + self._Unknown_holds) + self._Found = Predicate("Found", [self.object_type], + self._Found_holds) + self._IsAgent, self._At, self._IsGoal, self._IsBall, \ + self._IsKey, self._IsBox, self._IsRed, self._IsGreen, \ + self._IsBlue, self._IsPurple, self._IsYellow, self._IsGrey, \ + self._Holding, self._Near = self.get_goal_predicates() + + self.last_action = None + + # NOTE: we can change the level by modifying what we pass + + # into gym.make here. + if CFG.minigrid_gym_fully_observable: + self._gym_env = FullyObsWrapper(gym.make(CFG.minigrid_gym_name)) + else: + self._gym_env = gym.make(CFG.minigrid_gym_name) + + @classmethod + def get_goal_predicates(cls) -> list[Predicate]: + """Defined public so that the perceiver can use it.""" + return [Predicate("IsAgent", [cls.object_type], cls._IsAgent_holds), + Predicate("At", [cls.object_type, cls.object_type], cls._At_holds), + Predicate("IsGoal", [cls.object_type], cls._IsGoal_holds), + Predicate("IsBall", [cls.object_type], cls._IsBall_holds), + Predicate("IsKey", [cls.object_type], cls._IsKey_holds), + Predicate("IsBox", [cls.object_type], cls._IsBox_holds), + Predicate("IsRed", [cls.object_type], cls._IsRed_holds), + Predicate("IsGreen", [cls.object_type], cls._IsGreen_holds), + Predicate("IsBlue", [cls.object_type], cls._IsBlue_holds), + Predicate("IsPurple", [cls.object_type], cls._IsPurple_holds), + Predicate("IsYellow", [cls.object_type], cls._IsYellow_holds), + Predicate("IsGrey", [cls.object_type], cls._IsGrey_holds), + Predicate("Holding", [cls.object_type], cls._Holding_holds), + Predicate("Near", [cls.object_type, cls.object_type], cls._Near_holds)] + + + def _generate_train_tasks(self) -> List[EnvironmentTask]: + return self._get_tasks(num=CFG.num_train_tasks, train_or_test="train") + + def _generate_test_tasks(self) -> List[EnvironmentTask]: + return self._get_tasks(num=CFG.num_test_tasks, train_or_test="test") + + @classmethod + def get_name(cls) -> str: + return "minigrid_env" + + def get_observation(self) -> Observation: + return self._copy_observation(self._current_observation) + + def render_state_plt( + self, + state: State, + task: EnvironmentTask, + action: Optional[Action] = None, + caption: Optional[str] = None) -> matplotlib.figure.Figure: + raise NotImplementedError("This env does not use Matplotlib") + + def render_state(self, + state: State, + task: EnvironmentTask, + action: Optional[Action] = None, + caption: Optional[str] = None) -> Video: + raise NotImplementedError("A gym environment cannot render " + "arbitrary states.") + + def render(self, + action: Optional[Action] = None, + caption: Optional[str] = None) -> Video: + assert caption is None + arr: Image = self._gym_env.get_frame() + import 
matplotlib.pyplot as plt + plt.imsave('visual_image.png', arr.astype('uint8')) + return [arr] + + @property + def predicates(self) -> Set[Predicate]: + return { + self._At, self._IsLoc, self._Above, self._Below, + self._RightOf, self._LeftOf, self._IsAgent, self._IsGoal, self._IsNonGoalLoc, + self._IsFacingUp, self._IsFacingDown, self._IsFacingLeft, self._IsFacingRight, + self._Unknown, self._Found, self._IsBall, self._IsKey, self._IsBox, self._IsRed, + self._IsGreen, self._IsBlue, self._IsPurple, self._IsYellow, self._IsGrey, + self._Holding, self._Near + } + + @property + def goal_predicates(self) -> Set[Predicate]: + return {self._IsAgent, self._At, self._IsGoal} + + @property + def types(self) -> Set[Type]: + return {self.object_type} + + @property + def action_space(self) -> Box: + # One-hot encoding of discrete action space. + num_actions = 7 + assert self._gym_env.action_space.n == num_actions # type: ignore + lowers = np.zeros(num_actions, dtype=np.float32) + uppers = np.ones(num_actions, dtype=np.float32) + return Box(lowers, uppers) + + def reset(self, train_or_test: str, task_idx: int) -> Observation: + """Resets the current state to the train or test task initial state.""" + self._current_task = self.get_task(train_or_test, task_idx) + self._current_observation = self._current_task.init_obs + # We now need to reset the underlying gym environment to the correct + # state. + seed = utils.get_task_seed(train_or_test, task_idx) + self._reset_initial_state_from_seed(seed) + return self._copy_observation(self._current_observation) + + def simulate(self, state: State, action: Action) -> State: + raise NotImplementedError("Simulate not implemented for gym envs. " + + "Try using --bilevel_plan_without_sim True") + + def step(self, action: Action) -> Observation: + # Convert our actions to their discrete action space. + discrete_action = np.argmax(action.arr) + + goal_position = [ + y.cur_pos for x, y in enumerate(self._gym_env.grid.grid) if isinstance(y, Goal) + ] + self._current_observation = self._gym_env.step(discrete_action) + self._gym_env.render() + self.last_action = discrete_action + self._current_observation[4]['last_action'] = self.last_action + + if CFG.minigrid_gym_render: + # save frame to png + visual = self._gym_env.get_frame() + import matplotlib.pyplot as plt + plt.imsave('render.png', visual.astype('uint8')) + + + return self._copy_observation(self._current_observation) + + def goal_reached(self) -> bool: + if len(self._current_observation) == 5: + return self._current_observation[2] + return False + + def _get_tasks(self, num: int, + train_or_test: str) -> List[EnvironmentTask]: + tasks = [] + for task_idx in range(num): + seed = utils.get_task_seed(train_or_test, task_idx) + init_obs = self._reset_initial_state_from_seed(seed) + goal_description = self._gym_env.mission + task = EnvironmentTask(init_obs, goal_description) + tasks.append(task) + return tasks + + def _reset_initial_state_from_seed(self, seed: int) -> Observation: + return self._gym_env.reset(seed=seed) + + @classmethod + def _IsLoc_holds(cls, state: State, objects: Sequence[Object]) -> bool: + # Free spaces and goals are locations. 
+ loc, = objects + obj_type = int(state.get(loc, "type")) + return obj_type in {cls.name_to_enum["empty"], cls.name_to_enum["goal"]} + + @classmethod + def _IsGoal_holds(cls, state: State, objects: Sequence[Object]) -> bool: + return cls._check_enum(state, objects, "goal") + + @classmethod + def _IsAgent_holds(cls, state: State, objects: Sequence[Object]) -> bool: + return cls._check_enum(state, objects, "agent") + + @classmethod + def _IsBall_holds(cls, state: State, objects: Sequence[Object]) -> bool: + return cls._check_enum(state, objects, "ball") + + @classmethod + def _IsKey_holds(cls, state: State, objects: Sequence[Object]) -> bool: + return cls._check_enum(state, objects, "key") + + @classmethod + def _IsBox_holds(cls, state: State, objects: Sequence[Object]) -> bool: + return cls._check_enum(state, objects, "box") + + @classmethod + def _IsRed_holds(cls, state: State, objects: Sequence[Object]) -> bool: + obj, = objects + return state.get(obj, "color") == 'red' + + @classmethod + def _IsGreen_holds(cls, state: State, objects: Sequence[Object]) -> bool: + obj, = objects + return state.get(obj, "color") == 'green' + + @classmethod + def _IsBlue_holds(cls, state: State, objects: Sequence[Object]) -> bool: + obj, = objects + return state.get(obj, "color") == 'blue' + + @classmethod + def _IsPurple_holds(cls, state: State, objects: Sequence[Object]) -> bool: + obj, = objects + return state.get(obj, "color") == 'purple' + + @classmethod + def _IsYellow_holds(cls, state: State, objects: Sequence[Object]) -> bool: + obj, = objects + return state.get(obj, "color") == 'yellow' + + @classmethod + def _IsGrey_holds(cls, state: State, objects: Sequence[Object]) -> bool: + obj, = objects + return state.get(obj, "color") == 'grey' + + @classmethod + def _IsNonGoalLoc_holds(cls, state: State, + objects: Sequence[Object]) -> bool: + return cls._check_enum(state, objects, "empty") + + @classmethod + def _At_holds(cls, state: State, objects: Sequence[Object]) -> bool: + obj1, _ = objects + if cls._check_enum(state, [obj1], "agent"): + return cls._check_spatial_relation(state, objects, 0, 0) + return False + + @classmethod + def _Above_holds(cls, state: State, objects: Sequence[Object]) -> bool: + return cls._check_spatial_relation(state, objects, 1, 0) + + @classmethod + def _Below_holds(cls, state: State, objects: Sequence[Object]) -> bool: + return cls._check_spatial_relation(state, objects, -1, 0) + + @classmethod + def _RightOf_holds(cls, state: State, objects: Sequence[Object]) -> bool: + return cls._check_spatial_relation(state, objects, 0, -1) + + @classmethod + def _LeftOf_holds(cls, state: State, objects: Sequence[Object]) -> bool: + return cls._check_spatial_relation(state, objects, 0, 1) + + @classmethod + def _IsFacingRight_holds(cls, state: State, objects: Sequence[Object]) -> bool: + obj, = objects + if cls._check_enum(state, [obj], "agent"): + return state.get(obj, "state") == 0 + return False + + @classmethod + def _IsFacingDown_holds(cls, state: State, objects: Sequence[Object]) -> bool: + obj, = objects + if cls._check_enum(state, [obj], "agent"): + return state.get(obj, "state") == 1 + return False + + @classmethod + def _IsFacingLeft_holds(cls, state: State, objects: Sequence[Object]) -> bool: + obj, = objects + if cls._check_enum(state, [obj], "agent"): + return state.get(obj, "state") == 2 + return False + + @classmethod + def _IsFacingUp_holds(cls, state: State, objects: Sequence[Object]) -> bool: + obj, = objects + if cls._check_enum(state, [obj], "agent"): + return 
state.get(obj, "state") == 3 + return False + + @classmethod + def _Holding_holds(cls, state: State, objects: Sequence[Object]) -> bool: + obj, = objects + return int(state.get(obj, "state")) == 3 + + @classmethod + def _Near_holds(cls, state: State, objects: Sequence[Object]) -> bool: + obj1, ob2 = objects + return cls._Above_holds(state, [obj1, ob2]) or \ + cls._Below_holds(state, [obj1, ob2]) or \ + cls._RightOf_holds(state, [obj1, ob2]) or \ + cls._LeftOf_holds(state, [obj1, ob2]) + + @classmethod + def _Unknown_holds(cls, state: State, objects: Sequence[Object]) -> bool: + obj, = objects + return int(state.get(obj, "state")) == -1 + + @classmethod + def _Found_holds(cls, state: State, objects: Sequence[Object]) -> bool: + obj, = objects + return int(state.get(obj, "state")) != -1 + + @classmethod + def get_objects_of_enum(cls, state: State, enum_name: str) -> Set[Object]: + """Made public for use by perceiver.""" + return { + o + for o in state + if int(state.get(o, "type")) == int(cls.name_to_enum[enum_name]) + } + + @classmethod + def _check_spatial_relation(cls, state: State, objects: Sequence[Object], + dr: int, dc: int) -> bool: + obj1, obj2 = objects + obj1_r = int(state.get(obj1, "row")) + obj1_c = int(state.get(obj1, "column")) + obj2_r = int(state.get(obj2, "row")) + obj2_c = int(state.get(obj2, "column")) + if obj1_r == sys.maxsize or obj2_r == sys.maxsize or obj1_c == sys.maxsize or obj2_c == sys.maxsize: + return False + return ((obj1_r + dr) == obj2_r) and ((obj1_c + dc) == obj2_c) + + @classmethod + def _check_enum(cls, state: State, objects: Sequence[Object], + enum_name: str) -> bool: + obj, = objects + obj_type = state.get(obj, "type") + return int(obj_type) == int(cls.name_to_enum[enum_name]) + + @classmethod + def _is_static(cls, obj: Object, state: State) -> bool: + return cls._IsGoal_holds(state, [obj]) or \ + cls._IsNonGoalLoc_holds(state, [obj]) + + @classmethod + def _is_dynamic(cls, obj: Object, state: State) -> bool: + return not cls._is_static(obj, state) + + def _copy_observation(self, obs: Observation) -> Observation: + return tuple(m.copy() if type(m) not in [bool, int, float] else m for m in obs) diff --git a/predicators/ground_truth_models/mini_behavior_env/__init__.py b/predicators/ground_truth_models/mini_behavior_env/__init__.py new file mode 100644 index 0000000000..2a8f9dca73 --- /dev/null +++ b/predicators/ground_truth_models/mini_behavior_env/__init__.py @@ -0,0 +1,6 @@ +"""Ground truth models for MiniBehavior gym environment.""" + +from .nsrts import MiniBehaviorGroundTruthNSRTFactory +from .options import MiniBehaviorGroundTruthOptionFactory + +__all__ = ["MiniBehaviorGroundTruthOptionFactory", "MiniBehaviorGroundTruthNSRTFactory"] diff --git a/predicators/ground_truth_models/mini_behavior_env/nsrts.py b/predicators/ground_truth_models/mini_behavior_env/nsrts.py new file mode 100644 index 0000000000..24f6825af4 --- /dev/null +++ b/predicators/ground_truth_models/mini_behavior_env/nsrts.py @@ -0,0 +1,319 @@ +"""Ground-truth NSRTs for the cover environment.""" + +from typing import Dict, List, Set + +from predicators.ground_truth_models import GroundTruthNSRTFactory +from predicators.structs import NSRT, LiftedAtom, ParameterizedOption, \ + Predicate, Type, Variable +from predicators.utils import null_sampler + + +class MiniBehaviorGroundTruthNSRTFactory(GroundTruthNSRTFactory): + """Ground-truth NSRTs for the MiniBehavior environment.""" + + @classmethod + def get_env_names(cls) -> Set[str]: + return {"mini_behavior_env"} + + @staticmethod + def 
get_nsrts(env_name: str, types: Dict[str, Type], + predicates: Dict[str, Predicate], + options: Dict[str, ParameterizedOption]) -> Set[NSRT]: + # Types + object_type = types["obj"] + + # Objects + obj1 = Variable("?obj1", object_type) + obj2 = Variable("?obj2", object_type) + obj3 = Variable("?obj3", object_type) + + # Predicates + At = predicates["At"] + IsLoc = predicates["IsLoc"] + Above = predicates["Above"] + Below = predicates["Below"] + RightOf = predicates["RightOf"] + LeftOf = predicates["LeftOf"] + IsAgent = predicates["IsAgent"] + IsGoal = predicates["IsGoal"] + IsFacingUp = predicates["IsFacingUp"] + IsFacingDown = predicates["IsFacingDown"] + IsFacingLeft = predicates["IsFacingLeft"] + IsFacingRight = predicates["IsFacingRight"] + Unknown = predicates["Unknown"] + Found = predicates["Found"] + Holding = predicates["Holding"] + Near = predicates["Near"] + + # Options + MoveForward = options["Forward"] + TurnLeft = options["Left"] + TurnRight = options["Right"] + Pickup = options["Pickup_0"] + Drop = options["Drop_0"] + Toggle = options["Toggle"] + FindObj = options["FindObj"] + ReplanToObj = options["ReplanToObj"] + + nsrts = set() + + # MoveUp + # Agent, from_loc, to_loc + parameters = [obj1, obj2, obj3] + preconditions = { + LiftedAtom(IsAgent, [obj1]), + LiftedAtom(IsLoc, [obj3]), + LiftedAtom(IsLoc, [obj2]), + LiftedAtom(Above, [obj3, obj2]), + LiftedAtom(At, [obj1, obj2]), + LiftedAtom(IsFacingUp, [obj1]), + } + add_effects = {LiftedAtom(At, [obj1, obj3])} + delete_effects = {LiftedAtom(At, [obj1, obj2])} + option = MoveForward + option_vars: List[Variable] = [] # dummy - not used + move_up_nsrt = NSRT("MoveUp", parameters, preconditions, add_effects, + delete_effects, set(), option, option_vars, + null_sampler) + nsrts.add(move_up_nsrt) + + # MoveDown + # Agent, from_loc, to_loc + parameters = [obj1, obj2, obj3] + preconditions = { + LiftedAtom(IsAgent, [obj1]), + LiftedAtom(IsLoc, [obj3]), + LiftedAtom(IsLoc, [obj2]), + LiftedAtom(Below, [obj3, obj2]), + LiftedAtom(At, [obj1, obj2]), + LiftedAtom(IsFacingDown, [obj1]), + } + add_effects = {LiftedAtom(At, [obj1, obj3])} + delete_effects = {LiftedAtom(At, [obj1, obj2])} + option = MoveForward + option_vars = [] # dummy - not used + move_down_nsrt = NSRT("MoveDown", parameters, preconditions, + add_effects, delete_effects, set(), option, + option_vars, null_sampler) + nsrts.add(move_down_nsrt) + + # MoveRight + # Agent, from_loc, to_loc + parameters = [obj1, obj2, obj3] + preconditions = { + LiftedAtom(IsAgent, [obj1]), + LiftedAtom(IsLoc, [obj3]), + LiftedAtom(IsLoc, [obj2]), + LiftedAtom(RightOf, [obj3, obj2]), + LiftedAtom(At, [obj1, obj2]), + LiftedAtom(IsFacingRight, [obj1]), + } + add_effects = {LiftedAtom(At, [obj1, obj3])} + delete_effects = {LiftedAtom(At, [obj1, obj2])} + option = MoveForward + option_vars = [] # dummy - not used + move_right_nsrt = NSRT("MoveRight", parameters, preconditions, + add_effects, delete_effects, set(), option, + option_vars, null_sampler) + nsrts.add(move_right_nsrt) + + # MoveLeft + # Agent, from_loc, to_loc + parameters = [obj1, obj2, obj3] + preconditions = { + LiftedAtom(IsAgent, [obj1]), + LiftedAtom(IsLoc, [obj3]), + LiftedAtom(IsLoc, [obj2]), + LiftedAtom(LeftOf, [obj3, obj2]), + LiftedAtom(At, [obj1, obj2]), + LiftedAtom(IsFacingLeft, [obj1]), + } + add_effects = {LiftedAtom(At, [obj1, obj3])} + delete_effects = {LiftedAtom(At, [obj1, obj2])} + option = MoveForward + option_vars = [] # dummy - not used + move_left_nsrt = NSRT("MoveLeft", parameters, preconditions, + 
add_effects, delete_effects, set(), option, + option_vars, null_sampler) + nsrts.add(move_left_nsrt) + + # TurnRight + turn_right_from_up_nsrt = NSRT("TurnRightFromUp", [obj1], + {LiftedAtom(IsFacingUp, [obj1])}, + {LiftedAtom(IsFacingRight, [obj1])}, + {LiftedAtom(IsFacingUp, [obj1])}, + set(), + TurnRight, [], null_sampler) + nsrts.add(turn_right_from_up_nsrt) + + turn_right_from_down_nsrt = NSRT("TurnRightFromDown", [obj1], + {LiftedAtom(IsFacingDown, [obj1])}, + {LiftedAtom(IsFacingLeft, [obj1])}, + {LiftedAtom(IsFacingDown, [obj1])}, + set(), + TurnRight, [], null_sampler) + nsrts.add(turn_right_from_down_nsrt) + + turn_right_from_left_nsrt = NSRT("TurnRightFromLeft", [obj1], + {LiftedAtom(IsFacingLeft, [obj1])}, + {LiftedAtom(IsFacingUp, [obj1])}, + {LiftedAtom(IsFacingLeft, [obj1])}, + set(), + TurnRight, [], null_sampler) + nsrts.add(turn_right_from_left_nsrt) + + turn_right_from_right_nsrt = NSRT("TurnRightFromRight", [obj1], + {LiftedAtom(IsFacingRight, [obj1])}, + {LiftedAtom(IsFacingDown, [obj1])}, + {LiftedAtom(IsFacingRight, [obj1])}, + set(), + TurnRight, [], null_sampler) + nsrts.add(turn_right_from_right_nsrt) + + # TurnLeft + turn_left_from_up_nsrt = NSRT("TurnLeftFromUp", [obj1], + {LiftedAtom(IsFacingUp, [obj1])}, + {LiftedAtom(IsFacingLeft, [obj1])}, + {LiftedAtom(IsFacingUp, [obj1])}, + set(), + TurnLeft, [], null_sampler) + nsrts.add(turn_left_from_up_nsrt) + + turn_left_from_down_nsrt = NSRT("TurnLeftFromDown", [obj1], + {LiftedAtom(IsFacingDown, [obj1])}, + {LiftedAtom(IsFacingRight, [obj1])}, + {LiftedAtom(IsFacingDown, [obj1])}, + set(), + TurnLeft, [], null_sampler) + nsrts.add(turn_left_from_down_nsrt) + + turn_left_from_left_nsrt = NSRT("TurnLeftFromLeft", [obj1], + {LiftedAtom(IsFacingLeft, [obj1])}, + {LiftedAtom(IsFacingDown, [obj1])}, + {LiftedAtom(IsFacingLeft, [obj1])}, + set(), + TurnLeft, [], null_sampler) + nsrts.add(turn_left_from_left_nsrt) + + turn_left_from_right_nsrt = NSRT("TurnLeftFromRight", [obj1], + {LiftedAtom(IsFacingRight, [obj1])}, + {LiftedAtom(IsFacingUp, [obj1])}, + {LiftedAtom(IsFacingRight, [obj1])}, + set(), + TurnLeft, [], null_sampler) + nsrts.add(turn_left_from_right_nsrt) + + # Pickup Left + # Agent, obj, agent_loc + parameters = [obj1, obj2, obj3] + preconditions = { + LiftedAtom(IsAgent, [obj1]), + LiftedAtom(IsLoc, [obj3]), + LiftedAtom(At, [obj1, obj3]), + LiftedAtom(LeftOf, [obj2, obj3]), + LiftedAtom(IsFacingLeft, [obj1]), + LiftedAtom(Found, [obj2]) + } + add_effects = {LiftedAtom(Holding, [obj2])} + delete_effects = {LiftedAtom(LeftOf, [obj2, obj1])} + option = Pickup + option_vars: List[Variable] = [] + pickup_left_nsrt = NSRT("Pickup_Left", parameters, preconditions, + add_effects, delete_effects, set(), option, + option_vars, null_sampler) + nsrts.add(pickup_left_nsrt) + + # Pickup Right + # Agent, obj, agent_loc + parameters = [obj1, obj2, obj3] + preconditions = { + LiftedAtom(IsAgent, [obj1]), + LiftedAtom(IsLoc, [obj3]), + LiftedAtom(At, [obj1, obj3]), + LiftedAtom(RightOf, [obj2, obj3]), + LiftedAtom(IsFacingRight, [obj1]), + LiftedAtom(Found, [obj2]) + } + add_effects = {LiftedAtom(Holding, [obj2])} + delete_effects = {LiftedAtom(RightOf, [obj2, obj1])} + option = Pickup + option_vars: List[Variable] = [] + pickup_right_nsrt = NSRT("Pickup_Right", parameters, preconditions, + add_effects, delete_effects, set(), option, + option_vars, null_sampler) + nsrts.add(pickup_right_nsrt) + + # Pickup Up + # Agent, obj, agent_loc + parameters = [obj1, obj2, obj3] + preconditions = { + LiftedAtom(IsAgent, [obj1]), + 
LiftedAtom(IsLoc, [obj3]), + LiftedAtom(At, [obj1, obj3]), + LiftedAtom(Above, [obj2, obj3]), + LiftedAtom(IsFacingUp, [obj1]), + LiftedAtom(Found, [obj2]) + } + add_effects = {LiftedAtom(Holding, [obj2])} + delete_effects = {LiftedAtom(Above, [obj2, obj1])} + option = Pickup + option_vars: List[Variable] = [] + pickup_up_nsrt = NSRT("Pickup_Up", parameters, preconditions, + add_effects, delete_effects, set(), option, + option_vars, null_sampler) + nsrts.add(pickup_up_nsrt) + + # Pickup Down + # Agent, obj, agent_loc + parameters = [obj1, obj2, obj3] + preconditions = { + LiftedAtom(IsAgent, [obj1]), + LiftedAtom(IsLoc, [obj3]), + LiftedAtom(At, [obj1, obj3]), + LiftedAtom(Below, [obj2, obj3]), + LiftedAtom(IsFacingDown, [obj1]), + LiftedAtom(Found, [obj2]) + } + add_effects = {LiftedAtom(Holding, [obj2])} + delete_effects = {LiftedAtom(Below, [obj2, obj1])} + option = Pickup + option_vars: List[Variable] = [] + pickup_down_nsrt = NSRT("Pickup_Down", parameters, preconditions, + add_effects, delete_effects, set(), option, + option_vars, null_sampler) + nsrts.add(pickup_down_nsrt) + + # Drop + # TODO + + # Toggle + # TODO + + # For Partial Observability + # Find Object + find_obj_nsrt = NSRT("FindObj", [obj1], + {LiftedAtom(Unknown, [obj1])}, + {LiftedAtom(Found, [obj1])}, + set(), + {LeftOf, RightOf, Above, Below}, + FindObj, [obj1], null_sampler) + nsrts.add(find_obj_nsrt) + + # Replan With Obj Known + replan_to_obj_nsrt = NSRT("ReplanToObj", [obj1, obj2], + {LiftedAtom(IsAgent, [obj1]), LiftedAtom(IsLoc, [obj2]), LiftedAtom(Unknown, [obj2]), LiftedAtom(Found, [obj2])}, + {LiftedAtom(At, [obj1, obj2])}, + set(), + {LeftOf, RightOf, Above, Below}, + ReplanToObj, [], null_sampler) + nsrts.add(replan_to_obj_nsrt) + + replan_to_pickable_obj_nsrt = NSRT("ReplanToPickableObj", [obj1, obj2], + {LiftedAtom(IsAgent, [obj1]), LiftedAtom(Unknown, [obj2]), LiftedAtom(Found, [obj2])}, + {LiftedAtom(Holding, [obj2])}, + set(), + {LeftOf, RightOf, Above, Below}, + ReplanToObj, [], null_sampler) + nsrts.add(replan_to_pickable_obj_nsrt) + + return nsrts diff --git a/predicators/ground_truth_models/mini_behavior_env/options.py b/predicators/ground_truth_models/mini_behavior_env/options.py new file mode 100644 index 0000000000..e664a4b6ff --- /dev/null +++ b/predicators/ground_truth_models/mini_behavior_env/options.py @@ -0,0 +1,92 @@ +"""Ground-truth options for the sokoban environment.""" + +from typing import Dict, Sequence, Set + +import numpy as np +from gym.spaces import Box + +from enum import IntEnum +from predicators import utils +from predicators.ground_truth_models import GroundTruthOptionFactory +from predicators.structs import Action, Array, Object, ParameterizedOption, \ + ParameterizedPolicy, Predicate, State, Type + +class Actions(IntEnum): + left = 0 + right = 1 + forward = 2 + toggle = 3 + open = 4 + close = 5 + slice = 6 + cook = 7 + drop_in = 8 + pickup_0 = 9 + pickup_1 = 10 + pickup_2 = 11 + drop_0 = 12 + drop_1 = 13 + drop_2 = 14 + +class MiniBehaviorGroundTruthOptionFactory(GroundTruthOptionFactory): + """Ground-truth options for the MiniBehavior environment.""" + + @classmethod + def get_env_names(cls) -> Set[str]: + return {"mini_behavior_env"} + + @classmethod + def get_options(cls, env_name: str, types: Dict[str, Type], + predicates: Dict[str, Predicate], + action_space: Box) -> Set[ParameterizedOption]: + + # Reformat names for consistency with other option naming. 
+ def _format_name(name: str) -> str: + return "".join([n.capitalize() for n in name.split(" ")]) + + options: Set[ParameterizedOption] = { + utils.SingletonParameterizedOption( + _format_name(name), cls._create_policy(discrete_action=i)) + for i, name in {value: key for key, value in Actions.__members__.items()}.items() + } + + # FindObj option. + object_type = types["obj"] + FindObjOption = ParameterizedOption( + "FindObj", + [object_type], + Box(low=np.array([]), high=np.array([]), shape=(0, )), + policy=cls._create_find_obj_policy(), + initiable=lambda s, m, o, p: True, + terminal=lambda s, m, o, p: s.get(o[0], "type") == 8 and s.get(o[0], "state") != -1) # 8 is the goal enum type + options.add(FindObjOption) + + # ReplanToObj option. + ReplanToObj = utils.SingletonParameterizedOption("ReplanToObj", cls._create_policy(discrete_action=6)) + options.add(ReplanToObj) + + return options + + @classmethod + def _create_policy(cls, discrete_action: int) -> ParameterizedPolicy: + + def policy(state: State, memory: Dict, objects: Sequence[Object], + params: Array) -> Action: + del state, memory, objects, params # unused. + arr = np.zeros(7, dtype=np.float32) + arr[discrete_action] = 1 + return Action(arr) + + return policy + + @classmethod + def _create_find_obj_policy(cls) -> ParameterizedPolicy: + + def policy(state: State, memory: Dict, objects: Sequence[Object], + params: Array) -> Action: + del state, memory, objects, params # unused. + arr = np.zeros(7, dtype=np.float32) + arr[np.random.choice([0, 1, 2], 1, p=[0.2, 0.2, 0.6])[0]] = 1 + return Action(arr) + + return policy diff --git a/predicators/ground_truth_models/minigrid_env/__init__.py b/predicators/ground_truth_models/minigrid_env/__init__.py new file mode 100644 index 0000000000..63ddf1fab2 --- /dev/null +++ b/predicators/ground_truth_models/minigrid_env/__init__.py @@ -0,0 +1,6 @@ +"""Ground truth models for MiniGrid gym environment.""" + +from .nsrts import MiniGridGroundTruthNSRTFactory +from .options import MiniGridGroundTruthOptionFactory + +__all__ = ["MiniGridGroundTruthOptionFactory", "MiniGridGroundTruthNSRTFactory"] diff --git a/predicators/ground_truth_models/minigrid_env/nsrts.py b/predicators/ground_truth_models/minigrid_env/nsrts.py new file mode 100644 index 0000000000..31304111de --- /dev/null +++ b/predicators/ground_truth_models/minigrid_env/nsrts.py @@ -0,0 +1,320 @@ +"""Ground-truth NSRTs for the cover environment.""" + +from typing import Dict, List, Set + +from predicators.ground_truth_models import GroundTruthNSRTFactory +from predicators.structs import NSRT, LiftedAtom, ParameterizedOption, \ + Predicate, Type, Variable +from predicators.utils import null_sampler + + +class MiniGridGroundTruthNSRTFactory(GroundTruthNSRTFactory): + """Ground-truth NSRTs for the MiniGrid environment.""" + + @classmethod + def get_env_names(cls) -> Set[str]: + return {"minigrid_env"} + + @staticmethod + def get_nsrts(env_name: str, types: Dict[str, Type], + predicates: Dict[str, Predicate], + options: Dict[str, ParameterizedOption]) -> Set[NSRT]: + # Types + object_type = types["obj"] + + # Objects + obj1 = Variable("?obj1", object_type) + obj2 = Variable("?obj2", object_type) + obj3 = Variable("?obj3", object_type) + + # Predicates + At = predicates["At"] + IsLoc = predicates["IsLoc"] + Above = predicates["Above"] + Below = predicates["Below"] + RightOf = predicates["RightOf"] + LeftOf = predicates["LeftOf"] + IsAgent = predicates["IsAgent"] + IsGoal = predicates["IsGoal"] + IsFacingUp = predicates["IsFacingUp"] + IsFacingDown 
= predicates["IsFacingDown"] + IsFacingLeft = predicates["IsFacingLeft"] + IsFacingRight = predicates["IsFacingRight"] + Unknown = predicates["Unknown"] + Found = predicates["Found"] + Holding = predicates["Holding"] + Near = predicates["Near"] + + # Options + MoveForward = options["Forward"] + TurnLeft = options["Left"] + TurnRight = options["Right"] + Pickup = options["Pickup"] + Drop = options["Drop"] + Toggle = options["Toggle"] + Done = options["Done"] + FindObj = options["FindObj"] + ReplanToObj = options["ReplanToObj"] + + nsrts = set() + + # MoveUp + # Agent, from_loc, to_loc + parameters = [obj1, obj2, obj3] + preconditions = { + LiftedAtom(IsAgent, [obj1]), + LiftedAtom(IsLoc, [obj3]), + LiftedAtom(IsLoc, [obj2]), + LiftedAtom(Above, [obj3, obj2]), + LiftedAtom(At, [obj1, obj2]), + LiftedAtom(IsFacingUp, [obj1]), + } + add_effects = {LiftedAtom(At, [obj1, obj3])} + delete_effects = {LiftedAtom(At, [obj1, obj2])} + option = MoveForward + option_vars: List[Variable] = [] # dummy - not used + move_up_nsrt = NSRT("MoveUp", parameters, preconditions, add_effects, + delete_effects, set(), option, option_vars, + null_sampler) + nsrts.add(move_up_nsrt) + + # MoveDown + # Agent, from_loc, to_loc + parameters = [obj1, obj2, obj3] + preconditions = { + LiftedAtom(IsAgent, [obj1]), + LiftedAtom(IsLoc, [obj3]), + LiftedAtom(IsLoc, [obj2]), + LiftedAtom(Below, [obj3, obj2]), + LiftedAtom(At, [obj1, obj2]), + LiftedAtom(IsFacingDown, [obj1]), + } + add_effects = {LiftedAtom(At, [obj1, obj3])} + delete_effects = {LiftedAtom(At, [obj1, obj2])} + option = MoveForward + option_vars = [] # dummy - not used + move_down_nsrt = NSRT("MoveDown", parameters, preconditions, + add_effects, delete_effects, set(), option, + option_vars, null_sampler) + nsrts.add(move_down_nsrt) + + # MoveRight + # Agent, from_loc, to_loc + parameters = [obj1, obj2, obj3] + preconditions = { + LiftedAtom(IsAgent, [obj1]), + LiftedAtom(IsLoc, [obj3]), + LiftedAtom(IsLoc, [obj2]), + LiftedAtom(RightOf, [obj3, obj2]), + LiftedAtom(At, [obj1, obj2]), + LiftedAtom(IsFacingRight, [obj1]), + } + add_effects = {LiftedAtom(At, [obj1, obj3])} + delete_effects = {LiftedAtom(At, [obj1, obj2])} + option = MoveForward + option_vars = [] # dummy - not used + move_right_nsrt = NSRT("MoveRight", parameters, preconditions, + add_effects, delete_effects, set(), option, + option_vars, null_sampler) + nsrts.add(move_right_nsrt) + + # MoveLeft + # Agent, from_loc, to_loc + parameters = [obj1, obj2, obj3] + preconditions = { + LiftedAtom(IsAgent, [obj1]), + LiftedAtom(IsLoc, [obj3]), + LiftedAtom(IsLoc, [obj2]), + LiftedAtom(LeftOf, [obj3, obj2]), + LiftedAtom(At, [obj1, obj2]), + LiftedAtom(IsFacingLeft, [obj1]), + } + add_effects = {LiftedAtom(At, [obj1, obj3])} + delete_effects = {LiftedAtom(At, [obj1, obj2])} + option = MoveForward + option_vars = [] # dummy - not used + move_left_nsrt = NSRT("MoveLeft", parameters, preconditions, + add_effects, delete_effects, set(), option, + option_vars, null_sampler) + nsrts.add(move_left_nsrt) + + # TurnRight + turn_right_from_up_nsrt = NSRT("TurnRightFromUp", [obj1], + {LiftedAtom(IsFacingUp, [obj1])}, + {LiftedAtom(IsFacingRight, [obj1])}, + {LiftedAtom(IsFacingUp, [obj1])}, + set(), + TurnRight, [], null_sampler) + nsrts.add(turn_right_from_up_nsrt) + + turn_right_from_down_nsrt = NSRT("TurnRightFromDown", [obj1], + {LiftedAtom(IsFacingDown, [obj1])}, + {LiftedAtom(IsFacingLeft, [obj1])}, + {LiftedAtom(IsFacingDown, [obj1])}, + set(), + TurnRight, [], null_sampler) + nsrts.add(turn_right_from_down_nsrt) 
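+        # Each Turn* NSRT swaps a single IsFacing* atom: TurnRight steps
+        # through the facing cycle up -> right -> down -> left, while
+        # TurnLeft traverses the same cycle in reverse.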
+ + turn_right_from_left_nsrt = NSRT("TurnRightFromLeft", [obj1], + {LiftedAtom(IsFacingLeft, [obj1])}, + {LiftedAtom(IsFacingUp, [obj1])}, + {LiftedAtom(IsFacingLeft, [obj1])}, + set(), + TurnRight, [], null_sampler) + nsrts.add(turn_right_from_left_nsrt) + + turn_right_from_right_nsrt = NSRT("TurnRightFromRight", [obj1], + {LiftedAtom(IsFacingRight, [obj1])}, + {LiftedAtom(IsFacingDown, [obj1])}, + {LiftedAtom(IsFacingRight, [obj1])}, + set(), + TurnRight, [], null_sampler) + nsrts.add(turn_right_from_right_nsrt) + + # TurnLeft + turn_left_from_up_nsrt = NSRT("TurnLeftFromUp", [obj1], + {LiftedAtom(IsFacingUp, [obj1])}, + {LiftedAtom(IsFacingLeft, [obj1])}, + {LiftedAtom(IsFacingUp, [obj1])}, + set(), + TurnLeft, [], null_sampler) + nsrts.add(turn_left_from_up_nsrt) + + turn_left_from_down_nsrt = NSRT("TurnLeftFromDown", [obj1], + {LiftedAtom(IsFacingDown, [obj1])}, + {LiftedAtom(IsFacingRight, [obj1])}, + {LiftedAtom(IsFacingDown, [obj1])}, + set(), + TurnLeft, [], null_sampler) + nsrts.add(turn_left_from_down_nsrt) + + turn_left_from_left_nsrt = NSRT("TurnLeftFromLeft", [obj1], + {LiftedAtom(IsFacingLeft, [obj1])}, + {LiftedAtom(IsFacingDown, [obj1])}, + {LiftedAtom(IsFacingLeft, [obj1])}, + set(), + TurnLeft, [], null_sampler) + nsrts.add(turn_left_from_left_nsrt) + + turn_left_from_right_nsrt = NSRT("TurnLeftFromRight", [obj1], + {LiftedAtom(IsFacingRight, [obj1])}, + {LiftedAtom(IsFacingUp, [obj1])}, + {LiftedAtom(IsFacingRight, [obj1])}, + set(), + TurnLeft, [], null_sampler) + nsrts.add(turn_left_from_right_nsrt) + + # Pickup Left + # Agent, obj, agent_loc + parameters = [obj1, obj2, obj3] + preconditions = { + LiftedAtom(IsAgent, [obj1]), + LiftedAtom(IsLoc, [obj3]), + LiftedAtom(At, [obj1, obj3]), + LiftedAtom(LeftOf, [obj2, obj3]), + LiftedAtom(IsFacingLeft, [obj1]), + LiftedAtom(Found, [obj2]) + } + add_effects = {LiftedAtom(Holding, [obj2])} + delete_effects = {LiftedAtom(LeftOf, [obj2, obj1])} + option = Pickup + option_vars: List[Variable] = [] + pickup_left_nsrt = NSRT("Pickup_Left", parameters, preconditions, + add_effects, delete_effects, set(), option, + option_vars, null_sampler) + nsrts.add(pickup_left_nsrt) + + # Pickup Right + # Agent, obj, agent_loc + parameters = [obj1, obj2, obj3] + preconditions = { + LiftedAtom(IsAgent, [obj1]), + LiftedAtom(IsLoc, [obj3]), + LiftedAtom(At, [obj1, obj3]), + LiftedAtom(RightOf, [obj2, obj3]), + LiftedAtom(IsFacingRight, [obj1]), + LiftedAtom(Found, [obj2]) + } + add_effects = {LiftedAtom(Holding, [obj2])} + delete_effects = {LiftedAtom(RightOf, [obj2, obj1])} + option = Pickup + option_vars: List[Variable] = [] + pickup_right_nsrt = NSRT("Pickup_Right", parameters, preconditions, + add_effects, delete_effects, set(), option, + option_vars, null_sampler) + nsrts.add(pickup_right_nsrt) + + # Pickup Up + # Agent, obj, agent_loc + parameters = [obj1, obj2, obj3] + preconditions = { + LiftedAtom(IsAgent, [obj1]), + LiftedAtom(IsLoc, [obj3]), + LiftedAtom(At, [obj1, obj3]), + LiftedAtom(Above, [obj2, obj3]), + LiftedAtom(IsFacingUp, [obj1]), + LiftedAtom(Found, [obj2]) + } + add_effects = {LiftedAtom(Holding, [obj2])} + delete_effects = {LiftedAtom(Above, [obj2, obj1])} + option = Pickup + option_vars: List[Variable] = [] + pickup_up_nsrt = NSRT("Pickup_Up", parameters, preconditions, + add_effects, delete_effects, set(), option, + option_vars, null_sampler) + nsrts.add(pickup_up_nsrt) + + # Pickup Down + # Agent, obj, agent_loc + parameters = [obj1, obj2, obj3] + preconditions = { + LiftedAtom(IsAgent, [obj1]), + LiftedAtom(IsLoc, 
[obj3]), + LiftedAtom(At, [obj1, obj3]), + LiftedAtom(Below, [obj2, obj3]), + LiftedAtom(IsFacingDown, [obj1]), + LiftedAtom(Found, [obj2]) + } + add_effects = {LiftedAtom(Holding, [obj2])} + delete_effects = {LiftedAtom(Below, [obj2, obj1])} + option = Pickup + option_vars: List[Variable] = [] + pickup_down_nsrt = NSRT("Pickup_Down", parameters, preconditions, + add_effects, delete_effects, set(), option, + option_vars, null_sampler) + nsrts.add(pickup_down_nsrt) + + # Drop + # TODO + + # Toggle + # TODO + + # For Partial Observability + # Find Object + find_obj_nsrt = NSRT("FindObj", [obj1], + {LiftedAtom(Unknown, [obj1])}, + {LiftedAtom(Found, [obj1])}, + set(), + {LeftOf, RightOf, Above, Below}, + FindObj, [obj1], null_sampler) + nsrts.add(find_obj_nsrt) + + # Replan With Obj Known + replan_to_obj_nsrt = NSRT("ReplanToObj", [obj1, obj2], + {LiftedAtom(IsAgent, [obj1]), LiftedAtom(IsLoc, [obj2]), LiftedAtom(Unknown, [obj2]), LiftedAtom(Found, [obj2])}, + {LiftedAtom(At, [obj1, obj2])}, + set(), + {LeftOf, RightOf, Above, Below}, + ReplanToObj, [], null_sampler) + nsrts.add(replan_to_obj_nsrt) + + replan_to_pickable_obj_nsrt = NSRT("ReplanToPickableObj", [obj1, obj2], + {LiftedAtom(IsAgent, [obj1]), LiftedAtom(Unknown, [obj2]), LiftedAtom(Found, [obj2])}, + {LiftedAtom(Holding, [obj2])}, + set(), + {LeftOf, RightOf, Above, Below}, + ReplanToObj, [], null_sampler) + nsrts.add(replan_to_pickable_obj_nsrt) + + return nsrts diff --git a/predicators/ground_truth_models/minigrid_env/options.py b/predicators/ground_truth_models/minigrid_env/options.py new file mode 100644 index 0000000000..64b0a43291 --- /dev/null +++ b/predicators/ground_truth_models/minigrid_env/options.py @@ -0,0 +1,76 @@ +"""Ground-truth options for the sokoban environment.""" + +from typing import Dict, Sequence, Set + +import numpy as np +from gym.spaces import Box +from minigrid.core.actions import Actions + +from predicators import utils +from predicators.ground_truth_models import GroundTruthOptionFactory +from predicators.structs import Action, Array, Object, ParameterizedOption, \ + ParameterizedPolicy, Predicate, State, Type + + +class MiniGridGroundTruthOptionFactory(GroundTruthOptionFactory): + """Ground-truth options for the minigrid environment.""" + + @classmethod + def get_env_names(cls) -> Set[str]: + return {"minigrid_env"} + + @classmethod + def get_options(cls, env_name: str, types: Dict[str, Type], + predicates: Dict[str, Predicate], + action_space: Box) -> Set[ParameterizedOption]: + + # Reformat names for consistency with other option naming. + def _format_name(name: str) -> str: + return "".join([n.capitalize() for n in name.split(" ")]) + + options: Set[ParameterizedOption] = { + utils.SingletonParameterizedOption( + _format_name(name), cls._create_policy(discrete_action=i)) + for i, name in {value: key for key, value in Actions.__members__.items()}.items() + } + + # FindObj option. + object_type = types["obj"] + FindObjOption = ParameterizedOption( + "FindObj", + [object_type], + Box(low=np.array([]), high=np.array([]), shape=(0, )), + policy=cls._create_find_obj_policy(), + initiable=lambda s, m, o, p: True, + terminal=lambda s, m, o, p: s.get(o[0], "type") == 8 and s.get(o[0], "state") != -1) # 8 is the goal enum type + options.add(FindObjOption) + + # ReplanToObj option. 
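One singleton option is created per primitive MiniGrid action by inverting Actions.__members__ into an index-to-name map. The sketch below reproduces that inversion with a plain dict standing in for the enum; the dict contents assume the standard MiniGrid action ordering and the variable names are illustrative, not part of the diff:

members = {"left": 0, "right": 1, "forward": 2, "pickup": 3,
           "drop": 4, "toggle": 5, "done": 6}   # assumed MiniGrid ordering
index_to_name = {value: key for key, value in members.items()}
option_names = {i: name.capitalize() for i, name in index_to_name.items()}
assert option_names[2] == "Forward"   # matches options["Forward"] used by the NSRTs

The ReplanToObj option declared next reuses the same one-hot policy with discrete_action=6, i.e. the "Done" slot under this assumed ordering.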
+ ReplanToObj = utils.SingletonParameterizedOption("ReplanToObj", cls._create_policy(discrete_action=6)) + options.add(ReplanToObj) + + return options + + @classmethod + def _create_policy(cls, discrete_action: int) -> ParameterizedPolicy: + + def policy(state: State, memory: Dict, objects: Sequence[Object], + params: Array) -> Action: + del state, memory, objects, params # unused. + arr = np.zeros(7, dtype=np.float32) + arr[discrete_action] = 1 + return Action(arr) + + return policy + + @classmethod + def _create_find_obj_policy(cls) -> ParameterizedPolicy: + + def policy(state: State, memory: Dict, objects: Sequence[Object], + params: Array) -> Action: + del state, memory, objects, params # unused. + arr = np.zeros(7, dtype=np.float32) + arr[np.random.choice([0, 1, 2], 1, p=[0.2, 0.2, 0.6])[0]] = 1 + return Action(arr) + + return policy diff --git a/predicators/nsrt_learning/nsrt_learning_main.py b/predicators/nsrt_learning/nsrt_learning_main.py index d13ef054b6..72abeb4ac2 100644 --- a/predicators/nsrt_learning/nsrt_learning_main.py +++ b/predicators/nsrt_learning/nsrt_learning_main.py @@ -119,7 +119,8 @@ def learn_nsrts_from_data( if CFG.strips_learner != "oracle" or CFG.sampler_learner != "oracle" or \ CFG.option_learner != "no_learning": # Updates the PNADs in-place. - _learn_pnad_options(pnads, known_options, action_space) + if CFG.option_learner != "no_learning": + _learn_pnad_options(pnads, known_options, action_space) # STEP 4: Learn samplers (sampler_learning.py) and update PNADs. _learn_pnad_samplers(pnads, sampler_learner) # in-place update diff --git a/predicators/nsrt_learning/strips_learning/base_strips_learner.py b/predicators/nsrt_learning/strips_learning/base_strips_learner.py index 5d3aa998ac..c64665d584 100644 --- a/predicators/nsrt_learning/strips_learning/base_strips_learner.py +++ b/predicators/nsrt_learning/strips_learning/base_strips_learner.py @@ -162,7 +162,11 @@ def _check_single_demo_preservation( traj_goal, option_plan, atoms_seq) return ground_nsrt_plan is not None - def _recompute_datastores_from_segments(self, pnads: List[PNAD]) -> None: + def _recompute_datastores_from_segments(self, + pnads: List[PNAD], + check_only_preconditions: bool = False, + check_assertion: bool = True, + any_matching: bool = False) -> None: """For the given PNADs, wipe and recompute the datastores. 
Uses a "rationality" heuristic, where for each segment, we @@ -182,7 +186,7 @@ def _recompute_datastores_from_segments(self, pnads: List[PNAD]) -> None: objects = set(seg_traj[0].states[0]) for segment in seg_traj: best_pnad, best_sub = self._find_best_matching_pnad_and_sub( - segment, objects, pnads) + segment, objects, pnads, check_only_preconditions, check_assertion, any_matching) if best_pnad is not None: assert best_sub is not None best_pnad.add_to_datastore((segment, best_sub), @@ -193,7 +197,9 @@ def _find_best_matching_pnad_and_sub( segment: Segment, objects: Set[Object], pnads: List[PNAD], - check_only_preconditions: bool = False + check_only_preconditions: bool = False, + check_assertion: bool = True, + any_matching: bool = False, ) -> Tuple[Optional[PNAD], Optional[Dict[Variable, Object]]]: """Find the best matching PNAD (if any) given our rationality-based score function, and return the PNAD and substitution necessary to @@ -226,7 +232,8 @@ def _find_best_matching_pnad_and_sub( for pnad in pnads: param_opt, opt_vars = pnad.option_spec if param_opt != segment_param_option: - continue + if not any_matching: + continue isub = dict(zip(opt_vars, segment_option_objs)) if segment in pnad.seg_to_keep_effects_sub: # If there are any variables only in the keep effects, @@ -244,7 +251,8 @@ def _find_best_matching_pnad_and_sub( # If the preconditions don't hold in the segment's # initial atoms, skip. if not ground_op.preconditions.issubset(segment.init_atoms): - continue + if not any_matching: + continue next_atoms = utils.apply_operator(ground_op, segment.init_atoms) if not check_only_preconditions: @@ -265,7 +273,8 @@ def _find_best_matching_pnad_and_sub( # with a most-general PNAD that has no add effects # and all other predicates sidelined, and thus this # assertion must hold. - assert next_atoms.issubset(segment.final_atoms) + if check_assertion: + assert next_atoms.issubset(segment.final_atoms) # This ground PNAD covers this segment. Score it! score = self._score_segment_ground_op_match(segment, ground_op) if score < best_score: # we want a closer match diff --git a/predicators/nsrt_learning/strips_learning/gen_to_spec_learner.py b/predicators/nsrt_learning/strips_learning/gen_to_spec_learner.py index 59906cd49a..f66cdd2425 100644 --- a/predicators/nsrt_learning/strips_learning/gen_to_spec_learner.py +++ b/predicators/nsrt_learning/strips_learning/gen_to_spec_learner.py @@ -10,8 +10,25 @@ from predicators.settings import CFG from predicators.structs import PNAD, GroundAtom, Object, \ ParameterizedOption, Segment, STRIPSOperator, Variable, \ - _GroundSTRIPSOperator + _GroundSTRIPSOperator, _Atom, LowLevelTrajectory, Predicate, Type, Action, LiftedAtom, NSRT +from predicators.planning import task_plan, task_plan_grounding, _SkeletonSearchTimeout +import re +name_to_actions = { + "Move": 0, + "Actions.pickup_0": 3, + "Actions.pickup_1": 4, + "Actions.pickup_2": 5, + "Actions.drop_0": 6, + "Actions.drop_1": 7, + "Actions.drop_2": 8, + "Actions.drop_in": 9, + "Actions.toggle": 10, + "Actions.close": 11, + "Actions.open": 12, + "Actions.cook": 13, + "Actions.slice": 14 +} class GeneralToSpecificSTRIPSLearner(BaseSTRIPSLearner): """Base class for a general-to-specific STRIPS learner.""" @@ -138,7 +155,6 @@ def get_pnads_with_keep_effects(pnad: PNAD) -> Set[PNAD]: # Remember to copy seg_to_keep_effects_sub into the new_pnad! 
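The new keyword flags relax segment-to-PNAD matching in stages: any_matching keeps candidates that the option or precondition filters would otherwise reject, and check_assertion=False skips the final subset assertion. A standalone sketch of the guarded-continue pattern behind any_matching, with illustrative names only:

def filter_candidates(candidates, segment_option, any_matching=False):
    # Strict mode drops mismatching candidates; any_matching keeps them.
    kept = []
    for cand in candidates:
        if cand["option"] != segment_option:
            if not any_matching:
                continue
        kept.append(cand)
    return kept

pnads = [{"name": "A", "option": "Pickup"}, {"name": "B", "option": "Drop"}]
assert [p["name"] for p in filter_candidates(pnads, "Pickup")] == ["A"]
assert [p["name"] for p in filter_candidates(pnads, "Pickup", any_matching=True)] == ["A", "B"]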
new_pnad.seg_to_keep_effects_sub = pnad.seg_to_keep_effects_sub new_pnads_with_keep_effects.add(new_pnad) - return new_pnads_with_keep_effects def _reset_all_segment_necessary_add_effs(self) -> None: @@ -499,3 +515,591 @@ def _assert_all_data_in_exactly_one_datastore(self, continue for segment in seg_traj: assert segment in all_segs_in_data + +class BackwardForwardSTRIPSLearner(GeneralToSpecificSTRIPSLearner): + """Learn STRIPS operators by backchaining and forward search.""" + + def _learn(self) -> List[PNAD]: + # Initialize the most general PNADs by merging self._initial_pnads. + # As a result, we will have one very general PNAD per option. + param_opt_to_nec_pnads: Dict[ParameterizedOption, List[PNAD]] = {} + # Extract all parameterized options from the data. + parameterized_options = set() + for ll_traj, seg_traj in zip(self._trajectories, + self._segmented_trajs): + if not ll_traj.is_demo: + continue + for segment in seg_traj: + parameterized_options.add(segment.get_option().parent) + + # Set up the param_opt_to_nec_pnads dictionary. + for param_opt in parameterized_options: + param_opt_to_nec_pnads[param_opt] = [] + + prev_itr_ops: Set[STRIPSOperator] = set() + + # Load initial pnad set + if CFG.backward_forward_load_initial: + with open("test_saved.NSRTs.txt", "r") as file: + content = file.read() + nsrt_strs = ["NSRT-" + nsrt_str for nsrt_str in content.split("NSRT-") if nsrt_str != ''] + pnads = [self.parse_nsrt_block(nsrt_str) for nsrt_str in nsrt_strs] + self._recompute_datastores_from_segments(pnads) + for pnad in pnads: + param_opt_to_nec_pnads[pnad.option_spec[0]].append(pnad) + ### + + # We loop until the harmless PNADs induced by our procedure + # converge to a fixed point (i.e, they don't change after two + # subsequent iterations). + for _ in range(10): + # Run multiple passes of backchaining over the data until + # convergence to a fixed point. Note that this process creates + # operators with only parameters, preconditions, and add effects. + + # Step 1: Run backchaining + self._backchain_multipass(param_opt_to_nec_pnads) + + # Step 2: Strip preconditions (optional) + for pnads in param_opt_to_nec_pnads.values(): + for pnad in pnads: + pnad.op = pnad.op.copy_with(preconditions=set(), ignore_effects=set()) + + # Step 3: Forward refinement + self._forward_one_pass(param_opt_to_nec_pnads) + + # Recompute datastores. + cur_itr_pnads_unfiltered = [ + pnad for pnads in param_opt_to_nec_pnads.values() + for pnad in pnads + ] + self._recompute_datastores_from_segments(cur_itr_pnads_unfiltered, check_only_preconditions=True, check_assertion=False) + + # Induce delete effects, ignore effects and potentially + # keep effects. + self._induce_delete_side_keep(param_opt_to_nec_pnads) + + # Harmlessness should now hold, but it's slow to check. + if CFG.backchaining_check_intermediate_harmlessness: + assert self._check_harmlessness( + self._get_uniquely_named_nec_pnads(param_opt_to_nec_pnads)) + + # Recompute datastores and filter out PNADs that don't have datastores. + cur_itr_pnads_unfiltered = [ + pnad for pnads in param_opt_to_nec_pnads.values() + for pnad in pnads + ] + self._recompute_datastores_from_segments(cur_itr_pnads_unfiltered) + cur_itr_pnads_filtered = [] + for pnad in cur_itr_pnads_unfiltered: + if len(pnad.datastore) > 0: + # new_pre = self._induce_preconditions_via_intersection(pnad) + # NOTE: this implicitly changes param_opt_to_nec_pnads + # as well, since we're directly modifying the PNAD objects. 
+ # nad.op = pnad.op.copy_with(preconditions=new_pre) + cur_itr_pnads_filtered.append(pnad) + else: + param_opt_to_nec_pnads[pnad.option_spec[0]].remove(pnad) + del cur_itr_pnads_unfiltered # should be unused after this + + # Check if the PNAD set has converged. If so, break. + if {pnad.op for pnad in cur_itr_pnads_filtered} == prev_itr_ops: + break + + prev_itr_ops = {pnad.op for pnad in cur_itr_pnads_filtered} + + # Assign a unique name to each PNAD. + final_pnads = self._get_uniquely_named_nec_pnads( + param_opt_to_nec_pnads) + # Assert data has been correctly partitioned amongst PNADs. + # self._assert_all_data_in_exactly_one_datastore(final_pnads) + return final_pnads + + def parse_nsrt_block(self, block: str) -> PNAD: + """Parses a single NSRT block into an PNAD object.""" + lines = block.strip().split("\n") + + name_match = re.match(r"(\S+):", lines[0]) + name = name_match.group(1) if name_match else "" + + parameters = re.findall(r"\?x\d+:\w+", lines[1]) + + def extract_effects(label: str) -> Set[str]: + """Extracts a list of predicates from labeled sections.""" + for line in lines: + if line.strip().startswith(label): + return set(re.findall(r"\w+\(.*?\)", line)) + return set() + + preconditions = extract_effects("Preconditions") + add_effects = extract_effects("Add Effects") + delete_effects = extract_effects("Delete Effects") + ignore_effects = extract_effects("Ignore Effects") + + option_spec_match = re.search(r"Option Spec:\s*(.*)", block) + option_spec = option_spec_match.group(1) if option_spec_match else "" + + objects = set() + atoms = set() + option_specs = {} + for traj in self._segmented_trajs: + for segment in traj: + for state in segment.states: + for k, v in state.items(): + objects.add(k) + atoms |= segment.init_atoms | segment.final_atoms + option_specs[segment.get_option().parent.name] = segment.get_option().parent + all_predicates_list = [(atom.predicate.name,atom.predicate) for atom in atoms] + def get_predicate(name, entities): + for pred_name, pred in all_predicates_list: + if pred_name == pred_name and pred.arity == len(entities): + valid_types = True + for i, ent in enumerate(entities): + if ent.type != pred.types[i]: + valid_types = False + if valid_types: + return pred + raise NotImplementedError + + types = {obj.type.name:obj.type for obj in objects} + + def extract_parameters(predicate: str) -> Set[str]: + parameter_pattern = re.compile(r"\?x\d+:\w+") # Matches variables like ?x0:obj_type + matches = parameter_pattern.findall(predicate) + return matches + + parameters = [Variable(param.split(":")[0], types[param.split(":")[1]]) for param in parameters] + preconditions = set([LiftedAtom(get_predicate(pre.split("(")[0], [Variable(param.split(":")[0], types[param.split(":")[1]]) for param in extract_parameters(pre)]), [Variable(param.split(":")[0], types[param.split(":")[1]]) for param in extract_parameters(pre)]) for pre in preconditions]) + add_effects = set([LiftedAtom(get_predicate(add.split("(")[0], [Variable(param.split(":")[0], types[param.split(":")[1]]) for param in extract_parameters(add)]), [Variable(param.split(":")[0], types[param.split(":")[1]]) for param in extract_parameters(add)]) for add in add_effects]) + delete_effects = set([LiftedAtom(get_predicate(dle.split("(")[0], [Variable(param.split(":")[0], types[param.split(":")[1]]) for param in extract_parameters(dle)]), [Variable(param.split(":")[0], types[param.split(":")[1]]) for param in extract_parameters(dle)]) for dle in delete_effects]) + ignore_effects = set([get_predicate(ige, None) 
for ige in ignore_effects]) + option_spec = (option_specs[option_spec.split("(")[0]], []) + + nsrt = NSRT(name, parameters, preconditions, add_effects, delete_effects, ignore_effects, option_spec, [], None) + return PNAD(nsrt.op, [], option_spec) + + def _forward_one_pass( + self, param_opt_to_nec_pnads: Dict[ParameterizedOption, List[PNAD]] + ) -> None: + """Perform one forward search passes to refine PNAD preconditions + """ + + for ll_traj, seg_traj in zip(self._trajectories, self._segmented_trajs): + if not ll_traj.is_demo: + continue + task = self._train_tasks[ll_traj.train_task_idx] + + # Get initial atoms and object list + objects, _, _, ground_atoms_traj, _ = parse_objs_preds_and_options( + ll_traj, train_task_idx=ll_traj.train_task_idx) + init_atoms = ground_atoms_traj[1][0] + + prev_op_set: Set[STRIPSOperator] = set() + + nsrts = [pnad.op for pnads in param_opt_to_nec_pnads.values() + for pnad in pnads] + nsrt_to_option = {pnad.op:pnad.option_spec for pnads in param_opt_to_nec_pnads.values() for pnad in pnads} + predicates = self._predicates + + # Plan using current operators + ground_nsrts, reachable_atoms = task_plan_grounding( + init_atoms, objects, nsrts, allow_noops=True) + heuristic = utils.create_task_planning_heuristic( + "hadd", init_atoms, task.goal, ground_nsrts, + predicates, objects) + task_plan_generator = task_plan( + init_atoms, task.goal, ground_nsrts, + reachable_atoms, heuristic, + timeout=100, seed=123, max_skeletons_optimized=3) + + skeleton, _, _ = next(task_plan_generator) + + # Check if plan matches the actual low-level trajectory + planned_options = [] + for step in skeleton: + planned_options.append(nsrt_to_option[step.parent][0]) + + for i, planned_option in enumerate(planned_options): + if seg_traj[i].get_option().name != planned_option.name: + # TODO should not just be the first + pnad = None + for option_pnad in param_opt_to_nec_pnads[planned_option]: + if pnad is None: + pnad = option_pnad + if len(option_pnad.op.preconditions) < len(pnad.op.preconditions): + pnad = option_pnad + positive_data = pnad.datastore + diff_atoms = [] + diff_preds = [] + necessary_effects = set.union(*[seg.necessary_add_effects for seg in seg_traj]) + + for pos_seg in positive_data: + curr_diff_atoms = (pos_seg[0].init_atoms - seg_traj[i].init_atoms) & necessary_effects + diff_atoms.append(curr_diff_atoms) + diff_preds.append(set([atom.predicate for atom in curr_diff_atoms])) + + # if diff_preds == [] or set.intersection(*[s for s in diff_preds]) == set(): + # diff_atoms = [] + # diff_preds = [] + # for pos_seg in positive_data: + # curr_diff_atoms = (pos_seg[0].init_atoms - seg_traj[i].init_atoms) + # diff_atoms.append(curr_diff_atoms) + # diff_preds.append(set([atom.predicate for atom in curr_diff_atoms])) + + new_pre = set() + new_params = [] + print() + print(planned_option, set.intersection(*[s for s in diff_preds])) + if diff_preds != []: + new_preds = set.intersection(*[s for s in diff_preds]) + if new_preds != set(): + for pred in new_preds: + best_pnad, best_sub = self._find_best_matching_pnad_and_sub(positive_data[0][0], objects, param_opt_to_nec_pnads[planned_option], check_only_preconditions=True, check_assertion=False, any_matching=True) + pred_objs = [atom.objects for atom in positive_data[0][0].init_atoms if atom.predicate == pred][0] + print(pred_objs) + obj_vars = {v:k for k,v in best_sub.items()} + if best_pnad is not None: + params = [] + for obj in pred_objs: + if obj in obj_vars: + params.append(obj_vars[obj]) + else: + params.append(Variable("?x" + 
str(len(obj_vars.keys())), obj.type)) + new_pre.add(LiftedAtom(pred, params)) + new_params += params + print(params) + print(pnad) + if len(new_pre) > len(pnad.op.preconditions): + new_params += pnad.op.parameters + pnad.op = pnad.op.copy_with(parameters=list(set(new_params)),preconditions=new_pre) + + + # # Check for convergence + # cur_op_set = {pnad.op for pnads in param_opt_to_nec_pnads.values() + # for pnad in pnads} + # if cur_op_set == prev_op_set: + # break + # prev_op_set = cur_op_set + + def _backchain_multipass( + self, param_opt_to_nec_pnads: Dict[ParameterizedOption, + List[PNAD]]) -> None: + """Take multiple passes through the demonstrations, running + self._backchain_one_pass() each time. + + Keep going until the PNADs reach a fixed point. Note that this + process creates operators with only parameters, preconditions, + and add effects. + """ + while True: + # Before each pass, clear the poss_keep_effects + # of all the PNADs. We do this because we only want the + # poss_keep_effects of the final pass, where the PNADs did + # not change. However, we cannot simply clear the + # pnad.seg_to_keep_effects_sub because some of these + # substitutions might be necessary if this happens to be + # a PNAD that already has keep effects. Thus, we call a + # method that handles this correctly. + for pnads in param_opt_to_nec_pnads.values(): + for pnad in pnads: + self.clear_unnecessary_keep_effs(pnad) + # Run one pass of backchaining. + nec_pnad_set_changed = self._backchain_one_pass( + param_opt_to_nec_pnads) + if not nec_pnad_set_changed: + break + + def _backchain_one_pass( + self, param_opt_to_nec_pnads: Dict[ParameterizedOption, + List[PNAD]]) -> bool: + """Take one pass through the demonstrations in the given order. + + Go through each one from the end back to the start, making the + PNADs more specific whenever needed. Return whether any PNAD was + changed. + """ + # Reset all segments' necessary_add_effects so that they aren't + # accidentally used from a previous iteration of backchaining. + self._reset_all_segment_necessary_add_effs() + nec_pnad_set_changed = False + for ll_traj, seg_traj in zip(self._trajectories, + self._segmented_trajs): + if not ll_traj.is_demo: + continue + traj_goal = self._train_tasks[ll_traj.train_task_idx].goal + atoms_seq = utils.segment_trajectory_to_atoms_sequence(seg_traj) + assert traj_goal.issubset(atoms_seq[-1]) + # This variable, necessary_image, gets updated as we + # backchain. It always holds the set of ground atoms that + # are necessary for the remainder of the plan to reach the + # goal. At the start, necessary_image is simply the goal. + necessary_image = set(traj_goal) + for t in range(len(atoms_seq) - 2, -1, -1): + segment = seg_traj[t] + option = segment.get_option() + # Find the necessary PNADs associated with this option. If + # there are none, then use the general PNAD associated with + # this option. (But make sure to use a copy of it, because we + # don't want the general PNAD to get mutated when we mutate + # necessary PNADs!) + if len(param_opt_to_nec_pnads[option.parent]) == 0: + general_pnad = self._create_general_pnad_for_option( + option.parent) + pnads_for_option = [ + PNAD(general_pnad.op, list(general_pnad.datastore), + general_pnad.option_spec) + ] + else: + pnads_for_option = param_opt_to_nec_pnads[option.parent] + + # Compute the ground atoms that must be added on this timestep. + # They must be a subset of the current PNAD's add effects. 
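The set arithmetic behind this step is small but central: the atoms in necessary_image that are missing from the current state are exactly what the chosen operator must add, and the image is then rolled back through the operator's add effects and preconditions. A worked example with plain string atoms (all atom names are made up for illustration):

necessary_image = {"At(agent, goal)", "Holding(key)"}
atoms_before_step = {"Holding(key)", "At(agent, loc3)"}
necessary_add_effects = necessary_image - atoms_before_step
assert necessary_add_effects == {"At(agent, goal)"}

add_effects = {"At(agent, goal)"}
preconditions = {"Holding(key)", "At(agent, loc3)"}
necessary_image = (necessary_image - add_effects) | preconditions
assert necessary_image == {"Holding(key)", "At(agent, loc3)"}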
+ necessary_add_effects = necessary_image - atoms_seq[t] + assert necessary_add_effects.issubset(segment.add_effects) + # Update the segment's necessary_add_effects. + segment.necessary_add_effects = necessary_add_effects + + # We start by checking if any of the PNADs associated with the + # demonstrated option are able to match this transition. + objects = set(segment.states[0]) + pnad, var_to_obj = self._find_best_matching_pnad_and_sub( + segment, objects, pnads_for_option) + if pnad is not None: + assert var_to_obj is not None + obj_to_var = {v: k for k, v in var_to_obj.items()} + assert len(var_to_obj) == len(obj_to_var) + ground_op = pnad.op.ground( + tuple(var_to_obj[var] for var in pnad.op.parameters)) + if len(param_opt_to_nec_pnads[option.parent]) == 0: + param_opt_to_nec_pnads[option.parent].append(pnad) + segs_in_pnad = { + datapoint[0] + for datapoint in pnad.datastore + } + # In this case, we want to move the segment from + # another PNAD into the current PNAD. Note that + # we don't have to recompute the PNAD's add + # effects or preconditions because of the fact that + # this PNAD was found by the _find_best_matching + # function (which internally checks that the + # preconditions and add effects are all correct). + if segment not in segs_in_pnad: + # Find PNAD that the segment is currently in. + for seg_pnad in pnads_for_option: + segs_in_seg_pnad = [ + datapoint[0] + for datapoint in seg_pnad.datastore + ] + if segment in set(segs_in_seg_pnad): + seg_idx = segs_in_seg_pnad.index(segment) + seg_pnad.datastore.pop(seg_idx) + break + pnad.datastore.append((segment, var_to_obj)) + self._remove_empty_datastore_pnads( + param_opt_to_nec_pnads, option.parent) + + # If we weren't able to find a substitution (i.e, the above + # _find_best_matching call didn't yield a PNAD), we need to + # spawn a new PNAD from the most general PNAD to cover + # these necessary add effects. + else: + nec_pnad_set_changed = True + pnad = self.spawn_new_pnad(segment) + param_opt_to_nec_pnads[option.parent].append(pnad) + + # Recompute datastores for ALL PNADs associated with this + # option. We need to do this because the new PNAD may now + # be a better match for some transition that we previously + # matched to another PNAD. + self._recompute_datastores_from_segments( + param_opt_to_nec_pnads[option.parent]) + # Now that we have done this, certain PNADs may be + # left with empty datastores. Remove these. + self._remove_empty_datastore_pnads(param_opt_to_nec_pnads, + option.parent) + + # Recompute all preconditions, now that we have recomputed + # the datastores. + for nec_pnad in param_opt_to_nec_pnads[option.parent]: + if len(nec_pnad.datastore) > 0: + pre = self._induce_preconditions_via_intersection( + nec_pnad) + nec_pnad.op = nec_pnad.op.copy_with( + preconditions=pre) + + # After all this, the unification call that failed earlier + # (leading us into the current else statement) should work. + best_score_pnad, var_to_obj = \ + self._find_best_matching_pnad_and_sub( + segment, objects, + param_opt_to_nec_pnads[option.parent]) + assert var_to_obj is not None + assert best_score_pnad == pnad + # Also, since this segment caused us to induce the new + # PNAD, it should appear in this new PNAD's datastore. 
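The precondition recomputation above relies on _induce_preconditions_via_intersection, which (after lifting) intersects the initial atoms of every segment in a PNAD's datastore. The same idea over plain ground-atom sets, with made-up atoms for illustration:

segment_init_atoms = [
    {"IsAgent(agent)", "At(agent, loc1)", "Found(key)"},
    {"IsAgent(agent)", "At(agent, loc2)", "Found(key)"},
    {"IsAgent(agent)", "At(agent, loc2)", "Found(ball)"},
]
induced_preconditions = set.intersection(*segment_init_atoms)
assert induced_preconditions == {"IsAgent(agent)"}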
+ segs_in_pnad = { + datapoint[0] + for datapoint in pnad.datastore + } + assert segment in segs_in_pnad + obj_to_var = {v: k for k, v in var_to_obj.items()} + assert len(var_to_obj) == len(obj_to_var) + ground_op = pnad.op.ground( + tuple(var_to_obj[var] for var in pnad.op.parameters)) + + self._update_pnad_seg_to_keep_effs(pnad, necessary_image, + ground_op, obj_to_var, + segment) + + # Update necessary_image for this timestep. It no longer + # needs to include the ground add effects of this PNAD, but + # must now include its ground preconditions. + necessary_image -= { + a.ground(var_to_obj) + for a in pnad.op.add_effects + } + necessary_image |= { + a.ground(var_to_obj) + for a in pnad.op.preconditions + } + return nec_pnad_set_changed + + @staticmethod + def _remove_empty_datastore_pnads(param_opt_to_nec_pnads: Dict[ + ParameterizedOption, List[PNAD]], + param_opt: ParameterizedOption) -> None: + """Removes all PNADs associated with the given param_opt that have + empty datastores from the input param_opt_to_nec_pnads dict.""" + pnads_to_rm = [] + for pnad in param_opt_to_nec_pnads[param_opt]: + if len(pnad.datastore) == 0: + pnads_to_rm.append(pnad) + for rm_pnad in pnads_to_rm: + param_opt_to_nec_pnads[param_opt].remove(rm_pnad) + + def _induce_delete_side_keep( + self, param_opt_to_nec_pnads: Dict[ParameterizedOption, + List[PNAD]]) -> None: + """Given the current PNADs where add effects and preconditions are + correct, learn the remaining components: delete effects, side + predicates, and keep_effects. + + Note that this may require spawning new PNADs with keep effects. + """ + for option, nec_pnad_list in sorted(param_opt_to_nec_pnads.items(), + key=str): + pnads_with_keep_effects = set() + for pnad in nec_pnad_list: + self._compute_pnad_delete_effects(pnad) + self._compute_pnad_ignore_effects(pnad) + pnads_with_keep_effects |= self.get_pnads_with_keep_effects( + pnad) + param_opt_to_nec_pnads[option].extend( + list(pnads_with_keep_effects)) + + @classmethod + def get_name(cls) -> str: + return "backward-forward" + + def _assert_all_data_in_exactly_one_datastore(self, + pnads: List[PNAD]) -> None: + """Assert that every demo datapoint appears in exactly one datastore + among the given PNADs' datastores.""" + all_segs_in_data_lst = [ + seg for pnad in pnads for seg, _ in pnad.datastore + ] + all_segs_in_data = set(all_segs_in_data_lst) + assert len(all_segs_in_data_lst) == len(all_segs_in_data) + for ll_traj, seg_traj in zip(self._trajectories, + self._segmented_trajs): + if not ll_traj.is_demo: # ignore non-demo data + continue + for segment in seg_traj: + assert segment in all_segs_in_data + +def parse_objs_preds_and_options(trajectory, train_task_idx=0, all_atoms=None): + objs = set() + preds = set() + options = set() + state = None + states = [] + actions = [] + ground_atoms_traj = [] + obj_types = {"obj_type": Type("obj_type", ["is_obj"]), "surface_type": Type("surface_type", ["is_obj"])} + + for i, s in enumerate(trajectory.states): + ground_atoms = set() + for pred_str in s: + pred = None + choice = [] + pattern = re.compile(r"(\w+)\((.*?)\)") + match = pattern.match(pred_str) + if match: + func_name = match.group(1) + args = match.group(2).split(',') if match.group(2) else [] + for arg in args: + base_name = arg.strip().split("_")[0] + if base_name in ['box','cabinet','table','sink','bucket', 'ashcan']: + obj_types[base_name] = Type("surface_type", ["is_obj"]) + else: + obj_types[base_name] = Type("obj_type", ["is_obj"]) #Type(base_name, ["is_obj"]) + obj = 
obj_types[base_name](arg.strip()) + choice.append(obj) + objs.add(obj) + if len(args) == 1: + base_name = args[0].strip().split("_")[0] + pred = Predicate(func_name, [obj_types[base_name]], lambda s, o: True) + preds.add(pred) + elif len(args) == 2: + base_name1 = args[0].strip().split("_")[0] + base_name2 = args[1].strip().split("_")[0] + pred = Predicate(func_name, [obj_types[base_name1], obj_types[base_name2]], lambda s, o: True) + preds.add(pred) + else: + NotImplementedError("") + ground_atoms.add(GroundAtom(pred, choice)) + states.append(state) + ground_atoms_traj.append(ground_atoms) + + if i < len(trajectory.actions): + a_name = trajectory.actions[i] + + param_option = utils.SingletonParameterizedOption( + a_name, lambda s, m, o, p: Action(name_to_actions[a_name])) + options.add(param_option) + option = param_option.ground([], []) + action = option.policy(state) + action.set_option(option) + actions.append(action) + + def get_all_atoms_in_traj(ground_atoms_traj): + all_atoms = set() + for timestep_atoms in ground_atoms_traj: + all_atoms.update(timestep_atoms) + return all_atoms + + def add_neg_atoms(preds, lltraj, all_atoms): + ground_atoms = [] + neg_pred_table = {str(atom):GroundAtom(Predicate("~" + atom.predicate.name, atom.predicate.types, lambda s, o: True), atom.objects) for atom in all_atoms} + neg_pred_table["HandEmpty"] = GroundAtom(Predicate("handempty", [], lambda s, o: True), []) + for timestep_atoms in lltraj[1]: + missing_atoms = all_atoms - timestep_atoms + neg_atoms = set([neg_pred_table[str(atom)] for atom in missing_atoms]) + handempty = True + for atom in timestep_atoms: + if "inhandofrobot" in str(atom): + handempty = False + if handempty: + neg_atoms |= set([neg_pred_table["HandEmpty"]]) + ground_atoms.append(timestep_atoms | neg_atoms) + lltraj = (lltraj[0], ground_atoms) + return preds | set([v.predicate for v in neg_pred_table.values()]) | set([atom.predicate for atom in all_atoms]), lltraj + + lltraj = (LowLevelTrajectory([{obj:[0.0] for obj in objs} for _ in states], actions, _is_demo=True, _train_task_idx=train_task_idx), ground_atoms_traj) + if all_atoms is None: + all_atoms = get_all_atoms_in_traj(ground_atoms_traj) + preds, lltraj = add_neg_atoms(preds, lltraj, all_atoms) + else: + preds, lltraj = add_neg_atoms(preds, lltraj, all_atoms) + + return objs, preds, options, lltraj, all_atoms diff --git a/predicators/nsrt_learning/strips_learning/pnad_search_learner.py b/predicators/nsrt_learning/strips_learning/pnad_search_learner.py index f7ebf9bd21..6c9b795a12 100644 --- a/predicators/nsrt_learning/strips_learning/pnad_search_learner.py +++ b/predicators/nsrt_learning/strips_learning/pnad_search_learner.py @@ -11,7 +11,8 @@ GeneralToSpecificSTRIPSLearner from predicators.settings import CFG from predicators.structs import PNAD, GroundAtom, LowLevelTrajectory, \ - ParameterizedOption, Predicate, Segment, Task, _GroundSTRIPSOperator + ParameterizedOption, Predicate, Segment, Task, _GroundSTRIPSOperator, NSRT, Variable, LiftedAtom +import re class _PNADSearchOperator(abc.ABC): @@ -101,6 +102,7 @@ def _append_new_pnad_and_keep_effects( # that are unnecessary. 
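parse_objs_preds_and_options above recovers predicate names and typed objects from ground-atom strings with a single regex. The standalone sketch below walks through one such parse; the atom string and variable names are made up for illustration:

import re

pattern = re.compile(r"(\w+)\((.*?)\)")        # same pattern as above
match = pattern.match("inside(book_1,shelf_0)")
pred_name = match.group(1)                      # "inside"
args = [a.strip() for a in match.group(2).split(",")]
base_names = [a.split("_")[0] for a in args]    # used to pick obj vs. surface types
assert pred_name == "inside"
assert base_names == ["book", "shelf"]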
new_pnads = self._learner.recompute_pnads_from_effects( sorted(new_pnads)) + print(len(new_pnads)) return new_pnads def _get_backchaining_results( @@ -267,6 +269,70 @@ def recompute_pnads_from_effects(self, pnads: List[PNAD]) -> List[PNAD]: pnad_map[p.option_spec[0]].append(p) new_pnads = self._get_uniquely_named_nec_pnads(pnad_map) return new_pnads + + + def parse_nsrt_block(self, block: str) -> PNAD: + """Parses a single NSRT block into an PNAD object.""" + lines = block.strip().split("\n") + + name_match = re.match(r"(\S+):", lines[0]) + name = name_match.group(1) if name_match else "" + + parameters = re.findall(r"\?x\d+:\w+", lines[1]) + + def extract_effects(label: str) -> Set[str]: + """Extracts a list of predicates from labeled sections.""" + for line in lines: + if line.strip().startswith(label): + return set(re.findall(r"\w+\(.*?\)", line)) + return set() + + preconditions = extract_effects("Preconditions") + add_effects = extract_effects("Add Effects") + delete_effects = extract_effects("Delete Effects") + ignore_effects = extract_effects("Ignore Effects") + + option_spec_match = re.search(r"Option Spec:\s*(.*)", block) + option_spec = option_spec_match.group(1) if option_spec_match else "" + + objects = set() + atoms = set() + option_specs = {} + for traj in self._segmented_trajs: + for segment in traj: + for state in segment.states: + for k, v in state.items(): + objects.add(k) + atoms |= segment.init_atoms | segment.final_atoms + option_specs[segment.get_option().parent.name] = segment.get_option().parent + all_predicates_list = [(atom.predicate.name,atom.predicate) for atom in atoms] + def get_predicate(name, entities): + for pred_name, pred in all_predicates_list: + if pred_name == pred_name and pred.arity == len(entities): + valid_types = True + for i, ent in enumerate(entities): + if ent.type != pred.types[i]: + valid_types = False + if valid_types: + return pred + raise NotImplementedError + + types = {obj.type.name:obj.type for obj in objects} + + def extract_parameters(predicate: str) -> Set[str]: + parameter_pattern = re.compile(r"\?x\d+:\w+") # Matches variables like ?x0:obj_type + matches = parameter_pattern.findall(predicate) + return matches + + parameters = [Variable(param.split(":")[0], types[param.split(":")[1]]) for param in parameters] + preconditions = set([LiftedAtom(get_predicate(pre.split("(")[0], [Variable(param.split(":")[0], types[param.split(":")[1]]) for param in extract_parameters(pre)]), [Variable(param.split(":")[0], types[param.split(":")[1]]) for param in extract_parameters(pre)]) for pre in preconditions]) + add_effects = set([LiftedAtom(get_predicate(add.split("(")[0], [Variable(param.split(":")[0], types[param.split(":")[1]]) for param in extract_parameters(add)]), [Variable(param.split(":")[0], types[param.split(":")[1]]) for param in extract_parameters(add)]) for add in add_effects]) + delete_effects = set([LiftedAtom(get_predicate(dle.split("(")[0], [Variable(param.split(":")[0], types[param.split(":")[1]]) for param in extract_parameters(dle)]), [Variable(param.split(":")[0], types[param.split(":")[1]]) for param in extract_parameters(dle)]) for dle in delete_effects]) + ignore_effects = set([get_predicate(ige, None) for ige in ignore_effects]) + option_spec = (option_specs[option_spec.split("(")[0]], []) + + nsrt = NSRT(name, parameters, preconditions, add_effects, delete_effects, ignore_effects, option_spec, [], None) + return PNAD(nsrt.op, [], option_spec) def _learn(self) -> List[PNAD]: # Set up hill-climbing search over PNAD sets. 
@@ -285,6 +351,16 @@ def get_successors( for i, child in enumerate(op.get_successors(pnads)): yield (op, i), child, 1.0 # cost always 1 + # Load initial pnad set + if CFG.pnad_search_load_initial: + initial_state = None + with open("test_saved.NSRTs.txt", "r") as file: + content = file.read() + nsrt_strs = ["NSRT-" + nsrt_str for nsrt_str in content.split("NSRT-") if nsrt_str != ''] + pnads = [self.parse_nsrt_block(nsrt_str) for nsrt_str in nsrt_strs] + self._recompute_datastores_from_segments(pnads) + initial_state = frozenset(pnads) + # Run hill-climbing search. path, _, _ = utils.run_hill_climbing(initial_state=initial_state, check_goal=lambda _: False, @@ -296,6 +372,7 @@ def get_successors( # Extract the best PNADs set. final_pnads = path[-1] sorted_final_pnads = sorted(final_pnads) + # Fix naming. pnad_map: Dict[ParameterizedOption, List[PNAD]] = { p.option_spec[0]: [] diff --git a/predicators/perception/mini_behavior_env_perceiver.py b/predicators/perception/mini_behavior_env_perceiver.py new file mode 100644 index 0000000000..eb43c38a23 --- /dev/null +++ b/predicators/perception/mini_behavior_env_perceiver.py @@ -0,0 +1,227 @@ +"""A mini_behavior-specific perceiver.""" + +import sys +from typing import Dict, Tuple + +import numpy as np + +from predicators import utils +from predicators.settings import CFG +from predicators.envs.mini_behavior_env import MiniBehavior +from predicators.perception.base_perceiver import BasePerceiver +from predicators.structs import EnvironmentTask, GroundAtom, Object, \ + Observation, State, Task, Video +from mini_behavior.grid import BehaviorGrid +from mini_bddl import DEFAULT_STATES, STATE_FUNC_MAPPING, DEFAULT_ACTIONS, OBJECT_TO_IDX, IDX_TO_OBJECT, OBJECTS, ABILITIES + +class MiniBehaviorPerceiver(BasePerceiver): + """A mini_behavior-specific perceiver.""" + + def __init__(self) -> None: + super().__init__() + self.grid = BehaviorGrid(16, 16) + + @classmethod + def get_name(cls) -> str: + return "mini_behavior_env" + + def parse_mini_behavior_task(self, env_task: EnvironmentTask) -> Task: + state = self._observation_to_state(env_task.init_obs) + if env_task.goal_description == "Get to the goal": + IsAgent, At, IsGoal, IsBall, IsKey, IsBox, \ + IsRed, IsGreen, IsBlue, IsPurple, IsYellow, IsGrey, \ + Holding, Near = MiniBehavior.get_goal_predicates() + assert len(MiniBehavior.get_objects_of_enum(state, "agent")) == 1 + assert len(MiniBehavior.get_objects_of_enum(state, "goal")) == 1 + agent_obj = list(MiniBehavior.get_objects_of_enum(state, "agent"))[0] + goal_obj = list(MiniBehavior.get_objects_of_enum(state, "goal"))[0] + goal = {GroundAtom(IsAgent, [agent_obj]), + GroundAtom(At, [agent_obj, goal_obj]), + GroundAtom(IsGoal, [goal_obj])} + elif "go to the " in env_task.goal_description: + color, obj_type = env_task.goal_description.split("go to the ")[1].split(" ")[0:2] + obj_name = f"{color}_{obj_type}" + IsAgent, At, IsGoal, IsBall, IsKey, IsBox, \ + IsRed, IsGreen, IsBlue, IsPurple, IsYellow, IsGrey, \ + Holding, Near = MiniBehavior.get_goal_predicates() + assert len(MiniBehavior.get_objects_of_enum(state, "agent")) == 1 + assert len(MiniBehavior.get_objects_of_enum(state, obj_type)) > 1 + agent_obj = list(MiniBehavior.get_objects_of_enum(state, "agent"))[0] + for obj in MiniBehavior.get_objects_of_enum(state, obj_type): + if obj.name == obj_name: + goal_obj = obj + obj_type_to_predicate = { + "ball": IsBall, + "key": IsKey, + "box": IsBox + } + color_to_predicate = { + "red": IsRed, + "green": IsGreen, + "blue": IsBlue, + "purple": IsPurple, + 
"yellow": IsYellow, + "grey": IsGrey + } + goal = {GroundAtom(IsAgent, [agent_obj]), + GroundAtom(At, [agent_obj, goal_obj]), + GroundAtom(obj_type_to_predicate[obj_type], [goal_obj]), + GroundAtom(color_to_predicate[color], [goal_obj]), + } + elif env_task.goal_description == "get to the green goal square": + IsAgent, At, IsGoal, IsBall, IsKey, IsBox, \ + IsRed, IsGreen, IsBlue, IsPurple, IsYellow, IsGrey, \ + Holding, Near = MiniBehavior.get_goal_predicates() + assert len(MiniBehavior.get_objects_of_enum(state, "agent")) == 1 + assert len(MiniBehavior.get_objects_of_enum(state, "goal")) == 1 + agent_obj = list(MiniBehavior.get_objects_of_enum(state, "agent"))[0] + goal_obj = list(MiniBehavior.get_objects_of_enum(state, "goal"))[0] + goal = {GroundAtom(IsAgent, [agent_obj]), + GroundAtom(At, [agent_obj, goal_obj]), + GroundAtom(IsGoal, [goal_obj])} + elif env_task.goal_description.startswith("get a") or \ + env_task.goal_description.startswith("go get a") or \ + env_task.goal_description.startswith("fetch a") or \ + env_task.goal_description.startswith("go fetch a") or \ + env_task.goal_description.startswith("you must fetch a") or \ + env_task.goal_description.startswith("pick up the"): + color, obj_type = env_task.goal_description.split(" ")[-2:] + obj_name = f"{color}_{obj_type}" + IsAgent, At, IsGoal, IsBall, IsKey, IsBox, \ + IsRed, IsGreen, IsBlue, IsPurple, IsYellow, IsGrey, \ + Holding, Near = MiniBehavior.get_goal_predicates() + assert len(MiniBehavior.get_objects_of_enum(state, "agent")) == 1 + assert len(MiniBehavior.get_objects_of_enum(state, obj_type)) > 1 + agent_obj = list(MiniBehavior.get_objects_of_enum(state, "agent"))[0] + for obj in MiniBehavior.get_objects_of_enum(state, obj_type): + if obj.name == obj_name: + goal_obj = obj + obj_type_to_predicate = { + "ball": IsBall, + "key": IsKey, + "box": IsBox + } + color_to_predicate = { + "red": IsRed, + "green": IsGreen, + "blue": IsBlue, + "purple": IsPurple, + "yellow": IsYellow, + "grey": IsGrey + } + goal = {GroundAtom(Holding, [goal_obj]), + GroundAtom(obj_type_to_predicate[obj_type], [goal_obj]), + GroundAtom(color_to_predicate[color], [goal_obj])} + else: + raise NotImplementedError(f"Goal description {env_task.goal_description} not supported") + return Task(state, goal) + + def reset(self, env_task: EnvironmentTask) -> Task: + return self.parse_mini_behavior_task(env_task) + + def step(self, observation: Observation) -> State: + return self._observation_to_state(observation) + + def _observation_to_objects(self, obs: Observation) -> Dict[str, Tuple[int, int]]: + objs = [] + import ipdb; ipdb.set_trace() + visual = obs['image'] + self.grid.decode(visual) + for r in range(visual.shape[0]): + for c in range(visual.shape[1]): + obj = [IDX_TO_OBJECT[visual[r, c][0]], IDX_TO_COLOR[visual[r, c][1]], visual[r, c][2], r - self.agent_pov_pos[0], c - self.agent_pov_pos[1]] + if obj[0] == 'empty': + obj[1] = 'black' + objs.append(tuple(obj)) + return objs + + def transform_point(self, x1, y1, o1, x2, y2): + # Compute global coordinates directly + x_prime = x1 + x2 * np.cos(o1) - y2 * np.sin(o1) + y_prime = y1 + x2 * np.sin(o1) + y2 * np.cos(o1) + return x_prime, y_prime + + def _globalize_coords(self, r: int, c: int) -> Tuple[int, int]: + # Adjusted direction-to-radian mapping + direction_to_radian = { + 0: 0, # right + 1: -np.pi / 2, # down + 2: np.pi, # left + 3: np.pi / 2 # up + } + o1 = direction_to_radian[self.direction] + x1, y1 = self.agent_pos[0], self.agent_pos[1] + x2, y2 = r, -c # Use c directly + x_prime, y_prime = 
self.transform_point(x1, y1, o1, x2, y2) + return int(round(x_prime)), int(round(y_prime)) + + def _observation_to_state(self, obs: Observation) -> State: + import numpy as np + self.last_obs = obs + self.agent_pos = None + + objs = self._observation_to_objects(obs) + + def _get_object_name(r: int, c: int, type_name: str, color: str) -> str: + # Put the location of the static objects in their names for easier + # debugging. + if type_name == "agent": + return "agent" + if type_name in ["empty", "wall"]: + return f"{type_name}_{r}_{c}" + else: + return f"{color}_{type_name}" + + for type_name, color, obj_state, r, c in objs: + enum = MiniBehavior.name_to_enum[type_name] + if CFG.mini_behavior_gym_fully_observable: + global_r, global_c = r, c + else: + global_r, global_c = self._globalize_coords(r, c) + if type_name in ["goal", "agent"]: + object_name = type_name + if type_name == "agent" and not CFG.mini_behavior_gym_fully_observable: + assert (global_r, global_c) == self.agent_pos + else: + object_name = _get_object_name(global_r, global_c, type_name, color) + obj = Object(object_name, MiniBehavior.object_type) + self.state_dict[obj] = { + "row": global_r, + "column": global_c, + "type": enum, + "state": obj_state, + "color": color, + } + + if all([val["type"] != MiniBehavior.name_to_enum['goal'] for key, val in self.state_dict.items()]): + enum = MiniBehavior.name_to_enum["goal"] + object_name = "goal" + obj = Object(object_name, MiniBehavior.object_type) + self.state_dict[obj] = { + "row": sys.maxsize, + "column": sys.maxsize, + "type": enum, + "state": -1, + "color": 'green', + } + + for color in ['blue', 'green', 'grey', 'purple', 'red', 'yellow']: + for obj_type in ['key', 'ball', 'box']: + if all([not (val["type"] == MiniBehavior.name_to_enum[obj_type] and val["color"] == color) for key, val in self.state_dict.items()]): + enum = MiniBehavior.name_to_enum[obj_type] + object_name = f"{color}_{obj_type}" + obj = Object(object_name, MiniBehavior.object_type) + self.state_dict[obj] = { + "row": sys.maxsize, + "column": sys.maxsize, + "type": enum, + "state": -1, + "color": color, + } + + state = utils.create_state_from_dict(self.state_dict) + return state + + def render_mental_images(self, observation: Observation, + env_task: EnvironmentTask) -> Video: + raise NotImplementedError("Mental images not implemented for mini_behavior") diff --git a/predicators/perception/minigrid_env_perceiver.py b/predicators/perception/minigrid_env_perceiver.py new file mode 100644 index 0000000000..8be4309d88 --- /dev/null +++ b/predicators/perception/minigrid_env_perceiver.py @@ -0,0 +1,258 @@ +"""A minigrid-specific perceiver.""" + +import sys +from typing import Dict, Tuple + +import numpy as np + +from predicators import utils +from predicators.settings import CFG +from predicators.envs.minigrid_env import MiniGridEnv +from predicators.perception.base_perceiver import BasePerceiver +from predicators.structs import EnvironmentTask, GroundAtom, Object, \ + Observation, State, Task, Video + +from minigrid.core.constants import ( + COLORS, + IDX_TO_COLOR, + IDX_TO_OBJECT, +) + +class MiniGridPerceiver(BasePerceiver): + """A minigrid-specific perceiver.""" + + def __init__(self) -> None: + super().__init__() + self.state_dict = {} + self.agent_pov_pos = (3,6) # agent's point of view is always at (3,6) + self.agent_pos = (0,0) # starts at origin + self.direction = 0 # directions (right, down, left, up) + self.last_obs = None + + @classmethod + def get_name(cls) -> str: + return "minigrid_env" + + def 
parse_minigrid_task(self, env_task: EnvironmentTask) -> Task: + state = self._observation_to_state(env_task.init_obs) + if env_task.goal_description == "Get to the goal": + IsAgent, At, IsGoal, IsBall, IsKey, IsBox, \ + IsRed, IsGreen, IsBlue, IsPurple, IsYellow, IsGrey, \ + Holding, Near = MiniGridEnv.get_goal_predicates() + assert len(MiniGridEnv.get_objects_of_enum(state, "agent")) == 1 + assert len(MiniGridEnv.get_objects_of_enum(state, "goal")) == 1 + agent_obj = list(MiniGridEnv.get_objects_of_enum(state, "agent"))[0] + goal_obj = list(MiniGridEnv.get_objects_of_enum(state, "goal"))[0] + goal = {GroundAtom(IsAgent, [agent_obj]), + GroundAtom(At, [agent_obj, goal_obj]), + GroundAtom(IsGoal, [goal_obj])} + elif "go to the " in env_task.goal_description: + color, obj_type = env_task.goal_description.split("go to the ")[1].split(" ")[0:2] + obj_name = f"{color}_{obj_type}" + IsAgent, At, IsGoal, IsBall, IsKey, IsBox, \ + IsRed, IsGreen, IsBlue, IsPurple, IsYellow, IsGrey, \ + Holding, Near = MiniGridEnv.get_goal_predicates() + assert len(MiniGridEnv.get_objects_of_enum(state, "agent")) == 1 + assert len(MiniGridEnv.get_objects_of_enum(state, obj_type)) > 1 + agent_obj = list(MiniGridEnv.get_objects_of_enum(state, "agent"))[0] + for obj in MiniGridEnv.get_objects_of_enum(state, obj_type): + if obj.name == obj_name: + goal_obj = obj + obj_type_to_predicate = { + "ball": IsBall, + "key": IsKey, + "box": IsBox + } + color_to_predicate = { + "red": IsRed, + "green": IsGreen, + "blue": IsBlue, + "purple": IsPurple, + "yellow": IsYellow, + "grey": IsGrey + } + goal = {GroundAtom(IsAgent, [agent_obj]), + GroundAtom(At, [agent_obj, goal_obj]), + GroundAtom(obj_type_to_predicate[obj_type], [goal_obj]), + GroundAtom(color_to_predicate[color], [goal_obj]), + } + elif env_task.goal_description == "get to the green goal square": + IsAgent, At, IsGoal, IsBall, IsKey, IsBox, \ + IsRed, IsGreen, IsBlue, IsPurple, IsYellow, IsGrey, \ + Holding, Near = MiniGridEnv.get_goal_predicates() + assert len(MiniGridEnv.get_objects_of_enum(state, "agent")) == 1 + assert len(MiniGridEnv.get_objects_of_enum(state, "goal")) == 1 + agent_obj = list(MiniGridEnv.get_objects_of_enum(state, "agent"))[0] + goal_obj = list(MiniGridEnv.get_objects_of_enum(state, "goal"))[0] + goal = {GroundAtom(IsAgent, [agent_obj]), + GroundAtom(At, [agent_obj, goal_obj]), + GroundAtom(IsGoal, [goal_obj])} + elif env_task.goal_description.startswith("get a") or \ + env_task.goal_description.startswith("go get a") or \ + env_task.goal_description.startswith("fetch a") or \ + env_task.goal_description.startswith("go fetch a") or \ + env_task.goal_description.startswith("you must fetch a") or \ + env_task.goal_description.startswith("pick up the"): + color, obj_type = env_task.goal_description.split(" ")[-2:] + obj_name = f"{color}_{obj_type}" + IsAgent, At, IsGoal, IsBall, IsKey, IsBox, \ + IsRed, IsGreen, IsBlue, IsPurple, IsYellow, IsGrey, \ + Holding, Near = MiniGridEnv.get_goal_predicates() + assert len(MiniGridEnv.get_objects_of_enum(state, "agent")) == 1 + assert len(MiniGridEnv.get_objects_of_enum(state, obj_type)) > 1 + agent_obj = list(MiniGridEnv.get_objects_of_enum(state, "agent"))[0] + for obj in MiniGridEnv.get_objects_of_enum(state, obj_type): + if obj.name == obj_name: + goal_obj = obj + obj_type_to_predicate = { + "ball": IsBall, + "key": IsKey, + "box": IsBox + } + color_to_predicate = { + "red": IsRed, + "green": IsGreen, + "blue": IsBlue, + "purple": IsPurple, + "yellow": IsYellow, + "grey": IsGrey + } + goal = 
{GroundAtom(Holding, [goal_obj]), + GroundAtom(obj_type_to_predicate[obj_type], [goal_obj]), + GroundAtom(color_to_predicate[color], [goal_obj])} + else: + raise NotImplementedError(f"Goal description {env_task.goal_description} not supported") + return Task(state, goal) + + def reset(self, env_task: EnvironmentTask) -> Task: + self.state_dict.clear() + return self.parse_minigrid_task(env_task) + + def step(self, observation: Observation) -> State: + return self._observation_to_state(observation) + + def _observation_to_objects(self, obs: Observation) -> Dict[str, Tuple[int, int]]: + objs = [] + visual = obs[0]['image'] + direction = obs[0]['direction'] + objs.append(('agent', + None, + direction, + 0, + 0)) + objs.append(('empty', + 'black', + 0, + 0, + 0)) + for r in range(visual.shape[0]): + for c in range(visual.shape[1]): + obj = [IDX_TO_OBJECT[visual[r, c][0]], IDX_TO_COLOR[visual[r, c][1]], visual[r, c][2], r - self.agent_pov_pos[0], c - self.agent_pov_pos[1]] + if obj[0] == 'empty': + obj[1] = 'black' + objs.append(tuple(obj)) + return objs + + def transform_point(self, x1, y1, o1, x2, y2): + # Compute global coordinates directly + x_prime = x1 + x2 * np.cos(o1) - y2 * np.sin(o1) + y_prime = y1 + x2 * np.sin(o1) + y2 * np.cos(o1) + return x_prime, y_prime + + def _globalize_coords(self, r: int, c: int) -> Tuple[int, int]: + # Adjusted direction-to-radian mapping + direction_to_radian = { + 0: 0, # right + 1: -np.pi / 2, # down + 2: np.pi, # left + 3: np.pi / 2 # up + } + o1 = direction_to_radian[self.direction] + x1, y1 = self.agent_pos[0], self.agent_pos[1] + x2, y2 = r, -c # Use c directly + x_prime, y_prime = self.transform_point(x1, y1, o1, x2, y2) + return int(round(x_prime)), int(round(y_prime)) + + def _observation_to_state(self, obs: Observation) -> State: + import numpy as np + + self.direction = obs[0]['direction'] + if len(obs) == 5: + if obs[4]['last_action'] == 2: # Moved Forward + if (not np.array_equal(self.last_obs[0]['image'], obs[0]['image'])) or \ + not np.array_equal(obs[0]['image'][self.agent_pov_pos[0], self.agent_pov_pos[1]-1], np.array([2, 5, 0], dtype=np.uint8)): + if self.direction == 0: # right (0, 1) + self.agent_pos = (self.agent_pos[0], self.agent_pos[1] + 1) + elif self.direction == 1: # down (1, 0) + self.agent_pos = (self.agent_pos[0] + 1, self.agent_pos[1]) + elif self.direction == 2: # left (0, -1) + self.agent_pos = (self.agent_pos[0], self.agent_pos[1] - 1) + elif self.direction == 3: # up (-1, 0) + self.agent_pos = (self.agent_pos[0] - 1, self.agent_pos[1]) + self.last_obs = obs + + objs = self._observation_to_objects(obs) + + def _get_object_name(r: int, c: int, type_name: str, color: str) -> str: + # Put the location of the static objects in their names for easier + # debugging. 
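The egocentric-to-global conversion in transform_point is a standard 2D rotation about the agent's pose. A small worked check of that formula under the same direction-to-radian mapping; this is a standalone sketch, with the example pose chosen arbitrarily:

import numpy as np

def transform_point(x1, y1, o1, x2, y2):
    # Rotate the egocentric offset (x2, y2) by heading o1, then translate
    # by the agent's global position (x1, y1).
    x_prime = x1 + x2 * np.cos(o1) - y2 * np.sin(o1)
    y_prime = y1 + x2 * np.sin(o1) + y2 * np.cos(o1)
    return x_prime, y_prime

# Agent at (2, 3) with heading np.pi / 2 ("up" in the mapping above): an
# offset of one cell along the egocentric x-axis rotates onto the global
# y-axis.
x, y = transform_point(2, 3, np.pi / 2, 1, 0)
assert (round(x), round(y)) == (2, 4)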
+ if type_name == "agent": + return "agent" + if type_name in ["empty", "wall"]: + return f"{type_name}_{r}_{c}" + else: + return f"{color}_{type_name}" + + for type_name, color, obj_state, r, c in objs: + enum = MiniGridEnv.name_to_enum[type_name] + if CFG.minigrid_gym_fully_observable: + global_r, global_c = r, c + else: + global_r, global_c = self._globalize_coords(r, c) + if type_name in ["goal", "agent"]: + object_name = type_name + if type_name == "agent" and not CFG.minigrid_gym_fully_observable: + assert (global_r, global_c) == self.agent_pos + else: + object_name = _get_object_name(global_r, global_c, type_name, color) + obj = Object(object_name, MiniGridEnv.object_type) + self.state_dict[obj] = { + "row": global_r, + "column": global_c, + "type": enum, + "state": obj_state, + "color": color, + } + + if all([val["type"] != MiniGridEnv.name_to_enum['goal'] for key, val in self.state_dict.items()]): + enum = MiniGridEnv.name_to_enum["goal"] + object_name = "goal" + obj = Object(object_name, MiniGridEnv.object_type) + self.state_dict[obj] = { + "row": sys.maxsize, + "column": sys.maxsize, + "type": enum, + "state": -1, + "color": 'green', + } + + for color in ['blue', 'green', 'grey', 'purple', 'red', 'yellow']: + for obj_type in ['key', 'ball', 'box']: + if all([not (val["type"] == MiniGridEnv.name_to_enum[obj_type] and val["color"] == color) for key, val in self.state_dict.items()]): + enum = MiniGridEnv.name_to_enum[obj_type] + object_name = f"{color}_{obj_type}" + obj = Object(object_name, MiniGridEnv.object_type) + self.state_dict[obj] = { + "row": sys.maxsize, + "column": sys.maxsize, + "type": enum, + "state": -1, + "color": color, + } + + state = utils.create_state_from_dict(self.state_dict) + return state + + def render_mental_images(self, observation: Observation, + env_task: EnvironmentTask) -> Video: + raise NotImplementedError("Mental images not implemented for minigrid") diff --git a/predicators/planning.py b/predicators/planning.py index 123323ff0a..74f4603ef3 100644 --- a/predicators/planning.py +++ b/predicators/planning.py @@ -284,8 +284,10 @@ def task_plan_grounding( for nsrt in sorted(nsrts): for ground_nsrt in utils.all_ground_nsrts(nsrt, objects): if allow_noops or (ground_nsrt.add_effects - | ground_nsrt.delete_effects): + | ground_nsrt.delete_effects): ground_nsrts.append(ground_nsrt) + if CFG.single_grounding: + break reachable_atoms = utils.get_reachable_atoms(ground_nsrts, init_atoms) reachable_nsrts = [ nsrt for nsrt in ground_nsrts @@ -1208,15 +1210,15 @@ def run_task_plan_once( timeout -= duration plan, atoms_seq, metrics = next( task_plan(init_atoms, - goal, - ground_nsrts, - reachable_atoms, - heuristic, - seed, - timeout, - max_skeletons_optimized=1, - use_visited_state_set=True, - **kwargs)) + goal, + ground_nsrts, + reachable_atoms, + heuristic, + seed, + timeout, + max_skeletons_optimized=1, + use_visited_state_set=True, + **kwargs)) if len(plan) > max_horizon: raise PlanningFailure( "Skeleton produced by A-star exceeds horizon!") @@ -1243,7 +1245,7 @@ def run_task_plan_once( alias_flag = "--alias lama-first" else: raise ValueError("Unrecognized sesame_task_planner: " - f"{CFG.sesame_task_planner}") + f"{CFG.sesame_task_planner}") sas_file = generate_sas_file_for_fd(task, nsrts, preds, types, timeout, timeout_cmd, alias_flag, exec_str, @@ -1262,11 +1264,10 @@ def run_task_plan_once( list(objects), init_atoms, nsrts, float(max_horizon)) else: raise ValueError("Unrecognized sesame_task_planner: " - f"{CFG.sesame_task_planner}") + 
f"{CFG.sesame_task_planner}") necessary_atoms_seq = utils.compute_necessary_atoms_seq( plan, atoms_seq, goal) - return plan, necessary_atoms_seq, metrics diff --git a/predicators/settings.py b/predicators/settings.py index 4dc482e377..cd4642a59f 100644 --- a/predicators/settings.py +++ b/predicators/settings.py @@ -331,6 +331,22 @@ class GlobalSettings: # initialization and resetting. use Sokoban-small-v0 for tests sokoban_gym_name = "Sokoban-v0" + # minigrid env parameters + # Currently tested envs: + # "MiniGrid-Empty-5x5-v0" + # "MiniGrid-Empty-8x8-v0" + # "MiniGrid-Empty-16x16-v0" + # "MiniGrid-GoToObject-8x8-N2-v0" + # "MiniGrid-Fetch-8x8-N3-v0" + minigrid_gym_name = "MiniGrid-Fetch-8x8-N3-v0" + minigrid_gym_render = False + minigrid_gym_fully_observable = False + + # mini_behavior env parameters + mini_behavior_env_name = "MiniGrid-SortingBooks-16x16-N2-v0" + mini_behavior_env_render = False + mini_behavior_env_fully_observable = True + # kitchen env parameters kitchen_use_perfect_samplers = False kitchen_goals = "all" @@ -496,6 +512,9 @@ class GlobalSettings: enable_harmless_op_pruning = False # some methods may want this to be True precondition_soft_intersection_threshold_percent = 0.8 # between 0 and 1 backchaining_check_intermediate_harmlessness = False + backward_forward_load_initial = False + single_grounding = False + pnad_search_load_initial = False pnad_search_without_del = False pnad_search_timeout = 10.0 compute_sidelining_objective_value = False @@ -714,7 +733,9 @@ def get_arg_specific_settings(cls, args: Dict[str, Any]) -> Dict[str, Any]: # The method used for perception: now only "trivial" or "sokoban". perceiver=defaultdict(lambda: "trivial", { "sokoban": "sokoban", - "kitchen": "kitchen", + "minigrid_env": "minigrid_env", + "mini_behavior_env": "mini_behavior_env", + "kitchen": "kitchen" })[args.get("env", "")], # Horizon for each environment. When checking if a policy solves a # task, we run the policy for at most this many steps. @@ -728,6 +749,7 @@ def get_arg_specific_settings(cls, args: Dict[str, Any]) -> Dict[str, Any]: "doors": 1000, "coffee": 1000, "kitchen": 1000, + "minigrid_env": 1000, # For the very simple touch point environment, restrict # the horizon to be shorter. 
"touch_point": 15, diff --git a/predicators/utils.py b/predicators/utils.py index 3500562b56..e2f3fddaac 100644 --- a/predicators/utils.py +++ b/predicators/utils.py @@ -1473,7 +1473,8 @@ def _get_entity_combinations( this_choices.append(ent) choices.append(this_choices) for choice in itertools.product(*choices): - yield list(choice) + if len(set(choice)) == len(choice): + yield list(choice) def get_object_combinations(objects: Collection[Object], diff --git a/test_backward_forward copy.py b/test_backward_forward copy.py new file mode 100644 index 0000000000..b4595c8dcc --- /dev/null +++ b/test_backward_forward copy.py @@ -0,0 +1,509 @@ +import random +from typing import List, Set, Tuple +import numpy as np +from collections import deque, defaultdict + +np.random.seed(0) +random.seed(0) + +# Parameters +NUM_PREDICATES = 10 +NUM_OPERATORS = 10 +TRAJ_MAX = 10 +TRAJ_LEN = 5 +NUM_TRAJS = 50 # Fewer for readability +ACTION_SPACE = list(range(NUM_OPERATORS)) + +# --- Operator Representation --- +class Operator: + def __init__(self, pre: Set[int], add: Set[int], delete: Set[int], action: int): + self.pre = pre + self.add = add + self.delete = delete + self.action = action + + def is_applicable(self, state: Set[int]) -> bool: + return self.pre.issubset(state) + + def apply(self, state: Set[int]) -> Set[int]: + if not self.is_applicable(state): + return state # no-op if not applicable + new_state = state.copy() + new_state.difference_update(self.delete) + new_state.update(self.add) + return new_state + + def __repr__(self): + return f"Op(action={self.action}, pre={self.pre}, add={self.add}, del={self.delete})" + + +def plan(start: Set[int], goal: Set[int], operators: List[Operator], max_depth=10): + visited = set() + queue = deque() + queue.append((start.copy(), [])) + + while queue: + state, path = queue.popleft() + state_key = frozenset(state) + if state_key in visited: + continue + visited.add(state_key) + + if goal.issubset(state): + return path + + if len(path) >= max_depth: + continue + + for op in operators: + if op.is_applicable(state): + next_state = op.apply(state) + if next_state != state: + queue.append((next_state, path + [(state.copy(), op.action, next_state.copy())])) + + return None + + +# --- Generate Random Operators --- +def generate_random_operator(pred_pool: List[int], action_id: int) -> Operator: + pre = set(random.sample(pred_pool, random.randint(1, 3))) + effects = list(set(pred_pool) - pre) + add = set(random.sample(effects, random.randint(1, min(2, len(effects))))) + delete = set(random.sample(list(pre), random.randint(0, len(pre)))) + return Operator(pre, add, delete, action_id) + + +def compute_reachable_states(init_state: Set[int], operators: List[Operator], max_iters: int = 100) -> List[Set[int]]: + reached_states = set() + reachable = [] + frontier = [init_state.copy()] + + for _ in range(max_iters): + new_frontier = [] + + for state in frontier: + state_key = frozenset(state) + if state_key in reached_states: + continue + + reached_states.add(state_key) + reachable.append(frozenset(state)) + + for op in operators: + if op.is_applicable(state): + next_state = op.apply(state) + next_key = frozenset(next_state) + if next_key not in reached_states: + new_frontier.append(next_state) + + if not new_frontier: + break + frontier = new_frontier + + return set(reachable) + + +# --- Generate Demo Data --- +def generate_planned_demo_trajectories(operators: List[Operator], num_trajs: int, max_depth: int) -> List[Tuple[List[Tuple[Set[int], int, Set[int]]], Set[int]]]: + demos = [] 
+ attempts = 0 + + while len(demos) < num_trajs and attempts < 10000000: + attempts += 1 + init_state = set(random.sample(range(NUM_PREDICATES), random.randint(2, NUM_PREDICATES))) + reachable = compute_reachable_states(init_state, operators) - init_state + + if len(reachable) == 0: + continue + + plan_traj = [] + goals = reachable + while len(goals) > 0 and len(plan_traj) < TRAJ_LEN: + goal_state = random.choice(list(goals)) + goal = goal_state - init_state + + if not goal: + goals.remove(goal_state) + continue + + plan_traj = plan(init_state, goal, operators, max_depth) + if plan_traj is None or len(plan_traj) < TRAJ_LEN: + goals.remove(goal_state) + plan_traj = [] + + if plan_traj and len(plan_traj) >= TRAJ_LEN: + demos.append((plan_traj, goal)) + + return demos + + +# --- Backwards-Forwards Operator Learning --- +def backward_infer_minimal_effects(demo_data, current_operators=None): + candidate_ops = defaultdict(lambda: {'demos': []}) + op_index = {} + if current_operators: + op_index = {(op.action, frozenset(op.add)): op for op in current_operators} + + for traj, goal in sorted(demo_data, key=lambda x: len(x[0])): #sorted(demo_data, key=lambda x: len(x[0])*(1+len(x[1]))): # order by smallest demo + current_goal = goal.copy() + + for (s, action, s_prime) in reversed(traj): + effect = s_prime - s + if len(effect) == 0: + raise Exception("No effect") + elif len(effect) == 1: + necessary_effect = effect + else: + necessary_effect = effect & current_goal + + key = (action, frozenset(necessary_effect)) + candidate_ops[key]['demos'].append((s, action, s_prime)) + + preconditions = set() + if key in op_index: + preconditions = op_index[key].pre + current_goal = (current_goal - necessary_effect) | preconditions + + return candidate_ops + + +def refine_by_plan_divergence(demos, learned_operators): + op_index = {(op.action, frozenset(op.add)): op for op in learned_operators} + support_sets = {key: [] for key in op_index} + + for traj, goal in demos: + current_goal = goal.copy() + for s, a, s_prime in traj: + effect = s_prime - s + necessary_effect = effect if len(effect) == 1 else effect & current_goal + key = (a, frozenset(necessary_effect)) + if key in support_sets: + support_sets[key].append(s) + + preconditions = set() + if key in op_index: + preconditions = op_index[key].pre + current_goal = (current_goal - necessary_effect) | preconditions + + for traj, goal in demos: + state = traj[0][0] + for (s_true, a_true, s_next_true) in traj: + applicable = [op for op in learned_operators if op.is_applicable(state)] + if not applicable: + break + op_planner = random.choice(applicable) + + key_true = (a_true, frozenset(s_next_true - s_true)) + op_true = op_index.get(key_true, None) + if op_true is None: + continue + + if op_planner is op_true: + state = op_true.apply(state) + continue + + key_planner = (op_planner.action, frozenset(op_planner.add)) + support = support_sets.get(key_planner, []) + if not support: + continue + + common_preds = set.intersection(*support) + potential_preds_to_add = common_preds - state + if not potential_preds_to_add: + continue + # if len(potential_preds_to_add - goal) > 0: + # preds_to_add = {random.choice(list(potential_preds_to_add - goal))} + # else: + # preds_to_add = {random.choice(list(potential_preds_to_add))} + preds_to_add = {random.choice(list(potential_preds_to_add))} + op_planner.pre.update(preds_to_add) + + state = op_true.apply(state) + + return list(op_index.values()) + + +def learn_operators_from_demos(demo_data, max_iters=100, verbose=True): + 
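# --- Editor's illustrative sketch (not part of the file): a concrete trace of
# backward_infer_minimal_effects above on a two-step trajectory. States and
# the goal are sets of predicate indices; the numbers are made up.
traj = [
    ({0}, 7, {0, 1}),           # action 7 adds {1}
    ({0, 1}, 8, {0, 1, 2, 3}),  # action 8 adds {2, 3}, but only 2 serves the goal
]
goal = {1, 2}

current_goal = set(goal)
credited = []
for s, action, s_prime in reversed(traj):
    effect = s_prime - s
    necessary_effect = effect if len(effect) == 1 else effect & current_goal
    credited.append((action, frozenset(necessary_effect)))
    current_goal = current_goal - necessary_effect  # no learned preconditions yet

# Action 8 is credited only with {2} (3 was irrelevant to the goal), and
# action 7 with its single effect {1}; the regressed goal ends up empty.
assert credited == [(8, frozenset({2})), (7, frozenset({1}))]
assert current_goal == set()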
learned_ops = [] + last_preconds = None + + for iteration in range(max_iters): + if verbose: + print(f"\n--- Iteration {iteration + 1} (Backward + Forward) ---") + + candidate_ops = backward_infer_minimal_effects(demo_data, current_operators=learned_ops or None) + + if learned_ops == []: + learned_ops = [ + Operator(pre=set(), add=set(effect_frozen), delete=set(), action=action) + for (action, effect_frozen), entry in candidate_ops.items() + ] + else: + op_index = {(op.action, frozenset(op.add)): op for op in learned_ops} + learned_ops = [] + for (action, effect_frozen), entry in candidate_ops.items(): + if (action, effect_frozen) not in op_index: + learned_ops.append(Operator(pre=set(), add=set(effect_frozen), delete=set(), action=action)) + else: + learned_ops.append(Operator( + pre=op_index[(action, effect_frozen)].pre, + add=set(effect_frozen), + delete=set(), + action=action + )) + + if verbose: + print("Backward Learned Operators:") + for op in sorted(learned_ops, key=lambda x: x.action): + print(op) + + learned_ops = refine_by_plan_divergence(demo_data, learned_ops) + + if verbose: + print("Forward Learned Operators:") + for op in sorted(learned_ops, key=lambda x: x.action): + print(op) + + return learned_ops + + +# --- Run Learning --- +def run_operator_learning_trials(num_trials=10, verbose=True) -> int: + invalid_count = 0 + valid_count = 0 + + for _ in range(num_trials): + pred_pool = [i for i in range(NUM_PREDICATES)] + operators = [generate_random_operator(pred_pool, i) for i in range(NUM_OPERATORS)] + demo_data = generate_planned_demo_trajectories(operators, NUM_TRAJS, max_depth=TRAJ_MAX) + + if verbose: + print("\n--- Ground Truth Operators ---") + for op in sorted(operators, key=lambda x: x.action): + print(op) + print() + + print("Demos:") + for traj in demo_data: + print("Goal:", traj[1], "Length:", len(traj[0])) + + refined_ops = learn_operators_from_demos(demo_data, max_iters=1000, verbose=verbose) + + if verbose: + print("\n--- Final Learned Operators ---") + for op in sorted(refined_ops, key=lambda x: x.action): + print(op) + print() + + # Add delete effects + op_index = {(op.action, frozenset(op.add)): op for op in refined_ops} + for (action, effect_frozen), entry in backward_infer_minimal_effects(demo_data, current_operators=refined_ops).items(): + delete = op_index[(action, effect_frozen)].pre & set.intersection( + *[set(entry['demos'][i][0] - entry['demos'][i][2]) for i in range(len(entry['demos']))] + ) + op_index[(action, effect_frozen)].delete = delete + + for op in op_index.values(): + for actual_op in operators: + if op.action == actual_op.action: + if op.pre <= actual_op.pre and op.add <= actual_op.add and op.delete <= actual_op.delete: + valid_count += 1 + if verbose: + print("VALID\n\tLEARNED |", op, "\n\tORIGINAL|", actual_op) + else: + invalid_count += 1 + if verbose: + print("INVALID\n\tLEARNED |", op, "\n\tORIGINAL|", actual_op) + + return invalid_count, valid_count + +def deduplicate_predicates_by_equivalence(demos, operators, num_preds): + from collections import defaultdict + + # Step 1: Build truth vectors for each predicate + pred_vectors = defaultdict(list) + + for traj, goal in demos: + for s, _, s_prime in traj: + for i in range(num_preds): + pred_vectors[i].append(int(i in s)) + pred_vectors[i].append(int(i in s_prime)) + for i in range(num_preds): + pred_vectors[i].append(int(i in goal)) + + # Step 2: Group predicates with identical truth vectors + vector_to_preds = defaultdict(list) + for pred, vec in pred_vectors.items(): + 
vector_to_preds[tuple(vec)].append(pred) + + # Step 3: Build a mapping from redundant predicate -> representative + replace_map = {} + for group in vector_to_preds.values(): + representative = min(group) # pick smallest index as canonical + for pred in group: + replace_map[pred] = representative + + # Step 4: Replace predicates in demos + new_demos = [] + for traj, goal in demos: + new_traj = [] + for s, a, s_prime in traj: + s_new = {replace_map[p] for p in s} + s_prime_new = {replace_map[p] for p in s_prime} + new_traj.append((s_new, a, s_prime_new)) + new_goal = {replace_map[p] for p in goal} + new_demos.append((new_traj, new_goal)) + + # Step 5: Replace predicates in operators + new_operators = [] + for op in operators: + pre = {replace_map[p] for p in op.pre} + add = {replace_map[p] for p in op.add} + delete = {replace_map[p] for p in op.delete} + new_operators.append(Operator(pre, add, delete, op.action)) + + return new_demos, new_operators, replace_map + + +# invalids, valids = run_operator_learning_trials(num_trials=50, verbose=False) +# print(f"Number of invalid learned operators: {invalids} / {invalids+valids}") + +pred_pool = [i for i in range(NUM_PREDICATES)] +operators = [generate_random_operator(pred_pool, i) for i in range(NUM_OPERATORS)] +demo_data = generate_planned_demo_trajectories(operators, NUM_TRAJS, max_depth=TRAJ_MAX) + +demo_data, operators, pred_replace_map = deduplicate_predicates_by_equivalence(demo_data, operators, NUM_PREDICATES) +print("Predicate replacement map:", pred_replace_map) + +print("\n--- Ground Truth Operators ---") +for op in sorted(operators, key=lambda x: x.action): + print(op) +print() + +print("Demos:") +# for traj in demo_data: +# print("Goal:", traj[1], "Length:", len(traj[0])) +print(len(demo_data)) + +x = [] +y = [] +op_index = None + +for run_i in range(1, len(demo_data), 5): + refined_ops = learn_operators_from_demos(demo_data[:run_i], max_iters=1000, verbose=False) + op_index = {(op.action, frozenset(op.add)): op for op in refined_ops} + for (action, effect_frozen), entry in backward_infer_minimal_effects(demo_data[:run_i], current_operators=refined_ops).items(): + delete = op_index[(action, effect_frozen)].pre & set.intersection( + *[set(entry['demos'][i][0] - entry['demos'][i][2]) for i in range(len(entry['demos']))] + ) + op_index[(action, effect_frozen)].delete = delete + + print("\n--- Final Learned Operators ---") + for op in sorted(op_index.values(), key=lambda x: x.action): + print(op) + print() + + num_match = 0 + actions = set() + for actual_op in operators: + is_match = False + for op in op_index.values(): + actions.add(op.action) + if op.action == actual_op.action: + if op.pre == actual_op.pre and op.add == actual_op.add and op.delete == actual_op.delete: + is_match = True + if is_match: + num_match += 1 + num_actions = len(actions) + x.append(run_i) + y.append(num_match) + actions = set() + for traj in demo_data[:run_i]: + for (s, a, s_prime) in traj[0]: + actions.add(a) + print(len(actions)) + print(actions) + print(run_i, num_match, num_actions) + + +print("\n--- Ground Truth Operators ---") +for op in sorted(operators, key=lambda x: x.action): + print(op) +print() + +def augment_demos_with_missing_ground_truth_ops(demos, learned_ops, true_ops, num_preds, num_augments=1): + from collections import defaultdict + + # Index learned ops by (action, add, delete, pre) + learned_op_keys = set( + (op.action, frozenset(op.add), frozenset(op.delete), frozenset(op.pre)) + for op in learned_ops + ) + + augmented = [] + + for true_op 
in true_ops: + key = (true_op.action, frozenset(true_op.add), frozenset(true_op.delete), frozenset(true_op.pre)) + if key in learned_op_keys: + continue # already learned correctly + + # Add demos for this missing operator + for _ in range(num_augments): + possible_goals = None + while not possible_goals: + base_state = set(random.sample(range(num_preds), random.randint(2, num_preds))) + false_pre = set() + for op in learned_ops: + if op.action == true_op.action and op.add == true_op.add: + false_pre |= op.pre - true_op.pre + base_state -= false_pre # remove wrong precondition + base_state |= true_op.pre # ensure it's applicable + next_state = true_op.apply(base_state) + + # Choose a goal that is newly added by the operator + possible_goals = true_op.add - base_state + + goal = possible_goals + demo = [(base_state.copy(), true_op.action, next_state.copy())] + augmented.append((demo, goal)) + + return demos + augmented + +for _ in range(10): + demo_data = augment_demos_with_missing_ground_truth_ops( + demo_data, list(op_index.values()), operators, NUM_PREDICATES, num_augments=1 + ) + + refined_ops = learn_operators_from_demos(demo_data, max_iters=1000, verbose=False) + op_index = {(op.action, frozenset(op.add)): op for op in refined_ops} + for (action, effect_frozen), entry in backward_infer_minimal_effects(demo_data[:run_i], current_operators=refined_ops).items(): + delete = op_index[(action, effect_frozen)].pre & set.intersection( + *[set(entry['demos'][i][0] - entry['demos'][i][2]) for i in range(len(entry['demos']))] + ) + op_index[(action, effect_frozen)].delete = delete + +print("\n--- Final Learned Operators ---") +for op in sorted(op_index.values(), key=lambda x: x.action): + print(op) +print() + +num_match = 0 +actions = set() +for actual_op in operators: + is_match = False + for op in op_index.values(): + actions.add(op.action) + if op.action == actual_op.action: + if op.pre == actual_op.pre and op.add == actual_op.add and op.delete == actual_op.delete: + is_match = True + if is_match: + num_match += 1 + else: + print(actual_op) +num_actions = len(actions) +print("final", num_match, num_actions) + + +# TODO Fix Delete Effects + +import ipdb; ipdb.set_trace() \ No newline at end of file diff --git a/test_backward_forward.py b/test_backward_forward.py new file mode 100644 index 0000000000..c707b666b4 --- /dev/null +++ b/test_backward_forward.py @@ -0,0 +1,561 @@ +import random +from typing import List, Set, Tuple +import numpy as np +from collections import deque, defaultdict + +np.random.seed(0) +random.seed(0) + +# Parameters +NUM_PREDICATES = 10 +NUM_OPERATORS = 10 +TRAJ_MAX = 10 +TRAJ_LEN = 5 +NUM_TRAJS = 50 # Fewer for readability +ACTION_SPACE = list(range(NUM_OPERATORS)) + +# --- Operator Representation --- +class Operator: + def __init__(self, pre: Set[int], add: Set[int], delete: Set[int], action: int): + self.pre = pre + self.add = add + self.delete = delete + self.action = action + + def is_applicable(self, state: Set[int]) -> bool: + return self.pre.issubset(state) + + def apply(self, state: Set[int]) -> Set[int]: + if not self.is_applicable(state): + return state # no-op if not applicable + new_state = state.copy() + new_state.difference_update(self.delete) + new_state.update(self.add) + return new_state + + def __repr__(self): + return f"Op(action={self.action}, pre={self.pre}, add={self.add}, del={self.delete})" + + +def plan(start: Set[int], goal: Set[int], operators: List[Operator], max_depth=10): + visited = set() + queue = deque() + queue.append((start.copy(), 
[])) + + while queue: + state, path = queue.popleft() + state_key = frozenset(state) + if state_key in visited: + continue + visited.add(state_key) + + if goal.issubset(state): + return path + + if len(path) >= max_depth: + continue + + for op in operators: + if op.is_applicable(state): + next_state = op.apply(state) + if next_state != state: + queue.append((next_state, path + [(state.copy(), op.action, next_state.copy())])) + + return None + + +# --- Generate Random Operators --- +def generate_random_operator(pred_pool: List[int], action_id: int) -> Operator: + pre = set(random.sample(pred_pool, random.randint(1, 3))) + effects = list(set(pred_pool) - pre) + add = set(random.sample(effects, random.randint(1, min(2, len(effects))))) + delete = set(random.sample(list(pre), random.randint(0, len(pre)))) + return Operator(pre, add, delete, action_id) + + +def compute_reachable_states(init_state: Set[int], operators: List[Operator], max_iters: int = 100) -> List[Set[int]]: + reached_states = set() + reachable = [] + frontier = [init_state.copy()] + + for _ in range(max_iters): + new_frontier = [] + + for state in frontier: + state_key = frozenset(state) + if state_key in reached_states: + continue + + reached_states.add(state_key) + reachable.append(frozenset(state)) + + for op in operators: + if op.is_applicable(state): + next_state = op.apply(state) + next_key = frozenset(next_state) + if next_key not in reached_states: + new_frontier.append(next_state) + + if not new_frontier: + break + frontier = new_frontier + + return set(reachable) + + +# --- Generate Demo Data --- +def generate_planned_demo_trajectories(operators: List[Operator], num_trajs: int, max_depth: int) -> List[Tuple[List[Tuple[Set[int], int, Set[int]]], Set[int]]]: + demos = [] + attempts = 0 + + while len(demos) < num_trajs and attempts < 10000000: + attempts += 1 + init_state = set(random.sample(range(NUM_PREDICATES), random.randint(2, NUM_PREDICATES))) + reachable = compute_reachable_states(init_state, operators) - init_state + + if len(reachable) == 0: + continue + + plan_traj = [] + goals = reachable + while len(goals) > 0 and len(plan_traj) < TRAJ_LEN: + goal_state = random.choice(list(goals)) + goal = goal_state - init_state + + if not goal: + goals.remove(goal_state) + continue + + plan_traj = plan(init_state, goal, operators, max_depth) + if plan_traj is None or len(plan_traj) < TRAJ_LEN: + goals.remove(goal_state) + plan_traj = [] + + if plan_traj and len(plan_traj) >= TRAJ_LEN: + demos.append((plan_traj, goal)) + + return demos + + +# --- Backwards-Forwards Operator Learning --- +def backward_infer_minimal_effects(demo_data, current_operators=None): + candidate_ops = defaultdict(lambda: {'demos': []}) + op_index = {} + if current_operators: + op_index = {(op.action, frozenset(op.add)): op for op in current_operators} + + for traj, goal in sorted(demo_data, key=lambda x: len(x[0])): #sorted(demo_data, key=lambda x: len(x[0])*(1+len(x[1]))): # order by smallest demo + current_goal = goal.copy() + + for (s, action, s_prime) in reversed(traj): + effect = s_prime - s + if len(effect) == 0: + raise Exception("No effect") + elif len(effect) == 1: + necessary_effect = effect + else: + necessary_effect = effect & current_goal + + key = (action, frozenset(necessary_effect)) + candidate_ops[key]['demos'].append((s, action, s_prime)) + + preconditions = set() + if key in op_index: + preconditions = op_index[key].pre + current_goal = (current_goal - necessary_effect) | preconditions + + return candidate_ops + + +def 
refine_by_plan_divergence(demos, learned_operators): + op_index = {(op.action, frozenset(op.add)): op for op in learned_operators} + support_sets = {key: [] for key in op_index} + + for traj, goal in demos: + current_goal = goal.copy() + for s, a, s_prime in traj: + effect = s_prime - s + necessary_effect = effect if len(effect) == 1 else effect & current_goal + key = (a, frozenset(necessary_effect)) + if key in support_sets: + support_sets[key].append(s) + + preconditions = set() + if key in op_index: + preconditions = op_index[key].pre + current_goal = (current_goal - necessary_effect) | preconditions + + for traj, goal in demos: + state = traj[0][0] + for (s_true, a_true, s_next_true) in traj: + applicable = [op for op in learned_operators if op.is_applicable(state)] + if not applicable: + break + op_planner = random.choice(applicable) + + key_true = (a_true, frozenset(s_next_true - s_true)) + op_true = op_index.get(key_true, None) + if op_true is None: + continue + + if op_planner is op_true: + state = op_true.apply(state) + continue + + key_planner = (op_planner.action, frozenset(op_planner.add)) + support = support_sets.get(key_planner, []) + if not support: + continue + + common_preds = set.intersection(*support) + potential_preds_to_add = common_preds - state + if not potential_preds_to_add: + continue + # if len(potential_preds_to_add - goal) > 0: + # preds_to_add = {random.choice(list(potential_preds_to_add - goal))} + # else: + # preds_to_add = {random.choice(list(potential_preds_to_add))} + preds_to_add = {random.choice(list(potential_preds_to_add))} + op_planner.pre.update(preds_to_add) + + state = op_true.apply(state) + + return list(op_index.values()) + + +def learn_operators_from_demos(demo_data, max_iters=100, verbose=True): + learned_ops = [] + last_preconds = None + + for iteration in range(max_iters): + if verbose: + print(f"\n--- Iteration {iteration + 1} (Backward + Forward) ---") + + candidate_ops = backward_infer_minimal_effects(demo_data, current_operators=learned_ops or None) + + if learned_ops == []: + learned_ops = [ + Operator(pre=set(), add=set(effect_frozen), delete=set(), action=action) + for (action, effect_frozen), entry in candidate_ops.items() + ] + else: + op_index = {(op.action, frozenset(op.add)): op for op in learned_ops} + learned_ops = [] + for (action, effect_frozen), entry in candidate_ops.items(): + if (action, effect_frozen) not in op_index: + learned_ops.append(Operator(pre=set(), add=set(effect_frozen), delete=set(), action=action)) + else: + learned_ops.append(Operator( + pre=op_index[(action, effect_frozen)].pre, + add=set(effect_frozen), + delete=set(), + action=action + )) + + if verbose: + print("Backward Learned Operators:") + for op in sorted(learned_ops, key=lambda x: x.action): + print(op) + + learned_ops = refine_by_plan_divergence(demo_data, learned_ops) + + if verbose: + print("Forward Learned Operators:") + for op in sorted(learned_ops, key=lambda x: x.action): + print(op) + + return learned_ops + + +# --- Run Learning --- +def run_operator_learning_trials(num_trials=10, verbose=True) -> int: + invalid_count = 0 + valid_count = 0 + + for _ in range(num_trials): + pred_pool = [i for i in range(NUM_PREDICATES)] + operators = [generate_random_operator(pred_pool, i) for i in range(NUM_OPERATORS)] + demo_data = generate_planned_demo_trajectories(operators, NUM_TRAJS, max_depth=TRAJ_MAX) + + if verbose: + print("\n--- Ground Truth Operators ---") + for op in sorted(operators, key=lambda x: x.action): + print(op) + print() + + 
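# --- Editor's illustrative sketch (not part of the file): the delete-effect
# recovery applied after learning (just below): an operator's delete effects
# are taken to be its preconditions that vanish in every supporting
# transition. Toy numbers only.
pre = {2, 5}
supporting_transitions = [
    ({2, 5, 7}, {5, 7, 9}),  # 2 vanishes
    ({1, 2, 5}, {1, 5, 9}),  # 2 vanishes, 5 survives
]
vanished_everywhere = set.intersection(
    *[s - s_prime for s, s_prime in supporting_transitions])
delete = pre & vanished_everywhere
assert delete == {2}  # 5 is a precondition but is never deleted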
print("Demos:") + for traj in demo_data: + print("Goal:", traj[1], "Length:", len(traj[0])) + + refined_ops = learn_operators_from_demos(demo_data, max_iters=1000, verbose=verbose) + + if verbose: + print("\n--- Final Learned Operators ---") + for op in sorted(refined_ops, key=lambda x: x.action): + print(op) + print() + + # Add delete effects + op_index = {(op.action, frozenset(op.add)): op for op in refined_ops} + for (action, effect_frozen), entry in backward_infer_minimal_effects(demo_data, current_operators=refined_ops).items(): + delete = op_index[(action, effect_frozen)].pre & set.intersection( + *[set(entry['demos'][i][0] - entry['demos'][i][2]) for i in range(len(entry['demos']))] + ) + op_index[(action, effect_frozen)].delete = delete + + for op in op_index.values(): + for actual_op in operators: + if op.action == actual_op.action: + if op.pre <= actual_op.pre and op.add <= actual_op.add and op.delete <= actual_op.delete: + valid_count += 1 + if verbose: + print("VALID\n\tLEARNED |", op, "\n\tORIGINAL|", actual_op) + else: + invalid_count += 1 + if verbose: + print("INVALID\n\tLEARNED |", op, "\n\tORIGINAL|", actual_op) + + return invalid_count, valid_count + +def deduplicate_predicates_by_equivalence(demos, operators, num_preds): + from collections import defaultdict + + # Step 1: Build truth vectors for each predicate + pred_vectors = defaultdict(list) + + for traj, goal in demos: + for s, _, s_prime in traj: + for i in range(num_preds): + pred_vectors[i].append(int(i in s)) + pred_vectors[i].append(int(i in s_prime)) + for i in range(num_preds): + pred_vectors[i].append(int(i in goal)) + + # Step 2: Group predicates with identical truth vectors + vector_to_preds = defaultdict(list) + for pred, vec in pred_vectors.items(): + vector_to_preds[tuple(vec)].append(pred) + + # Step 3: Build a mapping from redundant predicate -> representative + replace_map = {} + for group in vector_to_preds.values(): + representative = min(group) # pick smallest index as canonical + for pred in group: + replace_map[pred] = representative + + # Step 4: Replace predicates in demos + new_demos = [] + for traj, goal in demos: + new_traj = [] + for s, a, s_prime in traj: + s_new = {replace_map[p] for p in s} + s_prime_new = {replace_map[p] for p in s_prime} + new_traj.append((s_new, a, s_prime_new)) + new_goal = {replace_map[p] for p in goal} + new_demos.append((new_traj, new_goal)) + + # Step 5: Replace predicates in operators + new_operators = [] + for op in operators: + pre = {replace_map[p] for p in op.pre} + add = {replace_map[p] for p in op.add} + delete = {replace_map[p] for p in op.delete} + new_operators.append(Operator(pre, add, delete, op.action)) + + return new_demos, new_operators, replace_map + +def augment_demos_with_missing_ground_truth_ops(demos, learned_ops, true_ops, num_preds, num_augments=1): + from collections import defaultdict + + # Index learned ops by (action, add, delete, pre) + learned_op_keys = set( + (op.action, frozenset(op.add), frozenset(op.delete), frozenset(op.pre)) + for op in learned_ops + ) + + augmented = [] + + for true_op in true_ops: + key = (true_op.action, frozenset(true_op.add), frozenset(true_op.delete), frozenset(true_op.pre)) + if key in learned_op_keys: + continue # already learned correctly + + # Add demos for this missing operator + for _ in range(num_augments): + possible_goals = None + while not possible_goals: + base_state = set(random.sample(range(num_preds), random.randint(2, num_preds))) + false_pre = set() + for op in learned_ops: + if op.action 
== true_op.action and op.add == true_op.add: + false_pre |= op.pre - true_op.pre + base_state -= false_pre # remove wrong precondition + base_state |= true_op.pre # ensure it's applicable + next_state = true_op.apply(base_state) + + # Choose a goal that is newly added by the operator + possible_goals = true_op.add - base_state + + goal = possible_goals + demo = [(base_state.copy(), true_op.action, next_state.copy())] + augmented.append((demo, goal)) + + return demos + augmented + +# invalids, valids = run_operator_learning_trials(num_trials=50, verbose=False) +# print(f"Number of invalid learned operators: {invalids} / {invalids+valids}") + +# +results = {"tot_matches":[], "tot_soft_matches":[],"tot_exsoft_matches":[], "tot_num_ops":[]} +# +pred_pool = [i for i in range(NUM_PREDICATES)] +operators = [generate_random_operator(pred_pool, i) for i in range(NUM_OPERATORS)] +all_demo_data = generate_planned_demo_trajectories(operators, NUM_TRAJS, max_depth=TRAJ_MAX) + +# all_demo_data, operators, pred_replace_map = deduplicate_predicates_by_equivalence(all_demo_data, operators, NUM_PREDICATES) +# print("Predicate replacement map:", pred_replace_map) + +for num_trajs in range(1, 52, 10): + + print("\n--- Ground Truth Operators ---") + for op in sorted(operators, key=lambda x: x.action): + print(op) + print() + + print("Demos:") + # for traj in demo_data: + # print("Goal:", traj[1], "Length:", len(traj[0])) + demo_data = all_demo_data[:num_trajs] + print(len(demo_data)) + + op_index = None + + potential_op_sets = {} + for run_i in range(100): + refined_ops = learn_operators_from_demos(demo_data, max_iters=1000, verbose=False) + op_index = {(op.action, frozenset(op.add)): op for op in refined_ops} + for (action, effect_frozen), entry in backward_infer_minimal_effects(demo_data, current_operators=refined_ops).items(): + delete = op_index[(action, effect_frozen)].pre & set.intersection( + *[set(entry['demos'][i][0] - entry['demos'][i][2]) for i in range(len(entry['demos']))] + ) + op_index[(action, effect_frozen)].delete = delete + + augmented_demo_data = augment_demos_with_missing_ground_truth_ops( + demo_data, list(op_index.values()), operators, NUM_PREDICATES, num_augments=5 + ) + + refined_ops = learn_operators_from_demos(augmented_demo_data, max_iters=1000, verbose=False) + op_index = {(op.action, frozenset(op.add)): op for op in refined_ops} + for (action, effect_frozen), entry in backward_infer_minimal_effects(augmented_demo_data, current_operators=refined_ops).items(): + delete = op_index[(action, effect_frozen)].pre & set.intersection( + *[set(entry['demos'][i][0] - entry['demos'][i][2]) for i in range(len(entry['demos']))] + ) + op_index[(action, effect_frozen)].delete = delete + + print("\n--- Final Learned Operators ---") + for op in sorted(op_index.values(), key=lambda x: x.action): + print(op) + print() + + num_match = 0 + num_soft_match = 0 + num_exsoft_match = 0 + actions = set() + for actual_op in operators: + is_match = False + is_soft_match = False + is_exsoft_match = False + for op in op_index.values(): + actions.add(op.action) + if op.action == actual_op.action: + if len(op.pre - actual_op.pre) <= 2 and op.add == actual_op.add and op.delete == actual_op.delete: + is_exsoft_match = True + if len(op.pre - actual_op.pre) <= 1 and op.add == actual_op.add and op.delete == actual_op.delete: + is_soft_match = True + if op.pre == actual_op.pre and op.add == actual_op.add and op.delete == actual_op.delete: + is_match = True + if is_exsoft_match: + num_exsoft_match += 1 + if 
is_soft_match: + num_soft_match += 1 + if is_match: + num_match += 1 + num_actions = len(actions) + actions = set() + for traj in demo_data[:run_i]: + for (s, a, s_prime) in traj[0]: + actions.add(a) + new_op_set_str = str([op for op in sorted(op_index.values(), key=lambda x: x.action)]) + if new_op_set_str in potential_op_sets: + potential_op_sets[new_op_set_str] += 1 + else: + potential_op_sets[new_op_set_str] = 0 + results["tot_matches"].append((num_trajs, run_i, num_match)) + results["tot_soft_matches"].append((num_trajs, run_i, num_soft_match)) + results["tot_exsoft_matches"].append((num_trajs, run_i, num_exsoft_match)) + results["tot_num_ops"].append((num_trajs, run_i, len(potential_op_sets.keys()), num_actions)) + print(num_trajs, run_i, num_match, num_soft_match, num_exsoft_match, len(potential_op_sets.keys()), num_actions) + + print("\n--- Ground Truth Operators ---") + for op in sorted(operators, key=lambda x: x.action): + print(op) + print() + +import pickle + +filename = 'HITL_more_results.pkl' + +# Open the file in binary write mode ('wb') +with open(filename, 'wb') as file: + pickle.dump(results, file) + +import ipdb; ipdb.set_trace() + + + + + + + + + +quit() + +for _ in range(10): + demo_data = augment_demos_with_missing_ground_truth_ops( + demo_data, list(op_index.values()), operators, NUM_PREDICATES, num_augments=1 + ) + + refined_ops = learn_operators_from_demos(demo_data, max_iters=1000, verbose=False) + op_index = {(op.action, frozenset(op.add)): op for op in refined_ops} + for (action, effect_frozen), entry in backward_infer_minimal_effects(demo_data[:run_i], current_operators=refined_ops).items(): + delete = op_index[(action, effect_frozen)].pre & set.intersection( + *[set(entry['demos'][i][0] - entry['demos'][i][2]) for i in range(len(entry['demos']))] + ) + op_index[(action, effect_frozen)].delete = delete + +print("\n--- Final Learned Operators ---") +for op in sorted(op_index.values(), key=lambda x: x.action): + print(op) +print() + +num_match = 0 +actions = set() +for actual_op in operators: + is_match = False + for op in op_index.values(): + actions.add(op.action) + if op.action == actual_op.action: + if op.pre == actual_op.pre and op.add == actual_op.add and op.delete == actual_op.delete: + is_match = True + if is_match: + num_match += 1 + else: + print(actual_op) +num_actions = len(actions) +print("final", num_match, num_actions) + + +# TODO Fix Delete Effects + +import ipdb; ipdb.set_trace() \ No newline at end of file diff --git a/test_backward_forward_pick_and_place.py b/test_backward_forward_pick_and_place.py new file mode 100644 index 0000000000..c49286cf94 --- /dev/null +++ b/test_backward_forward_pick_and_place.py @@ -0,0 +1,509 @@ +import random +from typing import List, Set, Tuple +import numpy as np +from collections import deque, defaultdict + +np.random.seed(0) +random.seed(0) + +# Parameters +NUM_PREDICATES = 10 +NUM_OPERATORS = 10 +TRAJ_MAX = 10 +TRAJ_LEN = 5 +NUM_TRAJS = 50 # Fewer for readability +ACTION_SPACE = list(range(NUM_OPERATORS)) + +# --- Operator Representation --- +class Operator: + def __init__(self, pre: Set[int], add: Set[int], delete: Set[int], action: int): + self.pre = pre + self.add = add + self.delete = delete + self.action = action + + def is_applicable(self, state: Set[int]) -> bool: + return self.pre.issubset(state) + + def apply(self, state: Set[int]) -> Set[int]: + if not self.is_applicable(state): + return state # no-op if not applicable + new_state = state.copy() + new_state.difference_update(self.delete) + 
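# Note (editor comment): delete effects are removed before add effects are
# unioned in, and when the preconditions do not hold the state is returned
# unchanged. For example, Operator(pre={0}, add={2}, delete={0}, action=0)
# gives .apply({0, 1}) == {1, 2}, while .apply({1}) just returns {1}.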
new_state.update(self.add) + return new_state + + def __repr__(self): + return f"Op(action={self.action}, pre={self.pre}, add={self.add}, del={self.delete})" + + +def plan(start: Set[int], goal: Set[int], operators: List[Operator], max_depth=10): + visited = set() + queue = deque() + queue.append((start.copy(), [])) + + while queue: + state, path = queue.popleft() + state_key = frozenset(state) + if state_key in visited: + continue + visited.add(state_key) + + if goal.issubset(state): + return path + + if len(path) >= max_depth: + continue + + for op in operators: + if op.is_applicable(state): + next_state = op.apply(state) + if next_state != state: + queue.append((next_state, path + [(state.copy(), op.action, next_state.copy())])) + + return None + + +# --- Generate Random Operators --- +def generate_random_operator(pred_pool: List[int], action_id: int) -> Operator: + pre = set(random.sample(pred_pool, random.randint(1, 3))) + effects = list(set(pred_pool) - pre) + add = set(random.sample(effects, random.randint(1, min(2, len(effects))))) + delete = set(random.sample(list(pre), random.randint(0, len(pre)))) + return Operator(pre, add, delete, action_id) + + +def compute_reachable_states(init_state: Set[int], operators: List[Operator], max_iters: int = 100) -> List[Set[int]]: + reached_states = set() + reachable = [] + frontier = [init_state.copy()] + + for _ in range(max_iters): + new_frontier = [] + + for state in frontier: + state_key = frozenset(state) + if state_key in reached_states: + continue + + reached_states.add(state_key) + reachable.append(frozenset(state)) + + for op in operators: + if op.is_applicable(state): + next_state = op.apply(state) + next_key = frozenset(next_state) + if next_key not in reached_states: + new_frontier.append(next_state) + + if not new_frontier: + break + frontier = new_frontier + + return set(reachable) + + +# --- Generate Demo Data --- +def generate_planned_demo_trajectories(operators: List[Operator], num_trajs: int, max_depth: int) -> List[Tuple[List[Tuple[Set[int], int, Set[int]]], Set[int]]]: + demos = [] + attempts = 0 + + while len(demos) < num_trajs and attempts < 10000000: + attempts += 1 + init_state = set(random.sample(range(NUM_PREDICATES), random.randint(2, NUM_PREDICATES))) + reachable = compute_reachable_states(init_state, operators) - init_state + + if len(reachable) == 0: + continue + + plan_traj = [] + goals = reachable + while len(goals) > 0 and len(plan_traj) < TRAJ_LEN: + goal_state = random.choice(list(goals)) + goal = goal_state - init_state + + if not goal: + goals.remove(goal_state) + continue + + plan_traj = plan(init_state, goal, operators, max_depth) + if plan_traj is None or len(plan_traj) < TRAJ_LEN: + goals.remove(goal_state) + plan_traj = [] + + if plan_traj and len(plan_traj) >= TRAJ_LEN: + demos.append((plan_traj, goal)) + + return demos + + +# --- Backwards-Forwards Operator Learning --- +def backward_infer_minimal_effects(demo_data, current_operators=None): + candidate_ops = defaultdict(lambda: {'demos': []}) + op_index = {} + if current_operators: + op_index = {(op.action, frozenset(op.add)): op for op in current_operators} + + for traj, goal in sorted(demo_data, key=lambda x: len(x[0])): #sorted(demo_data, key=lambda x: len(x[0])*(1+len(x[1]))): # order by smallest demo + current_goal = goal.copy() + + for (s, action, s_prime) in reversed(traj): + effect = s_prime - s + if len(effect) == 0: + raise Exception("No effect") + elif len(effect) == 1: + necessary_effect = effect + else: + necessary_effect = effect & 
current_goal + + key = (action, frozenset(necessary_effect)) + candidate_ops[key]['demos'].append((s, action, s_prime)) + + preconditions = set() + if key in op_index: + preconditions = op_index[key].pre + current_goal = (current_goal - necessary_effect) | preconditions + + return candidate_ops + + +def refine_by_plan_divergence(demos, learned_operators): + op_index = {(op.action, frozenset(op.add)): op for op in learned_operators} + support_sets = {key: [] for key in op_index} + + for traj, goal in demos: + current_goal = goal.copy() + for s, a, s_prime in traj: + effect = s_prime - s + necessary_effect = effect if len(effect) == 1 else effect & current_goal + key = (a, frozenset(necessary_effect)) + if key in support_sets: + support_sets[key].append(s) + + preconditions = set() + if key in op_index: + preconditions = op_index[key].pre + current_goal = (current_goal - necessary_effect) | preconditions + + for traj, goal in demos: + state = traj[0][0] + for (s_true, a_true, s_next_true) in traj: + applicable = [op for op in learned_operators if op.is_applicable(state)] + if not applicable: + break + op_planner = random.choice(applicable) + + key_true = (a_true, frozenset(s_next_true - s_true)) + op_true = op_index.get(key_true, None) + if op_true is None: + continue + + if op_planner is op_true: + state = op_true.apply(state) + continue + + key_planner = (op_planner.action, frozenset(op_planner.add)) + support = support_sets.get(key_planner, []) + if not support: + continue + + common_preds = set.intersection(*support) + potential_preds_to_add = common_preds - state + if not potential_preds_to_add: + continue + # if len(potential_preds_to_add - goal) > 0: + # preds_to_add = {random.choice(list(potential_preds_to_add - goal))} + # else: + # preds_to_add = {random.choice(list(potential_preds_to_add))} + preds_to_add = {random.choice(list(potential_preds_to_add))} + op_planner.pre.update(preds_to_add) + + state = op_true.apply(state) + + return list(op_index.values()) + + +def learn_operators_from_demos(demo_data, max_iters=100, verbose=True): + learned_ops = [] + last_preconds = None + + for iteration in range(max_iters): + if verbose: + print(f"\n--- Iteration {iteration + 1} (Backward + Forward) ---") + + candidate_ops = backward_infer_minimal_effects(demo_data, current_operators=learned_ops or None) + + if learned_ops == []: + learned_ops = [ + Operator(pre=set(), add=set(effect_frozen), delete=set(), action=action) + for (action, effect_frozen), entry in candidate_ops.items() + ] + else: + op_index = {(op.action, frozenset(op.add)): op for op in learned_ops} + learned_ops = [] + for (action, effect_frozen), entry in candidate_ops.items(): + if (action, effect_frozen) not in op_index: + learned_ops.append(Operator(pre=set(), add=set(effect_frozen), delete=set(), action=action)) + else: + learned_ops.append(Operator( + pre=op_index[(action, effect_frozen)].pre, + add=set(effect_frozen), + delete=set(), + action=action + )) + + if verbose: + print("Backward Learned Operators:") + for op in sorted(learned_ops, key=lambda x: x.action): + print(op) + + learned_ops = refine_by_plan_divergence(demo_data, learned_ops) + + if verbose: + print("Forward Learned Operators:") + for op in sorted(learned_ops, key=lambda x: x.action): + print(op) + + return learned_ops + + +# --- Run Learning --- +def run_operator_learning_trials(num_trials=10, verbose=True) -> int: + invalid_count = 0 + valid_count = 0 + + for _ in range(num_trials): + pred_pool = [i for i in range(NUM_PREDICATES)] + operators = 
[generate_random_operator(pred_pool, i) for i in range(NUM_OPERATORS)] + demo_data = generate_planned_demo_trajectories(operators, NUM_TRAJS, max_depth=TRAJ_MAX) + + if verbose: + print("\n--- Ground Truth Operators ---") + for op in sorted(operators, key=lambda x: x.action): + print(op) + print() + + print("Demos:") + for traj in demo_data: + print("Goal:", traj[1], "Length:", len(traj[0])) + + refined_ops = learn_operators_from_demos(demo_data, max_iters=1000, verbose=verbose) + + if verbose: + print("\n--- Final Learned Operators ---") + for op in sorted(refined_ops, key=lambda x: x.action): + print(op) + print() + + # Add delete effects + op_index = {(op.action, frozenset(op.add)): op for op in refined_ops} + for (action, effect_frozen), entry in backward_infer_minimal_effects(demo_data, current_operators=refined_ops).items(): + delete = op_index[(action, effect_frozen)].pre & set.intersection( + *[set(entry['demos'][i][0] - entry['demos'][i][2]) for i in range(len(entry['demos']))] + ) + op_index[(action, effect_frozen)].delete = delete + + for op in op_index.values(): + for actual_op in operators: + if op.action == actual_op.action: + if op.pre <= actual_op.pre and op.add <= actual_op.add and op.delete <= actual_op.delete: + valid_count += 1 + if verbose: + print("VALID\n\tLEARNED |", op, "\n\tORIGINAL|", actual_op) + else: + invalid_count += 1 + if verbose: + print("INVALID\n\tLEARNED |", op, "\n\tORIGINAL|", actual_op) + + return invalid_count, valid_count + +def deduplicate_predicates_by_equivalence(demos, operators, num_preds): + from collections import defaultdict + + # Step 1: Build truth vectors for each predicate + pred_vectors = defaultdict(list) + + for traj, goal in demos: + for s, _, s_prime in traj: + for i in range(num_preds): + pred_vectors[i].append(int(i in s)) + pred_vectors[i].append(int(i in s_prime)) + for i in range(num_preds): + pred_vectors[i].append(int(i in goal)) + + # Step 2: Group predicates with identical truth vectors + vector_to_preds = defaultdict(list) + for pred, vec in pred_vectors.items(): + vector_to_preds[tuple(vec)].append(pred) + + # Step 3: Build a mapping from redundant predicate -> representative + replace_map = {} + for group in vector_to_preds.values(): + representative = min(group) # pick smallest index as canonical + for pred in group: + replace_map[pred] = representative + + # Step 4: Replace predicates in demos + new_demos = [] + for traj, goal in demos: + new_traj = [] + for s, a, s_prime in traj: + s_new = {replace_map[p] for p in s} + s_prime_new = {replace_map[p] for p in s_prime} + new_traj.append((s_new, a, s_prime_new)) + new_goal = {replace_map[p] for p in goal} + new_demos.append((new_traj, new_goal)) + + # Step 5: Replace predicates in operators + new_operators = [] + for op in operators: + pre = {replace_map[p] for p in op.pre} + add = {replace_map[p] for p in op.add} + delete = {replace_map[p] for p in op.delete} + new_operators.append(Operator(pre, add, delete, op.action)) + + return new_demos, new_operators, replace_map + + +# invalids, valids = run_operator_learning_trials(num_trials=50, verbose=False) +# print(f"Number of invalid learned operators: {invalids} / {invalids+valids}") + + +PREDICATES = { + "at_A": 0, + "at_B": 1, + "handempty": 2, + "holding_block1": 3, + "holding_block2": 4, + "clear_block1": 5, + "clear_block2": 6, + "inside_block1": 7, + "inside_block2": 8, +} + +OPERATORS = [ + # move from B to A + Operator(pre={PREDICATES["at_B"]}, add={PREDICATES["at_A"]}, delete={PREDICATES["at_B"]}, action=0), + # 
move from A to B + Operator(pre={PREDICATES["at_A"]}, add={PREDICATES["at_B"]}, delete={PREDICATES["at_A"]}, action=1), + + # pick block1 + Operator(pre={PREDICATES["at_A"], PREDICATES["clear_block1"], PREDICATES["handempty"]}, + add={PREDICATES["holding_block1"]}, + delete={PREDICATES["clear_block1"], PREDICATES["handempty"]}, + action=2), + + # pick block2 + Operator(pre={PREDICATES["at_A"], PREDICATES["clear_block2"], PREDICATES["handempty"]}, + add={PREDICATES["holding_block2"]}, + delete={PREDICATES["clear_block2"], PREDICATES["handempty"]}, + action=3), + + # place block1 in box (at B) + Operator(pre={PREDICATES["at_B"], PREDICATES["holding_block1"]}, + add={PREDICATES["inside_block1"], PREDICATES["handempty"]}, + delete={PREDICATES["holding_block1"]}, + action=4), + + # place block2 in box (at B) + Operator(pre={PREDICATES["at_B"], PREDICATES["holding_block2"]}, + add={PREDICATES["inside_block2"], PREDICATES["handempty"]}, + delete={PREDICATES["holding_block2"]}, + action=5), +] + + +init_state = { + PREDICATES["at_B"], PREDICATES["handempty"], + PREDICATES["clear_block1"], PREDICATES["clear_block2"] +} + +actions = [0, 2, 1, 4, 0, 3, 1, 5] # move→pick→move→place (block1), move→pick→move→place (block2) + +state = init_state.copy() +traj1 = [] + +for action_id in actions: + op = OPERATORS[action_id] + next_state = op.apply(state) + traj1.append((state.copy(), action_id, next_state.copy())) + state = next_state.copy() + +goal1 = {PREDICATES["inside_block1"], PREDICATES["inside_block2"]} + +actions = [0, 3, 1, 5, 0, 2, 1, 4] # move→pick→move→place (block1), move→pick→move→place (block2) + +state = init_state.copy() +traj2 = [] + +for action_id in actions: + op = OPERATORS[action_id] + next_state = op.apply(state) + traj2.append((state.copy(), action_id, next_state.copy())) + state = next_state.copy() + +goal2 = {PREDICATES["inside_block1"], PREDICATES["inside_block2"]} +demo_data = [(traj1, goal1), (traj2, goal2)] + +demo_data, operators, pred_replace_map = deduplicate_predicates_by_equivalence(demo_data, OPERATORS, NUM_PREDICATES) +print("Predicate replacement map:", pred_replace_map) + +print("\n--- Ground Truth Operators ---") +for op in sorted(operators, key=lambda x: x.action): + print(op) +print() + +print("Demos:") +# for traj in demo_data: +# print("Goal:", traj[1], "Length:", len(traj[0])) +print(len(demo_data)) + +learned = learn_operators_from_demos(demo_data, max_iters=20, verbose=True) +op_index = {(op.action, frozenset(op.add)): op for op in learned} +for (action, effect_frozen), entry in backward_infer_minimal_effects(demo_data, current_operators=learned).items(): + delete = op_index[(action, effect_frozen)].pre & set.intersection( + *[set(entry['demos'][i][0] - entry['demos'][i][2]) for i in range(len(entry['demos']))] + ) + op_index[(action, effect_frozen)].delete = delete + + +print("\n--- Final Learned Operators ---") +for op in sorted(learned, key=lambda x: x.action): + print(op) +print() + +print("\n--- Ground Truth Operators ---") +for op in sorted(operators, key=lambda x: x.action): + print(op) +print() + +# PREDICATES.update({ +# "reachable_block1": 9, +# "reachable_block2": 10, +# }) + +# demo_data = [([step if i not in (0, 4) else (step[0], step[1], step[2] | {9, 10}) for i, step in enumerate(traj)], goal) for traj, goal in demo_data] +# demo_data = [([step if i not in (1, 5) else (step[0] | {9, 10}, step[1], step[2] | {9, 10}) for i, step in enumerate(traj)], goal) for traj, goal in demo_data] +# demo_data = [([step if i not in (2, 6) else (step[0] | {9, 10}, 
step[1], step[2]) for i, step in enumerate(traj)], goal) for traj, goal in demo_data] + +# demo_data, operators, pred_replace_map = deduplicate_predicates_by_equivalence(demo_data, OPERATORS, NUM_PREDICATES + 2) +# print("Predicate replacement map:", pred_replace_map) + +# print("Demos:") +# # for traj in demo_data: +# # print("Goal:", traj[1], "Length:", len(traj[0])) +# print(len(demo_data)) + +# learned = learn_operators_from_demos(demo_data, max_iters=20, verbose=True) + +# print("\n--- Final Learned Operators ---") +# for op in sorted(learned, key=lambda x: x.action): +# print(op) +# print() + +# print("\n--- Ground Truth Operators ---") +# for op in sorted(operators, key=lambda x: x.action): +# print(op) +# print() + +import ipdb; ipdb.set_trace() \ No newline at end of file diff --git a/test_colla_env.py b/test_colla_env.py new file mode 100644 index 0000000000..326866f4e6 --- /dev/null +++ b/test_colla_env.py @@ -0,0 +1,354 @@ +import gym +import numpy as np +from PIL import Image +from minigrid.wrappers import * +from mini_behavior.window import Window +from mini_behavior.utils.save import get_step, save_demo +from mini_behavior.grid import GridDimension +from mini_behavior.states import * +from collections import deque +import random + +TILE_PIXELS = 32 + +class MiniBehaviorEnv: + def __init__(self, env_id='MiniGrid-InstallingAPrinter-8x8-N2-v0', seed=-1, tile_size=32, + agent_view=False, save_demo_flag=False, load_path=None): + + self.env_id = env_id + self.seed = seed + self.tile_size = tile_size + self.agent_view = agent_view + self.save_demo_flag = save_demo_flag + self.load_path = load_path + self.show_furniture = False + self.all_steps = {} + + self.env = gym.make(env_id) + self.env.teleop_mode() + self.key_to_action = { + '0': self.env.actions.pickup_0, + '1': self.env.actions.pickup_1, + '2': self.env.actions.pickup_2, + '3': self.env.actions.drop_0, + '4': self.env.actions.drop_1, + '5': self.env.actions.drop_2, + 't': self.env.actions.toggle, + 'o': self.env.actions.open, + 'c': self.env.actions.close, + 'k': self.env.actions.cook, + '6': self.env.actions.slice, + 'i': self.env.actions.drop_in, + } + for obj_type, obj_list in self.env.objs.items(): + for obj in obj_list: + self.key_to_action["moveto-" + obj.name] = "moveto-" + obj.name + + + if self.agent_view: + self.env = RGBImgPartialObsWrapper(self.env) + self.env = ImgObsWrapper(self.env) + + self.window = Window('mini_behavior - ' + env_id) + self.window.no_closeup() + + if self.load_path is not None: + self._load_state() + + self.nav_sampler_cache = {} + self.short_task = True + + def redraw(self, img): + if not self.agent_view: + img = self.env.render() + self.window.set_inventory(self.env) + self.window.show_img(img) + self.window.save_img("output_image.jpeg") + + def render_furniture(self): + self.show_furniture = not self.show_furniture + if self.show_furniture: + img = np.copy(self.env.furniture_view) + i, j = self.env.agent_pos + ymin = j * TILE_PIXELS + ymax = (j + 1) * TILE_PIXELS + xmin = i * TILE_PIXELS + xmax = (i + 1) * TILE_PIXELS + img[ymin:ymax, xmin:xmax, :] = GridDimension.render_agent( + img[ymin:ymax, xmin:xmax, :], self.env.agent_dir) + img = self.env.render_furniture_states(img) + self.window.show_img(img) + else: + obs = self.env.gen_obs() + self.redraw(obs) + + def show_states(self): + imgs = self.env.render_states() + self.window.show_closeup(imgs) + + def switch_dim(self, dim): + self.env.switch_dim(dim) + print(f'switching to dim: {self.env.render_dim}') + obs = self.env.gen_obs() + 
self.redraw(obs) + + def _load_state(self): + if self.seed != -1: + self.env.seed(self.seed) + self.env.reset() + obs = self.env.load_state(self.load_path) + if hasattr(self.env, 'mission'): + print('Mission: %s' % self.env.mission) + self.window.set_caption(self.env.mission) + self.redraw(obs) + + def reset(self): + if self.seed != -1: + self.env.seed(self.seed) + obs = self.env.reset() + if hasattr(self.env, 'mission'): + print('Mission: %s' % self.env.mission) + self.window.set_caption(self.env.mission) + self.redraw(obs) + return obs + + def get_lifted_state(self): + objs = self.env.objs + obj_instances = {} + for obj_type, obj_list in objs.items(): + for obj in obj_list: + obj_instances[obj.name] = obj + + ground_atoms = [] + for k, o in obj_instances.items(): + for pred_name, pred in o.states.items(): + if isinstance(pred, (AbsoluteObjectState, AbilityState, ObjectProperty)): + if pred.get_value(self.env): + ground_atoms.append(f"{pred_name}({k})") + elif isinstance(pred, RelativeObjectState): + for k2, o2 in obj_instances.items(): + if o.check_rel_state(self.env, o2, pred_name): + ground_atoms.append(f"{pred_name}({k},{k2})") + return ground_atoms + + def step(self, action): + prev_obs = self.env.gen_obs() + prev_state = self.get_lifted_state() + if isinstance(action, str) and action.startswith("moveto-"): + self.move_in_front_of(action.replace("moveto-","")) + obs = self.env.gen_obs() + reward = 0.0 + done = False + terminated = False + info = {} + else: + obs, reward, done, terminated, info = self.env.step(action) + if self.short_task: + if self.env_id == 'MiniGrid-SortingBooks-16x16-N2-v0': + book = self.env.objs['book'] + hardback = self.env.objs['hardback'] + shelf = self.env.objs['shelf'][0] + for obj in book + hardback: + if obj.check_rel_state(self.env, shelf, 'onTop'): + reward = 1.0 + done = 1.0 + elif self.env_id == 'MiniGrid-WateringHouseplants-16x16-N2-v0': + pot_plants = self.env.objs['pot_plant'] + for plant in pot_plants: + if plant.check_abs_state(self.env, 'soakable'): + reward = 1.0 + done = 1.0 + elif self.env_id == 'MiniGrid-ThrowingAwayLeftovers-16x16-N2-v0': + for hamburger in self.env.objs['hamburger']: + is_inside = [hamburger.check_rel_state(self.env, ashcan, 'inside') for ashcan in self.env.objs['ashcan']] + if True in is_inside: + reward = 1.0 + done = 1.0 + elif self.env_id == 'MiniGrid-BoxingBooksUpForStorage-16x16-N2-v0': + book = self.env.objs['book'] + box = self.env.objs['box'][0] + for obj in book: + if obj.check_rel_state(self.env, box, 'inside'): + reward = 1.0 + done = 1.0 + state = self.get_lifted_state() + + print(f'env_id={self.env_id}, step={self.env.step_count}, reward={reward:.2f}') + # for atom in state: + # print(atom) + + if self.save_demo_flag: + self.all_steps[self.env.step_count] = (prev_obs, prev_state, action, obs, state) + + if done: + print('done!') + if self.save_demo_flag: + save_demo(self.all_steps, self.env_id, self.env.episode) + self.reset() + else: + self.redraw(obs) + return obs, reward, done, terminated, info + + def show(self): + self.window.show(block=False) + + def key_handler_primitive(self, event): + print('pressed', event.key) + action_map = { + 'left': self.env.actions.left, + 'right': self.env.actions.right, + 'up': self.env.actions.forward, + '0': self.env.actions.pickup_0, + '1': self.env.actions.pickup_1, + '2': self.env.actions.pickup_2, + '3': self.env.actions.drop_0, + '4': self.env.actions.drop_1, + '5': self.env.actions.drop_2, + 't': self.env.actions.toggle, + 'o': self.env.actions.open, + 'c': 
self.env.actions.close, + 'k': self.env.actions.cook, + '6': self.env.actions.slice, + 'i': self.env.actions.drop_in + } + + if event.key == 'escape': + self.window.close() + elif event.key in action_map: + self.step(action_map[event.key]) + elif event.key == 'pagedown': + self.show_states() + + def bfs_path(self, start, goal): + grid = self.env.grid + width, height = grid.width, grid.height + visited = set() + queue = deque([(start, [])]) + + while queue: + current_pos, path = queue.popleft() + if current_pos == goal: + return path + + for dx, dy in [(-1,0), (1,0), (0,-1), (0,1)]: + nx, ny = current_pos[0] + dx, current_pos[1] + dy + next_pos = (nx, ny) + + if not (0 <= nx < width and 0 <= ny < height): + continue + if next_pos in visited: + continue + if grid.get(nx, ny) != [[None, None], [None, None], [None, None]]: + if grid.get(nx, ny)[0][0] is None or grid.get(nx, ny)[0][0].name != "door": + continue # Obstacle + + visited.add(next_pos) + queue.append((next_pos, path + [next_pos])) + + return None # No path found + + def move_in_front_of(self, target_name): + # Find the target object + target_obj = None + for obj_list in self.env.objs.values(): + for obj in obj_list: + if obj.name == target_name: + target_obj = obj + break + if target_obj: + break + + if target_obj is None: + print(f"[Error] Object '{target_name}' not found.") + return + + reachable = [] + start_pos = tuple(self.env.agent_pos) + target_pos = target_obj.cur_pos + adjacents = [ + (target_pos[0] + 1, target_pos[1]), + (target_pos[0] - 1, target_pos[1]), + (target_pos[0], target_pos[1] + 1), + (target_pos[0], target_pos[1] - 1) + ] + pos_to_target = {} + for adj in adjacents: + pos_to_target[adj] = target_pos + if hasattr(target_obj, 'all_pos'): + adjacents = [] + for target_pos in target_obj.all_pos: + if 'cabinet' not in target_name: + if target_name in self.nav_sampler_cache: + if target_pos in self.nav_sampler_cache[target_name]: + continue + new_adjacents = [ + (target_pos[0] + 1, target_pos[1]), + (target_pos[0] - 1, target_pos[1]), + (target_pos[0], target_pos[1] + 1), + (target_pos[0], target_pos[1] - 1) + ] + for adj in new_adjacents: + pos_to_target[adj] = target_pos + adjacents += new_adjacents + # Choose a reachable adjacent position + random.shuffle(adjacents) + for pos in adjacents: + if (0 <= pos[0] < self.env.grid.width and 0 <= pos[1] < self.env.grid.height): + if self.env.grid.get(*pos) == [[None, None], [None, None], [None, None]]: + path = self.bfs_path(start_pos, pos) + if path: + reachable.append((pos, path)) + elif self.env.grid.get(*pos)[0][0] is not None: + if self.env.grid.get(*pos)[0][0].name == "door": + path = self.bfs_path(start_pos, pos) + if path: + reachable.append((pos, path)) + else: + pass + + if not reachable: + print(f"[Error] No accessible position next to '{target_name}'") + return + + # Choose shortest reachable + goal_pos, path = min(reachable, key=lambda x: len(x[1])) + + # Follow path + for next_pos in path: + dx = next_pos[0] - self.env.agent_pos[0] + dy = next_pos[1] - self.env.agent_pos[1] + + desired_dir = { + (1, 0): 0, + (0, 1): 1, + (-1, 0): 2, + (0, -1): 3 + }.get((dx, dy)) + + if desired_dir is None: + continue + + while self.env.agent_dir != desired_dir: + self.step(self.env.actions.right) + self.step(self.env.actions.forward) + + # Face the object + target_pos = pos_to_target[tuple(self.env.agent_pos)] + face_dir = (target_pos[0] - self.env.agent_pos[0], target_pos[1] - self.env.agent_pos[1]) + target_dir = { + (1, 0): 0, + (0, 1): 1, + (-1, 0): 2, + (0, -1): 3 + 
}.get(face_dir) + + if target_dir is not None: + while self.env.agent_dir != target_dir: + self.step(self.env.actions.right) + + print(f"[Success] Reached position in front of '{target_name}', facing it.") + if target_name in self.nav_sampler_cache: + self.nav_sampler_cache[target_name].append(target_pos) + else: + self.nav_sampler_cache[target_name] = [target_pos] + diff --git a/test_colla_final.py b/test_colla_final.py new file mode 100644 index 0000000000..6859df2105 --- /dev/null +++ b/test_colla_final.py @@ -0,0 +1,798 @@ +# (1) implement the evaluation function evaluate(agent) returns dictionary of results +# (2) implment evaluation visualization visualize(results) +# (3) do whatever it takes to make results better (CI, BC, FF+BC, FF+BC+LLMs) + +from test_colla_env import MiniBehaviorEnv +from test_colla_helpers import Box, LowLevelTrajectory, State, Task, \ + demo_files, get_demo_traj, learn_nsrts_from_data, parse_nsrt_block +import matplotlib.pyplot as plt +import seaborn as sns +import pandas as pd +import random +from predicators.planning import task_plan, task_plan_grounding, _SkeletonSearchTimeout, PlanningFailure +from predicators import utils +from predicators.structs import Action, LowLevelTrajectory, Predicate, State, \ + Type, GroundAtom, Task, STRIPSOperator +import numpy as np +from collections import Counter + +from predicators.nsrt_learning.strips_learning.gen_to_spec_learner import parse_objs_preds_and_options + +import pickle as pkl +import numpy as np +from predicators.structs import Action, LowLevelTrajectory, Predicate, State, \ + Type +from test_operator_learning_all import get_demo_traj, demo_files +from predicators.nsrt_learning.segmentation import segment_trajectory + +opname_to_key = { + 'Actions.pickup_0': '0', + 'Actions.pickup_1': '1', + 'Actions.pickup_2': '2', + 'Actions.drop_0': '3', + 'Actions.drop_1': '4', + 'Actions.drop_2': '5', + 'Actions.drop_in': 'i', + 'Actions.toggle': 't', + 'Actions.close': 'c', + 'Actions.open': 'o', + 'Actions.cook': 'k', + 'Actions.slice': '6' +} + +class RandomAgent(): + def __init__(self, name): + self.name = name + self.actions = None + + def reset(self, task_name, learn=False): + pass + + def policy(self, obs, env): + #print(env.get_lifted_state()) + return env.key_to_action[random.choice(list(env.key_to_action.keys()))] + +def evaluation(agents, tasks, num_iterations=10, start_seed=100, short_task=True, learn=True): + results = {} + task_i = 0 + for i in range(num_iterations): + for task in tasks: + for agent in agents: + try: + env = MiniBehaviorEnv(env_id=task, seed=i+start_seed) + env.short_task = short_task + observation, _ = env.reset() + agent.short_task = short_task + agent.reset(env.env_id, learn=learn) + steps = 0 + for _ in range(50): + action = agent.policy(observation, env) + observation, reward, terminated, truncated, _ = env.step(action) + steps += 1 + env.show() + + if reward != 0: + break + + if terminated or truncated: + break + found_plan = 0 + plan_diff = -1 + if agent.actions is not None and agent.actions != []: + found_plan = 1 + + key_to_opname = {v:k for k,v in opname_to_key.items()} + plan = [key_to_opname[action] if not action.startswith("moveto") else "Move" for action in agent.actions] + dataset_plan = agent.dataset[0].actions + + def differing_reoccurring_counts(list1, list2): + count1 = Counter(list1) + count2 = Counter(list2) + all_keys = set(count1.keys()) | set(count2.keys()) + result = {} + total_diff = 0 + for key in all_keys: + c1 = count1.get(key, 0) + c2 = count2.get(key, 0) + if 
(c1 > 1 or c2 > 1) and c1 != c2: + diff = abs(c1 - c2) + result[key] = diff + total_diff += diff + result['total'] = total_diff + return result + plan_diff = differing_reoccurring_counts(plan, dataset_plan)['total'] + results[str(task_i) + "_" + task + "_" + agent.name] = (steps, reward, i, found_plan, plan_diff) + except _SkeletonSearchTimeout: + print("did not find skeleton - timeout") + results[str(task_i) + "_" + task + "_" + agent.name] = (-1,0,0,0,-1) + except PlanningFailure: + print("did not find skeleton - plan failure") + results[str(task_i) + "_" + task + "_" + agent.name] = (-1,0,0,0,-1) + task_i += 1 + return results + +def structure_results(results_dict): + data = [] + for key, (steps, reward, iteration, found_plan, plan_diff) in results_dict.items(): + task_idx, task_name, agent_name = key.split("_", 2) + data.append({ + "task_name": task_name, + "task_idx": int(task_idx), + "iteration": int(iteration), + "found_plan": int(found_plan), + "plan_diff": int(plan_diff), + "steps": steps, + "reward": reward, + "success": 1 if reward > 0 else 0, + "agent": agent_name + }) + df = pd.DataFrame(data) + df["task_order"] = df["task_idx"] + return df.sort_values(["agent", "iteration", "task_order"]) + +def plot_lifelong_success(df): + plt.figure(figsize=(14, 5)) + + sns.lineplot( + data=df, + x="task_order", + y="success", + hue="agent", + marker="o" + ) + + # Set up x-ticks with task names, spaced across iterations + xticks = df["task_order"] + xticklabels = df["task_name"] + plt.xticks(ticks=xticks, labels=xticklabels, rotation=45, ha='right') + + plt.ylim(-0.1, 1.1) + plt.yticks([0, 1], ["Fail", "Success"]) + plt.ylabel("Success") + plt.xlabel("Tasks over Lifelong Iterations") + plt.title("Lifelong Learning Success per Task") + plt.tight_layout() + plt.grid(True, linestyle='--', alpha=0.3) + plt.savefig("lifelong_learning_success.png", dpi=200) + +class OperatorLearningAgent(): + def __init__(self, name, strips_learner, single_grounding=False): + self.name = name + self.num_demos = 1 + + # Initialized once; populated in get_data() + self.dataset = [] + self.ground_atom_dataset = [] + self.tasks = [] + self.action_space = Box(0, 7, (1,)) + self.objs = set() + self.preds = set() + self.options = set() + self.ground_atoms_traj = [] + self.goal = None + + # Runtime variables + self.nsrts = None + self.actions = None + self.i = 0 + self.seed_i = 0 + self.short_task = True + + # Learning Params + self.strips_learner = strips_learner + self.single_grounding = single_grounding + utils.reset_config({ + "strips_learner": self.strips_learner, + "segmenter": "every_step", + "disable_harmlessness_check": True, + "pnad_search_load_initial": True, + "backward_forward_load_initial": True, + "min_data_for_nsrt": 0, + "min_perc_data_for_nsrt": 0, + "pnad_search_timeout":1000.0, + "single_grounding": self.single_grounding, + "option_learner": "no_learning" + }) + + def reset(self, task_name, learn=True): + if False: + self.dataset = [] + self.ground_atom_dataset = [] + self.tasks = [] + self.objs = set() + self.preds = set() + self.options = set() + self.ground_atoms_traj = [] + self.action_space = Box(0, 7, (1,)) + self.seed_i = 0 + + # Learning Params + utils.reset_config({ + "strips_learner": self.strips_learner, + "segmenter": "every_step", + "disable_harmlessness_check": True, + "pnad_search_load_initial": True, + "backward_forward_load_initial": True, + "min_data_for_nsrt": 0, + "min_perc_data_for_nsrt": 0, + "pnad_search_timeout":1000.0, + "single_grounding": self.single_grounding, + 
"option_learner": "no_learning" + }) + + if learn: + self.nsrts = self.learn_nsrts(task_name) + else: + dataset, tasks, preds, options, action_space, ground_atom_dataset = self.get_data(task_name) + with open("test_saved.NSRTs.txt", "r") as file: + content = file.read() + nsrt_strs = ["NSRT-" + nsrt_str for nsrt_str in content.split("NSRT-") if nsrt_str != ''] + segmented_trajs = [segment_trajectory(traj, self.preds, atom_seq=atom_seq) for traj, atom_seq in self.ground_atom_dataset] + self.nsrts = [parse_nsrt_block(nsrt_str, segmented_trajs) for nsrt_str in nsrt_strs] + self.goal = self.parse_goal(task_name, self.ground_atoms_traj[1][-1]) + self.actions = None + self.i = 0 + + def parse_goal(self, task_name, ground_atoms_state): + if task_name == "MiniGrid-BoxingBooksUpForStorage-16x16-N2-v0": + return set([atom for atom in ground_atoms_state if str(atom).startswith("inside(")]) + + elif task_name == "MiniGrid-OpeningPackages-16x16-N2-v0": + return set([atom for atom in ground_atoms_state if str(atom).startswith("openable(")]) + + elif task_name == "MiniGrid-CleaningACar-16x16-N2-v0": + return set([atom for atom in ground_atoms_state if str(atom).startswith("inside(")]) | set([atom for atom in ground_atoms_state if str(atom).startswith("~dustyable(")]) + + elif task_name == "MiniGrid-CleaningShoes-16x16-N2-v0": + return set([atom for atom in ground_atoms_state if str(atom).startswith("~stainable(") and "shoe" in str(atom)]) | \ + set([atom for atom in ground_atoms_state if str(atom).startswith("~dustyable(") and "shoe" in str(atom)]) | \ + set([atom for atom in ground_atoms_state if str(atom).startswith("onfloor(") and "towel" in str(atom)]) + + elif task_name == "MiniGrid-CleaningUpTheKitchenOnly-16x16-N2-v0": + return set([ + atom for atom in ground_atoms_state + if ( + str(atom).startswith("onTop(") and "blender" in str(atom) and "countertop" in str(atom) + ) or ( + str(atom).startswith("nextto(") and "soap" in str(atom) and "sink" in str(atom) + ) or ( + str(atom).startswith("inside(") and "vegetable_oil" in str(atom) and "cabinet" in str(atom) + ) or ( + str(atom).startswith("inside(") and "plate" in str(atom) and "cabinet" in str(atom) + ) or ( + str(atom).startswith("inside(") and "casserole" in str(atom) and "electric_refrigerator" in str(atom) + ) or ( + str(atom).startswith("inside(") and "apple" in str(atom) and "electric_refrigerator" in str(atom) + ) or ( + str(atom).startswith("inside(") and "rag" in str(atom) and "sink" in str(atom) + ) or ( + str(atom).startswith("nextto(") and "rag" in str(atom) and "sink" in str(atom) + ) or ( + str(atom).startswith("~dustyable(") and "cabinet" in str(atom) + ) or ( + str(atom).startswith("~stainable(") and "plate" in str(atom) + ) + ]) + + elif task_name == "MiniGrid-CollectMisplacedItems-16x16-N2-v0": + return set([ + atom for atom in ground_atoms_state + if str(atom).startswith("onTop(") and "table" in str(atom) and ( + "gym_shoe" in str(atom) or + "necklace" in str(atom) or + "notebook" in str(atom) or + "sock" in str(atom) + ) and not str(atom).startswith("onTop(table") + ]) + + elif task_name == "MiniGrid-InstallingAPrinter-16x16-N2-v0": + return set([atom for atom in ground_atoms_state if str(atom).startswith("onTop(")]) | \ + set([atom for atom in ground_atoms_state if str(atom).startswith("toggleable(")]) + + elif task_name == "MiniGrid-LayingWoodFloors-16x16-N2-v0": + return set([atom for atom in ground_atoms_state if str(atom).startswith("nextto(")]) + + elif task_name == "MiniGrid-MakingTea-16x16-N2-v0": + return set([ + atom 
for atom in ground_atoms_state + if str(atom).startswith("sliceable(") and "lemon" in str(atom) + ]) | set([ + atom for atom in ground_atoms_state + if str(atom).startswith("onTop(") and "teapot" in str(atom) and "stove" in str(atom) + ]) | set([ + atom for atom in ground_atoms_state + if str(atom).startswith("atsamelocation(") and "tea_bag" in str(atom) and "teapot" in str(atom) + ]) | set([ + atom for atom in ground_atoms_state + if str(atom).startswith("soakable(") and "teapot" in str(atom) + ]) | set([ + atom for atom in ground_atoms_state + if str(atom).startswith("toggleable(") and "stove" in str(atom) + ]) + + elif task_name == "MiniGrid-MovingBoxesToStorage-16x16-N2-v0": + return set([atom for atom in ground_atoms_state if str(atom).startswith("onTop(")]) + + elif task_name == "MiniGrid-OrganizingFileCabinet-16x16-N2-v0": + return set([ + atom for atom in ground_atoms_state + if str(atom).startswith("onTop(") and "marker" in str(atom) and "table" in str(atom) + ]) | set([ + atom for atom in ground_atoms_state + if str(atom).startswith("inside(") and "document" in str(atom) and "cabinet" in str(atom) + ]) | set([ + atom for atom in ground_atoms_state + if str(atom).startswith("inside(") and "folder" in str(atom) and "cabinet" in str(atom) + ]) + + elif task_name == "MiniGrid-PreparingSalad-16x16-N2-v0": + import ipdb; ipdb.set_trace() + raise NotImplementedError("parse_goal not implemented for PreparingSalad") + + elif task_name == "MiniGrid-PuttingAwayDishesAfterCleaning-16x16-N2-v0": + return set([ + atom for atom in ground_atoms_state + if str(atom).startswith("inside(") and "plate" in str(atom) and "cabinet" in str(atom) + ]) + + + elif task_name == "MiniGrid-SettingUpCandles-16x16-N2-v0": + return set([atom for atom in ground_atoms_state if str(atom).startswith("onTop(")]) + + elif task_name == "MiniGrid-SortingBooks-16x16-N2-v0": + return set([atom for atom in ground_atoms_state if str(atom).startswith("onTop(") and "shelf" in str(atom) and ("book" in str(atom) or "hardback" in str(atom))]) + + elif task_name == "MiniGrid-StoringFood-16x16-N2-v0": + return set([ + atom for atom in ground_atoms_state + if str(atom).startswith("inside(") and "cabinet" in str(atom) and ( + "oatmeal" in str(atom) or "chip" in str(atom) or "vegetable_oil" in str(atom) or "sugar" in str(atom) + ) + ]) + + elif task_name == "MiniGrid-ThawingFrozenFood-16x16-N2-v0": + return set([ + atom for atom in ground_atoms_state + if str(atom).startswith("nextto(") and ( + ("date" in str(atom) and "fish" in str(atom)) or + ("fish" in str(atom) and "sink" in str(atom)) or + ("olive" in str(atom) and "sink" in str(atom)) + ) + ]) + + elif task_name == "MiniGrid-ThrowingAwayLeftovers-16x16-N2-v0": + return set([atom for atom in ground_atoms_state if str(atom).startswith("inside(") and "hamburger" in str(atom) and "ashcan" in str(atom)]) + + elif task_name == "MiniGrid-WashingPotsAndPans-16x16-N2-v0": + return set([ + atom for atom in ground_atoms_state + if str(atom).startswith("~stainable(") and ( + "pan" in str(atom) or "kettle" in str(atom) or "teapot" in str(atom) + ) + ]) | set([ + atom for atom in ground_atoms_state + if str(atom).startswith("inside(") and "cabinet" in str(atom) and ( + "pan" in str(atom) or "kettle" in str(atom) or "teapot" in str(atom) + ) + ]) + + elif task_name == "MiniGrid-WateringHouseplants-16x16-N2-v0": + return set([atom for atom in ground_atoms_state if str(atom).startswith("soakable(") and "pot_plant" in str(atom)]) + else: + import ipdb; ipdb.set_trace() + + + def get_plan(self, 
state, seed): + objs, _, _, ground_atoms_traj, all_atoms = parse_objs_preds_and_options(self.dataset[-1], train_task_idx=len(self.dataset)) + task = Task(State({}, None), self.goal) + + _, _, _, ground_atoms_traj, _ = parse_objs_preds_and_options(LowLevelTrajectory([state], [], _is_demo=True, _train_task_idx=0), train_task_idx=0, all_atoms=all_atoms) + init_atoms = ground_atoms_traj[1][0] + plan = self.plan(init_atoms, objs, self.preds, self.nsrts, task, seed) + return plan + + def policy(self, obs, env): + if self.actions is None: + seed = self.seed_i + self.seed_i += 1 + num_remove_pre = 0 + while self.actions is None or self.actions == []: + self.actions = self.get_plan(env.get_lifted_state(), seed) + self.i = 0 + # num_remove_pre += 1 + # new_nsrts = set() + # for nsrt in self.nsrts: + # pre = set() + # tot_pre = len(nsrt.op.preconditions) - num_remove_pre + # if tot_pre > 0: + # pre = random.sample(nsrt.op.preconditions, tot_pre) + # ignore_effects = nsrt.op.ignore_effects + # del_effs = nsrt.op.delete_effects + # # if num_remove_pre > 10: + # # ignore_effects = set() + # # del_effs = set() + # new_nsrts.add( + # nsrt.op.copy_with(preconditions=pre, + # ignore_effects=ignore_effects, + # delete_effects=del_effs).make_nsrt( + # nsrt.option, + # [], # dummy sampler + # lambda s, g, rng, o: np.zeros(1, dtype=np.float32))) + # self.nsrts = new_nsrts + # with open("test_saved.NSRTs.txt", "w") as file: + # for nsrt in self.nsrts: + # if nsrt.op.add_effects != set(): + # file.write(str(nsrt)+"\n") + + self.i += 1 + if self.i-1 < len(self.actions): + return env.key_to_action[self.actions[self.i-1]] + else: + self.actions = None + self.i = 0 + return env.key_to_action["0"] + + def clean_action_plan(self, action_plan): + plan = [] + for step in action_plan: + name = step[0] + objs = step[1] + if len(objs) > 0: + obj_name = objs[0].name + if name.startswith("Move"): + plan.append(f"moveto-{obj_name}") + else: + for opname, key in opname_to_key.items(): + if opname in name: + plan.append(key) + break + return plan + + def plan(self, init_atoms, objects, predicates, nsrts, task, seed): + ground_nsrts, reachable_atoms = task_plan_grounding(init_atoms, objects, nsrts, allow_noops=True) + heuristic = utils.create_task_planning_heuristic("hadd", init_atoms, + task.goal, ground_nsrts, + predicates, objects) + task_plan_generator = task_plan(init_atoms, + task.goal, + ground_nsrts, + reachable_atoms, + heuristic, + timeout=1, + seed=seed, + max_skeletons_optimized=3) + skeleton, _, _ = next(task_plan_generator) + + action_plan = [] + for step in skeleton: + action_plan.append((step.option.name, step.objects)) + return self.clean_action_plan(action_plan) + + def get_data(self, task_name): + for demo_file in demo_files: + if task_name in demo_file: + demo_traj = get_demo_traj(demo_file=demo_file, verbose=False) + + if self.short_task: + if task_name == 'MiniGrid-SortingBooks-16x16-N2-v0': + demo_traj = LowLevelTrajectory(demo_traj.states[:5], demo_traj.actions[:4], _is_demo=True, _train_task_idx=0) + elif task_name == 'MiniGrid-WateringHouseplants-16x16-N2-v0': + demo_traj = LowLevelTrajectory(demo_traj.states[:7], demo_traj.actions[:6], _is_demo=True, _train_task_idx=0) + elif task_name == 'MiniGrid-ThrowingAwayLeftovers-16x16-N2-v0': + demo_traj = LowLevelTrajectory(demo_traj.states[:5], demo_traj.actions[:4], _is_demo=True, _train_task_idx=0) + elif task_name == 'MiniGrid-BoxingBooksUpForStorage-16x16-N2-v0': + demo_traj = LowLevelTrajectory(demo_traj.states[:5], demo_traj.actions[:4], _is_demo=True, 
_train_task_idx=0) + + idx = len(self.dataset) + demo_traj = LowLevelTrajectory(demo_traj.states, demo_traj.actions, _is_demo=True, _train_task_idx=idx) + + self.dataset.append(demo_traj) + new_objs, new_preds, new_options, self.ground_atoms_traj, _ = parse_objs_preds_and_options(demo_traj, train_task_idx=idx) + self.objs |= new_objs + self.preds |= new_preds + self.options |= new_options + self.ground_atom_dataset.append(self.ground_atoms_traj) + goal = self.parse_goal(task_name, self.ground_atoms_traj[1][-1]) + self.tasks.append(Task(State({}, None), goal)) + # if len(self.dataset) >= self.num_demos: + # break + # assert len(self.dataset) == self.num_demos + return self.dataset, self.tasks, self.preds, self.options, self.action_space, self.ground_atom_dataset + + def learn_nsrts(self, task_name): + dataset, tasks, preds, options, action_space, ground_atom_dataset = self.get_data(task_name) + nsrts, _, _ = learn_nsrts_from_data(dataset, + tasks, + preds, + options, + action_space, + ground_atom_dataset, + sampler_learner="neural", + annotations=None) + with open("test_saved.NSRTs.txt", "w") as file: + for nsrt in nsrts: + if nsrt.op.add_effects != set(): + file.write(str(nsrt)+"\n") + return nsrts + +class DummyAgent(OperatorLearningAgent): + def __init__(self, name="dummy", strips_learner="dummy"): + super().__init__(name=name, strips_learner=strips_learner, single_grounding=True) + + def learn_nsrts(self, task_name): + dataset, tasks, preds, options, action_space, ground_atom_dataset = self.get_data(task_name) + goal = self.parse_goal(task_name, self.ground_atoms_traj[1][-1]) + obj_to_var = {obj:obj.type("?" + obj.name) for obj in self.objs} + lifted_goal = {atom.lift(obj_to_var) for atom in goal} + + nsrts = set() + name_i = 0 + for option in options: + op = STRIPSOperator( + name="Dummy" + str(name_i), + parameters=[], + preconditions=set(), + add_effects=set(), + delete_effects=set(), + ignore_effects=set() + ) + dummy_nsrt = op.make_nsrt( + option, + [], # dummy sampler + lambda s, g, rng, o: np.zeros(1, dtype=np.float32)) + nsrts.add(dummy_nsrt) + name_i += 1 + + params = [] + for sublist in [lifted_atom.variables for lifted_atom in lifted_goal]: + params += sublist + params = [x for x in set(params)] + op = STRIPSOperator( + name="Dummy" + str(name_i), + parameters=params, + preconditions=set(), + add_effects=lifted_goal, + delete_effects=set(), + ignore_effects=set() + ) + dummy_nsrt = op.make_nsrt( + option, + [], # dummy sampler + lambda s, g, rng, o: np.zeros(1, dtype=np.float32)) + nsrts.add(dummy_nsrt) + name_i += 1 + return nsrts + +class GroundTruthAgent(OperatorLearningAgent): + def __init__(self, name): + super().__init__(name=name, strips_learner="NONE") + self.name = name + self.ground_truth_trajs = {} + self.i = 0 + self.actions = None + + def reset(self, task_name, learn=False): + self.dataset = [] + self.ground_atom_dataset = [] + self.tasks = [] + self.action_space = Box(0, 7, (1,)) + self.objs = set() + self.preds = set() + self.options = set() + self.ground_atoms_traj = [] + + dataset, tasks, preds, options, action_space, ground_atom_dataset = self.get_data(task_name) + + self.goal = self.parse_goal(task_name, self.ground_atoms_traj[1][-1]) + self.actions = None + self.i = 0 + + action_plan = [] + for i, step in enumerate(self.ground_atoms_traj[0].actions): + curr_state = self.ground_atoms_traj[1][i] + next_state = self.ground_atoms_traj[1][i+1] + def count_object_occurrences(atom_set): + counter = Counter() + for atom in atom_set: + for obj in atom.objects: + if 
not atom.predicate.name.startswith("~inreachofrobot"): + counter[obj] += 1 + return counter + counter = count_object_occurrences(next_state - curr_state) + def get_max_count_object(counter, exclude_types=("table", "shelf")): + max_count = max(counter.values()) + candidates = [ + obj for obj, count in counter.items() + if count == max_count and all(ex_type not in str(obj) for ex_type in exclude_types) + ] + + if candidates: + return candidates[0] + else: + return None + + try: + if get_max_count_object(counter) is None: + objs = [max(counter, key=counter.get)] + else: + objs = [get_max_count_object(counter)] + except: + objs = random.sample(self.objs, 1) + action_plan.append((step._option.name, objs)) + self.ground_truth_trajs[task_name] = self.clean_action_plan(action_plan) + + def policy(self, obs, env): + #print(env.get_lifted_state()) + try: + assert env.env_id in self.ground_truth_trajs.keys() + except: + import ipdb; ipdb.set_trace() + self.i += 1 + if self.i-1 < len(self.ground_truth_trajs[env.env_id]): + return env.key_to_action[self.ground_truth_trajs[env.env_id][self.i-1]] + else: + return env.key_to_action["0"] + + def learn_nsrts(self, task_name): + return None + +# tasks = ["MiniGrid-BoxingBooksUpForStorage-16x16-N2-v0", +# "MiniGrid-CollectMisplacedItems-16x16-N2-v0", +# "MiniGrid-ThrowingAwayLeftovers-16x16-N2-v0", +# "MiniGrid-OpeningPackages-16x16-N2-v0", +# "MiniGrid-WateringHouseplants-16x16-N2-v0", +# "MiniGrid-PuttingAwayDishesAfterCleaning-16x16-N2-v0"] + +# agents = [RandomAgent("random"), GroundTruthAgent("ground-truth")] +# results = evaluation(agents, tasks, num_iterations=3) +# df = structure_results(results) +# plot_lifelong_success(df) + +##### + +# tasks = ["MiniGrid-BoxingBooksUpForStorage-16x16-N2-v0"] +# agents = [GroundTruthAgent("ground-truth")] +# results = evaluation(agents, tasks, num_iterations=1) +# df = structure_results(results) +# #plot_lifelong_success(df) +# import ipdb; ipdb.set_trace() + +##### + +############ +# tasks = ["MiniGrid-BoxingBooksUpForStorage-16x16-N2-v0"] +# agents = [ +# DummyAgent("dummy", strips_learner="dummy"), +# OperatorLearningAgent("cluster-intersect", strips_learner="cluster_and_intersect"), +# OperatorLearningAgent("backchaining", strips_learner="backchaining"), +# OperatorLearningAgent("hill-climbing", strips_learner="pnad_search"), +# # OperatorLearningAgent("llm", strips_learner="llm") +# ] +# results = evaluation(agents, tasks, num_iterations=1) +# df = structure_results(results) +# #plot_lifelong_success(df) +# for agent in agents: +# print(agent.name, len(agent.nsrts), agent.actions) +# print() +# import ipdb; ipdb.set_trace() + +# Note: grounding should only be for operators based on the goal.... 
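+# One possible reading of that note, as a minimal sketch (hypothetical helper, not wired
+# into the pipeline): ground only over the objects that appear in the goal, so
+# task_plan_grounding does not enumerate ground NSRTs over irrelevant objects.
+# def goal_directed_grounding(init_atoms, nsrts, goal):
+#     goal_objects = {obj for atom in goal for obj in atom.objects}
+#     return task_plan_grounding(init_atoms, goal_objects, nsrts, allow_noops=True)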
+# Maybe LLM can help with grounding too + +# TODO Finally - Collect Demos, Increment Num_Demos + +# TODO Try Run 3-5 Env Eval on all 5 Baselines (Dummy, CI, Pnad_Search, Back_Chaining) + +# TODO Fix LLM Agents + +# TODO Make BC+FS+LLM Agent +# see other code + +# TODO Make Version-Space Agent +# Note: from CI it should fall back to BC+FS+LLM then to BC then to Dummy + +# TODO Try Run Full Eval on all 7 Agents + +# tasks = [ +# 'MiniGrid-BoxingBooksUpForStorage-16x16-N2-v0', +# 'MiniGrid-CleaningACar-16x16-N2-v0', +# 'MiniGrid-CleaningShoes-16x16-N2-v0', #1 +# 'MiniGrid-CleaningUpTheKitchenOnly-16x16-N2-v0', +# 'MiniGrid-CollectMisplacedItems-16x16-N2-v0', +# 'MiniGrid-InstallingAPrinter-16x16-N2-v0', +# 'MiniGrid-LayingWoodFloors-16x16-N2-v0', +# 'MiniGrid-MakingTea-16x16-N2-v0', +# 'MiniGrid-MovingBoxesToStorage-16x16-N2-v0', +# 'MiniGrid-OpeningPackages-16x16-N2-v0', +# 'MiniGrid-OrganizingFileCabinet-16x16-N2-v0', +# #[DEBUG]'MiniGrid-PreparingSalad-16x16-N2-v0', +# 'MiniGrid-PuttingAwayDishesAfterCleaning-16x16-N2-v0', +# 'MiniGrid-SettingUpCandles-16x16-N2-v0', #1 +# 'MiniGrid-SortingBooks-16x16-N2-v0', +# 'MiniGrid-StoringFood-16x16-N2-v0', +# #[DEBUG]'MiniGrid-ThawingFrozenFood-16x16-N2-v0', +# 'MiniGrid-ThrowingAwayLeftovers-16x16-N2-v0', +# 'MiniGrid-WashingPotsAndPans-16x16-N2-v0', +# 'MiniGrid-WateringHouseplants-16x16-N2-v0' +# ] + +# task_info = {} + +# for demo_file in demo_files: +# traj = get_demo_traj(demo_file, verbose=False) +# add_count = 0 +# for i, action in enumerate(traj.actions): +# curr_state = set(traj.states[i]) +# next_state = set(traj.states[i+1]) +# del_effs = curr_state - next_state +# add_effs = next_state - curr_state +# # print(action) +# # print("DEL:", del_effs) +# # print("ADD:", add_effs) +# # print() +# add_count += len(add_effs) +# assert len(add_effs) != 0 or str(action) == "Move" +# task_name = demo_file.split("/")[-1].split("_")[0] +# agent = OperatorLearningAgent("cluster-intersect", strips_learner="cluster_and_intersect") +# agent.get_data(task_name=task_name) +# goal = agent.parse_goal(task_name=task_name, ground_atoms_state=agent.ground_atoms_traj[1][-1]) +# task_info[demo_file.split("/")[-1]] = (len(traj.actions), len(goal), add_count) + +# i = 0 +# curriculum = [] +# for k,v in sorted([(k,v) for k,v in task_info.items()], key=lambda x: x[1][2]): # by add effects +# i+=1 +# print("|", v[0], "| goal length:", v[1], "| add count:", v[2], "|", k.split("_")[0], i) +# curriculum.append(k.split("_")[0]) + +import time +start_time = time.time() +tasks = ['MiniGrid-OpeningPackages-16x16-N2-v0', + 'MiniGrid-InstallingAPrinter-16x16-N2-v0', + 'MiniGrid-MovingBoxesToStorage-16x16-N2-v0', + 'MiniGrid-SortingBooks-16x16-N2-v0',# + 'MiniGrid-WateringHouseplants-16x16-N2-v0',# + #'MiniGrid-ThrowingAwayLeftovers-16x16-N2-v0',# + 'MiniGrid-BoxingBooksUpForStorage-16x16-N2-v0'# + ] +# tasks = curriculum +# tasks.remove('MiniGrid-LayingWoodFloors-16x16-N2-v0') +# tasks.remove('MiniGrid-CleaningACar-16x16-N2-v0') +# tasks.remove('MiniGrid-SortingBooks-16x16-N2-v0') +# tasks.remove('MiniGrid-CleaningUpTheKitchenOnly-16x16-N2-v0') +print("#"*30) +print(tasks) + +all_agents = [ + GroundTruthAgent("ground-truth"), + #DummyAgent("dummy", strips_learner="dummy"), + #OperatorLearningAgent("cluster-intersect", strips_learner="cluster_and_intersect"), + #OperatorLearningAgent("backchaining", strips_learner="backchaining"), + #OperatorLearningAgent("hill-climbing", strips_learner="pnad_search"), + #OperatorLearningAgent("llm", strips_learner="llm"), + 
#OperatorLearningAgent("backward-forward", strips_learner="backward-forward"), + ] + +for agent in all_agents: + with open("test_saved.NSRTs.txt", "w") as file: + file.write("""NSRT-Move0: + Parameters: [?x0:obj_type] + Preconditions: [] + Add Effects: [inreachofrobot(?x0:obj_type)] + Delete Effects: [~inreachofrobot(?x0:obj_type)] + Ignore Effects: [inreachofrobot, ~inreachofrobot] + Option Spec: Move()""") + results = evaluation([agent], tasks, num_iterations=1, start_seed=100) + df = structure_results(results) + plot_lifelong_success(df) + end_time = time.time() + print("time elasped", end_time - start_time) + df.to_csv('test_results/' + agent.name + '_output.csv') + + results = evaluation([agent], tasks, num_iterations=1, start_seed=100, short_task=False, learn=False) + df2 = structure_results(results) + plot_lifelong_success(df2) + end_time = time.time() + print("time elasped", end_time - start_time) + df2.to_csv('test_results/' + agent.name + '_long_output.csv') + diff --git a/test_colla_helpers.py b/test_colla_helpers.py new file mode 100644 index 0000000000..36b8873e9f --- /dev/null +++ b/test_colla_helpers.py @@ -0,0 +1,160 @@ +import numpy as np +from gym.spaces import Box +import re +import pickle as pkl + +from predicators import utils +from predicators.nsrt_learning.nsrt_learning_main import learn_nsrts_from_data +from predicators.structs import Action, LowLevelTrajectory, Predicate, State, \ + Type, GroundAtom, Task, Variable, LiftedAtom, NSRT, Set +import glob + +name_to_actions = { + "Move": 0, + "Actions.pickup_0": 3, + "Actions.pickup_1": 4, + "Actions.pickup_2": 5, + "Actions.drop_0": 6, + "Actions.drop_1": 7, + "Actions.drop_2": 8, + "Actions.drop_in": 9, + "Actions.toggle": 10, + "Actions.close": 11, + "Actions.open": 12, + "Actions.cook": 13, + "Actions.slice": 14 +} + +demo_files = sorted([filename for filename in glob.glob("/Users/shashlik/Documents/GitHub/predicators/demos/*/*")]) +demo_tasks = set([demo_file.split("/")[-1].split("_")[0] for demo_file in demo_files]) + +# Load and do this from MiniBeahvior Demo + +def get_demo_traj(demo_file, verbose=True): + with open(demo_file, 'rb') as f: + data = pkl.load(f) + + last_skill = "Move" + state = [a for a in data[1][1] if "infovofrobot" not in a] + states = [state] + actions = [] + for step in data.keys(): + obs = data[step][0]['image'] + direction = data[step][0]['direction'] + action = data[step][2] + skill = None + + if "forward" in str(action) or \ + "left" in str(action) or \ + "right" in str(action): + + skill = "Move" + else: + skill = str(action) + + has_effect = True + try: + next_obs = data[step][3]['image'] + next_direction = data[step][3]['direction'] + if np.allclose(obs, next_obs) and (direction == next_direction): + has_effect = False + except: + pass + + if has_effect: + if last_skill != skill: + if verbose: + print("#") + print(last_skill) + try: + next_state = [a for a in data[step][1] if "infovofrobot" not in a] + if verbose: + print("PREV:", set(state)) + print("ADD:", set(next_state) - set(state)) + print("DEL:", set(state) - set(next_state)) + state = next_state + actions.append(last_skill) + states.append(state) + except: + pass + last_skill = skill + else: + if verbose: + print("#") + print(last_skill) + next_state = [a for a in data[step][4] if "infovofrobot" not in a] + if verbose: + print("PREV:", set(state)) + print("ADD:", set(next_state) - set(state)) + print("DEL:", set(state) - set(next_state)) + state = next_state + if verbose: + print("#") + actions.append(last_skill) + 
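+    # append the final state so the trajectory stays one state longer than the action list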
states.append(state) + + return LowLevelTrajectory(states, actions, _is_demo=True, _train_task_idx=0) + +def parse_nsrt_block(block, segmented_trajs) -> NSRT: + """Parses a single NSRT block into an PNAD object.""" + lines = block.strip().split("\n") + + name_match = re.match(r"(\S+):", lines[0]) + name = name_match.group(1) if name_match else "" + + parameters = re.findall(r"\?x\d+:\w+", lines[1]) + + def extract_effects(label: str) -> Set[str]: + """Extracts a list of predicates from labeled sections.""" + for line in lines: + if line.strip().startswith(label): + return set(re.findall(r"\w+\(.*?\)", line)) + return set() + + preconditions = extract_effects("Preconditions") + add_effects = extract_effects("Add Effects") + delete_effects = extract_effects("Delete Effects") + ignore_effects = extract_effects("Ignore Effects") + + option_spec_match = re.search(r"Option Spec:\s*(.*)", block) + option_spec = option_spec_match.group(1) if option_spec_match else "" + + objects = set() + atoms = set() + option_specs = {} + for traj in segmented_trajs: + for segment in traj: + for state in segment.states: + for k, v in state.items(): + objects.add(k) + atoms |= segment.init_atoms | segment.final_atoms + option_specs[segment.get_option().parent.name] = segment.get_option().parent + all_predicates_list = [(atom.predicate.name,atom.predicate) for atom in atoms] + def get_predicate(name, entities): + for pred_name, pred in all_predicates_list: + if pred_name == pred_name and pred.arity == len(entities): + valid_types = True + for i, ent in enumerate(entities): + if ent.type != pred.types[i]: + valid_types = False + if valid_types: + return pred + raise NotImplementedError + + types = {obj.type.name:obj.type for obj in objects} + + def extract_parameters(predicate: str) -> Set[str]: + parameter_pattern = re.compile(r"\?x\d+:\w+") # Matches variables like ?x0:obj_type + matches = parameter_pattern.findall(predicate) + return matches + + parameters = [Variable(param.split(":")[0], types[param.split(":")[1]]) for param in parameters] + preconditions = set([LiftedAtom(get_predicate(pre.split("(")[0], [Variable(param.split(":")[0], types[param.split(":")[1]]) for param in extract_parameters(pre)]), [Variable(param.split(":")[0], types[param.split(":")[1]]) for param in extract_parameters(pre)]) for pre in preconditions]) + add_effects = set([LiftedAtom(get_predicate(add.split("(")[0], [Variable(param.split(":")[0], types[param.split(":")[1]]) for param in extract_parameters(add)]), [Variable(param.split(":")[0], types[param.split(":")[1]]) for param in extract_parameters(add)]) for add in add_effects]) + delete_effects = set([LiftedAtom(get_predicate(dle.split("(")[0], [Variable(param.split(":")[0], types[param.split(":")[1]]) for param in extract_parameters(dle)]), [Variable(param.split(":")[0], types[param.split(":")[1]]) for param in extract_parameters(dle)]) for dle in delete_effects]) + ignore_effects = set([get_predicate(ige, None) for ige in ignore_effects]) + a_name = option_spec.split("(")[0] + option_spec = utils.SingletonParameterizedOption( + a_name, lambda s, m, o, p: Action(name_to_actions[a_name])) + + return NSRT(name, parameters, preconditions, add_effects, delete_effects, ignore_effects, option_spec, [], None) \ No newline at end of file diff --git a/test_colla_results copy.py b/test_colla_results copy.py new file mode 100644 index 0000000000..34976f9143 --- /dev/null +++ b/test_colla_results copy.py @@ -0,0 +1,760 @@ +# (1) implement the evaluation function evaluate(agent) returns 
dictionary of results +# (2) implment evaluation visualization visualize(results) +# (3) do whatever it takes to make results better (CI, BC, FF+BC, FF+BC+LLMs) + +from test_colla_env import MiniBehaviorEnv +from test_colla_helpers import Box, LowLevelTrajectory, State, Task, \ + demo_files, get_demo_traj, learn_nsrts_from_data +import matplotlib.pyplot as plt +import seaborn as sns +import pandas as pd +import random +from predicators.planning import task_plan, task_plan_grounding, _SkeletonSearchTimeout, PlanningFailure +from predicators import utils +from predicators.structs import Action, LowLevelTrajectory, Predicate, State, \ + Type, GroundAtom, Task, STRIPSOperator +import numpy as np +from collections import Counter + +from predicators.nsrt_learning.strips_learning.gen_to_spec_learner import parse_objs_preds_and_options + +import pickle as pkl +import numpy as np +from predicators.structs import Action, LowLevelTrajectory, Predicate, State, \ + Type +from test_operator_learning_all import get_demo_traj, demo_files + +opname_to_key = { + 'Actions.pickup_0': '0', + 'Actions.pickup_1': '1', + 'Actions.pickup_2': '2', + 'Actions.drop_0': '3', + 'Actions.drop_1': '4', + 'Actions.drop_2': '5', + 'Actions.drop_in': 'i', + 'Actions.toggle': 't', + 'Actions.close': 'c', + 'Actions.open': 'o', + 'Actions.cook': 'k', + 'Actions.slice': '6' +} + +class RandomAgent(): + def __init__(self, name): + self.name = name + self.actions = None + + def reset(self, task_name): + pass + + def policy(self, obs, env): + #print(env.get_lifted_state()) + return env.key_to_action[random.choice(list(env.key_to_action.keys()))] + +def evaluation(agents, tasks, num_iterations=10, start_seed=100): + results = {} + task_i = 0 + for i in range(num_iterations): + for task in tasks: + for agent in agents: + env = MiniBehaviorEnv(env_id=task, seed=i+start_seed) + observation, _ = env.reset() + agent.reset(env.env_id) + steps = 0 + for _ in range(50): + action = agent.policy(observation, env) + observation, reward, terminated, truncated, _ = env.step(action) + steps += 1 + env.show() + + if reward != 0: + break + + if terminated or truncated: + break + found_plan = 0 + plan_diff = -1 + if agent.actions is not None and agent.actions != []: + found_plan = 1 + + key_to_opname = {v:k for k,v in opname_to_key.items()} + plan = [key_to_opname[action] if not action.startswith("moveto") else "Move" for action in agent.actions] + dataset_plan = agent.dataset[0].actions + + def differing_reoccurring_counts(list1, list2): + count1 = Counter(list1) + count2 = Counter(list2) + all_keys = set(count1.keys()) | set(count2.keys()) + result = {} + total_diff = 0 + for key in all_keys: + c1 = count1.get(key, 0) + c2 = count2.get(key, 0) + if (c1 > 1 or c2 > 1) and c1 != c2: + diff = abs(c1 - c2) + result[key] = diff + total_diff += diff + result['total'] = total_diff + return result + plan_diff = differing_reoccurring_counts(plan, dataset_plan)['total'] + results[str(task_i) + "_" + task + "_" + agent.name] = (steps, reward, i, found_plan, plan_diff) + task_i += 1 + return results + +def structure_results(results_dict): + data = [] + for key, (steps, reward, iteration, found_plan, plan_diff) in results_dict.items(): + task_idx, task_name, agent_name = key.split("_", 2) + data.append({ + "task_name": task_name, + "task_idx": int(task_idx), + "iteration": int(iteration), + "found_plan": int(found_plan), + "plan_diff": int(plan_diff), + "steps": steps, + "reward": reward, + "success": 1 if reward > 0 else 0, + "agent": agent_name + }) + df = 
pd.DataFrame(data) + df["task_order"] = df["task_idx"] + return df.sort_values(["agent", "iteration", "task_order"]) + +def plot_lifelong_success(df): + plt.figure(figsize=(14, 5)) + + sns.lineplot( + data=df, + x="task_order", + y="success", + hue="agent", + marker="o" + ) + + # Set up x-ticks with task names, spaced across iterations + xticks = df["task_order"] + xticklabels = df["task_name"] + plt.xticks(ticks=xticks, labels=xticklabels, rotation=45, ha='right') + + plt.ylim(-0.1, 1.1) + plt.yticks([0, 1], ["Fail", "Success"]) + plt.ylabel("Success") + plt.xlabel("Tasks over Lifelong Iterations") + plt.title("Lifelong Learning Success per Task") + plt.tight_layout() + plt.grid(True, linestyle='--', alpha=0.3) + plt.savefig("lifelong_learning_success.png", dpi=200) + +class OperatorLearningAgent(): + def __init__(self, name, strips_learner, single_grounding=False): + self.name = name + self.num_demos = 1 + + # Initialized once; populated in get_data() + self.dataset = [] + self.ground_atom_dataset = [] + self.tasks = [] + self.action_space = Box(0, 7, (1,)) + self.objs = set() + self.preds = set() + self.options = set() + self.ground_atoms_traj = [] + self.goal = None + + # Runtime variables + self.nsrts = None + self.actions = None + self.i = 0 + self.seed_i = 0 + + # Learning Params + self.strips_learner = strips_learner + self.single_grounding = single_grounding + utils.reset_config({ + "strips_learner": self.strips_learner, + "segmenter": "every_step", + "disable_harmlessness_check": True, + "pnad_search_load_initial": True, + "backward_forward_load_initial": True, + "min_data_for_nsrt": 0, + "min_perc_data_for_nsrt": 0, + "pnad_search_timeout":100.0, + "single_grounding": self.single_grounding, + "option_learner": "no_learning" + }) + + def reset(self, task_name): + if False: + self.dataset = [] + self.ground_atom_dataset = [] + self.tasks = [] + self.objs = set() + self.preds = set() + self.options = set() + self.ground_atoms_traj = [] + self.action_space = Box(0, 7, (1,)) + self.seed_i = 0 + + # Learning Params + utils.reset_config({ + "strips_learner": self.strips_learner, + "segmenter": "every_step", + "disable_harmlessness_check": True, + "pnad_search_load_initial": True, + "backward_forward_load_initial": True, + "min_data_for_nsrt": 0, + "min_perc_data_for_nsrt": 0, + "pnad_search_timeout":100.0, + "single_grounding": self.single_grounding, + "option_learner": "no_learning" + }) + + self.nsrts = self.learn_nsrts(task_name) + self.goal = self.parse_goal(task_name, self.ground_atoms_traj[1][-1]) + self.actions = None + self.i = 0 + + def parse_goal(self, task_name, ground_atoms_state): + if task_name == "MiniGrid-BoxingBooksUpForStorage-16x16-N2-v0": + return set([atom for atom in ground_atoms_state if str(atom).startswith("inside(")]) + + elif task_name == "MiniGrid-OpeningPackages-16x16-N2-v0": + return set([atom for atom in ground_atoms_state if str(atom).startswith("openable(")]) + + elif task_name == "MiniGrid-CleaningACar-16x16-N2-v0": + return set([atom for atom in ground_atoms_state if str(atom).startswith("inside(")]) | set([atom for atom in ground_atoms_state if str(atom).startswith("~dustyable(")]) + + elif task_name == "MiniGrid-CleaningShoes-16x16-N2-v0": + return set([atom for atom in ground_atoms_state if str(atom).startswith("~stainable(") and "shoe" in str(atom)]) | \ + set([atom for atom in ground_atoms_state if str(atom).startswith("~dustyable(") and "shoe" in str(atom)]) | \ + set([atom for atom in ground_atoms_state if str(atom).startswith("onfloor(") and 
"towel" in str(atom)]) + + + elif task_name == "MiniGrid-CleaningUpTheKitchenOnly-16x16-N2-v0": + return set([ + atom for atom in ground_atoms_state + if ( + str(atom).startswith("onTop(") and "blender" in str(atom) and "countertop" in str(atom) + ) or ( + str(atom).startswith("nextto(") and "soap" in str(atom) and "sink" in str(atom) + ) or ( + str(atom).startswith("inside(") and "vegetable_oil" in str(atom) and "cabinet" in str(atom) + ) or ( + str(atom).startswith("inside(") and "plate" in str(atom) and "cabinet" in str(atom) + ) or ( + str(atom).startswith("inside(") and "casserole" in str(atom) and "electric_refrigerator" in str(atom) + ) or ( + str(atom).startswith("inside(") and "apple" in str(atom) and "electric_refrigerator" in str(atom) + ) or ( + str(atom).startswith("inside(") and "rag" in str(atom) and "sink" in str(atom) + ) or ( + str(atom).startswith("nextto(") and "rag" in str(atom) and "sink" in str(atom) + ) or ( + str(atom).startswith("~dustyable(") and "cabinet" in str(atom) + ) or ( + str(atom).startswith("~stainable(") and "plate" in str(atom) + ) + ]) + + elif task_name == "MiniGrid-CollectMisplacedItems-16x16-N2-v0": + return set([ + atom for atom in ground_atoms_state + if str(atom).startswith("onTop(") and "table" in str(atom) and ( + "gym_shoe" in str(atom) or + "necklace" in str(atom) or + "notebook" in str(atom) or + "sock" in str(atom) + ) and not str(atom).startswith("onTop(table") + ]) + + elif task_name == "MiniGrid-InstallingAPrinter-16x16-N2-v0": + return set([atom for atom in ground_atoms_state if str(atom).startswith("onTop(")]) | \ + set([atom for atom in ground_atoms_state if str(atom).startswith("toggleable(")]) + + elif task_name == "MiniGrid-LayingWoodFloors-16x16-N2-v0": + return set([atom for atom in ground_atoms_state if str(atom).startswith("nextto(")]) + + elif task_name == "MiniGrid-MakingTea-16x16-N2-v0": + return set([ + atom for atom in ground_atoms_state + if str(atom).startswith("sliceable(") and "lemon" in str(atom) + ]) | set([ + atom for atom in ground_atoms_state + if str(atom).startswith("onTop(") and "teapot" in str(atom) and "stove" in str(atom) + ]) | set([ + atom for atom in ground_atoms_state + if str(atom).startswith("atsamelocation(") and "tea_bag" in str(atom) and "teapot" in str(atom) + ]) | set([ + atom for atom in ground_atoms_state + if str(atom).startswith("soakable(") and "teapot" in str(atom) + ]) | set([ + atom for atom in ground_atoms_state + if str(atom).startswith("toggleable(") and "stove" in str(atom) + ]) + + elif task_name == "MiniGrid-MovingBoxesToStorage-16x16-N2-v0": + return set([atom for atom in ground_atoms_state if str(atom).startswith("onTop(")]) + + elif task_name == "MiniGrid-OrganizingFileCabinet-16x16-N2-v0": + return set([ + atom for atom in ground_atoms_state + if str(atom).startswith("onTop(") and "marker" in str(atom) and "table" in str(atom) + ]) | set([ + atom for atom in ground_atoms_state + if str(atom).startswith("inside(") and "document" in str(atom) and "cabinet" in str(atom) + ]) | set([ + atom for atom in ground_atoms_state + if str(atom).startswith("inside(") and "folder" in str(atom) and "cabinet" in str(atom) + ]) + + elif task_name == "MiniGrid-PreparingSalad-16x16-N2-v0": + import ipdb; ipdb.set_trace() + raise NotImplementedError("parse_goal not implemented for PreparingSalad") + + elif task_name == "MiniGrid-PuttingAwayDishesAfterCleaning-16x16-N2-v0": + return set([ + atom for atom in ground_atoms_state + if str(atom).startswith("inside(") and "plate" in str(atom) and 
"cabinet" in str(atom) + ]) + + + elif task_name == "MiniGrid-SettingUpCandles-16x16-N2-v0": + return set([atom for atom in ground_atoms_state if str(atom).startswith("onTop(")]) + + elif task_name == "MiniGrid-SortingBooks-16x16-N2-v0": + return set([atom for atom in ground_atoms_state if str(atom).startswith("onTop(") and "shelf" in str(atom) and ("book" in str(atom) or "hardback" in str(atom))]) + + elif task_name == "MiniGrid-StoringFood-16x16-N2-v0": + return set([ + atom for atom in ground_atoms_state + if str(atom).startswith("inside(") and "cabinet" in str(atom) and ( + "oatmeal" in str(atom) or "chip" in str(atom) or "vegetable_oil" in str(atom) or "sugar" in str(atom) + ) + ]) + + elif task_name == "MiniGrid-ThawingFrozenFood-16x16-N2-v0": + return set([ + atom for atom in ground_atoms_state + if str(atom).startswith("nextto(") and ( + ("date" in str(atom) and "fish" in str(atom)) or + ("fish" in str(atom) and "sink" in str(atom)) or + ("olive" in str(atom) and "sink" in str(atom)) + ) + ]) + + elif task_name == "MiniGrid-ThrowingAwayLeftovers-16x16-N2-v0": + return set([atom for atom in ground_atoms_state if str(atom).startswith("inside(") and "hamburger" in str(atom) and "ashcan" in str(atom)]) + + elif task_name == "MiniGrid-WashingPotsAndPans-16x16-N2-v0": + return set([ + atom for atom in ground_atoms_state + if str(atom).startswith("~stainable(") and ( + "pan" in str(atom) or "kettle" in str(atom) or "teapot" in str(atom) + ) + ]) | set([ + atom for atom in ground_atoms_state + if str(atom).startswith("inside(") and "cabinet" in str(atom) and ( + "pan" in str(atom) or "kettle" in str(atom) or "teapot" in str(atom) + ) + ]) + + elif task_name == "MiniGrid-WateringHouseplants-16x16-N2-v0": + return set([atom for atom in ground_atoms_state if str(atom).startswith("soakable(") and "pot_plant" in str(atom)]) + else: + import ipdb; ipdb.set_trace() + + + def get_plan(self, state, seed): + objs, _, _, ground_atoms_traj, all_atoms = parse_objs_preds_and_options(self.dataset[-1], train_task_idx=0) + task = Task(State({}, None), self.goal) + + _, _, _, ground_atoms_traj, _ = parse_objs_preds_and_options(LowLevelTrajectory([state], [], _is_demo=True, _train_task_idx=0), train_task_idx=0, all_atoms=all_atoms) + init_atoms = ground_atoms_traj[1][0] + plan = self.plan(init_atoms, objs, self.preds, self.nsrts, task, seed) + return plan + + def policy(self, obs, env): + if self.actions is None: + seed = self.seed_i + self.seed_i += 1 + num_remove_pre = 0 + while self.actions is None or self.actions == []: + try: + self.actions = self.get_plan(env.get_lifted_state(), seed) + break + except _SkeletonSearchTimeout: + print("did not find skeleton - timeout") + except PlanningFailure: + print("did not find skeleton - plan failure") + num_remove_pre += 1 + new_nsrts = set() + for nsrt in self.nsrts: + pre = set() + tot_pre = len(nsrt.op.preconditions) - num_remove_pre + if tot_pre > 0: + pre = random.sample(nsrt.op.preconditions, tot_pre) + ignore_effects = nsrt.op.ignore_effects + del_effs = nsrt.op.delete_effects + if num_remove_pre > 10: + ignore_effects = set() + del_effs = set() + new_nsrts.add( + nsrt.op.copy_with(preconditions=pre, + ignore_effects=ignore_effects, + delete_effects=del_effs)).make_nsrt( + nsrt.option, + [], # dummy sampler + lambda s, g, rng, o: np.zeros(1, dtype=np.float32)) + self.nsrts = new_nsrts + with open("test_saved.NSRTs.txt", "w") as file: + for nsrt in self.nsrts: + if nsrt.op.add_effects != set(): + file.write(str(nsrt)+"\n") + + self.i += 1 + if self.i-1 < 
len(self.actions): + return env.key_to_action[self.actions[self.i-1]] + else: + self.actions = None + self.i = 0 + return env.key_to_action["0"] + + def clean_action_plan(self, action_plan): + plan = [] + for step in action_plan: + name = step[0] + objs = step[1] + if len(objs) > 0: + obj_name = objs[0].name + if name.startswith("Move"): + plan.append(f"moveto-{obj_name}") + else: + for opname, key in opname_to_key.items(): + if opname in name: + plan.append(key) + break + return plan + + def plan(self, init_atoms, objects, predicates, nsrts, task, seed): + ground_nsrts, reachable_atoms = task_plan_grounding(init_atoms, objects, nsrts, allow_noops=True) + heuristic = utils.create_task_planning_heuristic("hadd", init_atoms, + task.goal, ground_nsrts, + predicates, objects) + task_plan_generator = task_plan(init_atoms, + task.goal, + ground_nsrts, + reachable_atoms, + heuristic, + timeout=1, + seed=seed, + max_skeletons_optimized=3) + skeleton, _, _ = next(task_plan_generator) + + action_plan = [] + for step in skeleton: + action_plan.append((step.option.name, step.objects)) + return self.clean_action_plan(action_plan) + + def get_data(self, task_name): + for demo_file in demo_files: + if task_name in demo_file: + demo_traj = get_demo_traj(demo_file=demo_file, verbose=False) + + idx = len(self.dataset) + demo_traj = LowLevelTrajectory(demo_traj.states, demo_traj.actions, _is_demo=True, _train_task_idx=idx) + + self.dataset.append(demo_traj) + new_objs, new_preds, new_options, self.ground_atoms_traj, _ = parse_objs_preds_and_options(demo_traj, train_task_idx=idx) + self.objs |= new_objs + self.preds |= new_preds + self.options |= new_options + self.ground_atom_dataset.append(self.ground_atoms_traj) + goal = self.parse_goal(task_name, self.ground_atoms_traj[1][-1]) + self.tasks.append(Task(State({}, None), goal)) + # if len(self.dataset) >= self.num_demos: + # break + # assert len(self.dataset) == self.num_demos + return self.dataset, self.tasks, self.preds, self.options, self.action_space, self.ground_atom_dataset + + def learn_nsrts(self, task_name): + dataset, tasks, preds, options, action_space, ground_atom_dataset = self.get_data(task_name) + nsrts, _, _ = learn_nsrts_from_data(dataset, + tasks, + preds, + options, + action_space, + ground_atom_dataset, + sampler_learner="neural", + annotations=None) + with open("test_saved.NSRTs.txt", "w") as file: + for nsrt in nsrts: + if nsrt.op.add_effects != set(): + file.write(str(nsrt)+"\n") + return nsrts + +class DummyAgent(OperatorLearningAgent): + def __init__(self, name="dummy", strips_learner="dummy"): + super().__init__(name=name, strips_learner=strips_learner, single_grounding=True) + + def learn_nsrts(self, task_name): + dataset, tasks, preds, options, action_space, ground_atom_dataset = self.get_data(task_name) + goal = self.parse_goal(task_name, self.ground_atoms_traj[1][-1]) + obj_to_var = {obj:obj.type("?" 
+ obj.name) for obj in self.objs} + lifted_goal = {atom.lift(obj_to_var) for atom in goal} + + nsrts = set() + name_i = 0 + for option in options: + op = STRIPSOperator( + name="Dummy" + str(name_i), + parameters=[], + preconditions=set(), + add_effects=set(), + delete_effects=set(), + ignore_effects=set() + ) + dummy_nsrt = op.make_nsrt( + option, + [], # dummy sampler + lambda s, g, rng, o: np.zeros(1, dtype=np.float32)) + nsrts.add(dummy_nsrt) + name_i += 1 + + params = [] + for sublist in [lifted_atom.variables for lifted_atom in lifted_goal]: + params += sublist + params = [x for x in set(params)] + op = STRIPSOperator( + name="Dummy" + str(name_i), + parameters=params, + preconditions=set(), + add_effects=lifted_goal, + delete_effects=set(), + ignore_effects=set() + ) + dummy_nsrt = op.make_nsrt( + option, + [], # dummy sampler + lambda s, g, rng, o: np.zeros(1, dtype=np.float32)) + nsrts.add(dummy_nsrt) + name_i += 1 + return nsrts + +class GroundTruthAgent(OperatorLearningAgent): + def __init__(self, name): + super().__init__(name=name, strips_learner="NONE") + self.name = name + self.ground_truth_trajs = {} + self.i = 0 + self.actions = None + + def reset(self, task_name): + self.dataset = [] + self.ground_atom_dataset = [] + self.tasks = [] + self.action_space = Box(0, 7, (1,)) + self.objs = set() + self.preds = set() + self.options = set() + self.ground_atoms_traj = [] + + dataset, tasks, preds, options, action_space, ground_atom_dataset = self.get_data(task_name) + + self.goal = self.parse_goal(task_name, self.ground_atoms_traj[1][-1]) + self.actions = None + self.i = 0 + + action_plan = [] + for i, step in enumerate(self.ground_atoms_traj[0].actions): + curr_state = self.ground_atoms_traj[1][i] + next_state = self.ground_atoms_traj[1][i+1] + def count_object_occurrences(atom_set): + counter = Counter() + for atom in atom_set: + for obj in atom.objects: + if not atom.predicate.name.startswith("~inreachofrobot"): + counter[obj] += 1 + return counter + counter = count_object_occurrences(next_state - curr_state) + def get_max_count_object(counter, exclude_types=("table", "shelf")): + max_count = max(counter.values()) + candidates = [ + obj for obj, count in counter.items() + if count == max_count and all(ex_type not in str(obj) for ex_type in exclude_types) + ] + + if candidates: + return candidates[0] + else: + return None + + try: + if get_max_count_object(counter) is None: + objs = [max(counter, key=counter.get)] + else: + objs = [get_max_count_object(counter)] + except: + objs = random.sample(self.objs, 1) + action_plan.append((step._option.name, objs)) + self.ground_truth_trajs[task_name] = self.clean_action_plan(action_plan) + + def policy(self, obs, env): + #print(env.get_lifted_state()) + try: + assert env.env_id in self.ground_truth_trajs.keys() + except: + import ipdb; ipdb.set_trace() + self.i += 1 + if self.i-1 < len(self.ground_truth_trajs[env.env_id]): + return env.key_to_action[self.ground_truth_trajs[env.env_id][self.i-1]] + else: + return env.key_to_action["0"] + + def learn_nsrts(self, task_name): + return None + +# tasks = ["MiniGrid-BoxingBooksUpForStorage-16x16-N2-v0", +# "MiniGrid-CollectMisplacedItems-16x16-N2-v0", +# "MiniGrid-ThrowingAwayLeftovers-16x16-N2-v0", +# "MiniGrid-OpeningPackages-16x16-N2-v0", +# "MiniGrid-WateringHouseplants-16x16-N2-v0", +# "MiniGrid-PuttingAwayDishesAfterCleaning-16x16-N2-v0"] + +# agents = [RandomAgent("random"), GroundTruthAgent("ground-truth")] +# results = evaluation(agents, tasks, num_iterations=3) +# df = 
structure_results(results) +# plot_lifelong_success(df) + +##### + +# tasks = ["MiniGrid-BoxingBooksUpForStorage-16x16-N2-v0"] +# agents = [GroundTruthAgent("ground-truth")] +# results = evaluation(agents, tasks, num_iterations=1) +# df = structure_results(results) +# #plot_lifelong_success(df) +# import ipdb; ipdb.set_trace() + +##### + +############ +# tasks = ["MiniGrid-BoxingBooksUpForStorage-16x16-N2-v0"] +# agents = [ +# DummyAgent("dummy", strips_learner="dummy"), +# OperatorLearningAgent("cluster-intersect", strips_learner="cluster_and_intersect"), +# OperatorLearningAgent("backchaining", strips_learner="backchaining"), +# OperatorLearningAgent("hill-climbing", strips_learner="pnad_search"), +# # OperatorLearningAgent("llm", strips_learner="llm") +# ] +# results = evaluation(agents, tasks, num_iterations=1) +# df = structure_results(results) +# #plot_lifelong_success(df) +# for agent in agents: +# print(agent.name, len(agent.nsrts), agent.actions) +# print() +# import ipdb; ipdb.set_trace() + +# Note: grounding should only be for operators based on the goal.... +# Maybe LLM can help with grounding too + +# TODO Finally - Collect Demos, Increment Num_Demos + +# TODO Try Run 3-5 Env Eval on all 5 Baselines (Dummy, CI, Pnad_Search, Back_Chaining) + +# TODO Fix LLM Agents + +# TODO Make BC+FS+LLM Agent +# see other code + +# TODO Make Version-Space Agent +# Note: from CI it should fall back to BC+FS+LLM then to BC then to Dummy + +# TODO Try Run Full Eval on all 7 Agents + +tasks = [ + 'MiniGrid-BoxingBooksUpForStorage-16x16-N2-v0', + 'MiniGrid-CleaningACar-16x16-N2-v0', + 'MiniGrid-CleaningShoes-16x16-N2-v0', #1 + 'MiniGrid-CleaningUpTheKitchenOnly-16x16-N2-v0', + 'MiniGrid-CollectMisplacedItems-16x16-N2-v0', + 'MiniGrid-InstallingAPrinter-16x16-N2-v0', + 'MiniGrid-LayingWoodFloors-16x16-N2-v0', + 'MiniGrid-MakingTea-16x16-N2-v0', + 'MiniGrid-MovingBoxesToStorage-16x16-N2-v0', + 'MiniGrid-OpeningPackages-16x16-N2-v0', + 'MiniGrid-OrganizingFileCabinet-16x16-N2-v0', + #[DEBUG]'MiniGrid-PreparingSalad-16x16-N2-v0', + 'MiniGrid-PuttingAwayDishesAfterCleaning-16x16-N2-v0', + 'MiniGrid-SettingUpCandles-16x16-N2-v0', #1 + 'MiniGrid-SortingBooks-16x16-N2-v0', + 'MiniGrid-StoringFood-16x16-N2-v0', + #[DEBUG]'MiniGrid-ThawingFrozenFood-16x16-N2-v0', + 'MiniGrid-ThrowingAwayLeftovers-16x16-N2-v0', + 'MiniGrid-WashingPotsAndPans-16x16-N2-v0', + 'MiniGrid-WateringHouseplants-16x16-N2-v0' +] + +task_info = {} + +for demo_file in demo_files: + traj = get_demo_traj(demo_file, verbose=False) + add_count = 0 + for i, action in enumerate(traj.actions): + curr_state = set(traj.states[i]) + next_state = set(traj.states[i+1]) + del_effs = curr_state - next_state + add_effs = next_state - curr_state + # print(action) + # print("DEL:", del_effs) + # print("ADD:", add_effs) + # print() + add_count += len(add_effs) + assert len(add_effs) != 0 or str(action) == "Move" + task_name = demo_file.split("/")[-1].split("_")[0] + agent = OperatorLearningAgent("cluster-intersect", strips_learner="cluster_and_intersect") + agent.get_data(task_name=task_name) + goal = agent.parse_goal(task_name=task_name, ground_atoms_state=agent.ground_atoms_traj[1][-1]) + task_info[demo_file.split("/")[-1]] = (len(traj.actions), len(goal), add_count) + +i = 0 +curriculum = [] +for k,v in sorted([(k,v) for k,v in task_info.items()], key=lambda x: x[1][2]): # by add effects + i+=1 + print("|", v[0], "| goal length:", v[1], "| add count:", v[2], "|", k.split("_")[0], i) + curriculum.append(k.split("_")[0]) + +import time +start_time = 
time.time() +tasks = ['MiniGrid-OpeningPackages-16x16-N2-v0', + 'MiniGrid-InstallingAPrinter-16x16-N2-v0', + 'MiniGrid-MovingBoxesToStorage-16x16-N2-v0', + # 'MiniGrid-SortingBooks-16x16-N2-v0',# + 'MiniGrid-WateringHouseplants-16x16-N2-v0',# + # 'MiniGrid-MakingTea-16x16-N2-v0',# + # 'MiniGrid-ThrowingAwayLeftovers-16x16-N2-v0',# + # 'MiniGrid-BoxingBooksUpForStorage-16x16-N2-v0'# + ] +# tasks = curriculum +# tasks.remove('MiniGrid-LayingWoodFloors-16x16-N2-v0') +# tasks.remove('MiniGrid-CleaningACar-16x16-N2-v0') +# tasks.remove('MiniGrid-SortingBooks-16x16-N2-v0') +# tasks.remove('MiniGrid-CleaningUpTheKitchenOnly-16x16-N2-v0') +print("#"*30) +print(tasks) +agents = [ + # GroundTruthAgent("ground-truth"), + # DummyAgent("dummy", strips_learner="dummy"), + # OperatorLearningAgent("cluster-intersect", strips_learner="cluster_and_intersect"), + # OperatorLearningAgent("backchaining", strips_learner="backchaining"), + # OperatorLearningAgent("hill-climbing", strips_learner="pnad_search"), + # OperatorLearningAgent("llm", strips_learner="llm"), + OperatorLearningAgent("backward-forward", strips_learner="backward-forward"), + ] +with open("test_saved.NSRTs.txt", "w") as file: + file.write("") +results = evaluation(agents, tasks, num_iterations=1, start_seed=100) +df = structure_results(results) +plot_lifelong_success(df) +end_time = time.time() +print("time elasped", end_time - start_time) + +import ipdb; ipdb.set_trace() diff --git a/test_colla_results.py b/test_colla_results.py new file mode 100644 index 0000000000..69f9d7c818 --- /dev/null +++ b/test_colla_results.py @@ -0,0 +1,788 @@ +# (1) implement the evaluation function evaluate(agent) returns dictionary of results +# (2) implment evaluation visualization visualize(results) +# (3) do whatever it takes to make results better (CI, BC, FF+BC, FF+BC+LLMs) + +from test_colla_env import MiniBehaviorEnv +from test_colla_helpers import Box, LowLevelTrajectory, State, Task, \ + demo_files, get_demo_traj, learn_nsrts_from_data +import matplotlib.pyplot as plt +import seaborn as sns +import pandas as pd +import random +from predicators.planning import task_plan, task_plan_grounding, _SkeletonSearchTimeout, PlanningFailure +from predicators import utils +from predicators.structs import Action, LowLevelTrajectory, Predicate, State, \ + Type, GroundAtom, Task, STRIPSOperator +import numpy as np +from collections import Counter + +from predicators.nsrt_learning.strips_learning.gen_to_spec_learner import parse_objs_preds_and_options + +import pickle as pkl +import numpy as np +from predicators.structs import Action, LowLevelTrajectory, Predicate, State, \ + Type +from test_operator_learning_all import get_demo_traj, demo_files + +opname_to_key = { + 'Actions.pickup_0': '0', + 'Actions.pickup_1': '1', + 'Actions.pickup_2': '2', + 'Actions.drop_0': '3', + 'Actions.drop_1': '4', + 'Actions.drop_2': '5', + 'Actions.drop_in': 'i', + 'Actions.toggle': 't', + 'Actions.close': 'c', + 'Actions.open': 'o', + 'Actions.cook': 'k', + 'Actions.slice': '6' +} + +class RandomAgent(): + def __init__(self, name): + self.name = name + self.actions = None + + def reset(self, task_name): + pass + + def policy(self, obs, env): + #print(env.get_lifted_state()) + return env.key_to_action[random.choice(list(env.key_to_action.keys()))] + +def evaluation(agents, tasks, num_iterations=10, start_seed=100, short_task=True): + results = {} + task_i = 0 + for i in range(num_iterations): + for task in tasks: + for agent in agents: + env = MiniBehaviorEnv(env_id=task, 
seed=i+start_seed) + env.short_task = short_task + observation, _ = env.reset() + agent.short_task = short_task + agent.reset(env.env_id) + steps = 0 + for _ in range(50): + action = agent.policy(observation, env) + observation, reward, terminated, truncated, _ = env.step(action) + steps += 1 + env.show() + + if reward != 0: + break + + if terminated or truncated: + break + found_plan = 0 + plan_diff = -1 + if agent.actions is not None and agent.actions != []: + found_plan = 1 + + key_to_opname = {v:k for k,v in opname_to_key.items()} + plan = [key_to_opname[action] if not action.startswith("moveto") else "Move" for action in agent.actions] + dataset_plan = agent.dataset[0].actions + + def differing_reoccurring_counts(list1, list2): + count1 = Counter(list1) + count2 = Counter(list2) + all_keys = set(count1.keys()) | set(count2.keys()) + result = {} + total_diff = 0 + for key in all_keys: + c1 = count1.get(key, 0) + c2 = count2.get(key, 0) + if (c1 > 1 or c2 > 1) and c1 != c2: + diff = abs(c1 - c2) + result[key] = diff + total_diff += diff + result['total'] = total_diff + return result + plan_diff = differing_reoccurring_counts(plan, dataset_plan)['total'] + results[str(task_i) + "_" + task + "_" + agent.name] = (steps, reward, i, found_plan, plan_diff) + task_i += 1 + return results + +def structure_results(results_dict): + data = [] + for key, (steps, reward, iteration, found_plan, plan_diff) in results_dict.items(): + task_idx, task_name, agent_name = key.split("_", 2) + data.append({ + "task_name": task_name, + "task_idx": int(task_idx), + "iteration": int(iteration), + "found_plan": int(found_plan), + "plan_diff": int(plan_diff), + "steps": steps, + "reward": reward, + "success": 1 if reward > 0 else 0, + "agent": agent_name + }) + df = pd.DataFrame(data) + df["task_order"] = df["task_idx"] + return df.sort_values(["agent", "iteration", "task_order"]) + +def plot_lifelong_success(df): + plt.figure(figsize=(14, 5)) + + sns.lineplot( + data=df, + x="task_order", + y="success", + hue="agent", + marker="o" + ) + + # Set up x-ticks with task names, spaced across iterations + xticks = df["task_order"] + xticklabels = df["task_name"] + plt.xticks(ticks=xticks, labels=xticklabels, rotation=45, ha='right') + + plt.ylim(-0.1, 1.1) + plt.yticks([0, 1], ["Fail", "Success"]) + plt.ylabel("Success") + plt.xlabel("Tasks over Lifelong Iterations") + plt.title("Lifelong Learning Success per Task") + plt.tight_layout() + plt.grid(True, linestyle='--', alpha=0.3) + plt.savefig("lifelong_learning_success.png", dpi=200) + +class OperatorLearningAgent(): + def __init__(self, name, strips_learner, single_grounding=False): + self.name = name + self.num_demos = 1 + + # Initialized once; populated in get_data() + self.dataset = [] + self.ground_atom_dataset = [] + self.tasks = [] + self.action_space = Box(0, 7, (1,)) + self.objs = set() + self.preds = set() + self.options = set() + self.ground_atoms_traj = [] + self.goal = None + + # Runtime variables + self.nsrts = None + self.actions = None + self.i = 0 + self.seed_i = 0 + self.short_task = True + + # Learning Params + self.strips_learner = strips_learner + self.single_grounding = single_grounding + utils.reset_config({ + "strips_learner": self.strips_learner, + "segmenter": "every_step", + "disable_harmlessness_check": True, + "pnad_search_load_initial": True, + "backward_forward_load_initial": True, + "min_data_for_nsrt": 0, + "min_perc_data_for_nsrt": 0, + "pnad_search_timeout":1000.0, + "single_grounding": self.single_grounding, + "option_learner": 
"no_learning" + }) + + def reset(self, task_name): + if False: + self.dataset = [] + self.ground_atom_dataset = [] + self.tasks = [] + self.objs = set() + self.preds = set() + self.options = set() + self.ground_atoms_traj = [] + self.action_space = Box(0, 7, (1,)) + self.seed_i = 0 + + # Learning Params + utils.reset_config({ + "strips_learner": self.strips_learner, + "segmenter": "every_step", + "disable_harmlessness_check": True, + "pnad_search_load_initial": True, + "backward_forward_load_initial": True, + "min_data_for_nsrt": 0, + "min_perc_data_for_nsrt": 0, + "pnad_search_timeout":1000.0, + "single_grounding": self.single_grounding, + "option_learner": "no_learning" + }) + + self.nsrts = self.learn_nsrts(task_name) + self.goal = self.parse_goal(task_name, self.ground_atoms_traj[1][-1]) + self.actions = None + self.i = 0 + + def parse_goal(self, task_name, ground_atoms_state): + if task_name == "MiniGrid-BoxingBooksUpForStorage-16x16-N2-v0": + return set([atom for atom in ground_atoms_state if str(atom).startswith("inside(")]) + + elif task_name == "MiniGrid-OpeningPackages-16x16-N2-v0": + return set([atom for atom in ground_atoms_state if str(atom).startswith("openable(")]) + + elif task_name == "MiniGrid-CleaningACar-16x16-N2-v0": + return set([atom for atom in ground_atoms_state if str(atom).startswith("inside(")]) | set([atom for atom in ground_atoms_state if str(atom).startswith("~dustyable(")]) + + elif task_name == "MiniGrid-CleaningShoes-16x16-N2-v0": + return set([atom for atom in ground_atoms_state if str(atom).startswith("~stainable(") and "shoe" in str(atom)]) | \ + set([atom for atom in ground_atoms_state if str(atom).startswith("~dustyable(") and "shoe" in str(atom)]) | \ + set([atom for atom in ground_atoms_state if str(atom).startswith("onfloor(") and "towel" in str(atom)]) + + elif task_name == "MiniGrid-CleaningUpTheKitchenOnly-16x16-N2-v0": + return set([ + atom for atom in ground_atoms_state + if ( + str(atom).startswith("onTop(") and "blender" in str(atom) and "countertop" in str(atom) + ) or ( + str(atom).startswith("nextto(") and "soap" in str(atom) and "sink" in str(atom) + ) or ( + str(atom).startswith("inside(") and "vegetable_oil" in str(atom) and "cabinet" in str(atom) + ) or ( + str(atom).startswith("inside(") and "plate" in str(atom) and "cabinet" in str(atom) + ) or ( + str(atom).startswith("inside(") and "casserole" in str(atom) and "electric_refrigerator" in str(atom) + ) or ( + str(atom).startswith("inside(") and "apple" in str(atom) and "electric_refrigerator" in str(atom) + ) or ( + str(atom).startswith("inside(") and "rag" in str(atom) and "sink" in str(atom) + ) or ( + str(atom).startswith("nextto(") and "rag" in str(atom) and "sink" in str(atom) + ) or ( + str(atom).startswith("~dustyable(") and "cabinet" in str(atom) + ) or ( + str(atom).startswith("~stainable(") and "plate" in str(atom) + ) + ]) + + elif task_name == "MiniGrid-CollectMisplacedItems-16x16-N2-v0": + return set([ + atom for atom in ground_atoms_state + if str(atom).startswith("onTop(") and "table" in str(atom) and ( + "gym_shoe" in str(atom) or + "necklace" in str(atom) or + "notebook" in str(atom) or + "sock" in str(atom) + ) and not str(atom).startswith("onTop(table") + ]) + + elif task_name == "MiniGrid-InstallingAPrinter-16x16-N2-v0": + return set([atom for atom in ground_atoms_state if str(atom).startswith("onTop(")]) | \ + set([atom for atom in ground_atoms_state if str(atom).startswith("toggleable(")]) + + elif task_name == "MiniGrid-LayingWoodFloors-16x16-N2-v0": + return 
set([atom for atom in ground_atoms_state if str(atom).startswith("nextto(")]) + + elif task_name == "MiniGrid-MakingTea-16x16-N2-v0": + return set([ + atom for atom in ground_atoms_state + if str(atom).startswith("sliceable(") and "lemon" in str(atom) + ]) | set([ + atom for atom in ground_atoms_state + if str(atom).startswith("onTop(") and "teapot" in str(atom) and "stove" in str(atom) + ]) | set([ + atom for atom in ground_atoms_state + if str(atom).startswith("atsamelocation(") and "tea_bag" in str(atom) and "teapot" in str(atom) + ]) | set([ + atom for atom in ground_atoms_state + if str(atom).startswith("soakable(") and "teapot" in str(atom) + ]) | set([ + atom for atom in ground_atoms_state + if str(atom).startswith("toggleable(") and "stove" in str(atom) + ]) + + elif task_name == "MiniGrid-MovingBoxesToStorage-16x16-N2-v0": + return set([atom for atom in ground_atoms_state if str(atom).startswith("onTop(")]) + + elif task_name == "MiniGrid-OrganizingFileCabinet-16x16-N2-v0": + return set([ + atom for atom in ground_atoms_state + if str(atom).startswith("onTop(") and "marker" in str(atom) and "table" in str(atom) + ]) | set([ + atom for atom in ground_atoms_state + if str(atom).startswith("inside(") and "document" in str(atom) and "cabinet" in str(atom) + ]) | set([ + atom for atom in ground_atoms_state + if str(atom).startswith("inside(") and "folder" in str(atom) and "cabinet" in str(atom) + ]) + + elif task_name == "MiniGrid-PreparingSalad-16x16-N2-v0": + import ipdb; ipdb.set_trace() + raise NotImplementedError("parse_goal not implemented for PreparingSalad") + + elif task_name == "MiniGrid-PuttingAwayDishesAfterCleaning-16x16-N2-v0": + return set([ + atom for atom in ground_atoms_state + if str(atom).startswith("inside(") and "plate" in str(atom) and "cabinet" in str(atom) + ]) + + + elif task_name == "MiniGrid-SettingUpCandles-16x16-N2-v0": + return set([atom for atom in ground_atoms_state if str(atom).startswith("onTop(")]) + + elif task_name == "MiniGrid-SortingBooks-16x16-N2-v0": + return set([atom for atom in ground_atoms_state if str(atom).startswith("onTop(") and "shelf" in str(atom) and ("book" in str(atom) or "hardback" in str(atom))]) + + elif task_name == "MiniGrid-StoringFood-16x16-N2-v0": + return set([ + atom for atom in ground_atoms_state + if str(atom).startswith("inside(") and "cabinet" in str(atom) and ( + "oatmeal" in str(atom) or "chip" in str(atom) or "vegetable_oil" in str(atom) or "sugar" in str(atom) + ) + ]) + + elif task_name == "MiniGrid-ThawingFrozenFood-16x16-N2-v0": + return set([ + atom for atom in ground_atoms_state + if str(atom).startswith("nextto(") and ( + ("date" in str(atom) and "fish" in str(atom)) or + ("fish" in str(atom) and "sink" in str(atom)) or + ("olive" in str(atom) and "sink" in str(atom)) + ) + ]) + + elif task_name == "MiniGrid-ThrowingAwayLeftovers-16x16-N2-v0": + return set([atom for atom in ground_atoms_state if str(atom).startswith("inside(") and "hamburger" in str(atom) and "ashcan" in str(atom)]) + + elif task_name == "MiniGrid-WashingPotsAndPans-16x16-N2-v0": + return set([ + atom for atom in ground_atoms_state + if str(atom).startswith("~stainable(") and ( + "pan" in str(atom) or "kettle" in str(atom) or "teapot" in str(atom) + ) + ]) | set([ + atom for atom in ground_atoms_state + if str(atom).startswith("inside(") and "cabinet" in str(atom) and ( + "pan" in str(atom) or "kettle" in str(atom) or "teapot" in str(atom) + ) + ]) + + elif task_name == "MiniGrid-WateringHouseplants-16x16-N2-v0": + return set([atom for atom 
in ground_atoms_state if str(atom).startswith("soakable(") and "pot_plant" in str(atom)]) + else: + import ipdb; ipdb.set_trace() + + + def get_plan(self, state, seed): + objs, _, _, ground_atoms_traj, all_atoms = parse_objs_preds_and_options(self.dataset[-1], train_task_idx=len(self.dataset)) + task = Task(State({}, None), self.goal) + + _, _, _, ground_atoms_traj, _ = parse_objs_preds_and_options(LowLevelTrajectory([state], [], _is_demo=True, _train_task_idx=0), train_task_idx=0, all_atoms=all_atoms) + init_atoms = ground_atoms_traj[1][0] + plan = self.plan(init_atoms, objs, self.preds, self.nsrts, task, seed) + return plan + + def policy(self, obs, env): + if self.actions is None: + seed = self.seed_i + self.seed_i += 1 + num_remove_pre = 0 + while self.actions is None or self.actions == []: + try: + self.actions = self.get_plan(env.get_lifted_state(), seed) + break + except _SkeletonSearchTimeout: + print("did not find skeleton - timeout") + except PlanningFailure: + print("did not find skeleton - plan failure") + self.actions = [] + # num_remove_pre += 1 + # new_nsrts = set() + # for nsrt in self.nsrts: + # pre = set() + # tot_pre = len(nsrt.op.preconditions) - num_remove_pre + # if tot_pre > 0: + # pre = random.sample(nsrt.op.preconditions, tot_pre) + # ignore_effects = nsrt.op.ignore_effects + # del_effs = nsrt.op.delete_effects + # # if num_remove_pre > 10: + # # ignore_effects = set() + # # del_effs = set() + # new_nsrts.add( + # nsrt.op.copy_with(preconditions=pre, + # ignore_effects=ignore_effects, + # delete_effects=del_effs).make_nsrt( + # nsrt.option, + # [], # dummy sampler + # lambda s, g, rng, o: np.zeros(1, dtype=np.float32))) + # self.nsrts = new_nsrts + # with open("test_saved.NSRTs.txt", "w") as file: + # for nsrt in self.nsrts: + # if nsrt.op.add_effects != set(): + # file.write(str(nsrt)+"\n") + + self.i += 1 + if self.i-1 < len(self.actions): + return env.key_to_action[self.actions[self.i-1]] + else: + self.actions = None + self.i = 0 + return env.key_to_action["0"] + + def clean_action_plan(self, action_plan): + plan = [] + for step in action_plan: + name = step[0] + objs = step[1] + if len(objs) > 0: + obj_name = objs[0].name + if name.startswith("Move"): + plan.append(f"moveto-{obj_name}") + else: + for opname, key in opname_to_key.items(): + if opname in name: + plan.append(key) + break + return plan + + def plan(self, init_atoms, objects, predicates, nsrts, task, seed): + ground_nsrts, reachable_atoms = task_plan_grounding(init_atoms, objects, nsrts, allow_noops=True) + heuristic = utils.create_task_planning_heuristic("hadd", init_atoms, + task.goal, ground_nsrts, + predicates, objects) + task_plan_generator = task_plan(init_atoms, + task.goal, + ground_nsrts, + reachable_atoms, + heuristic, + timeout=1, + seed=seed, + max_skeletons_optimized=3) + skeleton, _, _ = next(task_plan_generator) + + action_plan = [] + for step in skeleton: + action_plan.append((step.option.name, step.objects)) + return self.clean_action_plan(action_plan) + + def get_data(self, task_name): + for demo_file in demo_files: + if task_name in demo_file: + demo_traj = get_demo_traj(demo_file=demo_file, verbose=False) + + if self.short_task: + if task_name == 'MiniGrid-SortingBooks-16x16-N2-v0': + demo_traj = LowLevelTrajectory(demo_traj.states[:5], demo_traj.actions[:4], _is_demo=True, _train_task_idx=0) + elif task_name == 'MiniGrid-WateringHouseplants-16x16-N2-v0': + demo_traj = LowLevelTrajectory(demo_traj.states[:7], demo_traj.actions[:6], _is_demo=True, _train_task_idx=0) + elif 
task_name == 'MiniGrid-ThrowingAwayLeftovers-16x16-N2-v0': + demo_traj = LowLevelTrajectory(demo_traj.states[:5], demo_traj.actions[:4], _is_demo=True, _train_task_idx=0) + elif task_name == 'MiniGrid-BoxingBooksUpForStorage-16x16-N2-v0': + demo_traj = LowLevelTrajectory(demo_traj.states[:5], demo_traj.actions[:4], _is_demo=True, _train_task_idx=0) + + idx = len(self.dataset) + demo_traj = LowLevelTrajectory(demo_traj.states, demo_traj.actions, _is_demo=True, _train_task_idx=idx) + + self.dataset.append(demo_traj) + new_objs, new_preds, new_options, self.ground_atoms_traj, _ = parse_objs_preds_and_options(demo_traj, train_task_idx=idx) + self.objs |= new_objs + self.preds |= new_preds + self.options |= new_options + self.ground_atom_dataset.append(self.ground_atoms_traj) + goal = self.parse_goal(task_name, self.ground_atoms_traj[1][-1]) + self.tasks.append(Task(State({}, None), goal)) + # if len(self.dataset) >= self.num_demos: + # break + # assert len(self.dataset) == self.num_demos + return self.dataset, self.tasks, self.preds, self.options, self.action_space, self.ground_atom_dataset + + def learn_nsrts(self, task_name): + dataset, tasks, preds, options, action_space, ground_atom_dataset = self.get_data(task_name) + nsrts, _, _ = learn_nsrts_from_data(dataset, + tasks, + preds, + options, + action_space, + ground_atom_dataset, + sampler_learner="neural", + annotations=None) + with open("test_saved.NSRTs.txt", "w") as file: + for nsrt in nsrts: + if nsrt.op.add_effects != set(): + file.write(str(nsrt)+"\n") + return nsrts + +class DummyAgent(OperatorLearningAgent): + def __init__(self, name="dummy", strips_learner="dummy"): + super().__init__(name=name, strips_learner=strips_learner, single_grounding=True) + + def learn_nsrts(self, task_name): + dataset, tasks, preds, options, action_space, ground_atom_dataset = self.get_data(task_name) + goal = self.parse_goal(task_name, self.ground_atoms_traj[1][-1]) + obj_to_var = {obj:obj.type("?" 
+ obj.name) for obj in self.objs} + lifted_goal = {atom.lift(obj_to_var) for atom in goal} + + nsrts = set() + name_i = 0 + for option in options: + op = STRIPSOperator( + name="Dummy" + str(name_i), + parameters=[], + preconditions=set(), + add_effects=set(), + delete_effects=set(), + ignore_effects=set() + ) + dummy_nsrt = op.make_nsrt( + option, + [], # dummy sampler + lambda s, g, rng, o: np.zeros(1, dtype=np.float32)) + nsrts.add(dummy_nsrt) + name_i += 1 + + params = [] + for sublist in [lifted_atom.variables for lifted_atom in lifted_goal]: + params += sublist + params = [x for x in set(params)] + op = STRIPSOperator( + name="Dummy" + str(name_i), + parameters=params, + preconditions=set(), + add_effects=lifted_goal, + delete_effects=set(), + ignore_effects=set() + ) + dummy_nsrt = op.make_nsrt( + option, + [], # dummy sampler + lambda s, g, rng, o: np.zeros(1, dtype=np.float32)) + nsrts.add(dummy_nsrt) + name_i += 1 + return nsrts + +class GroundTruthAgent(OperatorLearningAgent): + def __init__(self, name): + super().__init__(name=name, strips_learner="NONE") + self.name = name + self.ground_truth_trajs = {} + self.i = 0 + self.actions = None + + def reset(self, task_name): + self.dataset = [] + self.ground_atom_dataset = [] + self.tasks = [] + self.action_space = Box(0, 7, (1,)) + self.objs = set() + self.preds = set() + self.options = set() + self.ground_atoms_traj = [] + + dataset, tasks, preds, options, action_space, ground_atom_dataset = self.get_data(task_name) + + self.goal = self.parse_goal(task_name, self.ground_atoms_traj[1][-1]) + self.actions = None + self.i = 0 + + action_plan = [] + for i, step in enumerate(self.ground_atoms_traj[0].actions): + curr_state = self.ground_atoms_traj[1][i] + next_state = self.ground_atoms_traj[1][i+1] + def count_object_occurrences(atom_set): + counter = Counter() + for atom in atom_set: + for obj in atom.objects: + if not atom.predicate.name.startswith("~inreachofrobot"): + counter[obj] += 1 + return counter + counter = count_object_occurrences(next_state - curr_state) + def get_max_count_object(counter, exclude_types=("table", "shelf")): + max_count = max(counter.values()) + candidates = [ + obj for obj, count in counter.items() + if count == max_count and all(ex_type not in str(obj) for ex_type in exclude_types) + ] + + if candidates: + return candidates[0] + else: + return None + + try: + if get_max_count_object(counter) is None: + objs = [max(counter, key=counter.get)] + else: + objs = [get_max_count_object(counter)] + except: + objs = random.sample(self.objs, 1) + action_plan.append((step._option.name, objs)) + self.ground_truth_trajs[task_name] = self.clean_action_plan(action_plan) + + def policy(self, obs, env): + #print(env.get_lifted_state()) + try: + assert env.env_id in self.ground_truth_trajs.keys() + except: + import ipdb; ipdb.set_trace() + self.i += 1 + if self.i-1 < len(self.ground_truth_trajs[env.env_id]): + return env.key_to_action[self.ground_truth_trajs[env.env_id][self.i-1]] + else: + return env.key_to_action["0"] + + def learn_nsrts(self, task_name): + return None + +# tasks = ["MiniGrid-BoxingBooksUpForStorage-16x16-N2-v0", +# "MiniGrid-CollectMisplacedItems-16x16-N2-v0", +# "MiniGrid-ThrowingAwayLeftovers-16x16-N2-v0", +# "MiniGrid-OpeningPackages-16x16-N2-v0", +# "MiniGrid-WateringHouseplants-16x16-N2-v0", +# "MiniGrid-PuttingAwayDishesAfterCleaning-16x16-N2-v0"] + +# agents = [RandomAgent("random"), GroundTruthAgent("ground-truth")] +# results = evaluation(agents, tasks, num_iterations=3) +# df = 
structure_results(results) +# plot_lifelong_success(df) + +##### + +# tasks = ["MiniGrid-BoxingBooksUpForStorage-16x16-N2-v0"] +# agents = [GroundTruthAgent("ground-truth")] +# results = evaluation(agents, tasks, num_iterations=1) +# df = structure_results(results) +# #plot_lifelong_success(df) +# import ipdb; ipdb.set_trace() + +##### + +############ +# tasks = ["MiniGrid-BoxingBooksUpForStorage-16x16-N2-v0"] +# agents = [ +# DummyAgent("dummy", strips_learner="dummy"), +# OperatorLearningAgent("cluster-intersect", strips_learner="cluster_and_intersect"), +# OperatorLearningAgent("backchaining", strips_learner="backchaining"), +# OperatorLearningAgent("hill-climbing", strips_learner="pnad_search"), +# # OperatorLearningAgent("llm", strips_learner="llm") +# ] +# results = evaluation(agents, tasks, num_iterations=1) +# df = structure_results(results) +# #plot_lifelong_success(df) +# for agent in agents: +# print(agent.name, len(agent.nsrts), agent.actions) +# print() +# import ipdb; ipdb.set_trace() + +# Note: grounding should only be for operators based on the goal.... +# Maybe LLM can help with grounding too + +# TODO Finally - Collect Demos, Increment Num_Demos + +# TODO Try Run 3-5 Env Eval on all 5 Baselines (Dummy, CI, Pnad_Search, Back_Chaining) + +# TODO Fix LLM Agents + +# TODO Make BC+FS+LLM Agent +# see other code + +# TODO Make Version-Space Agent +# Note: from CI it should fall back to BC+FS+LLM then to BC then to Dummy + +# TODO Try Run Full Eval on all 7 Agents + +# tasks = [ +# 'MiniGrid-BoxingBooksUpForStorage-16x16-N2-v0', +# 'MiniGrid-CleaningACar-16x16-N2-v0', +# 'MiniGrid-CleaningShoes-16x16-N2-v0', #1 +# 'MiniGrid-CleaningUpTheKitchenOnly-16x16-N2-v0', +# 'MiniGrid-CollectMisplacedItems-16x16-N2-v0', +# 'MiniGrid-InstallingAPrinter-16x16-N2-v0', +# 'MiniGrid-LayingWoodFloors-16x16-N2-v0', +# 'MiniGrid-MakingTea-16x16-N2-v0', +# 'MiniGrid-MovingBoxesToStorage-16x16-N2-v0', +# 'MiniGrid-OpeningPackages-16x16-N2-v0', +# 'MiniGrid-OrganizingFileCabinet-16x16-N2-v0', +# #[DEBUG]'MiniGrid-PreparingSalad-16x16-N2-v0', +# 'MiniGrid-PuttingAwayDishesAfterCleaning-16x16-N2-v0', +# 'MiniGrid-SettingUpCandles-16x16-N2-v0', #1 +# 'MiniGrid-SortingBooks-16x16-N2-v0', +# 'MiniGrid-StoringFood-16x16-N2-v0', +# #[DEBUG]'MiniGrid-ThawingFrozenFood-16x16-N2-v0', +# 'MiniGrid-ThrowingAwayLeftovers-16x16-N2-v0', +# 'MiniGrid-WashingPotsAndPans-16x16-N2-v0', +# 'MiniGrid-WateringHouseplants-16x16-N2-v0' +# ] + +# task_info = {} + +# for demo_file in demo_files: +# traj = get_demo_traj(demo_file, verbose=False) +# add_count = 0 +# for i, action in enumerate(traj.actions): +# curr_state = set(traj.states[i]) +# next_state = set(traj.states[i+1]) +# del_effs = curr_state - next_state +# add_effs = next_state - curr_state +# # print(action) +# # print("DEL:", del_effs) +# # print("ADD:", add_effs) +# # print() +# add_count += len(add_effs) +# assert len(add_effs) != 0 or str(action) == "Move" +# task_name = demo_file.split("/")[-1].split("_")[0] +# agent = OperatorLearningAgent("cluster-intersect", strips_learner="cluster_and_intersect") +# agent.get_data(task_name=task_name) +# goal = agent.parse_goal(task_name=task_name, ground_atoms_state=agent.ground_atoms_traj[1][-1]) +# task_info[demo_file.split("/")[-1]] = (len(traj.actions), len(goal), add_count) + +# i = 0 +# curriculum = [] +# for k,v in sorted([(k,v) for k,v in task_info.items()], key=lambda x: x[1][2]): # by add effects +# i+=1 +# print("|", v[0], "| goal length:", v[1], "| add count:", v[2], "|", k.split("_")[0], i) +# 
curriculum.append(k.split("_")[0]) + +import time +start_time = time.time() +tasks = ['MiniGrid-OpeningPackages-16x16-N2-v0', + 'MiniGrid-InstallingAPrinter-16x16-N2-v0', + 'MiniGrid-MovingBoxesToStorage-16x16-N2-v0', + 'MiniGrid-SortingBooks-16x16-N2-v0',# + 'MiniGrid-WateringHouseplants-16x16-N2-v0',# + #'MiniGrid-ThrowingAwayLeftovers-16x16-N2-v0',# + 'MiniGrid-BoxingBooksUpForStorage-16x16-N2-v0'# + ] +# tasks = curriculum +# tasks.remove('MiniGrid-LayingWoodFloors-16x16-N2-v0') +# tasks.remove('MiniGrid-CleaningACar-16x16-N2-v0') +# tasks.remove('MiniGrid-SortingBooks-16x16-N2-v0') +# tasks.remove('MiniGrid-CleaningUpTheKitchenOnly-16x16-N2-v0') +print("#"*30) +print(tasks) + +all_agents = [ + #GroundTruthAgent("ground-truth"), + #DummyAgent("dummy", strips_learner="dummy"), + #OperatorLearningAgent("cluster-intersect", strips_learner="cluster_and_intersect"), + #OperatorLearningAgent("backchaining", strips_learner="backchaining"), + #OperatorLearningAgent("hill-climbing", strips_learner="pnad_search"), + #OperatorLearningAgent("llm", strips_learner="llm"), + #OperatorLearningAgent("backward-forward", strips_learner="backward-forward"), + ] + +for agent in all_agents: + with open("test_saved.NSRTs.txt", "w") as file: + file.write("""NSRT-Move0: + Parameters: [?x0:obj_type] + Preconditions: [] + Add Effects: [inreachofrobot(?x0:obj_type)] + Delete Effects: [~inreachofrobot(?x0:obj_type)] + Ignore Effects: [inreachofrobot, ~inreachofrobot] + Option Spec: Move()""") + results = evaluation([agent], tasks, num_iterations=1, start_seed=100) + df = structure_results(results) + plot_lifelong_success(df) + end_time = time.time() + print("time elasped", end_time - start_time) + df.to_csv('test_results/' + agent.name + '_output.csv') + + # results = evaluation([agent], tasks, num_iterations=1, start_seed=100, short_task=False) + # df2 = structure_results(results) + # plot_lifelong_success(df2) + # end_time = time.time() + # print("time elasped", end_time - start_time) + # df2.to_csv('test_results/' + agent.name + '_long_output.csv') + diff --git a/test_colla_visualize_results.py b/test_colla_visualize_results.py new file mode 100644 index 0000000000..e69de29bb2 diff --git a/test_curriculum.py b/test_curriculum.py new file mode 100644 index 0000000000..6eb33454fe --- /dev/null +++ b/test_curriculum.py @@ -0,0 +1,377 @@ +import numpy as np +from gym.spaces import Box +import re +import pickle as pkl + +from predicators import utils +from predicators.nsrt_learning.nsrt_learning_main import learn_nsrts_from_data +from predicators.structs import Action, LowLevelTrajectory, Predicate, State, \ + Type, GroundAtom, Task +import glob + +demo_files = sorted([filename for filename in glob.glob("/Users/shashlik/Documents/GitHub/predicators/demos/*/*")]) +demo_tasks = set([demo_file.split("/")[-1].split("_")[0] for demo_file in demo_files]) + +utils.reset_config({ + "strips_learner": "pnad_search", + "segmenter": "every_step", + "disable_harmlessness_check": True, + "pnad_search_load_initial": True, + "min_data_for_nsrt": 0, + "min_perc_data_for_nsrt": 0, + "pnad_search_timeout":1000.0 + }) + +# Load and do this from MiniBeahvior Demo + +def get_demo_traj(demo_file, verbose=True): + with open(demo_file, 'rb') as f: + data = pkl.load(f) + + last_skill = "Move" + state = [a for a in data[1][1] if "infovofrobot" not in a] + states = [state] + actions = [] + for step in data.keys(): + obs = data[step][0]['image'] + direction = data[step][0]['direction'] + action = data[step][2] + skill = None + + if "forward" in 
str(action) or \ + "left" in str(action) or \ + "right" in str(action): + + skill = "Move" + else: + skill = str(action) + + has_effect = True + try: + next_obs = data[step][3]['image'] + next_direction = data[step][3]['direction'] + if np.allclose(obs, next_obs) and (direction == next_direction): + has_effect = False + except: + pass + + if has_effect: + if last_skill != skill: + if verbose: + print("#") + print(last_skill) + try: + next_state = [a for a in data[step][1] if "infovofrobot" not in a] + if verbose: + print("PREV:", set(state)) + print("ADD:", set(next_state) - set(state)) + print("DEL:", set(state) - set(next_state)) + state = next_state + actions.append(last_skill) + states.append(state) + except: + pass + last_skill = skill + else: + if verbose: + print("#") + print(last_skill) + next_state = [a for a in data[step][4] if "infovofrobot" not in a] + if verbose: + print("PREV:", set(state)) + print("ADD:", set(next_state) - set(state)) + print("DEL:", set(state) - set(next_state)) + state = next_state + if verbose: + print("#") + actions.append(last_skill) + states.append(state) + + return LowLevelTrajectory(states, actions, _is_demo=True, _train_task_idx=0) + +def parse_objs_preds_and_options(trajectory, train_task_idx=0): + objs = set() + preds = set() + options = set() + state = None + states = [] + actions = [] + ground_atoms_traj = [] + obj_type = Type("obj_type", ["is_obj"]) + + for i, s in enumerate(trajectory.states): + ground_atoms = set() + for pred_str in s: + pred = None + choice = [] + pattern = re.compile(r"(\w+)\((.*?)\)") + match = pattern.match(pred_str) + if match: + func_name = match.group(1) + args = match.group(2).split(',') if match.group(2) else [] + for arg in args: + obj = obj_type(arg.strip()) + choice.append(obj) + objs.add(obj) + if len(args) == 1: + pred = Predicate(func_name, [obj_type], lambda s, o: True) + preds.add(pred) + elif len(args) == 2: + pred = Predicate(func_name, [obj_type, obj_type], lambda s, o: True) + preds.add(pred) + else: + NotImplementedError("") + ground_atoms.add(GroundAtom(pred, choice)) + states.append(state) + ground_atoms_traj.append(ground_atoms) + + if i < len(trajectory.actions): + a_name = trajectory.actions[i] + name_to_actions = actions_dict = { + "Move": 0, + "Actions.pickup_0": 3, + "Actions.pickup_1": 4, + "Actions.pickup_2": 5, + "Actions.drop_0": 6, + "Actions.drop_1": 7, + "Actions.drop_2": 8, + "Actions.drop_in": 9, + "Actions.toggle": 10, + "Actions.close": 11, + "Actions.open": 12, + "Actions.cook": 13, + "Actions.slice": 14 + } + + param_option = utils.SingletonParameterizedOption( + a_name, lambda s, m, o, p: Action(name_to_actions[a_name])) + options.add(param_option) + option = param_option.ground([], []) + action = option.policy(state) + action.set_option(option) + actions.append(action) + + return objs, preds, options, (LowLevelTrajectory([{obj:[0.0] for obj in objs} for _ in states], actions, _is_demo=True, _train_task_idx=train_task_idx), ground_atoms_traj) + +dataset = [] +ground_atom_dataset = [] +tasks = [] +action_space = Box(0, 7, (1, )) +all_preds = set() +all_options = set() + +demo_traj = get_demo_traj("demos/MiniGrid-OpeningPackages-16x16-N2-v0/MiniGrid-OpeningPackages-16x16-N2-v0_0", verbose=False) + +idx = len(dataset) +demo_traj = LowLevelTrajectory(demo_traj.states, demo_traj.actions, _is_demo=True, _train_task_idx=idx) + +dataset += [demo_traj] +objs, preds, options, ground_atoms_traj = parse_objs_preds_and_options(demo_traj, train_task_idx=idx) +all_preds = preds | all_preds 
+all_options = options | all_options +ground_atom_dataset += [ground_atoms_traj] +goal = set([atom for atom in ground_atoms_traj[1][-1] if "openable(" in str(atom)]) +tasks += [Task(State({}, None), goal)] + +nsrts, _, _ = learn_nsrts_from_data(dataset, tasks, + preds, + options, + action_space, + ground_atom_dataset, + sampler_learner="neural", + annotations=None) + +for nsrt in nsrts: + print(nsrt) + print() + +with open("test_saved.NSRTs.txt", "w") as file: + for nsrt in nsrts: + file.write(str(nsrt)+"\n") + +demo_traj = get_demo_traj("demos/MiniGrid-SortingBooks-16x16-N2-v0/MiniGrid-SortingBooks-16x16-N2-v0_0", verbose=False) + +idx = len(dataset) +demo_traj = LowLevelTrajectory(demo_traj.states[0:5], demo_traj.actions[0:4], _is_demo=True, _train_task_idx=idx) + +dataset += [demo_traj] +objs, preds, options, ground_atoms_traj = parse_objs_preds_and_options(demo_traj, train_task_idx=idx) +all_preds = preds | all_preds +all_options = options | all_options +ground_atom_dataset += [ground_atoms_traj] +goal = set([atom for atom in ground_atoms_traj[1][-1] if "onTop(" in str(atom) and "shelf" in str(atom)]) +tasks += [Task(State({}, None), goal)] + +nsrts, _, _ = learn_nsrts_from_data(dataset, tasks, + all_preds, + all_options, + action_space, + ground_atom_dataset, + sampler_learner="neural", + annotations=None) + +print("#"*60) +for nsrt in nsrts: + print(nsrt) + print() + +with open("test_saved.NSRTs.txt", "w") as file: + for nsrt in nsrts: + file.write(str(nsrt)+"\n") + + +demo_traj = get_demo_traj("demos/MiniGrid-BoxingBooksUpForStorage-16x16-N2-v0/MiniGrid-BoxingBooksUpForStorage-16x16-N2-v0_0", verbose=False) + +idx = len(dataset) +demo_traj = LowLevelTrajectory(demo_traj.states[0:5], demo_traj.actions[0:4], _is_demo=True, _train_task_idx=idx) + +dataset += [demo_traj] +objs, preds, options, ground_atoms_traj = parse_objs_preds_and_options(demo_traj, train_task_idx=idx) +all_preds = preds | all_preds +all_options = options | all_options +ground_atom_dataset += [ground_atoms_traj] +goal = set([atom for atom in ground_atoms_traj[1][-1] if "inside(book_3" in str(atom)]) +tasks += [Task(State({}, None), goal)] + +nsrts, _, _ = learn_nsrts_from_data(dataset, tasks, + all_preds, + all_options, + action_space, + ground_atom_dataset, + sampler_learner="neural", + annotations=None) + +print("#"*60) +for nsrt in nsrts: + print(nsrt) + print() + +with open("test_saved.NSRTs.txt", "w") as file: + for nsrt in nsrts: + file.write(str(nsrt)+"\n") + + +demo_traj = get_demo_traj("demos/MiniGrid-PuttingAwayDishesAfterCleaning-16x16-N2-v0/MiniGrid-PuttingAwayDishesAfterCleaning-16x16-N2-v0_0", verbose=False) + +idx = len(dataset) +demo_traj = LowLevelTrajectory(demo_traj.states[0:7], demo_traj.actions[0:6], _is_demo=True, _train_task_idx=idx) + +dataset += [demo_traj] +objs, preds, options, ground_atoms_traj = parse_objs_preds_and_options(demo_traj, train_task_idx=idx) +all_preds = preds | all_preds +all_options = options | all_options +ground_atom_dataset += [ground_atoms_traj] +goal = set([atom for atom in ground_atoms_traj[1][-1] if "inside(" in str(atom)]) +tasks += [Task(State({}, None), goal)] + +nsrts, _, _ = learn_nsrts_from_data(dataset, tasks, + all_preds, + all_options, + action_space, + ground_atom_dataset, + sampler_learner="neural", + annotations=None) + +print("#"*60) +for nsrt in nsrts: + print(nsrt) + print() + +with open("test_saved.NSRTs.txt", "w") as file: + for nsrt in nsrts: + file.write(str(nsrt)+"\n") + + + +demo_traj = 
get_demo_traj("demos/MiniGrid-ThrowingAwayLeftovers-16x16-N2-v0/MiniGrid-ThrowingAwayLeftovers-16x16-N2-v0_0", verbose=False) + +idx = len(dataset) +demo_traj = LowLevelTrajectory(demo_traj.states[0:5], demo_traj.actions[0:4], _is_demo=True, _train_task_idx=idx) + +dataset += [demo_traj] +objs, preds, options, ground_atoms_traj = parse_objs_preds_and_options(demo_traj, train_task_idx=idx) +all_preds = preds | all_preds +all_options = options | all_options +ground_atom_dataset += [ground_atoms_traj] +goal = set([atom for atom in ground_atoms_traj[1][-1] if "inside(" in str(atom)]) +tasks += [Task(State({}, None), goal)] + +nsrts, _, _ = learn_nsrts_from_data(dataset, tasks, + all_preds, + all_options, + action_space, + ground_atom_dataset, + sampler_learner="neural", + annotations=None) + +print("#"*60) +for nsrt in nsrts: + print(nsrt) + print() + +with open("test_saved.NSRTs.txt", "w") as file: + for nsrt in nsrts: + file.write(str(nsrt)+"\n") + +demo_traj = get_demo_traj("demos/MiniGrid-CollectMisplacedItems-16x16-N2-v0/MiniGrid-CollectMisplacedItems-16x16-N2-v0_0", verbose=False) + +idx = len(dataset) +demo_traj = LowLevelTrajectory(demo_traj.states[0:5], demo_traj.actions[0:4], _is_demo=True, _train_task_idx=idx) + +dataset += [demo_traj] +objs, preds, options, ground_atoms_traj = parse_objs_preds_and_options(demo_traj, train_task_idx=idx) +all_preds = preds | all_preds +all_options = options | all_options +ground_atom_dataset += [ground_atoms_traj] +goal = set([atom for atom in ground_atoms_traj[1][-1] if "onTop(" in str(atom) and "table_1" in str(atom)]) +tasks += [Task(State({}, None), goal)] + +nsrts, _, _ = learn_nsrts_from_data(dataset, tasks, + all_preds, + all_options, + action_space, + ground_atom_dataset, + sampler_learner="neural", + annotations=None) + +print("#"*60) +for nsrt in nsrts: + print(nsrt) + print() + +with open("test_saved.NSRTs.txt", "w") as file: + for nsrt in nsrts: + file.write(str(nsrt)+"\n") + +quit() + +demo_traj = get_demo_traj("demos/MiniGrid-WateringHouseplants-16x16-N2-v0/MiniGrid-WateringHouseplants-16x16-N2-v0_0", verbose=False) + +idx = len(dataset) +demo_traj = LowLevelTrajectory(demo_traj.states[0:7], demo_traj.actions[0:6], _is_demo=True, _train_task_idx=idx) + +dataset += [demo_traj] +objs, preds, options, ground_atoms_traj = parse_objs_preds_and_options(demo_traj, train_task_idx=idx) +all_preds = preds | all_preds +all_options = options | all_options +ground_atom_dataset += [ground_atoms_traj] +goal = set([atom for atom in ground_atoms_traj[1][-1] if "inside(" in str(atom) or "soakable(" in str(atom)]) +tasks += [Task(State({}, None), goal)] + +nsrts, _, _ = learn_nsrts_from_data(dataset, tasks, + all_preds, + all_options, + action_space, + ground_atom_dataset, + sampler_learner="neural", + annotations=None) + +print("#"*60) +for nsrt in nsrts: + print(nsrt) + print() + +with open("test_saved.NSRTs.txt", "w") as file: + for nsrt in nsrts: + file.write(str(nsrt)+"\n") + diff --git a/test_graph_results.py b/test_graph_results.py new file mode 100644 index 0000000000..b1dcdfbd27 --- /dev/null +++ b/test_graph_results.py @@ -0,0 +1,89 @@ +import pickle + +filename = 'results.pkl' + +with open(filename, 'rb') as file: + results = pickle.load(file) + +filename = 'HITL_results.pkl' + +with open(filename, 'rb') as file: + results_hitl = pickle.load(file) + +filename = 'HITL_more_results.pkl' + +with open(filename, 'rb') as file: + results_more_hitl = pickle.load(file) + + +import pandas as pd + +# Assuming `results` is your dictionary of lists of 
tuples +df_matches = pd.DataFrame(results["tot_matches"], columns=["num_trajs", "run_i", "num_match"]) +df_soft_matches = pd.DataFrame(results["tot_soft_matches"], columns=["num_trajs", "run_i", "num_soft_match"]) +df_exsoft_matches = pd.DataFrame(results["tot_exsoft_matches"], columns=["num_trajs", "run_i", "num_exsoft_match"]) +df_num_ops = pd.DataFrame(results["tot_num_ops"], columns=["num_trajs", "run_i", "num_op_sets", "num_actions"]) + +agg_matches = df_matches.groupby("num_trajs")["num_match"].agg(["mean", "std"]).reset_index() +agg_soft = df_soft_matches.groupby("num_trajs")["num_soft_match"].agg(["mean", "std"]).reset_index() +agg_exsoft = df_exsoft_matches.groupby("num_trajs")["num_exsoft_match"].agg(["mean", "std"]).reset_index() +agg_ops = df_num_ops.groupby("num_trajs")[["num_op_sets", "num_actions"]].agg(["mean", "std"]).reset_index() + +df_matches_hitl = pd.DataFrame(results_hitl["tot_matches"], columns=["num_trajs", "run_i", "num_match"]) +df_soft_matches_hitl = pd.DataFrame(results_hitl["tot_soft_matches"], columns=["num_trajs", "run_i", "num_soft_match"]) +df_exsoft_matches_hitl = pd.DataFrame(results_hitl["tot_exsoft_matches"], columns=["num_trajs", "run_i", "num_exsoft_match"]) +df_num_ops_hitl = pd.DataFrame(results_hitl["tot_num_ops"], columns=["num_trajs", "run_i", "num_op_sets", "num_actions"]) + +agg_matches_hitl = df_matches_hitl.groupby("num_trajs")["num_match"].agg(["mean", "std"]).reset_index() +agg_soft_hitl = df_soft_matches_hitl.groupby("num_trajs")["num_soft_match"].agg(["mean", "std"]).reset_index() +agg_exsoft_hitl = df_exsoft_matches_hitl.groupby("num_trajs")["num_exsoft_match"].agg(["mean", "std"]).reset_index() +agg_ops_hitl = df_num_ops_hitl.groupby("num_trajs")[["num_op_sets", "num_actions"]].agg(["mean", "std"]).reset_index() + +df_matches_more_hitl = pd.DataFrame(results_more_hitl["tot_matches"], columns=["num_trajs", "run_i", "num_match"]) +df_soft_matches_more_hitl = pd.DataFrame(results_more_hitl["tot_soft_matches"], columns=["num_trajs", "run_i", "num_soft_match"]) +df_exsoft_matches_more_hitl = pd.DataFrame(results_more_hitl["tot_exsoft_matches"], columns=["num_trajs", "run_i", "num_exsoft_match"]) +df_num_ops_more_hitl = pd.DataFrame(results_more_hitl["tot_num_ops"], columns=["num_trajs", "run_i", "num_op_sets", "num_actions"]) + +agg_matches_more_hitl = df_matches_more_hitl.groupby("num_trajs")["num_match"].agg(["mean", "std"]).reset_index() +agg_soft_more_hitl = df_soft_matches_more_hitl.groupby("num_trajs")["num_soft_match"].agg(["mean", "std"]).reset_index() +agg_exsoft_more_hitl = df_exsoft_matches_more_hitl.groupby("num_trajs")["num_exsoft_match"].agg(["mean", "std"]).reset_index() +agg_ops_more_hitl = df_num_ops_more_hitl.groupby("num_trajs")[["num_op_sets", "num_actions"]].agg(["mean", "std"]).reset_index() + +import matplotlib.pyplot as plt + +plt.figure() +# No-HITL +plt.errorbar(agg_matches["num_trajs"], agg_matches["mean"], yerr=agg_matches["std"], label="Match (no-HITL)") +plt.errorbar(agg_soft["num_trajs"], agg_soft["mean"], yerr=agg_soft["std"], label="Soft Match (no-HITL)") +plt.errorbar(agg_exsoft["num_trajs"], agg_exsoft["mean"], yerr=agg_exsoft["std"], label="ExSoft Match (no-HITL)") + +# HITL +plt.errorbar(agg_matches_hitl["num_trajs"], agg_matches_hitl["mean"], yerr=agg_matches_hitl["std"], linestyle='--', label="Match (HITL)") +plt.errorbar(agg_soft_hitl["num_trajs"], agg_soft_hitl["mean"], yerr=agg_soft_hitl["std"], linestyle='--', label="Soft Match (HITL)") +plt.errorbar(agg_exsoft_hitl["num_trajs"], 
agg_exsoft_hitl["mean"], yerr=agg_exsoft_hitl["std"], linestyle='--', label="ExSoft Match (HITL)") + +# More HITL +plt.errorbar(agg_matches_more_hitl["num_trajs"], agg_matches_more_hitl["mean"], yerr=agg_matches_more_hitl["std"], linestyle=':', label="Match (HITL+)") +plt.errorbar(agg_soft_more_hitl["num_trajs"], agg_soft_more_hitl["mean"], yerr=agg_soft_more_hitl["std"], linestyle=':', label="Soft Match (HITL+)") +plt.errorbar(agg_exsoft_more_hitl["num_trajs"], agg_exsoft_more_hitl["mean"], yerr=agg_exsoft_more_hitl["std"], linestyle=':', label="ExSoft Match (HITL+)") + +plt.xlabel("Number of Trajectories") +plt.ylabel("Matches") +plt.title("Match Types vs Number of Trajectories (HITL vs No-HITL vs. More-HITL)") +plt.legend(loc='center left', bbox_to_anchor=(1, 0.5)) +plt.tight_layout() # Adjust layout to make space for legend +plt.grid(True) +plt.show() + +plt.figure() +# No-HITL +plt.errorbar(agg_ops["num_trajs"], agg_ops[("num_op_sets", "mean")], yerr=agg_ops[("num_op_sets", "std")], label="# Operators (no-HITL)") +plt.errorbar(agg_ops["num_trajs"], agg_ops[("num_actions", "mean")], yerr=agg_ops[("num_actions", "std")], label="# Actions (no-HITL)") + +plt.xlabel("Number of Trajectories") +plt.ylabel("Count") +plt.title("Operator and Action Counts vs Number of Trajectories (No-HITL)") +plt.legend(loc='center left', bbox_to_anchor=(1, 0.5)) +plt.tight_layout() # Adjust layout to make space for legend +plt.grid(True) +plt.show() diff --git a/test_lbf_final.py b/test_lbf_final.py new file mode 100644 index 0000000000..7f917a8e1f --- /dev/null +++ b/test_lbf_final.py @@ -0,0 +1,662 @@ +# Cleaned-up and organized version of your operator learning code +# - Uses dataclasses +# - Removes duplication +# - Adds helpers +# - Keeps everything in one file + +from dataclasses import dataclass, field +from typing import List, Set, Tuple, Dict +import random +import numpy as np +from collections import deque, defaultdict + +# --- Config --- + +np.random.seed(1) +random.seed(1) + +NUM_PREDICATES = 10 +NUM_OPERATORS = 10 +TRAJ_MAX = 10 +TRAJ_LEN = 5 +NUM_TRAJS = 100 + +# --- Operator Representation --- + +@dataclass +class Operator: + action: int + pre: Set[int] = field(default_factory=set) + add: Set[int] = field(default_factory=set) + delete: Set[int] = field(default_factory=set) + + def is_applicable(self, state: Set[int]) -> bool: + return self.pre.issubset(state) + + def apply(self, state: Set[int]) -> Set[int]: + if not self.is_applicable(state): + return state + return (state - self.delete) | self.add + +# Utility + +def op_key(op: Operator) -> Tuple[int, frozenset]: + return (op.action, frozenset(op.add)) + +def is_equivalent(op1: Operator, op2: Operator) -> bool: + return op1.pre == op2.pre and op1.add == op2.add and op1.delete == op2.delete + +def is_covered_by(op1: Operator, op2: Operator) -> bool: + return op1.pre >= op2.pre and op1.add == op2.add and op1.delete >= op2.delete + +# Planning + +def plan(start: Set[int], goal: Set[int], operators: List[Operator], max_depth=10): + visited, queue = set(), deque([(start.copy(), [])]) + + while queue: + state, path = queue.popleft() + state_key = frozenset(state) + if state_key in visited: continue + visited.add(state_key) + + if goal.issubset(state): return path + if len(path) >= max_depth: continue + + for op in operators: + if op.is_applicable(state): + next_state = op.apply(state) + if next_state != state: + queue.append((next_state, path + [(state.copy(), op.action, next_state.copy())])) + return None + +# Random Generator + +def 
generate_random_operator(pred_pool: List[int], action_id: int) -> Operator: + pre = set(random.sample(pred_pool, random.randint(1, 3))) + effects = list(set(pred_pool) - pre) + add = set(random.sample(effects, random.randint(1, min(2, len(effects))))) + delete = set(random.sample(list(pre), random.randint(0, len(pre)))) + return Operator(action=action_id, pre=pre, add=add, delete=delete) + +# Reachability + +def compute_reachable_states(init_state: Set[int], operators: List[Operator], max_iters=100) -> Set[frozenset]: + reached_states, frontier = set(), [init_state.copy()] + reachable = set() + + for _ in range(max_iters): + new_frontier = [] + for state in frontier: + key = frozenset(state) + if key in reached_states: continue + reached_states.add(key) + reachable.add(key) + + for op in operators: + if op.is_applicable(state): + next_state = op.apply(state) + if frozenset(next_state) not in reached_states: + new_frontier.append(next_state) + + if not new_frontier: break + frontier = new_frontier + + return reachable + +# Demo Generation + +def generate_planned_demo_trajectories(operators: List[Operator], num_trajs: int, max_depth: int) -> List[Tuple[List[Tuple[Set[int], int, Set[int]]], Set[int]]]: + demos, attempts = [], 0 + + while len(demos) < num_trajs and attempts < 100000: + attempts += 1 + init_state = set(random.sample(range(NUM_PREDICATES), random.randint(2, NUM_PREDICATES))) + reachable = compute_reachable_states(init_state, operators) - {frozenset(init_state)} + if not reachable: continue + + plan_traj = [] + goals = list(reachable) + + while goals and len(plan_traj) < TRAJ_LEN: + goal_state = random.choice(goals) + goal = set(goal_state) - init_state + if not goal: + goals.remove(goal_state) + continue + + plan_traj = plan(init_state, goal, operators, max_depth) + if plan_traj is None or len(plan_traj) < TRAJ_LEN: + goals.remove(goal_state) + plan_traj = [] + + if plan_traj and len(plan_traj) >= TRAJ_LEN: + demos.append((plan_traj, goal)) + + return demos + +# Backward Pass + +def backward_infer_minimal_effects(demo_data, current_operators=None): + candidate_ops = defaultdict(lambda: {'demos': []}) + op_index = {(op.action, frozenset(op.add)): op for op in current_operators} if current_operators else {} + + for traj, goal in sorted(demo_data, key=lambda x: len(x[0])): + current_goal = goal.copy() + + for (s, action, s_prime) in reversed(traj): + effect = s_prime - s + if not effect: + raise Exception("No effect") + necessary_effect = effect if len(effect) == 1 else effect & current_goal + key = (action, frozenset(necessary_effect)) + candidate_ops[key]['demos'].append((s, action, s_prime)) + + preconditions = op_index.get(key, Operator(action)).pre + current_goal = (current_goal - necessary_effect) | preconditions + + return candidate_ops + +# Forward Refinement + +def refine_by_plan_divergence(demos, learned_operators): + op_index = {(op.action, frozenset(op.add)): op for op in learned_operators} + support_sets = {key: [] for key in op_index} + + for traj, goal in demos: + current_goal = goal.copy() + for s, a, s_prime in traj: + effect = s_prime - s + necessary_effect = effect if len(effect) == 1 else effect & current_goal + key = (a, frozenset(necessary_effect)) + if key in support_sets: + support_sets[key].append(s) + preconditions = op_index.get(key, Operator(a)).pre + current_goal = (current_goal - necessary_effect) | preconditions + + for traj, _ in demos: + state = traj[0][0] + for (s_true, a_true, s_next_true) in traj: + applicable = [op for op in learned_operators if 
op.is_applicable(state)] + if not applicable: break + op_planner = random.choice(applicable) + + key_true = (a_true, frozenset(s_next_true - s_true)) + op_true = op_index.get(key_true) + if op_true is None: continue + + if op_planner is op_true: + state = op_true.apply(state) + continue + + key_planner = (op_planner.action, frozenset(op_planner.add)) + support = support_sets.get(key_planner, []) + if not support: continue + + common_preds = set.intersection(*support) + potential_preds = common_preds - state + if not potential_preds: continue + + preds_to_add = set(random.sample(list(potential_preds), random.randint(1, len(potential_preds)))) + op_planner.pre.update(preds_to_add) + state = op_true.apply(state) + + return list(op_index.values()) + +# Learning Loop + +def learn_operators_from_demos(demo_data, max_iters=100, verbose=True): + learned_ops = [] + + for iteration in range(max_iters): + if verbose: + print(f"\n--- Iteration {iteration + 1} ---") + + # Backward pass + candidate_ops = backward_infer_minimal_effects(demo_data, current_operators=learned_ops or None) + + # Create new operators from candidate effects + op_index = {} + for (action, effect_frozen), entry in candidate_ops.items(): + op = Operator(action=action, add=set(effect_frozen), pre=set(), delete=set()) + op_index[(action, frozenset(op.add))] = op + learned_ops = list(op_index.values()) + + # Assign each transition to at most one operator + demo_assignments = defaultdict(list) + assigned_transitions = {} + for traj, _ in demo_data: + for s, a, s_prime in traj: + effect = s_prime - s + matching_keys = [(key, op) for key, op in op_index.items() if key[0] == a and key[1] <= set(effect)] + matching_vals = [len(set(effect) - key[1]) for key, op in op_index.items() if key[0] == a and key[1] <= set(effect)] + if matching_keys: + best_key, _ = matching_keys[np.argmin(matching_vals)] # choose the match that leaves the fewest unexplained effects + demo_assignments[best_key].append((s, a, s_prime)) + if (frozenset(s), a, frozenset(s_prime)) in assigned_transitions: + assigned_transitions[(frozenset(s), a, frozenset(s_prime))] += 1 + else: + assigned_transitions[(frozenset(s), a, frozenset(s_prime))] = 1 + + # Assert total assignments match demo transitions + total_transitions = sum(len(traj) for traj, _ in demo_data) + assert sum(assigned_transitions.values()) == total_transitions, ( + f"Assigned transitions ({sum(assigned_transitions.values())}) != total demo transitions ({total_transitions})") + used_keys = set(demo_assignments.keys()) + learned_ops = [op for key, op in op_index.items() if key in used_keys] + + if verbose: + print("Backward Pass Result:") + for op in sorted(learned_ops, key=lambda x: x.action): + print(op) + + # Forward refinement + learned_ops = refine_by_plan_divergence(demo_data, learned_ops) + + if verbose: + print("Forward Pass Result:") + for op in sorted(learned_ops, key=lambda x: x.action): + print(op) + + return learned_ops + +# Evaluation + +def evaluate_learned_operators(learned_ops: List[Operator], true_ops: List[Operator], verbose=True, is_equal=True) -> Tuple[int, int]: + valid, invalid = 0, 0 + for true_op in true_ops: + match_found = False + for learned_op in learned_ops: + if is_equal: + if learned_op.action == true_op.action and is_equivalent(true_op, learned_op): + match_found = True + break + else: + if learned_op.action == true_op.action and is_covered_by(true_op, learned_op): + match_found = True + break + if match_found: + valid += 1 + if verbose: + print(f"VALID\n\tLEARNED | {learned_op}\n\tTRUE | {true_op}") + else: + invalid += 1
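+ # report ground-truth operators for which no learned operator matched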
+ if verbose: + print(f"INVALID\n\tLEARNED | MISSING\n\tTRUE | {true_op}") + return valid, invalid + +def augment_demos_with_missing_ground_truth_ops(demos, learned_ops, true_ops, num_preds, num_augments=1): + from collections import defaultdict + + learned_op_keys = set( + (op.action, frozenset(op.add), frozenset(op.delete), frozenset(op.pre)) + for op in learned_ops + ) + + augmented = [] + + for true_op in true_ops: + key = (true_op.action, frozenset(true_op.add), frozenset(true_op.delete), frozenset(true_op.pre)) + if key in learned_op_keys: + continue + + for _ in range(num_augments): + possible_goals = None + while not possible_goals: + base_state = set(random.sample(range(num_preds), random.randint(2, num_preds))) + false_pre = set() + for op in learned_ops: + if op.action == true_op.action and op.add == true_op.add: + false_pre |= op.pre - true_op.pre + base_state -= false_pre + base_state |= true_op.pre + next_state = true_op.apply(base_state) + possible_goals = true_op.add - base_state + + goal = possible_goals + demo = [(base_state.copy(), true_op.action, next_state.copy())] + augmented.append((demo, goal)) + + return demos + augmented + +def deduplicate_predicates_by_equivalence(demos, operators, num_preds): + from collections import defaultdict + + # Step 1: Build truth vectors for each predicate + pred_vectors = defaultdict(list) + + for traj, goal in demos: + for s, _, s_prime in traj: + for i in range(num_preds): + pred_vectors[i].append(int(i in s)) + pred_vectors[i].append(int(i in s_prime)) + for i in range(num_preds): + pred_vectors[i].append(int(i in goal)) + + # Step 2: Group predicates with identical truth vectors + vector_to_preds = defaultdict(list) + for pred, vec in pred_vectors.items(): + vector_to_preds[tuple(vec)].append(pred) + + # Step 3: Build a mapping from redundant predicate -> representative + replace_map = {} + for group in vector_to_preds.values(): + representative = min(group) # pick smallest index as canonical + for pred in group: + replace_map[pred] = representative + + # Step 4: Replace predicates in demos + new_demos = [] + for traj, goal in demos: + new_traj = [] + for s, a, s_prime in traj: + s_new = {replace_map[p] for p in s} + s_prime_new = {replace_map[p] for p in s_prime} + new_traj.append((s_new, a, s_prime_new)) + new_goal = {replace_map[p] for p in goal} + new_demos.append((new_traj, new_goal)) + + # Step 5: Replace predicates in operators + new_operators = [] + for op in operators: + pre = {replace_map[p] for p in op.pre} + add = {replace_map[p] for p in op.add} + delete = {replace_map[p] for p in op.delete} + new_operators.append(Operator(op.action, pre, add, delete)) + + return new_demos, new_operators, replace_map + +# Main Execution + +def main(): + pred_pool = list(range(NUM_PREDICATES)) + operators = [generate_random_operator(pred_pool, i) for i in range(NUM_OPERATORS)] + print("\n--- Ground Truth Operators ---") + for op in sorted(operators, key=lambda x: x.action): + print(op) + + demo_data = generate_planned_demo_trajectories(operators, NUM_TRAJS, max_depth=TRAJ_MAX) + print(f"\nGenerated {len(demo_data)} demo trajectories.") + + op_nums = {i: 0 for i in range(NUM_OPERATORS)} + for traj, goal in demo_data: + for t in traj: + op_nums[t[1]] += 1 + + print("\nOPERATOR DEMO COUNT:", op_nums,"\n") + + + results = {} + for op_set_idx in range(100): + learned_ops = learn_operators_from_demos(demo_data, max_iters=10, verbose=False) + op_index = {(op.action, frozenset(op.add)): op for op in learned_ops} + for (action, effect_frozen), 
entry in backward_infer_minimal_effects(demo_data, current_operators=learned_ops).items(): + if (action, effect_frozen) in op_index: + delete = op_index[(action, effect_frozen)].pre & set.intersection( + *[set(entry['demos'][i][0] - entry['demos'][i][2]) for i in range(len(entry['demos']))] + ) + op_index[(action, effect_frozen)].delete = delete + print(op_set_idx, sum([len(op.pre) for op in learned_ops])) + valid, invalid = evaluate_learned_operators(learned_ops, operators, verbose=False) + print(f"Summary: {valid} valid / {valid + invalid} total operators correctly learned.\n") + val = sum([len(op.pre) for op in learned_ops]) + if val in results: + results[val] += [float(valid) / float(valid + invalid)] + else: + results[val] = [float(valid) / float(valid + invalid)] + + print([(k, np.mean(v)) for k,v in sorted(results.items(), key=lambda x: np.mean(x[1]))]) + + print("\n--- Final Learned Operators ---") + for op in sorted(learned_ops, key=lambda x: x.action): + print(op) + + valid, invalid = evaluate_learned_operators(learned_ops, operators, verbose=True) + print(f"\nSummary: {valid} valid / {valid + invalid} total operators correctly learned.") + + valid, invalid = evaluate_learned_operators(learned_ops, operators, verbose=True, is_equal=False) + print(f"\n(Coverage) Summary: {valid} valid / {valid + invalid} total operators correctly learned.") + + + # Augment and re-evaluate + for round in range(1, 10): + demo_data = augment_demos_with_missing_ground_truth_ops(demo_data, learned_ops, operators, NUM_PREDICATES, num_augments=1) + learned_ops = learn_operators_from_demos(demo_data, max_iters=5, verbose=False) + op_index = {(op.action, frozenset(op.add)): op for op in learned_ops} + for (action, effect_frozen), entry in backward_infer_minimal_effects(demo_data, current_operators=learned_ops).items(): + if (action, effect_frozen) in op_index: + delete = op_index[(action, effect_frozen)].pre & set.intersection( + *[set(entry['demos'][i][0] - entry['demos'][i][2]) for i in range(len(entry['demos']))] + ) + op_index[(action, effect_frozen)].delete = delete + + print(f"\n--- After Augmentation Round {round} ---") + # for op in sorted(learned_ops, key=lambda x: x.action): + # print(op) + + valid, invalid = evaluate_learned_operators(learned_ops, operators, verbose=False) + print(f"Round {round} Summary: {valid} valid / {valid + invalid} total operators correctly learned.") + + valid, invalid = evaluate_learned_operators(learned_ops, operators, verbose=False, is_equal=False) + print(f"\n(Coverage) Summary: {valid} valid / {valid + invalid} total operators correctly learned.") + + print(f"\n--- After Augmentation Round {round} ---") + for op in sorted(learned_ops, key=lambda x: x.action): + print(op) + + ################## + + # PREDICATES = { + # "at_A": 0, + # "at_B": 1, + # "handempty": 2, + # "holding_block1": 3, + # "holding_block2": 4, + # "inside_block1": 7, + # "inside_block2": 8, + # } + + # OPERATORS = [ + # # move from B to A + # Operator(pre={PREDICATES["at_B"]}, add={PREDICATES["at_A"]}, delete={PREDICATES["at_B"]}, action=0), + # # move from A to B + # Operator(pre={PREDICATES["at_A"]}, add={PREDICATES["at_B"]}, delete={PREDICATES["at_A"]}, action=1), + + # # pick block1 + # Operator(pre={PREDICATES["at_A"], PREDICATES["handempty"]}, + # add={PREDICATES["holding_block1"]}, + # delete={PREDICATES["handempty"]}, + # action=2), + + # # pick block2 + # Operator(pre={PREDICATES["at_A"], PREDICATES["handempty"]}, + # add={PREDICATES["holding_block2"]}, + # 
delete={PREDICATES["handempty"]}, + # action=3), + + # # place block1 in box (at B) + # Operator(pre={PREDICATES["at_B"], PREDICATES["holding_block1"]}, + # add={PREDICATES["inside_block1"], PREDICATES["handempty"]}, + # delete={PREDICATES["holding_block1"]}, + # action=4), + + # # place block2 in box (at B) + # Operator(pre={PREDICATES["at_B"], PREDICATES["holding_block2"]}, + # add={PREDICATES["inside_block2"], PREDICATES["handempty"]}, + # delete={PREDICATES["holding_block2"]}, + # action=5), + # ] + + + # init_state = { + # PREDICATES["at_B"], PREDICATES["handempty"] + # } + + # actions = [0, 2, 1, 4, 0, 3, 1, 5] # move→pick→move→place (block1), move→pick→move→place (block2) + + # state = init_state.copy() + # traj1 = [] + + # for action_id in actions: + # op = OPERATORS[action_id] + # next_state = op.apply(state) + # traj1.append((state.copy(), action_id, next_state.copy())) + # state = next_state.copy() + + # goal1 = {PREDICATES["inside_block1"], PREDICATES["inside_block2"], PREDICATES["handempty"]} + + # actions = [0, 3, 1, 5, 0, 2, 1, 4] # move→pick→move→place (block1), move→pick→move→place (block2) + + # state = init_state.copy() + # traj2 = [] + + # for action_id in actions: + # op = OPERATORS[action_id] + # next_state = op.apply(state) + # traj2.append((state.copy(), action_id, next_state.copy())) + # state = next_state.copy() + + # goal2 = {PREDICATES["inside_block1"], PREDICATES["inside_block2"], PREDICATES["handempty"]} + # demo_data = [(traj1, goal1), (traj2, goal2)] + + # demo_data, operators, pred_replace_map = deduplicate_predicates_by_equivalence(demo_data, OPERATORS, NUM_PREDICATES) + # print("Predicate replacement map:", pred_replace_map) + + # print("\n--- Ground Truth Operators ---") + # for op in sorted(operators, key=lambda x: x.action): + # print(op) + # print() + + # print("Demos:") + # # for traj in demo_data: + # # print("Goal:", traj[1], "Length:", len(traj[0])) + # print(len(demo_data)) + + # learned_ops = learn_operators_from_demos(demo_data, max_iters=1000, verbose=True) + # op_index = {(op.action, frozenset(op.add)): op for op in learned_ops} + # for (action, effect_frozen), entry in backward_infer_minimal_effects(demo_data, current_operators=learned_ops).items(): + # if (action, effect_frozen) in op_index: + # delete = op_index[(action, effect_frozen)].pre & set.intersection( + # *[set(entry['demos'][i][0] - entry['demos'][i][2]) for i in range(len(entry['demos']))] + # ) + # op_index[(action, effect_frozen)].delete = delete + # valid, invalid = evaluate_learned_operators(learned_ops, operators, verbose=False) + # print(f"Summary: {valid} valid / {valid + invalid} total operators correctly learned.\n") + + + # print("\n--- Final Learned Operators ---") + # for op in sorted(learned_ops, key=lambda x: x.action): + # print(op) + + # valid, invalid = evaluate_learned_operators(learned_ops, operators, verbose=True) + # print(f"\nSummary: {valid} valid / {valid + invalid} total operators correctly learned.") + + # # Augment and re-evaluate + # demo_data = augment_demos_with_missing_ground_truth_ops(demo_data, learned_ops, operators, NUM_PREDICATES, num_augments=1) + # learned_ops = learn_operators_from_demos(demo_data, max_iters=5, verbose=False) + # op_index = {(op.action, frozenset(op.add)): op for op in learned_ops} + # for (action, effect_frozen), entry in backward_infer_minimal_effects(demo_data, current_operators=learned_ops).items(): + # if (action, effect_frozen) in op_index: + # delete = op_index[(action, effect_frozen)].pre & set.intersection( + # 
*[set(entry['demos'][i][0] - entry['demos'][i][2]) for i in range(len(entry['demos']))] + # ) + # op_index[(action, effect_frozen)].delete = delete + + # print(f"\n--- After Augmentation Round {round} ---") + # for op in sorted(learned_ops, key=lambda x: x.action): + # print(op) + + # valid, invalid = evaluate_learned_operators(learned_ops, operators, verbose=True) + # print(f"HITL Summary: {valid} valid / {valid + invalid} total operators correctly learned.") + +# Batch Evaluation Experiment + +def main_experiment(): + results = { + "num_actions": [], + "equivalent": [], "covered": [], "overfit": [], "missed": [], + "hitl_1_equivalent": [], "hitl_1_covered": [], "hitl_1_overfit": [], "hitl_1_missed": [], + "hitl_5_equivalent": [], "hitl_5_covered": [], "hitl_5_overfit": [], "hitl_5_missed": [] + } + + for num_trajs in range(1, 102, 10): + for run_i in range(100): + pred_pool = list(range(NUM_PREDICATES)) + operators = [generate_random_operator(pred_pool, i) for i in range(NUM_OPERATORS)] + all_demo_data = generate_planned_demo_trajectories(operators, 110, max_depth=TRAJ_MAX) + # all_demo_data, operators, pred_replace_map = deduplicate_predicates_by_equivalence(all_demo_data, operators, NUM_PREDICATES) + # print("Predicate replacement map:", pred_replace_map) + + demo_data = all_demo_data[:num_trajs] + unique_actions = set() + for traj, _ in demo_data: + for (s, a, s_prime) in traj: + unique_actions.add(a) + results["num_actions"].append((num_trajs, len(unique_actions))) + + refined_ops = learn_operators_from_demos(demo_data, max_iters=1000, verbose=False) + op_index = {(op.action, frozenset(op.add)): op for op in refined_ops} + for (action, effect_frozen), entry in backward_infer_minimal_effects(demo_data, current_operators=refined_ops).items(): + if (action, effect_frozen) in op_index: + delete = op_index[(action, effect_frozen)].pre & set.intersection( + *[set(entry['demos'][i][0] - entry['demos'][i][2]) for i in range(len(entry['demos']))] + ) + op_index[(action, effect_frozen)].delete = delete + + refined_ops_hitl = augment_demos_with_missing_ground_truth_ops( + demo_data, list(op_index.values()), operators, NUM_PREDICATES, num_augments=1 + ) + refined_ops_hitl5 = augment_demos_with_missing_ground_truth_ops( + demo_data, list(op_index.values()), operators, NUM_PREDICATES, num_augments=5 + ) + + def count_matches(learned_ops): + eq, cov, ofit = 0, 0, 0 + done_ops = set() + for actual_op in operators: + for op in learned_ops: + if op.action == actual_op.action: + if (actual_op.action, frozenset(actual_op.add)) not in done_ops: + if op.pre == actual_op.pre and op.add == actual_op.add and op.delete == actual_op.delete: + eq += 1 + done_ops.add((actual_op.action, frozenset(actual_op.add))) + elif is_covered_by(actual_op, op): + cov += 1 + done_ops.add((actual_op.action, frozenset(actual_op.add))) + elif len(op.pre - actual_op.pre) <= 2 and op.add == actual_op.add and op.delete == actual_op.delete: + ofit += 1 + done_ops.add((actual_op.action, frozenset(actual_op.add))) + return eq, cov, ofit + + eq, cov, ofit = count_matches(list(op_index.values())) + miss = len(unique_actions) - (eq+cov+ofit) + results["equivalent"].append((num_trajs, run_i, eq)) + results["covered"].append((num_trajs, run_i, cov)) + results["overfit"].append((num_trajs, run_i, ofit)) + results["missed"].append((num_trajs, run_i, miss)) + + ops_hitl1 = learn_operators_from_demos(refined_ops_hitl, max_iters=1000, verbose=False) + eq1, cov1, ofit1 = count_matches(ops_hitl1) + miss1 = NUM_OPERATORS - (eq1+cov1+ofit1) + 
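# record HITL-1 match counts for this number of demos and run +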
results["hitl_1_equivalent"].append((num_trajs, run_i, eq1)) + results["hitl_1_covered"].append((num_trajs, run_i, cov1)) + results["hitl_1_overfit"].append((num_trajs, run_i, ofit1)) + results["hitl_1_missed"].append((num_trajs, run_i, miss1)) + + ops_hitl5 = learn_operators_from_demos(refined_ops_hitl5, max_iters=1000, verbose=False) + eq5, cov5, ofit5 = count_matches(ops_hitl5) + miss5 = NUM_OPERATORS - (eq5+cov5+ofit5) + results["hitl_5_equivalent"].append((num_trajs, run_i, eq5)) + results["hitl_5_covered"].append((num_trajs, run_i, cov5)) + results["hitl_5_overfit"].append((num_trajs, run_i, ofit5)) + results["hitl_5_missed"].append((num_trajs, run_i, miss5)) + + print(f"Trajs: {num_trajs}, Actions: {len(unique_actions)}, Run: {run_i}, Eq: {eq}, Cov: {cov}, Ofit: {ofit}, Missed: {miss}, HITL1: Eq={eq1}, Cov={cov1}, Ofit={ofit1}, Missed={miss1}, HITL5: Eq={eq5}, Cov={cov5}, Ofit={ofit5}, Missed={miss5}") + + import pickle + with open('HITL_experiment_results_random.pkl', 'wb') as f: + pickle.dump(results, f) + +if __name__ == "__main__": + main() + main_experiment() \ No newline at end of file diff --git a/test_lbf_plot.py b/test_lbf_plot.py new file mode 100644 index 0000000000..0b0adc0b60 --- /dev/null +++ b/test_lbf_plot.py @@ -0,0 +1,83 @@ +import pickle +import matplotlib.pyplot as plt +from collections import defaultdict +import numpy as np + +# Load results +with open("HITL_experiment_results_random.pkl", "rb") as f: + results = pickle.load(f) + +NUM_OPERATORS = 10 # ensure consistency if changed in experiment + +# Helper: aggregate to mean/std +def aggregate(metric): + agg = defaultdict(list) + for num_trajs, _, score in results[metric]: + agg[num_trajs].append(score) + means = {k: np.mean(v) for k, v in agg.items()} + stds = {k: np.std(v) for k, v in agg.items()} + return means, stds + +# Compute cumulative scores +def compute_total(eq, cov, ofit): + cov_total = {k: eq[k] + cov[k] for k in eq} + ofit_total = {k: cov_total[k] + ofit[k] for k in eq} + return cov_total, ofit_total + +# Get all metrics +eq, eq_std = aggregate("equivalent") +cov, cov_std = aggregate("covered") +ofit, ofit_std = aggregate("overfit") +miss, miss_std = aggregate("missed") + +eq1, eq1_std = aggregate("hitl_1_equivalent") +cov1, cov1_std = aggregate("hitl_1_covered") +ofit1, ofit1_std = aggregate("hitl_1_overfit") +miss1, miss1_std = aggregate("hitl_1_missed") + +eq5, eq5_std = aggregate("hitl_5_equivalent") +cov5, cov5_std = aggregate("hitl_5_covered") +ofit5, ofit5_std = aggregate("hitl_5_overfit") +miss5, miss5_std = aggregate("hitl_5_missed") + +# Compute cumulative +cov_total, ofit_total = compute_total(eq, cov, ofit) +cov1_total, ofit1_total = compute_total(eq1, cov1, ofit1) +cov5_total, ofit5_total = compute_total(eq5, cov5, ofit5) + +# Plot +plt.figure(figsize=(14, 8)) +x_vals = sorted(eq) + +def plot_with_error(x, y_mean, y_std, label, color, linestyle): + y = [y_mean[k] for k in x] + err = [y_std.get(k, 0) for k in x] + plt.errorbar(x, y, yerr=err, label=label, fmt=linestyle, color=color, capsize=4) + +# Baseline +# plot_with_error(x_vals, eq, eq_std, "Exact", "black", "o-") +# plot_with_error(x_vals, cov_total, cov_std, "Covered (incl. exact)", "black", "--") +# plot_with_error(x_vals, ofit_total, ofit_std, "Overfit (incl. 
cov)", "black", ":") +# plot_with_error(x_vals, miss, miss_std, "Missed", "black", "-.") + +# # HITL-1 +plot_with_error(x_vals, eq1, eq1_std, "HITL-1 Exact", "blue", "o-") +plot_with_error(x_vals, cov1_total, cov1_std, "HITL-1 Covered", "blue", "--") +plot_with_error(x_vals, ofit1_total, ofit1_std, "HITL-1 Overfit", "blue", ":") +# plot_with_error(x_vals, miss1, miss1_std, "HITL-1 Missed", "blue", "-.") + +# HITL-5 +# plot_with_error(x_vals, eq5, eq5_std, "HITL-5 Exact", "green", "o-") +# plot_with_error(x_vals, cov5_total, cov5_std, "HITL-5 Covered", "green", "--") +# plot_with_error(x_vals, ofit5_total, ofit5_std, "HITL-5 Overfit", "green", ":") +# plot_with_error(x_vals, miss5, miss5_std, "HITL-5 Missed", "green", "-.") + +plt.xlabel("Number of Demonstrations") +plt.ylabel("Operators") +plt.title("Operator Learning Comparison: Exact, Covered, Overfit, Missed") +plt.legend(loc="upper left", fontsize="small", ncol=2) +plt.grid(True) +plt.ylim(0, 10) +plt.tight_layout() +plt.savefig("operator_learning_summary.png") +plt.show() diff --git a/test_minibehavior_envs.txt b/test_minibehavior_envs.txt new file mode 100644 index 0000000000..c03a2ccdb3 --- /dev/null +++ b/test_minibehavior_envs.txt @@ -0,0 +1,24 @@ +python3 test_solve_task.py --env "MiniGrid-CollectMisplacedItems-16x16-N2-v0" --save True --seed 0 + +[ + 'MiniGrid-BoxingBooksUpForStorage-16x16-N2-v0', + 'MiniGrid-CleaningACar-16x16-N2-v0', + 'MiniGrid-CleaningShoes-16x16-N2-v0', #1 + 'MiniGrid-CleaningUpTheKitchenOnly-16x16-N2-v0', + 'MiniGrid-CollectMisplacedItems-16x16-N2-v0', + 'MiniGrid-InstallingAPrinter-16x16-N2-v0', + 'MiniGrid-LayingWoodFloors-16x16-N2-v0', + 'MiniGrid-MakingTea-16x16-N2-v0', + 'MiniGrid-MovingBoxesToStorage-16x16-N2-v0', + 'MiniGrid-OpeningPackages-16x16-N2-v0', + 'MiniGrid-OrganizingFileCabinet-16x16-N2-v0', + [DEBUG]'MiniGrid-PreparingSalad-16x16-N2-v0', + 'MiniGrid-PuttingAwayDishesAfterCleaning-16x16-N2-v0', + 'MiniGrid-SettingUpCandles-16x16-N2-v0', #1 + 'MiniGrid-SortingBooks-16x16-N2-v0', + 'MiniGrid-StoringFood-16x16-N2-v0', + [DEBUG]'MiniGrid-ThawingFrozenFood-16x16-N2-v0', + 'MiniGrid-ThrowingAwayLeftovers-16x16-N2-v0', + 'MiniGrid-WashingPotsAndPans-16x16-N2-v0', + 'MiniGrid-WateringHouseplants-16x16-N2-v0' +] \ No newline at end of file diff --git a/test_minigrid.py b/test_minigrid.py new file mode 100644 index 0000000000..51bae0683b --- /dev/null +++ b/test_minigrid.py @@ -0,0 +1,14 @@ +import gymnasium as gym +env = gym.make("MiniGrid-Fetch-8x8-N3-v0", render_mode="human") +observation, info = env.reset(seed=42) +import ipdb; ipdb.set_trace() +for _ in range(1000): + action = int(input("Action: ")) # action entered manually by the user + observation, reward, terminated, truncated, info = env.step(action) + + if terminated or truncated: + observation, info = env.reset() +env.close() + +# Need a look at new region operator + diff --git a/test_operator_learning_all.py b/test_operator_learning_all.py new file mode 100644 index 0000000000..4cebb643f1 --- /dev/null +++ b/test_operator_learning_all.py @@ -0,0 +1,445 @@ +import numpy as np +from gym.spaces import Box +import re +import pickle as pkl + +from predicators import utils +from predicators.nsrt_learning.nsrt_learning_main import learn_nsrts_from_data +from predicators.structs import Action, LowLevelTrajectory, Predicate, State, \ + Type, GroundAtom, Task +import glob + +demo_files = sorted([filename for filename in glob.glob("/Users/shashlik/Documents/GitHub/predicators/demos/*/*")]) +demo_tasks = set([demo_file.split("/")[-1].split("_")[0] for
demo_file in demo_files]) + +utils.reset_config({ + "strips_learner": "pnad_search", + "segmenter": "every_step", + "disable_harmlessness_check": True, + "pnad_search_load_initial": False, + "min_data_for_nsrt": 0, + "min_perc_data_for_nsrt": 0, + "pnad_search_timeout":1000.0 + }) + +# Load and do this from MiniBehavior Demo + +def get_demo_traj(demo_file, verbose=True): + with open(demo_file, 'rb') as f: + data = pkl.load(f) + + last_skill = "Move" + state = [a for a in data[1][1] if "infovofrobot" not in a] + states = [state] + actions = [] + for step in data.keys(): + obs = data[step][0]['image'] + direction = data[step][0]['direction'] + action = data[step][2] + skill = None + + if "forward" in str(action) or \ + "left" in str(action) or \ + "right" in str(action): + + skill = "Move" + else: + skill = str(action) + + has_effect = True + try: + next_obs = data[step][3]['image'] + next_direction = data[step][3]['direction'] + if np.allclose(obs, next_obs) and (direction == next_direction): + has_effect = False + except: + pass + + if has_effect: + if last_skill != skill: + if verbose: + print("#") + print(last_skill) + try: + next_state = [a for a in data[step][1] if "infovofrobot" not in a] + if verbose: + print("PREV:", set(state)) + print("ADD:", set(next_state) - set(state)) + print("DEL:", set(state) - set(next_state)) + state = next_state + actions.append(last_skill) + states.append(state) + except: + pass + last_skill = skill + else: + if verbose: + print("#") + print(last_skill) + next_state = [a for a in data[step][4] if "infovofrobot" not in a] + if verbose: + print("PREV:", set(state)) + print("ADD:", set(next_state) - set(state)) + print("DEL:", set(state) - set(next_state)) + state = next_state + if verbose: + print("#") + actions.append(last_skill) + states.append(state) + + return LowLevelTrajectory(states, actions, _is_demo=True, _train_task_idx=0) + +def parse_objs_preds_and_options(trajectory, train_task_idx=0): + objs = set() + preds = set() + options = set() + state = None + states = [] + actions = [] + ground_atoms_traj = [] + obj_type = Type("obj_type", ["is_obj"]) + + for i, s in enumerate(trajectory.states): + ground_atoms = set() + for pred_str in s: + pred = None + choice = [] + pattern = re.compile(r"(\w+)\((.*?)\)") + match = pattern.match(pred_str) + if match: + func_name = match.group(1) + args = match.group(2).split(',') if match.group(2) else [] + for arg in args: + obj = obj_type(arg.strip()) + choice.append(obj) + objs.add(obj) + if len(args) == 1: + pred = Predicate(func_name, [obj_type], lambda s, o: True) + preds.add(pred) + elif len(args) == 2: + pred = Predicate(func_name, [obj_type, obj_type], lambda s, o: True) + preds.add(pred) + else: + raise NotImplementedError(pred_str) + ground_atoms.add(GroundAtom(pred, choice)) + states.append(state) + ground_atoms_traj.append(ground_atoms) + + if i < len(trajectory.actions): + a_name = trajectory.actions[i] + name_to_actions = { + "Move": 0, + "Actions.pickup_0": 3, + "Actions.pickup_1": 4, + "Actions.pickup_2": 5, + "Actions.drop_0": 6, + "Actions.drop_1": 7, + "Actions.drop_2": 8, + "Actions.drop_in": 9, + "Actions.toggle": 10, + "Actions.close": 11, + "Actions.open": 12, + "Actions.cook": 13, + "Actions.slice": 14 + } + + param_option = utils.SingletonParameterizedOption( + a_name, lambda s, m, o, p, _name=a_name: Action(name_to_actions[_name])) # bind a_name at definition time to avoid late-binding in the closure + options.add(param_option) + option = param_option.ground([], []) + action = option.policy(state) + action.set_option(option) + actions.append(action) + + return objs,
preds, options, (LowLevelTrajectory([{obj:[0.0] for obj in objs} for _ in states], actions, _is_demo=True, _train_task_idx=train_task_idx), ground_atoms_traj) + + +# dataset = [] +# ground_atom_dataset = [] +# tasks = [] +# action_space = Box(0, 7, (1, )) + +# task_name = "MiniGrid-BoxingBooksUpForStorage-16x16-N2-v0" +# for demo_file in demo_files: +# if task_name in demo_file: +# demo_traj = get_demo_traj(demo_file=demo_file, verbose=False) + +# idx = len(dataset) +# demo_traj = LowLevelTrajectory(demo_traj.states[0:5], demo_traj.actions[0:4], _is_demo=True, _train_task_idx=idx) + +# dataset += [demo_traj] +# objs, preds, options, ground_atoms_traj = parse_objs_preds_and_options(demo_traj, train_task_idx=idx) +# ground_atom_dataset += [ground_atoms_traj] +# goal = set([atom for atom in ground_atoms_traj[1][-1] if "inside(" in str(atom)]) +# tasks += [Task(State({}, None), goal)] + +# print("#"*30) +# print(task_name) +# print("#"*30) + +# nsrts, _, _ = learn_nsrts_from_data(dataset, tasks, +# preds, +# options, +# action_space, +# ground_atom_dataset, +# sampler_learner="neural", +# annotations=None) + +# assert len(nsrts) == 3 +# import ipdb; ipdb.set_trace() +# quit() + + +# dataset = [] +# ground_atom_dataset = [] +# tasks = [] +# action_space = Box(0, 7, (1, )) + +# task_name = "MiniGrid-CollectMisplacedItems-16x16-N2-v0" +# for demo_file in demo_files: +# if task_name in demo_file: +# demo_traj = get_demo_traj(demo_file=demo_file, verbose=False) + +# idx = len(dataset) +# demo_traj = LowLevelTrajectory(demo_traj.states[0:5], demo_traj.actions[0:4], _is_demo=True, _train_task_idx=idx) + +# dataset += [demo_traj] +# objs, preds, options, ground_atoms_traj = parse_objs_preds_and_options(demo_traj, train_task_idx=idx) +# ground_atom_dataset += [ground_atoms_traj] +# goal = set([atom for atom in ground_atoms_traj[1][-1] if "onTop(" in str(atom) and "table_1" in str(atom)]) +# tasks += [Task(State({}, None), goal)] + +# print("#"*30) +# print(task_name) +# print("#"*30) + +# nsrts, _, _ = learn_nsrts_from_data(dataset, tasks, +# preds, +# options, +# action_space, +# ground_atom_dataset, +# sampler_learner="neural", +# annotations=None) + +# assert len(nsrts) == 3 + +# import ipdb; ipdb.set_trace() + +# dataset = [] +# ground_atom_dataset = [] +# tasks = [] +# action_space = Box(0, 7, (1, )) +# all_options = set() + +# task_name = "MiniGrid-SortingBooks-16x16-N2-v0" +# for demo_file in demo_files: +# if task_name in demo_file: +# demo_traj = get_demo_traj(demo_file=demo_file, verbose=False) + +# idx = len(dataset) +# demo_traj = LowLevelTrajectory(demo_traj.states[0:5], demo_traj.actions[0:4], _is_demo=True, _train_task_idx=idx) + +# dataset += [demo_traj] +# objs, preds, options, ground_atoms_traj = parse_objs_preds_and_options(demo_traj, train_task_idx=idx) +# all_options = all_options | options +# ground_atom_dataset += [ground_atoms_traj] +# goal = set([atom for atom in ground_atoms_traj[1][-1] if "onTop(" in str(atom) and "shelf" in str(atom)]) +# tasks += [Task(State({}, None), goal)] + +# print("#"*30) +# print(task_name) +# print("#"*30) + +# nsrts, _, _ = learn_nsrts_from_data(dataset, tasks, +# preds, +# all_options, +# action_space, +# ground_atom_dataset, +# sampler_learner="neural", +# annotations=None) + +# import ipdb; ipdb.set_trace() +# # assert len(nsrts) == 3 + +# dataset = [] +# ground_atom_dataset = [] +# tasks = [] +# action_space = Box(0, 7, (1, )) + +# task_name = "MiniGrid-ThrowingAwayLeftovers-16x16-N2-v0" +# for demo_file in demo_files: +# if task_name in 
demo_file: +# demo_traj = get_demo_traj(demo_file=demo_file, verbose=False) + +# idx = len(dataset) +# demo_traj = LowLevelTrajectory(demo_traj.states[0:5], demo_traj.actions[0:4], _is_demo=True, _train_task_idx=idx) + +# dataset += [demo_traj] +# objs, preds, options, ground_atoms_traj = parse_objs_preds_and_options(demo_traj, train_task_idx=idx) +# ground_atom_dataset += [ground_atoms_traj] +# goal = set([atom for atom in ground_atoms_traj[1][-1] if "inside(" in str(atom)]) +# tasks += [Task(State({}, None), goal)] + +# print("#"*30) +# print(task_name) +# print("#"*30) + +# nsrts, _, _ = learn_nsrts_from_data(dataset, tasks, +# preds, +# options, +# action_space, +# ground_atom_dataset, +# sampler_learner="neural", +# annotations=None) + +# # assert len(nsrts) == 3 + + +# dataset = [] +# ground_atom_dataset = [] +# tasks = [] +# action_space = Box(0, 7, (1, )) + +# task_name = "MiniGrid-PuttingAwayDishesAfterCleaning-16x16-N2-v0" +# for demo_file in demo_files: +# if task_name in demo_file: +# demo_traj = get_demo_traj(demo_file=demo_file, verbose=False) + +# idx = len(dataset) +# demo_traj = LowLevelTrajectory(demo_traj.states, demo_traj.actions, _is_demo=True, _train_task_idx=idx) + +# dataset += [demo_traj] +# objs, preds, options, ground_atoms_traj = parse_objs_preds_and_options(demo_traj, train_task_idx=idx) +# ground_atom_dataset += [ground_atoms_traj] +# goal = set([atom for atom in ground_atoms_traj[1][-1] if "inside(" in str(atom)]) +# tasks += [Task(State({}, None), goal)] + +# print("#"*30) +# print(task_name) +# print("#"*30) + +# print("Skipped") + +# # nsrts, _, _ = learn_nsrts_from_data(dataset, tasks, +# # preds, +# # options, +# # action_space, +# # ground_atom_dataset, +# # sampler_learner="neural", +# # annotations=None) + +# # assert len(nsrts) == 2 + + +# #### BROKEN ##### +# dataset = [] +# ground_atom_dataset = [] +# tasks = [] +# action_space = Box(0, 7, (1, )) + +# task_name = "MiniGrid-CleaningACar-16x16-N2-v0" +# for demo_file in demo_files: +# if task_name in demo_file: +# demo_traj = get_demo_traj(demo_file=demo_file, verbose=False) + +# idx = len(dataset) +# demo_traj = LowLevelTrajectory(demo_traj.states, demo_traj.actions, _is_demo=True, _train_task_idx=idx) + +# dataset += [demo_traj] +# objs, preds, options, ground_atoms_traj = parse_objs_preds_and_options(demo_traj, train_task_idx=idx) +# ground_atom_dataset += [ground_atoms_traj] +# goal = set([atom for atom in ground_atoms_traj[1][-1] if "inside(" in str(atom) or "dustyable(" in str(atom)]) +# tasks += [Task(State({}, None), goal)] + +# print("#"*30) +# print(task_name) +# print("#"*30) + +# print("Broken - No dustyable") + +# # import ipdb; ipdb.set_trace() + +# # nsrts, _, _ = learn_nsrts_from_data(dataset, tasks, +# # preds, +# # options, +# # action_space, +# # ground_atom_dataset, +# # sampler_learner="neural", +# # annotations=None) + +# # assert len(nsrts) == 2 + +# dataset = [] +# ground_atom_dataset = [] +# tasks = [] +# action_space = Box(0, 7, (1, )) + +# task_name = "MiniGrid-WateringHouseplants-16x16-N2-v0" +# for demo_file in demo_files: +# if task_name in demo_file: +# demo_traj = get_demo_traj(demo_file=demo_file, verbose=False) + +# idx = len(dataset) +# demo_traj = LowLevelTrajectory(demo_traj.states, demo_traj.actions, _is_demo=True, _train_task_idx=idx) + +# dataset += [demo_traj] +# objs, preds, options, ground_atoms_traj = parse_objs_preds_and_options(demo_traj, train_task_idx=idx) +# ground_atom_dataset += [ground_atoms_traj] +# goal = set([atom for atom in 
ground_atoms_traj[1][-1] if "soakable(" in str(atom)]) +# tasks += [Task(State({}, None), goal)] + +# print("#"*30) +# print(task_name) +# print("#"*30) + +# print("Skipped") + +# # nsrts, _, _ = learn_nsrts_from_data(dataset, tasks, +# # preds, +# # options, +# # action_space, +# # ground_atom_dataset, +# # sampler_learner="neural", +# # annotations=None) + +# # assert len(nsrts) == 2 + +# dataset = [] +# ground_atom_dataset = [] +# tasks = [] +# action_space = Box(0, 7, (1, )) + +# task_name = "MiniGrid-OpeningPackages-16x16-N2-v0" +# for demo_file in demo_files: +# if task_name in demo_file: +# demo_traj = get_demo_traj(demo_file=demo_file, verbose=False) + +# idx = len(dataset) +# demo_traj = LowLevelTrajectory(demo_traj.states, demo_traj.actions, _is_demo=True, _train_task_idx=idx) + +# dataset += [demo_traj] +# objs, preds, options, ground_atoms_traj = parse_objs_preds_and_options(demo_traj, train_task_idx=idx) +# ground_atom_dataset += [ground_atoms_traj] +# goal = set([atom for atom in ground_atoms_traj[1][-1] if "openable(" in str(atom)]) +# tasks += [Task(State({}, None), goal)] + +# print("#"*30) +# print(task_name) +# print("#"*30) + +# nsrts, _, _ = learn_nsrts_from_data(dataset, tasks, +# preds, +# options, +# action_space, +# ground_atom_dataset, +# sampler_learner="neural", +# annotations=None) + +# assert len(nsrts) == 2 + + +# ########################################## +# # Generate Random Operator Demos +# ########################################## + + diff --git a/test_random_operator_learning.py b/test_random_operator_learning.py new file mode 100644 index 0000000000..5749f0525e --- /dev/null +++ b/test_random_operator_learning.py @@ -0,0 +1,20 @@ + + + + +# TODO # +# 1. Generate Random Operators +# - Parameters +# - Precondions +# - Add Effects +# - Del Effects +# - Option +# 2. Generate Random Tasks +# - Current Atoms +# - Goal Atoms +# 3. Use those Operators and Task to generate trajectory data +# - Search (Output Plan) +# - Step by Step get states and actions +# 4. Learn NSRTs +# 5. 
Assert these are the same as the Random Operators + diff --git a/test_saved.NSRTs.txt b/test_saved.NSRTs.txt new file mode 100644 index 0000000000..ffd890216c --- /dev/null +++ b/test_saved.NSRTs.txt @@ -0,0 +1,7 @@ +NSRT-Move0: + Parameters: [?x0:obj_type] + Preconditions: [] + Add Effects: [inreachofrobot(?x0:obj_type)] + Delete Effects: [~inreachofrobot(?x0:obj_type)] + Ignore Effects: [inreachofrobot, ~inreachofrobot] + Option Spec: Move() \ No newline at end of file diff --git a/test_saved.NSRTs_copy.txt b/test_saved.NSRTs_copy.txt new file mode 100644 index 0000000000..ae9e5c8116 --- /dev/null +++ b/test_saved.NSRTs_copy.txt @@ -0,0 +1,14 @@ +NSRT-Move0: + Parameters: [?x0:obj_type] + Preconditions: [atsamelocation(?x0:obj_type, ?x0:obj_type), insameroomasrobot(?x0:obj_type), onfloor(?x0:obj_type)] + Add Effects: [inreachofrobot(?x0:obj_type)] + Delete Effects: [] + Ignore Effects: [inreachofrobot] + Option Spec: Move() +NSRT-Actions.open0: + Parameters: [?x0:obj_type] + Preconditions: [atsamelocation(?x0:obj_type, ?x0:obj_type), inreachofrobot(?x0:obj_type), insameroomasrobot(?x0:obj_type), onfloor(?x0:obj_type)] + Add Effects: [openable(?x0:obj_type)] + Delete Effects: [] + Ignore Effects: [] + Option Spec: Actions.open() diff --git a/test_segment_traj.py b/test_segment_traj.py new file mode 100644 index 0000000000..07f6452bda --- /dev/null +++ b/test_segment_traj.py @@ -0,0 +1,101 @@ +import pickle as pkl +import numpy as np +from predicators.structs import Action, LowLevelTrajectory, Predicate, State, \ + Type +from test_operator_learning_all import get_demo_traj, demo_files +from test_colla_results import OperatorLearningAgent + +completed = [ + 'MiniGrid-BoxingBooksUpForStorage-16x16-N2-v0', + 'MiniGrid-CleaningACar-16x16-N2-v0', + 'MiniGrid-CleaningShoes-16x16-N2-v0', #1 + 'MiniGrid-CleaningUpTheKitchenOnly-16x16-N2-v0', + 'MiniGrid-CollectMisplacedItems-16x16-N2-v0', + 'MiniGrid-InstallingAPrinter-16x16-N2-v0', + 'MiniGrid-LayingWoodFloors-16x16-N2-v0', + 'MiniGrid-MakingTea-16x16-N2-v0', + 'MiniGrid-MovingBoxesToStorage-16x16-N2-v0', + 'MiniGrid-OpeningPackages-16x16-N2-v0', + 'MiniGrid-OrganizingFileCabinet-16x16-N2-v0', + #[DEBUG]'MiniGrid-PreparingSalad-16x16-N2-v0', + 'MiniGrid-PuttingAwayDishesAfterCleaning-16x16-N2-v0', + 'MiniGrid-SettingUpCandles-16x16-N2-v0', #1 + 'MiniGrid-SortingBooks-16x16-N2-v0', + 'MiniGrid-StoringFood-16x16-N2-v0', + #[DEBUG]'MiniGrid-ThawingFrozenFood-16x16-N2-v0', + 'MiniGrid-ThrowingAwayLeftovers-16x16-N2-v0', + 'MiniGrid-WashingPotsAndPans-16x16-N2-v0', + 'MiniGrid-WateringHouseplants-16x16-N2-v0' +] + +task_info = {} + +for demo_file in demo_files: + # print("#"*60) + # print(demo_file.split("/")[-1]) + # print("#"*60) + # print("# PLAN #") + traj = get_demo_traj(demo_file, verbose=False) + add_count = 0 + for i, action in enumerate(traj.actions): + curr_state = set(traj.states[i]) + next_state = set(traj.states[i+1]) + del_effs = curr_state - next_state + add_effs = next_state - curr_state + # print(action) + # print("DEL:", del_effs) + # print("ADD:", add_effs) + # print() + add_count += len(add_effs) + assert len(add_effs) != 0 or str(action) == "Move" + task_name = demo_file.split("/")[-1].split("_")[0] + agent = OperatorLearningAgent("cluster-intersect", strips_learner="cluster_and_intersect") + agent.get_data(task_name=task_name) + goal = agent.parse_goal(task_name=task_name, ground_atoms_state=agent.ground_atoms_traj[1][-1]) + task_info[demo_file.split("/")[-1]] = (len(traj.actions), len(goal), add_count) + +i = 0 +for k,v in 
sorted([(k,v) for k,v in task_info.items()], key=lambda x: x[1][2]): # by add effects + i+=1 + print("|", v[0], "| goal length:", v[1], "| add count:", v[2], "|", k.split("_")[0], i) + # for atom in agent.parse_goal(task_name=task_name, ground_atoms_state=agent.ground_atoms_traj[1][-1]): + # print(atom) + +######################################### + +# ### NEED To Turn Images into Objects or Save Object Centric State +# from minigrid.wrappers import * +# from mini_behavior.states import * + +# env = gym.make('MiniGrid-SortingBooks-16x16-N2-v0') +# env.reset() + +# # AbilityState +# # AbsoluteObjectState +# # RelativeObjectState +# # ObjectProperty + +# def get_lifted_state(env): +# mb_state = env.get_state() +# grid = mb_state['grid'] +# agent_pos = mb_state['agent_pos'] +# agent_dir = mb_state['agent_dir'] +# objs = mb_state['objs'] +# obj_instances = mb_state['obj_instances'] +# ground_atoms = [] +# for k, o in obj_instances.items(): +# for pred_name, pred in o.states.items(): +# if isinstance(o.states[pred_name], AbsoluteObjectState): +# if o.states[pred_name].get_value(env): +# ground_atoms.append(pred_name+'('+k+')') +# elif isinstance(o.states[pred_name], AbilityState): +# if o.states[pred_name].get_value(env): +# ground_atoms.append(pred_name+'('+k+')') +# elif isinstance(o.states[pred_name], ObjectProperty): +# if o.states[pred_name].get_value(env): +# ground_atoms.append(pred_name+'('+k+')') +# elif isinstance(o.states[pred_name], RelativeObjectState): +# for k2, o2 in obj_instances.items(): +# if o.states[pred_name].get_value(o2, env=env): +# ground_atoms.append(pred_name+'('+k+','+k2+')') +# return ground_atoms \ No newline at end of file diff --git a/test_solve_task.py b/test_solve_task.py new file mode 100644 index 0000000000..d419502719 --- /dev/null +++ b/test_solve_task.py @@ -0,0 +1,307 @@ +#!/usr/bin/env python3 + +import argparse +from minigrid.wrappers import * +from mini_behavior.window import Window +from mini_behavior.utils.save import get_step, save_demo +from mini_behavior.grid import GridDimension +import numpy as np +from PIL import Image +from mini_behavior.states import * + +# Size in pixels of a tile in the full-scale human view +TILE_PIXELS = 32 +show_furniture = False + + +def redraw(img): + if not args.agent_view: + img = env.render() + window.no_closeup() + window.set_inventory(env) + window.show_img(img) + image_path = "output_image.jpeg" + window.save_img(image_path) + +def render_furniture(): + global show_furniture + show_furniture = not show_furniture + + if show_furniture: + img = np.copy(env.furniture_view) + + # i, j = env.agent.cur_pos + i, j = env.agent_pos + ymin = j * TILE_PIXELS + ymax = (j + 1) * TILE_PIXELS + xmin = i * TILE_PIXELS + xmax = (i + 1) * TILE_PIXELS + + img[ymin:ymax, xmin:xmax, :] = GridDimension.render_agent( + img[ymin:ymax, xmin:xmax, :], env.agent_dir) + img = env.render_furniture_states(img) + + window.show_img(img) + else: + obs = env.gen_obs() + redraw(obs) + + +def show_states(): + imgs = env.render_states() + window.show_closeup(imgs) + + +def reset(): + if args.seed != -1: + env.seed(args.seed) + + obs = env.reset() + + if hasattr(env, 'mission'): + print('Mission: %s' % env.mission) + window.set_caption(env.mission) + + redraw(obs) + + +def load(): + if args.seed != -1: + env.seed(args.seed) + + env.reset() + obs = env.load_state(args.load) + + if hasattr(env, 'mission'): + print('Mission: %s' % env.mission) + window.set_caption(env.mission) + + redraw(obs) + +def get_lifted_state(env): + objs = env.objs + 
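# collect every object instance into a name -> object map +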
obj_instances = {} + for obj_type, obj_list in objs.items(): + for obj in obj_list: + obj_instances[obj.name] = obj + ground_atoms = [] + try: + for k, o in obj_instances.items(): + for pred_name, pred in o.states.items(): + if isinstance(o.states[pred_name], AbsoluteObjectState): + if o.states[pred_name].get_value(env): + ground_atoms.append(pred_name+'('+k+')') + elif isinstance(o.states[pred_name], AbilityState): + if o.states[pred_name].get_value(env): + ground_atoms.append(pred_name+'('+k+')') + elif isinstance(o.states[pred_name], ObjectProperty): + if o.states[pred_name].get_value(env): + ground_atoms.append(pred_name+'('+k+')') + elif isinstance(o.states[pred_name], RelativeObjectState): + for k2, o2 in obj_instances.items(): + if o.check_rel_state(env, o2, pred_name): + ground_atoms.append(pred_name+'('+k+','+k2+')') + except: + import ipdb; ipdb.set_trace() + return ground_atoms + + + +def step(action): + prev_obs = env.gen_obs() + + prev_state = get_lifted_state(env) + obs, reward, done, terminated, info = env.step(action) + state = get_lifted_state(env) + + print('step=%s, reward=%.2f' % (env.step_count, reward)) + for atom in state: + print(atom) + + if args.save: + all_steps[env.step_count] = (prev_obs, prev_state, action, obs, state) + + if done: + print('done!') + if args.save: + save_demo(all_steps, args.env, env.episode) + reset() + else: + redraw(obs) + + +def switch_dim(dim): + env.switch_dim(dim) + print(f'switching to dim: {env.render_dim}') + obs = env.gen_obs() + redraw(obs) + + +def key_handler_cartesian(event): + print('pressed', event.key) + if event.key == 'escape': + window.close() + return + if event.key == 'backspace': + reset() + return + if event.key == 'left': + step(env.actions.left) + return + if event.key == 'right': + step(env.actions.right) + return + if event.key == 'up': + step(env.actions.forward) + return + # Spacebar + if event.key == ' ': + render_furniture() + return + if event.key == 'pageup': + step('choose') + return + if event.key == 'enter': + env.save_state() + return + if event.key == 'pagedown': + show_states() + return + if event.key == '0': + switch_dim(None) + return + if event.key == '1': + switch_dim(0) + return + if event.key == '2': + switch_dim(1) + return + if event.key == '3': + switch_dim(2) + return + +def key_handler_primitive(event): + print('pressed', event.key) + if event.key == 'escape': + window.close() + return + if event.key == 'left': + step(env.actions.left) + return + if event.key == 'right': + step(env.actions.right) + return + if event.key == 'up': + step(env.actions.forward) + return + if event.key == '0': + step(env.actions.pickup_0) + return + if event.key == '1': + step(env.actions.pickup_1) + return + if event.key == '2': + step(env.actions.pickup_2) + return + if event.key == '3': + step(env.actions.drop_0) + return + if event.key == '4': + step(env.actions.drop_1) + return + if event.key == '5': + step(env.actions.drop_2) + return + if event.key == 't': + step(env.actions.toggle) + return + if event.key == 'o': + step(env.actions.open) + return + if event.key == 'c': + step(env.actions.close) + return + if event.key == 'k': + step(env.actions.cook) + return + if event.key == '6': + step(env.actions.slice) + return + if event.key == 'i': + step(env.actions.drop_in) + return + if event.key == 'pagedown': + show_states() + return + + +parser = argparse.ArgumentParser() +parser.add_argument( + "--env", + help="gym environment to load", + default='MiniGrid-InstallingAPrinter-8x8-N2-v0' +) +parser.add_argument( 
+ "--seed", + type=int, + help="random seed to generate the environment with", + default=-1 +) +parser.add_argument( + "--tile_size", + type=int, + help="size at which to render tiles", + default=32 +) +parser.add_argument( + '--agent_view', + default=False, + help="draw the agent sees (partially observable view)", + action='store_true' +) +# NEW +parser.add_argument( + "--save", + default=False, + help="whether or not to save the demo_16" +) +# NEW +parser.add_argument( + "--load", + default=None, + help="path to load state from" +) + +args = parser.parse_args() +# ### +# all_envs = [env_id for env_id in gym.envs.registry.keys() if "MiniGrid-" in env_id] +# print(args) +# print(all_envs) +# quit() +# ### + +env = gym.make(args.env) +env.teleop_mode() +if args.save: + # We do not support save for cartesian action space + assert env.mode == "primitive" + +all_steps = {} + +if args.agent_view: + env = RGBImgPartialObsWrapper(env) + env = ImgObsWrapper(env) + +window = Window('mini_behavior - ' + args.env) +if env.mode == "cartesian": + window.reg_key_handler(key_handler_cartesian) +elif env.mode == "primitive": + window.reg_key_handler(key_handler_primitive) + +if args.load is None: + reset() +else: + load() + +# Blocking event loop +window.show(block=True) \ No newline at end of file diff --git a/willie_req.txt b/willie_req.txt new file mode 100644 index 0000000000..30051c5c13 --- /dev/null +++ b/willie_req.txt @@ -0,0 +1,180 @@ +aiodns==3.2.0 +aiohappyeyeballs==2.4.4 +aiohttp==3.10.11 +aiosignal==1.3.2 +annotated-types==0.7.0 +anyio==4.9.0 +apriltag==0.0.16 +astroid==2.11.7 +asttokens==3.0.0 +async-timeout==5.0.1 +attrs==24.3.0 +beautifulsoup4==4.12.3 +bosdyn-api==4.1.1 +bosdyn-client==4.1.1 +bosdyn-core==4.1.1 +cachetools==5.5.2 +ccxt==4.4.42 +certifi==2024.12.14 +cffi==1.17.1 +charset-normalizer==3.4.0 +click==8.1.8 +cloudpickle==3.1.1 +-e git+https://github.com/concepts-ai/Concepts.git@44ecfd7aff3d48f967a92e455d78cc87f6f97971#egg=concepts +contourpy==1.3.1 +cryptography==44.0.0 +cycler==0.12.1 +Cython==3.0.12 +decorator==4.4.2 +Deprecated==1.2.18 +dill==0.3.5.1 +distro==1.9.0 +exceptiongroup==1.2.2 +executing==2.1.0 +Farama-Notifications==0.0.4 +filelock==3.17.0 +fonttools==4.56.0 +frozendict==2.4.6 +frozenlist==1.5.0 +fsspec==2025.2.0 +google-ai-generativelanguage==0.6.15 +google-api-core==2.24.2 +google-api-python-client==2.167.0 +google-auth==2.39.0 +google-auth-httplib2==0.2.0 +google-generativeai==0.8.5 +googleapis-common-protos==1.70.0 +graphlib_backport==1.1.0 +grpcio==1.71.0 +grpcio-status==1.62.3 +gym==0.26.2 +gym-minigrid==1.0.3 +gym-notices==0.0.8 +gym-sokoban @ git+https://github.com/Learning-and-Intelligent-Systems/gym-sokoban.git@0ff1758c3cade36339a9ff1c766daceadc65bb6a +gymnasium==0.29.1 +h11==0.14.0 +h5py==3.13.0 +html5lib==1.1 +httpcore==1.0.8 +httplib2==0.22.0 +httpx==0.27.0 +idna==3.10 +ImageHash==4.3.2 +imageio==2.22.2 +imageio-ffmpeg==0.6.0 +iniconfig==2.1.0 +ipdb==0.13.13 +ipython==8.31.0 +isort==5.13.2 +jedi==0.19.2 +Jinja2==3.1.6 +joblib==1.4.2 +kiwisolver==1.4.8 +lark==1.2.2 +lazy-object-proxy==1.11.0 +lisdf==0.1.1 +lxml==5.3.0 +markdown-it-py==3.0.0 +MarkupSafe==3.0.2 +matplotlib==3.6.2 +matplotlib-inline==0.1.7 +mccabe==0.7.0 +mdurl==0.1.2 +-e git+https://github.com/StanfordVL/mini_behavior.git@66155f42b1f273cab5a6b82e5a007461125e0d26#egg=mini_behavior +minigrid==3.0.0 +moviepy==1.0.3 +mpmath==1.3.0 +multidict==6.1.0 +multiprocess==0.70.13 +multitasking==0.0.11 +mypy==1.8.0 +mypy_extensions==1.1.0 +mysql-connector-python==9.1.0 +networkx==3.4.2 +nltk==3.9.1 
+numpy==1.23.5 +openai==1.19.0 +opencv-python==4.7.0.72 +packaging==24.2 +pandas==1.5.1 +pandasql==0.7.3 +parso==0.8.4 +pathos==0.2.9 +pbrspot @ git+https://github.com/NishanthJKumar/pbrspot.git@0ad581da646523325bfb808625a87e6a898fd2bc +peewee==3.17.8 +pexpect==4.9.0 +pg3 @ git+https://github.com/tomsilver/pg3.git@d93fd9f5037b58fa1e10f65555558474415adadc +pillow==10.3.0 +pkgconfig==1.5.5 +platformdirs==4.3.6 +pluggy==1.5.0 +pox==0.3.6 +ppft==1.7.7 +-e git+https://github.com/bdaiinstitute/predicators.git@a40f8cebd099ad8546532ca5b1feaa7cdd7d2240#egg=predicators +proglog==0.1.11 +prompt_toolkit==3.0.48 +propcache==0.2.1 +proto-plus==1.26.1 +protobuf==4.22.0 +ptyprocess==0.7.0 +pure_eval==0.2.3 +py==1.11.0 +pyasn1==0.6.1 +pyasn1_modules==0.4.2 +pybullet==3.2.7 +pycares==4.5.0 +pycparser==2.22 +pydantic==2.11.3 +pydantic_core==2.33.1 +pygame==2.6.1 +Pygments==2.18.0 +PyJWT==2.10.1 +pylint==2.14.5 +pynmea2==1.19.0 +pyparsing==3.2.1 +pyperplan==2.1 +pytest==7.1.3 +python-dateutil==2.9.0.post0 +python-dotenv==1.0.1 +pytz==2024.2 +PyWavelets==1.8.0 +PyYAML==6.0 +recordclass==0.23.1 +regex==2024.11.6 +requests==2.32.3 +rich==14.0.0 +rsa==4.9.1 +scikit-image==0.19.3 +scikit-learn==1.1.2 +scipy==1.9.3 +seaborn==0.12.1 +six==1.17.0 +slack_bolt==1.23.0 +slack_sdk==3.35.0 +smepy @ git+https://github.com/sebdumancic/structure_mapping.git@df2553a1e07cedebf4ceb01992b8d275d15dc22c +sniffio==1.3.1 +soupsieve==2.6 +SQLAlchemy==2.0.36 +stack-data==0.6.3 +sympy==1.13.1 +tabulate==0.9.0 +tenacity==9.1.2 +threadpoolctl==3.6.0 +tifffile==2025.3.30 +tomli==2.2.1 +tomlkit==0.13.2 +torch==2.0.1 +torchvision==0.21.0 +tqdm==4.67.1 +traitlets==5.14.3 +types-PyYAML==6.0.12.20250402 +typing-inspection==0.4.0 +typing_extensions==4.12.2 +tzdata==2024.2 +uritemplate==4.1.1 +urllib3==2.2.3 +wcwidth==0.2.13 +webencodings==0.5.1 +wrapt==1.17.2 +yarl==1.18.3 +yfinance==0.2.51