From 15f4c6dbc365a7fced9ba121052893aa618b5cb4 Mon Sep 17 00:00:00 2001 From: emilianopp Date: Tue, 12 Aug 2025 19:38:13 +0000 Subject: [PATCH 01/10] xray bugfix --- src/agentlab/analyze/agent_xray.py | 58 +++++++++++++++++++++++------- 1 file changed, 45 insertions(+), 13 deletions(-) diff --git a/src/agentlab/analyze/agent_xray.py b/src/agentlab/analyze/agent_xray.py index 84dc423d..db95a59b 100644 --- a/src/agentlab/analyze/agent_xray.py +++ b/src/agentlab/analyze/agent_xray.py @@ -74,6 +74,7 @@ class EpisodeId: agent_id: str = None task_name: str = None seed: int = None + row_index: int = None # unique row index to disambiguate selections @dataclass @@ -99,6 +100,24 @@ def update_exp_result(self, episode_id: EpisodeId): if self.result_df is None or episode_id.task_name is None or episode_id.seed is None: self.exp_result = None + # Prefer selecting by explicit row index if available + if episode_id.row_index is not None: + tmp_df = self.result_df.reset_index(inplace=False) + tmp_df["_row_index"] = tmp_df.index + sub_df = tmp_df[tmp_df["_row_index"] == episode_id.row_index] + if len(sub_df) == 0: + self.exp_result = None + raise ValueError(f"Could not find episode for row_index: {episode_id.row_index}") + if len(sub_df) > 1: + warning( + f"Found multiple rows for row_index: {episode_id.row_index}. Using the first one." + ) + exp_dir = sub_df.iloc[0]["exp_dir"] + print(exp_dir) + self.exp_result = ExpResult(exp_dir) + self.step = 0 + return + # find unique row for task_name and seed result_df = self.agent_df.reset_index(inplace=False) sub_df = result_df[ @@ -128,16 +147,15 @@ def get_agent_id(self, row: pd.Series): return agent_id def filter_agent_id(self, agent_id: list[tuple]): - # query_str = " & ".join([f"`{col}` == {repr(val)}" for col, val in agent_id]) - # agent_df = info.result_df.query(query_str) - - agent_df = self.result_df.reset_index(inplace=False) - agent_df.set_index(TASK_NAME_KEY, inplace=True) + # Preserve a stable row index to disambiguate selections later + tmp_df = self.result_df.reset_index(inplace=False) + tmp_df["_row_index"] = tmp_df.index + tmp_df.set_index(TASK_NAME_KEY, inplace=True) for col, val in agent_id: col = col.replace(".\n", ".") - agent_df = agent_df[agent_df[col] == val] - self.agent_df = agent_df + tmp_df = tmp_df[tmp_df[col] == val] + self.agent_df = tmp_df info = Info() @@ -735,7 +753,7 @@ def dict_msg_to_markdown(d: dict): case _: parts.append(f"\n```\n{str(item)}\n```\n") - markdown = f"### {d["role"].capitalize()}\n" + markdown = f"### {d['role'].capitalize()}\n" markdown += "\n".join(parts) return markdown @@ -1003,7 +1021,8 @@ def get_seeds_df(result_df: pd.DataFrame, task_name: str): def extract_columns(row: pd.Series): return pd.Series( { - "seed": row[TASK_SEED_KEY], + "index": row.get("_row_index", None), + "seed": row.get(TASK_SEED_KEY, None), "reward": row.get("cum_reward", None), "err": bool(row.get("err_msg", None)), "n_steps": row.get("n_steps", None), @@ -1011,6 +1030,8 @@ def extract_columns(row: pd.Series): ) seed_df = result_df.apply(extract_columns, axis=1) + # Ensure column order and readability + seed_df = seed_df[["seed", "reward", "err", "n_steps","index"]] return seed_df @@ -1028,15 +1049,26 @@ def on_select_task(evt: gr.SelectData, df: pd.DataFrame, agent_id: list[tuple]): def update_seeds(agent_task_id: tuple): agent_id, task_name = agent_task_id seed_df = get_seeds_df(info.agent_df, task_name) - first_seed = seed_df.iloc[0]["seed"] - return seed_df, EpisodeId(agent_id=agent_id, task_name=task_name, seed=first_seed) + 
first_seed = int(seed_df.iloc[0]["seed"]) if len(seed_df) else None + first_index = int(seed_df.iloc[0]["index"]) if len(seed_df) else None + return seed_df, EpisodeId( + agent_id=agent_id, task_name=task_name, seed=first_seed, row_index=first_index + ) def on_select_seed(evt: gr.SelectData, df: pd.DataFrame, agent_task_id: tuple): agent_id, task_name = agent_task_id col_idx = df.columns.get_loc("seed") - seed = evt.row_value[col_idx] # seed should be the first column - return EpisodeId(agent_id=agent_id, task_name=task_name, seed=seed) + idx_col = df.columns.get_loc("index") if "index" in df.columns else None + seed = evt.row_value[col_idx] + row_index = evt.row_value[idx_col] if idx_col is not None else None + try: + seed = int(seed) + if row_index is not None: + row_index = int(row_index) + except Exception: + pass + return EpisodeId(agent_id=agent_id, task_name=task_name, seed=seed, row_index=row_index) def new_episode(episode_id: EpisodeId, progress=gr.Progress()): From 31d649ceebc7e31ce1cef4c690324575cf0c1e9c Mon Sep 17 00:00:00 2001 From: emilianopp Date: Wed, 20 Aug 2025 13:57:59 +0000 Subject: [PATCH 02/10] privileged agent push --- .../privaleged_info_agent/privaleged_agent.py | 412 ++++++++++++++++ .../privaleged_agent_prompt.py | 304 ++++++++++++ src/agentlab/analyze/json_xray.py | 454 ++++++++++++++++++ src/agentlab/experiments/loop.py | 5 +- 4 files changed, 1174 insertions(+), 1 deletion(-) create mode 100644 src/agentlab/agents/privaleged_info_agent/privaleged_agent.py create mode 100644 src/agentlab/agents/privaleged_info_agent/privaleged_agent_prompt.py create mode 100644 src/agentlab/analyze/json_xray.py diff --git a/src/agentlab/agents/privaleged_info_agent/privaleged_agent.py b/src/agentlab/agents/privaleged_info_agent/privaleged_agent.py new file mode 100644 index 00000000..a87c99f4 --- /dev/null +++ b/src/agentlab/agents/privaleged_info_agent/privaleged_agent.py @@ -0,0 +1,412 @@ +""" +GenericAgent implementation for AgentLab + +This module defines a `GenericAgent` class and its associated arguments for use in the AgentLab framework. \ +The `GenericAgent` class is designed to interact with a chat-based model to determine actions based on \ +observations. It includes methods for preprocessing observations, generating actions, and managing internal \ +state such as plans, memories, and thoughts. The `GenericAgentArgs` class provides configuration options for \ +the agent, including model arguments and flags for various behaviors. 
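+
+A minimal construction sketch (the `my_*` values and the JSON path are placeholders for
+caller-supplied configuration, not defaults defined by this module):
+
+    args = PrivalegedAgentArgs(
+        chat_model_args=my_chat_model_args,   # any BaseModelArgs instance
+        flags=my_privaleged_prompt_flags,     # a PrivalegedPromptFlags instance
+        privaleged_actions_path=Path("trajectories.json"),
+        use_privileged_actions=True,
+    )
+    agent = args.make_agent()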
+""" + +from copy import deepcopy +from dataclasses import asdict, dataclass +from pathlib import Path +from warnings import warn +import json + +from typing import Dict, List, Optional + +import bgym +from browsergym.experiments.agent import Agent, AgentInfo + +from agentlab.agents import dynamic_prompting as dp +from agentlab.agents.agent_args import AgentArgs +from agentlab.llm.chat_api import BaseModelArgs +from agentlab.llm.llm_utils import Discussion, ParseError, SystemMessage, retry +from agentlab.llm.tracking import cost_tracker_decorator + +from .privaleged_agent_prompt import PrivalegedPrompt, PrivalegedPromptFlags +from functools import partial + + +@dataclass +class PrivalegedAgentArgs(AgentArgs): + chat_model_args: BaseModelArgs = None + flags: PrivalegedPromptFlags = None + max_retry: int = 4 + privaleged_actions_path: Path = None + use_privileged_actions: bool = True + + def __post_init__(self): + try: # some attributes might be temporarily args.CrossProd for hyperparameter generation + self.agent_name = f"GenericAgent-{self.chat_model_args.model_name}".replace("/", "_") + except AttributeError: + pass + + def set_benchmark(self, benchmark: bgym.Benchmark, demo_mode): + """Override Some flags based on the benchmark.""" + if benchmark.name.startswith("miniwob"): + self.flags.obs.use_html = True + + self.flags.obs.use_tabs = benchmark.is_multi_tab + self.flags.action.action_set = deepcopy(benchmark.high_level_action_set_args) + + # for backward compatibility with old traces + if self.flags.action.multi_actions is not None: + self.flags.action.action_set.multiaction = self.flags.action.multi_actions + if self.flags.action.is_strict is not None: + self.flags.action.action_set.strict = self.flags.action.is_strict + + # verify if we can remove this + if demo_mode: + self.flags.action.action_set.demo_mode = "all_blue" + + def set_reproducibility_mode(self): + self.chat_model_args.temperature = 0 + + def prepare(self): + return self.chat_model_args.prepare_server() + + def close(self): + return self.chat_model_args.close_server() + + def make_agent(self): + return PrivalegedAgent( + chat_model_args=self.chat_model_args, + flags=self.flags, + max_retry=self.max_retry, + privaleged_actions_path=self.privaleged_actions_path, + use_privileged_actions=self.use_privileged_actions, + ) + + +@dataclass +class PrivilegedObservation: + """Represents a single privileged observation with action and metadata.""" + + action: str + reward: int + task_name: str + model_name: str + output: str + goal: Optional[str] + + +@dataclass +class PrivilegedRun: + """Represents a single run with multiple steps for a goal.""" + + steps: Dict[str, PrivilegedObservation] + + def __init__(self): + self.steps = {} + + def add_step(self, step_id: str, obs: PrivilegedObservation): + """Add a step to this run.""" + + self.steps[step_id] = obs + + def get_step(self, step_id: str) -> Optional[PrivilegedObservation]: + """Get a specific step.""" + return self.steps.get(step_id) + + def get_all_steps(self) -> List[PrivilegedObservation]: + """Get all steps in order.""" + return [ + self.steps[str(i)] + for i in sorted([int(k) for k in self.steps.keys()]) + if str(i) in self.steps + ] + + +@dataclass +class PrivilegedObservationCollection: + """Container for privileged observations with query capabilities.""" + + observations: Dict[str, Dict[str, Dict[str, PrivilegedRun]]] + + def __init__(self): + self.observations = {} + + def add_run(self, task: str, goal: str, trajectory_id: str, run: PrivilegedRun): + """Add a 
privileged run for a specific task and goal.""" + if task not in self.observations: + self.observations[task] = {} + if goal not in self.observations[task]: + self.observations[task][goal] = {} + self.observations[task][goal][trajectory_id] = run + + def get_run(self, task: str, goal: str, trajectory_id: str) -> Optional[PrivilegedRun]: + """Get a specific run for a task and goal.""" + return self.observations.get(task, {}).get(goal, {}).get(trajectory_id) + + def get_all_runs(self, task: str, goal: str = None) -> List[PrivilegedRun]: + """Get all runs for a specific task and optionally goal.""" + if task not in self.observations: + return [] + if goal is None: + runs = [] + for goal_data in self.observations[task].values(): + runs.extend(goal_data.values()) + return runs + return list(self.observations[task].get(goal, {}).values()) + + def get_all_tasks(self) -> List[str]: + """Get all available tasks.""" + return list(self.observations.keys()) + + def get_all_goals(self, task: str) -> List[str]: + """Get all available goals for a task.""" + return list(self.observations.get(task, {}).keys()) + + def has_task(self, task: str) -> bool: + """Check if a task exists in the collection.""" + return task in self.observations + + def has_goal(self, task: str, goal: str) -> bool: + """Check if a goal exists for a task.""" + return task in self.observations and goal in self.observations[task] + + def get_random( + self, task: str, goal: Optional[str] = None, get_step: bool = False + ) -> Optional[PrivilegedObservation]: + """Get a random step from a random trajectory. If goal is specified and exists, use it. Otherwise, pick a random goal.""" + import random + + if not self.has_task(task): + return None + + # Determine the goal to use + goal_to_use = goal + if goal_to_use is None or not self.has_goal(task, goal_to_use): + # Pick a random goal if not specified or not found + available_goals = self.get_all_goals(task) + if not available_goals: + return None + goal_to_use = random.choice(available_goals) + + # Get all trajectories for this goal + trajectories = list(self.observations[task][goal_to_use].values()) + if not trajectories: + return None + + # Pick a random trajectory + random_trajectory = random.choice(trajectories) + steps = random_trajectory.get_all_steps() + if not steps: + return None + + return random.choice(steps) if get_step else steps + + +class PrivalegedAgent(Agent): + + def __init__( + self, + chat_model_args: PrivalegedAgentArgs, + flags: PrivalegedPromptFlags, + max_retry: int = 4, + privaleged_actions_path: Path = None, + use_privileged_actions: bool = True, + ): + + self.chat_llm = chat_model_args.make_model() + self.chat_model_args = chat_model_args + self.max_retry = max_retry + self.use_privileged_actions = use_privileged_actions + self.task = None + self.flags = flags + self.action_set = self.flags.action.action_set.make_action_set() + self._obs_preprocessor = dp.make_obs_preprocessor(flags.obs) + self.privileged_observations = PrivilegedObservationCollection() + self.privalaged_path = privaleged_actions_path + self.load_privaleged_actions(self.privalaged_path) + self.goal = None + self._check_flag_constancy() + self.reset(seed=None) + self.trajectory = None + + def obs_preprocessor(self, obs: dict) -> dict: + return self._obs_preprocessor(obs) + + def load_privaleged_actions(self, privaleged_actions_path: Path): + """Load privileged actions from a JSON file.""" + try: + with open(privaleged_actions_path, "r") as f: + data = json.load(f) + + for goal, trajectories_data in 
data.items(): + for trajectory_id, steps_data in trajectories_data.items(): + run = PrivilegedRun() + task_name = None + + for step_id, obs_data in steps_data.items(): + privileged_obs = PrivilegedObservation( + action=obs_data["action"], + reward=obs_data["reward"], + task_name=obs_data["task_name"], + model_name=obs_data["model_name"], + output=obs_data["output"], + goal = goal, + ) + run.add_step(step_id, privileged_obs) + + # Extract task_name from the first step + if task_name is None: + task_name = obs_data["task_name"] + + # Add run indexed by task_name + if task_name: + self.privileged_observations.add_run(task_name, goal, trajectory_id, run) + + except (FileNotFoundError, json.JSONDecodeError, KeyError) as e: + warn(f"Failed to load privileged actions from {privaleged_actions_path}: {e}") + + def sample_trajectory( + self, task: str, goal: str, trajectory_id: str + ) -> List[PrivilegedObservation]: + """Sample a trajectory of privileged actions for a specific task, goal, and trajectory.""" + if not self.privileged_observations.has_task(task): + warn(f"No privileged actions found for task: {task}") + return [] + + if not self.privileged_observations.has_goal(task, goal): + warn(f"No goal found for task: {task}, goal: {goal}") + return [] + + run = self.privileged_observations.get_run(task, goal, trajectory_id) + if not run: + warn( + f"No trajectory found for task: {task}, goal: {goal}, trajectory_id: {trajectory_id}" + ) + return [] + + return run.get_all_steps() + + def set_goal(self, goal): + self.goal = goal + self.trajectory = self.privileged_observations.get_random(task=self.task, goal=self.goal) + + @cost_tracker_decorator + def get_action(self, obs): + + self.obs_history.append(obs) + + main_prompt = PrivalegedPrompt( + action_set=self.action_set, + obs_history=self.obs_history, + actions=self.actions, + goal=self.goal, + memories=self.memories, + thoughts=self.thoughts, + previous_plan=self.plan, + step=self.plan_step, + flags=self.flags, + trajectory=self.trajectory, + use_privileged_actions=self.use_privileged_actions, + ) + + max_prompt_tokens, max_trunc_itr = self._get_maxes() + + system_prompt = SystemMessage(dp.SystemPrompt().prompt) + + human_prompt = dp.fit_tokens( + shrinkable=main_prompt, + max_prompt_tokens=max_prompt_tokens, + model_name=self.chat_model_args.model_name, + max_iterations=max_trunc_itr, + additional_prompts=system_prompt, + ) + try: + # TODO, we would need to further shrink the prompt if the retry + # cause it to be too long + + chat_messages = Discussion([system_prompt, human_prompt]) + ans_dict = retry( + self.chat_llm, + chat_messages, + n_retry=self.max_retry, + parser=main_prompt._parse_answer, + ) + ans_dict["busted_retry"] = 0 + # inferring the number of retries, TODO: make this less hacky + ans_dict["n_retry"] = (len(chat_messages) - 3) / 2 + except ParseError as e: + ans_dict = dict( + action=None, + n_retry=self.max_retry + 1, + busted_retry=1, + ) + + stats = self.chat_llm.get_stats() + stats["n_retry"] = ans_dict["n_retry"] + stats["busted_retry"] = ans_dict["busted_retry"] + + self.plan = ans_dict.get("plan", self.plan) + self.plan_step = ans_dict.get("step", self.plan_step) + self.actions.append(ans_dict["action"]) + self.memories.append(ans_dict.get("memory", None)) + self.thoughts.append(ans_dict.get("think", None)) + + agent_info = AgentInfo( + think=ans_dict.get("think", None), + chat_messages=chat_messages, + stats=stats, + extra_info={"chat_model_args": asdict(self.chat_model_args)}, + ) + return ans_dict["action"], agent_info 
+ + def reset(self, seed=None): + self.seed = seed + self.plan = "No plan yet" + self.plan_step = -1 + self.memories = [] + self.thoughts = [] + self.actions = [] + self.obs_history = [] + + def _check_flag_constancy(self): + flags = self.flags + if flags.obs.use_som: + if not flags.obs.use_screenshot: + warn( + """ +Warning: use_som=True requires use_screenshot=True. Disabling use_som.""" + ) + flags.obs.use_som = False + if flags.obs.use_screenshot: + if not self.chat_model_args.vision_support: + warn( + """ +Warning: use_screenshot is set to True, but the chat model \ +does not support vision. Disabling use_screenshot.""" + ) + flags.obs.use_screenshot = False + return flags + + def _get_maxes(self): + maxes = ( + self.flags.max_prompt_tokens, + self.chat_model_args.max_total_tokens, + self.chat_model_args.max_input_tokens, + ) + maxes = [m for m in maxes if m is not None] + max_prompt_tokens = min(maxes) if maxes else None + max_trunc_itr = ( + self.flags.max_trunc_itr + if self.flags.max_trunc_itr + else 20 # dangerous to change the default value here? + ) + return max_prompt_tokens, max_trunc_itr + + def set_task(self, task: str): + """ + Set the task for the agent. This method can be used to change the task + during an episode. + + Parameters: + ----------- + task: str + The new task for the agent. + """ + self.task = task diff --git a/src/agentlab/agents/privaleged_info_agent/privaleged_agent_prompt.py b/src/agentlab/agents/privaleged_info_agent/privaleged_agent_prompt.py new file mode 100644 index 00000000..36e39706 --- /dev/null +++ b/src/agentlab/agents/privaleged_info_agent/privaleged_agent_prompt.py @@ -0,0 +1,304 @@ +""" +# Prompt builder for GenericAgent + +It is based on the dynamic_prompting module from the agentlab package. +""" + +import logging +from dataclasses import dataclass + +from browsergym.core import action +from browsergym.core.action.base import AbstractActionSet + +from agentlab.agents import dynamic_prompting as dp +from agentlab.llm.llm_utils import HumanMessage, parse_html_tags_raise + + +@dataclass +class PrivalegedPromptFlags(dp.Flags): + """ + A class to represent various flags used to control features in an application. + + Attributes: + use_plan (bool): Ask the LLM to provide a plan. + use_criticise (bool): Ask the LLM to first draft and criticise the action before producing it. + use_thinking (bool): Enable a chain of thoughts. + use_concrete_example (bool): Use a concrete example of the answer in the prompt for a generic task. + use_abstract_example (bool): Use an abstract example of the answer in the prompt. + use_hints (bool): Add some human-engineered hints to the prompt. + enable_chat (bool): Enable chat mode, where the agent can interact with the user. + max_prompt_tokens (int): Maximum number of tokens allowed in the prompt. + be_cautious (bool): Instruct the agent to be cautious about its actions. + extra_instructions (Optional[str]): Extra instructions to provide to the agent. + add_missparsed_messages (bool): When retrying, add the missparsed messages to the prompt. + flag_group (Optional[str]): Group of flags used. 
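+
+    A minimal sketch of building the flags (the `my_*` values are placeholders supplied by
+    the caller, not defaults):
+
+        flags = PrivalegedPromptFlags(
+            obs=my_obs_flags,        # a dp.ObsFlags instance
+            action=my_action_flags,  # a dp.ActionFlags instance
+            use_thinking=True,
+        )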
+ """ + + obs: dp.ObsFlags + action: dp.ActionFlags + use_plan: bool = False # + use_criticise: bool = False # + use_thinking: bool = False + use_memory: bool = False # + use_concrete_example: bool = True + use_abstract_example: bool = False + use_hints: bool = False + enable_chat: bool = False + max_prompt_tokens: int = None + be_cautious: bool = True + extra_instructions: str | None = None + add_missparsed_messages: bool = True + max_trunc_itr: int = 20 + flag_group: str = None + + +class PrivalegedPrompt(dp.Shrinkable): + def __init__( + self, + action_set: AbstractActionSet, + obs_history: list[dict], + actions: list[str], + memories: list[str], + thoughts: list[str], + goal: str, + previous_plan: str, + step: int, + flags: PrivalegedPromptFlags, + trajectory: list[dict] | None = None, + use_privileged_actions: bool = True, + ) -> None: + super().__init__() + self.flags = flags + self.history = dp.History(obs_history, actions, memories, thoughts, flags.obs) + if self.flags.enable_chat: + self.instructions = dp.ChatInstructions( + obs_history[-1]["chat_messages"], extra_instructions=flags.extra_instructions + ) + else: + if sum([msg["role"] == "user" for msg in obs_history[-1].get("chat_messages", [])]) > 1: + logging.warning( + "Agent is in goal mode, but multiple user messages are present in the chat. Consider switching to `enable_chat=True`." + ) + self.instructions = dp.GoalInstructions( + obs_history[-1]["goal_object"], extra_instructions=flags.extra_instructions + ) + self.use_privileged_actions = use_privileged_actions + self.obs = dp.Observation( + obs_history[-1], + self.flags.obs, + ) + self.goal = goal + self.trajectory = trajectory + self.action_prompt = dp.ActionPrompt(action_set, action_flags=flags.action) + self.step_idx = len(actions) + + def time_for_caution(): + # no need for caution if we're in single action mode + return flags.be_cautious and ( + flags.action.action_set.multiaction or flags.action.action_set == "python" + ) + + self.be_cautious = dp.BeCautious(visible=time_for_caution) + self.think = dp.Think(visible=lambda: flags.use_thinking) + self.hints = dp.Hints(visible=lambda: flags.use_hints) + self.plan = Plan(previous_plan, step, lambda: flags.use_plan) # TODO add previous plan + self.criticise = Criticise(visible=lambda: flags.use_criticise) + self.memory = Memory(visible=lambda: flags.use_memory) + self.step = step + + @property + def _prompt(self) -> HumanMessage: + prompt = HumanMessage(self.instructions.prompt) + prompt.add_text( + f"""\ +{self.obs.prompt}\ +{self.history.prompt}\ +{self.action_prompt.prompt}\ +# {self.hints.prompt}\ +{self.be_cautious.prompt}\ +{self.think.prompt}\ +{self.plan.prompt}\ +{self.memory.prompt}\ +{self.criticise.prompt}\ +""" + ) + + # if self.flags.use_abstract_example: + # prompt.add_text( + # f""" + # # Abstract Example + + # Here is an abstract version of the answer with description of the content of + # each tag. 
Make sure you follow this structure, but replace the content with your + # answer: + # {self.think.abstract_ex}\ + # {self.plan.abstract_ex}\ + # {self.memory.abstract_ex}\ + # {self.criticise.abstract_ex}\ + # {self.action_prompt.abstract_ex}\ + # """ + # ) + + # else: + if self.use_privileged_actions: + try: + # Create a formatted string for the trajectory, highlighting the current step + trajectory_lines = [] + traj_goal = self.trajectory[0].goal + for i, t in enumerate(self.trajectory): + # prefix = "-->" if i == self.step_idx else " " + trajectory_lines.append(f"{t.action}") + + trajectory_str = "\n".join(trajectory_lines) + + prompt.add_text( + f""" + + Here is a successful trajectory for a similar task. Your goal is to replicate the sequence of actions from this trajectory to solve the current task. + + **Successful Trajectory Example for the Goal {traj_goal}:** + {trajectory_str} + + **Your Task:** + + 1. **Follow the Action Sequence:** You must execute the same type of action (e.g., `click`, `fill`) as the corresponding step in the example trajectory. + 2. **Adapt to the Environment:** The current environment is similar but may not be identical. Element attributes (like IDs, text, or positions) will be different. Your task is to identify the element in the current observation that corresponds to the target of the action in the example trajectory. + 3. **Reasoning:** For each step, first state the action from the example trajectory. Then, reason about how to find the equivalent element in the current observation and construct the correct action. + 4. **Strict Constraint:** Do NOT mention that you have been given an example trajectory in your reasoning. Act as if you are determining the best action based only on the observation and goal. You will be penalized for violating this rule. + 5. **Reflect on the actions you have taken so far:** If you have already executed some actions, consider their effects and how they might influence the next action. You can refer to the previous actions in the trajectory to guide your reasoning. + + +""" + ) + except Exception as e: + # raise RuntimeError(f"No privilaged action will for goal {self.goal}.") from e + pass + if self.flags.use_concrete_example: + prompt.add_text( + f""" + # Concrete Example + + Here is a concrete example of how to format your answer. + Make sure to follow the template with proper tags: + {self.think.concrete_ex}\ + {self.plan.concrete_ex}\ + {self.memory.concrete_ex}\ + {self.criticise.concrete_ex}\ + {self.action_prompt.concrete_ex}\ + """ + ) + + + + return self.obs.add_screenshot(prompt) + + def shrink(self): + self.history.shrink() + self.obs.shrink() + + def _parse_answer(self, text_answer): + ans_dict = {} + ans_dict.update(self.think.parse_answer(text_answer)) + ans_dict.update(self.plan.parse_answer(text_answer)) + ans_dict.update(self.memory.parse_answer(text_answer)) + ans_dict.update(self.criticise.parse_answer(text_answer)) + ans_dict.update(self.action_prompt.parse_answer(text_answer)) + return ans_dict + + +class Memory(dp.PromptElement): + _prompt = "" # provided in the abstract and concrete examples + + _abstract_ex = """ + +Write down anything you need to remember for next steps. You will be presented +with the list of previous memories and past actions. Some tasks require to +remember hints from previous steps in order to solve it. + +""" + + _concrete_ex = """ + +I clicked on bid "32" to activate tab 2. The accessibility tree should mention +focusable for elements of the form at next step. 
+ +""" + + def _parse_answer(self, text_answer): + return parse_html_tags_raise(text_answer, optional_keys=["memory"], merge_multiple=True) + + +class Plan(dp.PromptElement): + def __init__(self, previous_plan, plan_step, visible: bool = True) -> None: + super().__init__(visible=visible) + self.previous_plan = previous_plan + self._prompt = f""" +# Plan: + +You just executed step {plan_step} of the previously proposed plan:\n{previous_plan}\n +After reviewing the effect of your previous actions, verify if your plan is still +relevant and update it if necessary. +""" + + _abstract_ex = """ + +Provide a multi step plan that will guide you to accomplish the goal. There +should always be steps to verify if the previous action had an effect. The plan +can be revisited at each steps. Specifically, if there was something unexpected. +The plan should be cautious and favor exploring befor submitting. + + +Integer specifying the step of current action + +""" + + _concrete_ex = """ + +1. fill form (failed) + * type first name + * type last name +2. Try to activate the form + * click on tab 2 +3. fill form again + * type first name + * type last name +4. verify and submit + * verify form is filled + * submit if filled, if not, replan + + +2 +""" + + def _parse_answer(self, text_answer): + return parse_html_tags_raise(text_answer, optional_keys=["plan", "step"]) + + +class Criticise(dp.PromptElement): + _prompt = "" + + _abstract_ex = """ + +Write a first version of what you think is the right action. + + + +Criticise action_draft. What could be wrong with it? Enumerate reasons why it +could fail. Did your past actions had the expected effect? Make sure you're not +repeating the same mistakes. + +""" + + _concrete_ex = """ + +click("32") + + + +click("32") might not work because the element is not visible yet. I need to +explore the page to find a way to activate the form. + +""" + + def _parse_answer(self, text_answer): + return parse_html_tags_raise(text_answer, optional_keys=["action_draft", "criticise"]) diff --git a/src/agentlab/analyze/json_xray.py b/src/agentlab/analyze/json_xray.py new file mode 100644 index 00000000..136ed878 --- /dev/null +++ b/src/agentlab/analyze/json_xray.py @@ -0,0 +1,454 @@ +#!/usr/bin/env python3 +# -*- coding: utf-8 -*- +""" +Gradio dashboard to browse conversation-style JSON datasets produced in your experiments. +- Scan a base folder for JSON files (e.g., */epoch_*/seed_*/dataset/*.json) +- Load a file and browse by trajectory and step +- See prompt/think/action/output/expected and rewards +- Search trajectories by keyword + +Requirements: pip install gradio pandas +Run: +- python src/agentlab/analyze/json_xray.py --base /path/to/dir +- XRAY_BASE_DIR=/path/to/dir python src/agentlab/analyze/json_xray.py +""" +from __future__ import annotations +import json +import re +import os +from pathlib import Path +from typing import Dict, List, Any, Tuple +import argparse + +import gradio as gr +import pandas as pd + +# -------- Helpers --------- + + +def find_json_files(base_dir: str) -> List[str]: + p = Path(base_dir) + if not p.exists(): + return [] + # Look for likely dataset files + patterns = [ + "**/dataset/*.json", + "**/evals/**/*.json", + "**/*.json", + ] + found = [] + for pat in patterns: + for fp in p.glob(pat): + # Prefer small-medium JSONs, skip huge checkpoints, shards, etc. 
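+            # Note: the filter below matches filename keywords only; file size is not inspected.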
+ name = fp.name.lower() + if any(k in name for k in ["train", "valid", "val", "test", "dataset"]): + found.append(str(fp.resolve())) + # Dedup while preserving order + dedup = [] + seen = set() + for f in found: + if f not in seen: + seen.add(f) + dedup.append(f) + return dedup + + +def load_dataset( + json_path: str, +) -> Tuple[List[Dict[str, Any]], Dict[str, List[Dict[str, Any]]]]: + with open(json_path, "r", encoding="utf-8") as f: + data = json.load(f) + if not isinstance(data, list): + raise ValueError("Expected a list of items in the JSON file") + + # Normalize step_key to int where possible and group by trajectory/trace id + by_traj: Dict[str, List[Dict[str, Any]]] = {} + for item in data: + # Prefer explicit IDs if present + traj_group_key = ( + item.get("trace_id") + or item.get("trajectory_key") + or item.get("trajectory_goal") + or "" + ) + step_raw = item.get("step_id") + if step_raw is None: + step_raw = item.get("step_key") + try: + item["_step_idx"] = int(step_raw) if step_raw is not None else 0 + except Exception: + item["_step_idx"] = 0 + by_traj.setdefault(traj_group_key, []).append(item) + + for k in by_traj: + by_traj[k] = sorted(by_traj[k], key=lambda x: x.get("_step_idx", 0)) + + return data, by_traj + + +def summarize(by_traj: Dict[str, List[Dict[str, Any]]]) -> pd.DataFrame: + rows = [] + for traj, items in by_traj.items(): + goal = None + matches = 0 + total = len(items) + og_rewards = [] + match_rewards = [] + for it in items: + goal = goal or it.get("trajectory_goal") + if it.get("action") == it.get("expected_action"): + matches += 1 + if it.get("og_reward") is not None: + og_rewards.append(it.get("og_reward")) + if it.get("match_reward") is not None: + match_rewards.append(it.get("match_reward")) + rows.append( + { + "trajectory_key": traj, + "goal": goal, + "steps": total, + "match_rate": round(matches / total, 3) if total else None, + "avg_og_reward": ( + round(sum(og_rewards) / len(og_rewards), 3) if og_rewards else None + ), + "avg_match_reward": ( + round(sum(match_rewards) / len(match_rewards), 3) if match_rewards else None + ), + } + ) + df = ( + pd.DataFrame(rows) + .sort_values(["match_rate", "steps"], ascending=[False, False]) + .reset_index(drop=True) + ) + return df + + +def extract_section(text: str, start_marker: str, end_marker: str) -> str | None: + if not isinstance(text, str): + return None + try: + start = text.find(start_marker) + if start == -1: + return None + start += len(start_marker) + end = text.find(end_marker, start) + if end == -1: + return text[start:].strip() + return text[start:end].strip() + except Exception: + return None + + +def render_item(item: Dict[str, Any]) -> str: + traj = item.get("trajectory_key", "") + goal = item.get("trajectory_goal", "") + step = item.get("_step_idx", 0) + action = item.get("action") + expected = item.get("expected_action") + match = "✅" if action == expected else "❌" + og_reward = item.get("og_reward") + match_reward = item.get("match_reward") + # Rename Prompt->Input; keep Output; drop Think/HTML/Privileged + prompt = item.get("prompt", "") + output = item.get("output", "") + + def fence(title: str, body: str | None): + if not body: + return f"\n**{title}:**\n_None_\n" + return f"\n**{title}:**\n```\n{body}\n```\n" + + md = [] + md.append( + f"### Trajectory\n- Key: `{traj}`\n- Goal: `{goal}`\n- Step: `{step}`\n- Action: `{action}`\n- Expected: `{expected}` {match}\n- og_reward: `{og_reward}`\n- match_reward: `{match_reward}`\n" + ) + md.append(fence("Input", prompt)) + if output: + 
md.append(fence("Output", output)) + return "\n".join(md) + + +# -------- Gradio App --------- + +DEFAULT_BASE_DIR = str(Path(__file__).resolve().parent) + +# If this repo layout matches yours, set a friendlier default: +CANDIDATE_DEFAULT = "/mnt/adea/data_rw/finetuning/emiliano_home/experiments/20250812_043826_on_policy_miniwob_random_tasks_v5" +if Path(CANDIDATE_DEFAULT).exists(): + DEFAULT_BASE_DIR = CANDIDATE_DEFAULT + + +def build_demo(default_base_dir: str) -> gr.Blocks: + with gr.Blocks(title="Conversation Dataset Viewer", theme=gr.themes.Base()) as demo: + gr.Markdown( + """# Conversation Dataset Viewer +Use this to browse JSON conversation logs produced during finetuning/evaluation. +1) Enter a base folder and Scan for JSON files. +2) Pick a file to load. +3) Select a trajectory and step to view details. +""" + ) + + ds_state = gr.State( + {} + ) # { 'data': list, 'by_traj': dict, 'traj_list': list, 'summary': df } + + with gr.Row(): + base_dir = gr.Textbox(label="Base folder", value=default_base_dir, scale=5) + scan_btn = gr.Button("Scan", variant="secondary", scale=1) + found_files = gr.Dropdown(label="Found JSON files", choices=[], interactive=True) + + with gr.Row(): + load_btn = gr.Button("Load selected file", variant="primary") + file_label = gr.Markdown("_No file loaded_") + + with gr.Row(): + search = gr.Textbox( + label="Filter trajectories (substring match)", + placeholder="e.g., miniwob.click-scroll-list or Trinidad", + ) + + with gr.Row(): + traj_dd = gr.Dropdown(label="Trajectory", choices=[], interactive=False) + step_dd = gr.Dropdown(label="Step", choices=[], interactive=True) + + summary_df = gr.Dataframe( + label="Summary by trajectory", interactive=True, row_count=(0, "dynamic") + ) + + with gr.Row(): + prev_btn = gr.Button("◀ Prev") + next_btn = gr.Button("Next ▶") + + details_md = gr.Markdown("_Load a file to see details_") + + # ---- Callbacks ---- + + def on_scan(base: str): + files = find_json_files(base) + return gr.update(choices=files, value=(files[0] if files else None)) + + scan_btn.click(on_scan, inputs=[base_dir], outputs=[found_files]) + + def on_load(file_path: str): + if not file_path: + return ( + "_No file selected_", + gr.update(choices=[], value=None), + gr.update(choices=[], value=None), + pd.DataFrame(), + ds_state, + "_No file loaded_", + ) + data, by_traj = load_dataset(file_path) + traj_list = sorted(by_traj.keys()) + df = summarize(by_traj) + # Initialize state + new_state = { + "file": file_path, + "data": data, + "by_traj": by_traj, + "traj_list": traj_list, + "summary": df, + } + ds_state.value = new_state + file_md = ( + f"Loaded: `{file_path}` \\\nTrajectories: {len(traj_list)} | Steps: {len(data)}" + ) + # Preselect first traj and step + first_traj = traj_list[0] if traj_list else None + step_choices = [ + str(it.get("_step_idx", i)) for i, it in enumerate(by_traj.get(first_traj, [])) + ] + first_step = step_choices[0] if step_choices else None + + # Render details for the first trajectory's first step + if first_traj and by_traj.get(first_traj): + first_step_data = by_traj[first_traj][0] + details = render_item(first_step_data) + else: + details = "_No data available_" + + return ( + file_md, + gr.update(choices=traj_list, value=first_traj), + gr.update(choices=step_choices, value=first_step), + df, + new_state, + details, + ) + + load_btn.click( + on_load, + inputs=[found_files], + outputs=[file_label, traj_dd, step_dd, summary_df, ds_state, details_md], + ) + + def on_filter(query: str, state: Dict[str, Any]): + if not state or 
not state.get("traj_list"): + return gr.update(choices=[], value=None) + trajs = state["traj_list"] + if not query: + return gr.update(choices=trajs, value=(trajs[0] if trajs else None)) + q = query.lower() + filtered = [] + for t in trajs: + # match on key or goal text from first item + first = state["by_traj"][t][0] + goal = (first.get("trajectory_goal") or "").lower() + key_lower = t.lower() + if q in key_lower or q in goal: + filtered.append(t) + val = filtered[0] if filtered else None + return gr.update(choices=filtered, value=val) + + search.change(on_filter, inputs=[search, ds_state], outputs=[traj_dd]) + + def on_table_select(state: Dict[str, Any], evt: gr.SelectData): + """Handle table row selection to update trajectory dropdown from table clicks""" + # Validate state payload without triggering pandas truthiness + if not isinstance(state, dict) or "summary" not in state: + return gr.update(), gr.update(), "_No data loaded_" + df = state["summary"] + try: + is_empty = df is None or (hasattr(df, "empty") and df.empty) + except Exception: + is_empty = True + if is_empty: + return gr.update(), gr.update(), "_No data_" + + # Resolve selected row index from event + if isinstance(evt.index, (list, tuple)) and len(evt.index) > 0: + row_idx = evt.index[0] + else: + row_idx = evt.index + if not isinstance(row_idx, int) or row_idx < 0 or row_idx >= len(df): + return gr.update(), gr.update(), f"_Invalid selection: row {row_idx}_" + + # Extract trajectory key and build step choices + try: + traj_key = df.iloc[row_idx]["trajectory_key"] + except Exception as e: + return gr.update(), gr.update(), f"_Error getting trajectory: {e}_" + + steps = state["by_traj"].get(traj_key, []) + step_choices = [str(it.get("_step_idx", i)) for i, it in enumerate(steps)] + first_step = step_choices[0] if step_choices else None + details = render_item(steps[0]) if steps else "_No steps_" + + return ( + gr.update(value=traj_key), + gr.update(choices=step_choices, value=first_step), + details, + ) + + summary_df.select( + on_table_select, inputs=[ds_state], outputs=[traj_dd, step_dd, details_md] + ) + + def on_select_traj(traj_key: str, state: Dict[str, Any]): + if not traj_key or not state: + return gr.update(choices=[], value=None), "_No trajectory selected_" + steps = state["by_traj"].get(traj_key, []) + step_choices = [str(it.get("_step_idx", i)) for i, it in enumerate(steps)] + # Render first step details + details = render_item(steps[0]) if steps else "_No steps_" + return ( + gr.update(choices=step_choices, value=(step_choices[0] if step_choices else None)), + details, + ) + + traj_dd.change(on_select_traj, inputs=[traj_dd, ds_state], outputs=[step_dd, details_md]) + + def on_select_step(traj_key: str, step_key: str, state: Dict[str, Any]): + if not traj_key or not step_key or not state: + return "_Select a trajectory and step_" + steps = state["by_traj"].get(traj_key, []) + # Find by numeric step + try: + idx = int(step_key) + except Exception: + idx = 0 + # Map step idx to position if non-contiguous + pos = 0 + for i, it in enumerate(steps): + if it.get("_step_idx", i) == idx: + pos = i + break + details = render_item(steps[pos]) if steps else "_No steps_" + return details + + step_dd.change(on_select_step, inputs=[traj_dd, step_dd, ds_state], outputs=[details_md]) + + def on_prev(traj_key: str, step_key: str, state: Dict[str, Any]): + if not traj_key or not state: + return gr.update(), "_No selection_" + steps = state["by_traj"].get(traj_key, []) + if not steps: + return gr.update(), "_No steps_" + # Find 
current position + try: + cur = int(step_key) if step_key is not None else steps[0].get("_step_idx", 0) + except Exception: + cur = steps[0].get("_step_idx", 0) + positions = [it.get("_step_idx", i) for i, it in enumerate(steps)] + if cur in positions: + idx = positions.index(cur) + else: + idx = 0 + new_idx = max(0, idx - 1) + new_step_key = str(positions[new_idx]) + return gr.update(value=new_step_key), render_item(steps[new_idx]) + + def on_next(traj_key: str, step_key: str, state: Dict[str, Any]): + if not traj_key or not state: + return gr.update(), "_No selection_" + steps = state["by_traj"].get(traj_key, []) + if not steps: + return gr.update(), "_No steps_" + try: + cur = int(step_key) if step_key is not None else steps[0].get("_step_idx", 0) + except Exception: + cur = steps[0].get("_step_idx", 0) + positions = [it.get("_step_idx", i) for i, it in enumerate(steps)] + if cur in positions: + idx = positions.index(cur) + else: + idx = 0 + new_idx = min(len(steps) - 1, idx + 1) + new_step_key = str(positions[new_idx]) + return gr.update(value=new_step_key), render_item(steps[new_idx]) + + prev_btn.click(on_prev, inputs=[traj_dd, step_dd, ds_state], outputs=[step_dd, details_md]) + next_btn.click(on_next, inputs=[traj_dd, step_dd, ds_state], outputs=[step_dd, details_md]) + + return demo + + +if __name__ == "__main__": + # Parse optional base directory from CLI or env. + parser = argparse.ArgumentParser(description="Launch Conversation Dataset Viewer (Gradio)") + parser.add_argument( + "--base", + "--base-dir", + dest="base_dir", + help="Base folder to scan for JSON files", + ) + parser.add_argument( + "directory", + nargs="?", + help="Positional base folder to scan (alternative to --base)", + ) + args = parser.parse_args() + + env_base = os.getenv("XRAY_BASE_DIR") + chosen_base = args.base_dir or args.directory or env_base or DEFAULT_BASE_DIR + + # Launch the app. 
Env overrides: GRADIO_SERVER_NAME, GRADIO_SERVER_PORT, GRADIO_SHARE + demo = build_demo(chosen_base) + demo.queue(max_size=64) + server_name = os.getenv("GRADIO_SERVER_NAME", "127.0.0.1") + port_env = os.getenv("GRADIO_SERVER_PORT", "") + server_port = int(port_env) if port_env.isdigit() else None # None => pick a free port + share = os.getenv("GRADIO_SHARE", "true").lower() in ("1", "true", "yes") + demo.launch(server_name=server_name, server_port=server_port, share=share) diff --git a/src/agentlab/experiments/loop.py b/src/agentlab/experiments/loop.py index de4b976a..20530f02 100644 --- a/src/agentlab/experiments/loop.py +++ b/src/agentlab/experiments/loop.py @@ -425,6 +425,7 @@ def run(self): exp_dir=self.exp_dir, use_raw_page_output=getattr(self.agent_args, "use_raw_page_output", False), ) + logger.debug("Environment created.") step_info = StepInfo(step=0) @@ -433,7 +434,9 @@ def run(self): env, seed=self.env_args.task_seed or 0, obs_preprocessor=agent.obs_preprocessor ) logger.debug("Environment reset.") - + goal = step_info.obs['goal'] + agent.set_task(self.env_args.task_name) # set the task for the agent + agent.set_goal(goal) # set the goal for the agent, if applicable while not step_info.is_done: # set a limit logger.debug(f"Starting step {step_info.step}.") action = step_info.from_action(agent) From 404888837c249cb4400cb7dcf915f1ea308a3bc5 Mon Sep 17 00:00:00 2001 From: emilianopp Date: Fri, 22 Aug 2025 00:57:18 +0000 Subject: [PATCH 03/10] bugfix for set task missing --- src/agentlab/agents/generic_agent/generic_agent.py | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/src/agentlab/agents/generic_agent/generic_agent.py b/src/agentlab/agents/generic_agent/generic_agent.py index d1f48f76..b933b430 100644 --- a/src/agentlab/agents/generic_agent/generic_agent.py +++ b/src/agentlab/agents/generic_agent/generic_agent.py @@ -201,3 +201,14 @@ def _get_maxes(self): else 20 # dangerous to change the default value here? ) return max_prompt_tokens, max_trunc_itr + def set_task(self, task: str): + """ + Set the task for the agent. This method can be used to change the task + during an episode. + + Parameters: + ----------- + task: str + The new task for the agent. 
+ """ + self.task = task \ No newline at end of file From 355e3a6ad448aada44c79b6a370d53cd60f00c3e Mon Sep 17 00:00:00 2001 From: emilianopp Date: Fri, 22 Aug 2025 01:05:01 +0000 Subject: [PATCH 04/10] bandaid --- src/agentlab/agents/generic_agent/generic_agent.py | 1 + 1 file changed, 1 insertion(+) diff --git a/src/agentlab/agents/generic_agent/generic_agent.py b/src/agentlab/agents/generic_agent/generic_agent.py index b933b430..dd6921d1 100644 --- a/src/agentlab/agents/generic_agent/generic_agent.py +++ b/src/agentlab/agents/generic_agent/generic_agent.py @@ -31,6 +31,7 @@ class GenericAgentArgs(AgentArgs): chat_model_args: BaseModelArgs = None flags: GenericPromptFlags = None max_retry: int = 4 + privaleged_actions_path : None def __post_init__(self): try: # some attributes might be temporarily args.CrossProd for hyperparameter generation From a697713fbe5cbd43a4387d0c13353d0e8d259da2 Mon Sep 17 00:00:00 2001 From: emilianopp Date: Fri, 22 Aug 2025 01:06:14 +0000 Subject: [PATCH 05/10] bandaid --- src/agentlab/agents/generic_agent/generic_agent.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/agentlab/agents/generic_agent/generic_agent.py b/src/agentlab/agents/generic_agent/generic_agent.py index dd6921d1..b65e6eac 100644 --- a/src/agentlab/agents/generic_agent/generic_agent.py +++ b/src/agentlab/agents/generic_agent/generic_agent.py @@ -68,7 +68,7 @@ def close(self): def make_agent(self): return GenericAgent( - chat_model_args=self.chat_model_args, flags=self.flags, max_retry=self.max_retry + chat_model_args=self.chat_model_args, flags=self.flags, max_retry=self.max_retry,privaleged_actions_path=self.privaleged_actions_path ) @@ -79,6 +79,7 @@ def __init__( chat_model_args: BaseModelArgs, flags: GenericPromptFlags, max_retry: int = 4, + privaleged_actions_path: str = None, ): self.chat_llm = chat_model_args.make_model() From 29b85cfabf442cbfe8bcde5c3d5cd35e589f1c3d Mon Sep 17 00:00:00 2001 From: emilianopp Date: Fri, 22 Aug 2025 01:06:58 +0000 Subject: [PATCH 06/10] bandaid --- src/agentlab/agents/generic_agent/generic_agent.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/agentlab/agents/generic_agent/generic_agent.py b/src/agentlab/agents/generic_agent/generic_agent.py index b65e6eac..f4dffc06 100644 --- a/src/agentlab/agents/generic_agent/generic_agent.py +++ b/src/agentlab/agents/generic_agent/generic_agent.py @@ -81,7 +81,7 @@ def __init__( max_retry: int = 4, privaleged_actions_path: str = None, ): - + self.privaleged_actions_path = privaleged_actions_path self.chat_llm = chat_model_args.make_model() self.chat_model_args = chat_model_args self.max_retry = max_retry From 3fb5677d93f78ac73158804a9445d6cca412b45d Mon Sep 17 00:00:00 2001 From: emilianopp Date: Fri, 22 Aug 2025 01:12:50 +0000 Subject: [PATCH 07/10] bandaid --- src/agentlab/agents/generic_agent/generic_agent.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/src/agentlab/agents/generic_agent/generic_agent.py b/src/agentlab/agents/generic_agent/generic_agent.py index f4dffc06..0a5290b1 100644 --- a/src/agentlab/agents/generic_agent/generic_agent.py +++ b/src/agentlab/agents/generic_agent/generic_agent.py @@ -24,14 +24,14 @@ from agentlab.llm.tracking import cost_tracker_decorator from .generic_agent_prompt import GenericPromptFlags, MainPrompt - +import Path @dataclass class GenericAgentArgs(AgentArgs): chat_model_args: BaseModelArgs = None flags: GenericPromptFlags = None max_retry: int = 4 - privaleged_actions_path : None + 
privaleged_actions_path :Path = None def __post_init__(self): try: # some attributes might be temporarily args.CrossProd for hyperparameter generation @@ -79,7 +79,8 @@ def __init__( chat_model_args: BaseModelArgs, flags: GenericPromptFlags, max_retry: int = 4, - privaleged_actions_path: str = None, + privaleged_actions_path: Path = None, + ): self.privaleged_actions_path = privaleged_actions_path self.chat_llm = chat_model_args.make_model() From edf9f3aca36d801e6c5668bb1403313791232c45 Mon Sep 17 00:00:00 2001 From: emilianopp Date: Fri, 22 Aug 2025 01:15:36 +0000 Subject: [PATCH 08/10] bandaid --- src/agentlab/agents/generic_agent/generic_agent.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/agentlab/agents/generic_agent/generic_agent.py b/src/agentlab/agents/generic_agent/generic_agent.py index 0a5290b1..253b1f40 100644 --- a/src/agentlab/agents/generic_agent/generic_agent.py +++ b/src/agentlab/agents/generic_agent/generic_agent.py @@ -24,7 +24,8 @@ from agentlab.llm.tracking import cost_tracker_decorator from .generic_agent_prompt import GenericPromptFlags, MainPrompt -import Path +from pathlib import Path + @dataclass class GenericAgentArgs(AgentArgs): From 9b0bea3c2673b18c6e08682e9028758061e456a0 Mon Sep 17 00:00:00 2001 From: emilianopp Date: Fri, 22 Aug 2025 19:11:05 +0000 Subject: [PATCH 09/10] set goal --- src/agentlab/agents/generic_agent/generic_agent.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/src/agentlab/agents/generic_agent/generic_agent.py b/src/agentlab/agents/generic_agent/generic_agent.py index 253b1f40..60636daa 100644 --- a/src/agentlab/agents/generic_agent/generic_agent.py +++ b/src/agentlab/agents/generic_agent/generic_agent.py @@ -215,4 +215,7 @@ def set_task(self, task: str): task: str The new task for the agent. """ - self.task = task \ No newline at end of file + self.task = task + def set_goal(self, goal): + self.goal = goal + self.trajectory = self.privileged_observations.get_random(task=self.task, goal=self.goal) From 6161634c381b44d147aa88e33f2afca035bfa8e9 Mon Sep 17 00:00:00 2001 From: emilianopp Date: Fri, 22 Aug 2025 22:42:08 +0000 Subject: [PATCH 10/10] bandaid --- src/agentlab/agents/generic_agent/generic_agent.py | 1 - 1 file changed, 1 deletion(-) diff --git a/src/agentlab/agents/generic_agent/generic_agent.py b/src/agentlab/agents/generic_agent/generic_agent.py index 60636daa..f3af0a53 100644 --- a/src/agentlab/agents/generic_agent/generic_agent.py +++ b/src/agentlab/agents/generic_agent/generic_agent.py @@ -218,4 +218,3 @@ def set_task(self, task: str): self.task = task def set_goal(self, goal): self.goal = goal - self.trajectory = self.privileged_observations.get_random(task=self.task, goal=self.goal)