inclusionAI
diff --git a/‎train/adapter/verl/agent_template.py‎
Lines changed: 15 additions & 4 deletions b/‎train/adapter/verl/agent_template.py‎
Lines changed: 15 additions & 4 deletions
diff --git a/‎train/adapter/verl/verl_trainer.py‎
Lines changed: 122 additions & 47 deletions b/‎train/adapter/verl/verl_trainer.py‎
Lines changed: 122 additions & 47 deletions
diff --git a/‎train/examples/train_gaia_with_aworld_verl/gaia_data/sample_test.parquet‎
11.2 KB b/‎train/examples/train_gaia_with_aworld_verl/gaia_data/sample_test.parquet‎
11.2 KB
diff --git a/‎train/examples/train_gaia_with_aworld_verl/gaia_data/sample_train.parquet‎
11.2 KB b/‎train/examples/train_gaia_with_aworld_verl/gaia_data/sample_train.parquet‎
11.2 KB
@@ -11,6 +11,8 @@
 from aworld.logs.util import logger
 from {parser_module} import {parser_name}
 
+{agent_import_str}
+{tool_aggregate_func_import_str}
 from train.adapter.verl.aworld_agent_loop import AworldAgentLoop
 
 
@@ -28,20 +30,29 @@ async def build_agents(self) -> Union[Agent, Swarm]:
                     "request_id": uuid.uuid4().hex,
                     "tool_parser": "hermes"
                 }},
-                {kv_parameters}
+                {model_kv_parameters}
             ),
         )
 
         logger.info(f"agent config: ", conf)
         mcp_config = {mcp_config}
-        return Agent(
+        return {real_agent}(
             conf=conf,
             name="{agent_name}",
             desc="{agent_desc}",
-            system_prompt="{system_prompt}",
+            system_prompt='''{system_prompt}''',
+            tool_names={tool_names},
+            agent_names={agent_names},
+            wait_tool_result={wait_tool_result},
+            feedback_tool_result={feedback_tool_result},
+            black_tool_actions={black_tool_actions},
+            skill_configs={skill_configs},
+            event_handler_name={event_handler_name},
+            tools_aggregate_func={tools_aggregate_func},
             mcp_config=mcp_config,
             mcp_servers=list(server_name for server_name in mcp_config.get("mcpServers", {{}}).keys()),
-            model_output_parser={parser_name}()
+            model_output_parser={parser_name}(),
+            {extend_params}
         )
 
 """
@@ -5,7 +5,7 @@
 import traceback
 import yaml
 
-from typing import Callable, Union, Any
+from typing import Callable, Union, Tuple
 
 from datasets import Dataset
 from omegaconf import OmegaConf
@@ -14,6 +14,7 @@
 from aworld.agents.llm_agent import Agent
 from aworld.config import BaseConfig, ConfigDict, load_config
 from aworld.core.common import Config
+from aworld.logs.util import logger
 from train.adapter.verl.agent_template import VERL_TEMPLATE
 from train.trainer.trainer_processor import TrainerProcessor
 
@@ -29,7 +30,9 @@ def train(self):
 
         main(self.config)
 
-    def check_dataset(self, dataset: Union[str, Dataset], test_dataset: Union[str, Dataset] = None):
+    def check_dataset(self, dataset: Union[str, Dataset], test_dataset: Union[str, Dataset] = None) -> Tuple[str, str]:
+        logger.info("Check dataset...")
+
         if isinstance(dataset, str):
             # means dataset path
             dataset_path = dataset
@@ -41,6 +44,9 @@ def check_dataset(self, dataset: Union[str, Dataset], test_dataset: Union[str, D
             raise ValueError("Train dataset must be a string or a Dataset")
         self.train_dataset_path = dataset_path
 
+        if not test_dataset:
+            test_dataset = dataset_path
+
         if isinstance(test_dataset, str):
             # means dataset path
             test_dataset_path = test_dataset
@@ -51,9 +57,19 @@ def check_dataset(self, dataset: Union[str, Dataset], test_dataset: Union[str, D
             test_dataset_path = None
         self.test_dataset_path = test_dataset_path
 
-    def check_reward(self, reward_func: Union[str, Callable[..., float]]):
+        logger.info(f"View datasets in file: {self.train_dataset_path} and {self.test_dataset_path}")
+        return self.train_dataset_path, self.test_dataset_path
+
+    def check_reward(self, reward_func: Union[str, Callable[..., float]]) -> Tuple[str, str]:
+        logger.info("Check reward...")
+
         if isinstance(reward_func, str):
-            return reward_func, os.path.basename(reward_func).replace(".py", "")
+            # a str is the path of the reward function file; the function name is the file's base name without ".py"
+            name = os.path.basename(reward_func).replace(".py", "")
+            self.reward_file_path = reward_func
+            self.reward_func_name = name
+            logger.info(f"View reward function in file: {reward_func}, name is: {name}")
+            return reward_func, name
 
         # data_source, solution_str, ground_truth, extra_info=None
         sig = inspect.signature(reward_func)
@@ -78,6 +94,7 @@ def check_reward(self, reward_func: Union[str, Callable[..., float]]):
         content = inspect.getsource(reward_func)
         if 'if __name__' in content and '__main__' in content:
             # have __name__ == '__main__', save to function to the new file
+            # and the function must be dependency-free, because only its own source is written out
             reward_file_path = f'{self.run_path}/reward_func.py'
             with open(reward_file_path, 'w') as writer:
                 writer.write(content)
@@ -86,110 +103,168 @@ def check_reward(self, reward_func: Union[str, Callable[..., float]]):
 
         self.reward_file_path = reward_file_path
         self.reward_func_name = reward_func.__name__
+        logger.info(f"View reward function in file: {reward_file_path}, name is: {self.reward_func_name}")  # log the name, not the path twice
         return reward_file_path, reward_func.__name__
 
-    def check_agent(self, agent: Union[str, Agent]):
+    def check_agent(self, agent: Union[str, Agent]) -> str:
+        """Check single agent instance, and create agent loop dynamically.
+
+        NOTE: Single-agent only now, Swarm to be added in the future.
+
+        Returns:
+            Path of the agent yaml file written under `run_path` for the VeRL agent loop.
+        """
+        logger.info("Check agent...")
+
         if isinstance(agent, str):
             # means an agent yaml config file path
             config_dict = load_config(agent)
             agent = Agent(**config_dict)
 
+        # model params: every remaining llm_config entry is rendered as a keyword argument in the generated module
         model_config: BaseConfig = agent.conf.llm_config
         if isinstance(model_config, dict):
             model_dict = dict(model_config)
         else:
             model_dict = dict(model_config.to_dict())
-        model_dict.pop("llm_provider", None)
-        model_dict.pop("llm_model_name", None)
-        model_dict.pop("llm_base_url", None)
-        model_dict.pop("llm_api_key", None)
-        model_dict.pop("llm_client_type", None)
-        model_dict.pop("params", None)
-        model_dict.pop("model_type", None)
-
-        kv_parameters = ",\n".join([f"{k}={v}" for k, v in model_dict.items()])
+
+        for key in ["llm_provider", "llm_model_name", "llm_base_url",
+                    "llm_api_key", "llm_client_type", "params", "model_type"]:
+            model_dict.pop(key, None)
+
+        model_kv_parameters = ",\n".join([f"{k}={v!r}" for k, v in model_dict.items()])  # repr keeps str values quoted in generated source
+
+        # agent params
+        func_name = None
+        func_str = ''
+        if agent.tools_aggregate_func != agent._tools_aggregate_func:
+            # special process tools_aggregate_func
+            if agent.tools_aggregate_func.__module__ == '__main__':
+                raise ValueError("tools_aggregate_func must be in a independent file")
+            else:
+                func_str = f"from {agent.tools_aggregate_func.__module__} import {agent.tools_aggregate_func.__name__}"
+            func_name = agent.tools_aggregate_func.__name__
+
+        if agent.__class__ == Agent:
+            import_str = ''
+            extend_params = ''
+        else:
+            # custom agent: each extra __init__ parameter is read back from the same-named instance attribute
+            import_str = f"from {agent.__module__} import {agent.__class__.__name__}"
+            base_sig = inspect.signature(Agent.__init__)
+            base_params = base_sig.parameters
+
+            sig = inspect.signature(agent.__init__)
+            kv = []
+            for k, v in sig.parameters.items():
+                if k not in base_params and v.kind not in (v.VAR_POSITIONAL, v.VAR_KEYWORD):  # *args/**kwargs are not attributes
+                    kv.append(f"{k}={getattr(agent, k)!r}")  # repr so the value is a valid literal in generated source
+            extend_params = ',\n'.join(kv)
+
+        # NOTE: If the basic interface of the `Agent` changes, an upgrade is required
         con = VERL_TEMPLATE.format(agent_name=agent.name(),
                                    agent_desc=agent.desc(),
                                    system_prompt=agent.system_prompt,
                                    mcp_config=agent.mcp_config,
+                                   tool_names=agent.tool_names,
+                                   agent_names=agent.handoffs,
+                                   wait_tool_result=agent.wait_tool_result,
+                                   feedback_tool_result=agent.feedback_tool_result,
+                                   black_tool_actions=agent.black_tool_actions,
+                                   skill_configs=agent.skill_configs,
+                                   event_handler_name=repr(agent.event_handler_name),  # quoted when it is a str, None otherwise
+                                   tool_aggregate_func_import_str=func_str,
+                                   tools_aggregate_func=func_name,
                                    parser_module=type(agent.model_output_parser).__module__,
                                    parser_name=type(agent.model_output_parser).__name__,
-                                   kv_parameters=kv_parameters)
+                                   model_kv_parameters=model_kv_parameters,
+                                   agent_import_str=import_str,
+                                   real_agent=agent.__class__.__name__,
+                                   extend_params=extend_params)
         module = f"{self.run_path}/{agent.name()}"
         with open(f"{module}.py", 'w+') as write:
             write.writelines(con)
 
         # VeRL agent config file
         module = module.replace(os.getcwd(), '').replace('/', '.')
-        if module[0] == '.':
-            module = module[1:]
+        module = module[1:] if module[0] == '.' else module
         con = f"""- name: {agent.name()}
   _target_: {module}.VerlAgentLoop
                """
-        with open(f"{self.run_path}/agent.yaml", "w+") as write:
+
+        agent_yaml = f"{self.run_path}/agent.yaml"
+        with open(agent_yaml, "w+") as write:
             write.writelines(con)
-        self.agent_yaml = f"{self.run_path}/agent.yaml"
+        self.agent_yaml = agent_yaml
+        logger.info(f"View agent config in file: {agent_yaml}")
         return self.agent_yaml
 
-    def check_config(self, config: Union[str, Any]):
+    def check_config(self, config: Union[str, Config]) -> DictConfig:
         import verl.trainer.config
 
-        file_path = os.path.join(os.path.dirname(verl.trainer.config.__file__), "_generated_ppo_trainer.yaml")
-        try:
-            with open(file_path, "r") as file:
-                yaml_data = yaml.safe_load(file)
-        except FileNotFoundError:
-            raise ValueError(f"Can not find the file: {config}")
-        except Exception:
-            raise RuntimeError(f"{config} read fail.\n", traceback.format_exc())
+        logger.info("Check config...")
 
-        configs = DictConfig(OmegaConf.to_container(DictConfig(yaml_data), resolve=True))
+        # user overrides: a yaml file path or a Config object; merged on top of the VeRL defaults below
+        custom_configs = dict()
         if isinstance(config, str):
             try:
                 with open(config, "r") as file:
-                    yaml_data = yaml.safe_load(file)
-                configs.merge_with(yaml_data)
+                    custom_configs = yaml.safe_load(file) or {}  # an empty yaml file loads as None
             except FileNotFoundError:
                 raise ValueError(f"Can not find the file: {config}")
             except Exception:
                 raise RuntimeError(f"{config} read fail.\n", traceback.format_exc())
-
         elif isinstance(config, Config):
             if isinstance(config, BaseConfig):
-                config_dict = ConfigDict(config.model_dump())
-                configs.merge_with(config_dict)
-
+                custom_configs = ConfigDict(config.model_dump())
+            else:
+                custom_configs = config
         else:
             raise ValueError("Config must be a string or a Config")
 
+        # full default config: the `_generated_ppo_trainer.yaml` file located next to `verl.trainer.config`
+        file_path = os.path.join(os.path.dirname(verl.trainer.config.__file__), "_generated_ppo_trainer.yaml")
+        try:
+            with open(file_path, "r") as file:
+                root_configs = yaml.safe_load(file)
+        except FileNotFoundError:
+            raise ValueError(f"Can not find the file: {file_path}")
+        except Exception:
+            raise RuntimeError(f"{file_path} read fail.\n", traceback.format_exc())
+
+        configs = OmegaConf.merge(root_configs, custom_configs)
+        configs = DictConfig(OmegaConf.to_container(configs, resolve=True))
+        logger.debug(f"train full configs: {configs}")
+
         self.config = configs
         # replace to real value, because the values are dynamically generated
-        if not self.config['actor_rollout_ref']['rollout']['agent']['agent_loop_config_path']:
+        if not self.config.actor_rollout_ref.rollout.agent.agent_loop_config_path:
             if not hasattr(self, 'agent_yaml'):
                 raise RuntimeError("Please check agent first before check config")
-            self.config['actor_rollout_ref']['rollout']['agent']['agent_loop_config_path'] = self.agent_yaml
+            self.config.actor_rollout_ref.rollout.agent.agent_loop_config_path = self.agent_yaml
 
-        if not self.config['custom_reward_function']['name']:
+        if not self.config.custom_reward_function.name:
             if not hasattr(self, 'reward_func_name'):
                 raise RuntimeError("Please check reward function first before check config")
-            self.config['custom_reward_function']['name'] = self.reward_func_name
-        if not self.config['custom_reward_function']['path']:
-            self.config['custom_reward_function']['path'] = self.reward_file_path
+            self.config.custom_reward_function.name = self.reward_func_name
+        if not self.config.custom_reward_function.path:
+            self.config.custom_reward_function.path = self.reward_file_path
 
-        if not self.config['data']['train_files']:
+        if not self.config.data.train_files:
             if not hasattr(self, 'train_dataset_path'):
                 raise RuntimeError("Please check train dataset first before check config")
-            self.config['data']['train_files'] = [self.train_dataset_path]
-        if not self.config['data']['val_files']:
+            self.config.data.train_files = [self.train_dataset_path]
+        if not self.config.data.val_files:
             if not hasattr(self, 'test_dataset_path'):
                 raise RuntimeError("Please check test dataset first before check config")
-            self.config['data']['val_files'] = [self.test_dataset_path]
+            self.config.data.val_files = [self.test_dataset_path]
 
-        if not self.config['trainer']['default_local_dir']:
+        if not self.config.trainer.default_local_dir:
             local_dir = os.path.join(self.run_path, 'checkpoints')
             os.makedirs(local_dir, exist_ok=True)
-            self.config['trainer']['default_local_dir'] = local_dir
+            self.config.trainer.default_local_dir = local_dir
 
         # for check
         yaml.safe_dump(OmegaConf.to_container(self.config), open(f"{self.run_path}/final_trainer.yaml", "w"))
+        logger.info(f"View final config in file: {self.run_path}/final_trainer.yaml")
         return self.config