From dd07b0d86e155ed5adb4a2d02185b592633324af Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E6=AE=B7=E9=91=AB?= Date: Wed, 26 Nov 2025 15:18:56 +0800 Subject: [PATCH] feat: add diff tool and support for chrome-devtools --- ms_agent/agent/llm_agent.py | 8 +- ms_agent/tools/filesystem_tool.py | 94 ++++++++++++++++++- projects/code_scratch/architecture.yaml | 6 +- .../code_scratch/callbacks/eval_callback.py | 50 ++++------ projects/code_scratch/coding.yaml | 2 +- projects/code_scratch/refine.yaml | 49 ++++++---- 6 files changed, 158 insertions(+), 51 deletions(-) diff --git a/ms_agent/agent/llm_agent.py b/ms_agent/agent/llm_agent.py index af7d42c63..82ed4272b 100644 --- a/ms_agent/agent/llm_agent.py +++ b/ms_agent/agent/llm_agent.py @@ -405,7 +405,7 @@ def handle_new_response(self, messages: List[Message], assert response_message is not None, 'No response message generated from LLM.' if response_message.tool_calls: self.log_output('[tool_calling]:') - for tool_call in response_message.tool_calls: + for idx, tool_call in enumerate(response_message.tool_calls): tool_call = deepcopy(tool_call) if isinstance(tool_call['arguments'], str): try: @@ -413,6 +413,12 @@ def handle_new_response(self, messages: List[Message], tool_call['arguments']) except json.decoder.JSONDecodeError: pass + if tool_call['arguments'] is None: + response_message.tool_calls[idx]['arguments'] = { + '__error__': + 'Original arguments were None, replaced by default.' + } + self.log_output( json.dumps(tool_call, ensure_ascii=False, indent=4)) diff --git a/ms_agent/tools/filesystem_tool.py b/ms_agent/tools/filesystem_tool.py index 03dc966ae..3d6b1992a 100644 --- a/ms_agent/tools/filesystem_tool.py +++ b/ms_agent/tools/filesystem_tool.py @@ -5,8 +5,9 @@ from ms_agent.llm.utils import Tool from ms_agent.tools.base import ToolBase -from ms_agent.utils import get_logger +from ms_agent.utils import MAX_CONTINUE_RUNS, get_logger, retry from ms_agent.utils.constants import DEFAULT_OUTPUT_DIR +from openai import OpenAI logger = get_logger() @@ -21,6 +22,12 @@ def __init__(self, config, **kwargs): super(FileSystemTool, self).__init__(config) self.exclude_func(getattr(config.tools, 'file_system', None)) self.output_dir = getattr(config, 'output_dir', DEFAULT_OUTPUT_DIR) + if 'edit_file' not in self.exclude_functions: + self.edit_file_config = getattr(config.tools.file_system, + 'edit_file_config', None) + self.client = OpenAI( + api_key=self.edit_file_config.api_key, + base_url=self.edit_file_config.base_url) self.trust_remote_code = kwargs.get('trust_remote_code', False) self.allow_read_all_files = getattr( getattr(config.tools, 'file_system', {}), 'allow_read_all_files', @@ -125,6 +132,65 @@ async def get_tools(self): 'required': ['path'], 'additionalProperties': False }), + Tool( + tool_name='edit_file', + server_name='file_system', + description= + ('Use this tool to make an edit to an existing file.\n\n' + 'This will be read by a less intelligent model, which will quickly apply the edit. ' + 'You should make it clear what the edit is, while also minimizing the unchanged code you write.\n' + 'When writing the edit, you should specify each edit in sequence, with the special comment ' + '// ... existing code ... to represent unchanged code in between edited lines.\n\n' + 'For example:\n\n// ... existing code ...\nFIRST_EDIT\n// ... existing code ...\n' + 'SECOND_EDIT\n// ... existing code ...\nTHIRD_EDIT\n// ... existing code ...\n\n' + 'You should still bias towards repeating as few lines of the original file ' + 'as possible to convey the change.\n' + 'But, each edit should contain minimally sufficient context of unchanged lines ' + "around the code you're editing to resolve ambiguity.\n" + 'DO NOT omit spans of pre-existing code (or comments) without using the ' + '// ... existing code ... comment to indicate its absence. ' + 'If you omit the existing code comment, the model may inadvertently delete these lines.\n' + 'If you plan on deleting a section, you must provide context before and after to delete it. ' + 'If the initial code is ```code \\n Block 1 \\n Block 2 \\n Block 3 \\n code```, ' + 'and you want to remove Block 2, you would output ' + '```// ... existing code ... \\n Block 1 \\n Block 3 \\n // ... existing code ...```.\n' + 'Make sure it is clear what the edit should be, and where it should be applied.\n' + 'Make edits to a file in a single edit_file call ' + 'instead of multiple edit_file calls to the same file. ' + 'The apply model can handle many distinct edits at once.' + ), + parameters={ + 'type': 'object', + 'properties': { + 'path': { + 'type': 'string', + 'description': + 'Path of the target file to modify.' + }, + 'instructions': { + 'type': + 'string', + 'description': + ('A single sentence instruction describing ' + 'what you are going to do for the sketched edit. ' + 'This is used to assist the less intelligent model in applying the edit. ' + 'Use the first person to describe what you are going to do. ' + 'Use it to disambiguate uncertainty in the edit.' + ) + }, + 'code_edit': { + 'type': + 'string', + 'description': + ('Specify ONLY the precise lines of code that you wish to edit. ' + 'NEVER specify or write out unchanged code. ' + 'Instead, represent all unchanged code using the comment of the language ' + "you're editing in - example: // ... existing code ..." + ) + } + }, + 'required': ['path', 'instructions', 'code_edit'] + }), ] } return { @@ -267,3 +333,29 @@ async def list_files(self, path: str = None): except Exception as e: return f'List files of <{path or "root path"}> failed, error: ' + str( e) + + @retry(max_attempts=MAX_CONTINUE_RUNS, delay=1.0) + async def edit_file(self, + path: str = None, + instructions: str = None, + code_edit: str = None): + try: + with open(os.path.join(self.output_dir, path), 'r') as f: + initial_code = f.read() + response = self.client.chat.completions.create( + model=self.edit_file_config.diff_model, + messages=[{ + 'role': + 'user', + 'content': + (f'{instructions}\n' + f'{initial_code}\n' + f'{code_edit}') + }]) + merged_code = response.choices[0].message.content + + with open(os.path.join(self.output_dir, path), 'w') as f: + f.write(merged_code) + return f'Edit file <{path}> successfully.' + except Exception as e: + return f'Edit file <{path}> failed, error: ' + str(e) diff --git a/projects/code_scratch/architecture.yaml b/projects/code_scratch/architecture.yaml index 76936bac1..c790a05a5 100644 --- a/projects/code_scratch/architecture.yaml +++ b/projects/code_scratch/architecture.yaml @@ -1,6 +1,6 @@ llm: service: openai - model: claude-sonnet-4-5-20250929 + model: claude-haiku-4-5-20251001 openai_api_key: openai_base_url: https://dashscope.aliyuncs.com/compatible-mode/v1 @@ -53,6 +53,10 @@ prompt: callbacks: - callbacks/artifact_callback +tools: + file_system: + mcp: false + max_chat_round: 1 tool_call_timeout: 30000 diff --git a/projects/code_scratch/callbacks/eval_callback.py b/projects/code_scratch/callbacks/eval_callback.py index 0757bcb96..2511d61af 100644 --- a/projects/code_scratch/callbacks/eval_callback.py +++ b/projects/code_scratch/callbacks/eval_callback.py @@ -4,7 +4,6 @@ from contextlib import contextmanager from typing import List, Optional -from file_parser import extract_code_blocks from ms_agent.agent.runtime import Runtime from ms_agent.callbacks import Callback from ms_agent.llm.utils import Message @@ -26,6 +25,7 @@ def __init__(self, config: DictConfig): self.compile_round = 300 self.cur_round = 0 self.last_issue_length = 0 + self.devtool_prompt = getattr(config.prompt, 'devtool', None) async def on_task_begin(self, runtime: Runtime, messages: List[Message]): self.omit_intermediate_messages(messages) @@ -87,25 +87,17 @@ def check_install(): @staticmethod def check_runtime(): try: - os.system('pkill -f node') - if os.getcwd().endswith('backend'): - result = subprocess.run(['npm', 'run', 'dev'], - capture_output=True, - text=True, - timeout=5, - stdin=subprocess.DEVNULL) - else: - result = subprocess.run(['npm', 'run', 'build'], - capture_output=True, - text=True, - check=True) + result = subprocess.run(['npm', 'run', 'dev'], + capture_output=True, + text=True, + timeout=5, + stdin=subprocess.DEVNULL) except subprocess.CalledProcessError as e: output = EvalCallback._parse_e_msg(e) except subprocess.TimeoutExpired as e: output = EvalCallback._parse_e_msg(e) else: output = result.stdout + '\n' + result.stderr - os.system('pkill -f node') return output def _run_compile(self): @@ -139,12 +131,21 @@ async def on_generate_response(self, runtime: Runtime, self.last_issue_length = len(messages) - 3 - self.last_issue_length self.omit_intermediate_messages(messages) query = self.get_compile_feedback('frontend').strip() + + # compile -> devtools if not query: - human_feedback = True - query = self.get_human_feedback().strip() + feedback_type = 'devtools' + query = self.devtool_prompt + self.devtool_prompt = 'Use chrome-devtools to thoroughly test again' else: - human_feedback = False + feedback_type = 'compling' logger.warn(f'[Compile Feedback]: {query}]') + + # devtools -> human + if not query: + feedback_type = 'human' + query = self.get_human_feedback().strip() + if not query: self.feedback_ended = True feedback = ( @@ -153,22 +154,11 @@ async def on_generate_response(self, runtime: Runtime, else: all_local_files = await self.file_system.list_files() feedback = ( - f'Feedback from {"human" if human_feedback else "compling"}: {query}\n' + f'Feedback from {feedback_type}: {query}\n' f'The files on the local system of this project: {all_local_files}\n' f'Now please analyze and fix this issue:\n') messages.append(Message(role='user', content=feedback)) - - async def on_tool_call(self, runtime: Runtime, messages: List[Message]): - design, _ = extract_code_blocks( - messages[-1].content, target_filename='design.txt') - if len(design) > 0: - front, design = messages[-1].content.split( - '```text: design.txt', maxsplit=1) - design, end = design.rsplit('```', 1) - design = design.strip() - if design: - messages[2].content = await self.do_arch_update( - runtime=runtime, messages=messages, updated_arch=design) + logger.info(messages) async def after_tool_call(self, runtime: Runtime, messages: List[Message]): runtime.should_stop = runtime.should_stop and self.feedback_ended diff --git a/projects/code_scratch/coding.yaml b/projects/code_scratch/coding.yaml index d138ad1aa..298f82864 100644 --- a/projects/code_scratch/coding.yaml +++ b/projects/code_scratch/coding.yaml @@ -1,6 +1,6 @@ llm: service: openai - model: claude-sonnet-4-5-20250929 + model: claude-haiku-4-5-20251001 openai_api_key: openai_base_url: https://dashscope.aliyuncs.com/compatible-mode/v1 diff --git a/projects/code_scratch/refine.yaml b/projects/code_scratch/refine.yaml index a2ec5e96b..47c0decef 100644 --- a/projects/code_scratch/refine.yaml +++ b/projects/code_scratch/refine.yaml @@ -1,6 +1,6 @@ llm: service: openai - model: claude-sonnet-4-5-20250929 + model: claude-haiku-4-5-20251001 openai_api_key: openai_base_url: https://dashscope.aliyuncs.com/compatible-mode/v1 @@ -40,21 +40,7 @@ prompt: * Do a minimum change in case that the normal code is damaged, if you are doing a break change, change related files also * Fix other issues you discover while reading the code files, and these issues need to be ones where you have identified the root cause - 4. Express your thinking in concise and clear language. When you fix files, you should use the following format: - - ```type: filename - text - ``` - - for example: - ```javascript: frontend/index.js - your code here - ``` - - `javascript: frontend/index.js` will be used as the filename. If you are fixing a file, you need to: - * Read the target file - * Follow the original data structures and file imports, do not break it(you may read more files depends on) - * Then output the complete fixed code of the file. + 4. Express your thinking in concise and clear language. When you fix files, you should use the edit_file tool If you only output code snippets to demonstrate your conclusions, you can use standard code blocks: @@ -66,8 +52,27 @@ prompt: Let's begin: + devtool: | + Use chrome-devtools to thoroughly test the generated frontend and backend code: + * List all console messages using list_console_messages to identify JavaScript errors, warnings, or logs + * Get detailed error information using get_console_message for each error or warning found + * List network requests using list_network_requests to check if API calls are successful, verify HTTP status codes, and identify failed requests + * Get detailed network request/response information using get_network_request to analyze request headers, payloads, and response data + * Take a snapshot of the page to understand the current UI state and available interactive elements + * Test the implemented functionality by: + - Clicking on interactive elements (buttons, links, forms) using click tool + - Filling out forms using fill or fill_form tools to test user input workflows + - Navigating between pages to verify routing works correctly + - Testing keyboard interactions using press_key when necessary + * Take screenshots at critical steps to document the UI state and verify visual correctness + * Analyze the feedback from all these operations to identify: + - Console errors (e.g., undefined variables, import errors, runtime exceptions) + - Network failures (e.g., 404/500 errors, CORS issues, timeout problems) + - UI/UX issues (e.g., broken layouts, missing elements, non-functional buttons) + - Logic errors (e.g., incorrect data display, failed form submissions) + * Use this comprehensive feedback to help the refine model better understand and fix the issues + callbacks: - - callbacks/artifact_callback - callbacks/eval_callback tools: @@ -77,6 +82,16 @@ tools: - create_directory - write_file - list_files + edit_file_config: + diff_model: morph-v3-fast + api_key: + base_url: https://api.morphllm.com/v1 + + chrome-devtools: + mcp: true + command: "npx" + args: ["-y", "chrome-devtools-mcp@latest"] + transport: "stdio" max_chat_round: 100