diff --git a/examples/strands_migration_agent/README.md b/examples/strands_migration_agent/README.md index 71b1b8f..f4ba9b8 100644 --- a/examples/strands_migration_agent/README.md +++ b/examples/strands_migration_agent/README.md @@ -1,6 +1,8 @@ # Strands Migration Agent -This agent migrates repos written in Java 8 to use Java 17. This example is under active development alongside the `agentcore-rl-toolkit` library. +This agent tackles the problem of code migration from Java 8 to Java 17 as introduced in [MigrationBench](https://github.com/amazon-science/MigrationBench). +It builds upon the official [JavaMigrationAgent](https://github.com/amazon-science/JavaMigration/tree/main/java_migration_agent) with open source LLMs. +This example is under active development alongside the `agentcore-rl-toolkit` library. ## Basic Setup @@ -139,9 +141,11 @@ curl -X POST http://localhost:8080/invocations \ -H "Content-Type: application/json" \ -d '{ "prompt": "Please help migrate this repo: {repo_path}. There are {num_tests} test cases in it.", - "repo_uri": "s3://{BUCKET}/tars/test/15093015999__EJServer/15093015999__EJServer.tar.gz", - "metadata_uri": "s3://{BUCKET}/tars/test/15093015999__EJServer/metadata.json", + "repo_uri": "s3://my-migration-bench-data/tars/test/15093015999__EJServer/15093015999__EJServer.tar.gz", + "metadata_uri": "s3://my-migration-bench-data/tars/test/15093015999__EJServer/metadata.json", "require_maximal_migration": false, + "use_dependency_search_tool": true, + "apply_static_update": true, "_rollout": { "exp_id": "dev", "s3_bucket": "agentcore-rl", @@ -301,3 +305,17 @@ python evaluate.py --exp_id my_eval --max_concurrent 50 --max_pool_connections 5 ``` Results are saved as JSONL files under `results/` (e.g., `results/my_eval.jsonl`). 
+ +## 📚 Citation +If you use our work on code migration, please cite +```bibtex +@misc{liu2025migrationbenchrepositorylevelcodemigration, + title={MigrationBench: Repository-Level Code Migration Benchmark from Java 8}, + author={Linbo Liu and Xinle Liu and Qiang Zhou and Lin Chen and Yihan Liu and Hoan Nguyen and Behrooz Omidvar-Tehrani and Xi Shen and Jun Huan and Omer Tripp and Anoop Deoras}, + year={2025}, + eprint={2505.09569}, + archivePrefix={arXiv}, + primaryClass={cs.SE}, + url={https://arxiv.org/abs/2505.09569}, +} +``` diff --git a/examples/strands_migration_agent/eval_utils.py b/examples/strands_migration_agent/eval_utils.py index 6d0099b..1b38c23 100644 --- a/examples/strands_migration_agent/eval_utils.py +++ b/examples/strands_migration_agent/eval_utils.py @@ -40,7 +40,12 @@ def get_s3_folder_uris(s3_uri: str) -> list[str]: return folder_uris -def prepare_payload(folder_uri: str) -> dict: +def prepare_payload( + folder_uri: str, + require_maximal_migration: bool = False, + apply_static_update: bool = False, + use_dependency_search_tool: bool = False, +) -> dict: """ Prepare a single payload for a repository folder. @@ -60,7 +65,9 @@ def prepare_payload(folder_uri: str) -> dict: "prompt": "Please help migrate this repo: {repo_path}. There are {num_tests} test cases in it.", "repo_uri": repo_uri, "metadata_uri": metadata_uri, - "require_maximal_migration": False, + "require_maximal_migration": require_maximal_migration, + "apply_static_update": apply_static_update, + "use_dependency_search_tool": use_dependency_search_tool, } diff --git a/examples/strands_migration_agent/evaluate.py b/examples/strands_migration_agent/evaluate.py index 376e8d1..c3495a2 100644 --- a/examples/strands_migration_agent/evaluate.py +++ b/examples/strands_migration_agent/evaluate.py @@ -90,6 +90,24 @@ def main(): default=eval_config.get("sampling_params"), help="Sampling parameters as JSON string (e.g. 
'{\"temperature\": 0.7}')", ) + parser.add_argument( + "--require_maximal_migration", + action="store_true", + default=False, + help="Whether a repository is evaluated under maximal migration", + ) + parser.add_argument( + "--apply_static_update", + action="store_true", + default=False, + help="Whether to apply static update on JDK and dependency versions", + ) + parser.add_argument( + "--use_dependency_search_tool", + action="store_true", + default=False, + help="Whether to allow dependency search tool for agent", + ) args = parser.parse_args() @@ -114,7 +132,10 @@ def main(): logger.info(f"Found {len(s3_folder_uris)} repositories to evaluate") # Prepare payloads - payloads = [prepare_payload(uri) for uri in s3_folder_uris] + payloads = [ + prepare_payload(uri, args.require_maximal_migration, args.apply_static_update, args.use_dependency_search_tool) + for uri in s3_folder_uris + ] # Setup results directory and file results_dir = Path(__file__).parent / "results" @@ -197,7 +218,7 @@ def main(): logger.info("=" * 50) logger.info(f"Evaluation complete: {succeeded} succeeded, {failed} failed") logger.info(f"Task success rate: {task_successes}/{total_repos} ({success_rate:.1%})") - logger.info(f"Total benchmark time: {total_time:.1f}s ({total_time/60:.1f}m)") + logger.info(f"Total benchmark time: {total_time:.1f}s ({total_time / 60:.1f}m)") logger.info(f"Results saved to: {result_path}") diff --git a/examples/strands_migration_agent/evaluate_async.py b/examples/strands_migration_agent/evaluate_async.py index 85f9226..9987242 100644 --- a/examples/strands_migration_agent/evaluate_async.py +++ b/examples/strands_migration_agent/evaluate_async.py @@ -217,6 +217,24 @@ async def main(): default=eval_config.get("sampling_params"), help="Sampling parameters as JSON string (e.g. 
'{\"temperature\": 0.7}')", ) + parser.add_argument( + "--require_maximal_migration", + action="store_true", + default=False, + help="Whether a repository is evaluated under maximal migration", + ) + parser.add_argument( + "--apply_static_update", + action="store_true", + default=False, + help="Whether to apply static update on JDK and dependency versions", + ) + parser.add_argument( + "--use_dependency_search_tool", + action="store_true", + default=False, + help="Whether to allow dependency search tool for agent", + ) args = parser.parse_args() @@ -242,7 +260,10 @@ async def main(): logger.info(f"Found {len(s3_folder_uris)} repositories to evaluate") # Prepare payloads - payloads = [prepare_payload(uri) for uri in s3_folder_uris] + payloads = [ + prepare_payload(uri, args.require_maximal_migration, args.apply_static_update, args.use_dependency_search_tool) + for uri in s3_folder_uris + ] # Setup results directory and file results_dir = Path(__file__).parent / "results" diff --git a/examples/strands_migration_agent/models.py b/examples/strands_migration_agent/models.py index 6ceed21..ba4db7d 100644 --- a/examples/strands_migration_agent/models.py +++ b/examples/strands_migration_agent/models.py @@ -6,6 +6,8 @@ class InvocationRequest(BaseModel): repo_uri: str metadata_uri: str require_maximal_migration: bool + use_dependency_search_tool: bool = False + apply_static_update: bool = False class RepoMetaData(BaseModel): diff --git a/examples/strands_migration_agent/pyproject.toml b/examples/strands_migration_agent/pyproject.toml index 2280d81..c4b190d 100644 --- a/examples/strands_migration_agent/pyproject.toml +++ b/examples/strands_migration_agent/pyproject.toml @@ -12,10 +12,12 @@ dependencies = [ "strands-agents[openai]>=1.18.0", "strands-agents-tools>=0.2.16", "migrationbench", + "java-migration-agent", ] [tool.setuptools] py-modules = ["rl_app", "reward", "models", "utils"] [tool.uv.sources] -migrationbench = { git = 
"https://github.com/amazon-science/MigrationBench.git", rev = "354a7858567efd63583224080586371db48e7388" } +migrationbench = { git = "https://github.com/amazon-science/MigrationBench.git" } +java-migration-agent = { git = "https://github.com/amazon-science/JavaMigration.git", subdirectory = "java_migration_agent" } diff --git a/examples/strands_migration_agent/rl_app.py b/examples/strands_migration_agent/rl_app.py index 997d0e0..b194031 100644 --- a/examples/strands_migration_agent/rl_app.py +++ b/examples/strands_migration_agent/rl_app.py @@ -2,6 +2,7 @@ import time from dotenv import load_dotenv +from java_migration_agent.tools.dependency_tools import search_dependency_version from models import InvocationRequest, RepoMetaData from reward import MigrationReward from strands import Agent @@ -33,7 +34,7 @@ + "Example: mvn -ntp clean verify 2>&1 | tail -n 100\n" + "- If you need to see earlier output, run a separate command with `head -n 100`.\n" + "- When you have finished the task, generate a paragraph summarizing the changes you made " - + "without using any tools." + + "without using any tools.\n" ) reward_fn = MigrationReward() @@ -44,16 +45,44 @@ def invoke_agent(payload: dict): base_url = payload["_rollout"]["base_url"] model_id = payload["_rollout"]["model_id"] params = payload["_rollout"].get("sampling_params", {}) + tools = [shell, editor] + + request = InvocationRequest(**payload) + prompt = system_prompt + if request.require_maximal_migration: + prompt += ( + "\nYou should make sure all dependencies in the `pom.xml` file " + "are updated to their latest versions that support Java 17." + ) + + if request.apply_static_update: + prompt += ( + "\nDependencies in the `pom.xml` file have been updated to their " + "latest versions that support Java 17, but these changes might introduce " + "compatibility issues in the codebase. Please fix any such issues in your " + "migration. 
Do not downgrade the dependency versions back to their JDK 8 " + "compatible versions." + ) + + if request.use_dependency_search_tool: + prompt += ( + "\nYou have access to a dependency version lookup tool. When updating dependencies " + "in pom.xml:\n" + "1. Use the search_dependency_version tool to look up the recommended Java 17 " + "compatible version for each dependency\n" + "2. If a dependency is not found in the database, use your knowledge to select " + "an appropriate version\n" + ) + tools.append(search_dependency_version) model = OpenAIModel(client_args={"api_key": "EMPTY", "base_url": base_url}, model_id=model_id, params=params) agent = Agent( model=model, - tools=[shell, editor], - system_prompt=system_prompt, + tools=tools, + system_prompt=prompt, ) - request = InvocationRequest(**payload) metadata = RepoMetaData(**load_metadata_from_s3(request.metadata_uri)) start_time = time.time() @@ -62,7 +91,7 @@ def invoke_agent(payload: dict): logger.info(f"Loaded repo into: {repo_path} (took {load_duration:.2f}s)") start_time = time.time() - setup_repo_environment(repo_path) + setup_repo_environment(repo_path, request.apply_static_update) setup_duration = time.time() - start_time logger.info(f"Finished repo setup for: {repo_path} (took {setup_duration:.2f}s)") diff --git a/examples/strands_migration_agent/utils.py b/examples/strands_migration_agent/utils.py index 6fc7410..1563c08 100644 --- a/examples/strands_migration_agent/utils.py +++ b/examples/strands_migration_agent/utils.py @@ -6,6 +6,7 @@ import tarfile import boto3 +from java_migration_agent.preprocessing import update_dependency_version, update_jdk_related logger = logging.getLogger(__name__) @@ -29,7 +30,7 @@ def load_metadata_from_s3(s3_uri: str) -> dict: return json.loads(content) -def setup_repo_environment(repo_path: str): +def setup_repo_environment(repo_path: str, apply_static_update: bool = False): """ 1. Pre-warm Maven caches (best-effort) 2. Make sure git works. 
@@ -57,6 +58,10 @@ def setup_repo_environment(repo_path: str): capture_output=True, ) logger.info("git working properly!") + if apply_static_update: + logger.info("Apply static update on jdk and dependency versions") + update_jdk_related(repo_path) + update_dependency_version(repo_path) def load_repo_from_s3(s3_uri: str) -> str: