diff --git a/agent/enhancer.py b/agent/enhancer.py index 8abb12d72..8a25912e4 100644 --- a/agent/enhancer.py +++ b/agent/enhancer.py @@ -14,11 +14,13 @@ """An LLM agent to improve a fuzz target's runtime performance. Use it as a usual module locally, or as script in cloud builds. """ +import os + import logger +from agent.jvm_coverage_enhancer import JvmCoverageEnhancer from agent.prototyper import Prototyper from llm_toolkit.prompt_builder import (CoverageEnhancerTemplateBuilder, - EnhancerTemplateBuilder, - JvmFixingBuilder) + EnhancerTemplateBuilder) from llm_toolkit.prompts import Prompt, TextPrompt from results import AnalysisResult, BuildResult, Result @@ -48,37 +50,37 @@ def _initial_prompt(self, results: list[Result]) -> Prompt: trial=self.trial) return Prompt() + # Delegate JVM-specific logic to JvmCoverageEnhancer if benchmark.language == 'jvm': - # TODO: Do this in a separate agent for JVM coverage. - builder = JvmFixingBuilder(self.llm, benchmark, - last_result.run_result.fuzz_target_source, []) - prompt = builder.build([], None, None) + return JvmCoverageEnhancer(self.llm, benchmark, last_result, + last_build_result, self.args).initial_prompt() + + #TODO(dongge): Refine this logic. + if last_result.semantic_result: + error_desc, errors = last_result.semantic_result.get_error_info() + builder = EnhancerTemplateBuilder(self.llm, benchmark, last_build_result, + error_desc, errors) + elif last_result.coverage_result: + builder = CoverageEnhancerTemplateBuilder( + self.llm, + benchmark, + last_build_result, + coverage_result=last_result.coverage_result) else: - # TODO(dongge): Refine this logic. 
- if last_result.semantic_result: - error_desc, errors = last_result.semantic_result.get_error_info() - builder = EnhancerTemplateBuilder(self.llm, benchmark, - last_build_result, error_desc, errors) - elif last_result.coverage_result: - builder = CoverageEnhancerTemplateBuilder( - self.llm, - benchmark, - last_build_result, - coverage_result=last_result.coverage_result) - else: - logger.error( - 'Last result does not contain either semantic result or ' - 'coverage result', - trial=self.trial) - # TODO(dongge): Give some default initial prompt. - prompt = TextPrompt( - 'Last result does not contain either semantic result or ' - 'coverage result') - return prompt - prompt = builder.build(example_pair=[], - tool_guides=self.inspect_tool.tutorial(), - project_dir=self.inspect_tool.project_dir) - # TODO: A different file name/dir. - prompt.save(self.args.work_dirs.prompt) + logger.error( + '''Last result does not contain either semantic result or coverage + result''', + trial=self.trial) + # TODO(dongge): Give some default initial prompt. + return TextPrompt( + '''Last result does not contain either semantic result or coverage + result''') + prompt = builder.build(example_pair=[], + tool_guides=self.inspect_tool.tutorial(), + project_dir=self.inspect_tool.project_dir) + # Save to a dedicated enhancer prompt file + prompt_path = os.path.join(self.args.work_dirs.prompt, + 'enhancer_initial.txt') + prompt.save(prompt_path) return prompt diff --git a/agent/jvm_coverage_enhancer.py b/agent/jvm_coverage_enhancer.py new file mode 100644 index 000000000..96f2b2569 --- /dev/null +++ b/agent/jvm_coverage_enhancer.py @@ -0,0 +1,55 @@ +# Copyright 2025 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+"""
+Module: JVM Coverage Enhancer
+
+This module provides a helper agent to improve code coverage for JVM-based
+fuzz targets by generating or fixing JVM harnesses using LLM prompts.
+"""
+
+import os
+
+from agent.prototyper import Prototyper
+from llm_toolkit.prompt_builder import JvmFixingBuilder
+from llm_toolkit.prompts import Prompt
+from results import AnalysisResult, BuildResult
+
+
+class JvmCoverageEnhancer(Prototyper):
+  """Helper agent that builds the initial JVM fixing prompt for a fuzz target.
+
+  The prompt is produced by JvmFixingBuilder from the fuzz target source held
+  by the analysis result, then written to a dedicated 'jvm_initial.txt' file.
+  """
+
+  def __init__(self, llm, benchmark, analysis_result: AnalysisResult,
+               build_result: BuildResult, args):
+    """Stores the inputs needed to build the JVM prompt.
+
+    Args:
+      llm: Model handle forwarded to the Prototyper initializer.
+      benchmark: Benchmark the fuzz target belongs to.
+      analysis_result: Result whose run_result.fuzz_target_source is the
+        harness source placed in the prompt.
+      build_result: Build result associated with the target; only stored.
+      args: Experiment arguments; args.work_dirs.prompt locates the prompt
+        output.
+    """
+    # NOTE(review): confirm this argument order against Prototyper.__init__;
+    # its signature is not visible from this module.
+    super().__init__(llm, benchmark, args=args)
+    self.benchmark = benchmark
+    self.analysis = analysis_result
+    self.build = build_result
+    self.args = args
+
+  def initial_prompt(self) -> Prompt:
+    """Builds, saves and returns the initial JVM-focused prompt.
+
+    Returns:
+      The prompt built by JvmFixingBuilder for the current fuzz target source.
+    """
+    # The harness source to be fixed comes from the last run result.
+    source_code = self.analysis.run_result.fuzz_target_source
+
+    # NOTE(review): self.llm is presumably set by the Prototyper initializer.
+    builder = JvmFixingBuilder(model=self.llm,
+                               benchmark=self.benchmark,
+                               generated_harness=source_code,
+                               errors=[])
+    prompt = builder.build(example_pair=[])
+
+    # NOTE(review): work_dirs.prompt appears to be a prompt *file* path, since
+    # other agents pass it directly to prompt.save(); joining a file name onto
+    # it would then give a path nested under a file. Keep the original
+    # behaviour when it is an existing directory, otherwise save next to it.
+    # Confirm against WorkDirs.
+    prompt_location = self.args.work_dirs.prompt
+    if os.path.isdir(prompt_location):
+      prompt_dir = prompt_location
+    else:
+      prompt_dir = os.path.dirname(prompt_location)
+    prompt_path = os.path.join(prompt_dir, 'jvm_initial.txt')
+    prompt.save(prompt_path)
+    return prompt
diff --git a/agent/one_prompt_enhancer.py b/agent/one_prompt_enhancer.py
index 7840c9e76..7709c5b28 100644
--- a/agent/one_prompt_enhancer.py
+++ b/agent/one_prompt_enhancer.py
@@ -14,10 +14,12 @@
 """An LLM agent to improve a fuzz target's runtime performance.
Use it as a usual module locally, or as script in cloud builds. """ +from jvm_coverage_enhancer import JvmCoverageEnhancer + import logger from agent.one_prompt_prototyper import OnePromptPrototyper from experiment.workdir import WorkDirs -from llm_toolkit.prompt_builder import DefaultTemplateBuilder, JvmFixingBuilder +from llm_toolkit.prompt_builder import DefaultTemplateBuilder from llm_toolkit.prompts import Prompt from results import AnalysisResult, BuildResult, Result @@ -37,12 +39,21 @@ def _initial_prompt(self, results: list[Result]) -> Prompt: return Prompt() if benchmark.language == 'jvm': - # TODO: Do this in a separate agent for JVM coverage. - builder = JvmFixingBuilder(self.llm, benchmark, - last_result.run_result.fuzz_target_source, []) - prompt = builder.build([], None, None) + + # Create a temporary BuildResult for JVM enhancer instantiation + temp_build = BuildResult(benchmark=benchmark, + trial=last_result.trial, + work_dirs=last_result.work_dirs, + author=self, + chat_history={}) + # Delegate JVM-specific coverage enhancement to the new enhancer + jvm_enhancer = JvmCoverageEnhancer(llm=self.llm, + benchmark=benchmark, + analysis_result=last_result, + build_result=temp_build, + args=self.args) + prompt = jvm_enhancer.initial_prompt() else: - # TODO(dongge): Refine this logic. builder = DefaultTemplateBuilder(self.llm) if last_result.semantic_result: error_desc, errors = last_result.semantic_result.get_error_info() @@ -61,8 +72,8 @@ def _initial_prompt(self, results: list[Result]) -> Prompt: coverage_result=last_result.coverage_result, context='', instruction='') - # TODO: A different file name/dir. - prompt.save(self.args.work_dirs.prompt) + # TODO: A different file name/dir. 
+ prompt.save(self.args.work_dirs.prompt) return prompt @@ -70,7 +81,6 @@ def execute(self, result_history: list[Result]) -> BuildResult: """Executes the agent based on previous result.""" last_result = result_history[-1] logger.info('Executing One Prompt Enhancer', trial=last_result.trial) - # Use keep to avoid deleting files, such as benchmark.yaml WorkDirs(self.args.work_dirs.base, keep=True) prompt = self._initial_prompt(result_history)