From 80ec5de50782dc084a7d63d8e5370ce58d6b55ba Mon Sep 17 00:00:00 2001 From: harshit chourasiya Date: Sat, 26 Apr 2025 09:03:26 +0000 Subject: [PATCH 1/9] Pending changes exported from your codespace --- agent/enhancer.py | 125 ++++++++++++++++++--------------- agent/jvm_coverage_enhancer.py | 52 ++++++++++++++ agent/one_prompt_enhancer.py | 66 +++++++++-------- 3 files changed, 160 insertions(+), 83 deletions(-) create mode 100644 agent/jvm_coverage_enhancer.py diff --git a/agent/enhancer.py b/agent/enhancer.py index 8abb12d721..f22d67b530 100644 --- a/agent/enhancer.py +++ b/agent/enhancer.py @@ -14,11 +14,14 @@ """An LLM agent to improve a fuzz target's runtime performance. Use it as a usual module locally, or as script in cloud builds. """ +import os import logger from agent.prototyper import Prototyper -from llm_toolkit.prompt_builder import (CoverageEnhancerTemplateBuilder, - EnhancerTemplateBuilder, - JvmFixingBuilder) +from agent.jvm_coverage_enhancer import JvmCoverageEnhancer +from llm_toolkit.prompt_builder import ( + CoverageEnhancerTemplateBuilder, + EnhancerTemplateBuilder +) from llm_toolkit.prompts import Prompt, TextPrompt from results import AnalysisResult, BuildResult, Result @@ -26,59 +29,71 @@ class Enhancer(Prototyper): """The Agent to refine a compilable fuzz target for higher coverage.""" - def _initial_prompt(self, results: list[Result]) -> Prompt: - """Constructs initial prompt of the agent.""" - last_result = results[-1] - benchmark = last_result.benchmark + def _initial_prompt(self, results: list[Result]) -> Prompt: + """Constructs initial prompt of the agent.""" + last_result = results[-1] + benchmark = last_result.benchmark - if not isinstance(last_result, AnalysisResult): - logger.error('The last result in Enhancer is not AnalysisResult: %s', - results, - trial=self.trial) - return Prompt() + if not isinstance(last_result, AnalysisResult): + logger.error( + 'The last result in Enhancer is not AnalysisResult: %s', + results, + trial=self.trial + ) + return Prompt() - last_build_result = None - for result in results[::-1]: - if isinstance(result, BuildResult): - last_build_result = result - break - if not last_build_result: - logger.error('Unable to find the last build result in Enhancer : %s', - results, - trial=self.trial) - return Prompt() + # Find the most recent build result + last_build = next((r for r in reversed(results) if isinstance(r, BuildResult)), None) + if last_build is None: + logger.error( + 'Unable to find the last build result in Enhancer: %s', + results, + trial=self.trial + ) + return Prompt() - if benchmark.language == 'jvm': - # TODO: Do this in a separate agent for JVM coverage. - builder = JvmFixingBuilder(self.llm, benchmark, - last_result.run_result.fuzz_target_source, []) - prompt = builder.build([], None, None) - else: - # TODO(dongge): Refine this logic. - if last_result.semantic_result: - error_desc, errors = last_result.semantic_result.get_error_info() - builder = EnhancerTemplateBuilder(self.llm, benchmark, - last_build_result, error_desc, errors) - elif last_result.coverage_result: - builder = CoverageEnhancerTemplateBuilder( - self.llm, - benchmark, - last_build_result, - coverage_result=last_result.coverage_result) - else: - logger.error( - 'Last result does not contain either semantic result or ' - 'coverage result', - trial=self.trial) - # TODO(dongge): Give some default initial prompt. - prompt = TextPrompt( - 'Last result does not contain either semantic result or ' - 'coverage result') - return prompt - prompt = builder.build(example_pair=[], - tool_guides=self.inspect_tool.tutorial(), - project_dir=self.inspect_tool.project_dir) - # TODO: A different file name/dir. - prompt.save(self.args.work_dirs.prompt) + # Delegate JVM-specific logic to JvmCoverageEnhancer + if benchmark.language == 'jvm': + return JvmCoverageEnhancer( + self.llm, + benchmark, + last_result, + last_build, + self.args + ).initial_prompt() - return prompt + # Non-JVM path: reuse existing logic + if last_result.semantic_result: + error_desc, errors = last_result.semantic_result.get_error_info() + builder = EnhancerTemplateBuilder( + self.llm, + benchmark, + last_build, + error_desc, + errors + ) + elif last_result.coverage_result: + builder = CoverageEnhancerTemplateBuilder( + self.llm, + benchmark, + last_build, + coverage_result=last_result.coverage_result + ) + else: + logger.error( + 'Last result does not contain either semantic result or coverage result', + trial=self.trial + ) + return TextPrompt( + 'Last result does not contain either semantic result or coverage result' + ) + + prompt = builder.build( + example_pair=[], + tool_guides=self.inspect_tool.tutorial(), + project_dir=self.inspect_tool.project_dir + ) + # Save to a dedicated enhancer prompt file + prompt_path = os.path.join(self.args.work_dirs.prompt, 'enhancer_initial.txt') + prompt.save(prompt_path) + return prompt \ No newline at end of file diff --git a/agent/jvm_coverage_enhancer.py b/agent/jvm_coverage_enhancer.py new file mode 100644 index 0000000000..6e73268ede --- /dev/null +++ b/agent/jvm_coverage_enhancer.py @@ -0,0 +1,52 @@ +# Copyright 2025 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +import os +import logger +from agent.prototyper import Prototyper +from llm_toolkit.prompt_builder import JvmFixingBuilder +from llm_toolkit.prompts import Prompt +from results import AnalysisResult, BuildResult + + +class JvmCoverageEnhancer(Prototyper): + """Helper agent for JVM-specific coverage improvement.""" + + def __init__( + self, + llm, + benchmark, + analysis_result: AnalysisResult, + build_result: BuildResult, + args + ): + super().__init__(llm, benchmark, args=args) + self.analysis = analysis_result + self.build = build_result + + def initial_prompt(self) -> Prompt: + """Constructs initial JVM-focused prompt.""" + # Build the JVM fixing prompt + source_code = self.analysis.run_result.fuzz_target_source + builder = JvmFixingBuilder( + self.llm, + self.benchmark, + source_code, + [] + ) + prompt = builder.build(example_pair=[], tool_guides=None, project_dir=None) + + # Save to a dedicated JVM prompt file + prompt_path = os.path.join(self.args.work_dirs.prompt, 'jvm_initial.txt') + prompt.save(prompt_path) + return prompt diff --git a/agent/one_prompt_enhancer.py b/agent/one_prompt_enhancer.py index 7840c9e767..d2fd6a6974 100644 --- a/agent/one_prompt_enhancer.py +++ b/agent/one_prompt_enhancer.py @@ -17,9 +17,10 @@ import logger from agent.one_prompt_prototyper import OnePromptPrototyper from experiment.workdir import WorkDirs -from llm_toolkit.prompt_builder import DefaultTemplateBuilder, JvmFixingBuilder +from llm_toolkit.prompt_builder import DefaultTemplateBuilder from llm_toolkit.prompts import Prompt from results import AnalysisResult, BuildResult, Result +from jvm_coverage_enhancer import JvmCoverageEnhancer class OnePromptEnhancer(OnePromptPrototyper): @@ -36,33 +37,42 @@ def _initial_prompt(self, results: list[Result]) -> Prompt: trial=self.trial) return Prompt() - if benchmark.language == 'jvm': - # TODO: Do this in a separate agent for JVM coverage. - builder = JvmFixingBuilder(self.llm, benchmark, - last_result.run_result.fuzz_target_source, []) - prompt = builder.build([], None, None) - else: - # TODO(dongge): Refine this logic. - builder = DefaultTemplateBuilder(self.llm) - if last_result.semantic_result: - error_desc, errors = last_result.semantic_result.get_error_info() - prompt = builder.build_fixer_prompt(benchmark, - last_result.fuzz_target_source, - error_desc, - errors, - context='', - instruction='') - else: - prompt = builder.build_fixer_prompt( - benchmark=benchmark, - raw_code=last_result.fuzz_target_source, - error_desc='', - errors=[], - coverage_result=last_result.coverage_result, - context='', - instruction='') - # TODO: A different file name/dir. - prompt.save(self.args.work_dirs.prompt) + # For JVM benchmarks, delegate to the dedicated coverage enhancer + if benchmark.language == 'jvm': + jvm_agent = JvmCoverageEnhancer( + llm=self.llm, + benchmark=benchmark, + analysis_result=last_result, + build_result=None, + args=self.args + ) + prompt = jvm_agent.initial_prompt() + else: + builder = DefaultTemplateBuilder(self.llm) + # If there were semantic errors, build a fixer prompt + if last_result.semantic_result: + error_desc, errors = last_result.semantic_result.get_error_info() + prompt = builder.build_fixer_prompt( + benchmark, + last_result.fuzz_target_source, + error_desc, + errors, + context='', + instruction='' + ) + else: + # Build a default fixer prompt based on coverage feedback + prompt = builder.build_fixer_prompt( + benchmark=benchmark, + raw_code=last_result.fuzz_target_source, + error_desc='', + errors=[], + coverage_result=last_result.coverage_result, + context='', + instruction='' + ) + # Persist the prompt for downstream steps + prompt.save(self.args.work_dirs.prompt) return prompt From befeee32513a962b29b274a60590a2e2e9c4b3cd Mon Sep 17 00:00:00 2001 From: harshit chourasiya Date: Sat, 26 Apr 2025 10:40:37 +0000 Subject: [PATCH 2/9] lint --- agent/enhancer.py | 55 +++++++++++----------- agent/one_prompt_enhancer.py | 88 ++++++++++++++++++++---------------- 2 files changed, 77 insertions(+), 66 deletions(-) diff --git a/agent/enhancer.py b/agent/enhancer.py index f22d67b530..79a8500259 100644 --- a/agent/enhancer.py +++ b/agent/enhancer.py @@ -29,38 +29,37 @@ class Enhancer(Prototyper): """The Agent to refine a compilable fuzz target for higher coverage.""" - def _initial_prompt(self, results: list[Result]) -> Prompt: - """Constructs initial prompt of the agent.""" - last_result = results[-1] - benchmark = last_result.benchmark + def _initial_prompt(self, results: list[Result]) -> Prompt: + """Constructs initial prompt of the agent.""" + last_result = results[-1] + benchmark = last_result.benchmark - if not isinstance(last_result, AnalysisResult): - logger.error( - 'The last result in Enhancer is not AnalysisResult: %s', - results, - trial=self.trial - ) - return Prompt() + if not isinstance(last_result, AnalysisResult): + logger.error('The last result in Enhancer is not AnalysisResult: %s', + results, + trial=self.trial) + return Prompt() - # Find the most recent build result - last_build = next((r for r in reversed(results) if isinstance(r, BuildResult)), None) - if last_build is None: - logger.error( - 'Unable to find the last build result in Enhancer: %s', - results, - trial=self.trial - ) - return Prompt() + last_build_result = None + for result in results[::-1]: + if isinstance(result, BuildResult): + last_build_result = result + break + if not last_build_result: + logger.error('Unable to find the last build result in Enhancer : %s', + results, + trial=self.trial) + return Prompt() # Delegate JVM-specific logic to JvmCoverageEnhancer - if benchmark.language == 'jvm': - return JvmCoverageEnhancer( - self.llm, - benchmark, - last_result, - last_build, - self.args - ).initial_prompt() + if benchmark.language == 'jvm': + return JvmCoverageEnhancer( + self.llm, + benchmark, + last_result, + last_build, + self.args + ).initial_prompt() # Non-JVM path: reuse existing logic if last_result.semantic_result: diff --git a/agent/one_prompt_enhancer.py b/agent/one_prompt_enhancer.py index d2fd6a6974..e12531590d 100644 --- a/agent/one_prompt_enhancer.py +++ b/agent/one_prompt_enhancer.py @@ -24,18 +24,20 @@ class OnePromptEnhancer(OnePromptPrototyper): - """The Agent to generate a simple but valid fuzz target from scratch.""" + """The Agent to generate a simple but valid fuzz target from scratch.""" - def _initial_prompt(self, results: list[Result]) -> Prompt: - """Constructs initial prompt of the agent.""" - last_result = results[-1] - benchmark = last_result.benchmark + def _initial_prompt(self, results: list[Result]) -> Prompt: + """Constructs initial prompt of the agent.""" + last_result = results[-1] + benchmark = last_result.benchmark - if not isinstance(last_result, AnalysisResult): - logger.error('The last result in Enhancer is not AnalysisResult: %s', - results, - trial=self.trial) - return Prompt() + if not isinstance(last_result, AnalysisResult): + logger.error( + 'The last result in Enhancer is not AnalysisResult: %s', + results, + trial=self.trial + ) + return Prompt() # For JVM benchmarks, delegate to the dedicated coverage enhancer if benchmark.language == 'jvm': @@ -44,21 +46,22 @@ def _initial_prompt(self, results: list[Result]) -> Prompt: benchmark=benchmark, analysis_result=last_result, build_result=None, - args=self.args + args=self.args, ) prompt = jvm_agent.initial_prompt() else: builder = DefaultTemplateBuilder(self.llm) + # If there were semantic errors, build a fixer prompt if last_result.semantic_result: error_desc, errors = last_result.semantic_result.get_error_info() prompt = builder.build_fixer_prompt( - benchmark, - last_result.fuzz_target_source, - error_desc, - errors, + benchmark=benchmark, + raw_code=last_result.fuzz_target_source, + error_desc=error_desc, + errors=errors, context='', - instruction='' + instruction='', ) else: # Build a default fixer prompt based on coverage feedback @@ -69,34 +72,43 @@ def _initial_prompt(self, results: list[Result]) -> Prompt: errors=[], coverage_result=last_result.coverage_result, context='', - instruction='' + instruction='', ) + # Persist the prompt for downstream steps prompt.save(self.args.work_dirs.prompt) - return prompt + return prompt + + def execute(self, result_history: list[Result]) -> BuildResult: + """Executes the agent based on previous result.""" + last_result = result_history[-1] + logger.info('Executing One Prompt Enhancer', trial=last_result.trial) - def execute(self, result_history: list[Result]) -> BuildResult: - """Executes the agent based on previous result.""" - last_result = result_history[-1] - logger.info('Executing One Prompt Enhancer', trial=last_result.trial) - # Use keep to avoid deleting files, such as benchmark.yaml - WorkDirs(self.args.work_dirs.base, keep=True) + # Use keep to avoid deleting files, such as benchmark.yaml + WorkDirs(self.args.work_dirs.base, keep=True) - prompt = self._initial_prompt(result_history) - cur_round = 1 - build_result = BuildResult(benchmark=last_result.benchmark, - trial=last_result.trial, - work_dirs=last_result.work_dirs, - author=self, - chat_history={self.name: prompt.gettext()}) + prompt = self._initial_prompt(result_history) + cur_round = 1 + build_result = BuildResult( + benchmark=last_result.benchmark, + trial=last_result.trial, + work_dirs=last_result.work_dirs, + author=self, + chat_history={self.name: prompt.gettext()}, + ) + + while prompt and cur_round <= self.max_round: + self._generate_fuzz_target( + prompt, + result_history, + build_result, + cur_round, + ) - while prompt and cur_round <= self.max_round: - self._generate_fuzz_target(prompt, result_history, build_result, - cur_round) + self._validate_fuzz_target(cur_round, build_result) + prompt = self._advice_fuzz_target(build_result, cur_round) + cur_round += 1 - self._validate_fuzz_target(cur_round, build_result) - prompt = self._advice_fuzz_target(build_result, cur_round) - cur_round += 1 + return build_result - return build_result From d2a40fc39e5058dba2ca2a02423d0b9fc3327f7d Mon Sep 17 00:00:00 2001 From: harshit chourasiya Date: Sat, 26 Apr 2025 11:52:55 +0000 Subject: [PATCH 3/9] comments --- agent/enhancer.py | 5 +++-- agent/one_prompt_enhancer.py | 3 ++- 2 files changed, 5 insertions(+), 3 deletions(-) diff --git a/agent/enhancer.py b/agent/enhancer.py index 79a8500259..d85902173b 100644 --- a/agent/enhancer.py +++ b/agent/enhancer.py @@ -51,7 +51,7 @@ def _initial_prompt(self, results: list[Result]) -> Prompt: trial=self.trial) return Prompt() - # Delegate JVM-specific logic to JvmCoverageEnhancer + # Delegate JVM-specific logic to JvmCoverageEnhancer if benchmark.language == 'jvm': return JvmCoverageEnhancer( self.llm, @@ -61,7 +61,7 @@ def _initial_prompt(self, results: list[Result]) -> Prompt: self.args ).initial_prompt() - # Non-JVM path: reuse existing logic + #TODO(dongge): Refine this logic. if last_result.semantic_result: error_desc, errors = last_result.semantic_result.get_error_info() builder = EnhancerTemplateBuilder( @@ -83,6 +83,7 @@ def _initial_prompt(self, results: list[Result]) -> Prompt: 'Last result does not contain either semantic result or coverage result', trial=self.trial ) + # TODO(dongge): Give some default initial prompt. return TextPrompt( 'Last result does not contain either semantic result or coverage result' ) diff --git a/agent/one_prompt_enhancer.py b/agent/one_prompt_enhancer.py index e12531590d..e411042f5c 100644 --- a/agent/one_prompt_enhancer.py +++ b/agent/one_prompt_enhancer.py @@ -53,6 +53,7 @@ def _initial_prompt(self, results: list[Result]) -> Prompt: builder = DefaultTemplateBuilder(self.llm) # If there were semantic errors, build a fixer prompt + # TODO(dongge): Refine this logic. if last_result.semantic_result: error_desc, errors = last_result.semantic_result.get_error_info() prompt = builder.build_fixer_prompt( @@ -75,7 +76,7 @@ def _initial_prompt(self, results: list[Result]) -> Prompt: instruction='', ) - # Persist the prompt for downstream steps + # TODO: A different file name/dir. prompt.save(self.args.work_dirs.prompt) return prompt From 29e1966980e59dd98b6ac2afdcb941886f0249b8 Mon Sep 17 00:00:00 2001 From: harshtech123 Date: Wed, 30 Apr 2025 07:47:58 +0530 Subject: [PATCH 4/9] presubmit --- agent/enhancer.py | 77 +++++++---------- agent/jvm_coverage_enhancer.py | 43 ++++------ agent/one_prompt_enhancer.py | 151 ++++++++++++++++----------------- 3 files changed, 121 insertions(+), 150 deletions(-) diff --git a/agent/enhancer.py b/agent/enhancer.py index d85902173b..89144f53cc 100644 --- a/agent/enhancer.py +++ b/agent/enhancer.py @@ -18,10 +18,8 @@ import logger from agent.prototyper import Prototyper from agent.jvm_coverage_enhancer import JvmCoverageEnhancer -from llm_toolkit.prompt_builder import ( - CoverageEnhancerTemplateBuilder, - EnhancerTemplateBuilder -) +from llm_toolkit.prompt_builder import (CoverageEnhancerTemplateBuilder, + EnhancerTemplateBuilder) from llm_toolkit.prompts import Prompt, TextPrompt from results import AnalysisResult, BuildResult, Result @@ -53,47 +51,34 @@ def _initial_prompt(self, results: list[Result]) -> Prompt: # Delegate JVM-specific logic to JvmCoverageEnhancer if benchmark.language == 'jvm': - return JvmCoverageEnhancer( - self.llm, - benchmark, - last_result, - last_build, - self.args - ).initial_prompt() + return JvmCoverageEnhancer(self.llm, benchmark, last_result, last_build, + self.args).initial_prompt() - #TODO(dongge): Refine this logic. - if last_result.semantic_result: - error_desc, errors = last_result.semantic_result.get_error_info() - builder = EnhancerTemplateBuilder( - self.llm, - benchmark, - last_build, - error_desc, - errors - ) - elif last_result.coverage_result: - builder = CoverageEnhancerTemplateBuilder( - self.llm, - benchmark, - last_build, - coverage_result=last_result.coverage_result - ) - else: - logger.error( - 'Last result does not contain either semantic result or coverage result', - trial=self.trial - ) - # TODO(dongge): Give some default initial prompt. - return TextPrompt( - 'Last result does not contain either semantic result or coverage result' - ) + #TODO(dongge): Refine this logic. + if last_result.semantic_result: + error_desc, errors = last_result.semantic_result.get_error_info() + builder = EnhancerTemplateBuilder(self.llm, benchmark, last_build, + error_desc, errors) + elif last_result.coverage_result: + builder = CoverageEnhancerTemplateBuilder( + self.llm, + benchmark, + last_build, + coverage_result=last_result.coverage_result) + else: + logger.error( + 'Last result does not contain either semantic result or coverage result', + trial=self.trial) + # TODO(dongge): Give some default initial prompt. + return TextPrompt( + 'Last result does not contain either semantic result or coverage result' + ) - prompt = builder.build( - example_pair=[], - tool_guides=self.inspect_tool.tutorial(), - project_dir=self.inspect_tool.project_dir - ) - # Save to a dedicated enhancer prompt file - prompt_path = os.path.join(self.args.work_dirs.prompt, 'enhancer_initial.txt') - prompt.save(prompt_path) - return prompt \ No newline at end of file + prompt = builder.build(example_pair=[], + tool_guides=self.inspect_tool.tutorial(), + project_dir=self.inspect_tool.project_dir) + # Save to a dedicated enhancer prompt file + prompt_path = os.path.join(self.args.work_dirs.prompt, + 'enhancer_initial.txt') + prompt.save(prompt_path) + return prompt diff --git a/agent/jvm_coverage_enhancer.py b/agent/jvm_coverage_enhancer.py index 6e73268ede..25c30773f8 100644 --- a/agent/jvm_coverage_enhancer.py +++ b/agent/jvm_coverage_enhancer.py @@ -20,33 +20,22 @@ class JvmCoverageEnhancer(Prototyper): - """Helper agent for JVM-specific coverage improvement.""" + """Helper agent for JVM-specific coverage improvement.""" - def __init__( - self, - llm, - benchmark, - analysis_result: AnalysisResult, - build_result: BuildResult, - args - ): - super().__init__(llm, benchmark, args=args) - self.analysis = analysis_result - self.build = build_result + def __init__(self, llm, benchmark, analysis_result: AnalysisResult, + build_result: BuildResult, args): + super().__init__(llm, benchmark, args=args) + self.analysis = analysis_result + self.build = build_result - def initial_prompt(self) -> Prompt: - """Constructs initial JVM-focused prompt.""" - # Build the JVM fixing prompt - source_code = self.analysis.run_result.fuzz_target_source - builder = JvmFixingBuilder( - self.llm, - self.benchmark, - source_code, - [] - ) - prompt = builder.build(example_pair=[], tool_guides=None, project_dir=None) + def initial_prompt(self) -> Prompt: + """Constructs initial JVM-focused prompt.""" + # Build the JVM fixing prompt + source_code = self.analysis.run_result.fuzz_target_source + builder = JvmFixingBuilder(self.llm, self.benchmark, source_code, []) + prompt = builder.build(example_pair=[], tool_guides=None, project_dir=None) - # Save to a dedicated JVM prompt file - prompt_path = os.path.join(self.args.work_dirs.prompt, 'jvm_initial.txt') - prompt.save(prompt_path) - return prompt + # Save to a dedicated JVM prompt file + prompt_path = os.path.join(self.args.work_dirs.prompt, 'jvm_initial.txt') + prompt.save(prompt_path) + return prompt diff --git a/agent/one_prompt_enhancer.py b/agent/one_prompt_enhancer.py index e411042f5c..6a06740854 100644 --- a/agent/one_prompt_enhancer.py +++ b/agent/one_prompt_enhancer.py @@ -24,92 +24,89 @@ class OnePromptEnhancer(OnePromptPrototyper): - """The Agent to generate a simple but valid fuzz target from scratch.""" + """The Agent to generate a simple but valid fuzz target from scratch.""" - def _initial_prompt(self, results: list[Result]) -> Prompt: - """Constructs initial prompt of the agent.""" - last_result = results[-1] - benchmark = last_result.benchmark + def _initial_prompt(self, results: list[Result]) -> Prompt: + """Constructs initial prompt of the agent.""" + last_result = results[-1] + benchmark = last_result.benchmark - if not isinstance(last_result, AnalysisResult): - logger.error( - 'The last result in Enhancer is not AnalysisResult: %s', - results, - trial=self.trial - ) - return Prompt() + if not isinstance(last_result, AnalysisResult): + logger.error('The last result in Enhancer is not AnalysisResult: %s', + results, + trial=self.trial) + return Prompt() - # For JVM benchmarks, delegate to the dedicated coverage enhancer - if benchmark.language == 'jvm': - jvm_agent = JvmCoverageEnhancer( - llm=self.llm, - benchmark=benchmark, - analysis_result=last_result, - build_result=None, - args=self.args, - ) - prompt = jvm_agent.initial_prompt() - else: - builder = DefaultTemplateBuilder(self.llm) + # For JVM benchmarks, delegate to the dedicated coverage enhancer + if benchmark.language == 'jvm': + jvm_agent = JvmCoverageEnhancer( + llm=self.llm, + benchmark=benchmark, + analysis_result=last_result, + build_result=None, + args=self.args, + ) + prompt = jvm_agent.initial_prompt() + else: + builder = DefaultTemplateBuilder(self.llm) - # If there were semantic errors, build a fixer prompt - # TODO(dongge): Refine this logic. - if last_result.semantic_result: - error_desc, errors = last_result.semantic_result.get_error_info() - prompt = builder.build_fixer_prompt( - benchmark=benchmark, - raw_code=last_result.fuzz_target_source, - error_desc=error_desc, - errors=errors, - context='', - instruction='', - ) - else: - # Build a default fixer prompt based on coverage feedback - prompt = builder.build_fixer_prompt( - benchmark=benchmark, - raw_code=last_result.fuzz_target_source, - error_desc='', - errors=[], - coverage_result=last_result.coverage_result, - context='', - instruction='', - ) - - # TODO: A different file name/dir. - prompt.save(self.args.work_dirs.prompt) + # If there were semantic errors, build a fixer prompt + # TODO(dongge): Refine this logic. + if last_result.semantic_result: + error_desc, errors = last_result.semantic_result.get_error_info() + prompt = builder.build_fixer_prompt( + benchmark=benchmark, + raw_code=last_result.fuzz_target_source, + error_desc=error_desc, + errors=errors, + context='', + instruction='', + ) + else: + # Build a default fixer prompt based on coverage feedback + prompt = builder.build_fixer_prompt( + benchmark=benchmark, + raw_code=last_result.fuzz_target_source, + error_desc='', + errors=[], + coverage_result=last_result.coverage_result, + context='', + instruction='', + ) - return prompt + # TODO: A different file name/dir. + prompt.save(self.args.work_dirs.prompt) - def execute(self, result_history: list[Result]) -> BuildResult: - """Executes the agent based on previous result.""" - last_result = result_history[-1] - logger.info('Executing One Prompt Enhancer', trial=last_result.trial) + return prompt - # Use keep to avoid deleting files, such as benchmark.yaml - WorkDirs(self.args.work_dirs.base, keep=True) + def execute(self, result_history: list[Result]) -> BuildResult: + """Executes the agent based on previous result.""" + last_result = result_history[-1] + logger.info('Executing One Prompt Enhancer', trial=last_result.trial) - prompt = self._initial_prompt(result_history) - cur_round = 1 - build_result = BuildResult( - benchmark=last_result.benchmark, - trial=last_result.trial, - work_dirs=last_result.work_dirs, - author=self, - chat_history={self.name: prompt.gettext()}, - ) + # Use keep to avoid deleting files, such as benchmark.yaml + WorkDirs(self.args.work_dirs.base, keep=True) - while prompt and cur_round <= self.max_round: - self._generate_fuzz_target( - prompt, - result_history, - build_result, - cur_round, - ) + prompt = self._initial_prompt(result_history) + cur_round = 1 + build_result = BuildResult( + benchmark=last_result.benchmark, + trial=last_result.trial, + work_dirs=last_result.work_dirs, + author=self, + chat_history={self.name: prompt.gettext()}, + ) - self._validate_fuzz_target(cur_round, build_result) - prompt = self._advice_fuzz_target(build_result, cur_round) - cur_round += 1 + while prompt and cur_round <= self.max_round: + self._generate_fuzz_target( + prompt, + result_history, + build_result, + cur_round, + ) - return build_result + self._validate_fuzz_target(cur_round, build_result) + prompt = self._advice_fuzz_target(build_result, cur_round) + cur_round += 1 + return build_result From e488d57e9126fb45359b9e134d37090f7810d742 Mon Sep 17 00:00:00 2001 From: harshtech123 Date: Wed, 30 Apr 2025 08:30:42 +0530 Subject: [PATCH 5/9] formatting --- agent/enhancer.py | 17 +++++----- agent/jvm_coverage_enhancer.py | 1 + agent/one_prompt_enhancer.py | 57 +++++++++++++++++----------------- 3 files changed, 39 insertions(+), 36 deletions(-) diff --git a/agent/enhancer.py b/agent/enhancer.py index 89144f53cc..6d164a662a 100644 --- a/agent/enhancer.py +++ b/agent/enhancer.py @@ -15,9 +15,10 @@ Use it as a usual module locally, or as script in cloud builds. """ import os + import logger -from agent.prototyper import Prototyper from agent.jvm_coverage_enhancer import JvmCoverageEnhancer +from agent.prototyper import Prototyper from llm_toolkit.prompt_builder import (CoverageEnhancerTemplateBuilder, EnhancerTemplateBuilder) from llm_toolkit.prompts import Prompt, TextPrompt @@ -61,17 +62,17 @@ def _initial_prompt(self, results: list[Result]) -> Prompt: error_desc, errors) elif last_result.coverage_result: builder = CoverageEnhancerTemplateBuilder( - self.llm, - benchmark, - last_build, - coverage_result=last_result.coverage_result) + self.llm, + benchmark, + last_build, + coverage_result=last_result.coverage_result) else: logger.error( - 'Last result does not contain either semantic result or coverage result', - trial=self.trial) + 'Last result does not contain either semantic result or coverage result', + trial=self.trial) # TODO(dongge): Give some default initial prompt. return TextPrompt( - 'Last result does not contain either semantic result or coverage result' + 'Last result does not contain either semantic result or coverage result' ) prompt = builder.build(example_pair=[], diff --git a/agent/jvm_coverage_enhancer.py b/agent/jvm_coverage_enhancer.py index 25c30773f8..3ed136c0ec 100644 --- a/agent/jvm_coverage_enhancer.py +++ b/agent/jvm_coverage_enhancer.py @@ -12,6 +12,7 @@ # See the License for the specific language governing permissions and # limitations under the License. import os + import logger from agent.prototyper import Prototyper from llm_toolkit.prompt_builder import JvmFixingBuilder diff --git a/agent/one_prompt_enhancer.py b/agent/one_prompt_enhancer.py index 6a06740854..4be24e6bde 100644 --- a/agent/one_prompt_enhancer.py +++ b/agent/one_prompt_enhancer.py @@ -14,13 +14,14 @@ """An LLM agent to improve a fuzz target's runtime performance. Use it as a usual module locally, or as script in cloud builds. """ +from jvm_coverage_enhancer import JvmCoverageEnhancer + import logger from agent.one_prompt_prototyper import OnePromptPrototyper from experiment.workdir import WorkDirs from llm_toolkit.prompt_builder import DefaultTemplateBuilder from llm_toolkit.prompts import Prompt from results import AnalysisResult, BuildResult, Result -from jvm_coverage_enhancer import JvmCoverageEnhancer class OnePromptEnhancer(OnePromptPrototyper): @@ -40,11 +41,11 @@ def _initial_prompt(self, results: list[Result]) -> Prompt: # For JVM benchmarks, delegate to the dedicated coverage enhancer if benchmark.language == 'jvm': jvm_agent = JvmCoverageEnhancer( - llm=self.llm, - benchmark=benchmark, - analysis_result=last_result, - build_result=None, - args=self.args, + llm=self.llm, + benchmark=benchmark, + analysis_result=last_result, + build_result=None, + args=self.args, ) prompt = jvm_agent.initial_prompt() else: @@ -55,23 +56,23 @@ def _initial_prompt(self, results: list[Result]) -> Prompt: if last_result.semantic_result: error_desc, errors = last_result.semantic_result.get_error_info() prompt = builder.build_fixer_prompt( - benchmark=benchmark, - raw_code=last_result.fuzz_target_source, - error_desc=error_desc, - errors=errors, - context='', - instruction='', + benchmark=benchmark, + raw_code=last_result.fuzz_target_source, + error_desc=error_desc, + errors=errors, + context='', + instruction='', ) else: # Build a default fixer prompt based on coverage feedback prompt = builder.build_fixer_prompt( - benchmark=benchmark, - raw_code=last_result.fuzz_target_source, - error_desc='', - errors=[], - coverage_result=last_result.coverage_result, - context='', - instruction='', + benchmark=benchmark, + raw_code=last_result.fuzz_target_source, + error_desc='', + errors=[], + coverage_result=last_result.coverage_result, + context='', + instruction='', ) # TODO: A different file name/dir. @@ -90,19 +91,19 @@ def execute(self, result_history: list[Result]) -> BuildResult: prompt = self._initial_prompt(result_history) cur_round = 1 build_result = BuildResult( - benchmark=last_result.benchmark, - trial=last_result.trial, - work_dirs=last_result.work_dirs, - author=self, - chat_history={self.name: prompt.gettext()}, + benchmark=last_result.benchmark, + trial=last_result.trial, + work_dirs=last_result.work_dirs, + author=self, + chat_history={self.name: prompt.gettext()}, ) while prompt and cur_round <= self.max_round: self._generate_fuzz_target( - prompt, - result_history, - build_result, - cur_round, + prompt, + result_history, + build_result, + cur_round, ) self._validate_fuzz_target(cur_round, build_result) From b4f526a747d7490fdd8f8bbf06269c7decccc60f Mon Sep 17 00:00:00 2001 From: harshtech123 Date: Wed, 30 Apr 2025 08:59:16 +0530 Subject: [PATCH 6/9] last_build to last_build_Result --- agent/enhancer.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/agent/enhancer.py b/agent/enhancer.py index 6d164a662a..eaa129b54a 100644 --- a/agent/enhancer.py +++ b/agent/enhancer.py @@ -52,19 +52,19 @@ def _initial_prompt(self, results: list[Result]) -> Prompt: # Delegate JVM-specific logic to JvmCoverageEnhancer if benchmark.language == 'jvm': - return JvmCoverageEnhancer(self.llm, benchmark, last_result, last_build, - self.args).initial_prompt() + return JvmCoverageEnhancer(self.llm, benchmark, last_result, + last_build_result, self.args).initial_prompt() #TODO(dongge): Refine this logic. if last_result.semantic_result: error_desc, errors = last_result.semantic_result.get_error_info() - builder = EnhancerTemplateBuilder(self.llm, benchmark, last_build, + builder = EnhancerTemplateBuilder(self.llm, benchmark, last_build_result, error_desc, errors) elif last_result.coverage_result: builder = CoverageEnhancerTemplateBuilder( self.llm, benchmark, - last_build, + last_build_result, coverage_result=last_result.coverage_result) else: logger.error( From 0d491608bd82100cfd7b900f511f22ba0f2af42b Mon Sep 17 00:00:00 2001 From: harshtech123 Date: Wed, 30 Apr 2025 09:09:24 +0530 Subject: [PATCH 7/9] updated jvm_coverage_enhancer.py --- agent/jvm_coverage_enhancer.py | 22 ++++++++++++++++++---- 1 file changed, 18 insertions(+), 4 deletions(-) diff --git a/agent/jvm_coverage_enhancer.py b/agent/jvm_coverage_enhancer.py index 3ed136c0ec..0c48ae4999 100644 --- a/agent/jvm_coverage_enhancer.py +++ b/agent/jvm_coverage_enhancer.py @@ -11,9 +11,15 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. +""" +Module: JVM Coverage Enhancer + +This module provides a helper agent to improve code coverage for JVM-based +fuzz targets by generating or fixing JVM harnesses using LLM prompts. +""" + import os -import logger from agent.prototyper import Prototyper from llm_toolkit.prompt_builder import JvmFixingBuilder from llm_toolkit.prompts import Prompt @@ -26,15 +32,23 @@ class JvmCoverageEnhancer(Prototyper): def __init__(self, llm, benchmark, analysis_result: AnalysisResult, build_result: BuildResult, args): super().__init__(llm, benchmark, args=args) + self.benchmark = benchmark self.analysis = analysis_result self.build = build_result + self.args = args def initial_prompt(self) -> Prompt: """Constructs initial JVM-focused prompt.""" - # Build the JVM fixing prompt + # Extract the fuzz target source code source_code = self.analysis.run_result.fuzz_target_source - builder = JvmFixingBuilder(self.llm, self.benchmark, source_code, []) - prompt = builder.build(example_pair=[], tool_guides=None, project_dir=None) + + # Build the JVM fixing prompt + builder = JvmFixingBuilder(model=self.llm, + benchmark=self.benchmark, + generated_harness=source_code, + errors=[]) + # Use correct signature: only example_pair is required + prompt = builder.build(example_pair=[]) # Save to a dedicated JVM prompt file prompt_path = os.path.join(self.args.work_dirs.prompt, 'jvm_initial.txt') From 5e9f5adbee03a983a02257655361c9d75462a0d5 Mon Sep 17 00:00:00 2001 From: harshtech123 Date: Wed, 30 Apr 2025 11:57:56 +0530 Subject: [PATCH 8/9] fix lint --- agent/one_prompt_enhancer.py | 71 ++++++++++++++++-------------------- 1 file changed, 31 insertions(+), 40 deletions(-) diff --git a/agent/one_prompt_enhancer.py b/agent/one_prompt_enhancer.py index 4be24e6bde..bc6c08de2b 100644 --- a/agent/one_prompt_enhancer.py +++ b/agent/one_prompt_enhancer.py @@ -38,33 +38,33 @@ def _initial_prompt(self, results: list[Result]) -> Prompt: trial=self.trial) return Prompt() - # For JVM benchmarks, delegate to the dedicated coverage enhancer if benchmark.language == 'jvm': - jvm_agent = JvmCoverageEnhancer( - llm=self.llm, - benchmark=benchmark, - analysis_result=last_result, - build_result=None, - args=self.args, - ) - prompt = jvm_agent.initial_prompt() + + # Create a temporary BuildResult for JVM enhancer instantiation + temp_build = BuildResult(benchmark=benchmark, + trial=last_result.trial, + work_dirs=last_result.work_dirs, + author=self, + chat_history={}) + # Delegate JVM-specific coverage enhancement to the new enhancer + jvm_enhancer = JvmCoverageEnhancer(llm=self.llm, + benchmark=benchmark, + analysis_result=last_result, + build_result=temp_build, + args=self.args) + prompt = jvm_enhancer.initial_prompt() else: + # Existing non-JVM logic builder = DefaultTemplateBuilder(self.llm) - - # If there were semantic errors, build a fixer prompt - # TODO(dongge): Refine this logic. if last_result.semantic_result: error_desc, errors = last_result.semantic_result.get_error_info() - prompt = builder.build_fixer_prompt( - benchmark=benchmark, - raw_code=last_result.fuzz_target_source, - error_desc=error_desc, - errors=errors, - context='', - instruction='', - ) + prompt = builder.build_fixer_prompt(benchmark, + last_result.fuzz_target_source, + error_desc, + errors, + context='', + instruction='') else: - # Build a default fixer prompt based on coverage feedback prompt = builder.build_fixer_prompt( benchmark=benchmark, raw_code=last_result.fuzz_target_source, @@ -72,11 +72,9 @@ def _initial_prompt(self, results: list[Result]) -> Prompt: errors=[], coverage_result=last_result.coverage_result, context='', - instruction='', - ) - - # TODO: A different file name/dir. - prompt.save(self.args.work_dirs.prompt) + instruction='') + # TODO: A different file name/dir. + prompt.save(self.args.work_dirs.prompt) return prompt @@ -84,27 +82,20 @@ def execute(self, result_history: list[Result]) -> BuildResult: """Executes the agent based on previous result.""" last_result = result_history[-1] logger.info('Executing One Prompt Enhancer', trial=last_result.trial) - # Use keep to avoid deleting files, such as benchmark.yaml WorkDirs(self.args.work_dirs.base, keep=True) prompt = self._initial_prompt(result_history) cur_round = 1 - build_result = BuildResult( - benchmark=last_result.benchmark, - trial=last_result.trial, - work_dirs=last_result.work_dirs, - author=self, - chat_history={self.name: prompt.gettext()}, - ) + build_result = BuildResult(benchmark=last_result.benchmark, + trial=last_result.trial, + work_dirs=last_result.work_dirs, + author=self, + chat_history={self.name: prompt.gettext()}) while prompt and cur_round <= self.max_round: - self._generate_fuzz_target( - prompt, - result_history, - build_result, - cur_round, - ) + self._generate_fuzz_target(prompt, result_history, build_result, + cur_round) self._validate_fuzz_target(cur_round, build_result) prompt = self._advice_fuzz_target(build_result, cur_round) From c66792e440015468eddfdefb4aa572ddec93fb21 Mon Sep 17 00:00:00 2001 From: harshtech123 Date: Wed, 30 Apr 2025 12:03:33 +0530 Subject: [PATCH 9/9] final updates --- agent/enhancer.py | 7 ++++--- agent/jvm_coverage_enhancer.py | 3 +-- agent/one_prompt_enhancer.py | 2 -- 3 files changed, 5 insertions(+), 7 deletions(-) diff --git a/agent/enhancer.py b/agent/enhancer.py index eaa129b54a..8a25912e4a 100644 --- a/agent/enhancer.py +++ b/agent/enhancer.py @@ -68,12 +68,13 @@ def _initial_prompt(self, results: list[Result]) -> Prompt: coverage_result=last_result.coverage_result) else: logger.error( - 'Last result does not contain either semantic result or coverage result', + '''Last result does not contain either semantic result or coverage + result''', trial=self.trial) # TODO(dongge): Give some default initial prompt. return TextPrompt( - 'Last result does not contain either semantic result or coverage result' - ) + '''Last result does not contain either semantic result or coverage + result''') prompt = builder.build(example_pair=[], tool_guides=self.inspect_tool.tutorial(), diff --git a/agent/jvm_coverage_enhancer.py b/agent/jvm_coverage_enhancer.py index 0c48ae4999..96f2b25690 100644 --- a/agent/jvm_coverage_enhancer.py +++ b/agent/jvm_coverage_enhancer.py @@ -14,7 +14,7 @@ """ Module: JVM Coverage Enhancer -This module provides a helper agent to improve code coverage for JVM-based +This module provides a helper agent to improve code coverage for JVM-based fuzz targets by generating or fixing JVM harnesses using LLM prompts. """ @@ -47,7 +47,6 @@ def initial_prompt(self) -> Prompt: benchmark=self.benchmark, generated_harness=source_code, errors=[]) - # Use correct signature: only example_pair is required prompt = builder.build(example_pair=[]) # Save to a dedicated JVM prompt file diff --git a/agent/one_prompt_enhancer.py b/agent/one_prompt_enhancer.py index bc6c08de2b..7709c5b28a 100644 --- a/agent/one_prompt_enhancer.py +++ b/agent/one_prompt_enhancer.py @@ -54,7 +54,6 @@ def _initial_prompt(self, results: list[Result]) -> Prompt: args=self.args) prompt = jvm_enhancer.initial_prompt() else: - # Existing non-JVM logic builder = DefaultTemplateBuilder(self.llm) if last_result.semantic_result: error_desc, errors = last_result.semantic_result.get_error_info() @@ -82,7 +81,6 @@ def execute(self, result_history: list[Result]) -> BuildResult: """Executes the agent based on previous result.""" last_result = result_history[-1] logger.info('Executing One Prompt Enhancer', trial=last_result.trial) - # Use keep to avoid deleting files, such as benchmark.yaml WorkDirs(self.args.work_dirs.base, keep=True) prompt = self._initial_prompt(result_history)