1010from agent .base_agent import BaseAgent
1111from data_prep .project_context .context_introspector import ContextRetriever
1212from experiment .benchmark import Benchmark
13- from llm_toolkit .prompt_builder import EXAMPLES as EXAMPLE_FUZZ_TARGETS
1413from llm_toolkit .prompt_builder import (DefaultTemplateBuilder ,
1514 PrototyperTemplateBuilder )
1615from llm_toolkit .prompts import Prompt
@@ -48,23 +47,31 @@ def _update_fuzz_target_and_build_script(self, cur_round: int, response: str,
4847 self ._parse_tag (response , 'fuzz target' ))
4948 build_result .fuzz_target_source = fuzz_target_source
5049 if fuzz_target_source :
51- logger .debug ('ROUND %02d Parsed fuzz target from LLM: %s' , cur_round ,
52- fuzz_target_source )
50+ logger .debug ('ROUND %02d Parsed fuzz target from LLM: %s' ,
51+ cur_round ,
52+ fuzz_target_source ,
53+ trial = build_result .trial )
5354 else :
5455 logger .error ('ROUND %02d No fuzz target source code in conclusion: %s' ,
55- cur_round , response )
56+ cur_round ,
57+ response ,
58+ trial = build_result .trial )
5659
5760 build_script_source = self ._filter_code (
5861 self ._parse_tag (response , 'build script' ))
5962 # Sometimes LLM adds chronos, which makes no sense for new build scripts.
6063 build_result .build_script_source = build_script_source .replace (
6164 'source /src/chronos.sh' , '' )
6265 if build_script_source :
63- logger .debug ('ROUND %02d Parsed build script from LLM: %s' , cur_round ,
64- build_script_source )
66+ logger .debug ('ROUND %02d Parsed build script from LLM: %s' ,
67+ cur_round ,
68+ build_script_source ,
69+ trial = build_result .trial )
6570 else :
66- logger .debug ('ROUND %02d No build script in conclusion: %s' , cur_round ,
67- response )
71+ logger .debug ('ROUND %02d No build script in conclusion: %s' ,
72+ cur_round ,
73+ response ,
74+ trial = build_result .trial )
6875
6976 def _update_build_result (self , build_result : BuildResult ,
7077 compile_process : sp .CompletedProcess , status : bool ,
@@ -84,20 +91,22 @@ def _validate_fuzz_target_and_build_script(self, cur_round: int,
8491 # 2. Recompile with the modified build script, if any.
8592 build_script_source = build_result .build_script_source
8693
87- logger .info ('First compile fuzz target without modifying build script.' )
94+ logger .info ('First compile fuzz target without modifying build script.' ,
95+ trial = build_result .trial )
8896 build_result .build_script_source = ''
8997 self ._validate_fuzz_target_and_build_script_via_compile (
9098 cur_round , build_result )
9199
92100 if not build_result .success and build_script_source :
93- logger .info ('Then compile fuzz target with modified build script.' )
101+ logger .info ('Then compile fuzz target with modified build script.' ,
102+ trial = build_result .trial )
94103 build_result .build_script_source = build_script_source
95104 self ._validate_fuzz_target_and_build_script_via_compile (
96105 cur_round , build_result )
97106
98107 def _validate_fuzz_target_references_function (
99108 self , compilation_tool : ProjectContainerTool , benchmark : Benchmark ,
100- cur_round : int ) -> bool :
109+ cur_round : int , trial : int ) -> bool :
101110 """Validates if the LLM generated fuzz target assembly code references
102111 function-under-test."""
103112 disassemble_result = compilation_tool .execute (
@@ -106,10 +115,13 @@ def _validate_fuzz_target_references_function(
106115 function_referenced = (disassemble_result .returncode == 0 and
107116 benchmark .function_name in disassemble_result .stdout )
108117 logger .debug ('ROUND %02d Final fuzz target function referenced: %s' ,
109- cur_round , function_referenced )
118+ cur_round ,
119+ function_referenced ,
120+ trial = trial )
110121 if not function_referenced :
111122 logger .debug ('ROUND %02d Final fuzz target function not referenced' ,
112- cur_round )
123+ cur_round ,
124+ trial = trial )
113125 return function_referenced
114126
115127 def _validate_fuzz_target_and_build_script_via_compile (
@@ -133,25 +145,33 @@ def _validate_fuzz_target_and_build_script_via_compile(
133145 file_content = build_result .build_script_source ))
134146
135147 # Recompile.
136- logger .info ('===== ROUND %02d Recompile =====' , cur_round )
148+ logger .info ('===== ROUND %02d Recompile =====' ,
149+ cur_round ,
150+ trial = build_result .trial )
137151 start_time = time .time ()
138152 compile_process = compilation_tool .compile ()
139153 end_time = time .time ()
140- logger .debug ('ROUND %02d compilation time: %s' , cur_round ,
141- timedelta (seconds = end_time - start_time ))
154+ logger .debug ('ROUND %02d compilation time: %s' ,
155+ cur_round ,
156+ timedelta (seconds = end_time - start_time ),
157+ trial = build_result .trial )
142158 compile_succeed = compile_process .returncode == 0
143- logger .debug ('ROUND %02d Fuzz target compiles: %s' , cur_round ,
144- compile_succeed )
159+ logger .debug ('ROUND %02d Fuzz target compiles: %s' ,
160+ cur_round ,
161+ compile_succeed ,
162+ trial = build_result .trial )
145163
146164 # Double-check binary.
147165 ls_result = compilation_tool .execute (f'ls /out/{ benchmark .target_name } ' )
148166 binary_exists = ls_result .returncode == 0
149- logger .debug ('ROUND %02d Final fuzz target binary exists: %s' , cur_round ,
150- binary_exists )
167+ logger .debug ('ROUND %02d Final fuzz target binary exists: %s' ,
168+ cur_round ,
169+ binary_exists ,
170+ trial = build_result .trial )
151171
152172 # Validate if function-under-test is referenced by the fuzz target.
153173 function_referenced = self ._validate_fuzz_target_references_function (
154- compilation_tool , benchmark , cur_round )
174+ compilation_tool , benchmark , cur_round , build_result . trial )
155175
156176 compilation_tool .terminate ()
157177 self ._update_build_result (build_result ,
@@ -164,18 +184,24 @@ def _container_handle_conclusion(
164184 build_result : BuildResult ) -> Optional [Prompt ]:
165185 """Runs a compilation tool to validate the new fuzz target and build script
166186 from LLM."""
167- logger .info ('----- ROUND %02d Received conclusion -----' , cur_round )
187+ logger .info ('----- ROUND %02d Received conclusion -----' ,
188+ cur_round ,
189+ trial = build_result .trial )
168190
169191 self ._update_fuzz_target_and_build_script (cur_round , response , build_result )
170192
171193 self ._validate_fuzz_target_and_build_script (cur_round , build_result )
172194 if build_result .success :
173- logger .info ('***** Prototyper succeded in %02d rounds *****' , cur_round )
195+ logger .info ('***** Prototyper succeded in %02d rounds *****' ,
196+ cur_round ,
197+ trial = build_result .trial )
174198 return None
175199
176200 if not build_result .compiles :
177201 compile_log = self .llm .truncate_prompt (build_result .compile_log )
178- logger .info ('***** Failed to recompile in %02d rounds *****' , cur_round )
202+ logger .info ('***** Failed to recompile in %02d rounds *****' ,
203+ cur_round ,
204+ trial = build_result .trial )
179205 prompt_text = (
180206 'Failed to build fuzz target. Here is the fuzz target, build script, '
181207 'compliation command, and other compilation runtime output. Analyze '
@@ -205,7 +231,9 @@ def _container_handle_conclusion(
205231 elif not build_result .is_function_referenced :
206232 logger .info (
207233 '***** Fuzz target does not reference function-under-test in %02d '
208- 'rounds *****' , cur_round )
234+ 'rounds *****' ,
235+ cur_round ,
236+ trial = build_result .trial )
209237 prompt_text = (
210238 'The fuzz target builds successfully, but the target function '
211239 f'`{ build_result .benchmark .function_signature } ` was not used by '
@@ -229,14 +257,16 @@ def _container_tool_reaction(self, cur_round: int, response: str,
229257 return self ._container_handle_conclusion (cur_round , response ,
230258 build_result )
231259 # Other responses are invalid.
232- logger .warning ('ROUND %02d Invalid response from LLM: %s' , cur_round ,
233- response )
260+ logger .warning ('ROUND %02d Invalid response from LLM: %s' ,
261+ cur_round ,
262+ response ,
263+ trial = build_result .trial )
234264 return self ._container_handle_invalid_tool_usage (self .inspect_tool )
235265
236266 def execute (self , result_history : list [Result ]) -> BuildResult :
237267 """Executes the agent based on previous result."""
238- logger .info ('Executing Prototyper' )
239268 last_result = result_history [- 1 ]
269+ logger .info ('Executing Prototyper' , trial = last_result .trial )
240270 benchmark = last_result .benchmark
241271 self .inspect_tool = ProjectContainerTool (benchmark , name = 'inspect' )
242272 self .inspect_tool .compile (extra_commands = ' && rm -rf /out/* > /dev/null' )
@@ -250,13 +280,17 @@ def execute(self, result_history: list[Result]) -> BuildResult:
250280 try :
251281 client = self .llm .get_chat_client (model = self .llm .get_model ())
252282 while prompt and cur_round < MAX_ROUND :
253- response = self .chat_llm (cur_round , client = client , prompt = prompt )
283+ response = self .chat_llm (cur_round ,
284+ client = client ,
285+ prompt = prompt ,
286+ trial = last_result .trial )
254287 prompt = self ._container_tool_reaction (cur_round , response ,
255288 build_result )
256289 cur_round += 1
257290 finally :
258291 # Cleanup: stop and remove the container
259292 logger .debug ('Stopping and removing the inspect container %s' ,
260- self .inspect_tool .container_id )
293+ self .inspect_tool .container_id ,
294+ trial = last_result .trial )
261295 self .inspect_tool .terminate ()
262296 return build_result
0 commit comments