Skip to content

Commit d8c0fcb

Browse files
authored
Improving Function and Context Analyzer Agents (#1144)
This PR makes the following improvements to the function analyzer and context analyzer agents in OSS-Fuzz-Gen. Many of these improvements are targeted at resolving observed errors in these agents.

Context Analyzer
- Adds a `source_code_evidence` field to its final response. This forces the context analyzer to provide source code evidence to back its analysis.
- Modifies agents built using the ADK library to print their final response (in JSON format) using XML tags, to make them more readable and consistent with the output of other OSS-Fuzz-Gen agents in the logs and HTML report.
- Modifies the context analyzer prompts so that they specify the steps the LLM should take to analyze the feasibility of a crash.

Coverage Analyzer
- Provides the list of function requirements to the coverage analyzer.
- Modifies the coverage analyzer's prompt to instruct it not to suggest coverage improvement changes that would violate the derived function requirements.

Function Analyzer
- Modifies the function analyzer to report its final response using a function tool.
- Handles invalid LLM responses in the function analyzer agent.
- Modifies the function analyzer's prompt so that it specifies the steps the function analyzer should take to analyze a function's requirements.

Analysis Stage
- Modifies the Analysis stage to only execute the context analyzer if the crash analyzer classified the crash as a bug in the program.

models.py
- Handles LLM responses containing zero or multiple entries in the `content.parts` array. These LLM responses previously caused uncaught exceptions that crashed the agent.
- Modifies Gemini-2.5-flash models to use stable versions.

prompt_builder.py
- Integrates the result and analysis from the context analyzer into the Crash Enhancer agent.
1 parent bca17b0 commit d8c0fcb

34 files changed

+626
-150
lines changed

agent/base_agent.py

Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,7 @@
1414
"""The abstract base class for LLM agents in stages."""
1515
import argparse
1616
import asyncio
17+
import json
1718
import os
1819
import random
1920
import re
@@ -370,6 +371,20 @@ def __init__(self,
370371

371372
logger.info('ADK Agent %s created.', self.name, trial=self.trial)
372373

374+
def get_xml_representation(self, response: Optional[dict]) -> str:
375+
"""Returns the XML representation of the response."""
376+
if not response:
377+
return ''
378+
# If the response is not a dict, return it as string
379+
if not isinstance(response, dict):
380+
return str(response)
381+
# Now, we wrap items in a dict with xml tags.
382+
xml_rep = []
383+
for key, value in response.items():
384+
xml_obj = f'<{key}>\n{value}\n</{key}>'
385+
xml_rep.append(xml_obj)
386+
return '\n'.join(xml_rep)
387+
373388
def chat_llm(self, cur_round: int, client: Any, prompt: Prompt,
374389
trial: int) -> Any:
375390
"""Call the agent with the given prompt, running async code in sync."""
@@ -398,10 +413,14 @@ async def _call():
398413
self.log_llm_response(final_response)
399414
elif event.content.parts[0].function_response:
400415
final_response = event.content.parts[0].function_response.response
416+
self.log_llm_response(self.get_xml_representation(final_response))
401417
elif event.actions and event.actions.escalate:
402418
error_message = event.error_message
403419
logger.error('Agent escalated: %s', error_message, trial=self.trial)
404420

421+
if not final_response:
422+
self.log_llm_response('No valid response from LLM.')
423+
405424
return final_response
406425

407426
return self.llm.with_retry_on_error(lambda: asyncio.run(_call()),

agent/context_analyzer.py

Lines changed: 20 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -73,13 +73,18 @@ def execute(self,
7373

7474
last_result = result_history[-1]
7575

76-
if not isinstance(
77-
last_result, resultslib.AnalysisResult) or not last_result.crash_result:
78-
logger.error(f'Expected last result to be AnalysisResult, got %s.',
76+
# Validate that the last result is an AnalysisResult and has a valid crash_result
77+
if not isinstance(last_result, resultslib.AnalysisResult):
78+
logger.error('Expected last result to be AnalysisResult, got %s.',
7979
type(last_result),
8080
trial=self.trial)
8181
return last_result
8282

83+
if not last_result.crash_result:
84+
logger.error('Missing crash_result in the AnalysisResult.',
85+
trial=self.trial)
86+
return last_result
87+
8388
context_result = None
8489

8590
# Initialize the ProjectContainerTool for local file search
@@ -99,6 +104,10 @@ def execute(self,
99104
trial=result_history[-1].trial)
100105
context_result = resultslib.CrashContextResult.from_dict(final_response)
101106
if context_result:
107+
logger.info(
108+
'Is context analyzer result consistent: %s',
109+
str(context_result.feasible == last_result.crash_result.true_bug),
110+
trial=self.trial)
102111
break
103112
logger.error('Failed to parse LLM response into CrashContextResult.',
104113
trial=self.trial)
@@ -239,31 +248,30 @@ def get_function_implementation(self, project_name: str,
239248
return response
240249

241250
def report_final_result(self, feasible: bool, analysis: str,
242-
recommendations: str,
251+
source_code_evidence: str, recommendations: str,
243252
tool_context: ToolContext) -> dict:
244253
"""
245254
Provide final result, including the crash feasibility,
246255
detailed analysis, and any recommendations.
247256
248257
Args:
249258
feasible (bool): True if the crash is feasible, False otherwise.
250-
analysis (str): Detailed analysis and source code evidence showing
259+
analysis (str): Detailed analysis showing
251260
why the crash is or is not feasible.
261+
source_code_evidence (str): Source code evidence supporting the analysis.
262+
This MUST show the constraints on input variables and why they make the crash feasible or not feasible.
252263
recommendations (str): Recommendations for modifying the fuzz target to
253264
prevent the crash. If the crash is feasible,
254265
this should be empty.
255266
256267
Returns:
257268
This function will not return anything to the LLM.
258269
"""
259-
response = f"""
260-
<feasible>\n{feasible}\n</feasible>
261-
<analysis>\n{analysis}\n</analysis>
262-
<recommendations>\n{recommendations}\n</recommendations>
263-
"""
264-
self.log_llm_response(response)
265270
crash_context_result = resultslib.CrashContextResult(
266-
feasible=feasible, analysis=analysis, recommendations=recommendations)
271+
feasible=feasible,
272+
analysis=analysis,
273+
source_code_evidence=source_code_evidence,
274+
recommendations=recommendations)
267275

268276
# We have received final result. Instruct the agent to terminate execution.
269277
# tool_context._invocation_context.end_invocation = True

agent/coverage_analyzer.py

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -45,10 +45,13 @@ def _initial_prompt(self, results: list[Result]) -> Prompt:
4545
trial=self.trial)
4646
return Prompt()
4747

48+
function_requirements = self.get_function_requirements()
49+
4850
builder = CoverageAnalyzerTemplateBuilder(self.llm, benchmark, last_result)
4951
prompt = builder.build(example_pair=[],
5052
tool_guides=self.inspect_tool.tutorial(),
51-
project_dir=self.inspect_tool.project_dir)
53+
project_dir=self.inspect_tool.project_dir,
54+
function_requirements=function_requirements)
5255
# TODO: A different file name/dir.
5356
prompt.save(self.args.work_dirs.prompt)
5457

agent/crash_analyzer.py

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -134,6 +134,14 @@ def _container_tool_reaction(self, cur_round: int, response: str,
134134
# If there's a conclusion tag and a tool usage tag, then there's an error
135135
prompt = prompt_builder.CrashAnalyzerTemplateBuilder(self.llm,
136136
None).build([])
137+
if self._parse_tag(response, 'gdb output') or self._parse_tag(
138+
response, 'gdb command'):
139+
extra_note = 'NOTE: It seems you have hallucinated interaction with the GDB tool. ' \
140+
'You MUST restart the GDB interaction again and erase the previous interaction from your memory.'
141+
self.gdb_tool_used = False
142+
return self._container_handle_invalid_tool_usage(
143+
[self.gdb_tool, self.bash_tool], cur_round, response, prompt,
144+
extra_note)
137145
if self._parse_tag(response, 'conclusion') and (self._parse_tag(
138146
response, 'gdb') or self._parse_tag(response, 'bash')):
139147
extra_note = 'NOTE: You cannot provide both tool commands and conclusion in the same response.'

agent/function_analyzer.py

Lines changed: 64 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -18,9 +18,10 @@
1818
"""
1919

2020
import argparse
21-
import os
2221
from typing import Optional
2322

23+
from google.adk.tools import ToolContext
24+
2425
import logger
2526
import results as resultslib
2627
from agent import base_agent
@@ -50,7 +51,10 @@ def __init__(self,
5051

5152
instruction = builder.get_instruction().get()
5253

53-
tools = [self.get_function_implementation, self.search_project_files]
54+
tools = [
55+
self.get_function_implementation, self.search_project_files,
56+
self.return_final_result
57+
]
5458

5559
super().__init__(trial, llm, args, benchmark, description, instruction,
5660
tools, name)
@@ -74,17 +78,19 @@ def write_requirements_to_file(self, args, requirements: str) -> str:
7478

7579
return requirement_path
7680

77-
def handle_llm_response(self, final_response_text: str,
78-
result: resultslib.Result) -> None:
81+
def handle_llm_response(
82+
self, function_analysis_result: resultslib.FunctionAnalysisResult,
83+
result: resultslib.Result) -> None:
7984
"""Handle the LLM response and update the result."""
8085

81-
result_str = self._parse_tag(final_response_text, 'response')
82-
requirements = self._parse_tag(result_str, 'requirements')
83-
if requirements:
84-
# Write the requirements to a file
85-
requirement_path = self.write_requirements_to_file(self.args, result_str)
86-
function_analysis = resultslib.FunctionAnalysisResult(requirement_path)
87-
result.function_analysis = function_analysis
86+
function_requirements_text = self.get_xml_representation(
87+
function_analysis_result.to_dict())
88+
89+
# Write the requirements to a file
90+
requirement_path = self.write_requirements_to_file(
91+
self.args, function_requirements_text)
92+
function_analysis_result.function_analysis_path = requirement_path
93+
result.function_analysis = function_analysis_result
8894

8995
def execute(self,
9096
result_history: list[resultslib.Result]) -> resultslib.Result:
@@ -106,15 +112,27 @@ def execute(self,
106112
# Call the agent asynchronously and return the result
107113
prompt = self._initial_prompt(result_history)
108114

109-
final_response_text = self.chat_llm(self.round,
110-
client=None,
111-
prompt=prompt,
112-
trial=result_history[-1].trial)
115+
while self.round < self.max_round:
116+
final_response = self.chat_llm(self.round,
117+
client=None,
118+
prompt=prompt,
119+
trial=result_history[-1].trial)
113120

114-
self.handle_llm_response(final_response_text, result)
121+
function_analyzer_result = resultslib.FunctionAnalysisResult.from_dict(
122+
final_response)
123+
if function_analyzer_result:
124+
self.handle_llm_response(function_analyzer_result, result)
125+
break
115126

116-
self.inspect_tool.terminate()
127+
# Handle invalid LLM response
128+
template_builder = prompt_builder.FunctionAnalyzerTemplateBuilder(
129+
self.llm, self.benchmark)
130+
131+
prompt = self._container_handle_invalid_tool_usage(
132+
[self.inspect_tool], self.round, final_response,
133+
template_builder.build(), template_builder.get_response_format())
117134

135+
self.inspect_tool.terminate()
118136
return result
119137

120138
def _initial_prompt(
@@ -126,7 +144,7 @@ def _initial_prompt(
126144
builder = prompt_builder.FunctionAnalyzerTemplateBuilder(
127145
self.llm, self.benchmark)
128146

129-
prompt = builder.build_prompt()
147+
prompt = builder.build_prompt(self.inspect_tool.project_dir)
130148

131149
prompt.append(self.inspect_tool.tutorial())
132150

@@ -230,3 +248,31 @@ def get_function_implementation(self, project_name: str,
230248
self.log_llm_prompt(response)
231249

232250
return response
251+
252+
def return_final_result(self, project_name: str, function_signature: str,
253+
description: str, requirements: str,
254+
tool_context: ToolContext) -> dict:
255+
"""
256+
Provide final analysis results, including a detailed description of the function and requirements on its input and global variables.
257+
258+
Args:
259+
project_name (str): The name of the project.
260+
function_signature (str): The signature of the function you were provided.
261+
description (str): A detailed description of the function.
262+
requirements (str): Requirements on the function's input and global variables, formatted using <requirement> tags.
263+
264+
Returns:
265+
This function does not return anything.
266+
"""
267+
268+
function_analysis = resultslib.FunctionAnalysisResult(
269+
description=description,
270+
function_signature=function_signature,
271+
project_name=project_name,
272+
requirements=requirements,
273+
)
274+
275+
# We have received final result. Instruct the agent to terminate execution.
276+
# tool_context._invocation_context.end_invocation = True
277+
self.end_llm_chat(tool_context)
278+
return function_analysis.to_dict()
Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,15 @@
1+
"functions":
2+
- "name": "_Z20symbolic_to_physicalRK21block_size_descriptorRK25symbolic_compressed_blockPh"
3+
"params":
4+
- "name": "bsd"
5+
"type": "bool "
6+
- "name": "scb"
7+
"type": "bool "
8+
- "name": "pcb"
9+
"type": "bool "
10+
"return_type": "void"
11+
"signature": "void symbolic_to_physical(const struct block_size_descriptor &, const struct symbolic_compressed_block &, uint8_t *)"
12+
"language": "c++"
13+
"project": "astc-encoder"
14+
"target_name": "fuzz_astc_physical_to_symbolic"
15+
"target_path": "/src/astc-encoder/Source/Fuzzers/fuzz_astc_physical_to_symbolic.cpp"
Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,17 @@
1+
"functions":
2+
- "name": "dns_zt_asyncload"
3+
"params":
4+
- "name": "zt"
5+
"type": "bool "
6+
- "name": "newonly"
7+
"type": "bool"
8+
- "name": "loaddone"
9+
"type": "bool "
10+
- "name": "arg"
11+
"type": "bool "
12+
"return_type": "int"
13+
"signature": "isc_result_t dns_zt_asyncload(dns_zt_t *, bool, dns_zt_callback_t *, void *)"
14+
"language": "c"
15+
"project": "bind9"
16+
"target_name": "isc_lex_gettoken_fuzzer"
17+
"target_path": "/src/bind9/fuzz/isc_lex_gettoken.c"
Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,21 @@
1+
"functions":
2+
- "name": "g_obex_get_req"
3+
"params":
4+
- "name": "obex"
5+
"type": "bool "
6+
- "name": "data_func"
7+
"type": "bool "
8+
- "name": "complete_func"
9+
"type": "bool "
10+
- "name": "user_data"
11+
"type": "bool "
12+
- "name": "err"
13+
"type": "bool "
14+
- "name": "first_hdr_id"
15+
"type": "int"
16+
"return_type": "int"
17+
"signature": "guint g_obex_get_req(GObex *, GObexDataConsumer, GObexFunc, gpointer, GError **, guint, void)"
18+
"language": "c"
19+
"project": "bluez"
20+
"target_name": "fuzz_textfile"
21+
"target_path": "/src/fuzz_textfile.c"
Lines changed: 25 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,25 @@
1+
"functions":
2+
- "name": "Lzma2Decode"
3+
"params":
4+
- "name": ""
5+
"type": "bool "
6+
- "name": ""
7+
"type": "bool "
8+
- "name": ""
9+
"type": "bool "
10+
- "name": ""
11+
"type": "bool "
12+
- "name": ""
13+
"type": "char"
14+
- "name": ""
15+
"type": "int"
16+
- "name": ""
17+
"type": "bool "
18+
- "name": ""
19+
"type": "bool "
20+
"return_type": "int"
21+
"signature": "SRes Lzma2Decode(Byte *, SizeT *, const Byte *, SizeT *, Byte, ELzmaFinishMode, ELzmaStatus *, ISzAlloc *)"
22+
"language": "c++"
23+
"project": "clamav"
24+
"target_name": "clamav_scanfile_HWP3_fuzzer"
25+
"target_path": "/src/clamav/fuzz/clamav_scanfile_fuzzer.cpp"
Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,15 @@
1+
"functions":
2+
- "name": "_ZNK12_GLOBAL__N_113TiffThumbnail4copyERKN5Exiv28ExifDataE"
3+
"params":
4+
- "name": ""
5+
"type": "bool "
6+
- "name": ""
7+
"type": "bool "
8+
- "name": ""
9+
"type": "bool "
10+
"return_type": "void"
11+
"signature": "struct DataBuf (anonymous namespace)::TiffThumbnail::copy(const ExifData &)"
12+
"language": "c++"
13+
"project": "exiv2"
14+
"target_name": "fuzz-read-print-write"
15+
"target_path": "/src/exiv2/fuzz/fuzz-read-print-write.cpp"

0 commit comments

Comments
 (0)