google
diff --git a/agent/base_agent.py b/agent/base_agent.py
Lines changed: 19 additions & 0 deletions
diff --git a/agent/context_analyzer.py b/agent/context_analyzer.py
Lines changed: 20 additions & 12 deletions
diff --git a/agent/coverage_analyzer.py b/agent/coverage_analyzer.py
Lines changed: 4 additions & 1 deletion
diff --git a/agent/crash_analyzer.py b/agent/crash_analyzer.py
Lines changed: 8 additions & 0 deletions
diff --git a/agent/function_analyzer.py b/agent/function_analyzer.py
Lines changed: 64 additions & 18 deletions
diff --git a/benchmark-sets/analyzer-tests-1/astc-encoder.yaml b/benchmark-sets/analyzer-tests-1/astc-encoder.yaml
Lines changed: 15 additions & 0 deletions
diff --git a/benchmark-sets/analyzer-tests-1/bind9.yaml b/benchmark-sets/analyzer-tests-1/bind9.yaml
Lines changed: 17 additions & 0 deletions
diff --git a/benchmark-sets/analyzer-tests-1/bluez.yaml b/benchmark-sets/analyzer-tests-1/bluez.yaml
Lines changed: 21 additions & 0 deletions
diff --git a/benchmark-sets/analyzer-tests-1/clamav.yaml b/benchmark-sets/analyzer-tests-1/clamav.yaml
Lines changed: 25 additions & 0 deletions
diff --git a/benchmark-sets/analyzer-tests-1/exiv2.yaml b/benchmark-sets/analyzer-tests-1/exiv2.yaml
Lines changed: 15 additions & 0 deletions
@@ -14,6 +14,7 @@
 """The abstract base class for LLM agents in stages."""
 import argparse
 import asyncio
+import json
 import os
 import random
 import re
@@ -370,6 +371,20 @@ def __init__(self,
 
     logger.info('ADK Agent %s created.', self.name, trial=self.trial)
 
+  def get_xml_representation(self, response: Optional[dict]) -> str:
+    """Returns the XML representation of the response.
+
+    Each top-level item of a dict response is rendered as an opening
+    `<key>` tag, the value, and a closing `</key>` tag on three separate
+    lines; the rendered items are joined with newlines. Values are
+    interpolated with their default string formatting: they are not
+    XML-escaped and nested dicts are not expanded into child elements.
+
+    Args:
+        response: The response payload. Annotated as an optional dict, but
+                  non-dict values are also accepted (see Returns).
+
+    Returns:
+        An empty string when the response is falsy, `str(response)` when it
+        is not a dict, otherwise the newline-joined tagged items.
+    """
+    # The falsiness check runs before the type check, so None, an empty dict
+    # and any other falsy value (0, '', False, []) all yield ''.
+    if not response:
+      return ''
+    # If the response is not a dict, return it as string
+    if not isinstance(response, dict):
+      return str(response)
+    # Now, we wrap items in a dict with xml tags.
+    xml_rep = []
+    for key, value in response.items():
+      xml_obj = f'<{key}>\n{value}\n</{key}>'
+      xml_rep.append(xml_obj)
+    return '\n'.join(xml_rep)
+
   def chat_llm(self, cur_round: int, client: Any, prompt: Prompt,
                trial: int) -> Any:
     """Call the agent with the given prompt, running async code in sync."""
@@ -398,10 +413,14 @@ async def _call():
               self.log_llm_response(final_response)
             elif event.content.parts[0].function_response:
               final_response = event.content.parts[0].function_response.response
+              self.log_llm_response(self.get_xml_representation(final_response))
           elif event.actions and event.actions.escalate:
             error_message = event.error_message
             logger.error('Agent escalated: %s', error_message, trial=self.trial)
 
+      if not final_response:
+        self.log_llm_response('No valid response from LLM.')
+
       return final_response
 
     return self.llm.with_retry_on_error(lambda: asyncio.run(_call()),
 
@@ -73,13 +73,18 @@ def execute(self,
 
     last_result = result_history[-1]
 
-    if not isinstance(
-        last_result, resultslib.AnalysisResult) or not last_result.crash_result:
-      logger.error(f'Expected last result to be AnalysisResult, got %s.',
+    # Validate that the last result is an AnalysisResult and has a valid crash_result
+    if not isinstance(last_result, resultslib.AnalysisResult):
+      logger.error('Expected last result to be AnalysisResult, got %s.',
                    type(last_result),
                    trial=self.trial)
       return last_result
 
+    if not last_result.crash_result:
+      logger.error('Missing crash_result in the AnalysisResult.',
+                   trial=self.trial)
+      return last_result
+
     context_result = None
 
     # Initialize the ProjectContainerTool for local file search
@@ -99,6 +104,10 @@ def execute(self,
                                      trial=result_history[-1].trial)
       context_result = resultslib.CrashContextResult.from_dict(final_response)
       if context_result:
+        logger.info(
+            'Is context analyzer result consistent: %s',
+            str(context_result.feasible == last_result.crash_result.true_bug),
+            trial=self.trial)
         break
       logger.error('Failed to parse LLM response into CrashContextResult.',
                    trial=self.trial)
@@ -239,31 +248,30 @@ def get_function_implementation(self, project_name: str,
     return response
 
   def report_final_result(self, feasible: bool, analysis: str,
-                          recommendations: str,
+                          source_code_evidence: str, recommendations: str,
                           tool_context: ToolContext) -> dict:
     """
     Provide final result, including the crash feasibility,
         detailed analysis, and any recommendations.
 
     Args:
         feasible (bool): True if the crash is feasible, False otherwise.
-        analysis (str): Detailed analysis and source code evidence showing
+        analysis (str): Detailed analysis showing
                         why the crash is or is not feasible.
+        source_code_evidence (str): Source code evidence supporting the analysis.
+                                    This MUST show the constraints on input variables and why they make the crash feasible or not feasible.
         recommendations (str): Recommendations for modifying the fuzz target to
                         prevent the crash. If the crash is feasible,
                         this should be empty.
 
     Returns:
         This function will not return anything to the LLM.
     """
-    response = f"""
-      <feasible>\n{feasible}\n</feasible>
-      <analysis>\n{analysis}\n</analysis>
-      <recommendations>\n{recommendations}\n</recommendations>
-    """
-    self.log_llm_response(response)
     crash_context_result = resultslib.CrashContextResult(
-        feasible=feasible, analysis=analysis, recommendations=recommendations)
+        feasible=feasible,
+        analysis=analysis,
+        source_code_evidence=source_code_evidence,
+        recommendations=recommendations)
 
     # We have received final result. Instruct the agent to terminate execution.
     # tool_context._invocation_context.end_invocation = True
 
@@ -45,10 +45,13 @@ def _initial_prompt(self, results: list[Result]) -> Prompt:
                    trial=self.trial)
       return Prompt()
 
+    function_requirements = self.get_function_requirements()
+
     builder = CoverageAnalyzerTemplateBuilder(self.llm, benchmark, last_result)
     prompt = builder.build(example_pair=[],
                            tool_guides=self.inspect_tool.tutorial(),
-                           project_dir=self.inspect_tool.project_dir)
+                           project_dir=self.inspect_tool.project_dir,
+                           function_requirements=function_requirements)
     # TODO: A different file name/dir.
     prompt.save(self.args.work_dirs.prompt)
 
 
@@ -134,6 +134,14 @@ def _container_tool_reaction(self, cur_round: int, response: str,
     # If there's a conclusion tag and a tool usage tag, then there's an error
     prompt = prompt_builder.CrashAnalyzerTemplateBuilder(self.llm,
                                                          None).build([])
+    if self._parse_tag(response, 'gdb output') or self._parse_tag(
+        response, 'gdb command'):
+      extra_note = 'NOTE: It seems you have hallucinated interaction with the GDB tool. ' \
+      'You MUST restart the GDB interaction again and erase the previous interaction from your memory.'
+      self.gdb_tool_used = False
+      return self._container_handle_invalid_tool_usage(
+          [self.gdb_tool, self.bash_tool], cur_round, response, prompt,
+          extra_note)
     if self._parse_tag(response, 'conclusion') and (self._parse_tag(
         response, 'gdb') or self._parse_tag(response, 'bash')):
       extra_note = 'NOTE: You cannot provide both tool commands and conclusion in the same response.'
 
@@ -18,9 +18,10 @@
 """
 
 import argparse
-import os
 from typing import Optional
 
+from google.adk.tools import ToolContext
+
 import logger
 import results as resultslib
 from agent import base_agent
@@ -50,7 +51,10 @@ def __init__(self,
 
     instruction = builder.get_instruction().get()
 
-    tools = [self.get_function_implementation, self.search_project_files]
+    tools = [
+        self.get_function_implementation, self.search_project_files,
+        self.return_final_result
+    ]
 
     super().__init__(trial, llm, args, benchmark, description, instruction,
                      tools, name)
@@ -74,17 +78,19 @@ def write_requirements_to_file(self, args, requirements: str) -> str:
 
     return requirement_path
 
-  def handle_llm_response(self, final_response_text: str,
-                          result: resultslib.Result) -> None:
+  def handle_llm_response(
+      self, function_analysis_result: resultslib.FunctionAnalysisResult,
+      result: resultslib.Result) -> None:
     """Handle the LLM response and update the result."""
 
-    result_str = self._parse_tag(final_response_text, 'response')
-    requirements = self._parse_tag(result_str, 'requirements')
-    if requirements:
-      # Write the requirements to a file
-      requirement_path = self.write_requirements_to_file(self.args, result_str)
-      function_analysis = resultslib.FunctionAnalysisResult(requirement_path)
-      result.function_analysis = function_analysis
+    function_requirements_text = self.get_xml_representation(
+        function_analysis_result.to_dict())
+
+    # Write the requirements to a file
+    requirement_path = self.write_requirements_to_file(
+        self.args, function_requirements_text)
+    function_analysis_result.function_analysis_path = requirement_path
+    result.function_analysis = function_analysis_result
 
   def execute(self,
               result_history: list[resultslib.Result]) -> resultslib.Result:
@@ -106,15 +112,27 @@ def execute(self,
     # Call the agent asynchronously and return the result
     prompt = self._initial_prompt(result_history)
 
-    final_response_text = self.chat_llm(self.round,
-                                        client=None,
-                                        prompt=prompt,
-                                        trial=result_history[-1].trial)
+    while self.round < self.max_round:
+      final_response = self.chat_llm(self.round,
+                                     client=None,
+                                     prompt=prompt,
+                                     trial=result_history[-1].trial)
 
-    self.handle_llm_response(final_response_text, result)
+      function_analyzer_result = resultslib.FunctionAnalysisResult.from_dict(
+          final_response)
+      if function_analyzer_result:
+        self.handle_llm_response(function_analyzer_result, result)
+        break
 
-    self.inspect_tool.terminate()
+      # Handle invalid LLM response
+      template_builder = prompt_builder.FunctionAnalyzerTemplateBuilder(
+          self.llm, self.benchmark)
+
+      prompt = self._container_handle_invalid_tool_usage(
+          [self.inspect_tool], self.round, final_response,
+          template_builder.build(), template_builder.get_response_format())
 
+    self.inspect_tool.terminate()
     return result
 
   def _initial_prompt(
@@ -126,7 +144,7 @@ def _initial_prompt(
     builder = prompt_builder.FunctionAnalyzerTemplateBuilder(
         self.llm, self.benchmark)
 
-    prompt = builder.build_prompt()
+    prompt = builder.build_prompt(self.inspect_tool.project_dir)
 
     prompt.append(self.inspect_tool.tutorial())
 
@@ -230,3 +248,31 @@ def get_function_implementation(self, project_name: str,
     self.log_llm_prompt(response)
 
     return response
+
+  def return_final_result(self, project_name: str, function_signature: str,
+                          description: str, requirements: str,
+                          tool_context: ToolContext) -> dict:
+    """
+    Provide final analysis results, including a detailed description of the function and requirements on its input and global variables.
+
+    Args:
+        project_name (str): The name of the project.
+        function_signature (str): The signature of the function you were provided.
+        description (str): A detailed description of the function.
+        requirements (str): Requirements on the function's input and global variables, formatted using <requirement> tags.
+
+    Returns:
+        This function does not return anything.
+    """
+    # NOTE(review): this method is passed in the agent's `tools` list, so
+    # presumably the docstring above is what the LLM sees as the tool
+    # description -- confirm before rewording it. Its "does not return
+    # anything" wording is addressed to the model; the method itself returns
+    # the dict built below.
+
+    # Package the tool arguments into the structured result type.
+    function_analysis = resultslib.FunctionAnalysisResult(
+        description=description,
+        function_signature=function_signature,
+        project_name=project_name,
+        requirements=requirements,
+    )
+
+    # We have received final result. Instruct the agent to terminate execution.
+    # tool_context._invocation_context.end_invocation = True
+    self.end_llm_chat(tool_context)
+    # Presumably this dict becomes the tool's function response, which the
+    # caller parses back with FunctionAnalysisResult.from_dict -- TODO confirm.
+    return function_analysis.to_dict()
@@ -0,0 +1,15 @@
+# Benchmark entry for project "astc-encoder": one function under analysis
+# plus the fuzz target (target_name / target_path) listed with it.
+# NOTE(review): all three params are typed "bool " (with a trailing space)
+# although the signature lists two const references and a uint8_t * --
+# presumably a placeholder emitted by the benchmark generator; confirm.
+"functions":
+- "name": "_Z20symbolic_to_physicalRK21block_size_descriptorRK25symbolic_compressed_blockPh"
+  "params":
+  - "name": "bsd"
+    "type": "bool "
+  - "name": "scb"
+    "type": "bool "
+  - "name": "pcb"
+    "type": "bool "
+  "return_type": "void"
+  "signature": "void symbolic_to_physical(const struct block_size_descriptor &, const struct symbolic_compressed_block &, uint8_t *)"
+"language": "c++"
+"project": "astc-encoder"
+"target_name": "fuzz_astc_physical_to_symbolic"
+"target_path": "/src/astc-encoder/Source/Fuzzers/fuzz_astc_physical_to_symbolic.cpp"
@@ -0,0 +1,17 @@
+# Benchmark entry for project "bind9": one function under analysis plus the
+# fuzz target (target_name / target_path) listed with it.
+# NOTE(review): "return_type" is "int" while the signature returns
+# isc_result_t, and the pointer params are typed "bool " (trailing space) --
+# presumably simplified/placeholder types from the generator; confirm.
+"functions":
+- "name": "dns_zt_asyncload"
+  "params":
+  - "name": "zt"
+    "type": "bool "
+  - "name": "newonly"
+    "type": "bool"
+  - "name": "loaddone"
+    "type": "bool "
+  - "name": "arg"
+    "type": "bool "
+  "return_type": "int"
+  "signature": "isc_result_t dns_zt_asyncload(dns_zt_t *, bool, dns_zt_callback_t *, void *)"
+"language": "c"
+"project": "bind9"
+"target_name": "isc_lex_gettoken_fuzzer"
+"target_path": "/src/bind9/fuzz/isc_lex_gettoken.c"
@@ -0,0 +1,21 @@
+# Benchmark entry for project "bluez": one function under analysis plus the
+# fuzz target (target_name / target_path) listed with it.
+# NOTE(review): "params" lists six entries while the signature lists seven
+# (it ends with a trailing "void"); "return_type" is "int" while the
+# signature returns guint. Pointer params are typed "bool " (trailing
+# space) -- presumably generator placeholders; confirm.
+"functions":
+- "name": "g_obex_get_req"
+  "params":
+  - "name": "obex"
+    "type": "bool "
+  - "name": "data_func"
+    "type": "bool "
+  - "name": "complete_func"
+    "type": "bool "
+  - "name": "user_data"
+    "type": "bool "
+  - "name": "err"
+    "type": "bool "
+  - "name": "first_hdr_id"
+    "type": "int"
+  "return_type": "int"
+  "signature": "guint g_obex_get_req(GObex *, GObexDataConsumer, GObexFunc, gpointer, GError **, guint, void)"
+"language": "c"
+"project": "bluez"
+"target_name": "fuzz_textfile"
+"target_path": "/src/fuzz_textfile.c"
@@ -0,0 +1,25 @@
+# Benchmark entry for project "clamav": one function under analysis plus the
+# fuzz target (target_name / target_path) listed with it.
+# NOTE(review): all eight param names are empty, so params can only be
+# matched to the signature by position. Pointer params are typed "bool "
+# (trailing space) -- presumably generator placeholders; confirm.
+"functions":
+- "name": "Lzma2Decode"
+  "params":
+  - "name": ""
+    "type": "bool "
+  - "name": ""
+    "type": "bool "
+  - "name": ""
+    "type": "bool "
+  - "name": ""
+    "type": "bool "
+  - "name": ""
+    "type": "char"
+  - "name": ""
+    "type": "int"
+  - "name": ""
+    "type": "bool "
+  - "name": ""
+    "type": "bool "
+  "return_type": "int"
+  "signature": "SRes Lzma2Decode(Byte *, SizeT *, const Byte *, SizeT *, Byte, ELzmaFinishMode, ELzmaStatus *, ISzAlloc *)"
+"language": "c++"
+"project": "clamav"
+"target_name": "clamav_scanfile_HWP3_fuzzer"
+"target_path": "/src/clamav/fuzz/clamav_scanfile_fuzzer.cpp"
@@ -0,0 +1,15 @@
+# Benchmark entry for project "exiv2": one function under analysis plus the
+# fuzz target (target_name / target_path) listed with it.
+# NOTE(review): "params" lists three unnamed "bool " entries while the
+# signature shows a single explicit argument (const ExifData &), and
+# "return_type" is "void" while the signature returns struct DataBuf --
+# verify against the generator output before relying on these fields.
+"functions":
+- "name": "_ZNK12_GLOBAL__N_113TiffThumbnail4copyERKN5Exiv28ExifDataE"
+  "params":
+  - "name": ""
+    "type": "bool "
+  - "name": ""
+    "type": "bool "
+  - "name": ""
+    "type": "bool "
+  "return_type": "void"
+  "signature": "struct DataBuf (anonymous namespace)::TiffThumbnail::copy(const ExifData &)"
+"language": "c++"
+"project": "exiv2"
+"target_name": "fuzz-read-print-write"
+"target_path": "/src/exiv2/fuzz/fuzz-read-print-write.cpp"