-
Notifications
You must be signed in to change notification settings - Fork 130
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
experimental: add new tooling that shows how to create from scratch (#…
…768) This uses a new version of Fuzz Introspector to analyse code that is not in OSS-Fuzz and also uses the prompt generation logic from OSS-Fuzz-gen core. This is an mimimum viable use case in terms of generating harnesses by way of FI and OFG. --------- Signed-off-by: David Korczynski <[email protected]>
- Loading branch information
1 parent
db10eac
commit ee94805
Showing
3 changed files
with
181 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,61 @@ | ||
# Sample tooling for generating harnesses for a codebase without harnesses | ||
|
||
|
||
To run this you need a local version of Fuzz Introspector and a target code | ||
base you want to analyse. | ||
|
||
Sample run where `${MODEL}` holds your model name: | ||
|
||
```sh | ||
# Create virtual environment | ||
python3.11 -m virtualenv .venv | ||
. .venv/bin/activate | ||
|
||
# Install Fuzz Introspector in virtual environment | ||
git clone https://github.com/ossf/fuzz-introspector | ||
cd fuzz-introspector/src | ||
python3 -m pip install -e . | ||
cd ../../ | ||
|
||
|
||
# Prepare a target | ||
git clone https://github.com/dvhar/dateparse | ||
|
||
# Clone oss-fuzz-gen | ||
git clone https://github.com/google/oss-fuzz-gen | ||
cd oss-fuzz-gen | ||
|
||
# Generate a harness | ||
python3 -m experimental.from_scratch.generate \ | ||
-l ${MODEL} \ | ||
-f dateparse \ | ||
-t ../dateparse/ | ||
|
||
# Show harness | ||
cat responses/01.rawoutput | ||
""" | ||
#include <stdio.h> | ||
#include <string.h> | ||
typedef struct{int year;int month; int day;} date_t; | ||
int dateparse(const char* datestr, date_t* t, int *offset, int stringlen); // prototype | ||
int LLVMFuzzerTestOneInput(const uint8_t *data, size_t size) { | ||
date_t t; | ||
int offset = 0; | ||
// ensure NULL termination for the data string | ||
char* datestr = (char*)malloc(size + 1); | ||
if (!datestr) | ||
return 0; | ||
memcpy(datestr, data, size); | ||
datestr[size] = '\0'; | ||
dateparse(datestr, &t, &offset, size); | ||
free(datestr); | ||
return 0; | ||
} | ||
""" | ||
``` |
Empty file.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,120 @@ | ||
#!/usr/bin/env python3 | ||
# Copyright 2025 Google LLC | ||
# | ||
# Licensed under the Apache License, Version 2.0 (the "License"); | ||
# you may not use this file except in compliance with the License. | ||
# You may obtain a copy of the License at | ||
# | ||
# http://www.apache.org/licenses/LICENSE-2.0 | ||
# | ||
# Unless required by applicable law or agreed to in writing, software | ||
# distributed under the License is distributed on an "AS IS" BASIS, | ||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
# See the License for the specific language governing permissions and | ||
# limitations under the License. | ||
"""Module for generating harnesses in arbitrary projects.""" | ||
|
||
import argparse | ||
import os | ||
import sys | ||
from typing import Optional | ||
|
||
# pyright: reportMissingImports = false | ||
from fuzz_introspector.frontends import oss_fuzz as fi_oss_fuzz | ||
|
||
from experiment import benchmark as benchmarklib | ||
from llm_toolkit import models, prompt_builder, prompts | ||
|
||
NUM_SAMPLES: int = 1 | ||
TEMPERATURE: float = 1 | ||
MAX_TOKENS: int = 8192 | ||
|
||
|
||
def parse_args() -> argparse.Namespace: | ||
"""Parses command line arguments.""" | ||
parser = argparse.ArgumentParser( | ||
description='Run all experiments that evaluates all target functions.') | ||
parser.add_argument('-l', | ||
'--model', | ||
default=models.DefaultModel.name, | ||
help=('Models available: ' | ||
f'{", ".join(models.LLM.all_llm_names())}')) | ||
parser.add_argument('-r', | ||
'--response-dir', | ||
default='./responses', | ||
help='LLM response directory.') | ||
parser.add_argument('-f', | ||
'--function', | ||
help='Name of function to generate a target for.', | ||
required=True) | ||
parser.add_argument('-t', | ||
'--target-dir', | ||
help='Directory with project source.', | ||
required=True) | ||
return parser.parse_args() | ||
|
||
|
||
def setup_model(args) -> models.LLM: | ||
return models.LLM.setup(ai_binary='', | ||
name=args.model, | ||
max_tokens=MAX_TOKENS, | ||
num_samples=NUM_SAMPLES, | ||
temperature=TEMPERATURE) | ||
|
||
|
||
def get_target_benchmark( | ||
language, target_dir, | ||
target_function_name) -> Optional[benchmarklib.Benchmark]: | ||
"""Run introspector analysis on a target directory and extract benchmark""" | ||
project = fi_oss_fuzz.analyse_folder(language=language, | ||
directory=target_dir, | ||
dump_output=False) | ||
# Trigger some analysis | ||
project.dump_module_logic(report_name='', dump_output=False) | ||
|
||
for function in project.all_functions: | ||
if function.name == target_function_name: | ||
param_list = [] | ||
for idx, arg_name in function.arg_names: | ||
param_list.append({'name': arg_name, 'type': function.arg_types[idx]}) | ||
|
||
return benchmarklib.Benchmark( | ||
benchmark_id='sample', | ||
project='no-name', | ||
language=language, | ||
function_name=function.name, | ||
function_signature=function.sig, | ||
return_type=function.return_type, | ||
params=param_list, | ||
target_path=function.parent_source.source_file) | ||
return None | ||
|
||
|
||
def construct_fuzz_prompt(model, benchmark) -> prompts.Prompt: | ||
"""Local benchmarker""" | ||
builder = prompt_builder.DefaultTemplateBuilder(model, benchmark=benchmark) | ||
fuzz_prompt = builder.build([]) | ||
return fuzz_prompt | ||
|
||
|
||
def main(): | ||
args = parse_args() | ||
model = setup_model(args) | ||
|
||
target_benchmark = get_target_benchmark('c++', args.target_dir, args.function) | ||
if target_benchmark is None: | ||
print('Could not find target function. Exiting.') | ||
sys.exit(0) | ||
|
||
fuzz_prompt = construct_fuzz_prompt(model, target_benchmark) | ||
os.makedirs(args.response_dir, exist_ok=True) | ||
print('Querying with the prompt') | ||
print('-' * 40) | ||
print(fuzz_prompt.get()) | ||
print('-' * 40) | ||
print(f'Running query and writing results in {args.reponse_dir}') | ||
model.query_llm(fuzz_prompt, response_dir=args.response_dir) | ||
|
||
|
||
if __name__ == "__main__": | ||
main() |