Skip to content

Commit

Permalink
experimental: refactor to have harness class (#470)
Browse files Browse the repository at this point in the history
Make the code cleaner and work towards
#450

Signed-off-by: David Korczynski <[email protected]>
  • Loading branch information
DavidKorczynski authored Jul 14, 2024
1 parent 526d00e commit eb7ddb3
Showing 1 changed file with 76 additions and 64 deletions.
140 changes: 76 additions & 64 deletions experimental/c-cpp/manager.py
Original file line number Diff line number Diff line change
Expand Up @@ -52,6 +52,37 @@ def setup_model(model: str):
LLM_MODEL = model


class AutogeneratedHarness:
  """Represents a generated harness and holds corresponding artifacts.

  The constructor only stores its arguments; nothing is written to disk until
  `dump_build_and_harness` is called.
  """

  def __init__(self, build_script: str, source_code: str, harness_path: str,
               harness_out: str, fuzzer_intrinsics: Dict[str, Any],
               language: str):
    """Stores the artifacts describing a single generated harness.

    Args:
      build_script: Full text of the build script for this harness.
      source_code: Full text of the harness source file.
      harness_path: Path the harness source is written to for building.
      harness_out: Path of the fuzzer executable produced by the build.
      fuzzer_intrinsics: Metadata produced alongside the harness. Callers in
        this file read keys such as 'autogen-id' and 'prompt' from it; the
        full schema is not defined here.
      language: Language of the target project.
    """
    self.build_script = build_script
    self.source_code = source_code
    self.harness_path = harness_path
    self.harness_out = harness_out
    self.fuzzer_intrinsics = fuzzer_intrinsics
    self.language = language

  def dump_build_and_harness(
      self,
      fuzzer_gen_dir: str,
      oss_fuzz_build_script: str = '/src/build.sh') -> None:
    """Writes the fuzzer source code and the build script in the provided
    directory and also in the paths used by OSS-Fuzz to build the harness.

    Args:
      fuzzer_gen_dir: Existing directory that receives a copy of the build
        script (as 'build.sh') and of the harness source (under the basename
        of `self.harness_path`), kept for later inspection.
      oss_fuzz_build_script: Path the build script is written to so that it
        can be built using `compile`. Defaults to the location used inside
        the OSS-Fuzz container; override it to run outside that environment.

    Raises:
      OSError: If any of the destination files cannot be opened for writing,
        e.g. because a parent directory does not exist.
    """
    # Order matters only when two destinations resolve to the same file; it is
    # kept as: archived copies first, then the live build locations.
    destinations = (
        # Store for later use.
        (os.path.join(fuzzer_gen_dir, 'build.sh'), self.build_script),
        (os.path.join(fuzzer_gen_dir, os.path.basename(self.harness_path)),
         self.source_code),
        # Write so they can be built using `compile`.
        (self.harness_path, self.source_code),
        (oss_fuzz_build_script, self.build_script),
    )
    for destination, content in destinations:
      with open(destination, 'w') as f:
        f.write(content)


def get_all_files_in_path(base_path: str,
path_to_subtract: Optional[str] = None) -> List[str]:
"""Gets all files in a tree and returns as a list of strings."""
Expand Down Expand Up @@ -898,9 +929,9 @@ def generate_harness_intrinsics(
language: str,
test_dir: str,
fuzzer_build_cmd: List[str],
verbose_logging: bool = True) -> List[Dict[str, Any]]:
verbose_logging: bool = True) -> List[AutogeneratedHarness]:
"""Get fuzzer source code, build script and misc for each heuristic."""
# TODO (david): add oss-fuzz-gen ore prompt generation logic.
# TODO (david): add oss-fuzz-gen core prompt generation logic.

# Get list of target functions for the heuristic.
fuzzer_targets = heuristic.get_fuzzing_targets()
Expand Down Expand Up @@ -937,21 +968,18 @@ def generate_harness_intrinsics(
fuzz_includes = fuzzer_intrinsics["build-command-includes"]
final_asan_build_script += f'{fuzz_cmd} {fuzz_includes} -o {fuzzer_out}'

# Wrap all parts we need for building and running the fuzzer.
harness_builds_to_validate.append({
'build-script': final_asan_build_script,
'source': fuzzer_intrinsics['full-source-code'],
'fuzzer-file': fuzzer_target_file,
'fuzzer-out': fuzzer_out,
'fuzzer-intrinsics': fuzzer_intrinsics,
})
harness_builds_to_validate.append(
AutogeneratedHarness(final_asan_build_script,
fuzzer_intrinsics['full-source-code'],
fuzzer_target_file, fuzzer_out, fuzzer_intrinsics,
language))
return harness_builds_to_validate


def evaluate_heuristic(test_dir, result_to_validate, fuzzer_intrinsics,
def evaluate_heuristic(test_dir, auto_generated_harness: AutogeneratedHarness,
heuristics_passed, idx_to_use,
disable_fuzz_build_and_test, folders_with_results,
outdir, github_repo, language, introspector_report):
outdir, github_repo, introspector_report):
"""For a given result, will write the harness and build to the file system
and run the OSS-Fuzz `compile` command to verify that the build script +
harness builds."""
Expand All @@ -967,33 +995,19 @@ def evaluate_heuristic(test_dir, result_to_validate, fuzzer_intrinsics,
shutil.rmtree(fuzzer_gen_dir)
os.mkdir(fuzzer_gen_dir)

_, _, fuzzer_target_file, _ = get_language_defaults(language)

# Dump introspector report so we can debug it
with open(os.path.join(fuzzer_gen_dir, 'summary.json'), 'w') as f:
json.dump(introspector_report, f)

# Write the fuzzer in the directory where we store the source code, just
# for covenience so we can easily see later.
with open(os.path.join(fuzzer_gen_dir, 'build.sh'), 'w') as f:
f.write(result_to_validate['build-script'])
with open(os.path.join(fuzzer_gen_dir, os.path.basename(fuzzer_target_file)),
'w') as f:
f.write(result_to_validate['source'])

# Write the build/fuzzer files as used by oss-fuzz and the build script.
with open(result_to_validate['fuzzer-file'], 'w') as f:
f.write(result_to_validate['source'])
with open('/src/build.sh', 'w') as f:
f.write(result_to_validate['build-script'])
auto_generated_harness.dump_build_and_harness(fuzzer_gen_dir)

# Skip build process if specified.
if disable_fuzz_build_and_test:
return

# Cleanup any existing fuzzers
if os.path.isfile(result_to_validate['fuzzer-out']):
os.remove(result_to_validate['fuzzer-out'])
if os.path.isfile(auto_generated_harness.harness_out):
os.remove(auto_generated_harness.harness_out)

modified_env = os.environ
modified_env['SANITIZER'] = 'address'
Expand All @@ -1016,19 +1030,20 @@ def evaluate_heuristic(test_dir, result_to_validate, fuzzer_intrinsics,
os.path.basename(test_dir) + f'-fuzzer-generated-{idx_to_use}')

folders_with_results.add(fuzzer_gen_dir)
if os.path.isfile(result_to_validate['fuzzer-out']):
shutil.copy(result_to_validate['fuzzer-out'], destination_folder)
if os.path.isfile(auto_generated_harness.harness_out):
shutil.copy(auto_generated_harness.harness_out, destination_folder)

# Copy artifacts to fuzzer_gen_dir if build was successful.
if build_returned_error is False:
heuristics_passed[fuzzer_intrinsics['autogen-id']] = True
heuristics_passed[
auto_generated_harness.fuzzer_intrinsics['autogen-id']] = True

# Write the prompt to out
with open(os.path.join(fuzzer_gen_dir, 'prompt.txt'), 'w') as f:
f.write(fuzzer_intrinsics['prompt'])
f.write(auto_generated_harness.fuzzer_intrinsics['prompt'])

# Run the fuzzer and observer error
if not os.path.isfile('/src/generated-fuzzer'):
if not os.path.isfile(auto_generated_harness.harness_out):
logger.info('No fuzzing harness executable')
logger.info('Copying [%s] to [%s]', fuzzer_gen_dir,
os.path.join(outdir, os.path.basename(fuzzer_gen_dir)))
Expand All @@ -1039,12 +1054,13 @@ def evaluate_heuristic(test_dir, result_to_validate, fuzzer_intrinsics,
logger.info('Running fuzzer')
run_out = open(os.path.join(fuzzer_gen_dir, 'fuzz-run.out'), 'w')
run_err = open(os.path.join(fuzzer_gen_dir, 'fuzz-run.err'), 'w')
corpus_dir = os.path.join(fuzzer_gen_dir, 'corpus',
os.path.basename(result_to_validate['fuzzer-out']))
corpus_dir = os.path.join(
fuzzer_gen_dir, 'corpus',
os.path.basename(auto_generated_harness.harness_out))
os.makedirs(corpus_dir)
try:
subprocess.check_call(
(f'{result_to_validate["fuzzer-out"]} -max_total_time=20'
(f'{auto_generated_harness.harness_out} -max_total_time=20'
f' {corpus_dir}'),
shell=True,
env=modified_env,
Expand All @@ -1061,11 +1077,11 @@ def evaluate_heuristic(test_dir, result_to_validate, fuzzer_intrinsics,
run_err = open(os.path.join(fuzzer_gen_dir, 'fuzz-no-leak-run.err'), 'w')
corpus_no_leak = os.path.join(
fuzzer_gen_dir, 'corpus',
os.path.basename(result_to_validate['fuzzer-out']) + '-no-leak')
os.path.basename(auto_generated_harness.harness_out) + '-no-leak')
os.makedirs(corpus_no_leak, exist_ok=True)
try:
subprocess.check_call(
(f'{result_to_validate["fuzzer-out"]} -max_total_time=20 '
(f'{auto_generated_harness.harness_out} -max_total_time=20 '
f'-detect_leaks=0 {corpus_no_leak}'),
shell=True,
env=modified_env,
Expand All @@ -1074,24 +1090,23 @@ def evaluate_heuristic(test_dir, result_to_validate, fuzzer_intrinsics,
except subprocess.CalledProcessError:
logger.info('[+] Running without leak detection failed')

logger.info('Copying 2 [%s] to [%s]', fuzzer_gen_dir,
os.path.join(outdir, os.path.basename(fuzzer_gen_dir)))
shutil.copytree(fuzzer_gen_dir,
os.path.join(outdir, os.path.basename(fuzzer_gen_dir)))
oss_fuzz_artifacts_dir = os.path.join(outdir,
os.path.basename(fuzzer_gen_dir))
logger.info('Copying 2 [%s] to [%s]', fuzzer_gen_dir, oss_fuzz_artifacts_dir)
shutil.copytree(fuzzer_gen_dir, oss_fuzz_artifacts_dir)

# Create an OSS-Fuzz integration and ClusterFuzzLite integration
create_clean_oss_fuzz_from_success(
github_repo, os.path.join(outdir, os.path.basename(fuzzer_gen_dir)),
language)
create_clean_clusterfuzz_lite_from_success(
github_repo, os.path.join(outdir, os.path.basename(fuzzer_gen_dir)),
language)
create_clean_oss_fuzz_from_success(github_repo, oss_fuzz_artifacts_dir,
auto_generated_harness.language)
create_clean_clusterfuzz_lite_from_success(github_repo,
oss_fuzz_artifacts_dir,
auto_generated_harness.language)


def create_clean_oss_fuzz_from_success(github_repo: str, success_dir: str,
def create_clean_oss_fuzz_from_success(github_repo: str, out_dir: str,
language: str) -> None:
"""Converts a successful out dir into a working OSS-Fuzz project."""
oss_fuzz_folder = os.path.join(success_dir, 'oss-fuzz-project')
oss_fuzz_folder = os.path.join(out_dir, 'oss-fuzz-project')
os.makedirs(oss_fuzz_folder)

# Project yaml
Expand All @@ -1107,7 +1122,7 @@ def create_clean_oss_fuzz_from_success(github_repo: str, success_dir: str,
# Copy fuzzer
_, _, fuzzer_target_file, _ = get_language_defaults(language)
shutil.copy(
os.path.join(success_dir, os.path.basename(fuzzer_target_file)),
os.path.join(out_dir, os.path.basename(fuzzer_target_file)),
os.path.join(oss_fuzz_folder,
os.path.basename(fuzzer_target_file).replace('empty-', '')))

Expand All @@ -1119,7 +1134,7 @@ def create_clean_oss_fuzz_from_success(github_repo: str, success_dir: str,
docker_out.write(dockerfile)

# Build file
with open(os.path.join(success_dir, 'build.sh'), 'r') as f:
with open(os.path.join(out_dir, 'build.sh'), 'r') as f:
build_content = f.read()

clean_build_content = convert_test_build_to_clean_build(
Expand All @@ -1129,11 +1144,10 @@ def create_clean_oss_fuzz_from_success(github_repo: str, success_dir: str,
f.write(clean_build_content)


def create_clean_clusterfuzz_lite_from_success(github_repo: str,
success_dir: str,
def create_clean_clusterfuzz_lite_from_success(github_repo: str, out_dir: str,
language: str) -> None:
"""Converts a successful out dir into a working ClusterFuzzLite project."""
cflite_folder = os.path.join(success_dir, 'clusterfuzz-lite-project')
cflite_folder = os.path.join(out_dir, 'clusterfuzz-lite-project')
os.makedirs(cflite_folder)

# Project yaml
Expand All @@ -1146,7 +1160,7 @@ def create_clean_clusterfuzz_lite_from_success(github_repo: str,
# Copy fuzzer
_, _, fuzzer_target_file, _ = get_language_defaults(language)
shutil.copy(
os.path.join(success_dir, os.path.basename(fuzzer_target_file)),
os.path.join(out_dir, os.path.basename(fuzzer_target_file)),
os.path.join(cflite_folder,
os.path.basename(fuzzer_target_file).replace('empty-', '')))

Expand All @@ -1158,7 +1172,7 @@ def create_clean_clusterfuzz_lite_from_success(github_repo: str,
docker_out.write(dockerfile)

# Build file
with open(os.path.join(success_dir, 'build.sh'), 'r') as f:
with open(os.path.join(out_dir, 'build.sh'), 'r') as f:
build_content = f.read()

clean_build_content = convert_test_build_to_clean_build(
Expand Down Expand Up @@ -1377,14 +1391,12 @@ def auto_generate(github_url,
# Build the fuzzer for each project
logger.info('Fuzzer harnesses to evaluate: %d',
len(harness_builds_to_validate))
for result_to_validate in harness_builds_to_validate:
for generated_harness in harness_builds_to_validate:
logger.info('Evaluating harness')
fuzzer_intrinsics = result_to_validate['fuzzer-intrinsics']
# Make a directory and store artifacts there
evaluate_heuristic(test_dir, result_to_validate, fuzzer_intrinsics,
heuristics_passed, idx, disable_fuzz_build_and_test,
folders_with_results, outdir, github_url, language,
introspector_report)
evaluate_heuristic(test_dir, generated_harness, heuristics_passed, idx,
disable_fuzz_build_and_test, folders_with_results,
outdir, github_url, introspector_report)
idx += 1

# Show those that succeeded.
Expand Down

0 comments on commit eb7ddb3

Please sign in to comment.