From 0a1d8d0d74e968bf10f88a3493150c8c489844c5 Mon Sep 17 00:00:00 2001
From: Riya Dulepet
Date: Wed, 7 May 2025 15:51:08 -0700
Subject: [PATCH 1/3] ddiff

---
 get_diff.py | 133 ++++++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 133 insertions(+)
 create mode 100644 get_diff.py

diff --git a/get_diff.py b/get_diff.py
new file mode 100644
index 0000000000..1323837d1d
--- /dev/null
+++ b/get_diff.py
@@ -0,0 +1,133 @@
+"""Diff each bounty's patch files against its vulnerable codebase checkout.
+
+For every REPO/bounties/BOUNTY/bounty_metadata.json next to this script, check
+out the bounty's ``vulnerable_commit`` in REPO/codebase and write a report of
+every patched file to diff_outputs/REPO_ID.txt.
+"""
+import os
+import json
+import subprocess
+from pathlib import Path
+from difflib import unified_diff
+
+ROOT = Path(__file__).resolve().parent
+DIFF_OUT_DIR = ROOT / "diff_outputs"
+DIFF_OUT_DIR.mkdir(exist_ok=True)
+
+
+def run_git_checkout(repo_path, commit):
+    """Check out ``commit`` in ``repo_path / "codebase"``.
+
+    Raises:
+        subprocess.CalledProcessError: if ``git checkout`` exits non-zero.
+    """
+    subprocess.run(["git", "checkout", commit], cwd=repo_path / "codebase", check=True)
+
+
+def get_diff(patch_path, codebase_path):
+    """Compare a patched file with its counterpart in the codebase.
+
+    Returns:
+        ``(lines, is_new)``. If ``codebase_path`` does not exist, ``lines`` is
+        the patch file's content and ``is_new`` is True. Otherwise ``lines`` is
+        the unified diff (empty when the files match) and ``is_new`` is False.
+    """
+    with open(patch_path, 'r', encoding='utf-8') as f1:
+        patch_lines = f1.readlines()
+
+    if not codebase_path.exists():
+        return patch_lines, True  # New file added
+
+    with open(codebase_path, 'r', encoding='utf-8') as f2:
+        codebase_lines = f2.readlines()
+
+    diff = unified_diff(codebase_lines, patch_lines, fromfile=str(codebase_path), tofile=str(patch_path))
+    # A final line without a trailing newline would otherwise be glued to the
+    # next diff line when the report is written with writelines().
+    return [line if line.endswith("\n") else line + "\n" for line in diff], False
+
+
+def _collect_bounty_report(repo, repo_path, bounty_dir, metadata_file):
+    """Return the report lines for one bounty, printing progress along the way."""
+    bounty_name = bounty_dir.name  # e.g., "bounty_0"
+    print(f"\n=== {repo}/{bounty_name} ===")
+    output_lines = [f"== {repo}/{bounty_name} ==\n"]
+
+    with open(metadata_file, encoding='utf-8') as f:
+        metadata = json.load(f)
+
+    commit = metadata.get("vulnerable_commit")
+    patch_map = metadata.get("patch", {})
+
+    if not commit:
+        # Passing None to subprocess.run raises TypeError, which would abort the
+        # whole run; report the broken metadata and move on instead.
+        msg = f"❌ No vulnerable_commit in {metadata_file}"
+        print(msg)
+        output_lines.append(msg + "\n")
+        return output_lines
+
+    try:
+        run_git_checkout(repo_path, commit)
+    except subprocess.CalledProcessError:
+        msg = f"❌ Failed to checkout {commit} in {repo}"
+        print(msg)
+        output_lines.append(msg + "\n")
+        return output_lines
+
+    for patch_file, target_file in patch_map.items():
+        patch_path = bounty_dir / patch_file
+        target_path = repo_path / target_file
+
+        if not patch_path.exists():
+            msg = f"⚠️ Patch file missing: {patch_path}"
+            print(msg)
+            output_lines.append(msg + "\n")
+            continue
+
+        diff, is_new = get_diff(patch_path, target_path)
+        if is_new:
+            msg = f"\n🆕 New file added in patch: {patch_file}\n"
+            print(msg)
+            output_lines.append(msg)
+            output_lines.extend([f"+ {line}" for line in diff])
+        elif diff:
+            msg = f"\nDiff for {patch_file} vs {target_file}:\n"
+            print(msg)
+            output_lines.append(msg)
+            output_lines.extend(diff)
+        else:
+            msg = f"\n✅ No diff for {patch_file}\n"
+            print(msg)
+            output_lines.append(msg)
+
+    return output_lines
+
+
+def main():
+    """Write one report file per bounty found under ROOT."""
+    for repo in os.listdir(ROOT):
+        repo_path = ROOT / repo
+        bounties_path = repo_path / "bounties"
+        codebase_path = repo_path / "codebase"
+
+        if not bounties_path.is_dir() or not codebase_path.is_dir():
+            continue
+
+        for bounty_dir in bounties_path.iterdir():
+            metadata_file = bounty_dir / "bounty_metadata.json"
+            if not bounty_dir.is_dir() or not metadata_file.exists():
+                continue
+
+            bounty_id = bounty_dir.name.replace("bounty_", "")  # "bounty_0" -> "0"
+            out_file = DIFF_OUT_DIR / f"{repo}_{bounty_id}.txt"
+            output_lines = _collect_bounty_report(repo, repo_path, bounty_dir, metadata_file)
+
+            # Write even when the checkout failed, so the error lands in the
+            # report file rather than only on stdout.
+            with open(out_file, 'w', encoding='utf-8') as f:
+                f.writelines(output_lines)
+
+
+if __name__ == "__main__":
+    main()

From 4931c0e8ef8885ceff5d7c03202e2aafa3016bff Mon Sep 17 00:00:00 2001
From: Riya Dulepet
Date: Fri, 16 May 2025 18:45:43 -0700
Subject: [PATCH 2/3] post process script

---
 post_process_diff.py | 66 ++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 66 insertions(+)
 create mode 100644 post_process_diff.py

diff --git a/post_process_diff.py b/post_process_diff.py
new file mode 100644
index 0000000000..ea7896f02b
--- /dev/null
+++ b/post_process_diff.py
@@ -0,0 +1,66 @@
+"""Summarise the reports in ``diff_outputs`` into ``diff_stats_summary.csv``."""
+import re
+import csv
+from pathlib import Path
+
+DIFF_DIR = Path(__file__).resolve().parent / "diff_outputs"
+CSV_PATH = Path(__file__).resolve().parent / "diff_stats_summary.csv"
+
+
+def parse_diff_file(diff_path):
+    """Count diffed files, new files and added/removed lines in one report.
+
+    Returns:
+        A dict with keys ``bounty_name`` (the report's file stem),
+        ``files_diffed``, ``lines_added``, ``lines_removed`` and ``new_files``.
+    """
+    stats = {
+        "bounty_name": diff_path.stem,
+        "files_diffed": 0,
+        "lines_added": 0,
+        "lines_removed": 0,
+        "new_files": 0,
+    }
+    # The "--- from" / "+++ to" file headers only appear right after a
+    # "Diff for" line. Tracking that, rather than skipping every line that
+    # starts with "---" or "+++", keeps changed lines whose own text begins
+    # with "--" or "++" (SQL comments, Markdown rules, "++i") in the counts.
+    headers_pending = 0
+
+    with open(diff_path, 'r', encoding='utf-8') as f:
+        for line in f:
+            line = line.rstrip()
+
+            if line.startswith("🆕 New file added in patch"):
+                stats["new_files"] += 1
+            elif line.startswith("Diff for"):
+                stats["files_diffed"] += 1
+                headers_pending = 2
+            elif headers_pending and line.startswith(("--- ", "+++ ")):
+                headers_pending -= 1
+            elif line.startswith("+"):
+                stats["lines_added"] += 1
+            elif line.startswith("-"):
+                stats["lines_removed"] += 1
+
+    return stats
+
+
+def main():
+    """Parse every report in DIFF_DIR and write the per-bounty stats CSV."""
+    # Sort so the CSV row order does not depend on filesystem listing order.
+    all_stats = [parse_diff_file(file) for file in sorted(DIFF_DIR.glob("*.txt"))]
+
+    # Write to CSV
+    with open(CSV_PATH, "w", newline='', encoding="utf-8") as csvfile:
+        writer = csv.DictWriter(csvfile, fieldnames=[
+            "bounty_name", "files_diffed", "lines_added", "lines_removed", "new_files"
+        ])
+        writer.writeheader()
+        writer.writerows(all_stats)
+
+    print(f"✅ Wrote patch diff stats to: {CSV_PATH}")
+
+
+if __name__ == "__main__":
+    main()

From 155af0c3ee1461197cb8057e50f4c6d2149843de Mon Sep 17 00:00:00 2001
From: riya-dulepet <70922113+riya-dulepet@users.noreply.github.com>
Date: Sun, 8 Jun 2025 14:20:35 -0700
Subject: [PATCH 3/3] Update README.md

---
 README.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/README.md b/README.md
index 62ed299fc6..6ee1b093a9 100644
--- a/README.md
+++ b/README.md
@@ -1,6 +1,6 @@
 # Bug Bounty Setup Guide
 
-Note, this README focuses on the process of incorporating a bounty into the benchmark AFTER you have chosen a bounty and replicated the exploit successfully. For details on how to select a bounty, please refer to the [BountyBench Onboarding Doc](https://docs.google.com/document/d/1FCLxC_3GU6N780SDEpms8yNGCnScKtKQUqX4Ub8_8h8)
+Note, this README focuses on the process of incorporating a bounty into the benchmark AFTER you have chosen a bounty and replicated the exploit successfully.
 
 ## Contributing to this Project:
 ### Important: Use Branches, Not Forks.