Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
18 changes: 11 additions & 7 deletions apps/openant-cli/cmd/parse.go
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,7 @@ var (
parseDiffBase string
parsePR int
parseDiffScope string
parseFresh bool
)

func init() {
Expand All @@ -38,14 +39,14 @@ func init() {
parseCmd.Flags().StringVar(&parseDiffBase, "diff-base", "", "Incremental mode: tag units overlapping diff vs this ref")
parseCmd.Flags().IntVar(&parsePR, "pr", 0, "Incremental mode against a GitHub PR number (mutex with --diff-base)")
parseCmd.Flags().StringVar(&parseDiffScope, "diff-scope", "changed_functions", "Diff scope: changed_files, changed_functions, callers")
parseCmd.Flags().BoolVar(&parseFresh, "fresh", false, "Delete existing dataset.json and reparse from scratch (other artifacts preserved)")
}

// buildParsePyArgs assembles the argv passed to the Python `openant parse`
// subprocess. Defaults that match the Python CLI (language=auto,
// level=reachable) are omitted so the Python side stays in charge of the
// canonical default value.
func buildParsePyArgs(repoPath, output, datasetName, language, level, manifestPath string) []string {
pyArgs := []string{"parse", repoPath, "--output", output}
// buildParsePyArgs constructs the argv passed to the Python parse subcommand.
// Extracted so tests can verify pass-through behavior without invoking the
// full Python runtime.
func buildParsePyArgs(repoPath, outputDir, datasetName, language, level, manifestPath string, fresh bool) []string {
pyArgs := []string{"parse", repoPath, "--output", outputDir}
if datasetName != "" {
pyArgs = append(pyArgs, "--name", datasetName)
}
Expand All @@ -58,6 +59,9 @@ func buildParsePyArgs(repoPath, output, datasetName, language, level, manifestPa
if manifestPath != "" {
pyArgs = append(pyArgs, "--diff-manifest", manifestPath)
}
if fresh {
pyArgs = append(pyArgs, "--fresh")
}
return pyArgs
}

Expand Down Expand Up @@ -113,7 +117,7 @@ func runParse(cmd *cobra.Command, args []string) {
}
}

pyArgs := buildParsePyArgs(repoPath, parseOutput, datasetName, parseLanguage, parseLevel, manifestPath)
pyArgs := buildParsePyArgs(repoPath, parseOutput, datasetName, parseLanguage, parseLevel, manifestPath, parseFresh)

result, err := python.Invoke(rt.Path, pyArgs, "", quiet, resolvedAPIKey())
if err != nil {
Expand Down
102 changes: 98 additions & 4 deletions apps/openant-cli/cmd/parse_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -3,8 +3,14 @@ package cmd
import (
"strings"
"testing"

"github.com/spf13/cobra"
)

// ---------------------------------------------------------------------------
// --level flag
// ---------------------------------------------------------------------------

func TestParseLevelFlagDefaultIsReachable(t *testing.T) {
flag := parseCmd.Flag("level")
if flag == nil {
Expand Down Expand Up @@ -40,7 +46,7 @@ func TestBuildParsePyArgsLevelForwarding(t *testing.T) {
}
for _, tc := range tests {
t.Run(tc.name, func(t *testing.T) {
args := buildParsePyArgs("/repo", "/out", "", "auto", tc.level, "")
args := buildParsePyArgs("/repo", "/out", "", "auto", tc.level, "", false)
gotLevel, gotValue := findFlag(args, "--level")
if gotLevel != tc.wantLevel {
t.Errorf("--level present = %v, want %v (argv=%v)", gotLevel, tc.wantLevel, args)
Expand All @@ -53,7 +59,7 @@ func TestBuildParsePyArgsLevelForwarding(t *testing.T) {
}

func TestBuildParsePyArgsBaseline(t *testing.T) {
args := buildParsePyArgs("/repo", "/out", "org-repo-abc1234", "python", "exploitable", "/tmp/manifest.json")
args := buildParsePyArgs("/repo", "/out", "org-repo-abc1234", "python", "exploitable", "/tmp/manifest.json", false)
want := []string{
"parse", "/repo",
"--output", "/out",
Expand All @@ -72,8 +78,96 @@ func TestBuildParsePyArgsBaseline(t *testing.T) {
}
}

// findFlag returns whether name is present in argv, and its following value
// (or "" if it has no value).
// ---------------------------------------------------------------------------
// --fresh flag
// ---------------------------------------------------------------------------

// TestParseCmdHasFreshFlag checks how --fresh is registered on parseCmd: the
// flag must exist, be a bool, default to false, and carry help text.
func TestParseCmdHasFreshFlag(t *testing.T) {
	fresh := parseCmd.Flags().Lookup("fresh")
	if fresh == nil {
		// Nothing else can be inspected without the flag itself.
		t.Fatal("parseCmd is missing the --fresh flag")
	}

	if kind := fresh.Value.Type(); kind != "bool" {
		t.Errorf("--fresh should be a bool flag, got type %q", kind)
	}
	if def := fresh.DefValue; def != "false" {
		t.Errorf("--fresh default should be false, got %q", def)
	}
	if len(fresh.Usage) == 0 {
		t.Error("--fresh flag is missing a usage/help string")
	}
}

func TestParseCmdFreshFlagInitialState(t *testing.T) {
orig := parseFresh
defer func() { parseFresh = orig }()

parseFresh = false
if parseFresh {
t.Errorf("parseFresh should default to false, got true")
}
}

// TestParseCmdFreshFlagParses verifies that setting --fresh through the flag
// set is reflected in the bound parseFresh variable, in both directions.
func TestParseCmdFreshFlagParses(t *testing.T) {
	orig := parseFresh
	defer func() {
		// Reset the flag first and restore the variable last: --fresh is
		// registered with BoolVar(&parseFresh, ...), so Set writes through to
		// parseFresh and would clobber a value restored before it.
		// The error is ignored deliberately: "false" is a valid bool literal
		// and there is nothing useful to do about a failure during cleanup.
		_ = parseCmd.Flags().Set("fresh", "false")
		parseFresh = orig
	}()

	parseFresh = false
	if err := parseCmd.Flags().Set("fresh", "true"); err != nil {
		t.Fatalf("failed to set --fresh: %v", err)
	}
	if !parseFresh {
		t.Error("setting --fresh=true should make parseFresh true")
	}

	if err := parseCmd.Flags().Set("fresh", "false"); err != nil {
		t.Fatalf("failed to set --fresh=false: %v", err)
	}
	if parseFresh {
		t.Error("setting --fresh=false should make parseFresh false")
	}
}

// TestParsePyArgsIncludesFreshWhenSet checks that fresh=true puts --fresh
// into the argv built for the Python parse subcommand.
func TestParsePyArgsIncludesFreshWhenSet(t *testing.T) {
	const fresh = true
	argv := buildParsePyArgs("/some/repo", "/out", "", "auto", "reachable", "", fresh)

	if present, _ := findFlag(argv, "--fresh"); !present {
		t.Errorf("expected --fresh in pyArgs when fresh=true, got %v", argv)
	}
}

// TestParsePyArgsOmitsFreshWhenUnset checks that fresh=false keeps --fresh
// out of the argv built for the Python parse subcommand.
func TestParsePyArgsOmitsFreshWhenUnset(t *testing.T) {
	const fresh = false
	argv := buildParsePyArgs("/some/repo", "/out", "", "auto", "reachable", "", fresh)

	if present, _ := findFlag(argv, "--fresh"); present {
		t.Errorf("did not expect --fresh in pyArgs when fresh=false, got %v", argv)
	}
}
// TestParseCmdIsRegisteredOnRoot looks the parse subcommand up through
// rootCmd and checks that the command found there exposes --fresh.
func TestParseCmdIsRegisteredOnRoot(t *testing.T) {
	var parseSub *cobra.Command
	subcommands := rootCmd.Commands()
	// Stop at the first subcommand named "parse".
	for i := 0; i < len(subcommands) && parseSub == nil; i++ {
		if subcommands[i].Name() == "parse" {
			parseSub = subcommands[i]
		}
	}

	if parseSub == nil {
		t.Fatal("parse command not registered on rootCmd")
	}
	if parseSub.Flags().Lookup("fresh") == nil {
		t.Error("parse subcommand resolved from root is missing --fresh flag")
	}
}

// ---------------------------------------------------------------------------
// helpers
// ---------------------------------------------------------------------------

func findFlag(argv []string, name string) (bool, string) {
for i, a := range argv {
if a == name {
Expand Down
16 changes: 16 additions & 0 deletions libs/openant-core/core/parser_adapter.py
Original file line number Diff line number Diff line change
Expand Up @@ -79,6 +79,7 @@ def parse_repository(
skip_tests: bool = True,
name: str = None,
diff_manifest: str | None = None,
fresh: bool = False,
) -> ParseResult:
"""Parse a repository into an OpenAnt dataset.

Expand All @@ -92,6 +93,9 @@ def parse_repository(
processing_level: "all", "reachable", "codeql", or "exploitable".
skip_tests: If True, exclude test files from parsing (default: True).
name: Dataset name override (default: derived from repo path basename).
fresh: If True, delete existing dataset.json before parsing so all
units are regenerated from scratch. Only dataset.json is deleted;
other artifacts in output_dir (e.g. analyzer outputs) are preserved.

Returns:
ParseResult with paths to generated files and stats.
Expand All @@ -104,6 +108,18 @@ def parse_repository(
output_dir = os.path.abspath(output_dir)
os.makedirs(output_dir, exist_ok=True)

if fresh:
dataset_path = os.path.join(output_dir, "dataset.json")
# Use try/except instead of exists()+remove() to avoid a TOCTOU race
# if a concurrent --fresh run removes the file between the two calls.
# Only dataset.json is deleted; other artifacts (analyzer outputs, etc.)
# in output_dir are preserved.
try:
os.remove(dataset_path)
print("[Parser] --fresh: deleted existing dataset.json", file=sys.stderr)
except FileNotFoundError:
pass

# Detect language if auto
if language == "auto":
language = detect_language(repo_path)
Expand Down
3 changes: 3 additions & 0 deletions libs/openant-core/openant/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -128,6 +128,7 @@ def cmd_parse(args):
skip_tests=not args.no_skip_tests,
name=getattr(args, "name", None),
diff_manifest=getattr(args, "diff_manifest", None),
fresh=getattr(args, "fresh", False),
)

ctx.summary = {
Expand Down Expand Up @@ -1038,6 +1039,8 @@ def main():
parse_p.add_argument("--no-skip-tests", action="store_true", help="Include test files in parsing (default: tests are skipped)")
parse_p.add_argument("--name", help="Dataset name (default: derived from repo path)")
parse_p.add_argument("--diff-manifest", help="Path to diff_manifest.json; tags units with diff_selected")
parse_p.add_argument("--fresh", action="store_true",
help="Delete existing dataset.json and reparse from scratch (default: reuse existing units; other artifacts preserved)")
parse_p.set_defaults(func=cmd_parse)

# ---------------------------------------------------------------
Expand Down
3 changes: 3 additions & 0 deletions libs/openant-core/parsers/javascript/unit_generator.js
Original file line number Diff line number Diff line change
Expand Up @@ -433,6 +433,9 @@ if (require.main === module) {
console.error(` Existing units: ${existingUnits.length}`);
console.error(` New units to add: ${newUnits.length}`);
console.error(` Duplicates skipped: ${duplicateCount}`);
if (duplicateCount > 0) {
console.error(` Note: ${duplicateCount} existing units kept as-is (use 'openant parse --fresh' to regenerate all units)`);
}

// Append new units to existing
finalResult = {
Expand Down
Loading
Loading