diff --git a/cmd/analyze.go b/cmd/analyze.go index 83ca5d0..7c5223e 100644 --- a/cmd/analyze.go +++ b/cmd/analyze.go @@ -14,6 +14,11 @@ func init() { var opts analyze.Options var noShards bool var threeFile bool + var narrate bool + var tour bool + var tourStrategy string + var tourSeed string + var tourBudget int c := &cobra.Command{ Use: "analyze [path]", @@ -25,7 +30,16 @@ Results are cached locally by content hash. Subsequent commands (dead-code, blast-radius, graph) reuse the cache automatically. By default, .graph.* shard files are written next to each source file. -Use --no-shards to skip writing graph files.`, +Use --no-shards to skip writing graph files. + +Linearization flags: + --narrate prefix each shard with a prose narrative preamble + --tour also emit .supermodel/TOUR.md (the reading spine) + --tour-strategy topo | bfs-seed | dfs-seed | centrality (default: topo) + --tour-seed seed file for bfs-seed/dfs-seed + --tour-budget chunk tour into chapters of this token budget + +See docs/linearization.md for design.`, Args: cobra.MaximumNArgs(1), RunE: func(cmd *cobra.Command, args []string) error { cfg, err := config.Load() @@ -38,6 +52,9 @@ Use --no-shards to skip writing graph files.`, if noShards && threeFile { return fmt.Errorf("--three-file cannot be used with --no-shards") } + if noShards && (narrate || tour) { + return fmt.Errorf("--narrate and --tour require shards (cannot combine with --no-shards)") + } dir := "." if len(args) > 0 { dir = args[0] @@ -46,7 +63,15 @@ Use --no-shards to skip writing graph files.`, // Shard mode: Generate handles the full pipeline (API call + // cache + shards) in a single upload. Running analyze.Run // first would duplicate the API call. 
- return shards.Generate(cmd.Context(), cfg, dir, shards.GenerateOptions{Force: opts.Force, ThreeFile: threeFile}) + return shards.Generate(cmd.Context(), cfg, dir, shards.GenerateOptions{ + Force: opts.Force, + ThreeFile: threeFile, + Narrate: narrate, + Tour: tour, + TourStrategy: tourStrategy, + TourSeed: tourSeed, + TourBudget: tourBudget, + }) } return analyze.Run(cmd.Context(), cfg, dir, opts) }, @@ -56,6 +81,11 @@ Use --no-shards to skip writing graph files.`, c.Flags().StringVarP(&opts.Output, "output", "o", "", "output format: human|json") c.Flags().BoolVar(&noShards, "no-shards", false, "skip writing .graph.* shard files") c.Flags().BoolVar(&threeFile, "three-file", false, "generate .calls/.deps/.impact files instead of single .graph") + c.Flags().BoolVar(&narrate, "narrate", false, "prefix each shard with a prose narrative preamble") + c.Flags().BoolVar(&tour, "tour", false, "also emit .supermodel/TOUR.md — the linear reading spine") + c.Flags().StringVar(&tourStrategy, "tour-strategy", "topo", "tour ordering: topo | bfs-seed | dfs-seed | centrality") + c.Flags().StringVar(&tourSeed, "tour-seed", "", "seed file for bfs-seed / dfs-seed strategies") + c.Flags().IntVar(&tourBudget, "tour-budget", 0, "chunk tour into chapters of this token budget (0 = single file)") rootCmd.AddCommand(c) } diff --git a/cmd/tour.go b/cmd/tour.go new file mode 100644 index 0000000..ac6071a --- /dev/null +++ b/cmd/tour.go @@ -0,0 +1,103 @@ +package cmd + +import ( + "encoding/json" + "fmt" + "os" + "path/filepath" + + "github.com/spf13/cobra" + + "github.com/supermodeltools/cli/internal/api" + "github.com/supermodeltools/cli/internal/shards" + "github.com/supermodeltools/cli/internal/ui" +) + +func init() { + var strategyName string + var seed string + var narrate bool + var budgetTokens int + var dryRun bool + + c := &cobra.Command{ + Use: "tour [path]", + Short: "Emit a linearized reading order over the code graph", + Long: `Generates .supermodel/TOUR.md — a single-file 
reading spine that walks the +repository in a strategy-chosen order, grouped by domain/subdomain, with each +entry linking to its per-file shard. This gives agents a deterministic path +through the codebase instead of N independent shards with no order. + +Strategies: + topo reverse-topological over imports (leaves first, roots last) + bfs-seed breadth-first from --seed outward (focused tours) + dfs-seed depth-first from --seed outward + centrality files with the largest blast radius first + +When --narrate is set, each existing .graph.* shard is rewritten with a prose +preamble describing the file's role as sentences (rather than only structured +arrows). Same data, different rendering targeted at LLM reading style. + +When --budget-tokens is set and the tour exceeds the budget, TOUR.md becomes an +index linking to TOUR.01.md, TOUR.02.md, ... sized to fit one chapter per turn. + +Reads .supermodel/shards.json produced by 'supermodel analyze'. No API call. +See docs/linearization.md for the design rationale.`, + Args: cobra.MaximumNArgs(1), + RunE: func(_ *cobra.Command, args []string) error { + dir := "." 
+ if len(args) > 0 { + dir = args[0] + } + repoDir, err := filepath.Abs(dir) + if err != nil { + return fmt.Errorf("resolving path: %w", err) + } + cacheFile := filepath.Join(repoDir, ".supermodel", "shards.json") + data, err := os.ReadFile(cacheFile) + if err != nil { + return fmt.Errorf("reading cache %s: %w (run `supermodel analyze` first)", cacheFile, err) + } + var ir api.ShardIR + if err := json.Unmarshal(data, &ir); err != nil { + return fmt.Errorf("parsing cache: %w", err) + } + cache := shards.NewCache() + cache.Build(&ir) + + strategy, err := shards.ResolveStrategy(strategyName, seed) + if err != nil { + return err + } + + out, err := shards.WriteTour(repoDir, cache, strategy, budgetTokens, dryRun) + if err != nil { + return err + } + if !dryRun { + ui.Success("Wrote tour to %s (strategy: %s)", out, strategy.Name()) + } + + if narrate { + files := cache.SourceFiles() + written, rerr := shards.RenderAll(repoDir, cache, files, true, dryRun) + if rerr != nil { + return fmt.Errorf("re-rendering shards with narrative: %w", rerr) + } + if !dryRun { + ui.Success("Re-wrote %d shards with narrative preamble", written) + } + } + return nil + }, + } + + c.Flags().StringVar(&strategyName, "strategy", "topo", + "linearization strategy: topo | bfs-seed | dfs-seed | centrality") + c.Flags().StringVar(&seed, "seed", "", "seed file path (required for bfs-seed / dfs-seed)") + c.Flags().BoolVar(&narrate, "narrate", false, "also rewrite existing .graph.* shards with a prose narrative preamble") + c.Flags().IntVar(&budgetTokens, "budget-tokens", 0, "chunk tour into chapters of this token budget (0 = single file)") + c.Flags().BoolVar(&dryRun, "dry-run", false, "print what would be written without touching disk") + + rootCmd.AddCommand(c) +} diff --git a/docs/linearization.md b/docs/linearization.md new file mode 100644 index 0000000..3d9f4b1 --- /dev/null +++ b/docs/linearization.md @@ -0,0 +1,180 @@ +# Graph Linearization for Sharding + +## Thesis + +LLMs are 
one-dimensional. They consume a token stream and attend to positions +within it. Graphs are multi-dimensional: nodes are connected by edges that +don't live on the token axis. A model handed a blob of JSON nodes and edges has +to do pointer-chasing on UUIDs inside a single attention pass — work that scales +badly with graph size and burns context. + +**Graph linearization** is the deliberate serialization of a graph into a +reading order the model can consume left-to-right, with local neighborhoods +kept close in the token stream and adjacency rendered as prose rather than +identifiers. See Xypolopoulos et al., *Graph Linearization Methods for Reasoning +on Graphs with Large Language Models* (arXiv:2410.19494) for the underlying +principles: centrality and degeneracy-based orderings substantially beat random +serialization on LLM graph-reasoning tasks. + +## Where the CLI stands today + +`supermodel analyze` already writes per-file sidecar shards (`.graph.ext` or +`.calls / .deps / .impact`). Those shards are **file-level linearization**: +each sidecar collapses a subgraph into a `[deps] / [calls] / [impact]` text +layout the model reads before touching the source file. + +Two things are missing: + +1. **No reading order across files.** Agents see N independent shards and have + to guess which to read first. There is no spine. +2. **No prose adjacency inside a shard.** Call relationships are rendered as + `name ← other path:line` arrows. Accurate and terse, but the model + reconstructs sentences on the fly every time. + +Sharding produces the units. Linearization produces the **order and +narrative** over those units. + +## Design: the Tour + +A *tour* is a single markdown file — `.supermodel/TOUR.md` — that serializes +the whole repository graph into a linear walk. It is the spine that makes the +existing shards navigable. 
+ +``` +TOUR.md ← linear walk (this feature) +src/auth/session.go ← source file +src/auth/session.graph.go ← existing shard (per-file linearization) +``` + +Agents read `TOUR.md` once to get the layout, then open shards + source in the +order the tour presents them. + +### Structure of TOUR.md + +```markdown +# Repository Tour — supermodel-cli + +**Strategy:** reverse-topological over the import graph +(leaves → roots). Read top-to-bottom to see dependencies before dependents. + +## Domain: Analyze +### Subdomain: Pipeline +- **internal/analyze/handler.go** — orchestrates upload + render + reads: api, config, shards · read by: cmd/analyze.go + risk: MEDIUM · [shard](../internal/analyze/handler.graph.go) + +## Domain: Shards +### Subdomain: Rendering +- **internal/shards/render.go** — emits .graph sidecars per source file + reads: api · read by: internal/shards/handler.go + risk: LOW · [shard](../internal/shards/render.graph.go) +... +``` + +One prose line per file — name, domain, adjacency, risk, shard pointer. Linear +order is the strategy's output. The agent reads prefix-to-suffix. + +### Linearization strategies + +Strategies are interchangeable. The default is `topo` because it matches how +humans read codebases ("what are the leaves, then what depends on them"). + +| Strategy | Ordering | Best for | +|--------------|---------------------------------------------------------|-----------------------------------------| +| `topo` | reverse-topological over imports (leaves first) | whole-codebase onboarding | +| `bfs-seed` | BFS from `--seed ` outward | focused tasks, blast radius walks | +| `dfs-seed` | DFS from `--seed ` — depth-first exploration | tracing a request through layers | +| `centrality` | PageRank-like over importers (most-depended-on first) | "what's the core of this codebase" | + +Cycles are broken by file-path lexicographic order (deterministic, boring). 
+ +### Prose narrative preamble (opt-in) + +Tour generation also lets you inject a prose preamble into each existing shard +with `--narrate`: + +```go +// @generated supermodel-shard — do not edit +// +// Narrative: parseConfig (Domain Config / Loading) is called by main +// (cmd/root.go:42) and serverInit (cmd/server.go:18). It calls readFile +// and json.Unmarshal. Imports: os, encoding/json. Risk: LOW. +// +// [deps] +// imports os +// imports encoding/json +// ... +``` + +The preamble is a one-paragraph summary derived from the same cache used for +the structured sections — no new data, just a second rendering targeted at the +model's native reading style. Flag-gated so users can A/B. + +## CLI surface (implemented) + +Standalone: + +``` +supermodel tour [--strategy topo|bfs-seed|dfs-seed|centrality] + [--seed ] + [--narrate] + [--budget-tokens ] + [--dry-run] + [path] +``` + +Integrated with `analyze` so a single command emits shards + spine: + +``` +supermodel analyze [--tour] + [--tour-strategy topo|bfs-seed|dfs-seed|centrality] + [--tour-seed ] + [--tour-budget ] + [--narrate] + [path] +``` + +- Reads `.supermodel/shards.json` (errors if absent — prompts `analyze` first). +- Writes `.supermodel/TOUR.md`. +- With `--narrate`, rewrites existing `.graph.*` shards in place to include a + prose narrative preamble. +- `--budget-tokens` chunks the tour into `TOUR.01.md`, `TOUR.02.md`, ... with + `TOUR.md` becoming an index. Each chapter has prev/next cross-links. + +No API call. No new cache. Pure reshaping of what `analyze` already produced. + +## Why this shape + +- **Same vertical slice.** Tour lives inside `internal/shards/` — it consumes + the shard cache and emits a companion artifact. No cross-slice dependency. +- **Additive.** Default behavior of `analyze` is unchanged. Tour is opt-in. +- **Deterministic.** Lexicographic tiebreaks, stable sort; tour file is safe to + commit or diff. 
+- **Strategy-pluggable.** The `Strategy` interface is small (one method: + `Order(cache) []string`), so we can add more orderings without touching the + renderer. + +## Open questions + +- Should tour output default-render inline snippets of each shard, or strictly + link to them? Inline is self-contained (one file to read) but duplicates + content; linked is DRY but requires the agent to follow pointers. +- Should there be a `--focus ` filter so tours scope to a subtree? +- Does `arch-docs` want to consume TOUR.md as its entry point (replacing its + own traversal)? +- Running `supermodel tour` with a different `--budget-tokens` should probably + clean up stale `TOUR.NN.md` files from a prior chunked run. Cosmetic. +- Benchmark: we need numbers. Plan to wire through + `supermodeltools/supermodel-benchmarks/shard-ab-test/` to measure + agent performance with/without TOUR + narrate. + +## References + +- Xypolopoulos et al., *Graph Linearization Methods for Reasoning on Graphs + with Large Language Models*, arXiv:2410.19494 +- `supermodeltools/codegraph-graphrag` — BFS narrative walks, the thesis doc + in the org +- `supermodeltools/graph2md` — per-node markdown emission (another + linearization strategy) +- `supermodeltools/mcp/src/tools/explore-function.ts` — `describeNode()` + prose format, cross-subsystem markers diff --git a/internal/shards/daemon.go b/internal/shards/daemon.go index ee803e3..7617016 100644 --- a/internal/shards/daemon.go +++ b/internal/shards/daemon.go @@ -183,7 +183,7 @@ func (d *Daemon) loadOrGenerate(ctx context.Context) error { d.mu.Unlock() files := d.cache.SourceFiles() - written, renderErr := RenderAll(d.cfg.RepoDir, d.cache, files, false) + written, renderErr := RenderAll(d.cfg.RepoDir, d.cache, files, false, false) if renderErr != nil { return renderErr } @@ -226,7 +226,7 @@ func (d *Daemon) fullGenerate(ctx context.Context) error { d.mu.Unlock() files := d.cache.SourceFiles() - written, err := RenderAll(d.cfg.RepoDir, d.cache, files, 
false) + written, err := RenderAll(d.cfg.RepoDir, d.cache, files, false, false) if err != nil { return err } @@ -301,7 +301,7 @@ func (d *Daemon) incrementalUpdate(ctx context.Context, changedFiles []string) { d.logf("Re-rendering %d affected shards", len(affected)) - written, err := RenderAll(d.cfg.RepoDir, cacheSnapshot, affected, false) + written, err := RenderAll(d.cfg.RepoDir, cacheSnapshot, affected, false, false) if err != nil { d.logf("Render error: %v", err) return diff --git a/internal/shards/handler.go b/internal/shards/handler.go index 0a57fa8..858d680 100644 --- a/internal/shards/handler.go +++ b/internal/shards/handler.go @@ -27,17 +27,67 @@ const ( // GenerateOptions configures the generate command. type GenerateOptions struct { - Force bool - DryRun bool - CacheFile string - ThreeFile bool // generate .calls/.deps/.impact instead of single .graph + Force bool + DryRun bool + CacheFile string + ThreeFile bool // generate .calls/.deps/.impact instead of single .graph + Narrate bool // prefix each shard with a prose narrative preamble + Tour bool // also write .supermodel/TOUR.md after shards + TourStrategy string // strategy name: topo|bfs-seed|dfs-seed|centrality + TourSeed string // seed file for bfs-seed/dfs-seed + TourBudget int // chunk tour into chapters of this token budget (0 = no chunking) } -func renderShards(repoDir string, cache *Cache, files []string, dryRun, threeFile bool) (int, error) { +func renderShards(repoDir string, cache *Cache, files []string, dryRun, threeFile, narrate bool) (int, error) { if threeFile { - return RenderAllThreeFile(repoDir, cache, files, dryRun) + return RenderAllThreeFile(repoDir, cache, files, narrate, dryRun) + } + return RenderAll(repoDir, cache, files, narrate, dryRun) +} + +// maybeWriteTour writes the tour when opts.Tour is set. Errors during tour +// generation are reported but don't fail the whole command since shards are +// the primary artifact. 
+func maybeWriteTour(repoDir string, cache *Cache, opts GenerateOptions) { + if !opts.Tour { + return + } + strategy, err := ResolveStrategy(opts.TourStrategy, opts.TourSeed) + if err != nil { + ui.Warn("Tour skipped: %v", err) + return + } + out, err := WriteTour(repoDir, cache, strategy, opts.TourBudget, opts.DryRun) + if err != nil { + ui.Warn("Tour write failed: %v", err) + return + } + if !opts.DryRun { + ui.Success("Wrote tour to %s (strategy: %s)", out, strategy.Name()) + } +} + +// ResolveStrategy picks a TourStrategy by name. Returns an error if the name is +// unknown or if a seeded strategy is missing its seed. +func ResolveStrategy(name, seed string) (TourStrategy, error) { + switch name { + case "", "topo": + return TopoStrategy{}, nil + case "bfs-seed": + if seed == "" { + return nil, fmt.Errorf("bfs-seed requires --seed ") + } + return BFSSeedStrategy{Seed: seed}, nil + case "dfs-seed": + if seed == "" { + return nil, fmt.Errorf("dfs-seed requires --seed ") + } + return DFSSeedStrategy{Seed: seed}, nil + case "centrality": + return CentralityStrategy{}, nil + default: + return nil, fmt.Errorf("unknown strategy %q (supported: topo, bfs-seed, dfs-seed, centrality)", name) } - return RenderAll(repoDir, cache, files, dryRun) } // WatchOptions configures the watch command. 
@@ -54,6 +104,7 @@ type RenderOptions struct { CacheFile string DryRun bool ThreeFile bool + Narrate bool } // guardDir returns an error if dir is the filesystem root or the user's home @@ -99,12 +150,13 @@ func Generate(ctx context.Context, cfg *config.Config, dir string, opts Generate cache.Build(&ir) files := cache.SourceFiles() spin := ui.Start("Rendering shards…") - written, err := renderShards(repoDir, cache, files, opts.DryRun, opts.ThreeFile) + written, err := renderShards(repoDir, cache, files, opts.DryRun, opts.ThreeFile, opts.Narrate) spin.Stop() if err != nil { return err } ui.Success("Wrote %d shards for %d source files", written, len(files)) + maybeWriteTour(repoDir, cache, opts) return updateGitignore(repoDir) } } @@ -159,11 +211,12 @@ func Generate(ctx context.Context, cfg *config.Config, dir string, opts Generate staleCache := NewCache() staleCache.Build(&staleIR) files := staleCache.SourceFiles() - written, renderErr := renderShards(repoDir, staleCache, files, opts.DryRun, opts.ThreeFile) + written, renderErr := renderShards(repoDir, staleCache, files, opts.DryRun, opts.ThreeFile, opts.Narrate) if renderErr != nil { return fmt.Errorf("API error: %w; stale render also failed: %v", err, renderErr) } ui.Success("Wrote %d shards from stale cache (%d nodes)", written, len(staleIR.Graph.Nodes)) + maybeWriteTour(repoDir, staleCache, opts) return nil } } @@ -192,7 +245,7 @@ func Generate(ctx context.Context, cfg *config.Config, dir string, opts Generate files := cache.SourceFiles() spin = ui.Start("Rendering shards…") - written, err := renderShards(repoDir, cache, files, opts.DryRun, opts.ThreeFile) + written, err := renderShards(repoDir, cache, files, opts.DryRun, opts.ThreeFile, opts.Narrate) spin.Stop() if err != nil { return err @@ -201,6 +254,7 @@ func Generate(ctx context.Context, cfg *config.Config, dir string, opts Generate ui.Success("Wrote %d shards for %d source files (%d nodes, %d relationships)", written, len(files), len(ir.Graph.Nodes), 
len(ir.Graph.Relationships)) + maybeWriteTour(repoDir, cache, opts) return updateGitignore(repoDir) } @@ -425,7 +479,7 @@ func Render(dir string, opts RenderOptions) error { cache.Build(&ir) files := cache.SourceFiles() - written, err := renderShards(repoDir, cache, files, opts.DryRun, opts.ThreeFile) + written, err := renderShards(repoDir, cache, files, opts.DryRun, opts.ThreeFile, opts.Narrate) if err != nil { return err } diff --git a/internal/shards/narrative.go b/internal/shards/narrative.go new file mode 100644 index 0000000..a93a61d --- /dev/null +++ b/internal/shards/narrative.go @@ -0,0 +1,127 @@ +package shards + +import ( + "fmt" + "sort" + "strings" +) + +// RenderNarrative produces a prose preamble describing a file's place in the +// graph as sentences rather than structured arrows. The output is one comment +// block (each line prefixed with `prefix`) covering: domain/subdomain, +// imports/importers counts with a few named examples, intra-file functions +// and their call adjacency, and risk tier. +// +// Returns "" if the file has no meaningful prose to render (no imports, +// importers, or functions). The result does NOT include a trailing blank line; +// callers compose it with the structured sections. 
+func RenderNarrative(filePath string, cache *Cache, prefix string) string { + imports := sortedUnique(cache.Imports[filePath]) + importers := sortedUnique(cache.Importers[filePath]) + + var fnNames []string + var fnByName []*FuncInfo + for _, fn := range cache.FnByID { + if fn.File == filePath { + fnByName = append(fnByName, fn) + } + } + sort.Slice(fnByName, func(i, j int) bool { + if fnByName[i].Name != fnByName[j].Name { + return fnByName[i].Name < fnByName[j].Name + } + return fnByName[i].ID < fnByName[j].ID + }) + for _, fn := range fnByName { + fnNames = append(fnNames, fn.Name) + } + + if len(imports) == 0 && len(importers) == 0 && len(fnNames) == 0 { + return "" + } + + var sentences []string + + domain := cache.FileDomain[filePath] + openSentence := fmt.Sprintf("This file (%s) sits in the graph as follows:", filePath) + if domain != "" { + dom, sub := splitDomain(domain) + if sub != "" { + openSentence = fmt.Sprintf("This file (%s) belongs to domain %s / subdomain %s.", filePath, dom, sub) + } else { + openSentence = fmt.Sprintf("This file (%s) belongs to domain %s.", filePath, dom) + } + } + sentences = append(sentences, openSentence) + + if len(imports) > 0 { + sentences = append(sentences, fmt.Sprintf( + "It imports %d file(s): %s.", len(imports), joinTrunc(imports, 3))) + } + if len(importers) > 0 { + sentences = append(sentences, fmt.Sprintf( + "It is imported by %d file(s): %s.", len(importers), joinTrunc(importers, 3))) + } + + if len(fnByName) > 0 { + sentences = append(sentences, fmt.Sprintf( + "It defines %d function(s): %s.", len(fnByName), joinTrunc(fnNames, 5))) + // Add call adjacency as prose for up to the first few functions. 
+ maxFns := 4 + if len(fnByName) < maxFns { + maxFns = len(fnByName) + } + for _, fn := range fnByName[:maxFns] { + fnProse := fnAdjacencySentence(fn, cache) + if fnProse != "" { + sentences = append(sentences, fnProse) + } + } + } + + risk := riskFor(filePath, cache) + transitiveCount := len(cache.TransitiveDependents(filePath)) + sentences = append(sentences, fmt.Sprintf( + "Risk: %s (%d transitive dependent(s)).", risk, transitiveCount)) + + // Render as a comment block. + var b strings.Builder + b.WriteString(prefix + " Narrative:\n") + for _, s := range sentences { + b.WriteString(prefix + " " + s + "\n") + } + return b.String() +} + +func fnAdjacencySentence(fn *FuncInfo, cache *Cache) string { + callers := cache.Callers[fn.ID] + callees := cache.Callees[fn.ID] + if len(callers) == 0 && len(callees) == 0 { + return fmt.Sprintf(" %s has no recorded callers or callees.", fn.Name) + } + var parts []string + if len(callers) > 0 { + names := uniqueCallerNames(callers, cache) + parts = append(parts, fmt.Sprintf("is called by %s", joinTrunc(names, 3))) + } + if len(callees) > 0 { + names := uniqueCallerNames(callees, cache) + parts = append(parts, fmt.Sprintf("calls %s", joinTrunc(names, 3))) + } + return fmt.Sprintf(" %s %s.", fn.Name, strings.Join(parts, " and ")) +} + +func uniqueCallerNames(refs []CallerRef, cache *Cache) []string { + seen := make(map[string]bool, len(refs)) + var out []string + for _, r := range refs { + n := cache.FuncName(r.FuncID) + if n == "" || seen[n] { + continue + } + seen[n] = true + out = append(out, n) + } + sort.Strings(out) + return out +} diff --git a/internal/shards/render.go b/internal/shards/render.go index f97a8b4..e4cb31c 100644 --- a/internal/shards/render.go +++ b/internal/shards/render.go @@ -273,8 +273,10 @@ func removeStaleGraph(repoDir, srcFile string) { } // RenderAll generates and writes .graph shards for the given source files. 
+// When narrate is true, each shard is prefixed with a prose narrative preamble +// that describes the file's role in the graph as sentences. // Returns the count of shards written. -func RenderAll(repoDir string, cache *Cache, files []string, dryRun bool) (int, error) { +func RenderAll(repoDir string, cache *Cache, files []string, narrate, dryRun bool) (int, error) { sort.Strings(files) written := 0 @@ -293,7 +295,15 @@ func RenderAll(repoDir string, cache *Cache, files []string, dryRun bool) (int, continue } - fullContent := header + content + var narrative string + if narrate { + narrative = RenderNarrative(srcFile, cache, prefix) + if narrative != "" { + narrative += prefix + "\n" + } + } + + fullContent := header + narrative + content if ext == ".go" { fullContent = "//go:build ignore\n\npackage ignore\n" + fullContent } @@ -312,7 +322,9 @@ func RenderAll(repoDir string, cache *Cache, files []string, dryRun bool) (int, } // RenderAllThreeFile generates .calls, .deps, and .impact files per source file. -func RenderAllThreeFile(repoDir string, cache *Cache, files []string, dryRun bool) (int, error) { +// When narrate is true, each of the three shards is prefixed with a prose +// narrative preamble. 
+func RenderAllThreeFile(repoDir string, cache *Cache, files []string, narrate, dryRun bool) (int, error) { sort.Strings(files) written := 0 @@ -334,6 +346,14 @@ func RenderAllThreeFile(repoDir string, cache *Cache, files []string, dryRun boo calls := renderCallsSection(srcFile, cache, prefix) impact := renderImpactSection(srcFile, cache, prefix) + var narrative string + if narrate { + narrative = RenderNarrative(srcFile, cache, prefix) + if narrative != "" { + narrative += prefix + "\n" + } + } + for _, item := range []struct { path string content string @@ -346,7 +366,7 @@ func RenderAllThreeFile(repoDir string, cache *Cache, files []string, dryRun boo safeRemove(repoDir, item.path) continue } - fullContent := goPrefix + header + item.content + "\n" + fullContent := goPrefix + header + narrative + item.content + "\n" if err := WriteShard(repoDir, item.path, fullContent, dryRun); err != nil { if strings.Contains(err.Error(), "path traversal") { continue diff --git a/internal/shards/render_stale_test.go b/internal/shards/render_stale_test.go index c04d555..1d87340 100644 --- a/internal/shards/render_stale_test.go +++ b/internal/shards/render_stale_test.go @@ -72,7 +72,7 @@ func TestRenderAll_RemovesStaleThreeFiles(t *testing.T) { touchFile(t, filepath.Join(dir, "src", "index.impact.ts")) cache := testCache() - _, err := RenderAll(dir, cache, []string{"src/index.ts"}, false) + _, err := RenderAll(dir, cache, []string{"src/index.ts"}, false, false) if err != nil { t.Fatal(err) } @@ -95,7 +95,7 @@ func TestRenderAllThreeFile_RemovesStaleGraphFile(t *testing.T) { touchFile(t, filepath.Join(dir, "src", "index.graph.ts")) cache := testCache() - _, err := RenderAllThreeFile(dir, cache, []string{"src/index.ts"}, false) + _, err := RenderAllThreeFile(dir, cache, []string{"src/index.ts"}, false, false) if err != nil { t.Fatal(err) } @@ -122,7 +122,7 @@ func TestRenderAllThreeFile_CallsContent(t *testing.T) { os.MkdirAll(filepath.Join(dir, "src"), 0o755) cache := testCache() 
- _, err := RenderAllThreeFile(dir, cache, []string{"src/index.ts"}, false) + _, err := RenderAllThreeFile(dir, cache, []string{"src/index.ts"}, false, false) if err != nil { t.Fatal(err) } @@ -148,7 +148,7 @@ func TestRenderAllThreeFile_DepsContent(t *testing.T) { os.MkdirAll(filepath.Join(dir, "src"), 0o755) cache := testCache() - _, err := RenderAllThreeFile(dir, cache, []string{"src/index.ts"}, false) + _, err := RenderAllThreeFile(dir, cache, []string{"src/index.ts"}, false, false) if err != nil { t.Fatal(err) } @@ -175,7 +175,7 @@ func TestRenderAllThreeFile_ImpactContent(t *testing.T) { cache := testCache() // utils.ts has an importer (index.ts) so it will have impact data - _, err := RenderAllThreeFile(dir, cache, []string{"src/utils.ts"}, false) + _, err := RenderAllThreeFile(dir, cache, []string{"src/utils.ts"}, false, false) if err != nil { t.Fatal(err) } @@ -201,7 +201,7 @@ func TestRenderAll_GraphContent(t *testing.T) { os.MkdirAll(filepath.Join(dir, "src"), 0o755) cache := testCache() - _, err := RenderAll(dir, cache, []string{"src/index.ts"}, false) + _, err := RenderAll(dir, cache, []string{"src/index.ts"}, false, false) if err != nil { t.Fatal(err) } @@ -236,7 +236,7 @@ func TestRenderAllThreeFile_EmptySectionRemovesStaleFile(t *testing.T) { touchFile(t, filepath.Join(dir, "src", "lonely.impact.ts")) cache := testCacheNoImpact() - _, err := RenderAllThreeFile(dir, cache, []string{"src/lonely.ts"}, false) + _, err := RenderAllThreeFile(dir, cache, []string{"src/lonely.ts"}, false, false) if err != nil { t.Fatal(err) } diff --git a/internal/shards/render_test.go b/internal/shards/render_test.go index f7c9ea4..bf630d7 100644 --- a/internal/shards/render_test.go +++ b/internal/shards/render_test.go @@ -593,7 +593,7 @@ func TestUpdateGitignore_NoTrailingNewlineHandled(t *testing.T) { func TestRenderAll_EmptyFiles(t *testing.T) { dir := t.TempDir() c := makeRenderCache(shardIR(nil, nil)) - n, err := RenderAll(dir, c, nil, false) + n, err := 
RenderAll(dir, c, nil, false, false) if err != nil { t.Fatalf("RenderAll(empty): %v", err) } @@ -615,7 +615,7 @@ func TestRenderAll_WritesShards(t *testing.T) { ) dir := t.TempDir() c := makeRenderCache(ir) - n, err := RenderAll(dir, c, []string{"src/a.go"}, false) + n, err := RenderAll(dir, c, []string{"src/a.go"}, false, false) if err != nil { t.Fatalf("RenderAll: %v", err) } @@ -636,7 +636,7 @@ func TestRenderAll_DryRun(t *testing.T) { ) dir := t.TempDir() c := makeRenderCache(ir) - n, err := RenderAll(dir, c, []string{"src/a.go"}, true) + n, err := RenderAll(dir, c, []string{"src/a.go"}, false, true) if err != nil { t.Fatalf("RenderAll dryRun: %v", err) } @@ -654,7 +654,7 @@ func TestRenderAll_SkipsEmptyContent(t *testing.T) { // A file not in the cache produces empty content → no shard written. dir := t.TempDir() c := makeRenderCache(shardIR(nil, nil)) - n, err := RenderAll(dir, c, []string{"src/unknown.go"}, false) + n, err := RenderAll(dir, c, []string{"src/unknown.go"}, false, false) if err != nil { t.Fatalf("RenderAll: %v", err) } @@ -678,7 +678,7 @@ func TestRenderAll_PathTraversalSkipped(t *testing.T) { ) dir := t.TempDir() c := makeRenderCache(ir) - n, err := RenderAll(dir, c, []string{"../../evil.go"}, false) + n, err := RenderAll(dir, c, []string{"../../evil.go"}, false, false) if err != nil { t.Fatalf("RenderAll path-traversal: %v", err) } @@ -710,7 +710,7 @@ func TestRenderAll_WriteshardError(t *testing.T) { t.Fatal(err) } - _, err := RenderAll(dir, c, []string{"sub/a.go"}, false) + _, err := RenderAll(dir, c, []string{"sub/a.go"}, false, false) if err == nil { t.Error("expected error when shard directory cannot be created") } diff --git a/internal/shards/tour.go b/internal/shards/tour.go new file mode 100644 index 0000000..253aceb --- /dev/null +++ b/internal/shards/tour.go @@ -0,0 +1,486 @@ +package shards + +import ( + "fmt" + "os" + "path/filepath" + "sort" + "strings" +) + +// TourStrategy chooses a linear reading order over the source files. 
+type TourStrategy interface { + Name() string + Order(cache *Cache) []string +} + +// TopoStrategy orders files by reverse topological order over the import graph +// (leaves first, roots last). Cycles are broken by lexicographic file path. +type TopoStrategy struct{} + +func (TopoStrategy) Name() string { return "topo" } + +// BFSSeedStrategy walks the undirected import graph outward from a seed file +// in BFS order. Only files reachable from the seed are emitted. +type BFSSeedStrategy struct{ Seed string } + +func (BFSSeedStrategy) Name() string { return "bfs-seed" } + +func (s BFSSeedStrategy) Order(cache *Cache) []string { + return seededTraversal(cache, s.Seed, true) +} + +// DFSSeedStrategy walks the undirected import graph from a seed file in DFS +// order. Only files reachable from the seed are emitted. +type DFSSeedStrategy struct{ Seed string } + +func (DFSSeedStrategy) Name() string { return "dfs-seed" } + +func (s DFSSeedStrategy) Order(cache *Cache) []string { + return seededTraversal(cache, s.Seed, false) +} + +// CentralityStrategy orders files by transitive-dependent count descending +// (the "blast radius" of a change). Most-depended-on files come first; +// lex-ascending breaks ties. +type CentralityStrategy struct{} + +func (CentralityStrategy) Name() string { return "centrality" } + +func (CentralityStrategy) Order(cache *Cache) []string { + files := cache.SourceFiles() + sort.Strings(files) + type scored struct { + file string + score int + } + scores := make([]scored, len(files)) + for i, f := range files { + scores[i] = scored{file: f, score: len(cache.TransitiveDependents(f))} + } + sort.SliceStable(scores, func(i, j int) bool { + if scores[i].score != scores[j].score { + return scores[i].score > scores[j].score + } + return scores[i].file < scores[j].file + }) + out := make([]string, len(scores)) + for i, s := range scores { + out[i] = s.file + } + return out +} + +// seededTraversal walks the undirected import graph from seed. 
bfs=true for +// BFS, false for DFS. Neighbors are visited in lex order for determinism. +// Returns empty slice if seed is not present in the cache. +func seededTraversal(cache *Cache, seed string, bfs bool) []string { + files := cache.SourceFiles() + present := make(map[string]bool, len(files)) + for _, f := range files { + present[f] = true + } + if !present[seed] { + return nil + } + + visited := map[string]bool{seed: true} + var out []string + frontier := []string{seed} + + for len(frontier) > 0 { + var current string + if bfs { + current = frontier[0] + frontier = frontier[1:] + } else { + current = frontier[len(frontier)-1] + frontier = frontier[:len(frontier)-1] + } + // Emit on pop so the output reflects visit order, not discovery order. + // This is what makes DFS descend one branch fully before crossing. + out = append(out, current) + + neighbors := map[string]bool{} + for _, n := range cache.Imports[current] { + if present[n] { + neighbors[n] = true + } + } + for _, n := range cache.Importers[current] { + if present[n] { + neighbors[n] = true + } + } + sorted := make([]string, 0, len(neighbors)) + for n := range neighbors { + sorted = append(sorted, n) + } + sort.Strings(sorted) + if !bfs { + // Reverse so lex-smallest neighbor is popped first after the push. + for i, j := 0, len(sorted)-1; i < j; i, j = i+1, j-1 { + sorted[i], sorted[j] = sorted[j], sorted[i] + } + } + for _, n := range sorted { + if visited[n] { + continue + } + visited[n] = true + frontier = append(frontier, n) + } + } + return out +} + +func (TopoStrategy) Order(cache *Cache) []string { + files := cache.SourceFiles() + sort.Strings(files) // deterministic tiebreak + + inDegree := make(map[string]int, len(files)) + present := make(map[string]bool, len(files)) + for _, f := range files { + present[f] = true + } + // inDegree[f] = number of files that f depends on (imports). + // Leaves (no imports to other tracked files) have inDegree 0 → emitted first. 
+ for _, f := range files { + for _, dep := range cache.Imports[f] { + if present[dep] && dep != f { + inDegree[f]++ + } + } + } + + var queue []string + for _, f := range files { + if inDegree[f] == 0 { + queue = append(queue, f) + } + } + sort.Strings(queue) + + var out []string + emitted := make(map[string]bool, len(files)) + + for len(queue) > 0 { + f := queue[0] + queue = queue[1:] + if emitted[f] { + continue + } + emitted[f] = true + out = append(out, f) + + // Anything that imports f loses one unresolved dep. + importers := cache.Importers[f] + var unlocked []string + for _, imp := range importers { + if !present[imp] || emitted[imp] { + continue + } + inDegree[imp]-- + if inDegree[imp] <= 0 { + unlocked = append(unlocked, imp) + } + } + sort.Strings(unlocked) + queue = append(queue, unlocked...) + } + + // Any files left unemitted are in cycles; append them lex-sorted so the tour + // is total. + var leftover []string + for _, f := range files { + if !emitted[f] { + leftover = append(leftover, f) + } + } + sort.Strings(leftover) + return append(out, leftover...) +} + +// RenderTour builds a TOUR.md body for the given strategy and cache. +// repoDir is used only to compute relative shard links. +func RenderTour(cache *Cache, strategy TourStrategy, repoRelPrefix string) string { + order := strategy.Order(cache) + + var b strings.Builder + fmt.Fprintf(&b, "# Repository Tour\n\n") + fmt.Fprintf(&b, "**Strategy:** `%s` — %s\n\n", strategy.Name(), strategyBlurb(strategy.Name())) + fmt.Fprintf(&b, "Read top-to-bottom. Each entry points to the file's shard, which"+ + " contains the structured [deps] / [calls] / [impact] view.\n\n") + + // Group by domain then subdomain while preserving order within each group. 
	// Per-file grouping key: domain and optional subdomain from the cache.
	type entry struct {
		file string
		dom  string
		sub  string
	}
	entries := make([]entry, 0, len(order))
	for _, f := range order {
		dom, sub := splitDomain(cache.FileDomain[f])
		entries = append(entries, entry{file: f, dom: dom, sub: sub})
	}

	// Emit a heading whenever the domain (or subdomain) changes.
	// NOTE(review): lastDom starts as "", so a run of domain-less files at the
	// very top gets no "## Domain: Unassigned" heading, while the same run
	// appearing after a non-empty domain does — confirm this asymmetry is
	// intended.
	lastDom, lastSub := "", ""
	for _, e := range entries {
		if e.dom != lastDom {
			fmt.Fprintf(&b, "## Domain: %s\n\n", displayOrUnassigned(e.dom))
			lastDom = e.dom
			lastSub = ""
		}
		if e.sub != lastSub {
			if e.sub != "" {
				fmt.Fprintf(&b, "### Subdomain: %s\n\n", e.sub)
			}
			lastSub = e.sub
		}
		writeTourEntry(&b, e.file, cache, repoRelPrefix)
	}

	return b.String()
}

// writeTourEntry appends one "- **file**" bullet with its reads / read-by /
// risk lines and a link to the file's shard.
func writeTourEntry(b *strings.Builder, file string, cache *Cache, repoRelPrefix string) {
	imports := sortedUnique(cache.Imports[file])
	importers := sortedUnique(cache.Importers[file])
	risk := riskFor(file, cache)

	fmt.Fprintf(b, "- **%s**\n", file)
	if len(imports) > 0 {
		fmt.Fprintf(b, " reads: %s\n", joinTrunc(imports, 4))
	}
	if len(importers) > 0 {
		fmt.Fprintf(b, " read by: %s\n", joinTrunc(importers, 4))
	}
	fmt.Fprintf(b, " risk: %s · [shard](%s)\n\n", risk, shardLink(file, repoRelPrefix))
}

// shardLink joins prefix and the shard filename with forward slashes so links
// work in Markdown on every platform; an empty prefix yields a bare filename.
func shardLink(file, prefix string) string {
	if prefix == "" {
		return ShardFilename(file)
	}
	return filepath.ToSlash(filepath.Join(prefix, ShardFilename(file)))
}

// joinTrunc joins up to n items, summarizing the remainder as "… (+k)".
func joinTrunc(items []string, n int) string {
	if len(items) <= n {
		return strings.Join(items, ", ")
	}
	return strings.Join(items[:n], ", ") + fmt.Sprintf(", … (+%d)", len(items)-n)
}

// splitDomain splits "Domain/Subdomain" at the FIRST slash; no slash means no
// subdomain. An empty domain string yields ("", "").
func splitDomain(d string) (string, string) {
	if d == "" {
		return "", ""
	}
	if i := strings.Index(d, "/"); i >= 0 {
		return d[:i], d[i+1:]
	}
	return d, ""
}

// displayOrUnassigned maps the empty domain to the literal "Unassigned".
func displayOrUnassigned(s string) string {
	if s == "" {
		return "Unassigned"
	}
	return s
}

// riskFor is a narrow re-derivation of the impact section's risk tier for the
// tour line. It matches renderImpactSection's thresholds so tour and shard stay
// in agreement.
func riskFor(file string, cache *Cache) string {
	transitive := cache.TransitiveDependents(file)
	// Distinct domains touched by this file plus everything that depends on it.
	domains := map[string]bool{}
	if d := cache.FileDomain[file]; d != "" {
		domains[d] = true
	}
	for f := range transitive {
		if d := cache.FileDomain[f]; d != "" {
			domains[d] = true
		}
	}
	// Thresholds mirror renderImpactSection — keep the two in sync.
	switch {
	case len(transitive) > 20 || len(domains) > 2:
		return "HIGH"
	case len(transitive) > 5 || len(domains) > 1:
		return "MEDIUM"
	default:
		return "LOW"
	}
}

// strategyBlurb maps a strategy name to the one-line description shown in the
// tour header; unknown names fall back to "custom ordering".
func strategyBlurb(name string) string {
	switch name {
	case "topo":
		return "reverse-topological over the import graph (leaves first, roots last)"
	case "bfs-seed":
		return "breadth-first walk outward from the seed file"
	case "dfs-seed":
		return "depth-first walk outward from the seed file"
	case "centrality":
		return "files with the largest blast radius first (most transitively depended-on)"
	default:
		return "custom ordering"
	}
}

// approxTokens estimates the token count of s using the 4-chars-per-token
// heuristic. Good enough for sizing chapter boundaries — no tokenizer needed.
func approxTokens(s string) int {
	return (len(s) + 3) / 4
}

// ChunkTour splits a rendered tour body into chapters at file-entry boundaries
// so each chapter fits within budgetTokens. Each chapter gets a "Chapter N of M"
// header prepended. Returns a single-element slice when budgetTokens <= 0 or the
// body already fits.
func ChunkTour(body string, budgetTokens int) []string {
	if budgetTokens <= 0 || approxTokens(body) <= budgetTokens {
		return []string{body}
	}

	// Split the body into a preamble and a sequence of file entries, then pack
	// entries into chapters within the token budget.
+ const entryMarker = "\n- **" + idx := strings.Index(body, entryMarker) + if idx < 0 { + return []string{body} + } + preamble := body[:idx+1] // include trailing \n + rest := body[idx+1:] + + // Split entries on blank line (entries are separated by "\n\n"). + type domainedEntry struct { + heading string // most recent "## Domain" or "### Subdomain" heading block + text string // the "- **..." entry + } + var entries []domainedEntry + var currentHeading strings.Builder + for _, block := range strings.Split(rest, "\n\n") { + block = strings.TrimRight(block, "\n") + if block == "" { + continue + } + if strings.HasPrefix(block, "## ") || strings.HasPrefix(block, "### ") { + currentHeading.WriteString(block) + currentHeading.WriteString("\n\n") + continue + } + if strings.HasPrefix(block, "- **") { + entries = append(entries, domainedEntry{heading: currentHeading.String(), text: block}) + currentHeading.Reset() + } + } + + var chapters [][]domainedEntry + var currentChapter []domainedEntry + currentSize := approxTokens(preamble) + for _, e := range entries { + entrySize := approxTokens(e.heading) + approxTokens(e.text) + 2 + if len(currentChapter) > 0 && currentSize+entrySize > budgetTokens { + chapters = append(chapters, currentChapter) + currentChapter = nil + currentSize = approxTokens(preamble) + } + currentChapter = append(currentChapter, e) + currentSize += entrySize + } + if len(currentChapter) > 0 { + chapters = append(chapters, currentChapter) + } + + out := make([]string, len(chapters)) + total := len(chapters) + for i, chapter := range chapters { + var b strings.Builder + b.WriteString(preamble) + fmt.Fprintf(&b, "> Chapter %d of %d", i+1, total) + if i > 0 { + fmt.Fprintf(&b, " · [prev](TOUR.%02d.md)", i) + } + if i < total-1 { + fmt.Fprintf(&b, " · [next](TOUR.%02d.md)", i+2) + } + b.WriteString("\n\n") + lastHeading := "" + for _, e := range chapter { + if e.heading != "" && e.heading != lastHeading { + b.WriteString(e.heading) + lastHeading = e.heading + 
			}
			b.WriteString(e.text)
			b.WriteString("\n\n")
		}
		// Normalize trailing whitespace to exactly one newline per chapter.
		out[i] = strings.TrimRight(b.String(), "\n") + "\n"
	}
	return out
}

// WriteTour writes TOUR.md to .supermodel/TOUR.md inside repoDir.
// When budgetTokens > 0 and the body exceeds the budget, the tour is split
// into TOUR.01.md, TOUR.02.md, ... and TOUR.md becomes an index file.
// Returns the path of TOUR.md (index or single file).
func WriteTour(repoDir string, cache *Cache, strategy TourStrategy, budgetTokens int, dryRun bool) (string, error) {
	outDir := filepath.Join(repoDir, ".supermodel")
	outPath := filepath.Join(outDir, "TOUR.md")

	// Shards live next to source files, so from .supermodel/TOUR.md the link to
	// src/foo.graph.ts is ../src/foo.graph.ts.
	body := RenderTour(cache, strategy, "..")
	chapters := ChunkTour(body, budgetTokens)

	// NOTE(review): dry-run reports via fmt.Printf straight to stdout from
	// library code — confirm that matches how the rest of the package reports
	// (vs. an injected writer / ui helper).
	if dryRun {
		if len(chapters) == 1 {
			fmt.Printf(" [dry-run] would write %s (%d bytes)\n", outPath, len(body))
		} else {
			fmt.Printf(" [dry-run] would write %s + %d chapters (%d bytes total)\n", outPath, len(chapters), len(body))
		}
		return outPath, nil
	}

	if err := os.MkdirAll(outDir, 0o755); err != nil {
		return "", fmt.Errorf("create tour dir: %w", err)
	}

	// Write via temp file + rename so readers never observe a half-written
	// tour; the temp file is cleaned up if the rename fails.
	writeFile := func(name, content string) error {
		full := filepath.Join(outDir, name)
		tmp := full + ".tmp"
		if err := os.WriteFile(tmp, []byte(content), 0o644); err != nil {
			return err
		}
		if err := os.Rename(tmp, full); err != nil {
			_ = os.Remove(tmp)
			return err
		}
		return nil
	}

	if len(chapters) == 1 {
		if err := writeFile("TOUR.md", chapters[0]); err != nil {
			return "", fmt.Errorf("write tour: %w", err)
		}
		return outPath, nil
	}

	// Multi-chapter: write TOUR.NN.md files and an index TOUR.md.
	for i, chapter := range chapters {
		if err := writeFile(fmt.Sprintf("TOUR.%02d.md", i+1), chapter); err != nil {
			return "", fmt.Errorf("write chapter: %w", err)
		}
	}
	var idx strings.Builder
	fmt.Fprintf(&idx, "# Repository Tour — Index\n\n")
	fmt.Fprintf(&idx, "**Strategy:** `%s` · %d chapters\n\n", strategy.Name(), len(chapters))
	fmt.Fprintf(&idx, "Read chapters in order; each fits within the token budget.\n\n")
	for i := range chapters {
		fmt.Fprintf(&idx, "- [Chapter %d](TOUR.%02d.md)\n", i+1, i+1)
	}
	if err := writeFile("TOUR.md", idx.String()); err != nil {
		return "", fmt.Errorf("write tour index: %w", err)
	}
	return outPath, nil
}
diff --git a/internal/shards/tour_test.go b/internal/shards/tour_test.go new file mode 100644 index 0000000..20906d9 --- /dev/null +++ b/internal/shards/tour_test.go @@ -0,0 +1,390 @@
package shards

import (
	"fmt"
	"os"
	"path/filepath"
	"strings"
	"testing"

	"github.com/supermodeltools/cli/internal/api"
)

// TestTopoStrategy_LeavesFirst verifies that files with no outbound imports
// (leaves of the dependency graph) appear before files that import them.
func TestTopoStrategy_LeavesFirst(t *testing.T) {
	// main.go imports lib.go; lib.go imports util.go. Expected: util, lib, main.
	nodes := []api.Node{
		{ID: "f:src/util.go", Labels: []string{"File"}, Properties: map[string]any{"filePath": "src/util.go"}},
		{ID: "f:src/lib.go", Labels: []string{"File"}, Properties: map[string]any{"filePath": "src/lib.go"}},
		{ID: "f:src/main.go", Labels: []string{"File"}, Properties: map[string]any{"filePath": "src/main.go"}},
	}
	rels := []api.Relationship{
		{ID: "r1", Type: "imports", StartNode: "f:src/main.go", EndNode: "f:src/lib.go"},
		{ID: "r2", Type: "imports", StartNode: "f:src/lib.go", EndNode: "f:src/util.go"},
	}
	cache := NewCache()
	cache.Build(&api.ShardIR{Graph: api.ShardGraph{Nodes: nodes, Relationships: rels}})

	order := TopoStrategy{}.Order(cache)

	if len(order) != 3 {
		t.Fatalf("expected 3 files in order, got %d: %v", len(order), order)
	}
	// Assert relative positions rather than the exact sequence, so the test
	// stays valid under any deterministic tiebreak.
	idx := map[string]int{}
	for i, f := range order {
		idx[f] = i
	}
	if idx["src/util.go"] > idx["src/lib.go"] {
		t.Errorf("leaf util.go should precede lib.go; got %v", order)
	}
	if idx["src/lib.go"] > idx["src/main.go"] {
		t.Errorf("lib.go should precede main.go; got %v", order)
	}
}

// TestTopoStrategy_Deterministic verifies that two equivalent graphs produce
// identical orderings regardless of relationship insertion order.
func TestTopoStrategy_Deterministic(t *testing.T) {
	nodes := []api.Node{
		{ID: "f:a.go", Labels: []string{"File"}, Properties: map[string]any{"filePath": "a.go"}},
		{ID: "f:b.go", Labels: []string{"File"}, Properties: map[string]any{"filePath": "b.go"}},
		{ID: "f:c.go", Labels: []string{"File"}, Properties: map[string]any{"filePath": "c.go"}},
	}
	// a and b are both leaves that c imports.
	rels1 := []api.Relationship{
		{ID: "r1", Type: "imports", StartNode: "f:c.go", EndNode: "f:a.go"},
		{ID: "r2", Type: "imports", StartNode: "f:c.go", EndNode: "f:b.go"},
	}
	// Same graph, relationships supplied in the opposite order.
	rels2 := []api.Relationship{
		{ID: "r2", Type: "imports", StartNode: "f:c.go", EndNode: "f:b.go"},
		{ID: "r1", Type: "imports", StartNode: "f:c.go", EndNode: "f:a.go"},
	}

	c1 := NewCache()
	c1.Build(&api.ShardIR{Graph: api.ShardGraph{Nodes: nodes, Relationships: rels1}})
	c2 := NewCache()
	c2.Build(&api.ShardIR{Graph: api.ShardGraph{Nodes: nodes, Relationships: rels2}})

	o1 := TopoStrategy{}.Order(c1)
	o2 := TopoStrategy{}.Order(c2)

	if strings.Join(o1, "|") != strings.Join(o2, "|") {
		t.Errorf("non-deterministic ordering\n run1: %v\n run2: %v", o1, o2)
	}
}

// TestTopoStrategy_HandlesCycles verifies cycles don't cause infinite loops
// and all files still appear in the output.
func TestTopoStrategy_HandlesCycles(t *testing.T) {
	// a and b import each other — a two-file cycle.
	nodes := []api.Node{
		{ID: "f:a.go", Labels: []string{"File"}, Properties: map[string]any{"filePath": "a.go"}},
		{ID: "f:b.go", Labels: []string{"File"}, Properties: map[string]any{"filePath": "b.go"}},
	}
	rels := []api.Relationship{
		{ID: "r1", Type: "imports", StartNode: "f:a.go", EndNode: "f:b.go"},
		{ID: "r2", Type: "imports", StartNode: "f:b.go", EndNode: "f:a.go"},
	}
	cache := NewCache()
	cache.Build(&api.ShardIR{Graph: api.ShardGraph{Nodes: nodes, Relationships: rels}})

	order := TopoStrategy{}.Order(cache)

	if len(order) != 2 {
		t.Fatalf("cycle participants must all appear; got %v", order)
	}
}

// TestRenderTour_ContainsDomainHeadings verifies domain grouping appears in output.
func TestRenderTour_ContainsDomainHeadings(t *testing.T) {
	nodes := []api.Node{
		{ID: "f:src/a.go", Labels: []string{"File"}, Properties: map[string]any{"filePath": "src/a.go"}},
	}
	cache := NewCache()
	cache.Build(&api.ShardIR{Graph: api.ShardGraph{Nodes: nodes}})
	cache.FileDomain["src/a.go"] = "Core/Utils"

	body := RenderTour(cache, TopoStrategy{}, "..")

	if !strings.Contains(body, "# Repository Tour") {
		t.Errorf("missing title in tour body:\n%s", body)
	}
	if !strings.Contains(body, "Domain: Core") {
		t.Errorf("missing domain heading: %s", body)
	}
	if !strings.Contains(body, "Subdomain: Utils") {
		t.Errorf("missing subdomain heading: %s", body)
	}
	if !strings.Contains(body, "src/a.go") {
		t.Errorf("missing file entry: %s", body)
	}
}

// TestRenderTour_ShardLinkRelative verifies the shard link uses .. prefix so
// that TOUR.md in .supermodel/ can resolve to shards next to source files.
func TestRenderTour_ShardLinkRelative(t *testing.T) {
	nodes := []api.Node{
		{ID: "f:src/a.ts", Labels: []string{"File"}, Properties: map[string]any{"filePath": "src/a.ts"}},
	}
	cache := NewCache()
	cache.Build(&api.ShardIR{Graph: api.ShardGraph{Nodes: nodes}})

	body := RenderTour(cache, TopoStrategy{}, "..")

	if !strings.Contains(body, "../src/a.graph.ts") {
		t.Errorf("expected relative shard link '../src/a.graph.ts' in body:\n%s", body)
	}
}

// TestBFSSeedStrategy_ReachableOnly verifies BFS from seed only emits files
// reachable by walking the undirected import graph.
func TestBFSSeedStrategy_ReachableOnly(t *testing.T) {
	// Reachable: a↔b↔c. Unreachable: z.
	nodes := []api.Node{
		{ID: "f:a.go", Labels: []string{"File"}, Properties: map[string]any{"filePath": "a.go"}},
		{ID: "f:b.go", Labels: []string{"File"}, Properties: map[string]any{"filePath": "b.go"}},
		{ID: "f:c.go", Labels: []string{"File"}, Properties: map[string]any{"filePath": "c.go"}},
		{ID: "f:z.go", Labels: []string{"File"}, Properties: map[string]any{"filePath": "z.go"}},
	}
	rels := []api.Relationship{
		{ID: "r1", Type: "imports", StartNode: "f:a.go", EndNode: "f:b.go"},
		{ID: "r2", Type: "imports", StartNode: "f:b.go", EndNode: "f:c.go"},
	}
	cache := NewCache()
	cache.Build(&api.ShardIR{Graph: api.ShardGraph{Nodes: nodes, Relationships: rels}})

	order := BFSSeedStrategy{Seed: "a.go"}.Order(cache)

	if len(order) != 3 {
		t.Fatalf("BFS should reach 3 files (a,b,c), got %v", order)
	}
	if order[0] != "a.go" {
		t.Errorf("seed must come first, got %v", order)
	}
	for _, f := range order {
		if f == "z.go" {
			t.Errorf("unreachable z.go leaked into BFS: %v", order)
		}
	}
}

// TestBFSSeedStrategy_MissingSeed returns nil for a seed not in the cache.
func TestBFSSeedStrategy_MissingSeed(t *testing.T) {
	cache := NewCache()
	cache.Build(&api.ShardIR{})
	if got := (BFSSeedStrategy{Seed: "nonexistent.go"}).Order(cache); len(got) != 0 {
		t.Errorf("missing seed should yield empty order, got %v", got)
	}
}

// TestDFSSeedStrategy_DifferentFromBFS verifies DFS produces a different order
// than BFS on a branching graph (proving we're actually doing DFS).
func TestDFSSeedStrategy_DifferentFromBFS(t *testing.T) {
	// Star from root to a,b,c,d. BFS sees them at depth 1; DFS descends first.
	// With linearly-chained children we can actually see the difference.
	// Build: root → x → leaf_x; root → y → leaf_y
	nodes := []api.Node{
		{ID: "f:root.go", Labels: []string{"File"}, Properties: map[string]any{"filePath": "root.go"}},
		{ID: "f:x.go", Labels: []string{"File"}, Properties: map[string]any{"filePath": "x.go"}},
		{ID: "f:leaf_x.go", Labels: []string{"File"}, Properties: map[string]any{"filePath": "leaf_x.go"}},
		{ID: "f:y.go", Labels: []string{"File"}, Properties: map[string]any{"filePath": "y.go"}},
		{ID: "f:leaf_y.go", Labels: []string{"File"}, Properties: map[string]any{"filePath": "leaf_y.go"}},
	}
	rels := []api.Relationship{
		{ID: "r1", Type: "imports", StartNode: "f:root.go", EndNode: "f:x.go"},
		{ID: "r2", Type: "imports", StartNode: "f:root.go", EndNode: "f:y.go"},
		{ID: "r3", Type: "imports", StartNode: "f:x.go", EndNode: "f:leaf_x.go"},
		{ID: "r4", Type: "imports", StartNode: "f:y.go", EndNode: "f:leaf_y.go"},
	}
	cache := NewCache()
	cache.Build(&api.ShardIR{Graph: api.ShardGraph{Nodes: nodes, Relationships: rels}})

	bfs := BFSSeedStrategy{Seed: "root.go"}.Order(cache)
	dfs := DFSSeedStrategy{Seed: "root.go"}.Order(cache)

	// BFS should see root, x, y (depth 1) before any leaves (depth 2).
	bfsIdx := map[string]int{}
	for i, f := range bfs {
		bfsIdx[f] = i
	}
	if bfsIdx["leaf_x.go"] < bfsIdx["y.go"] || bfsIdx["leaf_y.go"] < bfsIdx["x.go"] {
		t.Errorf("BFS should visit depth-1 before depth-2: %v", bfs)
	}

	// DFS should descend all the way down one branch before the other.
	// After root, it visits x, then leaf_x, before crossing to y.
	// (The error condition fires only when BOTH leaves trail the opposite
	// branch head — i.e. a BFS-like interleaving.)
	dfsIdx := map[string]int{}
	for i, f := range dfs {
		dfsIdx[f] = i
	}
	if dfsIdx["leaf_x.go"] > dfsIdx["y.go"] && dfsIdx["leaf_y.go"] > dfsIdx["x.go"] {
		t.Errorf("DFS should descend one branch fully before the other: %v", dfs)
	}
}

// TestCentralityStrategy_MostDependedFirst verifies centrality orders by
// transitive-dependent count descending.
func TestCentralityStrategy_MostDependedFirst(t *testing.T) {
	// util is a leaf depended on by lib and main. lib is depended on by main.
	// Transitive-dependent counts: util=2, lib=1, main=0.
	nodes := []api.Node{
		{ID: "f:util.go", Labels: []string{"File"}, Properties: map[string]any{"filePath": "util.go"}},
		{ID: "f:lib.go", Labels: []string{"File"}, Properties: map[string]any{"filePath": "lib.go"}},
		{ID: "f:main.go", Labels: []string{"File"}, Properties: map[string]any{"filePath": "main.go"}},
	}
	rels := []api.Relationship{
		{ID: "r1", Type: "imports", StartNode: "f:lib.go", EndNode: "f:util.go"},
		{ID: "r2", Type: "imports", StartNode: "f:main.go", EndNode: "f:lib.go"},
		{ID: "r3", Type: "imports", StartNode: "f:main.go", EndNode: "f:util.go"},
	}
	cache := NewCache()
	cache.Build(&api.ShardIR{Graph: api.ShardGraph{Nodes: nodes, Relationships: rels}})

	order := CentralityStrategy{}.Order(cache)

	if len(order) != 3 {
		t.Fatalf("expected 3 files, got %v", order)
	}
	if order[0] != "util.go" {
		t.Errorf("most-depended-on file should come first; got %v", order)
	}
	if order[2] != "main.go" {
		t.Errorf("least-depended-on file should come last; got %v", order)
	}
}

// TestChunkTour_SplitsAtEntryBoundaries verifies long tours get chunked into
// chapters that each stay within budget.
func TestChunkTour_SplitsAtEntryBoundaries(t *testing.T) {
	// Build a tour with 5 entries, then chunk at a budget small enough to force
	// multiple chapters but large enough to fit the preamble.
	nodes := make([]api.Node, 5)
	for i := range nodes {
		name := fmt.Sprintf("f%d.go", i)
		nodes[i] = api.Node{
			ID:         "f:" + name,
			Labels:     []string{"File"},
			Properties: map[string]any{"filePath": name},
		}
	}
	cache := NewCache()
	cache.Build(&api.ShardIR{Graph: api.ShardGraph{Nodes: nodes}})

	body := RenderTour(cache, TopoStrategy{}, "..")
	chapters := ChunkTour(body, 60) // small budget

	if len(chapters) < 2 {
		t.Fatalf("expected multiple chapters at small budget, got %d", len(chapters))
	}
	for i, ch := range chapters {
		if !strings.Contains(ch, "Chapter") {
			t.Errorf("chapter %d missing 'Chapter' header: %s", i+1, ch)
		}
	}
	// First chapter has "next" link, last has "prev" link.
	if !strings.Contains(chapters[0], "next") {
		t.Errorf("first chapter missing next link: %s", chapters[0])
	}
	if !strings.Contains(chapters[len(chapters)-1], "prev") {
		t.Errorf("last chapter missing prev link: %s", chapters[len(chapters)-1])
	}
}

// TestChunkTour_FitsInBudget returns a single chunk when body fits.
func TestChunkTour_FitsInBudget(t *testing.T) {
	chapters := ChunkTour("short body", 10000)
	if len(chapters) != 1 {
		t.Errorf("short body should produce 1 chunk, got %d", len(chapters))
	}
}

// TestResolveStrategy_ValidNames checks all strategy names resolve.
// (ResolveStrategy is defined elsewhere in the package; seed-based strategies
// require a non-empty seed, unknown names are rejected, and "" defaults.)
func TestResolveStrategy_ValidNames(t *testing.T) {
	cases := []struct {
		name    string
		seed    string
		wantErr bool
	}{
		{"topo", "", false},
		{"", "", false},
		{"bfs-seed", "foo.go", false},
		{"bfs-seed", "", true},
		{"dfs-seed", "foo.go", false},
		{"dfs-seed", "", true},
		{"centrality", "", false},
		{"nonsense", "", true},
	}
	for _, tc := range cases {
		_, err := ResolveStrategy(tc.name, tc.seed)
		if (err != nil) != tc.wantErr {
			t.Errorf("ResolveStrategy(%q, %q): wantErr=%v got %v", tc.name, tc.seed, tc.wantErr, err)
		}
	}
}

// TestRenderNarrative_ContainsKeyInfo verifies the narrative covers domain,
// imports, importers, functions, and risk.
func TestRenderNarrative_ContainsKeyInfo(t *testing.T) {
	nodes := []api.Node{
		{ID: "f:src/a.go", Labels: []string{"File"}, Properties: map[string]any{"filePath": "src/a.go"}},
		{ID: "f:src/b.go", Labels: []string{"File"}, Properties: map[string]any{"filePath": "src/b.go"}},
		{ID: "fn1", Labels: []string{"Function"}, Properties: map[string]any{"name": "doWork", "filePath": "src/a.go"}},
		{ID: "fn2", Labels: []string{"Function"}, Properties: map[string]any{"name": "helper", "filePath": "src/a.go"}},
	}
	rels := []api.Relationship{
		{ID: "r1", Type: "imports", StartNode: "f:src/a.go", EndNode: "f:src/b.go"},
		{ID: "r2", Type: "calls", StartNode: "fn1", EndNode: "fn2"},
	}
	cache := NewCache()
	cache.Build(&api.ShardIR{Graph: api.ShardGraph{Nodes: nodes, Relationships: rels}})
	cache.FileDomain["src/a.go"] = "Core/Utils"

	got := RenderNarrative("src/a.go", cache, "//")

	wantSubstrings := []string{
		"Narrative:",
		"Core",
		"Utils",
		"imports",
		"doWork",
		"helper",
		"Risk:",
	}
	for _, s := range wantSubstrings {
		if !strings.Contains(got, s) {
			t.Errorf("narrative missing %q\n---\n%s", s, got)
		}
	}
	// Should use comment prefix on each line.
	for _, line := range strings.Split(strings.TrimRight(got, "\n"), "\n") {
		if !strings.HasPrefix(line, "//") {
			t.Errorf("narrative line missing comment prefix: %q", line)
		}
	}
}

// TestRenderAll_Narrate prepends the narrative preamble when narrate=true.
func TestRenderAll_Narrate(t *testing.T) {
	dir := t.TempDir()
	nodes := []api.Node{
		{ID: "fa", Labels: []string{"File"}, Properties: map[string]any{"filePath": "src/a.go"}},
		{ID: "fb", Labels: []string{"File"}, Properties: map[string]any{"filePath": "src/b.go"}},
	}
	rels := []api.Relationship{
		{ID: "r1", Type: "imports", StartNode: "fa", EndNode: "fb"},
	}
	cache := NewCache()
	cache.Build(&api.ShardIR{Graph: api.ShardGraph{Nodes: nodes, Relationships: rels}})

	n, err := RenderAll(dir, cache, []string{"src/a.go"}, true, false)
	if err != nil || n != 1 {
		t.Fatalf("RenderAll narrate: n=%d err=%v", n, err)
	}
	data, err := os.ReadFile(filepath.Join(dir, "src", "a.graph.go"))
	if err != nil {
		t.Fatal(err)
	}
	if !strings.Contains(string(data), "Narrative:") {
		t.Errorf("shard missing narrative when narrate=true:\n%s", data)
	}
}