From 6e23845b11d16f3655a01c00efe74e1b8ba9f20b Mon Sep 17 00:00:00 2001 From: Brandon Lipman Date: Wed, 6 May 2026 14:40:54 -0400 Subject: [PATCH] fix(extract): default --dir to configured brain dir, not cwd MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit `gbrain extract links` (and timeline / all) defaulted --dir to '.' when not explicitly passed (src/commands/extract.ts:357). Combined with a walker that skips dotfiles but NOT node_modules/dist/build/vendor, this turned a no-arg invocation into a footgun. Repro: $ cd ~/Documents/some-project # has a node_modules/ tree $ gbrain extract links [extract.links_fs] 28989/28989 (100%) done Links: created 0 from 28989 pages Done: 0 links, 0 timeline entries from 28989 pages The "28989 pages" is `walkMarkdownFiles('.')` recursively eating package READMEs, dependency docs, fixture content. Their from_slug doesn't match any row in the pages table, so addLinksBatch rejects every insert and returns 0. Output looks like a healthy idempotent no-op; was actually a wasteful junk walk that wrote nothing. Fix: when --dir is not passed AND source is fs, resolve from sources(local_path) via getDefaultSourcePath — same helper sync uses (src/commands/sync.ts:1089). The default behavior now matches `sync`: "work on the configured brain". Falls back to a clear error when no source is configured, telling the user to either pass --dir, register a source, or use --source db. Behavior matrix: --dir explicit → use that path (unchanged) --dir absent + cfg → resolve from sources(local_path) --dir absent + no → error with actionable hint (was: walk cwd silently) --dir . → cwd (user opted in explicitly — unchanged) Tests: three added in test/extract-fs.test.ts: 1. configured source → no-arg invocation extracts from that path 2. no source configured → exit 1 + actionable error message 3. explicit --dir wins over a configured (decoy) source path --- src/commands/extract.ts | 34 +++++++++++++++- test/extract-fs.test.ts | 87 +++++++++++++++++++++++++++++++++++++++++ 2 files changed, 119 insertions(+), 2 deletions(-) diff --git a/src/commands/extract.ts b/src/commands/extract.ts index 9b9d08baa..d2539ca4e 100644 --- a/src/commands/extract.ts +++ b/src/commands/extract.ts @@ -354,7 +354,19 @@ export async function runExtractCore(engine: BrainEngine, opts: ExtractOpts): Pr export async function runExtract(engine: BrainEngine, args: string[]) { const subcommand = args[0]; const dirIdx = args.indexOf('--dir'); - const brainDir = (dirIdx >= 0 && dirIdx + 1 < args.length) ? args[dirIdx + 1] : '.'; + const explicitDir = dirIdx >= 0 && dirIdx + 1 < args.length; + // When --dir is not passed, resolve from the configured brain source + // BEFORE falling back to '.' (the prior default). The bare `.` default was + // a footgun: a user who runs `gbrain extract links` from anywhere outside + // their brain dir (e.g., a project checkout with a node_modules tree) had + // the recursive walker grab tens of thousands of unrelated .md files, + // attempt to extract links between them, then write 0 rows because the + // synthetic from_slugs don't match any pages row. The output ("created 0 + // links from 28989 pages") looks like a no-op, but it walked 28K junk files + // first. Resolving from sources(local_path) makes the no-arg invocation + // match what `gbrain sync` already does, and keeps cwd-cwd usage available + // via explicit `--dir .`. + let brainDir = explicitDir ? args[dirIdx + 1] : '.'; const sourceIdx = args.indexOf('--source'); const source = (sourceIdx >= 0 && sourceIdx + 1 < args.length) ? args[sourceIdx + 1] : 'fs'; const typeIdx = args.indexOf('--type'); @@ -390,7 +402,25 @@ export async function runExtract(engine: BrainEngine, args: string[]) { process.exit(1); } - // FS source needs a brain dir; DB source ignores --dir. + // FS source needs a brain dir. When --dir wasn't passed, resolve from + // sources(local_path) — same path `gbrain sync` uses — instead of + // silently walking cwd. See the brainDir comment above for the footgun. + if (source === 'fs' && !explicitDir) { + const { getDefaultSourcePath } = await import('../core/source-resolver.ts'); + const configured = await getDefaultSourcePath(engine); + if (configured) { + brainDir = configured; + } else { + console.error( + `No brain directory configured. Pass --dir explicitly, or use --source db ` + + `to extract from already-synced pages. To register a brain dir as the default, ` + + `run: gbrain sources add default --path `, + ); + process.exit(1); + } + } + + // DB source ignores --dir. if (source === 'fs' && !existsSync(brainDir)) { console.error(`Directory not found: ${brainDir}`); process.exit(1); diff --git a/test/extract-fs.test.ts b/test/extract-fs.test.ts index 97bbbcfa5..04f97dd82 100644 --- a/test/extract-fs.test.ts +++ b/test/extract-fs.test.ts @@ -159,3 +159,90 @@ title: Alice expect(elapsedMs).toBeLessThan(2000); }); }); + +describe('gbrain extract --dir default resolution', () => { + // Pin the cwd-footgun fix: when --dir is not passed, extract resolves the + // brain dir from the sources(local_path) row before falling back. The bare + // `.` default would let a user running from a directory with a node_modules/ + // tree walk tens of thousands of unrelated .md files and report + // "created 0 links from 28K pages" — looks like a no-op, was actually a + // wasteful junk walk that wrote nothing because synthetic from_slugs don't + // match the pages table. + test('uses configured sources(local_path) when --dir is not passed', async () => { + await engine.putPage('people/alice', personPage('Alice')); + await engine.putPage('people/bob', personPage('Bob')); + writeFile('people/alice.md', '---\ntitle: Alice\n---\n\n[Bob](../people/bob.md) is a friend.\n'); + writeFile('people/bob.md', '---\ntitle: Bob\n---\n'); + + // Register brainDir as the default source's local_path. + await (engine as any).db.exec( + `UPDATE sources SET local_path = '${brainDir.replace(/'/g, "''")}' WHERE id = 'default'`, + ); + + // Save + clobber cwd to a sibling tmpdir so the test fails loudly if the + // resolver still walks `.` instead of the configured path. + const otherDir = mkdtempSync(join(tmpdir(), 'gbrain-extract-other-')); + const savedCwd = process.cwd(); + try { + process.chdir(otherDir); + await runExtract(engine, ['links']); // no --dir + } finally { + process.chdir(savedCwd); + try { rmSync(otherDir, { recursive: true, force: true }); } catch { /* ignore */ } + } + + const links = await engine.getLinks('people/alice'); + expect(links.length).toBe(1); + expect(links[0]).toMatchObject({ to_slug: 'people/bob' }); + }); + + test('errors with actionable message when no --dir and no source configured', async () => { + // Clear the default source's local_path so getDefaultSourcePath returns null. + await (engine as any).db.exec(`UPDATE sources SET local_path = NULL WHERE id = 'default'`); + await (engine as any).db.exec(`DELETE FROM config WHERE key = 'sync.repo_path'`); + + let exitCode: number | null = null; + const errBuf: string[] = []; + const savedExit = process.exit; + const savedConsoleError = console.error; + try { + (process as any).exit = (code: number) => { exitCode = code; throw new Error('__test_exit__'); }; + console.error = (...parts: unknown[]) => { errBuf.push(parts.join(' ')); }; + try { + await runExtract(engine, ['links']); + } catch (e) { + if (!(e instanceof Error && e.message === '__test_exit__')) throw e; + } + } finally { + (process as any).exit = savedExit; + console.error = savedConsoleError; + } + expect(exitCode).toBe(1); + const all = errBuf.join('\n'); + expect(all).toContain('No brain directory configured'); + expect(all).toContain('--source db'); + expect(all).toContain('--dir'); + }); + + test('explicit --dir always wins over configured source', async () => { + await engine.putPage('people/alice', personPage('Alice')); + await engine.putPage('people/bob', personPage('Bob')); + writeFile('people/alice.md', '---\ntitle: Alice\n---\n\n[Bob](../people/bob.md) is a friend.\n'); + writeFile('people/bob.md', '---\ntitle: Bob\n---\n'); + + // Configured path points elsewhere; explicit --dir must override. + const decoyDir = mkdtempSync(join(tmpdir(), 'gbrain-extract-decoy-')); + await (engine as any).db.exec( + `UPDATE sources SET local_path = '${decoyDir.replace(/'/g, "''")}' WHERE id = 'default'`, + ); + + try { + await runExtract(engine, ['links', '--dir', brainDir]); + const links = await engine.getLinks('people/alice'); + expect(links.length).toBe(1); + expect(links[0]).toMatchObject({ to_slug: 'people/bob' }); + } finally { + try { rmSync(decoyDir, { recursive: true, force: true }); } catch { /* ignore */ } + } + }); +});