Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
34 changes: 32 additions & 2 deletions src/commands/extract.ts
Original file line number Diff line number Diff line change
Expand Up @@ -354,7 +354,19 @@ export async function runExtractCore(engine: BrainEngine, opts: ExtractOpts): Pr
export async function runExtract(engine: BrainEngine, args: string[]) {
const subcommand = args[0];
const dirIdx = args.indexOf('--dir');
const brainDir = (dirIdx >= 0 && dirIdx + 1 < args.length) ? args[dirIdx + 1] : '.';
const explicitDir = dirIdx >= 0 && dirIdx + 1 < args.length;
// When --dir is not passed, resolve from the configured brain source
// BEFORE falling back to '.' (the prior default). The bare `.` default was
// a footgun: a user who runs `gbrain extract links` from anywhere outside
// their brain dir (e.g., a project checkout with a node_modules tree) had
// the recursive walker grab tens of thousands of unrelated .md files,
// attempt to extract links between them, then write 0 rows because the
// synthetic from_slugs don't match any pages row. The output ("created 0
// links from 28989 pages") looks like a no-op, but it walked 28K junk files
// first. Resolving from sources(local_path) makes the no-arg invocation
// match what `gbrain sync` already does, and keeps cwd-cwd usage available
// via explicit `--dir .`.
let brainDir = explicitDir ? args[dirIdx + 1] : '.';
const sourceIdx = args.indexOf('--source');
const source = (sourceIdx >= 0 && sourceIdx + 1 < args.length) ? args[sourceIdx + 1] : 'fs';
const typeIdx = args.indexOf('--type');
Expand Down Expand Up @@ -390,7 +402,25 @@ export async function runExtract(engine: BrainEngine, args: string[]) {
process.exit(1);
}

// FS source needs a brain dir; DB source ignores --dir.
// FS source needs a brain dir. When --dir wasn't passed, resolve from
// sources(local_path) — same path `gbrain sync` uses — instead of
// silently walking cwd. See the brainDir comment above for the footgun.
if (source === 'fs' && !explicitDir) {
const { getDefaultSourcePath } = await import('../core/source-resolver.ts');
const configured = await getDefaultSourcePath(engine);
if (configured) {
brainDir = configured;
} else {
console.error(
`No brain directory configured. Pass --dir <path> explicitly, or use --source db ` +
`to extract from already-synced pages. To register a brain dir as the default, ` +
`run: gbrain sources add default --path <brain-dir>`,
);
process.exit(1);
}
}

// DB source ignores --dir.
if (source === 'fs' && !existsSync(brainDir)) {
console.error(`Directory not found: ${brainDir}`);
process.exit(1);
Expand Down
87 changes: 87 additions & 0 deletions test/extract-fs.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -159,3 +159,90 @@ title: Alice
expect(elapsedMs).toBeLessThan(2000);
});
});

describe('gbrain extract --dir default resolution', () => {
// Pin the cwd-footgun fix: when --dir is not passed, extract resolves the
// brain dir from the sources(local_path) row before falling back. The bare
// `.` default would let a user running from a directory with a node_modules/
// tree walk tens of thousands of unrelated .md files and report
// "created 0 links from 28K pages" — looks like a no-op, was actually a
// wasteful junk walk that wrote nothing because synthetic from_slugs don't
// match the pages table.
test('uses configured sources(local_path) when --dir is not passed', async () => {
await engine.putPage('people/alice', personPage('Alice'));
await engine.putPage('people/bob', personPage('Bob'));
writeFile('people/alice.md', '---\ntitle: Alice\n---\n\n[Bob](../people/bob.md) is a friend.\n');
writeFile('people/bob.md', '---\ntitle: Bob\n---\n');

// Register brainDir as the default source's local_path.
await (engine as any).db.exec(
`UPDATE sources SET local_path = '${brainDir.replace(/'/g, "''")}' WHERE id = 'default'`,
);

// Save + clobber cwd to a sibling tmpdir so the test fails loudly if the
// resolver still walks `.` instead of the configured path.
const otherDir = mkdtempSync(join(tmpdir(), 'gbrain-extract-other-'));
const savedCwd = process.cwd();
try {
process.chdir(otherDir);
await runExtract(engine, ['links']); // no --dir
} finally {
process.chdir(savedCwd);
try { rmSync(otherDir, { recursive: true, force: true }); } catch { /* ignore */ }
}

const links = await engine.getLinks('people/alice');
expect(links.length).toBe(1);
expect(links[0]).toMatchObject({ to_slug: 'people/bob' });
});

test('errors with actionable message when no --dir and no source configured', async () => {
// Clear the default source's local_path so getDefaultSourcePath returns null.
await (engine as any).db.exec(`UPDATE sources SET local_path = NULL WHERE id = 'default'`);
await (engine as any).db.exec(`DELETE FROM config WHERE key = 'sync.repo_path'`);

let exitCode: number | null = null;
const errBuf: string[] = [];
const savedExit = process.exit;
const savedConsoleError = console.error;
try {
(process as any).exit = (code: number) => { exitCode = code; throw new Error('__test_exit__'); };
console.error = (...parts: unknown[]) => { errBuf.push(parts.join(' ')); };
try {
await runExtract(engine, ['links']);
} catch (e) {
if (!(e instanceof Error && e.message === '__test_exit__')) throw e;
}
} finally {
(process as any).exit = savedExit;
console.error = savedConsoleError;
}
expect(exitCode).toBe(1);
const all = errBuf.join('\n');
expect(all).toContain('No brain directory configured');
expect(all).toContain('--source db');
expect(all).toContain('--dir');
});

test('explicit --dir always wins over configured source', async () => {
await engine.putPage('people/alice', personPage('Alice'));
await engine.putPage('people/bob', personPage('Bob'));
writeFile('people/alice.md', '---\ntitle: Alice\n---\n\n[Bob](../people/bob.md) is a friend.\n');
writeFile('people/bob.md', '---\ntitle: Bob\n---\n');

// Configured path points elsewhere; explicit --dir must override.
const decoyDir = mkdtempSync(join(tmpdir(), 'gbrain-extract-decoy-'));
await (engine as any).db.exec(
`UPDATE sources SET local_path = '${decoyDir.replace(/'/g, "''")}' WHERE id = 'default'`,
);

try {
await runExtract(engine, ['links', '--dir', brainDir]);
const links = await engine.getLinks('people/alice');
expect(links.length).toBe(1);
expect(links[0]).toMatchObject({ to_slug: 'people/bob' });
} finally {
try { rmSync(decoyDir, { recursive: true, force: true }); } catch { /* ignore */ }
}
});
});