Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions src/commands/import.ts
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,7 @@ export interface RunImportResult {
failures: Array<{ path: string; error: string }>;
}

export async function runImport(engine: BrainEngine, args: string[], opts: { commit?: string } = {}): Promise<RunImportResult> {
export async function runImport(engine: BrainEngine, args: string[], opts: { commit?: string; sourceId?: string } = {}): Promise<RunImportResult> {
const noEmbed = args.includes('--no-embed');
const fresh = args.includes('--fresh');
const jsonOutput = args.includes('--json');
Expand Down Expand Up @@ -95,7 +95,7 @@ export async function runImport(engine: BrainEngine, args: string[], opts: { com
async function processFile(eng: BrainEngine, filePath: string) {
const relativePath = relative(dir, filePath);
try {
const result = await importFile(eng, filePath, relativePath, { noEmbed });
const result = await importFile(eng, filePath, relativePath, { noEmbed, sourceId: opts.sourceId });
if (result.status === 'imported') {
imported++;
chunksCreated += result.chunks;
Expand Down
6 changes: 3 additions & 3 deletions src/commands/sync.ts
Original file line number Diff line number Diff line change
Expand Up @@ -462,7 +462,7 @@ export async function performSync(engine: BrainEngine, opts: SyncOpts): Promise<
// Reimport at new path (picks up content changes)
const filePath = join(repoPath, to);
if (existsSync(filePath)) {
const result = await importFile(engine, filePath, to, { noEmbed });
const result = await importFile(engine, filePath, to, { noEmbed, sourceId: opts.sourceId });
if (result.status === 'imported') chunksCreated += result.chunks;
}
pagesAffected.push(newSlug);
Expand Down Expand Up @@ -496,7 +496,7 @@ export async function performSync(engine: BrainEngine, opts: SyncOpts): Promise<
continue;
}
try {
const result = await importFile(engine, filePath, path, { noEmbed });
const result = await importFile(engine, filePath, path, { noEmbed, sourceId: opts.sourceId });
if (result.status === 'imported') {
chunksCreated += result.chunks;
pagesAffected.push(result.slug);
Expand Down Expand Up @@ -656,7 +656,7 @@ async function performFullSync(
const { runImport } = await import('./import.ts');
const importArgs = [repoPath];
if (opts.noEmbed) importArgs.push('--no-embed');
const result = await runImport(engine, importArgs, { commit: headCommit });
const result = await runImport(engine, importArgs, { commit: headCommit, sourceId: opts.sourceId });

// Bug 9 — gate the full-sync bookmark on success. runImport already
// writes its own sync.last_commit conditionally (import.ts), but
Expand Down
18 changes: 9 additions & 9 deletions src/core/engine.ts
Original file line number Diff line number Diff line change
Expand Up @@ -113,9 +113,9 @@ export interface BrainEngine {
withReservedConnection<T>(fn: (conn: ReservedConnection) => Promise<T>): Promise<T>;

// Pages CRUD
getPage(slug: string): Promise<Page | null>;
getPage(slug: string, sourceId?: string): Promise<Page | null>;
putPage(slug: string, page: PageInput): Promise<Page>;
deletePage(slug: string): Promise<void>;
deletePage(slug: string, sourceId?: string): Promise<void>;
listPages(filters?: PageFilters): Promise<Page[]>;
resolveSlugs(partial: string): Promise<string[]>;
/**
Expand All @@ -131,8 +131,8 @@ export interface BrainEngine {
getEmbeddingsByChunkIds(ids: number[]): Promise<Map<number, Float32Array>>;

// Chunks
upsertChunks(slug: string, chunks: ChunkInput[]): Promise<void>;
getChunks(slug: string): Promise<Chunk[]>;
upsertChunks(slug: string, chunks: ChunkInput[], sourceId?: string): Promise<void>;
getChunks(slug: string, sourceId?: string): Promise<Chunk[]>;
Comment on lines 115 to +135
Copy link

Copilot AI Apr 30, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

With getPage/getChunks/getTags/... now defaulting to source_id='default' when sourceId is omitted, the interface becomes inconsistent with listPages() / getAllSlugs() (which currently return entries across all sources). Callers that iterate via listPages() and then call getChunks(slug) / getTags(slug) (e.g. embed/export flows) will silently operate only on the default source for non-default pages. Consider adding source scoping to listPages/getAllSlugs (or including source_id in the returned Page shape) so multi-source callers can preserve source context end-to-end.

Copilot uses AI. Check for mistakes.
/**
* Count chunks across the entire brain where embedded_at IS NULL.
* Pre-flight short-circuit for `embed --stale` so a 100%-embedded brain
Expand All @@ -148,7 +148,7 @@ export interface BrainEngine {
* Bounded by an internal LIMIT of 100000 to mirror listPages.
*/
listStaleChunks(): Promise<StaleChunkRow[]>;
deleteChunks(slug: string): Promise<void>;
deleteChunks(slug: string, sourceId?: string): Promise<void>;

// Links
/**
Expand Down Expand Up @@ -229,9 +229,9 @@ export interface BrainEngine {
findOrphanPages(): Promise<Array<{ slug: string; title: string; domain: string | null }>>;

// Tags
addTag(slug: string, tag: string): Promise<void>;
removeTag(slug: string, tag: string): Promise<void>;
getTags(slug: string): Promise<string[]>;
addTag(slug: string, tag: string, sourceId?: string): Promise<void>;
removeTag(slug: string, tag: string, sourceId?: string): Promise<void>;
getTags(slug: string, sourceId?: string): Promise<string[]>;

// Timeline
/**
Expand Down Expand Up @@ -259,7 +259,7 @@ export interface BrainEngine {
getRawData(slug: string, source?: string): Promise<RawData[]>;

// Versions
createVersion(slug: string): Promise<PageVersion>;
createVersion(slug: string, sourceId?: string): Promise<PageVersion>;
getVersions(slug: string): Promise<PageVersion[]>;
revertToVersion(slug: string, versionId: number): Promise<void>;

Expand Down
38 changes: 20 additions & 18 deletions src/core/import-file.ts
Original file line number Diff line number Diff line change
Expand Up @@ -185,7 +185,7 @@ export async function importFromContent(
engine: BrainEngine,
slug: string,
content: string,
opts: { noEmbed?: boolean } = {},
opts: { noEmbed?: boolean; sourceId?: string } = {},
): Promise<ImportResult> {
// Reject oversized payloads before any parsing, chunking, or embedding happens.
// Uses Buffer.byteLength to count UTF-8 bytes the same way disk size would,
Expand Down Expand Up @@ -223,7 +223,7 @@ export async function importFromContent(
tags: parsed.tags,
};

const existing = await engine.getPage(slug);
const existing = await engine.getPage(slug, opts.sourceId);
if (existing?.content_hash === hash) {
return { slug, status: 'skipped', chunks: 0, parsedPage };
}
Expand Down Expand Up @@ -261,7 +261,7 @@ export async function importFromContent(

// Transaction wraps all DB writes
await engine.transaction(async (tx) => {
if (existing) await tx.createVersion(slug);
if (existing) await tx.createVersion(slug, opts.sourceId);

await tx.putPage(slug, {
type: parsed.type,
Expand All @@ -270,23 +270,24 @@ export async function importFromContent(
timeline: parsed.timeline || '',
frontmatter: parsed.frontmatter,
content_hash: hash,
source_id: opts.sourceId,
});

// Tag reconciliation: remove stale, add current
const existingTags = await tx.getTags(slug);
const existingTags = await tx.getTags(slug, opts.sourceId);
const newTags = new Set(parsed.tags);
for (const old of existingTags) {
if (!newTags.has(old)) await tx.removeTag(slug, old);
if (!newTags.has(old)) await tx.removeTag(slug, old, opts.sourceId);
}
for (const tag of parsed.tags) {
await tx.addTag(slug, tag);
await tx.addTag(slug, tag, opts.sourceId);
}

if (chunks.length > 0) {
await tx.upsertChunks(slug, chunks);
await tx.upsertChunks(slug, chunks, opts.sourceId);
} else {
// Content is empty — delete stale chunks so they don't ghost in search results
await tx.deleteChunks(slug);
await tx.deleteChunks(slug, opts.sourceId);
}

// v0.19.0 E1 — doc↔impl linking: if this markdown page cites code paths
Expand Down Expand Up @@ -333,7 +334,7 @@ export async function importFromFile(
engine: BrainEngine,
filePath: string,
relativePath: string,
opts: { noEmbed?: boolean } = {},
opts: { noEmbed?: boolean; sourceId?: string } = {},
): Promise<ImportResult> {
// Defense-in-depth: reject symlinks before reading content.
const lstat = lstatSync(filePath);
Expand Down Expand Up @@ -384,7 +385,7 @@ export async function importCodeFile(
engine: BrainEngine,
relativePath: string,
content: string,
opts: { noEmbed?: boolean; force?: boolean } = {},
opts: { noEmbed?: boolean; force?: boolean; sourceId?: string } = {},
): Promise<ImportResult> {
const slug = slugifyCodePath(relativePath);
const lang = detectCodeLanguage(relativePath) || 'unknown';
Expand All @@ -401,7 +402,7 @@ export async function importCodeFile(
.update(JSON.stringify({ title, type: 'code', content, lang, chunker_version: CHUNKER_VERSION }))
.digest('hex');

const existing = await engine.getPage(slug);
const existing = await engine.getPage(slug, opts.sourceId);
if (!opts.force && existing?.content_hash === hash) {
return { slug, status: 'skipped', chunks: 0 };
}
Expand Down Expand Up @@ -439,7 +440,7 @@ export async function importCodeFile(
// OpenAI API. Order matters: our chunk_index is semantic (tree-sitter
// order), so a matching (chunk_index, text_hash) means a verbatim
// preserved symbol.
const existingChunks = existing ? await engine.getChunks(slug) : [];
const existingChunks = existing ? await engine.getChunks(slug, opts.sourceId) : [];
const existingByKey = new Map<string, typeof existingChunks[number]>();
for (const ec of existingChunks) {
existingByKey.set(`${ec.chunk_index}:${ec.chunk_text}`, ec);
Expand Down Expand Up @@ -474,7 +475,7 @@ export async function importCodeFile(

// Store
await engine.transaction(async (tx) => {
if (existing) await tx.createVersion(slug);
if (existing) await tx.createVersion(slug, opts.sourceId);

await tx.putPage(slug, {
type: 'code' as PageType,
Expand All @@ -484,15 +485,16 @@ export async function importCodeFile(
timeline: '',
frontmatter: { language: lang, file: relativePath },
content_hash: hash,
source_id: opts.sourceId,
});

await tx.addTag(slug, 'code');
await tx.addTag(slug, lang);
await tx.addTag(slug, 'code', opts.sourceId);
await tx.addTag(slug, lang, opts.sourceId);

if (chunks.length > 0) {
await tx.upsertChunks(slug, chunks);
await tx.upsertChunks(slug, chunks, opts.sourceId);
} else {
await tx.deleteChunks(slug);
await tx.deleteChunks(slug, opts.sourceId);
}
});

Expand All @@ -503,7 +505,7 @@ export async function importCodeFile(
// chunk IDs are stable.
if (extractedEdges.length > 0 && chunks.length > 0) {
try {
const persistedChunks = await engine.getChunks(slug);
const persistedChunks = await engine.getChunks(slug, opts.sourceId);
const byIndex = new Map<number, { id?: number; symbol_name_qualified?: string | null; start_line?: number | null; end_line?: number | null }>();
for (const pc of persistedChunks) {
byIndex.set(pc.chunk_index, pc);
Expand Down
Loading
Loading