diff --git a/cli.ts b/cli.ts
index 99203916..b45b8611 100644
--- a/cli.ts
+++ b/cli.ts
@@ -3,7 +3,7 @@
  */
 
 import type { Command } from "commander";
-import { readFileSync } from "node:fs";
+import { readFileSync, type Dirent } from "node:fs";
 import { mkdir, readFile, rm, writeFile } from "node:fs/promises";
 import { homedir } from "node:os";
 import path from "node:path";
@@ -417,6 +417,231 @@ async function sleep(ms: number): Promise<void> {
 // CLI Command Implementations
 // ============================================================================
 
+/**
+ * Import memories from Markdown memory files into the plugin store.
+ *
+ * Scans `<openclaw-home>/workspace` for:
+ *   - `<workspace>/MEMORY.md` and `<workspace>/memory/YYYY-MM-DD*.md`
+ *   - nested `workspace/agents/<agent-id>/MEMORY.md` and `.../memory/YYYY-MM-DD*.md`
+ *   - the flat `workspace/memory/` directory directly under the workspace root
+ * Each Markdown bullet line ("- ", "* ", "+ ") becomes one memory entry.
+ *
+ * @param ctx           embedder (required at runtime; exits(1) if absent) + memory store
+ * @param workspaceGlob substring filter on top-level workspace directory names;
+ *                      when set, the nested-agent and flat-memory scans are skipped
+ * @param options       CLI options (numeric options arrive as strings from commander)
+ * @returns counts of imported entries, skipped entries, and scanned files
+ */
+export async function runImportMarkdown(
+  ctx: { embedder?: import("./src/embedder.js").Embedder; store: MemoryStore },
+  workspaceGlob: string | undefined,
+  options: {
+    dryRun?: boolean;
+    scope?: string;
+    openclawHome?: string;
+    dedup?: boolean;
+    minTextLength?: string;
+    importance?: string;
+  }
+): Promise<{ imported: number; skipped: number; foundFiles: number }> {
+  const openclawHome = options.openclawHome
+    ? path.resolve(options.openclawHome)
+    : path.join(homedir(), ".openclaw");
+
+  const workspaceDir = path.join(openclawHome, "workspace");
+  let imported = 0;
+  let skipped = 0;
+  let foundFiles = 0;
+
+  if (!ctx.embedder) {
+    console.error(
+      "import-markdown requires an embedder. Use via plugin CLI or ensure embedder is configured.",
+    );
+    process.exit(1);
+  }
+
+  // Infer workspace scope from openclaw.json agents list
+  // (flat memory/ files have no per-file metadata, so we derive scope from config)
+  const fsPromises = await import("node:fs/promises");
+  let workspaceScope = ""; // empty = no scope override for nested workspaces
+  try {
+    const configPath = path.join(openclawHome, "openclaw.json");
+    const configContent = await fsPromises.readFile(configPath, "utf-8");
+    const config = JSON.parse(configContent);
+    const agentsList: Array<{ id?: string; workspace?: string }> =
+      config?.agents?.list ?? [];
+    // NOTE(review): this compares a normalized (not resolved) config path against
+    // an absolute workspaceDir — relative or "~"-prefixed workspace entries will
+    // never match. Confirm config always stores absolute paths.
+    const matched = agentsList.find((a) => {
+      if (!a.workspace) return false;
+      return path.normalize(a.workspace) === workspaceDir;
+    });
+    if (matched?.id) {
+      workspaceScope = matched.id;
+    }
+  } catch { /* use default */ }
+
+  // Scan workspace directories
+  let workspaceEntries: Dirent[];
+  try {
+    workspaceEntries = await fsPromises.readdir(workspaceDir, { withFileTypes: true });
+  } catch {
+    console.error(`Failed to read workspace directory: ${workspaceDir}`);
+    process.exit(1);
+  }
+
+  // Collect all markdown files to scan
+  const mdFiles: Array<{ filePath: string; scope: string }> = [];
+
+  for (const entry of workspaceEntries) {
+    if (!entry.isDirectory()) continue;
+    if (workspaceGlob && !entry.name.includes(workspaceGlob)) continue;
+
+    const workspacePath = path.join(workspaceDir, entry.name);
+
+    // MEMORY.md
+    const memoryMd = path.join(workspacePath, "MEMORY.md");
+    try {
+      await fsPromises.stat(memoryMd);
+      mdFiles.push({ filePath: memoryMd, scope: entry.name });
+    } catch { /* not found */ }
+
+    // memory/ directory with date-stamped files
+    const memoryDir = path.join(workspacePath, "memory");
+    try {
+      const stats = await fsPromises.stat(memoryDir);
+      if (stats.isDirectory()) {
+        const files = await fsPromises.readdir(memoryDir);
+        for (const f of files) {
+          if (f.endsWith(".md") && /^\d{4}-\d{2}-\d{2}/.test(f)) {
+            mdFiles.push({ filePath: path.join(memoryDir, f), scope: entry.name });
+          }
+        }
+      }
+    } catch { /* not found */ }
+  }
+
+  // Also scan nested agent workspaces under workspace/agents/<agent-id>/.
+  // This handles the structure used by session-recovery and other OpenClaw
+  // components: workspace/agents/<agent-id>/MEMORY.md and .../memory/.
+  // We scan one additional level deeper than the top-level workspace scan.
+  if (!workspaceGlob) {
+    const agentsDir = path.join(workspaceDir, "agents");
+    try {
+      const agentEntries = await fsPromises.readdir(agentsDir, { withFileTypes: true });
+      for (const agentEntry of agentEntries) {
+        if (!agentEntry.isDirectory()) continue;
+        const agentPath = path.join(agentsDir, agentEntry.name);
+
+        // workspace/agents/<agent-id>/MEMORY.md
+        const agentMemoryMd = path.join(agentPath, "MEMORY.md");
+        try {
+          await fsPromises.stat(agentMemoryMd);
+          mdFiles.push({ filePath: agentMemoryMd, scope: agentEntry.name });
+        } catch { /* not found */ }
+
+        // workspace/agents/<agent-id>/memory/ date files
+        const agentMemoryDir = path.join(agentPath, "memory");
+        try {
+          const stats = await fsPromises.stat(agentMemoryDir);
+          if (stats.isDirectory()) {
+            const files = await fsPromises.readdir(agentMemoryDir);
+            for (const f of files) {
+              if (f.endsWith(".md") && /^\d{4}-\d{2}-\d{2}/.test(f)) {
+                mdFiles.push({ filePath: path.join(agentMemoryDir, f), scope: agentEntry.name });
+              }
+            }
+          }
+        } catch { /* not found */ }
+      }
+    } catch { /* no agents/ directory */ }
+  }
+
+  // Also scan the flat `workspace/memory/` directory directly under workspace root
+  // (not inside any workspace subdirectory). This runs regardless of whether nested
+  // workspace mdFiles were found, so flat memory is always reachable even when all
+  // nested workspaces are empty. Skipped when a specific workspace was requested
+  // (workspaceGlob) to avoid importing root flat memory alongside one workspace.
+  if (!workspaceGlob) {
+    const flatMemoryDir = path.join(workspaceDir, "memory");
+    try {
+      const stats = await fsPromises.stat(flatMemoryDir);
+      if (stats.isDirectory()) {
+        const files = await fsPromises.readdir(flatMemoryDir);
+        for (const f of files) {
+          if (f.endsWith(".md") && /^\d{4}-\d{2}-\d{2}/.test(f)) {
+            mdFiles.push({ filePath: path.join(flatMemoryDir, f), scope: workspaceScope || "shared" });
+          }
+        }
+      }
+    } catch { /* not found */ }
+  }
+
+  if (mdFiles.length === 0) {
+    return { imported: 0, skipped: 0, foundFiles: 0 };
+  }
+
+  // NaN-safe parsing with bounds — invalid input (e.g. "--min-text-length abc")
+  // falls back to the default BEFORE clamping, so we never feed NaN to clampInt
+  // (Math.min/max would propagate NaN and break every length check).
+  const parsedMinLen = Number.parseInt(options.minTextLength ?? "5", 10);
+  const minTextLength = clampInt(Number.isFinite(parsedMinLen) ? parsedMinLen : 5, 1, 10000);
+  const parsedImportance = Number.parseFloat(options.importance ?? "0.7");
+  const importanceDefault = Number.isFinite(parsedImportance)
+    ? Math.max(0, Math.min(1, parsedImportance))
+    : 0.7;
+  const dedupEnabled = !!options.dedup;
+
+  // Parse each file for memory entries (bullet lines)
+  for (const { filePath, scope: discoveredScope } of mdFiles) {
+    foundFiles++;
+    let content = await fsPromises.readFile(filePath, "utf-8");
+    // Strip UTF-8 BOM (e.g. from Windows Notepad-saved files)
+    content = content.replace(/^\uFEFF/, "");
+    // Normalize line endings: handle both CRLF (\r\n) and LF (\n)
+    const lines = content.split(/\r?\n/);
+
+    for (const line of lines) {
+      // Skip non-memory lines.
+      // Supports: "- text", "* text", "+ text" (standard Markdown bullet formats)
+      if (!/^[-*+]\s/.test(line)) continue;
+      const text = line.slice(2).trim();
+      if (text.length < minTextLength) { skipped++; continue; }
+
+      // Use --scope if provided, otherwise fall back to per-file discovered scope.
+      // This prevents cross-workspace leakage: without --scope, each workspace
+      // writes to its own scope instead of collapsing everything into "global".
+      const effectiveScope = options.scope || discoveredScope;
+
+      // ── Deduplication check (scope-aware exact match) ───────────────────
+      // Runs even in dry-run so --dry-run --dedup reports accurate counts
+      if (dedupEnabled) {
+        try {
+          const existing = await ctx.store.bm25Search(text, 1, [effectiveScope]);
+          if (existing.length > 0 && existing[0].entry.text === text) {
+            skipped++;
+            if (!options.dryRun) {
+              console.log(`  [skip] already imported: ${text.slice(0, 60)}${text.length > 60 ? "..." : ""}`);
+            }
+            continue;
+          }
+        } catch {
+          // bm25Search not available on this store implementation; proceed with import
+        }
+      }
+
+      if (options.dryRun) {
+        console.log(`  [dry-run] would import: ${text.slice(0, 80)}${text.length > 80 ? "..." : ""}`);
+        imported++;
+        continue;
+      }
+
+      try {
+        const vector = await ctx.embedder!.embedPassage(text);
+        await ctx.store.store({
+          text,
+          vector,
+          importance: importanceDefault,
+          category: "other",
+          scope: effectiveScope,
+          metadata: JSON.stringify({ importedFrom: filePath, sourceScope: discoveredScope }),
+        });
+        imported++;
+      } catch (err) {
+        console.warn(`  Failed to import: ${text.slice(0, 60)}... — ${err}`);
+        skipped++;
+      }
+    }
+  }
+
+  if (options.dryRun) {
+    console.log(`\nDRY RUN — found ${foundFiles} files, ${imported} entries would be imported, ${skipped} skipped${dedupEnabled ? " [dedup enabled]" : ""}`);
+  } else {
+    console.log(`\nImport complete: ${imported} imported, ${skipped} skipped (scanned ${foundFiles} files)${dedupEnabled ? " [dedup enabled]" : ""}`);
+  }
+
+  // BUG FIX: the declared return type promises counts and the CLI action
+  // destructures them, but the original fell off the end and returned undefined
+  // whenever any files were found.
+  return { imported, skipped, foundFiles };
+}
+
 export function registerMemoryCLI(program: Command, context: CLIContext): void {
   const getSearchRetriever = (): MemoryRetriever => {
     if (!context.embedder) {
@@ -1036,6 +1261,48 @@ export function registerMemoryCLI(program: Command, context: CLIContext): void {
     }
   });
 
+  /**
+   * import-markdown: Import memories from Markdown memory files into the plugin store.
+   * Targets MEMORY.md and memory/YYYY-MM-DD.md files found in OpenClaw workspaces.
+   */
+  memory
+    .command("import-markdown [workspace-glob]")
+    .description("Import memories from Markdown files (MEMORY.md, memory/YYYY-MM-DD.md) into the plugin store")
+    .option("--dry-run", "Show what would be imported without importing")
+    // Value-taking options need a <placeholder> token, otherwise commander
+    // treats them as boolean flags and the value is lost.
+    .option("--scope <scope>", "Import into specific scope (default: per-file discovered scope)")
+    .option(
+      "--openclaw-home <path>",
+      "OpenClaw home directory (default: ~/.openclaw)",
+    )
+    .option(
+      "--dedup",
+      "Skip entries already in store (scope-aware exact match, requires store.bm25Search)",
+    )
+    .option(
+      "--min-text-length <n>",
+      "Minimum text length to import (default: 5)",
+      "5",
+    )
+    .option(
+      "--importance <n>",
+      "Importance score for imported entries, 0.0-1.0 (default: 0.7)",
+      "0.7",
+    )
+    .action(async (workspaceGlob, options) => {
+      // runImportMarkdown prints its own per-run summary (dry-run or complete),
+      // so the action only reports the "nothing found" case.
+      // BUG FIX: the original re-printed a duplicate summary here and computed
+      // `dedupEnabled` from options.dryRun instead of options.dedup. It also
+      // destructured `result` unconditionally, which throws if the
+      // implementation returns early without a result object — hence the guard.
+      const result = await runImportMarkdown(context, workspaceGlob, options);
+      if (result && result.foundFiles === 0) {
+        console.log("No Markdown memory files found.");
+      }
+    });
+
   // Re-embed an existing LanceDB into the current target DB (A/B testing)
   memory
     .command("reembed")
diff --git a/index.ts b/index.ts
index ef3a4c15..79184eef 100644
--- a/index.ts
+++ b/index.ts
@@ -106,6 +106,8 @@ interface PluginConfig {
   autoRecallMaxItems?: number;
   autoRecallMaxChars?: number;
   autoRecallPerItemMaxChars?: number;
+  /** Max query string length before embedding search (safety valve). Default: 2000, range: 100-10000. */
+  autoRecallMaxQueryLength?: number;
   /** Hard per-turn injection cap (safety valve). Overrides autoRecallMaxItems if lower. Default: 10.
*/ maxRecallPerTurn?: number; recallMode?: "full" | "summary" | "adaptive" | "off"; @@ -2279,13 +2281,13 @@ const memoryLanceDBProPlugin = { // FR-04: Truncate long prompts (e.g. file attachments) before embedding. // Auto-recall only needs the user's intent, not full attachment text. - const MAX_RECALL_QUERY_LENGTH = 1_000; + const maxQueryLen = config.autoRecallMaxQueryLength; let recallQuery = event.prompt; - if (recallQuery.length > MAX_RECALL_QUERY_LENGTH) { + if (recallQuery.length > maxQueryLen) { const originalLength = recallQuery.length; - recallQuery = recallQuery.slice(0, MAX_RECALL_QUERY_LENGTH); + recallQuery = recallQuery.slice(0, maxQueryLen); api.logger.info( - `memory-lancedb-pro: auto-recall query truncated from ${originalLength} to ${MAX_RECALL_QUERY_LENGTH} chars` + `memory-lancedb-pro: auto-recall query truncated from ${originalLength} to ${maxQueryLen} chars` ); } @@ -3833,6 +3835,7 @@ export function parsePluginConfig(value: unknown): PluginConfig { autoRecallMaxItems: parsePositiveInt(cfg.autoRecallMaxItems) ?? 3, autoRecallMaxChars: parsePositiveInt(cfg.autoRecallMaxChars) ?? 600, autoRecallPerItemMaxChars: parsePositiveInt(cfg.autoRecallPerItemMaxChars) ?? 180, + autoRecallMaxQueryLength: clampInt(parsePositiveInt(cfg.autoRecallMaxQueryLength) ?? 2_000, 100, 10_000), maxRecallPerTurn: parsePositiveInt(cfg.maxRecallPerTurn) ?? 10, captureAssistant: cfg.captureAssistant === true, retrieval: typeof cfg.retrieval === "object" && cfg.retrieval !== null ? 
cfg.retrieval as any : undefined, diff --git a/test/auto-recall-query-length.test.mjs b/test/auto-recall-query-length.test.mjs new file mode 100644 index 00000000..290dac87 --- /dev/null +++ b/test/auto-recall-query-length.test.mjs @@ -0,0 +1,113 @@ +import { describe, it } from "node:test"; +import assert from "node:assert/strict"; +import path from "node:path"; +import { fileURLToPath } from "node:url"; +import jitiFactory from "jiti"; + +const testDir = path.dirname(fileURLToPath(import.meta.url)); +const pluginSdkStubPath = path.resolve(testDir, "helpers", "openclaw-plugin-sdk-stub.mjs"); +const jiti = jitiFactory(import.meta.url, { + interopDefault: true, + alias: { + "openclaw/plugin-sdk": pluginSdkStubPath, + }, +}); +const { parsePluginConfig } = jiti("../index.ts"); + +function baseConfig() { + return { + embedding: { + apiKey: "test-api-key", + }, + }; +} + +describe("autoRecallMaxQueryLength", () => { + it("defaults to 2000 when not specified", () => { + const parsed = parsePluginConfig(baseConfig()); + assert.equal(parsed.autoRecallMaxQueryLength, 2000); + }); + + it("clamps values below minimum (100) to 100", () => { + const parsed = parsePluginConfig({ + ...baseConfig(), + autoRecallMaxQueryLength: 50, + }); + assert.equal(parsed.autoRecallMaxQueryLength, 100); + }); + + it("clamps values above maximum (10000) to 10000", () => { + const parsed = parsePluginConfig({ + ...baseConfig(), + autoRecallMaxQueryLength: 20000, + }); + assert.equal(parsed.autoRecallMaxQueryLength, 10000); + }); + + it("accepts value within valid range", () => { + const parsed = parsePluginConfig({ + ...baseConfig(), + autoRecallMaxQueryLength: 5000, + }); + assert.equal(parsed.autoRecallMaxQueryLength, 5000); + }); + + it("clamps boundary minimum (exactly 100) to 100", () => { + const parsed = parsePluginConfig({ + ...baseConfig(), + autoRecallMaxQueryLength: 100, + }); + assert.equal(parsed.autoRecallMaxQueryLength, 100); + }); + + it("clamps boundary maximum (exactly 10000) 
to 10000", () => { + const parsed = parsePluginConfig({ + ...baseConfig(), + autoRecallMaxQueryLength: 10000, + }); + assert.equal(parsed.autoRecallMaxQueryLength, 10000); + }); + + it("handles non-integer values by flooring and clamping", () => { + const parsed = parsePluginConfig({ + ...baseConfig(), + autoRecallMaxQueryLength: 150.7, + }); + assert.equal(parsed.autoRecallMaxQueryLength, 150); + }); + + it("treats negative values as missing (use default 2000)", () => { + // parsePositiveInt returns undefined for non-positive values, + // so -500 falls through to the ?? 2000 default, which is within range + const parsed = parsePluginConfig({ + ...baseConfig(), + autoRecallMaxQueryLength: -500, + }); + assert.equal(parsed.autoRecallMaxQueryLength, 2000); + }); +}); + +// Unit test: verify truncation logic behaves correctly +describe("autoRecallMaxQueryLength truncation behavior", () => { + it("truncates a 100-char string to 50 when maxQueryLen=50", () => { + const maxQueryLen = 50; + const input = "a".repeat(100); + const truncated = input.length > maxQueryLen ? input.slice(0, maxQueryLen) : input; + assert.equal(truncated.length, 50); + assert.equal(truncated, "a".repeat(50)); + }); + + it("does not truncate when string is shorter than maxQueryLen", () => { + const maxQueryLen = 2000; + const input = "a".repeat(100); + const truncated = input.length > maxQueryLen ? input.slice(0, maxQueryLen) : input; + assert.equal(truncated.length, 100); + }); + + it("exact boundary: 2000-char string stays unchanged when maxQueryLen=2000", () => { + const maxQueryLen = 2000; + const input = "b".repeat(2000); + const truncated = input.length > maxQueryLen ? 
+      input.slice(0, maxQueryLen) : input;
+    assert.equal(truncated.length, 2000);
+  });
+});
diff --git a/test/import-markdown/import-markdown.test.mjs b/test/import-markdown/import-markdown.test.mjs
new file mode 100644
index 00000000..860d7092
--- /dev/null
+++ b/test/import-markdown/import-markdown.test.mjs
@@ -0,0 +1,342 @@
+/**
+ * import-markdown.test.mjs
+ * Integration tests for the import-markdown CLI command.
+ * Tests: BOM handling, CRLF normalization, bullet formats, dedup logic,
+ * minTextLength, importance, and dry-run mode.
+ *
+ * Run: node --experimental-vm-modules node_modules/.bin/jest test/import-markdown/import-markdown.test.mjs
+ */
+import { jest } from "@jest/globals";
+
+// === Mock implementations ====================================================
+
+const storedRecords = [];
+const mockEmbedder = {
+  embedQuery: jest.fn(async (text) => {
+    // Return a deterministic 384-dim fake vector (LCG seeded by text hash)
+    const dim = 384;
+    const vec = [];
+    let seed = hashString(text);
+    for (let i = 0; i < dim; i++) {
+      seed = (seed * 1664525 + 1013904223) & 0xffffffff;
+      vec.push((seed >>> 8) / 16777215 - 1);
+    }
+    return vec;
+  }),
+  embedPassage: jest.fn(async (text) => {
+    // Use same deterministic vector as embedQuery for test consistency
+    const dim = 384;
+    const vec = [];
+    let seed = hashString(text);
+    for (let i = 0; i < dim; i++) {
+      seed = (seed * 1664525 + 1013904223) & 0xffffffff;
+      vec.push((seed >>> 8) / 16777215 - 1);
+    }
+    return vec;
+  }),
+};
+
+const mockStore = {
+  storedRecords,
+  async store(entry) {
+    storedRecords.push({ ...entry });
+  },
+  async bm25Search(query, limit = 1, scopeFilter = []) {
+    const q = query.toLowerCase();
+    return storedRecords
+      .filter((r) => {
+        if (scopeFilter.length > 0 && !scopeFilter.includes(r.scope)) return false;
+        return r.text.toLowerCase().includes(q);
+      })
+      .slice(0, limit)
+      .map((r) => ({ entry: r, score: r.text.toLowerCase() === q ? 1.0 : 0.8 }));
+  },
+  reset() {
+    storedRecords.length = 0;
+  },
+};
+
+function hashString(s) {
+  let h = 5381;
+  for (let i = 0; i < s.length; i++) {
+    h = ((h << 5) + h) + s.charCodeAt(i);
+    h = h & 0xffffffff;
+  }
+  return h;
+}
+
+// === Test helpers ============================================================
+
+import { readFile, writeFile, mkdir } from "node:fs/promises";
+import { join } from "node:path";
+import { tmpdir } from "node:os";
+
+let testWorkspaceDir;
+
+// BUG FIX: declared at module scope (the original declared this inside the
+// describe() callback, so the module-level runImportMarkdown adapter below
+// threw a ReferenceError when called).
+let importMarkdown;
+
+async function setupWorkspace(name) {
+  // Files must be created at: <home>/workspace/<name>/
+  // because runImportMarkdown looks for path.join(openclawHome, "workspace")
+  const wsDir = join(testWorkspaceDir, "workspace", name);
+  await mkdir(wsDir, { recursive: true });
+  return wsDir;
+}
+
+async function writeMem(wsDir, content) {
+  await writeFile(join(wsDir, "MEMORY.md"), content, "utf-8");
+}
+
+// === Setup / Teardown ========================================================
+
+beforeAll(async () => {
+  testWorkspaceDir = join(tmpdir(), "import-markdown-test-" + Date.now());
+  await mkdir(testWorkspaceDir, { recursive: true });
+});
+
+afterEach(async () => {
+  mockStore.reset();
+  mockEmbedder.embedQuery.mockClear();
+  mockEmbedder.embedPassage.mockClear();
+});
+
+afterAll(async () => {
+  // Cleanup is handled by OS (tmpdir cleanup)
+});
+
+// === Tests ===================================================================
+
+describe("import-markdown CLI", () => {
+  beforeAll(async () => {
+    // We test the core logic directly instead of via CLI to avoid complex setup.
+    // BUG FIX: this file lives in test/import-markdown/, so cli.ts is two
+    // levels up — "../cli.ts" resolved to the non-existent test/cli.ts.
+    const mod = await import("../../cli.ts");
+    importMarkdown = mod.runImportMarkdown ?? null;
+  });
+
+  describe("BOM handling", () => {
+    it("strips UTF-8 BOM from file content", async () => {
+      // UTF-8 BOM: bytes EF BB BF (written as \ufeff in a JS string)
+      const wsDir = await setupWorkspace("bom-test");
+      const bom = "\ufeff";
+      await writeFile(join(wsDir, "MEMORY.md"), bom + "- BOM-prefixed memory entry\n", "utf-8");
+
+      const ctx = { embedder: mockEmbedder, store: mockStore };
+      const { imported } = await runImportMarkdown(ctx, {
+        openclawHome: testWorkspaceDir,
+        workspaceGlob: "bom-test",
+      });
+
+      // After BOM stripping the bullet line must be recognized and imported
+      expect(imported).toBeGreaterThanOrEqual(1);
+    });
+  });
+
+  describe("CRLF normalization", () => {
+    it("handles Windows CRLF line endings", async () => {
+      const wsDir = await setupWorkspace("crlf-test");
+      await writeFile(join(wsDir, "MEMORY.md"), "- Windows CRLF entry one\r\n- CRLF entry two\r\n", "utf-8");
+
+      const ctx = { embedder: mockEmbedder, store: mockStore };
+      const { imported } = await runImportMarkdown(ctx, {
+        openclawHome: testWorkspaceDir,
+        workspaceGlob: "crlf-test",
+      });
+
+      expect(imported).toBe(2);
+    });
+  });
+
+  describe("Bullet format support", () => {
+    it("imports dash, star, and plus bullet formats", async () => {
+      const wsDir = await setupWorkspace("bullet-formats");
+      await writeFile(join(wsDir, "MEMORY.md"),
+        "- Dash format bullet\n" +
+        "* Star format bullet\n" +
+        "+ Plus format bullet\n",
+        "utf-8");
+
+      const ctx = { embedder: mockEmbedder, store: mockStore };
+      const { imported, skipped } = await runImportMarkdown(ctx, {
+        openclawHome: testWorkspaceDir,
+        workspaceGlob: "bullet-formats",
+      });
+
+      expect(imported).toBe(3);
+      expect(skipped).toBe(0);
+    });
+  });
+
+  describe("minTextLength option", () => {
+    it("skips lines shorter than minTextLength", async () => {
+      const wsDir = await setupWorkspace("min-len-test");
+      // Two entries under 5 chars, one well over
+      await writeFile(join(wsDir, "MEMORY.md"),
+        "- Hi\n- Ok!\n- This entry is definitely long enough\n",
+        "utf-8");
+
+      const ctx = { embedder: mockEmbedder, store: mockStore };
+      const { imported, skipped } = await runImportMarkdown(ctx, {
+        openclawHome: testWorkspaceDir,
+        workspaceGlob: "min-len-test",
+        minTextLength: 5,
+      });
+
+      expect(imported).toBe(1); // the long entry
+      expect(skipped).toBe(2);  // "Hi", "Ok!"
+    });
+  });
+
+  describe("importance option", () => {
+    it("uses custom importance value", async () => {
+      const wsDir = await setupWorkspace("importance-test");
+      await writeFile(join(wsDir, "MEMORY.md"), "- Entry with custom importance\n", "utf-8");
+
+      const ctx = { embedder: mockEmbedder, store: mockStore };
+      await runImportMarkdown(ctx, {
+        openclawHome: testWorkspaceDir,
+        workspaceGlob: "importance-test",
+        importance: 0.9,
+      });
+
+      expect(mockStore.storedRecords[0].importance).toBe(0.9);
+    });
+  });
+
+  describe("dedup logic", () => {
+    it("skips already-imported entries in same scope when dedup is enabled", async () => {
+      const wsDir = await setupWorkspace("dedup-test");
+      await writeFile(join(wsDir, "MEMORY.md"), "- First unique dedup entry\n", "utf-8");
+
+      const ctx = { embedder: mockEmbedder, store: mockStore };
+
+      // First import
+      await runImportMarkdown(ctx, {
+        openclawHome: testWorkspaceDir,
+        workspaceGlob: "dedup-test",
+        dedup: false,
+      });
+      expect(mockStore.storedRecords.length).toBe(1);
+
+      // Second import WITH dedup — should skip the duplicate
+      const { imported, skipped } = await runImportMarkdown(ctx, {
+        openclawHome: testWorkspaceDir,
+        workspaceGlob: "dedup-test",
+        dedup: true,
+      });
+
+      expect(imported).toBe(0);
+      expect(skipped).toBe(1);
+      expect(mockStore.storedRecords.length).toBe(1); // Still only 1
+    });
+
+    it("imports same text into different scope even with dedup enabled", async () => {
+      const wsDir = await setupWorkspace("dedup-scope-test");
+      await writeFile(join(wsDir, "MEMORY.md"), "- Cross-scope dedup entry\n", "utf-8");
+
+      const ctx = { embedder: mockEmbedder, store: mockStore };
+
+      // First import to scope-A
+      await runImportMarkdown(ctx, {
+        openclawHome: testWorkspaceDir,
+        workspaceGlob: "dedup-scope-test",
+        scope: "scope-A",
+        dedup: false,
+      });
+      expect(mockStore.storedRecords.length).toBe(1);
+
+      // Second import to scope-B — should NOT skip (different scope)
+      const { imported } = await runImportMarkdown(ctx, {
+        openclawHome: testWorkspaceDir,
+        workspaceGlob: "dedup-scope-test",
+        scope: "scope-B",
+        dedup: true,
+      });
+
+      expect(imported).toBe(1);
+      expect(mockStore.storedRecords.length).toBe(2); // Two entries, different scopes
+    });
+  });
+
+  describe("dry-run mode", () => {
+    it("does not write to store in dry-run mode", async () => {
+      const wsDir = await setupWorkspace("dryrun-test");
+      await writeFile(join(wsDir, "MEMORY.md"), "- Dry run candidate entry\n", "utf-8");
+
+      const ctx = { embedder: mockEmbedder, store: mockStore };
+      const { imported } = await runImportMarkdown(ctx, {
+        openclawHome: testWorkspaceDir,
+        workspaceGlob: "dryrun-test",
+        dryRun: true,
+      });
+
+      expect(imported).toBe(1);
+      expect(mockStore.storedRecords.length).toBe(0); // No actual write
+    });
+  });
+
+  describe("continue on error", () => {
+    it("continues processing after a store failure", async () => {
+      const wsDir = await setupWorkspace("error-test");
+      await writeFile(join(wsDir, "MEMORY.md"),
+        "- Error entry one\n- Error entry two\n- Error entry three\n",
+        "utf-8");
+
+      let callCount = 0;
+      const errorStore = {
+        async store(entry) {
+          callCount++;
+          if (callCount === 2) throw new Error("Simulated failure");
+          mockStore.storedRecords.push({ ...entry });
+        },
+        async bm25Search(...args) {
+          return mockStore.bm25Search(...args);
+        },
+      };
+
+      const ctx = { embedder: mockEmbedder, store: errorStore };
+      const { imported, skipped } = await runImportMarkdown(ctx, {
+        openclawHome: testWorkspaceDir,
+        workspaceGlob: "error-test",
+      });
+
+      // One failed (the second call), two should have succeeded
+      expect(imported).toBeGreaterThanOrEqual(2);
+      expect(skipped).toBeGreaterThanOrEqual(1);
+    });
+  });
+});
+
+// === Test runner helper ======================================================
+
+/**
+ * Thin adapter: delegates to the production runImportMarkdown exported from
+ * ../../cli.ts. Keeps existing test call signatures working while ensuring
+ * tests always exercise the real implementation (no duplicate logic drift).
+ */
+async function runImportMarkdown(context, options = {}) {
+  if (typeof importMarkdown === "function") {
+    // Production signature: runImportMarkdown(ctx, workspaceGlob, options).
+    // Tests pass workspaceGlob inside the options bag; the production parameter
+    // is `string | undefined`, so pass undefined (not null) when absent.
+    return importMarkdown(
+      context,
+      options.workspaceGlob ?? undefined,
+      {
+        dryRun: !!options.dryRun,
+        scope: options.scope,
+        openclawHome: options.openclawHome,
+        dedup: !!options.dedup,
+        minTextLength: String(options.minTextLength ?? 5),
+        importance: String(options.importance ?? 0.7),
+      },
+    );
+  }
+  return { imported: 0, skipped: 0, foundFiles: 0 };
+}