From 097ae1c9649773cb1df2ec4147e427c7d7e43f47 Mon Sep 17 00:00:00 2001 From: Ryan Malia Date: Thu, 12 Mar 2026 00:14:21 -0700 Subject: [PATCH 1/7] fix: allow hyphenated words in vec/hyde queries (#383) The validateSemanticQuery regex rejected any hyphen followed by a word character, blocking common compound words (real-time, multi-client, kebab-case identifiers like better-sqlite3). Tighten the check to only match negation syntax at token boundaries (start of string or after whitespace). See https://github.com/tobi/qmd/issues/383 Co-Authored-By: Claude Opus 4.6 --- src/store.ts | 5 +-- test/structured-search.test.ts | 58 +++++++++++++++++++++++++++++++++- 2 files changed, 60 insertions(+), 3 deletions(-) diff --git a/src/store.ts b/src/store.ts index d1b24eb3..2dd64d41 100644 --- a/src/store.ts +++ b/src/store.ts @@ -2894,8 +2894,9 @@ function buildFTS5Query(query: string): string | null { * Returns error message if invalid, null if valid. */ export function validateSemanticQuery(query: string): string | null { - // Check for negation syntax - if (/-\w/.test(query) || /-"/.test(query)) { + // Check for negation syntax — only at token boundaries (start of string or after whitespace). + // Hyphenated words like "real-time" or "write-ahead" must not trigger this. + if (/(^|\s)-[\w"]/.test(query)) { return 'Negation (-term) is not supported in vec/hyde queries. Use lex for exclusions.'; } return null; diff --git a/test/structured-search.test.ts b/test/structured-search.test.ts index d7042103..70da7fd1 100644 --- a/test/structured-search.test.ts +++ b/test/structured-search.test.ts @@ -361,17 +361,73 @@ describe("lex query syntax", () => { expect(validateSemanticQuery("what is the CAP theorem")).toBeNull(); }); - test("rejects negation syntax", () => { + test("rejects negation at start of query", () => { + expect(validateSemanticQuery("-redis connection pooling")).toContain("Negation"); + }); + + test("rejects negation after space", () => { expect(validateSemanticQuery("performance -sports")).toContain("Negation"); + }); + + test("rejects negated quoted phrase", () => { expect(validateSemanticQuery('-"exact phrase"')).toContain("Negation"); }); + test("rejects multiple negations", () => { + expect(validateSemanticQuery("error handling -java -python")).toContain("Negation"); + }); + + test("rejects negation after leading whitespace", () => { + expect(validateSemanticQuery(" -term at start")).toContain("Negation"); + }); + + test("rejects negation after tab", () => { + expect(validateSemanticQuery("foo\t-bar")).toContain("Negation"); + }); + + test("accepts hyphenated compound words", () => { + expect(validateSemanticQuery("long-lived server shared across clients")).toBeNull(); + expect(validateSemanticQuery("real-time voice processing pipeline")).toBeNull(); + expect(validateSemanticQuery("how does the rate-limiter handle burst traffic")).toBeNull(); + expect(validateSemanticQuery("self-hosted deployment options")).toBeNull(); + expect(validateSemanticQuery("multi-client session architecture")).toBeNull(); + expect(validateSemanticQuery("cross-platform compatibility")).toBeNull(); + expect(validateSemanticQuery("non-blocking I/O model")).toBeNull(); + expect(validateSemanticQuery("in-memory caching strategy")).toBeNull(); + expect(validateSemanticQuery("write-ahead log for crash recovery")).toBeNull(); + expect(validateSemanticQuery("copy-on-write semantics")).toBeNull(); + }); + + test("accepts multiple hyphens in a phrase", () => { + expect(validateSemanticQuery("state-of-the-art embedding models")).toBeNull(); + expect(validateSemanticQuery("end-to-end testing")).toBeNull(); + expect(validateSemanticQuery("man-in-the-middle attack prevention")).toBeNull(); + }); + + test("accepts multiple hyphenated words in one query", () => { + expect(validateSemanticQuery("built-in vs add-on features")).toBeNull(); + }); + + test("accepts short hyphenated terms", () => { + expect(validateSemanticQuery("A-B testing for ML models")).toBeNull(); + expect(validateSemanticQuery("e-commerce platform")).toBeNull(); + }); + + test("accepts bare hyphen without word character", () => { + expect(validateSemanticQuery("-")).toBeNull(); + }); test("accepts hyde-style hypothetical answers", () => { expect(validateSemanticQuery( "The CAP theorem states that a distributed system cannot simultaneously provide consistency, availability, and partition tolerance." )).toBeNull(); }); + + test("accepts hyde with hyphenated words", () => { + expect(validateSemanticQuery( + "HTTP transport runs a single long-lived daemon shared across all clients, avoiding per-session model re-loading." + )).toBeNull(); + }); }); describe("validateLexQuery", () => { From aee44af7acf62487d85d2c9f0b7ee40822372458 Mon Sep 17 00:00:00 2001 From: Sebastian Kouba Date: Thu, 19 Mar 2026 10:52:58 +0100 Subject: [PATCH 2/7] Avoid SQLite startup races during parallel Bun qmd initialization --- CHANGELOG.md | 4 ++ src/store.ts | 36 ++++++++++- test/cli.test.ts | 64 ++++++++++++++++-- test/store.helpers.unit.test.ts | 111 ++++++++++++++++++++++++++++++++ 4 files changed, 209 insertions(+), 6 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 3e56c278..d72bfcb8 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -17,6 +17,10 @@ ### Fixes - Fix paths in nix flake +- Configure SQLite connection pragmas before probing `sqlite-vec`, avoid + resetting `PRAGMA journal_mode = WAL` on every query startup, and tolerate + another process winning the WAL transition race so parallel readers don't + fail during initialization with transient `database is locked` errors. - Sync stale `bun.lock` (`better-sqlite3` 11.x → 12.x). CI and release script now use `--frozen-lockfile` to prevent recurrence. #386 (thanks @Mic92) diff --git a/src/store.ts b/src/store.ts index 2dd64d41..80ba6e7b 100644 --- a/src/store.ts +++ b/src/store.ts @@ -721,7 +721,41 @@ export function verifySqliteVecLoaded(db: Database): void { let _sqliteVecAvailable: boolean | null = null; +const DEFAULT_BUSY_TIMEOUT_MS = 5_000; + +function isBusyLockError(err: unknown): boolean { + const message = getErrorMessage(err).toLowerCase(); + return message.includes("database is locked") || message.includes("sqlite_busy"); +} + +export function configureConnectionPragmas(db: Database): void { + db.exec(`PRAGMA busy_timeout = ${DEFAULT_BUSY_TIMEOUT_MS}`); + + let journalModeRow = db.prepare("PRAGMA journal_mode").get() as { journal_mode?: string } | null; + if (journalModeRow?.journal_mode?.toLowerCase() !== "wal") { + try { + db.exec("PRAGMA journal_mode = WAL"); + } catch (err) { + if (!isBusyLockError(err)) { + throw err; + } + + // Two qmd processes can both observe a non-WAL database and then race to + // become the one that flips the shared file into WAL mode. Losing that + // race should not abort startup; re-probe for observability, then proceed. + journalModeRow = db.prepare("PRAGMA journal_mode").get() as { journal_mode?: string } | null; + } + } + + db.exec("PRAGMA foreign_keys = ON"); +} + function initializeDatabase(db: Database): void { + // Configure the connection before any probe queries. Parallel qmd processes + // can race during startup; without a busy timeout even read-only probe work + // like `SELECT vec_version()` can fail immediately with SQLITE_BUSY. + configureConnectionPragmas(db); + try { loadSqliteVec(db); verifySqliteVecLoaded(db); @@ -731,8 +765,6 @@ function initializeDatabase(db: Database): void { _sqliteVecAvailable = false; console.warn(getErrorMessage(err)); } - db.exec("PRAGMA journal_mode = WAL"); - db.exec("PRAGMA foreign_keys = ON"); // Drop legacy tables that are now managed in YAML db.exec(`DROP TABLE IF EXISTS path_contexts`); diff --git a/test/cli.test.ts b/test/cli.test.ts index 7d6f5267..cb4fd66d 100644 --- a/test/cli.test.ts +++ b/test/cli.test.ts @@ -11,7 +11,7 @@ import { existsSync, lstatSync, readFileSync, symlinkSync, writeFileSync, unlink import { tmpdir } from "os"; import { join, dirname } from "path"; import { fileURLToPath } from "url"; -import { spawn } from "child_process"; +import { spawn, spawnSync } from "child_process"; import { setTimeout as sleep } from "timers/promises"; // Test fixtures directory and database path @@ -33,16 +33,19 @@ const tsxBin = (() => { } return join(process.cwd(), "node_modules", ".bin", "tsx"); })(); +const bunBin = "bun"; +const bunAvailable = spawnSync(bunBin, ["--version"], { stdio: "ignore" }).status === 0; // Helper to run qmd command with test database -async function runQmd( - args: string[], +async function runQmdCommand( + command: string, + commandArgs: string[], options: { cwd?: string; env?: Record; dbPath?: string; configDir?: string } = {} ): Promise<{ stdout: string; stderr: string; exitCode: number }> { const workingDir = options.cwd || fixturesDir; const dbPath = options.dbPath || testDbPath; const configDir = options.configDir || testConfigDir; - const proc = spawn(tsxBin, [qmdScript, ...args], { + const proc = spawn(command, commandArgs, { cwd: workingDir, env: { ...process.env, @@ -76,6 +79,20 @@ async function runQmd( return { stdout, stderr, exitCode }; } +async function runQmd( + args: string[], + options: { cwd?: string; env?: Record; dbPath?: string; configDir?: string } = {} +): Promise<{ stdout: string; stderr: string; exitCode: number }> { + return runQmdCommand(tsxBin, [qmdScript, ...args], options); +} + +async function runQmdWithBun( + args: string[], + options: { cwd?: string; env?: Record; dbPath?: string; configDir?: string } = {} +): Promise<{ stdout: string; stderr: string; exitCode: number }> { + return runQmdCommand(bunBin, [qmdScript, ...args], options); +} + // Get a fresh database path for isolated tests function getFreshDbPath(): string { testCounter++; @@ -224,6 +241,45 @@ beforeEach(async () => { ); }); +describe("CLI parallel startup regression", () => { + const parallelStartupTest = bunAvailable ? test : test.skip; + + function expectSuccessfulStatus(result: { stdout: string; stderr: string; exitCode: number }): void { + expect(result.exitCode).toBe(0); + expect(result.stderr).not.toContain("database is locked"); + expect(result.stderr).not.toContain("SQLITE_BUSY"); + expect(result.stderr).not.toContain("sqlite-vec probe failed"); + expect(result.stdout).toContain("QMD Status"); + } + + parallelStartupTest("allows two Bun qmd processes to initialize the same fresh DB concurrently", async () => { + const { dbPath, configDir } = await createIsolatedTestEnv("parallel-startup-fresh"); + + const [first, second] = await Promise.all([ + runQmdWithBun(["status"], { dbPath, configDir }), + runQmdWithBun(["status"], { dbPath, configDir }), + ]); + + expectSuccessfulStatus(first); + expectSuccessfulStatus(second); + }, 15000); + + parallelStartupTest("allows two Bun qmd processes to initialize the same existing DB concurrently", async () => { + const { dbPath, configDir } = await createIsolatedTestEnv("parallel-startup-existing"); + + const warmup = await runQmdWithBun(["status"], { dbPath, configDir }); + expectSuccessfulStatus(warmup); + + const [first, second] = await Promise.all([ + runQmdWithBun(["status"], { dbPath, configDir }), + runQmdWithBun(["status"], { dbPath, configDir }), + ]); + + expectSuccessfulStatus(first); + expectSuccessfulStatus(second); + }, 15000); +}); + describe("CLI Help", () => { test("shows help with --help flag", async () => { const { stdout, exitCode } = await runQmd(["--help"]); diff --git a/test/store.helpers.unit.test.ts b/test/store.helpers.unit.test.ts index e3c23739..ac0fd702 100644 --- a/test/store.helpers.unit.test.ts +++ b/test/store.helpers.unit.test.ts @@ -16,6 +16,7 @@ import { isDocid, handelize, cleanupOrphanedVectors, + configureConnectionPragmas, } from "../src/store"; // ============================================================================= @@ -109,6 +110,116 @@ describe("cleanupOrphanedVectors", () => { }); }); +// ============================================================================= +// Connection pragma tests +// ============================================================================= + +describe("configureConnectionPragmas", () => { + test("skips resetting journal mode when database is already in WAL mode", () => { + const execCalls: string[] = []; + const db = { + exec: (sql: string) => execCalls.push(sql), + prepare: (sql: string) => { + expect(sql).toBe("PRAGMA journal_mode"); + return { get: () => ({ journal_mode: "wal" }) }; + }, + } as any; + + configureConnectionPragmas(db); + + expect(execCalls).toEqual([ + "PRAGMA busy_timeout = 5000", + "PRAGMA foreign_keys = ON", + ]); + }); + + test("enables WAL once when database is not already in WAL mode", () => { + const execCalls: string[] = []; + const db = { + exec: (sql: string) => execCalls.push(sql), + prepare: (sql: string) => { + expect(sql).toBe("PRAGMA journal_mode"); + return { get: () => ({ journal_mode: "delete" }) }; + }, + } as any; + + configureConnectionPragmas(db); + + expect(execCalls).toEqual([ + "PRAGMA busy_timeout = 5000", + "PRAGMA journal_mode = WAL", + "PRAGMA foreign_keys = ON", + ]); + }); + + test("tolerates a busy WAL switch when another process wins the race", () => { + const execCalls: string[] = []; + let journalModeReads = 0; + const db = { + exec: (sql: string) => { + execCalls.push(sql); + if (sql === "PRAGMA journal_mode = WAL") { + throw new Error("database is locked"); + } + }, + prepare: (sql: string) => { + expect(sql).toBe("PRAGMA journal_mode"); + return { + get: () => ({ journal_mode: journalModeReads++ === 0 ? "delete" : "wal" }), + }; + }, + } as any; + + expect(() => configureConnectionPragmas(db)).not.toThrow(); + expect(execCalls).toEqual([ + "PRAGMA busy_timeout = 5000", + "PRAGMA journal_mode = WAL", + "PRAGMA foreign_keys = ON", + ]); + }); + + test("continues when WAL switch is busy and follow-up probe still reports non-WAL", () => { + const execCalls: string[] = []; + const db = { + exec: (sql: string) => { + execCalls.push(sql); + if (sql === "PRAGMA journal_mode = WAL") { + throw new Error("SQLITE_BUSY_RECOVERY: database is locked"); + } + }, + prepare: (sql: string) => { + expect(sql).toBe("PRAGMA journal_mode"); + return { + get: () => ({ journal_mode: "delete" }), + }; + }, + } as any; + + expect(() => configureConnectionPragmas(db)).not.toThrow(); + expect(execCalls).toEqual([ + "PRAGMA busy_timeout = 5000", + "PRAGMA journal_mode = WAL", + "PRAGMA foreign_keys = ON", + ]); + }); + + test("rethrows non-lock WAL errors", () => { + const db = { + exec: (sql: string) => { + if (sql === "PRAGMA journal_mode = WAL") { + throw new Error("disk I/O error"); + } + }, + prepare: (sql: string) => { + expect(sql).toBe("PRAGMA journal_mode"); + return { get: () => ({ journal_mode: "delete" }) }; + }, + } as any; + + expect(() => configureConnectionPragmas(db)).toThrow("disk I/O error"); + }); +}); + // ============================================================================= // Handelize Tests // ============================================================================= From a882d6b514b7147570194c20b3965f79252df8ec Mon Sep 17 00:00:00 2001 From: DmitryPogodaev Date: Wed, 18 Mar 2026 14:02:13 +0000 Subject: [PATCH 3/7] feat(mcp): expose skipRerank and candidateLimit in query tool On CPU-only servers, LLM reranking (0.6B model) takes ~2s per document, making the query tool unusable with timeouts under 30s. This commit: - Adds `skipRerank` boolean parameter to the MCP `query` tool schema. When true, returns results scored by RRF fusion only (no LLM rerank). - Passes `candidateLimit` through to structuredSearch (was declared in schema but never forwarded to the store). Use case: automated RAG hooks with 1-2s timeouts on VPS without GPU. With skipRerank=true, queries complete in 30-50ms instead of 30-40s. --- src/index.ts | 3 +++ src/mcp/server.ts | 8 ++++++-- 2 files changed, 9 insertions(+), 2 deletions(-) diff --git a/src/index.ts b/src/index.ts index 02ec51bd..7c9d0bee 100644 --- a/src/index.ts +++ b/src/index.ts @@ -161,6 +161,8 @@ export interface SearchOptions { limit?: number; /** Minimum score threshold */ minScore?: number; + /** Maximum candidates to rerank (default: 40) */ + candidateLimit?: number; /** Include explain traces */ explain?: boolean; /** Chunk strategy: "auto" (default, uses AST for code files) or "regex" (legacy) */ @@ -393,6 +395,7 @@ export async function createStore(options: StoreOptions): Promise { collections: collections.length > 0 ? collections : undefined, limit: opts.limit, minScore: opts.minScore, + candidateLimit: opts.candidateLimit, explain: opts.explain, intent: opts.intent, skipRerank, diff --git a/src/mcp/server.ts b/src/mcp/server.ts index b7fada76..015f0998 100644 --- a/src/mcp/server.ts +++ b/src/mcp/server.ts @@ -292,6 +292,9 @@ Intent-aware lex (C++ performance, not sports): candidateLimit: z.number().optional().describe( "Maximum candidates to rerank (default: 40, lower = faster but may miss results)" ), + skipRerank: z.boolean().optional().describe( + "Skip LLM reranking and use RRF fusion scores only. Much faster on CPU-only servers." + ), collections: z.array(z.string()).optional().describe("Filter to collections (OR match)"), intent: z.string().optional().describe( "Background context to disambiguate the query. Example: query='performance', intent='web page load times and Core Web Vitals'. Does not search on its own." @@ -301,7 +304,7 @@ Intent-aware lex (C++ performance, not sports): ), }, }, - async ({ searches, limit, minScore, candidateLimit, collections, intent, rerank }) => { + async ({ searches, limit, minScore, candidateLimit, skipRerank, collections, intent, rerank }) => { // Map to internal format const queries: ExpandedQuery[] = searches.map(s => ({ type: s.type, @@ -316,8 +319,9 @@ Intent-aware lex (C++ performance, not sports): collections: effectiveCollections.length > 0 ? effectiveCollections : undefined, limit, minScore, - rerank, + candidateLimit, intent, + rerank: skipRerank ? false : rerank, }); // Use first lex or vec query for snippet extraction From 62b170782ed3fe514f58d8ca910712789ba3a19a Mon Sep 17 00:00:00 2001 From: Sebastian Kouba Date: Thu, 19 Mar 2026 11:55:10 +0100 Subject: [PATCH 4/7] Narrow the Bun startup regression harness --- test/cli.test.ts | 35 +++++++++++++++++++------------- test/parallel-startup-harness.ts | 21 +++++++++++++++++++ 2 files changed, 42 insertions(+), 14 deletions(-) create mode 100644 test/parallel-startup-harness.ts diff --git a/test/cli.test.ts b/test/cli.test.ts index cb4fd66d..b76ac4c9 100644 --- a/test/cli.test.ts +++ b/test/cli.test.ts @@ -25,6 +25,7 @@ let testCounter = 0; // Unique counter for each test run const thisDir = dirname(fileURLToPath(import.meta.url)); const projectRoot = join(thisDir, ".."); const qmdScript = join(projectRoot, "src", "cli", "qmd.ts"); +const parallelStartupHarness = join(projectRoot, "test", "parallel-startup-harness.ts"); // Resolve tsx binary from project's node_modules (not cwd-dependent) const tsxBin = (() => { const candidate = join(projectRoot, "node_modules", ".bin", "tsx"); @@ -93,6 +94,12 @@ async function runQmdWithBun( return runQmdCommand(bunBin, [qmdScript, ...args], options); } +async function runParallelStartupHarness( + dbPath: string +): Promise<{ stdout: string; stderr: string; exitCode: number }> { + return runQmdCommand(bunBin, [parallelStartupHarness, dbPath], { cwd: projectRoot, dbPath }); +} + // Get a fresh database path for isolated tests function getFreshDbPath(): string { testCounter++; @@ -244,39 +251,39 @@ beforeEach(async () => { describe("CLI parallel startup regression", () => { const parallelStartupTest = bunAvailable ? test : test.skip; - function expectSuccessfulStatus(result: { stdout: string; stderr: string; exitCode: number }): void { + function expectSuccessfulStartup(result: { stdout: string; stderr: string; exitCode: number }): void { expect(result.exitCode).toBe(0); expect(result.stderr).not.toContain("database is locked"); expect(result.stderr).not.toContain("SQLITE_BUSY"); expect(result.stderr).not.toContain("sqlite-vec probe failed"); - expect(result.stdout).toContain("QMD Status"); + expect(result.stdout).toContain("startup-ok"); } parallelStartupTest("allows two Bun qmd processes to initialize the same fresh DB concurrently", async () => { - const { dbPath, configDir } = await createIsolatedTestEnv("parallel-startup-fresh"); + const dbPath = getFreshDbPath(); const [first, second] = await Promise.all([ - runQmdWithBun(["status"], { dbPath, configDir }), - runQmdWithBun(["status"], { dbPath, configDir }), + runParallelStartupHarness(dbPath), + runParallelStartupHarness(dbPath), ]); - expectSuccessfulStatus(first); - expectSuccessfulStatus(second); + expectSuccessfulStartup(first); + expectSuccessfulStartup(second); }, 15000); parallelStartupTest("allows two Bun qmd processes to initialize the same existing DB concurrently", async () => { - const { dbPath, configDir } = await createIsolatedTestEnv("parallel-startup-existing"); + const dbPath = getFreshDbPath(); - const warmup = await runQmdWithBun(["status"], { dbPath, configDir }); - expectSuccessfulStatus(warmup); + const warmup = await runParallelStartupHarness(dbPath); + expectSuccessfulStartup(warmup); const [first, second] = await Promise.all([ - runQmdWithBun(["status"], { dbPath, configDir }), - runQmdWithBun(["status"], { dbPath, configDir }), + runParallelStartupHarness(dbPath), + runParallelStartupHarness(dbPath), ]); - expectSuccessfulStatus(first); - expectSuccessfulStatus(second); + expectSuccessfulStartup(first); + expectSuccessfulStartup(second); }, 15000); }); diff --git a/test/parallel-startup-harness.ts b/test/parallel-startup-harness.ts new file mode 100644 index 00000000..279b112c --- /dev/null +++ b/test/parallel-startup-harness.ts @@ -0,0 +1,21 @@ +import { createStore } from "../src/store.js"; + +const dbPath = process.argv[2]; + +if (!dbPath) { + console.error("Usage: bun test/parallel-startup-harness.ts "); + process.exit(1); +} + +let store: ReturnType | undefined; + +try { + store = createStore(dbPath); + store.getStatus(); + console.log("startup-ok"); +} catch (err) { + console.error(err instanceof Error ? err.message : String(err)); + process.exit(1); +} finally { + store?.close(); +} From fc03532173f75b166f368cbf5e30a0694e88a287 Mon Sep 17 00:00:00 2001 From: Claw Date: Wed, 1 Apr 2026 18:58:39 +0000 Subject: [PATCH 5/7] feat: support remote Ollama embeddings via OLLAMA_EMBED_URL When OLLAMA_EMBED_URL is set, all embedding and tokenization operations use the remote Ollama HTTP API instead of node-llama-cpp. This enables QMD on platforms without local GPU/Vulkan support (ARM64 VPS, Docker containers, CI runners) and with remote Ollama instances (Tailscale, LAN, Docker networks). Changes: - Add ollamaEmbed() and ollamaEmbedBatch() helper functions using Ollama /api/embed endpoint - Patch getEmbedding() to bypass node-llama-cpp when OLLAMA_EMBED_URL is set - Patch generateEmbeddings() with dedicated Ollama fast-path that skips withLLMSessionForLlm entirely - Patch expandQuery() to skip LLM-based HYDE query expansion (passes raw query as vector search) - Patch chunkDocumentByTokens() to use char-based estimation instead of local tokenizer - Patch vsearch and query CLI commands to skip withLLMSession wrapper Environment variables: OLLAMA_EMBED_URL - Ollama server URL (e.g. http://your-ollama:11434) OLLAMA_EMBED_MODEL - Model name (default: nomic-embed-text) Tested on ARM64 Oracle Cloud VPS with qwen3-embedding:0.6b on remote Ollama via Tailscale. 7,100+ documents indexed successfully. --- src/cli/qmd.ts | 22 +++++++-- src/store.ts | 118 +++++++++++++++++++++++++++++++++++++++++++++++-- 2 files changed, 133 insertions(+), 7 deletions(-) diff --git a/src/cli/qmd.ts b/src/cli/qmd.ts index 7216965a..f4b0ffe6 100755 --- a/src/cli/qmd.ts +++ b/src/cli/qmd.ts @@ -2183,7 +2183,7 @@ async function vectorSearch(query: string, opts: OutputOptions, _model: string = checkIndexHealth(store.db); - await withLLMSession(async () => { + const runSearch = async () => { let results = await vectorSearchQuery(store, query, { collection: singleCollection, limit: opts.all ? 500 : (opts.limit || 10), @@ -2221,7 +2221,14 @@ async function vectorSearch(query: string, opts: OutputOptions, _model: string = context: r.context, docid: r.docid, })), query, { ...opts, limit: results.length }); - }, { maxDuration: 10 * 60 * 1000, name: 'vectorSearch' }); + }; + + // Skip local LLM session when using remote Ollama for embeddings + if (process.env.OLLAMA_EMBED_URL) { + await runSearch(); + } else { + await withLLMSession(runSearch, { maxDuration: 10 * 60 * 1000, name: 'vectorSearch' }); + } } async function querySearch(query: string, opts: OutputOptions, _embedModel: string = DEFAULT_EMBED_MODEL, _rerankModel: string = DEFAULT_RERANK_MODEL): Promise { @@ -2239,7 +2246,7 @@ async function querySearch(query: string, opts: OutputOptions, _embedModel: stri // Intent can come from --intent flag or from intent: line in query document const intent = opts.intent || parsed?.intent; - await withLLMSession(async () => { + const runQuery = async () => { let results; if (parsed) { @@ -2359,7 +2366,14 @@ async function querySearch(query: string, opts: OutputOptions, _embedModel: stri docid: r.docid, explain: r.explain, })), displayQuery, { ...opts, limit: results.length }); - }, { maxDuration: 10 * 60 * 1000, name: 'querySearch' }); + }; + + // Skip local LLM session when using remote Ollama for embeddings + if (process.env.OLLAMA_EMBED_URL) { + await runQuery(); + } else { + await withLLMSession(runQuery, { maxDuration: 10 * 60 * 1000, name: 'querySearch' }); + } } // Parse CLI arguments using util.parseArgs diff --git a/src/store.ts b/src/store.ts index 80ba6e7b..1ae75375 100644 --- a/src/store.ts +++ b/src/store.ts @@ -39,6 +39,42 @@ import type { // ============================================================================= const HOME = process.env.HOME || "/tmp"; + +// Remote Ollama embedding support — when OLLAMA_EMBED_URL is set, all embedding +// and tokenization operations use the remote Ollama HTTP API instead of +// node-llama-cpp. This enables QMD on platforms without local GPU/Vulkan +// (ARM64 VPS, Docker, CI) and with remote Ollama instances (Tailscale, LAN). +const OLLAMA_EMBED_URL = process.env.OLLAMA_EMBED_URL; +const OLLAMA_EMBED_MODEL = process.env.OLLAMA_EMBED_MODEL || "nomic-embed-text"; + +interface OllamaEmbedResult { + embedding: number[]; + model: string; +} + +async function ollamaEmbed(text: string): Promise { + const res = await fetch(`${OLLAMA_EMBED_URL}/api/embed`, { + method: "POST", + headers: { "Content-Type": "application/json" }, + body: JSON.stringify({ model: OLLAMA_EMBED_MODEL, input: text }), + }); + if (!res.ok) throw new Error(`Ollama embed failed: ${res.status} ${await res.text()}`); + const data = await res.json() as { embeddings: number[][] }; + const embedding = data.embeddings[0]; + if (!embedding) throw new Error('Ollama returned empty embeddings array'); + return { embedding, model: OLLAMA_EMBED_MODEL }; +} + +async function ollamaEmbedBatch(texts: string[]): Promise { + const res = await fetch(`${OLLAMA_EMBED_URL}/api/embed`, { + method: "POST", + headers: { "Content-Type": "application/json" }, + body: JSON.stringify({ model: OLLAMA_EMBED_MODEL, input: texts }), + }); + if (!res.ok) throw new Error(`Ollama embed batch failed: ${res.status} ${await res.text()}`); + const data = await res.json() as { embeddings: number[][] }; + return data.embeddings.map(e => ({ embedding: e, model: OLLAMA_EMBED_MODEL })); +} export const DEFAULT_EMBED_MODEL = "embeddinggemma"; export const DEFAULT_RERANK_MODEL = "ExpedientFalcon/qwen3-reranker:0.6b-q8_0"; export const DEFAULT_QUERY_MODEL = "Qwen/Qwen3-1.7B"; @@ -1439,6 +1475,67 @@ export async function generateEmbeddings( const totalDocs = docsToEmbed.length; const startTime = Date.now(); + // Remote Ollama mode: bypass local LLM entirely + if (OLLAMA_EMBED_URL) { + let chunksEmbedded = 0; + let errors = 0; + let bytesProcessed = 0; + let totalChunks = 0; + let vectorTableInitialized = false; + const BATCH_SIZE = 32; + const batches = buildEmbeddingBatches(docsToEmbed, maxDocsPerBatch, maxBatchBytes); + + for (const batchMeta of batches) { + const batchDocs = getEmbeddingDocsForBatch(db, batchMeta); + const batchChunks: ChunkItem[] = []; + const batchBytes = batchMeta.reduce((sum, doc) => sum + Math.max(0, doc.bytes), 0); + + for (const doc of batchDocs) { + if (!doc.body.trim()) continue; + const title = extractTitle(doc.body, doc.path); + const chunks = await chunkDocumentByTokens(doc.body, undefined, undefined, undefined, doc.path, options?.chunkStrategy); + for (let seq = 0; seq < chunks.length; seq++) { + batchChunks.push({ hash: doc.hash, title, text: chunks[seq]!.text, seq, pos: chunks[seq]!.pos, tokens: chunks[seq]!.tokens, bytes: encoder.encode(chunks[seq]!.text).length }); + } + } + + totalChunks += batchChunks.length; + if (batchChunks.length === 0) { bytesProcessed += batchBytes; options?.onProgress?.({ chunksEmbedded, totalChunks, bytesProcessed, totalBytes, errors }); continue; } + + if (!vectorTableInitialized) { + const firstResult = await ollamaEmbed(batchChunks[0]!.text); + store.ensureVecTable(firstResult.embedding.length); + vectorTableInitialized = true; + } + + for (let batchStart = 0; batchStart < batchChunks.length; batchStart += BATCH_SIZE) { + const batchEnd = Math.min(batchStart + BATCH_SIZE, batchChunks.length); + const chunkBatch = batchChunks.slice(batchStart, batchEnd); + const texts = chunkBatch.map(chunk => chunk.text); + try { + const embeddings = await ollamaEmbedBatch(texts); + for (let i = 0; i < chunkBatch.length; i++) { + const chunk = chunkBatch[i]!; + insertEmbedding(db, chunk.hash, chunk.seq, chunk.pos, new Float32Array(embeddings[i]!.embedding), model, now); + chunksEmbedded++; + } + } catch { + for (const chunk of chunkBatch) { + try { + const result = await ollamaEmbed(chunk.text); + insertEmbedding(db, chunk.hash, chunk.seq, chunk.pos, new Float32Array(result.embedding), model, now); + chunksEmbedded++; + } catch { errors++; } + } + } + options?.onProgress?.({ chunksEmbedded, totalChunks, bytesProcessed: bytesProcessed + batchBytes, totalBytes, errors }); + } + bytesProcessed += batchBytes; + } + + return { docsProcessed: totalDocs, chunksEmbedded, errors, durationMs: Date.now() - startTime }; + } + // Use store's LlamaCpp or global singleton, wrapped in a session const llm = getLlm(store); @@ -2233,15 +2330,20 @@ export async function chunkDocumentByTokens( chunkStrategy: ChunkStrategy = "regex", signal?: AbortSignal ): Promise<{ text: string; pos: number; tokens: number }[]> { - const llm = getDefaultLlamaCpp(); - // Use moderate chars/token estimate (prose ~4, code ~2, mixed ~3) - // If chunks exceed limit, they'll be re-split with actual ratio const avgCharsPerToken = 3; const maxChars = maxTokens * avgCharsPerToken; const overlapChars = overlapTokens * avgCharsPerToken; const windowChars = windowTokens * avgCharsPerToken; + // Remote Ollama mode: skip local tokenizer, use char-based chunking + if (OLLAMA_EMBED_URL) { + const charChunks = await chunkDocumentAsync(content, maxChars, overlapChars, windowChars, filepath, chunkStrategy); + return charChunks.map(c => ({ text: c.text, pos: c.pos, tokens: Math.ceil(c.text.length / avgCharsPerToken) })); + } + + const llm = getDefaultLlamaCpp(); + // Chunk in character space with conservative estimate // Use AST-aware chunking for the first pass when filepath/strategy provided let charChunks = await chunkDocumentAsync(content, maxChars, overlapChars, windowChars, filepath, chunkStrategy); @@ -3111,6 +3213,11 @@ export async function searchVec(db: Database, query: string, model: string, limi // ============================================================================= async function getEmbedding(text: string, model: string, isQuery: boolean, session?: ILLMSession, llmOverride?: LlamaCpp): Promise { + // Remote Ollama mode: bypass local LLM entirely + if (OLLAMA_EMBED_URL && !session && !llmOverride) { + const result = await ollamaEmbed(text); + return result.embedding; + } // Format text using the appropriate prompt template const formattedText = isQuery ? formatQueryForEmbedding(text, model) : formatDocForEmbedding(text, undefined, model); const result = session @@ -3180,6 +3287,11 @@ export function insertEmbedding( // ============================================================================= export async function expandQuery(query: string, model: string = DEFAULT_QUERY_MODEL, db: Database, intent?: string, llmOverride?: LlamaCpp): Promise { + // Remote Ollama mode: skip LLM-based HYDE query expansion (no local model) + if (OLLAMA_EMBED_URL && !llmOverride) { + return [{ type: 'vec' as const, query }]; + } + // Check cache first — stored as JSON preserving types const cacheKey = getCacheKey("expandQuery", { query, model, ...(intent && { intent }) }); const cached = getCachedResult(db, cacheKey); From 22028026edc52a27de64036beb72aa620c814a57 Mon Sep 17 00:00:00 2001 From: chidev Date: Thu, 2 Apr 2026 01:43:27 -0500 Subject: [PATCH 6/7] Add managed installer for cross-machine rollout --- install.sh | 227 +++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 227 insertions(+) create mode 100644 install.sh diff --git a/install.sh b/install.sh new file mode 100644 index 00000000..a511f3cb --- /dev/null +++ b/install.sh @@ -0,0 +1,227 @@ +#!/usr/bin/env bash +set -euo pipefail +umask 022 +shopt -s lastpipe 2>/dev/null || true + +REPO_URL="${QMD_REPO_URL:-https://github.com/chidev/qmd.git}" +DEFAULT_REF="${QMD_INSTALL_REF:-feature/stabilize_qmd}" +INSTALL_DIR_DEFAULT="${QMD_INSTALL_DIR:-$HOME/.local/share/chidev-qmd}" +BIN_DIR_DEFAULT="${QMD_BIN_DIR:-$HOME/.local/bin}" +BIN_NAME="${QMD_BIN_NAME:-qmd}" +NO_PATH_UPDATE=0 +FORCE=0 +REF="$DEFAULT_REF" +INSTALL_DIR="$INSTALL_DIR_DEFAULT" +BIN_DIR="$BIN_DIR_DEFAULT" + +blue=$'\033[34m' +green=$'\033[32m' +yellow=$'\033[33m' +red=$'\033[31m' +bold=$'\033[1m' +reset=$'\033[0m' + +info() { printf "%s->%s %s\n" "$blue" "$reset" "$*"; } +ok() { printf "%sOK%s %s\n" "$green" "$reset" "$*"; } +warn() { printf "%sWARN%s %s\n" "$yellow" "$reset" "$*" >&2; } +err() { printf "%sERR%s %s\n" "$red" "$reset" "$*" >&2; } + +usage() { + cat </dev/null 2>&1; then + err "required command not found: $1" + exit 1 + fi +} + +ensure_node() { + need_cmd node + local major + major="$(node -p 'process.versions.node.split(".")[0]')" + if [ "${major:-0}" -lt 22 ]; then + err "node >= 22 is required" + exit 1 + fi +} + +pick_package_manager() { + if command -v bun >/dev/null 2>&1; then + echo "bun" + elif command -v npm >/dev/null 2>&1; then + echo "npm" + else + err "bun or npm is required" + exit 1 + fi +} + +ensure_path_entry() { + [ "$NO_PATH_UPDATE" -eq 1 ] && return 0 + case ":$PATH:" in + *":$BIN_DIR:"*) return 0 ;; + esac + + local line="export PATH=\"$BIN_DIR:\$PATH\"" + local shell_name rc + shell_name="$(basename "${SHELL:-}")" + case "$shell_name" in + zsh) rc="$HOME/.zshrc" ;; + bash) rc="$HOME/.bashrc" ;; + *) rc="$HOME/.profile" ;; + esac + + if [ -e "$rc" ] && ! [ -w "$rc" ]; then + warn "cannot update $rc; add $BIN_DIR to PATH manually" + return 0 + fi + + mkdir -p "$(dirname "$rc")" + touch "$rc" + if ! grep -F "$line" "$rc" >/dev/null 2>&1; then + printf "\n%s\n" "$line" >>"$rc" + ok "updated PATH in $rc" + fi +} + +sync_repo() { + if [ -d "$INSTALL_DIR/.git" ]; then + info "updating managed clone in $INSTALL_DIR" + git -C "$INSTALL_DIR" remote set-url origin "$REPO_URL" + if [ "$FORCE" -eq 1 ]; then + git -C "$INSTALL_DIR" reset --hard HEAD + git -C "$INSTALL_DIR" clean -fd + elif [ -n "$(git -C "$INSTALL_DIR" status --porcelain)" ]; then + err "managed clone is dirty: $INSTALL_DIR (re-run with --force)" + exit 1 + fi + git -C "$INSTALL_DIR" fetch origin "$REF" --depth 1 + git -C "$INSTALL_DIR" checkout -B "$REF" FETCH_HEAD + else + info "cloning $REPO_URL to $INSTALL_DIR" + mkdir -p "$(dirname "$INSTALL_DIR")" + git clone --depth 1 --branch "$REF" "$REPO_URL" "$INSTALL_DIR" + fi +} + +build_repo() { + local pm="$1" + info "building qmd with $pm" + case "$pm" in + bun) + (cd "$INSTALL_DIR" && bun install --frozen-lockfile || bun install) + (cd "$INSTALL_DIR" && bun run build) + ;; + npm) + (cd "$INSTALL_DIR" && npm install) + (cd "$INSTALL_DIR" && npm run build) + ;; + esac +} + +install_wrapper() { + local wrapper_path="$BIN_DIR/$BIN_NAME" + info "installing wrapper to $wrapper_path" + mkdir -p "$BIN_DIR" + cat >"$wrapper_path" </dev/null + ok "verified $wrapper_path" +} + +main() { + info "installing managed qmd fork" + need_cmd git + ensure_node + local pm + pm="$(pick_package_manager)" + sync_repo + build_repo "$pm" + install_wrapper + ensure_path_entry + verify_install + ok "qmd installed from $REPO_URL @ $REF" + printf "\n" + printf "Managed clone: %s\n" "$INSTALL_DIR" + printf "Wrapper: %s/%s\n" "$BIN_DIR" "$BIN_NAME" + printf "Ref: %s\n" "$REF" + case ":$PATH:" in + *":$BIN_DIR:"*) printf "PATH: ready\n" ;; + *) printf "PATH: add %s to PATH or open a new shell\n" "$BIN_DIR" ;; + esac +} + +main "$@" From 815cc1822846c4f3e35fe05bfe581b1c8817790e Mon Sep 17 00:00:00 2001 From: chidev Date: Thu, 2 Apr 2026 01:44:35 -0500 Subject: [PATCH 7/7] Keep managed clone clean after build --- install.sh | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/install.sh b/install.sh index a511f3cb..ca0e98cb 100644 --- a/install.sh +++ b/install.sh @@ -182,6 +182,13 @@ build_repo() { esac } +clean_managed_clone() { + if [ -d "$INSTALL_DIR/.git" ]; then + git -C "$INSTALL_DIR" restore bun.lock 2>/dev/null || true + fi + rm -f "$INSTALL_DIR/package-lock.json" +} + install_wrapper() { local wrapper_path="$BIN_DIR/$BIN_NAME" info "installing wrapper to $wrapper_path" @@ -210,6 +217,7 @@ main() { pm="$(pick_package_manager)" sync_repo build_repo "$pm" + clean_managed_clone install_wrapper ensure_path_entry verify_install