From 097ae1c9649773cb1df2ec4147e427c7d7e43f47 Mon Sep 17 00:00:00 2001
From: Ryan Malia <ryanmalia@gmail.com>
Date: Thu, 12 Mar 2026 00:14:21 -0700
Subject: [PATCH 1/7] fix: allow hyphenated words in vec/hyde queries (#383)

The validateSemanticQuery regex rejected any hyphen followed by a word
character, blocking common compound words (real-time, multi-client,
kebab-case identifiers like better-sqlite3). Tighten the check to only
match negation syntax at token boundaries (start of string or after
whitespace).

See https://github.com/tobi/qmd/issues/383

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
---
 src/store.ts                   |  5 +--
 test/structured-search.test.ts | 58 +++++++++++++++++++++++++++++++++-
 2 files changed, 60 insertions(+), 3 deletions(-)

diff --git a/src/store.ts b/src/store.ts
index d1b24eb3..2dd64d41 100644
--- a/src/store.ts
+++ b/src/store.ts
@@ -2894,8 +2894,9 @@ function buildFTS5Query(query: string): string | null {
  * Returns error message if invalid, null if valid.
  */
 export function validateSemanticQuery(query: string): string | null {
-  // Check for negation syntax
-  if (/-\w/.test(query) || /-"/.test(query)) {
+  // Check for negation syntax — only at token boundaries (start of string or after whitespace).
+  // Hyphenated words like "real-time" or "write-ahead" must not trigger this.
+  if (/(^|\s)-[\w"]/.test(query)) {
     return 'Negation (-term) is not supported in vec/hyde queries. Use lex for exclusions.';
   }
   return null;
diff --git a/test/structured-search.test.ts b/test/structured-search.test.ts
index d7042103..70da7fd1 100644
--- a/test/structured-search.test.ts
+++ b/test/structured-search.test.ts
@@ -361,17 +361,73 @@ describe("lex query syntax", () => {
       expect(validateSemanticQuery("what is the CAP theorem")).toBeNull();
     });
 
-    test("rejects negation syntax", () => {
+    test("rejects negation at start of query", () => {
+      expect(validateSemanticQuery("-redis connection pooling")).toContain("Negation");
+    });
+
+    test("rejects negation after space", () => {
       expect(validateSemanticQuery("performance -sports")).toContain("Negation");
+    });
+
+    test("rejects negated quoted phrase", () => {
       expect(validateSemanticQuery('-"exact phrase"')).toContain("Negation");
     });
 
+    test("rejects multiple negations", () => {
+      expect(validateSemanticQuery("error handling -java -python")).toContain("Negation");
+    });
+
+    test("rejects negation after leading whitespace", () => {
+      expect(validateSemanticQuery("  -term at start")).toContain("Negation");
+    });
+
+    test("rejects negation after tab", () => {
+      expect(validateSemanticQuery("foo\t-bar")).toContain("Negation");
+    });
+
+    test("accepts hyphenated compound words", () => {
+      expect(validateSemanticQuery("long-lived server shared across clients")).toBeNull();
+      expect(validateSemanticQuery("real-time voice processing pipeline")).toBeNull();
+      expect(validateSemanticQuery("how does the rate-limiter handle burst traffic")).toBeNull();
+      expect(validateSemanticQuery("self-hosted deployment options")).toBeNull();
+      expect(validateSemanticQuery("multi-client session architecture")).toBeNull();
+      expect(validateSemanticQuery("cross-platform compatibility")).toBeNull();
+      expect(validateSemanticQuery("non-blocking I/O model")).toBeNull();
+      expect(validateSemanticQuery("in-memory caching strategy")).toBeNull();
+      expect(validateSemanticQuery("write-ahead log for crash recovery")).toBeNull();
+      expect(validateSemanticQuery("copy-on-write semantics")).toBeNull();
+    });
+
+    test("accepts multiple hyphens in a phrase", () => {
+      expect(validateSemanticQuery("state-of-the-art embedding models")).toBeNull();
+      expect(validateSemanticQuery("end-to-end testing")).toBeNull();
+      expect(validateSemanticQuery("man-in-the-middle attack prevention")).toBeNull();
+    });
+
+    test("accepts multiple hyphenated words in one query", () => {
+      expect(validateSemanticQuery("built-in vs add-on features")).toBeNull();
+    });
+
+    test("accepts short hyphenated terms", () => {
+      expect(validateSemanticQuery("A-B testing for ML models")).toBeNull();
+      expect(validateSemanticQuery("e-commerce platform")).toBeNull();
+    });
+
+    test("accepts bare hyphen without word character", () => {
+      expect(validateSemanticQuery("-")).toBeNull();
+    });
 
     test("accepts hyde-style hypothetical answers", () => {
       expect(validateSemanticQuery(
         "The CAP theorem states that a distributed system cannot simultaneously provide consistency, availability, and partition tolerance."
       )).toBeNull();
     });
+
+    test("accepts hyde with hyphenated words", () => {
+      expect(validateSemanticQuery(
+        "HTTP transport runs a single long-lived daemon shared across all clients, avoiding per-session model re-loading."
+      )).toBeNull();
+    });
   });
 
   describe("validateLexQuery", () => {

From aee44af7acf62487d85d2c9f0b7ee40822372458 Mon Sep 17 00:00:00 2001
From: Sebastian Kouba <little.sebby@gmail.com>
Date: Thu, 19 Mar 2026 10:52:58 +0100
Subject: [PATCH 2/7] Avoid SQLite startup races during parallel Bun qmd
 initialization

---
 CHANGELOG.md                    |   4 ++
 src/store.ts                    |  36 ++++++++++-
 test/cli.test.ts                |  64 ++++++++++++++++--
 test/store.helpers.unit.test.ts | 111 ++++++++++++++++++++++++++++++++
 4 files changed, 209 insertions(+), 6 deletions(-)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index 3e56c278..d72bfcb8 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -17,6 +17,10 @@
 ### Fixes
 
 - Fix paths in nix flake 
+- Configure SQLite connection pragmas before probing `sqlite-vec`, avoid
+  resetting `PRAGMA journal_mode = WAL` on every query startup, and tolerate
+  another process winning the WAL transition race so parallel readers don't
+  fail during initialization with transient `database is locked` errors.
 - Sync stale `bun.lock` (`better-sqlite3` 11.x → 12.x). CI and release
   script now use `--frozen-lockfile` to prevent recurrence. #386
   (thanks @Mic92)
diff --git a/src/store.ts b/src/store.ts
index 2dd64d41..80ba6e7b 100644
--- a/src/store.ts
+++ b/src/store.ts
@@ -721,7 +721,41 @@ export function verifySqliteVecLoaded(db: Database): void {
 
 let _sqliteVecAvailable: boolean | null = null;
 
+const DEFAULT_BUSY_TIMEOUT_MS = 5_000;
+
+function isBusyLockError(err: unknown): boolean {
+  const message = getErrorMessage(err).toLowerCase();
+  return message.includes("database is locked") || message.includes("sqlite_busy");
+}
+
+export function configureConnectionPragmas(db: Database): void {
+  db.exec(`PRAGMA busy_timeout = ${DEFAULT_BUSY_TIMEOUT_MS}`);
+
+  let journalModeRow = db.prepare("PRAGMA journal_mode").get() as { journal_mode?: string } | null;
+  if (journalModeRow?.journal_mode?.toLowerCase() !== "wal") {
+    try {
+      db.exec("PRAGMA journal_mode = WAL");
+    } catch (err) {
+      if (!isBusyLockError(err)) {
+        throw err;
+      }
+
+      // Two qmd processes can both observe a non-WAL database and then race to
+      // become the one that flips the shared file into WAL mode. Losing that
+      // race should not abort startup; re-probe for observability, then proceed.
+      journalModeRow = db.prepare("PRAGMA journal_mode").get() as { journal_mode?: string } | null;
+    }
+  }
+
+  db.exec("PRAGMA foreign_keys = ON");
+}
+
 function initializeDatabase(db: Database): void {
+  // Configure the connection before any probe queries. Parallel qmd processes
+  // can race during startup; without a busy timeout even read-only probe work
+  // like `SELECT vec_version()` can fail immediately with SQLITE_BUSY.
+  configureConnectionPragmas(db);
+
   try {
     loadSqliteVec(db);
     verifySqliteVecLoaded(db);
@@ -731,8 +765,6 @@ function initializeDatabase(db: Database): void {
     _sqliteVecAvailable = false;
     console.warn(getErrorMessage(err));
   }
-  db.exec("PRAGMA journal_mode = WAL");
-  db.exec("PRAGMA foreign_keys = ON");
 
   // Drop legacy tables that are now managed in YAML
   db.exec(`DROP TABLE IF EXISTS path_contexts`);
diff --git a/test/cli.test.ts b/test/cli.test.ts
index 7d6f5267..cb4fd66d 100644
--- a/test/cli.test.ts
+++ b/test/cli.test.ts
@@ -11,7 +11,7 @@ import { existsSync, lstatSync, readFileSync, symlinkSync, writeFileSync, unlink
 import { tmpdir } from "os";
 import { join, dirname } from "path";
 import { fileURLToPath } from "url";
-import { spawn } from "child_process";
+import { spawn, spawnSync } from "child_process";
 import { setTimeout as sleep } from "timers/promises";
 
 // Test fixtures directory and database path
@@ -33,16 +33,19 @@ const tsxBin = (() => {
   }
   return join(process.cwd(), "node_modules", ".bin", "tsx");
 })();
+const bunBin = "bun";
+const bunAvailable = spawnSync(bunBin, ["--version"], { stdio: "ignore" }).status === 0;
 
 // Helper to run qmd command with test database
-async function runQmd(
-  args: string[],
+async function runQmdCommand(
+  command: string,
+  commandArgs: string[],
   options: { cwd?: string; env?: Record<string, string>; dbPath?: string; configDir?: string } = {}
 ): Promise<{ stdout: string; stderr: string; exitCode: number }> {
   const workingDir = options.cwd || fixturesDir;
   const dbPath = options.dbPath || testDbPath;
   const configDir = options.configDir || testConfigDir;
-  const proc = spawn(tsxBin, [qmdScript, ...args], {
+  const proc = spawn(command, commandArgs, {
     cwd: workingDir,
     env: {
       ...process.env,
@@ -76,6 +79,20 @@ async function runQmd(
   return { stdout, stderr, exitCode };
 }
 
+async function runQmd(
+  args: string[],
+  options: { cwd?: string; env?: Record<string, string>; dbPath?: string; configDir?: string } = {}
+): Promise<{ stdout: string; stderr: string; exitCode: number }> {
+  return runQmdCommand(tsxBin, [qmdScript, ...args], options);
+}
+
+async function runQmdWithBun(
+  args: string[],
+  options: { cwd?: string; env?: Record<string, string>; dbPath?: string; configDir?: string } = {}
+): Promise<{ stdout: string; stderr: string; exitCode: number }> {
+  return runQmdCommand(bunBin, [qmdScript, ...args], options);
+}
+
 // Get a fresh database path for isolated tests
 function getFreshDbPath(): string {
   testCounter++;
@@ -224,6 +241,45 @@ beforeEach(async () => {
   );
 });
 
+describe("CLI parallel startup regression", () => {
+  const parallelStartupTest = bunAvailable ? test : test.skip;
+
+  function expectSuccessfulStatus(result: { stdout: string; stderr: string; exitCode: number }): void {
+    expect(result.exitCode).toBe(0);
+    expect(result.stderr).not.toContain("database is locked");
+    expect(result.stderr).not.toContain("SQLITE_BUSY");
+    expect(result.stderr).not.toContain("sqlite-vec probe failed");
+    expect(result.stdout).toContain("QMD Status");
+  }
+
+  parallelStartupTest("allows two Bun qmd processes to initialize the same fresh DB concurrently", async () => {
+    const { dbPath, configDir } = await createIsolatedTestEnv("parallel-startup-fresh");
+
+    const [first, second] = await Promise.all([
+      runQmdWithBun(["status"], { dbPath, configDir }),
+      runQmdWithBun(["status"], { dbPath, configDir }),
+    ]);
+
+    expectSuccessfulStatus(first);
+    expectSuccessfulStatus(second);
+  }, 15000);
+
+  parallelStartupTest("allows two Bun qmd processes to initialize the same existing DB concurrently", async () => {
+    const { dbPath, configDir } = await createIsolatedTestEnv("parallel-startup-existing");
+
+    const warmup = await runQmdWithBun(["status"], { dbPath, configDir });
+    expectSuccessfulStatus(warmup);
+
+    const [first, second] = await Promise.all([
+      runQmdWithBun(["status"], { dbPath, configDir }),
+      runQmdWithBun(["status"], { dbPath, configDir }),
+    ]);
+
+    expectSuccessfulStatus(first);
+    expectSuccessfulStatus(second);
+  }, 15000);
+});
+
 describe("CLI Help", () => {
   test("shows help with --help flag", async () => {
     const { stdout, exitCode } = await runQmd(["--help"]);
diff --git a/test/store.helpers.unit.test.ts b/test/store.helpers.unit.test.ts
index e3c23739..ac0fd702 100644
--- a/test/store.helpers.unit.test.ts
+++ b/test/store.helpers.unit.test.ts
@@ -16,6 +16,7 @@ import {
   isDocid,
   handelize,
   cleanupOrphanedVectors,
+  configureConnectionPragmas,
 } from "../src/store";
 
 // =============================================================================
@@ -109,6 +110,116 @@ describe("cleanupOrphanedVectors", () => {
   });
 });
 
+// =============================================================================
+// Connection pragma tests
+// =============================================================================
+
+describe("configureConnectionPragmas", () => {
+  test("skips resetting journal mode when database is already in WAL mode", () => {
+    const execCalls: string[] = [];
+    const db = {
+      exec: (sql: string) => execCalls.push(sql),
+      prepare: (sql: string) => {
+        expect(sql).toBe("PRAGMA journal_mode");
+        return { get: () => ({ journal_mode: "wal" }) };
+      },
+    } as any;
+
+    configureConnectionPragmas(db);
+
+    expect(execCalls).toEqual([
+      "PRAGMA busy_timeout = 5000",
+      "PRAGMA foreign_keys = ON",
+    ]);
+  });
+
+  test("enables WAL once when database is not already in WAL mode", () => {
+    const execCalls: string[] = [];
+    const db = {
+      exec: (sql: string) => execCalls.push(sql),
+      prepare: (sql: string) => {
+        expect(sql).toBe("PRAGMA journal_mode");
+        return { get: () => ({ journal_mode: "delete" }) };
+      },
+    } as any;
+
+    configureConnectionPragmas(db);
+
+    expect(execCalls).toEqual([
+      "PRAGMA busy_timeout = 5000",
+      "PRAGMA journal_mode = WAL",
+      "PRAGMA foreign_keys = ON",
+    ]);
+  });
+
+  test("tolerates a busy WAL switch when another process wins the race", () => {
+    const execCalls: string[] = [];
+    let journalModeReads = 0;
+    const db = {
+      exec: (sql: string) => {
+        execCalls.push(sql);
+        if (sql === "PRAGMA journal_mode = WAL") {
+          throw new Error("database is locked");
+        }
+      },
+      prepare: (sql: string) => {
+        expect(sql).toBe("PRAGMA journal_mode");
+        return {
+          get: () => ({ journal_mode: journalModeReads++ === 0 ? "delete" : "wal" }),
+        };
+      },
+    } as any;
+
+    expect(() => configureConnectionPragmas(db)).not.toThrow();
+    expect(execCalls).toEqual([
+      "PRAGMA busy_timeout = 5000",
+      "PRAGMA journal_mode = WAL",
+      "PRAGMA foreign_keys = ON",
+    ]);
+  });
+
+  test("continues when WAL switch is busy and follow-up probe still reports non-WAL", () => {
+    const execCalls: string[] = [];
+    const db = {
+      exec: (sql: string) => {
+        execCalls.push(sql);
+        if (sql === "PRAGMA journal_mode = WAL") {
+          throw new Error("SQLITE_BUSY_RECOVERY: database is locked");
+        }
+      },
+      prepare: (sql: string) => {
+        expect(sql).toBe("PRAGMA journal_mode");
+        return {
+          get: () => ({ journal_mode: "delete" }),
+        };
+      },
+    } as any;
+
+    expect(() => configureConnectionPragmas(db)).not.toThrow();
+    expect(execCalls).toEqual([
+      "PRAGMA busy_timeout = 5000",
+      "PRAGMA journal_mode = WAL",
+      "PRAGMA foreign_keys = ON",
+    ]);
+  });
+
+  test("rethrows non-lock WAL errors", () => {
+    const db = {
+      exec: (sql: string) => {
+        if (sql === "PRAGMA journal_mode = WAL") {
+          throw new Error("disk I/O error");
+        }
+      },
+      prepare: (sql: string) => {
+        expect(sql).toBe("PRAGMA journal_mode");
+        return { get: () => ({ journal_mode: "delete" }) };
+      },
+    } as any;
+
+    expect(() => configureConnectionPragmas(db)).toThrow("disk I/O error");
+  });
+});
+
 // =============================================================================
 // Handelize Tests
 // =============================================================================

From a882d6b514b7147570194c20b3965f79252df8ec Mon Sep 17 00:00:00 2001
From: DmitryPogodaev <dmitry@pogodaev.com>
Date: Wed, 18 Mar 2026 14:02:13 +0000
Subject: [PATCH 3/7] feat(mcp): expose skipRerank and candidateLimit in query
 tool

On CPU-only servers, LLM reranking (0.6B model) takes ~2s per document,
making the query tool unusable with timeouts under 30s.

This commit:
- Adds `skipRerank` boolean parameter to the MCP `query` tool schema.
  When true, returns results scored by RRF fusion only (no LLM rerank).
- Passes `candidateLimit` through to structuredSearch (was declared in
  schema but never forwarded to the store).

Use case: automated RAG hooks with 1-2s timeouts on VPS without GPU.
With skipRerank=true, queries complete in 30-50ms instead of 30-40s.
---
 src/index.ts      | 3 +++
 src/mcp/server.ts | 8 ++++++--
 2 files changed, 9 insertions(+), 2 deletions(-)

diff --git a/src/index.ts b/src/index.ts
index 02ec51bd..7c9d0bee 100644
--- a/src/index.ts
+++ b/src/index.ts
@@ -161,6 +161,8 @@ export interface SearchOptions {
   limit?: number;
   /** Minimum score threshold */
   minScore?: number;
+  /** Maximum candidates to rerank (default: 40) */
+  candidateLimit?: number;
   /** Include explain traces */
   explain?: boolean;
   /** Chunk strategy: "auto" (default, uses AST for code files) or "regex" (legacy) */
@@ -393,6 +395,7 @@ export async function createStore(options: StoreOptions): Promise<QMDStore> {
           collections: collections.length > 0 ? collections : undefined,
           limit: opts.limit,
           minScore: opts.minScore,
+          candidateLimit: opts.candidateLimit,
           explain: opts.explain,
           intent: opts.intent,
           skipRerank,
diff --git a/src/mcp/server.ts b/src/mcp/server.ts
index b7fada76..015f0998 100644
--- a/src/mcp/server.ts
+++ b/src/mcp/server.ts
@@ -292,6 +292,9 @@ Intent-aware lex (C++ performance, not sports):
         candidateLimit: z.number().optional().describe(
           "Maximum candidates to rerank (default: 40, lower = faster but may miss results)"
         ),
+        skipRerank: z.boolean().optional().describe(
+          "Skip LLM reranking and use RRF fusion scores only. Much faster on CPU-only servers."
+        ),
         collections: z.array(z.string()).optional().describe("Filter to collections (OR match)"),
         intent: z.string().optional().describe(
           "Background context to disambiguate the query. Example: query='performance', intent='web page load times and Core Web Vitals'. Does not search on its own."
@@ -301,7 +304,7 @@ Intent-aware lex (C++ performance, not sports):
         ),
       },
     },
-    async ({ searches, limit, minScore, candidateLimit, collections, intent, rerank }) => {
+    async ({ searches, limit, minScore, candidateLimit, skipRerank, collections, intent, rerank }) => {
       // Map to internal format
       const queries: ExpandedQuery[] = searches.map(s => ({
         type: s.type,
@@ -316,8 +319,9 @@ Intent-aware lex (C++ performance, not sports):
         collections: effectiveCollections.length > 0 ? effectiveCollections : undefined,
         limit,
         minScore,
-        rerank,
+        candidateLimit,
         intent,
+        rerank: skipRerank ? false : rerank,
       });
 
       // Use first lex or vec query for snippet extraction

From 62b170782ed3fe514f58d8ca910712789ba3a19a Mon Sep 17 00:00:00 2001
From: Sebastian Kouba <little.sebby@gmail.com>
Date: Thu, 19 Mar 2026 11:55:10 +0100
Subject: [PATCH 4/7] Narrow the Bun startup regression harness

---
 test/cli.test.ts                 | 35 +++++++++++++++++++-------------
 test/parallel-startup-harness.ts | 21 +++++++++++++++++++
 2 files changed, 42 insertions(+), 14 deletions(-)
 create mode 100644 test/parallel-startup-harness.ts

diff --git a/test/cli.test.ts b/test/cli.test.ts
index cb4fd66d..b76ac4c9 100644
--- a/test/cli.test.ts
+++ b/test/cli.test.ts
@@ -25,6 +25,7 @@ let testCounter = 0; // Unique counter for each test run
 const thisDir = dirname(fileURLToPath(import.meta.url));
 const projectRoot = join(thisDir, "..");
 const qmdScript = join(projectRoot, "src", "cli", "qmd.ts");
+const parallelStartupHarness = join(projectRoot, "test", "parallel-startup-harness.ts");
 // Resolve tsx binary from project's node_modules (not cwd-dependent)
 const tsxBin = (() => {
   const candidate = join(projectRoot, "node_modules", ".bin", "tsx");
@@ -93,6 +94,12 @@ async function runQmdWithBun(
   return runQmdCommand(bunBin, [qmdScript, ...args], options);
 }
 
+async function runParallelStartupHarness(
+  dbPath: string
+): Promise<{ stdout: string; stderr: string; exitCode: number }> {
+  return runQmdCommand(bunBin, [parallelStartupHarness, dbPath], { cwd: projectRoot, dbPath });
+}
+
 // Get a fresh database path for isolated tests
 function getFreshDbPath(): string {
   testCounter++;
@@ -244,39 +251,39 @@ beforeEach(async () => {
 describe("CLI parallel startup regression", () => {
   const parallelStartupTest = bunAvailable ? test : test.skip;
 
-  function expectSuccessfulStatus(result: { stdout: string; stderr: string; exitCode: number }): void {
+  function expectSuccessfulStartup(result: { stdout: string; stderr: string; exitCode: number }): void {
     expect(result.exitCode).toBe(0);
     expect(result.stderr).not.toContain("database is locked");
     expect(result.stderr).not.toContain("SQLITE_BUSY");
     expect(result.stderr).not.toContain("sqlite-vec probe failed");
-    expect(result.stdout).toContain("QMD Status");
+    expect(result.stdout).toContain("startup-ok");
   }
 
   parallelStartupTest("allows two Bun qmd processes to initialize the same fresh DB concurrently", async () => {
-    const { dbPath, configDir } = await createIsolatedTestEnv("parallel-startup-fresh");
+    const dbPath = getFreshDbPath();
 
     const [first, second] = await Promise.all([
-      runQmdWithBun(["status"], { dbPath, configDir }),
-      runQmdWithBun(["status"], { dbPath, configDir }),
+      runParallelStartupHarness(dbPath),
+      runParallelStartupHarness(dbPath),
     ]);
 
-    expectSuccessfulStatus(first);
-    expectSuccessfulStatus(second);
+    expectSuccessfulStartup(first);
+    expectSuccessfulStartup(second);
   }, 15000);
 
   parallelStartupTest("allows two Bun qmd processes to initialize the same existing DB concurrently", async () => {
-    const { dbPath, configDir } = await createIsolatedTestEnv("parallel-startup-existing");
+    const dbPath = getFreshDbPath();
 
-    const warmup = await runQmdWithBun(["status"], { dbPath, configDir });
-    expectSuccessfulStatus(warmup);
+    const warmup = await runParallelStartupHarness(dbPath);
+    expectSuccessfulStartup(warmup);
 
     const [first, second] = await Promise.all([
-      runQmdWithBun(["status"], { dbPath, configDir }),
-      runQmdWithBun(["status"], { dbPath, configDir }),
+      runParallelStartupHarness(dbPath),
+      runParallelStartupHarness(dbPath),
     ]);
 
-    expectSuccessfulStatus(first);
-    expectSuccessfulStatus(second);
+    expectSuccessfulStartup(first);
+    expectSuccessfulStartup(second);
   }, 15000);
 });
 
diff --git a/test/parallel-startup-harness.ts b/test/parallel-startup-harness.ts
new file mode 100644
index 00000000..279b112c
--- /dev/null
+++ b/test/parallel-startup-harness.ts
@@ -0,0 +1,21 @@
+import { createStore } from "../src/store.js";
+
+const dbPath = process.argv[2];
+
+if (!dbPath) {
+  console.error("Usage: bun test/parallel-startup-harness.ts <dbPath>");
+  process.exit(1);
+}
+
+let store: ReturnType<typeof createStore> | undefined;
+
+try {
+  store = createStore(dbPath);
+  store.getStatus();
+  console.log("startup-ok");
+} catch (err) {
+  console.error(err instanceof Error ? err.message : String(err));
+  process.exitCode = 1; // not process.exit(): it would skip `finally` and may drop piped stderr
+} finally {
+  store?.close();
+}

From fc03532173f75b166f368cbf5e30a0694e88a287 Mon Sep 17 00:00:00 2001
From: Claw <claw@reflejo.ai>
Date: Wed, 1 Apr 2026 18:58:39 +0000
Subject: [PATCH 5/7] feat: support remote Ollama embeddings via
 OLLAMA_EMBED_URL

When OLLAMA_EMBED_URL is set, embeddings are requested from the remote
Ollama HTTP API and chunk token counts are estimated from character
length, instead of using node-llama-cpp. This enables QMD on platforms
without local GPU/Vulkan support (ARM64 VPS, Docker containers, CI
runners) and with remote Ollama instances (Tailscale, LAN, Docker
networks).

Changes:
- Add ollamaEmbed() and ollamaEmbedBatch() helper functions using
  Ollama /api/embed endpoint
- Patch getEmbedding() to bypass node-llama-cpp when OLLAMA_EMBED_URL
  is set
- Patch generateEmbeddings() with dedicated Ollama fast-path that skips
  withLLMSessionForLlm entirely
- Patch expandQuery() to skip LLM-based HYDE query expansion (passes
  raw query as vector search)
- Patch chunkDocumentByTokens() to use char-based estimation instead of
  local tokenizer
- Patch vsearch and query CLI commands to skip withLLMSession wrapper

Environment variables:
  OLLAMA_EMBED_URL   - Ollama server URL (e.g. http://your-ollama:11434)
  OLLAMA_EMBED_MODEL - Model name (default: nomic-embed-text)

Tested on ARM64 Oracle Cloud VPS with qwen3-embedding:0.6b on remote
Ollama via Tailscale. 7,100+ documents indexed successfully.
---
 src/cli/qmd.ts |  22 +++++++--
 src/store.ts   | 118 +++++++++++++++++++++++++++++++++++++++++++++++--
 2 files changed, 133 insertions(+), 7 deletions(-)

diff --git a/src/cli/qmd.ts b/src/cli/qmd.ts
index 7216965a..f4b0ffe6 100755
--- a/src/cli/qmd.ts
+++ b/src/cli/qmd.ts
@@ -2183,7 +2183,7 @@ async function vectorSearch(query: string, opts: OutputOptions, _model: string =
 
   checkIndexHealth(store.db);
 
-  await withLLMSession(async () => {
+  const runSearch = async () => {
     let results = await vectorSearchQuery(store, query, {
       collection: singleCollection,
       limit: opts.all ? 500 : (opts.limit || 10),
@@ -2221,7 +2221,14 @@ async function vectorSearch(query: string, opts: OutputOptions, _model: string =
       context: r.context,
       docid: r.docid,
     })), query, { ...opts, limit: results.length });
-  }, { maxDuration: 10 * 60 * 1000, name: 'vectorSearch' });
+  };
+
+  // Skip local LLM session when using remote Ollama for embeddings
+  if (process.env.OLLAMA_EMBED_URL) {
+    await runSearch();
+  } else {
+    await withLLMSession(runSearch, { maxDuration: 10 * 60 * 1000, name: 'vectorSearch' });
+  }
 }
 
 async function querySearch(query: string, opts: OutputOptions, _embedModel: string = DEFAULT_EMBED_MODEL, _rerankModel: string = DEFAULT_RERANK_MODEL): Promise<void> {
@@ -2239,7 +2246,7 @@ async function querySearch(query: string, opts: OutputOptions, _embedModel: stri
   // Intent can come from --intent flag or from intent: line in query document
   const intent = opts.intent || parsed?.intent;
 
-  await withLLMSession(async () => {
+  const runQuery = async () => {
     let results;
 
     if (parsed) {
@@ -2359,7 +2366,14 @@ async function querySearch(query: string, opts: OutputOptions, _embedModel: stri
       docid: r.docid,
       explain: r.explain,
     })), displayQuery, { ...opts, limit: results.length });
-  }, { maxDuration: 10 * 60 * 1000, name: 'querySearch' });
+  };
+
+  // Skip local LLM session when using remote Ollama for embeddings
+  if (process.env.OLLAMA_EMBED_URL) {
+    await runQuery();
+  } else {
+    await withLLMSession(runQuery, { maxDuration: 10 * 60 * 1000, name: 'querySearch' });
+  }
 }
 
 // Parse CLI arguments using util.parseArgs
diff --git a/src/store.ts b/src/store.ts
index 80ba6e7b..1ae75375 100644
--- a/src/store.ts
+++ b/src/store.ts
@@ -39,6 +39,42 @@ import type {
 // =============================================================================
 
 const HOME = process.env.HOME || "/tmp";
+
+// Remote Ollama embedding support — when OLLAMA_EMBED_URL is set, embedding
+// requests go to the remote Ollama HTTP API and chunk token counts are estimated
+// from character length, instead of using node-llama-cpp (ARM64 VPS, Docker, CI).
+// Trailing slashes are stripped so `${OLLAMA_EMBED_URL}/api/embed` never becomes `//api/embed`.
+const OLLAMA_EMBED_URL = process.env.OLLAMA_EMBED_URL?.replace(/\/+$/, "");
+const OLLAMA_EMBED_MODEL = process.env.OLLAMA_EMBED_MODEL || "nomic-embed-text";
+
+interface OllamaEmbedResult {
+  embedding: number[];
+  model: string;
+}
+
+async function ollamaEmbed(text: string): Promise<OllamaEmbedResult> {
+  const res = await fetch(`${OLLAMA_EMBED_URL}/api/embed`, {
+    method: "POST",
+    headers: { "Content-Type": "application/json" },
+    body: JSON.stringify({ model: OLLAMA_EMBED_MODEL, input: text }),
+  });
+  if (!res.ok) throw new Error(`Ollama embed failed: ${res.status} ${await res.text()}`);
+  const data = await res.json() as { embeddings: number[][] };
+  const embedding = data.embeddings[0];
+  if (!embedding) throw new Error('Ollama returned empty embeddings array');
+  return { embedding, model: OLLAMA_EMBED_MODEL };
+}
+
+async function ollamaEmbedBatch(texts: string[]): Promise<OllamaEmbedResult[]> {
+  const res = await fetch(`${OLLAMA_EMBED_URL}/api/embed`, {
+    method: "POST",
+    headers: { "Content-Type": "application/json" },
+    body: JSON.stringify({ model: OLLAMA_EMBED_MODEL, input: texts }),
+  });
+  if (!res.ok) throw new Error(`Ollama embed batch failed: ${res.status} ${await res.text()}`);
+  const data = await res.json() as { embeddings: number[][] };
+  return data.embeddings.map(e => ({ embedding: e, model: OLLAMA_EMBED_MODEL }));
+}
 export const DEFAULT_EMBED_MODEL = "embeddinggemma";
 export const DEFAULT_RERANK_MODEL = "ExpedientFalcon/qwen3-reranker:0.6b-q8_0";
 export const DEFAULT_QUERY_MODEL = "Qwen/Qwen3-1.7B";
@@ -1439,6 +1475,67 @@ export async function generateEmbeddings(
   const totalDocs = docsToEmbed.length;
   const startTime = Date.now();
 
+  // Remote Ollama mode: bypass local LLM entirely
+  if (OLLAMA_EMBED_URL) {
+    let chunksEmbedded = 0;
+    let errors = 0;
+    let bytesProcessed = 0;
+    let totalChunks = 0;
+    let vectorTableInitialized = false;
+    const BATCH_SIZE = 32;
+    const batches = buildEmbeddingBatches(docsToEmbed, maxDocsPerBatch, maxBatchBytes);
+
+    for (const batchMeta of batches) {
+      const batchDocs = getEmbeddingDocsForBatch(db, batchMeta);
+      const batchChunks: ChunkItem[] = [];
+      const batchBytes = batchMeta.reduce((sum, doc) => sum + Math.max(0, doc.bytes), 0);
+
+      for (const doc of batchDocs) {
+        if (!doc.body.trim()) continue;
+        const title = extractTitle(doc.body, doc.path);
+        const chunks = await chunkDocumentByTokens(doc.body, undefined, undefined, undefined, doc.path, options?.chunkStrategy);
+        for (let seq = 0; seq < chunks.length; seq++) {
+          batchChunks.push({ hash: doc.hash, title, text: chunks[seq]!.text, seq, pos: chunks[seq]!.pos, tokens: chunks[seq]!.tokens, bytes: encoder.encode(chunks[seq]!.text).length });
+        }
+      }
+
+      totalChunks += batchChunks.length;
+      if (batchChunks.length === 0) { bytesProcessed += batchBytes; options?.onProgress?.({ chunksEmbedded, totalChunks, bytesProcessed, totalBytes, errors }); continue; }
+
+      if (!vectorTableInitialized) {
+        const firstResult = await ollamaEmbed(batchChunks[0]!.text);
+        store.ensureVecTable(firstResult.embedding.length);
+        vectorTableInitialized = true;
+      }
+
+      for (let batchStart = 0; batchStart < batchChunks.length; batchStart += BATCH_SIZE) {
+        const batchEnd = Math.min(batchStart + BATCH_SIZE, batchChunks.length);
+        const chunkBatch = batchChunks.slice(batchStart, batchEnd);
+        const texts = chunkBatch.map(chunk => chunk.text);
+        try {
+          const embeddings = await ollamaEmbedBatch(texts);
+          for (let i = 0; i < chunkBatch.length; i++) {
+            const chunk = chunkBatch[i]!;
+            insertEmbedding(db, chunk.hash, chunk.seq, chunk.pos, new Float32Array(embeddings[i]!.embedding), model, now);
+            chunksEmbedded++;
+          }
+        } catch {
+          for (const chunk of chunkBatch) {
+            try {
+              const result = await ollamaEmbed(chunk.text);
+              insertEmbedding(db, chunk.hash, chunk.seq, chunk.pos, new Float32Array(result.embedding), model, now);
+              chunksEmbedded++;
+            } catch { errors++; }
+          }
+        }
+        options?.onProgress?.({ chunksEmbedded, totalChunks, bytesProcessed: bytesProcessed + batchBytes, totalBytes, errors });
+      }
+      bytesProcessed += batchBytes;
+    }
+
+    return { docsProcessed: totalDocs, chunksEmbedded, errors, durationMs: Date.now() - startTime };
+  }
+
   // Use store's LlamaCpp or global singleton, wrapped in a session
   const llm = getLlm(store);
 
@@ -2233,15 +2330,20 @@ export async function chunkDocumentByTokens(
   chunkStrategy: ChunkStrategy = "regex",
   signal?: AbortSignal
 ): Promise<{ text: string; pos: number; tokens: number }[]> {
-  const llm = getDefaultLlamaCpp();
-
   // Use moderate chars/token estimate (prose ~4, code ~2, mixed ~3)
-  // If chunks exceed limit, they'll be re-split with actual ratio
   const avgCharsPerToken = 3;
   const maxChars = maxTokens * avgCharsPerToken;
   const overlapChars = overlapTokens * avgCharsPerToken;
   const windowChars = windowTokens * avgCharsPerToken;
 
+  // Remote Ollama mode: skip local tokenizer, use char-based chunking
+  if (OLLAMA_EMBED_URL) {
+    const charChunks = await chunkDocumentAsync(content, maxChars, overlapChars, windowChars, filepath, chunkStrategy);
+    return charChunks.map(c => ({ text: c.text, pos: c.pos, tokens: Math.ceil(c.text.length / avgCharsPerToken) }));
+  }
+
+  const llm = getDefaultLlamaCpp();
+
   // Chunk in character space with conservative estimate
   // Use AST-aware chunking for the first pass when filepath/strategy provided
   let charChunks = await chunkDocumentAsync(content, maxChars, overlapChars, windowChars, filepath, chunkStrategy);
@@ -3111,6 +3213,11 @@ export async function searchVec(db: Database, query: string, model: string, limi
 // =============================================================================
 
 async function getEmbedding(text: string, model: string, isQuery: boolean, session?: ILLMSession, llmOverride?: LlamaCpp): Promise<number[] | null> {
+  // Remote Ollama mode: bypass local LLM entirely
+  if (OLLAMA_EMBED_URL && !session && !llmOverride) {
+    const result = await ollamaEmbed(text);
+    return result.embedding;
+  }
   // Format text using the appropriate prompt template
   const formattedText = isQuery ? formatQueryForEmbedding(text, model) : formatDocForEmbedding(text, undefined, model);
   const result = session
@@ -3180,6 +3287,11 @@ export function insertEmbedding(
 // =============================================================================
 
 export async function expandQuery(query: string, model: string = DEFAULT_QUERY_MODEL, db: Database, intent?: string, llmOverride?: LlamaCpp): Promise<ExpandedQuery[]> {
+  // Remote Ollama mode: skip LLM-based HYDE query expansion (no local model)
+  if (OLLAMA_EMBED_URL && !llmOverride) {
+    return [{ type: 'vec' as const, query }];
+  }
+
   // Check cache first — stored as JSON preserving types
   const cacheKey = getCacheKey("expandQuery", { query, model, ...(intent && { intent }) });
   const cached = getCachedResult(db, cacheKey);

From 22028026edc52a27de64036beb72aa620c814a57 Mon Sep 17 00:00:00 2001
From: chidev <chicagowebdev@gmail.com>
Date: Thu, 2 Apr 2026 01:43:27 -0500
Subject: [PATCH 6/7] Add managed installer for cross-machine rollout

---
 install.sh | 227 +++++++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 227 insertions(+)
 create mode 100644 install.sh

diff --git a/install.sh b/install.sh
new file mode 100644
index 00000000..a511f3cb
--- /dev/null
+++ b/install.sh
@@ -0,0 +1,227 @@
+#!/usr/bin/env bash
+set -euo pipefail  # abort on a failing command, an unset variable, or a failing pipeline stage
+umask 022  # files and directories created below are not group/world-writable
+shopt -s lastpipe 2>/dev/null || true  # best effort: a shell without this option just carries on
+
+REPO_URL="${QMD_REPO_URL:-https://github.com/chidev/qmd.git}"  # git remote to install from
+DEFAULT_REF="${QMD_INSTALL_REF:-feature/stabilize_qmd}"  # ref used when --ref is not given
+INSTALL_DIR_DEFAULT="${QMD_INSTALL_DIR:-$HOME/.local/share/chidev-qmd}"  # managed clone location
+BIN_DIR_DEFAULT="${QMD_BIN_DIR:-$HOME/.local/bin}"  # directory that receives the wrapper
+BIN_NAME="${QMD_BIN_NAME:-qmd}"  # wrapper file name; --bin-name overrides it
+NO_PATH_UPDATE=0  # set to 1 by --no-path-update
+FORCE=0  # set to 1 by --force
+REF="$DEFAULT_REF"  # --ref overrides it
+INSTALL_DIR="$INSTALL_DIR_DEFAULT"  # --install-dir overrides it
+BIN_DIR="$BIN_DIR_DEFAULT"  # --bin-dir overrides it
+
+blue=$'\033[34m'  # ANSI escape sequences used by the log helpers and the usage text
+green=$'\033[32m'
+yellow=$'\033[33m'
+red=$'\033[31m'
+bold=$'\033[1m'
+reset=$'\033[0m'  # switches all attributes back off
+
+info() { printf "%s->%s %s\n" "$blue" "$reset" "$*"; }  # progress message on stdout
+ok() { printf "%sOK%s %s\n" "$green" "$reset" "$*"; }  # success message on stdout
+warn() { printf "%sWARN%s %s\n" "$yellow" "$reset" "$*" >&2; }  # warning on stderr
+err() { printf "%sERR%s %s\n" "$red" "$reset" "$*" >&2; }  # error on stderr; does not exit by itself
+
+usage() {  # print the help text to stdout; the unquoted heredoc expands the current defaults
+  cat <<EOF
+${bold}chidev qmd installer${reset}
+
+Installs the managed qmd fork and a stable wrapper executable.
+
+Usage:
+  install.sh [options]
+
+Options:
+  --ref REF              Git ref to install (default: ${DEFAULT_REF})
+  --install-dir PATH     Managed clone path (default: ${INSTALL_DIR_DEFAULT})
+  --bin-dir PATH         Wrapper install dir (default: ${BIN_DIR_DEFAULT})
+  --bin-name NAME        Wrapper name (default: ${BIN_NAME})
+  --force                Reset managed clone if it is dirty
+  --no-path-update       Do not try to add bin dir to shell rc files
+  -h, --help             Show this help
+
+Environment:
+  QMD_REPO_URL           Override git remote URL
+  QMD_INSTALL_REF        Override default ref
+  QMD_INSTALL_DIR        Override default install dir
+  QMD_BIN_DIR            Override default bin dir
+  QMD_BIN_NAME           Override wrapper name
+EOF
+}
+
+# Parse command-line options; each flag overrides the matching default set above.
+while [ $# -gt 0 ]; do
+  case "$1" in
+    --ref)
+      REF="${2:?missing value for --ref}"
+      shift 2
+      ;;
+    --install-dir)
+      INSTALL_DIR="${2:?missing value for --install-dir}"
+      shift 2
+      ;;
+    --bin-dir)
+      BIN_DIR="${2:?missing value for --bin-dir}"
+      shift 2
+      ;;
+    --bin-name)
+      BIN_NAME="${2:?missing value for --bin-name}"
+      shift 2
+      ;;
+    --force)
+      FORCE=1
+      shift
+      ;;
+    --no-path-update)
+      NO_PATH_UPDATE=1
+      shift
+      ;;
+    -h|--help)
+      usage; exit 0
+      ;;
+    *)
+      err "unknown option: $1"
+      usage >&2  # usage error: send the help to stderr next to the message, keep stdout clean
+      exit 1
+      ;;
+  esac
+done
+
+need_cmd() {
+  if ! command -v "$1" >/dev/null 2>&1; then
+    err "required command not found: $1"
+    exit 1
+  fi
+}
+
+ensure_node() {
+  need_cmd node
+  local major
+  major="$(node -p 'process.versions.node.split(".")[0]')"
+  if [ "${major:-0}" -lt 22 ]; then
+    err "node >= 22 is required"
+    exit 1
+  fi
+}
+
+pick_package_manager() {
+  if command -v bun >/dev/null 2>&1; then
+    echo "bun"
+  elif command -v npm >/dev/null 2>&1; then
+    echo "npm"
+  else
+    err "bun or npm is required"
+    exit 1
+  fi
+}
+
+ensure_path_entry() {  # add BIN_DIR to PATH via the user's shell rc file when it is missing
+  [ "$NO_PATH_UPDATE" -eq 1 ] && return 0  # opted out with --no-path-update
+  case ":$PATH:" in  # colon-wrap both sides so only a whole PATH entry matches
+    *":$BIN_DIR:"*) return 0 ;;
+  esac
+
+  local line="export PATH=\"$BIN_DIR:\$PATH\""  # BIN_DIR expands now; \$PATH stays literal in the rc file
+  local shell_name rc
+  shell_name="$(basename "${SHELL:-}")"
+  case "$shell_name" in
+    zsh) rc="$HOME/.zshrc" ;;
+    bash) rc="$HOME/.bashrc" ;;
+    *) rc="$HOME/.profile" ;;
+  esac
+
+  if [ -e "$rc" ] && ! [ -w "$rc" ]; then
+    warn "cannot update $rc; add $BIN_DIR to PATH manually"
+    return 0
+  fi
+
+  mkdir -p "$(dirname "$rc")"
+  touch "$rc"
+  if ! grep -F "$line" "$rc" >/dev/null 2>&1; then  # skip when the exact text is already present
+    printf "\n%s\n" "$line" >>"$rc"
+    ok "updated PATH in $rc"
+  fi
+}
+
+sync_repo() {  # clone the managed checkout, or update an existing one to $REF
+  if [ -d "$INSTALL_DIR/.git" ]; then
+    info "updating managed clone in $INSTALL_DIR"
+    git -C "$INSTALL_DIR" remote set-url origin "$REPO_URL"  # follow a changed REPO_URL
+    if [ "$FORCE" -eq 1 ]; then
+      git -C "$INSTALL_DIR" reset --hard HEAD  # discard changes to tracked files
+      git -C "$INSTALL_DIR" clean -fd  # delete untracked files and directories
+    elif [ -n "$(git -C "$INSTALL_DIR" status --porcelain)" ]; then  # any output means local changes
+      err "managed clone is dirty: $INSTALL_DIR (re-run with --force)"
+      exit 1
+    fi
+    git -C "$INSTALL_DIR" fetch origin "$REF" --depth 1
+    git -C "$INSTALL_DIR" checkout -B "$REF" FETCH_HEAD  # create or reset local branch $REF at the fetched commit
+  else
+    info "cloning $REPO_URL to $INSTALL_DIR"
+    mkdir -p "$(dirname "$INSTALL_DIR")"
+    git clone --depth 1 --branch "$REF" "$REPO_URL" "$INSTALL_DIR"
+  fi
+}
+
+build_repo() {
+  local pm="$1"
+  info "building qmd with $pm"
+  case "$pm" in
+    bun)
+      (cd "$INSTALL_DIR" && bun install --frozen-lockfile || bun install)
+      (cd "$INSTALL_DIR" && bun run build)
+      ;;
+    npm)
+      (cd "$INSTALL_DIR" && npm install)
+      (cd "$INSTALL_DIR" && npm run build)
+      ;;
+  esac
+}
+
+install_wrapper() {
+  local wrapper_path="$BIN_DIR/$BIN_NAME"
+  info "installing wrapper to $wrapper_path"
+  mkdir -p "$BIN_DIR"
+  cat >"$wrapper_path" <<EOF  # %q below shell-quotes INSTALL_DIR so any character in the path is safe
+#!/usr/bin/env bash
+set -euo pipefail
+QMD_HOME=$(printf '%q' "$INSTALL_DIR")
+exec node "\$QMD_HOME/dist/cli/qmd.js" "\$@"
+EOF
+  chmod 0755 "$wrapper_path"  # make the generated wrapper executable
+}
+
+verify_install() {
+  local wrapper_path="$BIN_DIR/$BIN_NAME"
+  info "verifying wrapper"
+  "$wrapper_path" --version >/dev/null
+  ok "verified $wrapper_path"
+}
+
+main() {  # run the install steps in order, then print a summary
+  info "installing managed qmd fork"
+  need_cmd git
+  ensure_node
+  local pm  # declared separately so a failing $(...) on the next line is not masked by "local"
+  pm="$(pick_package_manager)"
+  sync_repo
+  build_repo "$pm"
+  install_wrapper
+  ensure_path_entry
+  verify_install
+  ok "qmd installed from $REPO_URL @ $REF"
+  printf "\n"
+  printf "Managed clone: %s\n" "$INSTALL_DIR"
+  printf "Wrapper:       %s/%s\n" "$BIN_DIR" "$BIN_NAME"
+  printf "Ref:           %s\n" "$REF"
+  case ":$PATH:" in  # inspects this process's PATH; an rc-file edit only reaches new shells
+    *":$BIN_DIR:"*) printf "PATH:          ready\n" ;;
+    *) printf "PATH:          add %s to PATH or open a new shell\n" "$BIN_DIR" ;;
+  esac
+}
+
+main "$@"

From 815cc1822846c4f3e35fe05bfe581b1c8817790e Mon Sep 17 00:00:00 2001
From: chidev <chicagowebdev@gmail.com>
Date: Thu, 2 Apr 2026 01:44:35 -0500
Subject: [PATCH 7/7] Keep managed clone clean after build

---
 install.sh | 8 ++++++++
 1 file changed, 8 insertions(+)

diff --git a/install.sh b/install.sh
index a511f3cb..ca0e98cb 100644
--- a/install.sh
+++ b/install.sh
@@ -182,6 +182,13 @@ build_repo() {
   esac
 }
 
+clean_managed_clone() {
+  if [ -d "$INSTALL_DIR/.git" ]; then
+    git -C "$INSTALL_DIR" restore bun.lock 2>/dev/null || true
+  fi
+  rm -f "$INSTALL_DIR/package-lock.json"
+}
+
 install_wrapper() {
   local wrapper_path="$BIN_DIR/$BIN_NAME"
   info "installing wrapper to $wrapper_path"
@@ -210,6 +217,7 @@ main() {
   pm="$(pick_package_manager)"
   sync_repo
   build_repo "$pm"
+  clean_managed_clone
   install_wrapper
   ensure_path_entry
   verify_install