From f0cd93983da0487f516f0c2310247d19bc3e6dc6 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Bj=C3=B8rn=20Stabell?= <bjorn@stabell.org>
Date: Mon, 13 Apr 2026 07:29:44 -0700
Subject: [PATCH] fix: embed and pull commands now respect models config and
 env vars
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The `embed` and `pull` commands hardcoded DEFAULT_*_MODEL_URI, ignoring
both the `models:` section in index.yml and QMD_*_MODEL environment
variables. Only `query` and `vsearch` honored custom models (via the
LlamaCpp instance set up in getStore).

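Adds a module-level `models` object whose URIs are resolved once, during
store initialization in getStore() (config > env var > default). The
embed, pull, and status commands now read from this object instead of
the hardcoded defaults; embed and pull call getStore() first so the
object is populated before it is read.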

Fixes #494 (the v2.1.0 fix was only partial: it covered LlamaCpp init in
getStore, but not the embed/pull/status code paths).
---
 src/cli/qmd.ts | 40 +++++++++++++++++++++++++---------------
 1 file changed, 25 insertions(+), 15 deletions(-)
diff --git a/src/cli/qmd.ts b/src/cli/qmd.ts
index 50ae7648..acf2fd14 100755
--- a/src/cli/qmd.ts
+++ b/src/cli/qmd.ts
@@ -112,23 +112,36 @@ enableProductionMode();
 let store: ReturnType<typeof createStore> | null = null;
 let storeDbPathOverride: string | undefined;
 let currentIndexName = "index";
+/** Model URIs (YAML config > env var > built-in default); holds the built-in defaults until getStore() resolves them */
+const models = {
+  embed: DEFAULT_EMBED_MODEL_URI,
+  generate: DEFAULT_GENERATE_MODEL_URI,
+  rerank: DEFAULT_RERANK_MODEL_URI,
+};
 
 function getStore(): ReturnType<typeof createStore> {
   if (!store) {
     store = createStore(storeDbPathOverride);
-    // Sync YAML config into SQLite store_collections so store.ts reads from DB
+    // Resolve model URIs from config/env before anything else
     try {
       const config = loadConfig();
+      models.embed = config.models?.embed || process.env.QMD_EMBED_MODEL || DEFAULT_EMBED_MODEL_URI;
+      models.generate = config.models?.generate || process.env.QMD_GENERATE_MODEL || DEFAULT_GENERATE_MODEL_URI;
+      models.rerank = config.models?.rerank || process.env.QMD_RERANK_MODEL || DEFAULT_RERANK_MODEL_URI;
+      // Sync YAML config into SQLite store_collections
       syncConfigToDb(store.db, config);
       if (config.models) {
         setDefaultLlamaCpp(new LlamaCpp({
-          embedModel: config.models.embed,
-          generateModel: config.models.generate,
-          rerankModel: config.models.rerank,
+          embedModel: models.embed,
+          generateModel: models.generate,
+          rerankModel: models.rerank,
         }));
       }
     } catch {
-      // Config may not exist yet — that's fine, DB works without it
+      // Config may not exist yet — resolve from env/defaults
+      models.embed = process.env.QMD_EMBED_MODEL || DEFAULT_EMBED_MODEL_URI;
+      models.generate = process.env.QMD_GENERATE_MODEL || DEFAULT_GENERATE_MODEL_URI;
+      models.rerank = process.env.QMD_RERANK_MODEL || DEFAULT_RERANK_MODEL_URI;
     }
   }
   return store;
@@ -462,9 +475,9 @@ async function showStatus(): Promise<void> {
       return match ? `https://huggingface.co/${match[1]}` : uri;
     };
     console.log(`\n${c.bold}Models${c.reset}`);
-    console.log(`  Embedding:   ${hfLink(DEFAULT_EMBED_MODEL_URI)}`);
-    console.log(`  Reranking:   ${hfLink(DEFAULT_RERANK_MODEL_URI)}`);
-    console.log(`  Generation:  ${hfLink(DEFAULT_GENERATE_MODEL_URI)}`);
+    console.log(`  Embedding:   ${hfLink(models.embed)}`);
+    console.log(`  Reranking:   ${hfLink(models.rerank)}`);
+    console.log(`  Generation:  ${hfLink(models.generate)}`);
   }
 
   // Device / GPU info
@@ -3104,10 +3117,11 @@ if (isMain) {
 
     case "embed":
       try {
+        getStore(); // ensure models are resolved from config before reading
         const maxDocsPerBatch = parseEmbedBatchOption("maxDocsPerBatch", cli.values["max-docs-per-batch"]);
         const maxBatchMb = parseEmbedBatchOption("maxBatchBytes", cli.values["max-batch-mb"]);
         const embedChunkStrategy = parseChunkStrategy(cli.values["chunk-strategy"]);
-        await vectorIndex(DEFAULT_EMBED_MODEL_URI, !!cli.values.force, {
+        await vectorIndex(models.embed, !!cli.values.force, {
           maxDocsPerBatch,
           maxBatchBytes: maxBatchMb === undefined ? undefined : maxBatchMb * 1024 * 1024,
           chunkStrategy: embedChunkStrategy,
@@ -3119,14 +3133,10 @@ if (isMain) {
       break;
 
     case "pull": {
+      getStore(); // ensure models are resolved from config
       const refresh = cli.values.refresh === undefined ? false : Boolean(cli.values.refresh);
-      const models = [
-        DEFAULT_EMBED_MODEL_URI,
-        DEFAULT_GENERATE_MODEL_URI,
-        DEFAULT_RERANK_MODEL_URI,
-      ];
       console.log(`${c.bold}Pulling models${c.reset}`);
-      const results = await pullModels(models, {
+      const results = await pullModels(Object.values(models), {
         refresh,
         cacheDir: DEFAULT_MODEL_CACHE_DIR,
       });