Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
274 changes: 160 additions & 114 deletions bun.lock

Large diffs are not rendered by default.

2 changes: 2 additions & 0 deletions package.json
Original file line number Diff line number Diff line change
Expand Up @@ -49,8 +49,10 @@
"better-sqlite3": "^12.4.5",
"fast-glob": "^3.3.0",
"node-llama-cpp": "^3.17.1",
"openai": "^6.33.0",
"picomatch": "^4.0.0",
"sqlite-vec": "^0.1.7-alpha.2",
"tiktoken": "^1.0.22",
"yaml": "^2.8.2",
"zod": "4.2.1"
},
Expand Down
85 changes: 56 additions & 29 deletions src/cli/qmd.ts
Original file line number Diff line number Diff line change
Expand Up @@ -76,7 +76,7 @@ import {
syncConfigToDb,
type ReindexResult,
} from "../store.js";
import { disposeDefaultLlamaCpp, getDefaultLlamaCpp, withLLMSession, pullModels, DEFAULT_EMBED_MODEL_URI, DEFAULT_GENERATE_MODEL_URI, DEFAULT_RERANK_MODEL_URI, DEFAULT_MODEL_CACHE_DIR } from "../llm.js";
import { disposeDefaultLlamaCpp, getDefaultLlamaCpp, getDefaultEmbeddingLLM, withLLMSession, pullModels, setEmbeddingConfig, isUsingOpenAI, DEFAULT_EMBED_MODEL_URI, DEFAULT_GENERATE_MODEL_URI, DEFAULT_RERANK_MODEL_URI, DEFAULT_MODEL_CACHE_DIR } from "../llm.js";
import {
formatSearchResults,
formatDocuments,
Expand All @@ -96,6 +96,7 @@ import {
listAllContexts,
setConfigIndexName,
loadConfig,
getEmbeddingConfig as getEmbeddingConfigFromYaml,
} from "../collections.js";
import { getEmbeddedQmdSkillContent, getEmbeddedQmdSkillFiles } from "../embedded-skills.js";

Expand Down Expand Up @@ -276,7 +277,6 @@ function computeDisplayPath(
return filepath;
}


function formatTimeAgo(date: Date): string {
const seconds = Math.floor((Date.now() - date.getTime()) / 1000);
if (seconds < 60) return `${seconds}s ago`;
Expand Down Expand Up @@ -426,34 +426,40 @@ async function showStatus(): Promise<void> {
console.log(` Generation: ${hfLink(DEFAULT_GENERATE_MODEL_URI)}`);
}

// Device / GPU info
try {
const llm = getDefaultLlamaCpp();
const device = await llm.getDeviceInfo();
console.log(`\n${c.bold}Device${c.reset}`);
if (device.gpu) {
console.log(` GPU: ${c.green}${device.gpu}${c.reset} (offloading: ${device.gpuOffloading ? 'yes' : 'no'})`);
if (device.gpuDevices.length > 0) {
// Deduplicate and count GPUs
const counts = new Map<string, number>();
for (const name of device.gpuDevices) {
counts.set(name, (counts.get(name) || 0) + 1);
// Device / GPU info (local llama-cpp mode only)
if (!isUsingOpenAI()) {
try {
const llm = getDefaultLlamaCpp();
const device = await llm.getDeviceInfo();
console.log(`
${c.bold}Device${c.reset}`);
if (device.gpu) {
console.log(` GPU: ${c.green}${device.gpu}${c.reset} (offloading: ${device.gpuOffloading ? 'yes' : 'no'})`);
if (device.gpuDevices.length > 0) {
const counts = new Map<string, number>();
for (const name of device.gpuDevices) {
counts.set(name, (counts.get(name) || 0) + 1);
}
const deviceStr = Array.from(counts.entries())
.map(([name, count]) => count > 1 ? `${count}× ${name}` : name)
.join(', ');
console.log(` Devices: ${deviceStr}`);
}
const deviceStr = Array.from(counts.entries())
.map(([name, count]) => count > 1 ? `${count}× ${name}` : name)
.join(', ');
console.log(` Devices: ${deviceStr}`);
}
if (device.vram) {
console.log(` VRAM: ${formatBytes(device.vram.free)} free / ${formatBytes(device.vram.total)} total`);
if (device.vram) {
console.log(` VRAM: ${formatBytes(device.vram.free)} free / ${formatBytes(device.vram.total)} total`);
}
} else {
console.log(` GPU: ${c.yellow}none${c.reset} (running on CPU — models will be slow)`);
console.log(` ${c.dim}Tip: Install CUDA, Vulkan, or Metal support for GPU acceleration.${c.reset}`);
}
} else {
console.log(` GPU: ${c.yellow}none${c.reset} (running on CPU — models will be slow)`);
console.log(` ${c.dim}Tip: Install CUDA, Vulkan, or Metal support for GPU acceleration.${c.reset}`);
console.log(` CPU: ${device.cpuCores} math cores`);
} catch {
// Don't fail status if LLM init fails
}
console.log(` CPU: ${device.cpuCores} math cores`);
} catch {
// Don't fail status if LLM init fails
} else {
console.log(`
${c.bold}Provider${c.reset}`);
console.log(` Embeddings/Rerank/Expansion: OpenAI-compatible endpoint`);
}

// Tips section
Expand Down Expand Up @@ -2147,7 +2153,7 @@ async function vectorSearch(query: string, opts: OutputOptions, _model: string =

checkIndexHealth(store.db);

await withLLMSession(async () => {
const run = async () => {
let results = await vectorSearchQuery(store, query, {
collection: singleCollection,
limit: opts.all ? 500 : (opts.limit || 10),
Expand Down Expand Up @@ -2185,7 +2191,13 @@ async function vectorSearch(query: string, opts: OutputOptions, _model: string =
context: r.context,
docid: r.docid,
})), query, { ...opts, limit: results.length });
}, { maxDuration: 10 * 60 * 1000, name: 'vectorSearch' });
};

if (isUsingOpenAI()) {
await run();
} else {
await withLLMSession(async () => run(), { maxDuration: 10 * 60 * 1000, name: 'vectorSearch' });
}
}

async function querySearch(query: string, opts: OutputOptions, _embedModel: string = DEFAULT_EMBED_MODEL, _rerankModel: string = DEFAULT_RERANK_MODEL): Promise<void> {
Expand Down Expand Up @@ -2698,6 +2710,21 @@ if (isMain) {
process.exit(cli.values.help ? 0 : 1);
}

// Load embedding configuration from config file or env var
const embeddingYamlConfig = getEmbeddingConfigFromYaml();
const useOpenAI = process.env.QMD_OPENAI === '1' || embeddingYamlConfig.provider === 'openai';

if (useOpenAI) {
setEmbeddingConfig({
provider: 'openai',
openai: {
apiKey: process.env.OPENAI_API_KEY || embeddingYamlConfig.openai?.api_key,
baseURL: process.env.OPENAI_BASE_URL || embeddingYamlConfig.openai?.base_url,
embedModel: embeddingYamlConfig.openai?.model,
},
});
}

switch (cli.command) {
case "context": {
const subcommand = cli.args[0];
Expand Down
24 changes: 23 additions & 1 deletion src/collections.ts
Original file line number Diff line number Diff line change
Expand Up @@ -33,12 +33,25 @@ export interface Collection {
includeByDefault?: boolean; // Include in queries by default (default: true)
}

/**
 * Embedding provider configuration (optional in config file)
 *
 * Mirrors the optional `embedding:` section of the YAML config file.
 * Every field is optional; an absent section means "use local llama-cpp".
 */
export interface EmbeddingProviderConfig {
  provider?: 'local' | 'openai'; // Default: 'local'
  openai?: {
    api_key?: string; // Falls back to OPENAI_API_KEY env var
    base_url?: string; // Falls back to OPENAI_BASE_URL env var
    model?: string; // Default: 'text-embedding-3-small'
  };
}

/**
 * The complete configuration file structure
 *
 * Top-level shape of the YAML config file: named collections plus an
 * optional global context string and optional embedding-provider settings.
 */
export interface CollectionConfig {
  global_context?: string; // Context applied to all collections
  collections: Record<string, Collection>; // Collection name -> config
  embedding?: EmbeddingProviderConfig; // Optional embedding provider settings
}

/**
Expand Down Expand Up @@ -498,3 +511,12 @@ export function isValidCollectionName(name: string): boolean {
// Allow alphanumeric, hyphens, underscores
return /^[a-zA-Z0-9_-]+$/.test(name);
}

/**
 * Get embedding configuration from config file.
 *
 * @returns The `embedding` section of the loaded config if present,
 *          otherwise a default selecting the local llama-cpp provider.
 */
export function getEmbeddingConfig(): EmbeddingProviderConfig {
  const config = loadConfig();
  // `??` (not `||`): substitute the default only when the section is absent.
  return config.embedding ?? { provider: 'local' };
}
74 changes: 74 additions & 0 deletions src/llm.ts
Original file line number Diff line number Diff line change
Expand Up @@ -317,6 +317,16 @@ export interface LLM {
*/
embed(text: string, options?: EmbedOptions): Promise<EmbeddingResult | null>;

/**
* Get embeddings for multiple texts in a batch
*/
embedBatch(texts: string[]): Promise<(EmbeddingResult | null)[]>;

/**
* Get the model name used for embeddings
*/
getModelName(): string;

/**
* Generate text completion
*/
Expand Down Expand Up @@ -443,6 +453,13 @@ export class LlamaCpp implements LLM {
this.disposeModelsOnInactivity = config.disposeModelsOnInactivity ?? false;
}

/**
 * Get the model name used for embeddings.
 *
 * For the local llama-cpp provider this is the embed-model URI the
 * instance was configured with (stored in `this.embedModelUri`).
 */
getModelName(): string {
return this.embedModelUri;
}

/**
* Reset the inactivity timer. Called after each model operation.
* When timer fires, models are unloaded to free memory (if no active sessions).
Expand Down Expand Up @@ -1544,3 +1561,60 @@ export async function disposeDefaultLlamaCpp(): Promise<void> {
defaultLlamaCpp = null;
}
}

// =============================================================================
// OpenAI Embedding Support
// =============================================================================

// NOTE(review): mid-file import — consider hoisting to the top of llm.ts
// with the other imports for consistency.
import { OpenAIEmbedding, type OpenAIConfig } from "./openai-llm.js";

/**
 * Embedding provider configuration
 */
export type EmbeddingProvider = 'local' | 'openai';

export type EmbeddingConfig = {
provider: EmbeddingProvider;
openai?: OpenAIConfig; // only consulted when provider === 'openai'
};

// Default embedding config: use local llama-cpp
let embeddingConfig: EmbeddingConfig = { provider: 'local' };
// Lazily-created OpenAI client; reset whenever the config changes.
let openAIEmbedding: OpenAIEmbedding | null = null;

/**
 * Set the embedding configuration. Call before using embeddings.
 *
 * Drops any cached OpenAI client so the next embedding request rebuilds
 * it from the new settings.
 */
export function setEmbeddingConfig(config: EmbeddingConfig): void {
  // Invalidate the cached client first — it was built from the old config.
  openAIEmbedding = null;
  embeddingConfig = config;
}

/**
 * Get the current embedding configuration
 *
 * Returns the module-level config last installed via setEmbeddingConfig()
 * (defaults to the local llama-cpp provider).
 */
export function getEmbeddingConfig(): EmbeddingConfig {
return embeddingConfig;
}

/**
 * Check if using OpenAI for embeddings.
 *
 * @returns true when the active embedding config selects the
 *          OpenAI-compatible provider.
 */
export function isUsingOpenAI(): boolean {
  const { provider } = embeddingConfig;
  return provider === 'openai';
}

/**
 * Get the appropriate LLM for embeddings based on config.
 *
 * When the provider is 'openai', lazily constructs and caches an
 * OpenAIEmbedding client; otherwise returns the shared local LlamaCpp
 * instance.
 */
export function getDefaultEmbeddingLLM(): LLM {
  // Guard clause: anything other than 'openai' means local llama-cpp.
  if (embeddingConfig.provider !== 'openai') {
    return getDefaultLlamaCpp();
  }
  // Lazy singleton: build the client once per configuration
  // (setEmbeddingConfig resets the cache on config changes).
  openAIEmbedding ??= new OpenAIEmbedding(embeddingConfig.openai);
  return openAIEmbedding;
}
Loading