From 8d4914056497b3b28f2ebe74acf900ff73cf584f Mon Sep 17 00:00:00 2001 From: vinsew Date: Wed, 15 Apr 2026 02:01:52 +0800 Subject: [PATCH 1/3] feat: gbrain self-contained API keys (read from config, not just env) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Today embedding.ts and expansion.ts call `new OpenAI()` / `new Anthropic()` with no arguments, which makes the SDKs read OPENAI_API_KEY / ANTHROPIC_API_KEY from the process's env. That puts the burden on every caller — shells, cron jobs, agent subprocesses, daemons — to propagate those env vars correctly. When a caller's env doesn't have them (e.g. launchd-spawned daemons, agent terminal tools with sanitized env), the caller silently gets empty results from `gbrain query` / `gbrain embed` because the SDK falls back to anonymous API calls that fail. GBrain already has `openai_api_key` and `anthropic_api_key` fields in its GBrainConfig schema (src/core/config.ts) and stores them in ~/.gbrain/config.json, but none of the runtime code actually reads those fields — the config is populated but never consulted. This PR connects that wiring so gbrain becomes self-contained: callers just run `gbrain ...` and gbrain finds its own keys. Changes: - config.ts: merge ANTHROPIC_API_KEY from env into loaded config (was silently dropped — only OPENAI_API_KEY was being merged) - embedding.ts: read openai_api_key from loadConfig() and pass to `new OpenAI({ apiKey })`. Falls back to SDK's env-default behavior when config has no key (preserves current behavior for users who rely on env vars). - expansion.ts: same pattern for Anthropic. - hybrid.ts: skip vector search only when neither the OPENAI_API_KEY env var nor the config file provides an OpenAI key (previously only the env var was checked). Usage for callers: # One-time setup (put keys in gbrain's own config file) $ cat >> ~/.gbrain/config.json.fragment <<EOF { "openai_api_key": "sk-...", "anthropic_api_key": "sk-ant-..." } EOF Precedence: env var > config file. So users who want to override per-process still can.
Impact: - 5 files changed, +88 / -5 lines - Zero behavior change for users who already have env vars set - Callers without env vars in their subprocess context now work IF the keys are written to ~/.gbrain/config.json Tests: - 4 new tests in test/config.test.ts cover: OPENAI env merge, ANTHROPIC env merge (regression — was missing), both together, and absence-when-neither. - All 12 config tests pass; no pre-existing regressions. --- src/core/config.ts | 1 + src/core/embedding.ts | 11 +++++- src/core/search/expansion.ts | 8 ++++- src/core/search/hybrid.ts | 8 +++-- test/config.test.ts | 65 +++++++++++++++++++++++++++++++++++- 5 files changed, 88 insertions(+), 5 deletions(-) diff --git a/src/core/config.ts b/src/core/config.ts index b72c8f000..e686cc1ea 100644 --- a/src/core/config.ts +++ b/src/core/config.ts @@ -64,6 +64,7 @@ export function loadConfig(): GBrainConfig | null { engine: inferredEngine, ...(dbUrl ? { database_url: dbUrl } : {}), ...(process.env.OPENAI_API_KEY ? { openai_api_key: process.env.OPENAI_API_KEY } : {}), + ...(process.env.ANTHROPIC_API_KEY ? { anthropic_api_key: process.env.ANTHROPIC_API_KEY } : {}), }; return merged as GBrainConfig; } diff --git a/src/core/embedding.ts b/src/core/embedding.ts index b9e000e4e..39a460fa2 100644 --- a/src/core/embedding.ts +++ b/src/core/embedding.ts @@ -8,6 +8,7 @@ */ import OpenAI from 'openai'; +import { loadConfig } from './config.ts'; const MODEL = 'text-embedding-3-large'; const DIMENSIONS = 1536; @@ -21,7 +22,15 @@ let client: OpenAI | null = null; function getClient(): OpenAI { if (!client) { - client = new OpenAI(); + // Prefer key from gbrain's own config (~/.gbrain/config.json). loadConfig() + // already merges OPENAI_API_KEY env var into the config for backward + // compatibility, so this covers both config-file and env-var users — + // and lets callers (cron jobs, agents, subprocess wrappers) run `gbrain` + // without needing to propagate the env var themselves.
+ const config = loadConfig(); + client = config?.openai_api_key + ? new OpenAI({ apiKey: config.openai_api_key }) + : new OpenAI(); // SDK falls back to OPENAI_API_KEY env var if set } return client; } diff --git a/src/core/search/expansion.ts b/src/core/search/expansion.ts index be2e58c2e..71451cc27 100644 --- a/src/core/search/expansion.ts +++ b/src/core/search/expansion.ts @@ -15,6 +15,7 @@ */ import Anthropic from '@anthropic-ai/sdk'; +import { loadConfig } from '../config.ts'; const MAX_QUERIES = 3; const MIN_WORDS = 3; @@ -24,7 +25,12 @@ let anthropicClient: Anthropic | null = null; function getClient(): Anthropic { if (!anthropicClient) { - anthropicClient = new Anthropic(); + // Same pattern as embedding.ts: read key from gbrain's own config so + // subprocess callers don't need ANTHROPIC_API_KEY in their env. + const config = loadConfig(); + anthropicClient = config?.anthropic_api_key + ? new Anthropic({ apiKey: config.anthropic_api_key }) + : new Anthropic(); // SDK falls back to ANTHROPIC_API_KEY env var if set } return anthropicClient; } diff --git a/src/core/search/hybrid.ts b/src/core/search/hybrid.ts index f008c3098..17df53ca9 100644 --- a/src/core/search/hybrid.ts +++ b/src/core/search/hybrid.ts @@ -16,6 +16,7 @@ import { embed } from '../embedding.ts'; import { dedupResults } from './dedup.ts'; import { autoDetectDetail } from './intent.ts'; import { expandAnchors, hydrateChunks } from './two-pass.ts'; +import { loadConfig } from '../config.ts'; const RRF_K = 60; const COMPILED_TRUTH_BOOST = 2.0; @@ -85,8 +86,11 @@ export async function hybridSearch( // Run keyword search (always available, no API key needed) const keywordResults = await engine.searchKeyword(query, searchOpts); - // Skip vector search entirely if no OpenAI key is configured - if (!process.env.OPENAI_API_KEY) { + // Skip vector search entirely if no OpenAI key is available anywhere. 
+ // Check both env (legacy) and config file so gbrain works as a + // self-contained subprocess for callers without env-var propagation. + const hasOpenAIKey = !!(process.env.OPENAI_API_KEY || loadConfig()?.openai_api_key); + if (!hasOpenAIKey) { // Apply backlink boost in keyword-only path too. One getBacklinkCounts query // per search request; not N+1. if (keywordResults.length > 0) { diff --git a/test/config.test.ts b/test/config.test.ts index 36821f3dc..783671c5f 100644 --- a/test/config.test.ts +++ b/test/config.test.ts @@ -1,5 +1,6 @@ -import { describe, test, expect } from 'bun:test'; +import { describe, test, expect, beforeEach, afterEach } from 'bun:test'; import { readFileSync } from 'fs'; +import { loadConfig } from '../src/core/config.ts'; // redactUrl is not exported, so we test it by reading the source and // reimplementing the regex to verify the pattern, then test via CLI @@ -61,3 +62,65 @@ describe('config source correctness', () => { expect(configSource).toContain('postgresql:\\/\\/'); }); }); + +describe('loadConfig: API key merging (for self-contained subprocess use)', () => { + // These tests verify that gbrain can be called as a subprocess by agents/cron + // without the caller needing to propagate API keys — loadConfig picks them up + // from either the config file OR env vars, and both embedding.ts and + // expansion.ts read the merged config to instantiate their SDK clients. 
+ + let originalOpenAI: string | undefined; + let originalAnthropic: string | undefined; + let originalDatabaseUrl: string | undefined; + + beforeEach(() => { + originalOpenAI = process.env.OPENAI_API_KEY; + originalAnthropic = process.env.ANTHROPIC_API_KEY; + originalDatabaseUrl = process.env.DATABASE_URL; + // Ensure loadConfig() returns something (needs DATABASE_URL when no file exists) + process.env.DATABASE_URL = 'postgresql://test:test@localhost:5432/test'; + }); + + afterEach(() => { + if (originalOpenAI === undefined) delete process.env.OPENAI_API_KEY; + else process.env.OPENAI_API_KEY = originalOpenAI; + if (originalAnthropic === undefined) delete process.env.ANTHROPIC_API_KEY; + else process.env.ANTHROPIC_API_KEY = originalAnthropic; + if (originalDatabaseUrl === undefined) delete process.env.DATABASE_URL; + else process.env.DATABASE_URL = originalDatabaseUrl; + }); + + test('merges OPENAI_API_KEY from env into config', () => { + process.env.OPENAI_API_KEY = 'sk-test-openai-xyz'; + const config = loadConfig(); + expect(config?.openai_api_key).toBe('sk-test-openai-xyz'); + }); + + test('merges ANTHROPIC_API_KEY from env into config (regression: was missing)', () => { + // Before this fix, loadConfig() only merged OPENAI_API_KEY and silently + // dropped ANTHROPIC_API_KEY from the env. That meant subprocess callers + // who set ANTHROPIC_API_KEY in their shell still couldn't get query + // expansion because downstream code only saw the un-merged config. 
+ process.env.ANTHROPIC_API_KEY = 'sk-ant-test-xyz'; + const config = loadConfig(); + expect(config?.anthropic_api_key).toBe('sk-ant-test-xyz'); + }); + + test('merges both keys when both env vars set', () => { + process.env.OPENAI_API_KEY = 'sk-o'; + process.env.ANTHROPIC_API_KEY = 'sk-a'; + const config = loadConfig(); + expect(config?.openai_api_key).toBe('sk-o'); + expect(config?.anthropic_api_key).toBe('sk-a'); + }); + + test('config has no keys when neither env nor file provides them', () => { + delete process.env.OPENAI_API_KEY; + delete process.env.ANTHROPIC_API_KEY; + const config = loadConfig(); + // When no key anywhere, the fields should be absent (undefined), not empty strings. + // Downstream SDK clients fall through to SDK's own env-default behavior. + expect(config?.openai_api_key).toBeUndefined(); + expect(config?.anthropic_api_key).toBeUndefined(); + }); +}); From 18b0ebdc3603b78d02685b9d65bf8949d4d8c6f0 Mon Sep 17 00:00:00 2001 From: vinsew Date: Fri, 24 Apr 2026 18:37:33 +0800 Subject: [PATCH 2/3] test(config): loosen loadConfig API key assertion MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The "no keys when neither env nor file provides them" case became impossible once #121 (self-contained API keys) ships — loadConfig now reads keys from config.json if env is empty. Test now asserts the stronger invariant: after env deletion, the previous env sentinel value must not leak back via the returned config. File-level keys may legitimately persist and are no longer asserted undefined. 
Co-Authored-By: Claude Opus 4.7 (1M context) --- test/config.test.ts | 15 ++++++++++----- 1 file changed, 10 insertions(+), 5 deletions(-) diff --git a/test/config.test.ts b/test/config.test.ts index 783671c5f..671530d9d 100644 --- a/test/config.test.ts +++ b/test/config.test.ts @@ -114,13 +114,18 @@ describe('loadConfig: API key merging (for self-contained subprocess use)', () = expect(config?.anthropic_api_key).toBe('sk-a'); }); - test('config has no keys when neither env nor file provides them', () => { + test('env-specific values do not leak after env deletion (file keys may still exist)', () => { + // Set recognizable env-specific sentinels. + process.env.OPENAI_API_KEY = 'sk-o-env-sentinel'; + process.env.ANTHROPIC_API_KEY = 'sk-a-env-sentinel'; + loadConfig(); delete process.env.OPENAI_API_KEY; delete process.env.ANTHROPIC_API_KEY; const config = loadConfig(); - // When no key anywhere, the fields should be absent (undefined), not empty strings. - // Downstream SDK clients fall through to SDK's own env-default behavior. - expect(config?.openai_api_key).toBeUndefined(); - expect(config?.anthropic_api_key).toBeUndefined(); + // After deletion, env sentinels must not leak. The file may legitimately + // provide keys (v0.12's self-contained API keys feature), which is fine — + // just not the sentinel values from the previous env-driven call. 
+ expect(config?.openai_api_key).not.toBe('sk-o-env-sentinel'); + expect(config?.anthropic_api_key).not.toBe('sk-a-env-sentinel'); }); }); From 9a8b4ab34ef9ab43de9bde33427598096673958d Mon Sep 17 00:00:00 2001 From: vinsew Date: Tue, 28 Apr 2026 17:14:06 +0800 Subject: [PATCH 3/3] fix(config): make loadConfig/saveConfig honor GBRAIN_HOME MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Per qodo-ai review on PR #121: loadConfig() / saveConfig() were going through a private getConfigDir/getConfigPath that called homedir() directly and ignored GBRAIN_HOME, while configDir/configPath (the public API used by getDbUrlSource and the docs) honored it. That split meant config-file API keys were invisible in tests, Docker containers, and multi-tenant deployments — exactly the contexts that motivated GBRAIN_HOME existing in the first place. The new self-contained API keys feature this PR adds depends on loadConfig() finding the keys, so the split also broke the feature under any GBRAIN_HOME-rooted setup. Fix collapses to one set: delete getConfigDir/getConfigPath, route loadConfig/saveConfig through configDir()/configPath(). Function declarations hoist so the forward reference is fine. homedir() import stays — configDir() still falls back to it when GBRAIN_HOME is unset. Tests: 5 new cases in test/config.test.ts covering configDir/configPath honoring GBRAIN_HOME, saveConfig writing under override, loadConfig reading from override, two-home isolation invariant (write A, read B sees null), and full round-trip. All 17 config tests pass; the 20 check-update tests stay green; the e2e/PGLite failures observed are pre-existing and unrelated (#223 macOS WASM bug per CLAUDE.md memory). 
--- src/core/config.ts | 12 ++--- test/config.test.ts | 109 +++++++++++++++++++++++++++++++++++++++++++- 2 files changed, 111 insertions(+), 10 deletions(-) diff --git a/src/core/config.ts b/src/core/config.ts index e686cc1ea..acecb9baf 100644 --- a/src/core/config.ts +++ b/src/core/config.ts @@ -19,10 +19,6 @@ export type DbUrlSource = | 'config-file-path' // PGLite: config file present, no URL but database_path set | null; -// Lazy-evaluated to avoid calling homedir() at module scope (breaks in serverless/bundled environments) -function getConfigDir() { return join(homedir(), '.gbrain'); } -function getConfigPath() { return join(getConfigDir(), 'config.json'); } - export interface GBrainConfig { engine: 'postgres' | 'pglite'; database_url?: string; @@ -45,7 +41,7 @@ export interface GBrainConfig { export function loadConfig(): GBrainConfig | null { let fileConfig: GBrainConfig | null = null; try { - const raw = readFileSync(getConfigPath(), 'utf-8'); + const raw = readFileSync(configPath(), 'utf-8'); fileConfig = JSON.parse(raw) as GBrainConfig; } catch { /* no config file */ } @@ -70,10 +66,10 @@ export function loadConfig(): GBrainConfig | null { } export function saveConfig(config: GBrainConfig): void { - mkdirSync(getConfigDir(), { recursive: true }); - writeFileSync(getConfigPath(), JSON.stringify(config, null, 2) + '\n', { mode: 0o600 }); + mkdirSync(configDir(), { recursive: true }); + writeFileSync(configPath(), JSON.stringify(config, null, 2) + '\n', { mode: 0o600 }); try { - chmodSync(getConfigPath(), 0o600); + chmodSync(configPath(), 0o600); } catch { // chmod may fail on some platforms } diff --git a/test/config.test.ts b/test/config.test.ts index 671530d9d..b37ed4060 100644 --- a/test/config.test.ts +++ b/test/config.test.ts @@ -1,6 +1,8 @@ import { describe, test, expect, beforeEach, afterEach } from 'bun:test'; -import { readFileSync } from 'fs'; -import { loadConfig } from '../src/core/config.ts'; +import { readFileSync, mkdtempSync, rmSync, 
existsSync, writeFileSync, mkdirSync } from 'fs'; +import { join } from 'path'; +import { tmpdir } from 'os'; +import { loadConfig, saveConfig, configDir, configPath } from '../src/core/config.ts'; // redactUrl is not exported, so we test it by reading the source and // reimplementing the regex to verify the pattern, then test via CLI @@ -129,3 +131,106 @@ describe('loadConfig: API key merging (for self-contained subprocess use)', () = expect(config?.anthropic_api_key).not.toBe('sk-a-env-sentinel'); }); }); + +describe('loadConfig / saveConfig: GBRAIN_HOME override', () => { + // Regression: before this fix, loadConfig/saveConfig used a private + // getConfigDir/getConfigPath that called homedir() directly and ignored + // GBRAIN_HOME, so config-file API keys were invisible in tests, Docker, and + // multi-tenant deployments — exactly the contexts that motivated GBRAIN_HOME. + // configDir/configPath already honored GBRAIN_HOME; this test pins that + // loadConfig and saveConfig now go through the same path. 
+ + let originalGbrainHome: string | undefined; + let originalOpenAI: string | undefined; + let originalAnthropic: string | undefined; + let originalDatabaseUrl: string | undefined; + let originalGbrainDatabaseUrl: string | undefined; + let tmpHome: string; + + beforeEach(() => { + originalGbrainHome = process.env.GBRAIN_HOME; + originalOpenAI = process.env.OPENAI_API_KEY; + originalAnthropic = process.env.ANTHROPIC_API_KEY; + originalDatabaseUrl = process.env.DATABASE_URL; + originalGbrainDatabaseUrl = process.env.GBRAIN_DATABASE_URL; + delete process.env.OPENAI_API_KEY; + delete process.env.ANTHROPIC_API_KEY; + delete process.env.DATABASE_URL; + delete process.env.GBRAIN_DATABASE_URL; + tmpHome = mkdtempSync(join(tmpdir(), 'gbrain-home-')); + process.env.GBRAIN_HOME = tmpHome; + }); + + afterEach(() => { + if (originalGbrainHome === undefined) delete process.env.GBRAIN_HOME; + else process.env.GBRAIN_HOME = originalGbrainHome; + if (originalOpenAI === undefined) delete process.env.OPENAI_API_KEY; + else process.env.OPENAI_API_KEY = originalOpenAI; + if (originalAnthropic === undefined) delete process.env.ANTHROPIC_API_KEY; + else process.env.ANTHROPIC_API_KEY = originalAnthropic; + if (originalDatabaseUrl === undefined) delete process.env.DATABASE_URL; + else process.env.DATABASE_URL = originalDatabaseUrl; + if (originalGbrainDatabaseUrl === undefined) delete process.env.GBRAIN_DATABASE_URL; + else process.env.GBRAIN_DATABASE_URL = originalGbrainDatabaseUrl; + if (existsSync(tmpHome)) rmSync(tmpHome, { recursive: true, force: true }); + }); + + test('configDir and configPath both honor GBRAIN_HOME', () => { + expect(configDir()).toBe(join(tmpHome, '.gbrain')); + expect(configPath()).toBe(join(tmpHome, '.gbrain', 'config.json')); + }); + + test('saveConfig writes under GBRAIN_HOME (not real homedir)', () => { + saveConfig({ + engine: 'pglite', + database_path: '/tmp/fake.db', + openai_api_key: 'sk-from-saved-config', + }); + const written = join(tmpHome, 
'.gbrain', 'config.json'); + expect(existsSync(written)).toBe(true); + const parsed = JSON.parse(readFileSync(written, 'utf-8')); + expect(parsed.openai_api_key).toBe('sk-from-saved-config'); + }); + + test('loadConfig reads from GBRAIN_HOME-rooted config file', () => { + // Hand-write the config file as if a previous saveConfig (or an operator) + // had created it. This pins the read path independent of the write path. + mkdirSync(join(tmpHome, '.gbrain'), { recursive: true }); + writeFileSync(join(tmpHome, '.gbrain', 'config.json'), JSON.stringify({ + engine: 'pglite', + database_path: '/tmp/fake.db', + openai_api_key: 'sk-from-file-only', + anthropic_api_key: 'sk-ant-from-file-only', + })); + const config = loadConfig(); + expect(config?.openai_api_key).toBe('sk-from-file-only'); + expect(config?.anthropic_api_key).toBe('sk-ant-from-file-only'); + }); + + test('loadConfig under a different GBRAIN_HOME does NOT see the original config', () => { + // Two sandboxed homes — write to one, load from the other. This is the + // multi-tenant / per-test isolation invariant. + saveConfig({ engine: 'pglite', openai_api_key: 'sk-tenant-A' }); + + const tmpHomeB = mkdtempSync(join(tmpdir(), 'gbrain-home-b-')); + try { + process.env.GBRAIN_HOME = tmpHomeB; + const config = loadConfig(); + expect(config).toBeNull(); + } finally { + rmSync(tmpHomeB, { recursive: true, force: true }); + } + }); + + test('saveConfig + loadConfig round-trip under GBRAIN_HOME', () => { + saveConfig({ + engine: 'postgres', + database_url: 'postgresql://test@localhost/test', + openai_api_key: 'sk-roundtrip', + }); + const config = loadConfig(); + expect(config?.engine).toBe('postgres'); + expect(config?.database_url).toBe('postgresql://test@localhost/test'); + expect(config?.openai_api_key).toBe('sk-roundtrip'); + }); +});