From 0a80d7d66e0b7836ad596cab5e514a64b12126e2 Mon Sep 17 00:00:00 2001 From: lanceretter Date: Fri, 8 May 2026 08:38:39 -0400 Subject: [PATCH] fix: bootstrap forward-references for v39-v41 schema replay MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Three column-with-index forward references in the embedded schema blob were missing from applyForwardReferenceBootstrap, so any brain at config.version < 39 (Postgres) or < 41 (PGLite) wedges before the migration runner can advance. Reproduced end-to-end on a PlanetScale Postgres brain stuck at config.version=34 trying to upgrade to v0.30.0: ERROR: column "effective_date" does not exist ERROR: column cc.modality does not exist (After upgrading, gbrain search and gbrain reindex-frontmatter both fail.) The schema-blob references that crash before migrations run: - v39 (multimodal_dual_column_v0_27_1): CREATE INDEX idx_chunks_embedding_image ON content_chunks USING hnsw (embedding_image vector_cosine_ops) WHERE embedding_image IS NOT NULL; - v41 (pages_recency_columns): CREATE INDEX pages_coalesce_date_idx ON pages ((COALESCE(effective_date, updated_at))); PGLite already covered v39 (lines 273+, 308+, 382-392). Postgres and PGLite both lacked v40+v41 coverage. This commit adds: - Postgres engine probe + branch for v39 (modality, embedding_image) — was entirely missing on Postgres, so Postgres brains < v39 hit the wedge that PGLite already protected against. - Both engines: probe + branch for v40+v41. Bootstraps all five additive pages columns (emotional_weight, effective_date, effective_date_source, import_filename, salience_touched_at) gated on `effective_date_exists` as the proxy. - test/schema-bootstrap-coverage.test.ts: extends REQUIRED_BOOTSTRAP_COVERAGE with the six new columns AND the pre-test DROP block so both the per-target assertion test and the end-to-end "bootstrap + SCHEMA_SQL replay" test exercise the new coverage. All 5 tests in schema-bootstrap-coverage pass. 
typecheck clean. Bootstrap stays additive-columns-only. Indexes are left to schema replay / migrations as before. --- src/core/pglite-engine.ts | 27 +++++++++++++- src/core/postgres-engine.ts | 50 ++++++++++++++++++++++++-- test/schema-bootstrap-coverage.test.ts | 31 ++++++++++++++++ 3 files changed, 105 insertions(+), 3 deletions(-) diff --git a/src/core/pglite-engine.ts b/src/core/pglite-engine.ts index 7b3d4c066..1439266be 100644 --- a/src/core/pglite-engine.ts +++ b/src/core/pglite-engine.ts @@ -271,6 +271,8 @@ export class PGLiteEngine implements BrainEngine { WHERE table_schema='public' AND table_name='content_chunks' AND column_name='search_vector') AS search_vector_exists, EXISTS (SELECT 1 FROM information_schema.columns WHERE table_schema='public' AND table_name='content_chunks' AND column_name='embedding_image') AS embedding_image_exists, + EXISTS (SELECT 1 FROM information_schema.columns + WHERE table_schema='public' AND table_name='pages' AND column_name='effective_date') AS effective_date_exists, EXISTS (SELECT 1 FROM information_schema.tables WHERE table_schema='public' AND table_name='mcp_request_log') AS mcp_log_exists, EXISTS (SELECT 1 FROM information_schema.columns @@ -292,6 +294,7 @@ export class PGLiteEngine implements BrainEngine { language_exists: boolean; search_vector_exists: boolean; embedding_image_exists: boolean; + effective_date_exists: boolean; mcp_log_exists: boolean; agent_name_exists: boolean; subagent_messages_exists: boolean; @@ -311,11 +314,16 @@ export class PGLiteEngine implements BrainEngine { // v0.27 (v36): idx_subagent_messages_provider in PGLITE_SCHEMA_SQL needs // provider_id (the SECOND column in the composite index `(job_id, provider_id)`). const needsSubagentProviderId = probe.subagent_messages_exists && !probe.subagent_provider_id_exists; + // v0.29.1 (v40 + v41): pages_coalesce_date_idx expression index in + // PGLITE_SCHEMA_SQL references effective_date. 
Use effective_date_exists + // as the proxy for the five v40 + v41 pages columns. + const needsPagesRecency = probe.pages_exists && !probe.effective_date_exists; // Fresh installs (no tables yet) and modern brains both no-op. if (!needsPagesBootstrap && !needsLinksBootstrap && !needsChunksBootstrap && !needsPagesDeletedAt && !needsChunksEmbeddingImage - && !needsMcpLogBootstrap && !needsSubagentProviderId) return; + && !needsMcpLogBootstrap && !needsSubagentProviderId + && !needsPagesRecency) return; console.log(' Pre-v0.21 brain detected, applying forward-reference bootstrap'); @@ -415,6 +423,23 @@ export class PGLiteEngine implements BrainEngine { ALTER TABLE subagent_messages ADD COLUMN IF NOT EXISTS provider_id TEXT; `); } + + if (needsPagesRecency) { + // v40 (pages_emotional_weight) adds emotional_weight; v41 + // (pages_recency_columns) adds effective_date + effective_date_source + + // import_filename + salience_touched_at and the + // `pages_coalesce_date_idx ON pages ((COALESCE(effective_date, updated_at)))` + // expression index. PGLITE_SCHEMA_SQL's CREATE INDEX for that expression + // crashes before v41 runs. Bootstrap adds all five additive columns; + // v40 + v41 run later via runMigrations and are idempotent. 
+ await this.db.exec(` + ALTER TABLE pages ADD COLUMN IF NOT EXISTS emotional_weight REAL NOT NULL DEFAULT 0.0; + ALTER TABLE pages ADD COLUMN IF NOT EXISTS effective_date TIMESTAMPTZ; + ALTER TABLE pages ADD COLUMN IF NOT EXISTS effective_date_source TEXT; + ALTER TABLE pages ADD COLUMN IF NOT EXISTS import_filename TEXT; + ALTER TABLE pages ADD COLUMN IF NOT EXISTS salience_touched_at TIMESTAMPTZ; + `); + } } async withReservedConnection(fn: (conn: ReservedConnection) => Promise): Promise { diff --git a/src/core/postgres-engine.ts b/src/core/postgres-engine.ts index f081579c0..98eb2fc7c 100644 --- a/src/core/postgres-engine.ts +++ b/src/core/postgres-engine.ts @@ -230,6 +230,7 @@ export class PostgresEngine implements BrainEngine { pages_exists: boolean; source_id_exists: boolean; deleted_at_exists: boolean; + effective_date_exists: boolean; links_exists: boolean; link_source_exists: boolean; origin_page_id_exists: boolean; @@ -237,6 +238,7 @@ export class PostgresEngine implements BrainEngine { symbol_name_exists: boolean; language_exists: boolean; search_vector_exists: boolean; + embedding_image_exists: boolean; mcp_log_exists: boolean; agent_name_exists: boolean; subagent_messages_exists: boolean; @@ -249,6 +251,8 @@ export class PostgresEngine implements BrainEngine { WHERE table_schema = current_schema() AND table_name = 'pages' AND column_name = 'source_id') AS source_id_exists, EXISTS (SELECT 1 FROM information_schema.columns WHERE table_schema = current_schema() AND table_name = 'pages' AND column_name = 'deleted_at') AS deleted_at_exists, + EXISTS (SELECT 1 FROM information_schema.columns + WHERE table_schema = current_schema() AND table_name = 'pages' AND column_name = 'effective_date') AS effective_date_exists, EXISTS (SELECT 1 FROM information_schema.tables WHERE table_schema = current_schema() AND table_name = 'links') AS links_exists, EXISTS (SELECT 1 FROM information_schema.columns @@ -263,6 +267,8 @@ export class PostgresEngine implements 
BrainEngine { WHERE table_schema = current_schema() AND table_name = 'content_chunks' AND column_name = 'language') AS language_exists, EXISTS (SELECT 1 FROM information_schema.columns WHERE table_schema = current_schema() AND table_name = 'content_chunks' AND column_name = 'search_vector') AS search_vector_exists, + EXISTS (SELECT 1 FROM information_schema.columns + WHERE table_schema = current_schema() AND table_name = 'content_chunks' AND column_name = 'embedding_image') AS embedding_image_exists, EXISTS (SELECT 1 FROM information_schema.tables WHERE table_schema = current_schema() AND table_name = 'mcp_request_log') AS mcp_log_exists, EXISTS (SELECT 1 FROM information_schema.columns @@ -287,8 +293,19 @@ export class PostgresEngine implements BrainEngine { // v0.27 (v36): idx_subagent_messages_provider in SCHEMA_SQL needs provider_id // (the SECOND column in the composite index `(job_id, provider_id)`). const needsSubagentProviderId = probe.subagent_messages_exists && !probe.subagent_provider_id_exists; - - if (!needsPagesBootstrap && !needsLinksBootstrap && !needsChunksBootstrap && !needsPagesDeletedAt && !needsMcpLogBootstrap && !needsSubagentProviderId) return; + // v0.27.1 (v39): idx_chunks_embedding_image partial HNSW in SCHEMA_SQL + // references embedding_image. Use embedding_image_exists as the proxy for + // both v39 columns; modality is added in the same migration. + const needsChunksEmbeddingImage = probe.chunks_exists && !probe.embedding_image_exists; + // v0.29.1 (v40 + v41): pages_coalesce_date_idx expression index in SCHEMA_SQL + // references effective_date. Use effective_date_exists as the proxy for the + // five v40 + v41 pages columns (emotional_weight, effective_date, + // effective_date_source, import_filename, salience_touched_at). 
+ const needsPagesRecency = probe.pages_exists && !probe.effective_date_exists; + + if (!needsPagesBootstrap && !needsLinksBootstrap && !needsChunksBootstrap + && !needsPagesDeletedAt && !needsMcpLogBootstrap && !needsSubagentProviderId + && !needsChunksEmbeddingImage && !needsPagesRecency) return; console.log(' Pre-v0.21 brain detected, applying forward-reference bootstrap'); @@ -376,6 +393,35 @@ export class PostgresEngine implements BrainEngine { ALTER TABLE subagent_messages ADD COLUMN IF NOT EXISTS provider_id TEXT; `); } + + if (needsChunksEmbeddingImage) { + // v39 (multimodal_dual_column_v0_27_1) adds modality + embedding_image + // columns to content_chunks plus a partial HNSW index that references + // embedding_image. Bootstrap mirrors enough state for SCHEMA_SQL's + // `CREATE INDEX idx_chunks_embedding_image ... WHERE embedding_image IS NOT NULL` + // not to crash. v39 runs later via runMigrations and is idempotent. + await conn.unsafe(` + ALTER TABLE content_chunks ADD COLUMN IF NOT EXISTS modality TEXT NOT NULL DEFAULT 'text'; + ALTER TABLE content_chunks ADD COLUMN IF NOT EXISTS embedding_image vector(1024); + `); + } + + if (needsPagesRecency) { + // v40 (pages_emotional_weight) adds emotional_weight; v41 + // (pages_recency_columns) adds effective_date + effective_date_source + + // import_filename + salience_touched_at and the + // `pages_coalesce_date_idx ON pages ((COALESCE(effective_date, updated_at)))` + // expression index. SCHEMA_SQL's CREATE INDEX for that expression crashes + // before v41 runs. Bootstrap adds all five additive columns; v40 + v41 + // run later via runMigrations and are idempotent. 
+ await conn.unsafe(` + ALTER TABLE pages ADD COLUMN IF NOT EXISTS emotional_weight REAL NOT NULL DEFAULT 0.0; + ALTER TABLE pages ADD COLUMN IF NOT EXISTS effective_date TIMESTAMPTZ; + ALTER TABLE pages ADD COLUMN IF NOT EXISTS effective_date_source TEXT; + ALTER TABLE pages ADD COLUMN IF NOT EXISTS import_filename TEXT; + ALTER TABLE pages ADD COLUMN IF NOT EXISTS salience_touched_at TIMESTAMPTZ; + `); + } } async transaction(fn: (engine: BrainEngine) => Promise): Promise { diff --git a/test/schema-bootstrap-coverage.test.ts b/test/schema-bootstrap-coverage.test.ts index fb4efebf7..0a116af28 100644 --- a/test/schema-bootstrap-coverage.test.ts +++ b/test/schema-bootstrap-coverage.test.ts @@ -77,6 +77,10 @@ const REQUIRED_BOOTSTRAP_COVERAGE: ForwardReference[] = [ // ON content_chunks USING hnsw (embedding_image vector_cosine_ops) // WHERE embedding_image IS NOT NULL`. { kind: 'column', table: 'content_chunks', column: 'embedding_image' }, + // v0.27.1 — added in the same migration as embedding_image. Sibling column; + // not directly forward-referenced by an index but the bootstrap adds it + // alongside embedding_image for the v39 contract. + { kind: 'column', table: 'content_chunks', column: 'modality' }, // v0.26.3 (v33) — forward-referenced by `CREATE INDEX idx_mcp_log_agent_time // ON mcp_request_log(agent_name, created_at DESC)`. { kind: 'column', table: 'mcp_request_log', column: 'agent_name' }, @@ -86,6 +90,19 @@ const REQUIRED_BOOTSTRAP_COVERAGE: ForwardReference[] = [ // by default, which is why this fix wave's Step 3 replaces this with a // SQL parser that extracts every column referenced by any DDL. { kind: 'column', table: 'subagent_messages', column: 'provider_id' }, + // v0.29 (v40) — pages.emotional_weight populated by recompute_emotional_weight; + // bootstrapped alongside the v41 columns since they share the v0.29.1 wave. 
+ { kind: 'column', table: 'pages', column: 'emotional_weight' }, + // v0.29.1 (v41) — forward-referenced by `CREATE INDEX pages_coalesce_date_idx + // ON pages ((COALESCE(effective_date, updated_at)))`. The index + // claim from earlier plan iterations was wrong; PG's planner won't use a + // partial index for the negative side of a COALESCE — an expression index is used instead. + { kind: 'column', table: 'pages', column: 'effective_date' }, + // v0.29.1 (v41) — sibling columns added in the same migration as + // effective_date; bootstrap adds them all together. + { kind: 'column', table: 'pages', column: 'effective_date_source' }, + { kind: 'column', table: 'pages', column: 'import_filename' }, + { kind: 'column', table: 'pages', column: 'salience_touched_at' }, ]; test('applyForwardReferenceBootstrap covers every forward reference declared in REQUIRED_BOOTSTRAP_COVERAGE', async () => { @@ -139,6 +156,13 @@ test('applyForwardReferenceBootstrap covers every forward reference declared in DROP INDEX IF EXISTS idx_subagent_messages_provider; ALTER TABLE subagent_messages DROP COLUMN IF EXISTS provider_id; + + DROP INDEX IF EXISTS pages_coalesce_date_idx; + ALTER TABLE pages DROP COLUMN IF EXISTS effective_date; + ALTER TABLE pages DROP COLUMN IF EXISTS effective_date_source; + ALTER TABLE pages DROP COLUMN IF EXISTS import_filename; + ALTER TABLE pages DROP COLUMN IF EXISTS salience_touched_at; + ALTER TABLE pages DROP COLUMN IF EXISTS emotional_weight; `); // Run bootstrap in isolation (NOT initSchema). This is what we're testing. 
@@ -198,6 +222,13 @@ test('after bootstrap, PGLITE_SCHEMA_SQL replays without crashing on missing for DROP INDEX IF EXISTS idx_chunks_embedding_image; ALTER TABLE content_chunks DROP COLUMN IF EXISTS embedding_image; ALTER TABLE content_chunks DROP COLUMN IF EXISTS modality; + + DROP INDEX IF EXISTS pages_coalesce_date_idx; + ALTER TABLE pages DROP COLUMN IF EXISTS effective_date; + ALTER TABLE pages DROP COLUMN IF EXISTS effective_date_source; + ALTER TABLE pages DROP COLUMN IF EXISTS import_filename; + ALTER TABLE pages DROP COLUMN IF EXISTS salience_touched_at; + ALTER TABLE pages DROP COLUMN IF EXISTS emotional_weight; `); // Bootstrap, then schema replay. Either step crashing fails the test.