From 2bee8e6b904d98b978ddb4507500fe089db83a08 Mon Sep 17 00:00:00 2001 From: Byeonghoon Yoo Date: Wed, 20 May 2026 17:23:22 +0900 Subject: [PATCH 1/3] =?UTF-8?q?fix(api):=20vchord=20ANN=20=E2=80=94=20use?= =?UTF-8?q?=20cosine=20opclass=20and=20dispatch=20tuning=20GUCs=20per=20ba?= =?UTF-8?q?ckend?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Closes #1667. vchordrq operator classes are bound 1:1 to operators: vector_l2_ops only matches `<->`, while every Hindsight ANN query uses `<=>` (cosine distance). The previous vchord mapping used vector_l2_ops, so the planner ignored the index entirely and fell back to a sequential scan + per-row cosine computation. Separately, `SET LOCAL hnsw.ef_search = 60` (retain) and `SET hnsw.ef_search = 200` (pool init) only exist in pgvector and silently no-op'd under vchord, so the recall-vs-latency trade-off had never been applied to vchord deployments at all. This switches the vchord opclass to vector_cosine_ops (matching the engine's `<=>` queries), updates the four historical migrations that create vchord indexes inline so fresh installs land on cosine ops, and adds an online migration that rebuilds any existing L2-ops vchordrq indexes via CREATE INDEX CONCURRENTLY + drop + rename. Also introduces an ann_search_tuning_settings dispatcher so link_utils and the pool init pick the right GUC per backend (hnsw.ef_search for pgvector, vchordrq.probes for vchord). Co-Authored-By: Claude Opus 4.7 (1M context) --- .../hindsight_api/_vector_index.py | 39 ++++++- .../versions/5a366d414dce_initial_schema.py | 2 +- ...c6d7e8f9_fix_per_bank_vector_index_type.py | 2 +- .../b8c9d0e1f2a3_vchord_cosine_opclass.py | 100 ++++++++++++++++++ ..._add_bank_internal_id_and_per_bank_hnsw.py | 2 +- ...k2l3m4_learnings_and_pinned_reflections.py | 2 +- .../hindsight_api/engine/memory_engine.py | 21 ++-- .../hindsight_api/engine/retain/link_utils.py | 21 ++-- hindsight-api-slim/tests/test_link_utils.py | 25 +++-- hindsight-api-slim/tests/test_vector_index.py | 27 +++++ 10 files changed, 211 insertions(+), 30 deletions(-) create mode 100644 hindsight-api-slim/hindsight_api/alembic/versions/b8c9d0e1f2a3_vchord_cosine_opclass.py diff --git a/hindsight-api-slim/hindsight_api/_vector_index.py b/hindsight-api-slim/hindsight_api/_vector_index.py index 6a19f7135..6e286bff3 100644 --- a/hindsight-api-slim/hindsight_api/_vector_index.py +++ b/hindsight-api-slim/hindsight_api/_vector_index.py @@ -34,7 +34,7 @@ "pgvector": "USING hnsw (embedding vector_cosine_ops)", "pgvectorscale": "USING diskann (embedding vector_cosine_ops) WITH (num_neighbors = 50)", "pg_diskann": "USING diskann (embedding vector_cosine_ops) WITH (max_neighbors = 50)", - "vchord": "USING vchordrq (embedding vector_l2_ops)", + "vchord": "USING vchordrq (embedding vector_cosine_ops)", "scann": "USING scann (embedding cosine) WITH (mode = 'AUTO')", } @@ -46,6 +46,24 @@ "scann": "scann", } +# Per-backend ANN search-time tuning GUCs. Each entry is a tuple of +# (guc_name, value) pairs the caller can apply with SET or SET LOCAL. +# +# - pgvector exposes hnsw.ef_search. +# - vchord exposes vchordrq.probes (no default; see VectorChord issue #392) +# and vchordrq.epsilon (default 1.9). We only set probes for now and leave +# epsilon at its default; lowering it further is a separate trade-off. +# - pgvectorscale / pg_diskann / scann do not expose an equivalent per-statement +# knob in the engine today, so the dispatcher returns no statements for them. +_ANN_TUNING_LOW_LATENCY: dict[str, tuple[tuple[str, str], ...]] = { + "pgvector": (("hnsw.ef_search", "60"),), + "vchord": (("vchordrq.probes", "10"),), +} +_ANN_TUNING_HIGH_RECALL: dict[str, tuple[tuple[str, str], ...]] = { + "pgvector": (("hnsw.ef_search", "200"),), + "vchord": (("vchordrq.probes", "30"),), +} + _EXTENSION_INSTALL_SQL = { "pgvector": ("CREATE EXTENSION IF NOT EXISTS vector",), "pgvectorscale": ( @@ -115,6 +133,25 @@ def should_defer_index_creation(ext: str, row_count: int) -> bool: return minimum_rows > 0 and row_count < minimum_rows +def ann_search_tuning_settings(ext: str, *, kind: str) -> tuple[tuple[str, str], ...]: + """Return per-backend (guc_name, value) pairs for ANN search-time tuning. + + ``kind`` is ``"low_latency"`` for retain-side link probing (smaller probe + count, lower recall, lower latency) and ``"high_recall"`` for connection + init in the pool (larger probe count, higher recall). Callers wrap each + pair with ``SET LOCAL`` or ``SET`` themselves so the same dispatcher works + for both transaction-scoped and session-scoped use. Returns an empty tuple + for backends without an equivalent knob. + """ + if kind == "low_latency": + table = _ANN_TUNING_LOW_LATENCY + elif kind == "high_recall": + table = _ANN_TUNING_HIGH_RECALL + else: + raise ValueError(f"Unknown ANN tuning kind: {kind!r}") + return table.get(_normalize_resolved(ext), ()) + + def uses_per_bank_vector_indexes(ext: str) -> bool: """Return whether the backend should create per-bank partial vector indexes.""" return _normalize_resolved(ext) != "scann" diff --git a/hindsight-api-slim/hindsight_api/alembic/versions/5a366d414dce_initial_schema.py b/hindsight-api-slim/hindsight_api/alembic/versions/5a366d414dce_initial_schema.py index ea66f4d05..461b6e929 100644 --- a/hindsight-api-slim/hindsight_api/alembic/versions/5a366d414dce_initial_schema.py +++ b/hindsight-api-slim/hindsight_api/alembic/versions/5a366d414dce_initial_schema.py @@ -83,7 +83,7 @@ def _vector_index_using_clause(ext: str) -> str: if ext == "pg_diskann": return "USING diskann (embedding vector_cosine_ops) WITH (max_neighbors = 50)" if ext == "vchord": - return "USING vchordrq (embedding vector_l2_ops)" + return "USING vchordrq (embedding vector_cosine_ops)" if ext == "scann": return "USING scann (embedding cosine) WITH (mode = 'AUTO')" return "USING hnsw (embedding vector_cosine_ops)" diff --git a/hindsight-api-slim/hindsight_api/alembic/versions/a4b5c6d7e8f9_fix_per_bank_vector_index_type.py b/hindsight-api-slim/hindsight_api/alembic/versions/a4b5c6d7e8f9_fix_per_bank_vector_index_type.py index dc0a5ef20..ffa1dc6a3 100644 --- a/hindsight-api-slim/hindsight_api/alembic/versions/a4b5c6d7e8f9_fix_per_bank_vector_index_type.py +++ b/hindsight-api-slim/hindsight_api/alembic/versions/a4b5c6d7e8f9_fix_per_bank_vector_index_type.py @@ -63,7 +63,7 @@ def _vector_index_using_clause(ext: str) -> str: if ext == "pgvectorscale": return "USING diskann (embedding vector_cosine_ops) WITH (num_neighbors = 50)" if ext == "vchord": - return "USING vchordrq (embedding vector_l2_ops)" + return "USING vchordrq (embedding vector_cosine_ops)" if ext == "scann": return "USING scann (embedding cosine) WITH (mode = 'AUTO')" return "USING hnsw (embedding vector_cosine_ops)" diff --git a/hindsight-api-slim/hindsight_api/alembic/versions/b8c9d0e1f2a3_vchord_cosine_opclass.py b/hindsight-api-slim/hindsight_api/alembic/versions/b8c9d0e1f2a3_vchord_cosine_opclass.py new file mode 100644 index 000000000..dc2756a28 --- /dev/null +++ b/hindsight-api-slim/hindsight_api/alembic/versions/b8c9d0e1f2a3_vchord_cosine_opclass.py @@ -0,0 +1,100 @@ +"""Re-create vchord vector indexes with vector_cosine_ops + +Revision ID: b8c9d0e1f2a3 +Revises: 86f7a033d372 +Create Date: 2026-05-20 + +vchordrq operator classes are bound 1:1 to operators in PostgreSQL: +vector_l2_ops only matches ``<->``, while every Hindsight ANN query uses +``<=>`` (cosine distance). The previous vchord mapping used vector_l2_ops, +so vchord deployments could never use the index — every ANN query fell +back to a sequential scan with per-row cosine computation. + +This migration finds any vchordrq index built with vector_l2_ops in the +target schema and re-creates it with vector_cosine_ops, using +``CREATE INDEX CONCURRENTLY`` so it can run online. It is a no-op when: + +* the configured vector extension is not vchord, or +* no matching indexes exist (already on cosine ops). + +Only PostgreSQL is affected; the Oracle 23ai dialect uses its own native +vector index and does not depend on this mapping. +""" + +from __future__ import annotations + +import os +import re +from collections.abc import Sequence + +from alembic import context, op +from sqlalchemy import text + +from hindsight_api.alembic._dialect import run_for_dialect + +revision: str = "b8c9d0e1f2a3" +down_revision: str | Sequence[str] | None = "86f7a033d372" +branch_labels: str | Sequence[str] | None = None +depends_on: str | Sequence[str] | None = None + + +def _configured_vector_extension() -> str: + return os.getenv("HINDSIGHT_API_VECTOR_EXTENSION", "pgvector").lower() + + +def _rebuild_vchordrq_indexes(old_ops: str, new_ops: str) -> None: + """Rebuild vchordrq indexes using ``old_ops`` so they use ``new_ops``. + + Each index is rebuilt with CREATE INDEX CONCURRENTLY under a fresh name, + then the old index is dropped and the new one renamed to take its place. + Must be called inside an ``autocommit_block()`` because CONCURRENTLY + cannot run inside a transaction. + """ + bind = op.get_bind() + schema = context.config.get_main_option("target_schema") or "public" + + rows = bind.execute( + text( + "SELECT indexname, indexdef FROM pg_indexes " + "WHERE schemaname = :schema " + "AND indexdef ILIKE '%vchordrq%' " + "AND indexdef ILIKE :ops_like" + ), + {"schema": schema, "ops_like": f"%{old_ops}%"}, + ).fetchall() + + for idx_name, indexdef in rows: + new_def = indexdef.replace(old_ops, new_ops, 1) + temp_name = f"{idx_name}__opclass_swap" + new_def = new_def.replace(idx_name, temp_name, 1) + new_def = re.sub( + r"^CREATE\s+INDEX\b", + "CREATE INDEX CONCURRENTLY IF NOT EXISTS", + new_def, + count=1, + ) + op.execute(new_def) + op.execute(f'DROP INDEX IF EXISTS "{schema}"."{idx_name}"') + op.execute(f'ALTER INDEX "{schema}"."{temp_name}" RENAME TO "{idx_name}"') + + +def _pg_upgrade() -> None: + if _configured_vector_extension() != "vchord": + return + with op.get_context().autocommit_block(): + _rebuild_vchordrq_indexes("vector_l2_ops", "vector_cosine_ops") + + +def _pg_downgrade() -> None: + if _configured_vector_extension() != "vchord": + return + with op.get_context().autocommit_block(): + _rebuild_vchordrq_indexes("vector_cosine_ops", "vector_l2_ops") + + +def upgrade() -> None: + run_for_dialect(pg=_pg_upgrade) + + +def downgrade() -> None: + run_for_dialect(pg=_pg_downgrade) diff --git a/hindsight-api-slim/hindsight_api/alembic/versions/d5e6f7a8b9c0_add_bank_internal_id_and_per_bank_hnsw.py b/hindsight-api-slim/hindsight_api/alembic/versions/d5e6f7a8b9c0_add_bank_internal_id_and_per_bank_hnsw.py index d88a7a813..dc880730c 100644 --- a/hindsight-api-slim/hindsight_api/alembic/versions/d5e6f7a8b9c0_add_bank_internal_id_and_per_bank_hnsw.py +++ b/hindsight-api-slim/hindsight_api/alembic/versions/d5e6f7a8b9c0_add_bank_internal_id_and_per_bank_hnsw.py @@ -55,7 +55,7 @@ def _vector_index_using_clause(ext: str) -> str: if ext == "pgvectorscale": return "USING diskann (embedding vector_cosine_ops) WITH (num_neighbors = 50)" if ext == "vchord": - return "USING vchordrq (embedding vector_l2_ops)" + return "USING vchordrq (embedding vector_cosine_ops)" if ext == "scann": return "USING scann (embedding cosine) WITH (mode = 'AUTO')" return "USING hnsw (embedding vector_cosine_ops)" diff --git a/hindsight-api-slim/hindsight_api/alembic/versions/n9i0j1k2l3m4_learnings_and_pinned_reflections.py b/hindsight-api-slim/hindsight_api/alembic/versions/n9i0j1k2l3m4_learnings_and_pinned_reflections.py index 52a750eb9..717b2d1ee 100644 --- a/hindsight-api-slim/hindsight_api/alembic/versions/n9i0j1k2l3m4_learnings_and_pinned_reflections.py +++ b/hindsight-api-slim/hindsight_api/alembic/versions/n9i0j1k2l3m4_learnings_and_pinned_reflections.py @@ -87,7 +87,7 @@ def _vector_index_using_clause(ext: str) -> str: if ext == "pg_diskann": return "USING diskann (embedding vector_cosine_ops) WITH (max_neighbors = 50)" if ext == "vchord": - return "USING vchordrq (embedding vector_l2_ops)" + return "USING vchordrq (embedding vector_cosine_ops)" if ext == "scann": return "USING scann (embedding cosine) WITH (mode = 'AUTO')" return "USING hnsw (embedding vector_cosine_ops)" diff --git a/hindsight-api-slim/hindsight_api/engine/memory_engine.py b/hindsight-api-slim/hindsight_api/engine/memory_engine.py index bf204ee6c..252695255 100644 --- a/hindsight-api-slim/hindsight_api/engine/memory_engine.py +++ b/hindsight-api-slim/hindsight_api/engine/memory_engine.py @@ -13,6 +13,7 @@ import contextvars import json import logging +import os import time import uuid from collections.abc import Awaitable, Callable @@ -24,6 +25,7 @@ import httpx import tiktoken +from .._vector_index import ann_search_tuning_settings from ..config import ( DEFAULT_RECALL_CHUNKS_MAX_TOKENS, DEFAULT_RECALL_INCLUDE_CHUNKS, @@ -2047,13 +2049,18 @@ async def verify_llm(): # Per-connection initialization callback (PostgreSQL-specific for now) async def _init_connection(conn: asyncpg.Connection) -> None: - # SET (not SET LOCAL) so it persists for the connection lifetime. - # ef_search=200 improves HNSW recall quality for the per-fact_type - # semantic queries in retrieve_semantic_bm25_combined(). - try: - await conn.execute("SET hnsw.ef_search = 200") - except Exception: - logger.debug("Could not set hnsw.ef_search — extension may not support it") + # SET (not SET LOCAL) so per-backend ANN tuning persists for the + # connection lifetime. Each backend exposes its own GUC: pgvector + # uses hnsw.ef_search, vchord uses vchordrq.probes. The dispatcher + # returns the right one for the configured extension, tuned for + # the higher recall the per-fact_type semantic queries in + # retrieve_semantic_bm25_combined() need. + ext = os.getenv("HINDSIGHT_API_VECTOR_EXTENSION", "pgvector").lower() + for guc, value in ann_search_tuning_settings(ext, kind="high_recall"): + try: + await conn.execute(f"SET {guc} = {value}") + except Exception: + logger.debug("Could not set %s — extension may not support it", guc) # Server-side safety net for runaway queries. Migrations use a # separate SQLAlchemy/psycopg2 engine, so long-running DDL is diff --git a/hindsight-api-slim/hindsight_api/engine/retain/link_utils.py b/hindsight-api-slim/hindsight_api/engine/retain/link_utils.py index 3c0b7e8c1..ef6d6a7cf 100644 --- a/hindsight-api-slim/hindsight_api/engine/retain/link_utils.py +++ b/hindsight-api-slim/hindsight_api/engine/retain/link_utils.py @@ -3,10 +3,12 @@ """ import logging +import os import time from datetime import UTC, datetime, timedelta from uuid import UUID +from ..._vector_index import ann_search_tuning_settings from ..memory_engine import fq_table from .types import EntityLink @@ -701,16 +703,19 @@ async def compute_semantic_links_ann( # `relation "_ann_seeds" does not exist` on the second statement. # # Using ON COMMIT DROP + SET LOCAL also means we don't have to remember to - # manually drop the temp table or reset hnsw.ef_search — the transaction - # end handles both. + # manually drop the temp table or reset the per-backend ANN tuning GUC — + # the transaction end handles both. rows: list = [] async with conn.transaction(): - # Transaction-local ef_search. Default 400 is tuned for recall precision - # but at 164k units each HNSW probe takes 94ms. ef_search=60 gives 2.7ms - # per probe (35x faster) with sufficient accuracy for top-50 semantic - # link creation. SET LOCAL auto-reverts at commit, so we don't pollute - # the pool for subsequent recall queries. - await conn.execute("SET LOCAL hnsw.ef_search = 60") + # Transaction-local ANN tuning. Each supported backend exposes its own + # GUC (hnsw.ef_search on pgvector, vchordrq.probes on vchord); the + # dispatcher returns the right knob for the configured backend with a + # value tuned for top-50 semantic link creation (lower recall but much + # lower latency than the recall-side default). SET LOCAL auto-reverts + # at commit, so we don't pollute the pool for subsequent queries. + ext = os.getenv("HINDSIGHT_API_VECTOR_EXTENSION", "pgvector").lower() + for guc, value in ann_search_tuning_settings(ext, kind="low_latency"): + await conn.execute(f"SET LOCAL {guc} = {value}") t_setup = time_mod.time() await conn.execute("CREATE TEMP TABLE _ann_seeds (unit_id text, emb_text text, fact_type text) ON COMMIT DROP") diff --git a/hindsight-api-slim/tests/test_link_utils.py b/hindsight-api-slim/tests/test_link_utils.py index c152e4e58..b5f9d6511 100644 --- a/hindsight-api-slim/tests/test_link_utils.py +++ b/hindsight-api-slim/tests/test_link_utils.py @@ -509,11 +509,16 @@ async def test_no_manual_drop_or_truncate(self, mock_conn): ) @pytest.mark.asyncio - async def test_uses_set_local_for_ef_search(self, mock_conn): - """hnsw.ef_search must be set with SET LOCAL so the change is scoped - to the transaction. Without SET LOCAL, the setting would leak onto - the pooled backend and affect subsequent recall queries that land - on the same backend.""" + @pytest.mark.parametrize( + ("ext", "guc"), + [("pgvector", "hnsw.ef_search"), ("vchord", "vchordrq.probes")], + ) + async def test_uses_set_local_for_ann_tuning(self, mock_conn, monkeypatch, ext, guc): + """The per-backend ANN tuning GUC must be set with SET LOCAL so the + change is scoped to the transaction. Without SET LOCAL, the setting + would leak onto the pooled backend and affect subsequent recall + queries that land on the same backend.""" + monkeypatch.setenv("HINDSIGHT_API_VECTOR_EXTENSION", ext) emb = [0.1] * 384 await compute_semantic_links_ann( conn=mock_conn, @@ -524,11 +529,11 @@ async def test_uses_set_local_for_ef_search(self, mock_conn): ) executed_sql = [call.args[0] for call in mock_conn.execute.call_args_list] - ef_statements = [s for s in executed_sql if "hnsw.ef_search" in s] - assert ef_statements, "ef_search must be tuned down for retain ANN" - for stmt in ef_statements: + tuning_statements = [s for s in executed_sql if guc in s] + assert tuning_statements, f"{guc} must be tuned for retain ANN under ext={ext}" + for stmt in tuning_statements: assert stmt.strip().startswith("SET LOCAL"), ( - f"hnsw.ef_search must use SET LOCAL, got: {stmt}" + f"{guc} must use SET LOCAL, got: {stmt}" ) # And there must not be a RESET — SET LOCAL handles it at commit. - assert not any("RESET hnsw.ef_search" in s for s in executed_sql) + assert not any(f"RESET {guc}" in s for s in executed_sql) diff --git a/hindsight-api-slim/tests/test_vector_index.py b/hindsight-api-slim/tests/test_vector_index.py index c88df872d..e37ca8b0e 100644 --- a/hindsight-api-slim/tests/test_vector_index.py +++ b/hindsight-api-slim/tests/test_vector_index.py @@ -2,6 +2,7 @@ from hindsight_api._vector_index import ( SCANN_MIN_ROWS_FOR_AUTO_INDEX, + ann_search_tuning_settings, bootstrap_extension, index_type_keyword, index_using_clause, @@ -41,6 +42,13 @@ def test_index_using_clause_pgvector_matches_existing_clause(): assert index_using_clause("pgvector") == "USING hnsw (embedding vector_cosine_ops)" +def test_index_using_clause_vchord_uses_cosine_ops(): + # vchordrq opclasses are bound 1:1 to operators in PostgreSQL; the engine + # uses `<=>` (cosine distance) everywhere, so the index must be declared + # with vector_cosine_ops or the planner falls back to a sequential scan. + assert index_using_clause("vchord") == "USING vchordrq (embedding vector_cosine_ops)" + + def test_index_type_keyword_scann_round_trips_pg_indexes_indexdef(): keyword = index_type_keyword("scann") indexdef = "CREATE INDEX idx ON memory_units USING scann (embedding cosine) WITH (mode='AUTO')" @@ -67,6 +75,24 @@ def test_scann_index_creation_defers_until_table_is_large_enough(): assert not should_defer_index_creation("pgvector", 0) +def test_ann_search_tuning_settings_pgvector_dispatches_hnsw_ef_search(): + assert ann_search_tuning_settings("pgvector", kind="low_latency") == (("hnsw.ef_search", "60"),) + assert ann_search_tuning_settings("pgvector", kind="high_recall") == (("hnsw.ef_search", "200"),) + + +def test_ann_search_tuning_settings_vchord_dispatches_vchordrq_probes(): + # vchord doesn't recognize hnsw.ef_search; the dispatcher must route to + # the vchordrq equivalent (probes), otherwise the GUC silently does nothing. + assert ann_search_tuning_settings("vchord", kind="low_latency") == (("vchordrq.probes", "10"),) + assert ann_search_tuning_settings("vchord", kind="high_recall") == (("vchordrq.probes", "30"),) + + +def test_ann_search_tuning_settings_returns_empty_for_backends_without_knob(): + for ext in ("pgvectorscale", "pg_diskann", "scann"): + assert ann_search_tuning_settings(ext, kind="low_latency") == () + assert ann_search_tuning_settings(ext, kind="high_recall") == () + + def test_scann_does_not_use_per_bank_partial_indexes(): assert not uses_per_bank_vector_indexes("scann") assert uses_per_bank_vector_indexes("pgvector") @@ -79,6 +105,7 @@ def test_alembic_vector_migrations_freeze_vector_sql_locally(): changed_migrations = [ "5a366d414dce_initial_schema.py", "a4b5c6d7e8f9_fix_per_bank_vector_index_type.py", + "b8c9d0e1f2a3_vchord_cosine_opclass.py", "d5e6f7a8b9c0_add_bank_internal_id_and_per_bank_hnsw.py", "n9i0j1k2l3m4_learnings_and_pinned_reflections.py", ] From 79ee0a347ec7f9351b1ac951c41bfb246f3736a1 Mon Sep 17 00:00:00 2001 From: Byeonghoon Yoo Date: Thu, 21 May 2026 09:31:29 +0900 Subject: [PATCH 2/3] refactor: route HINDSIGHT_API_VECTOR_EXTENSION through a shared helper Per review on #1668: the env-var lookup that decides which vector backend is configured was duplicated in three places (the new migration plus the two runtime call sites in engine/retain/link_utils.py and engine/memory_engine.py). Centralize the read + validation in hindsight_api._vector_index.configured_vector_extension() so the default value and the access mechanism live in one spot. The new migration b8c9d0e1f2a3_vchord_cosine_opclass now imports the shared helper instead of inlining its own. The four legacy vchord migrations stay frozen (they keep their inline helpers); the frozen-state test is narrowed to that legacy set so future vchord migrations can opt into the shared helper on a per-migration basis. Co-Authored-By: Claude Opus 4.7 (1M context) --- .../hindsight_api/_vector_index.py | 13 ++++++++ .../b8c9d0e1f2a3_vchord_cosine_opclass.py | 10 ++----- .../hindsight_api/engine/memory_engine.py | 6 ++-- .../hindsight_api/engine/retain/link_utils.py | 6 ++-- hindsight-api-slim/tests/test_vector_index.py | 30 +++++++++++++++++-- 5 files changed, 47 insertions(+), 18 deletions(-) diff --git a/hindsight-api-slim/hindsight_api/_vector_index.py b/hindsight-api-slim/hindsight_api/_vector_index.py index 6e286bff3..169bd0088 100644 --- a/hindsight-api-slim/hindsight_api/_vector_index.py +++ b/hindsight-api-slim/hindsight_api/_vector_index.py @@ -3,6 +3,7 @@ from __future__ import annotations import logging +import os from sqlalchemy import text from sqlalchemy.engine import Connection @@ -85,6 +86,18 @@ } +def configured_vector_extension() -> str: + """Return the user-configured vector backend extension. + + Reads ``HINDSIGHT_API_VECTOR_EXTENSION`` (default ``"pgvector"``) and + validates it via :func:`validate_extension`. This is the single source of + truth for runtime code that needs to dispatch behaviour by vector backend; + callers should prefer this over reading the env var directly, so the + default value and the lookup mechanism live in one place. + """ + return validate_extension(os.getenv("HINDSIGHT_API_VECTOR_EXTENSION", "pgvector")) + + def validate_extension(name: str) -> str: """Return a normalized configurable vector extension name or raise. diff --git a/hindsight-api-slim/hindsight_api/alembic/versions/b8c9d0e1f2a3_vchord_cosine_opclass.py b/hindsight-api-slim/hindsight_api/alembic/versions/b8c9d0e1f2a3_vchord_cosine_opclass.py index dc2756a28..860a3ab84 100644 --- a/hindsight-api-slim/hindsight_api/alembic/versions/b8c9d0e1f2a3_vchord_cosine_opclass.py +++ b/hindsight-api-slim/hindsight_api/alembic/versions/b8c9d0e1f2a3_vchord_cosine_opclass.py @@ -23,13 +23,13 @@ from __future__ import annotations -import os import re from collections.abc import Sequence from alembic import context, op from sqlalchemy import text +from hindsight_api._vector_index import configured_vector_extension from hindsight_api.alembic._dialect import run_for_dialect revision: str = "b8c9d0e1f2a3" @@ -38,10 +38,6 @@ depends_on: str | Sequence[str] | None = None -def _configured_vector_extension() -> str: - return os.getenv("HINDSIGHT_API_VECTOR_EXTENSION", "pgvector").lower() - - def _rebuild_vchordrq_indexes(old_ops: str, new_ops: str) -> None: """Rebuild vchordrq indexes using ``old_ops`` so they use ``new_ops``. @@ -79,14 +75,14 @@ def _rebuild_vchordrq_indexes(old_ops: str, new_ops: str) -> None: def _pg_upgrade() -> None: - if _configured_vector_extension() != "vchord": + if configured_vector_extension() != "vchord": return with op.get_context().autocommit_block(): _rebuild_vchordrq_indexes("vector_l2_ops", "vector_cosine_ops") def _pg_downgrade() -> None: - if _configured_vector_extension() != "vchord": + if configured_vector_extension() != "vchord": return with op.get_context().autocommit_block(): _rebuild_vchordrq_indexes("vector_cosine_ops", "vector_l2_ops") diff --git a/hindsight-api-slim/hindsight_api/engine/memory_engine.py b/hindsight-api-slim/hindsight_api/engine/memory_engine.py index 252695255..f7a4521d3 100644 --- a/hindsight-api-slim/hindsight_api/engine/memory_engine.py +++ b/hindsight-api-slim/hindsight_api/engine/memory_engine.py @@ -13,7 +13,6 @@ import contextvars import json import logging -import os import time import uuid from collections.abc import Awaitable, Callable @@ -25,7 +24,7 @@ import httpx import tiktoken -from .._vector_index import ann_search_tuning_settings +from .._vector_index import ann_search_tuning_settings, configured_vector_extension from ..config import ( DEFAULT_RECALL_CHUNKS_MAX_TOKENS, DEFAULT_RECALL_INCLUDE_CHUNKS, @@ -2055,8 +2054,7 @@ async def _init_connection(conn: asyncpg.Connection) -> None: # returns the right one for the configured extension, tuned for # the higher recall the per-fact_type semantic queries in # retrieve_semantic_bm25_combined() need. - ext = os.getenv("HINDSIGHT_API_VECTOR_EXTENSION", "pgvector").lower() - for guc, value in ann_search_tuning_settings(ext, kind="high_recall"): + for guc, value in ann_search_tuning_settings(configured_vector_extension(), kind="high_recall"): try: await conn.execute(f"SET {guc} = {value}") except Exception: diff --git a/hindsight-api-slim/hindsight_api/engine/retain/link_utils.py b/hindsight-api-slim/hindsight_api/engine/retain/link_utils.py index ef6d6a7cf..8b6952bd7 100644 --- a/hindsight-api-slim/hindsight_api/engine/retain/link_utils.py +++ b/hindsight-api-slim/hindsight_api/engine/retain/link_utils.py @@ -3,12 +3,11 @@ """ import logging -import os import time from datetime import UTC, datetime, timedelta from uuid import UUID -from ..._vector_index import ann_search_tuning_settings +from ..._vector_index import ann_search_tuning_settings, configured_vector_extension from ..memory_engine import fq_table from .types import EntityLink @@ -713,8 +712,7 @@ async def compute_semantic_links_ann( # value tuned for top-50 semantic link creation (lower recall but much # lower latency than the recall-side default). SET LOCAL auto-reverts # at commit, so we don't pollute the pool for subsequent queries. - ext = os.getenv("HINDSIGHT_API_VECTOR_EXTENSION", "pgvector").lower() - for guc, value in ann_search_tuning_settings(ext, kind="low_latency"): + for guc, value in ann_search_tuning_settings(configured_vector_extension(), kind="low_latency"): await conn.execute(f"SET LOCAL {guc} = {value}") t_setup = time_mod.time() diff --git a/hindsight-api-slim/tests/test_vector_index.py b/hindsight-api-slim/tests/test_vector_index.py index e37ca8b0e..6834d76d8 100644 --- a/hindsight-api-slim/tests/test_vector_index.py +++ b/hindsight-api-slim/tests/test_vector_index.py @@ -4,6 +4,7 @@ SCANN_MIN_ROWS_FOR_AUTO_INDEX, ann_search_tuning_settings, bootstrap_extension, + configured_vector_extension, index_type_keyword, index_using_clause, pg_extension_name, @@ -93,6 +94,24 @@ def test_ann_search_tuning_settings_returns_empty_for_backends_without_knob(): assert ann_search_tuning_settings(ext, kind="high_recall") == () +def test_configured_vector_extension_defaults_to_pgvector(monkeypatch): + monkeypatch.delenv("HINDSIGHT_API_VECTOR_EXTENSION", raising=False) + assert configured_vector_extension() == "pgvector" + + +def test_configured_vector_extension_reads_env_and_lowercases(monkeypatch): + monkeypatch.setenv("HINDSIGHT_API_VECTOR_EXTENSION", "VChord") + assert configured_vector_extension() == "vchord" + + +def test_configured_vector_extension_rejects_unknown_value(monkeypatch): + import pytest + + monkeypatch.setenv("HINDSIGHT_API_VECTOR_EXTENSION", "bogus") + with pytest.raises(ValueError): + configured_vector_extension() + + def test_scann_does_not_use_per_bank_partial_indexes(): assert not uses_per_bank_vector_indexes("scann") assert uses_per_bank_vector_indexes("pgvector") @@ -101,16 +120,21 @@ def test_scann_does_not_use_per_bank_partial_indexes(): def test_alembic_vector_migrations_freeze_vector_sql_locally(): + # Pre-existing vchord-related migrations keep their vector SQL frozen + # (inline helpers, no import from hindsight_api._vector_index) so their + # historical behaviour cannot drift when _vector_index.py is updated. + # New vchord migrations are reviewed for this trade-off individually and + # may import the shared helpers when they want to share the canonical + # source of truth (see b8c9d0e1f2a3_vchord_cosine_opclass.py). migration_dir = Path(__file__).resolve().parent.parent / "hindsight_api/alembic/versions" - changed_migrations = [ + frozen_migrations = [ "5a366d414dce_initial_schema.py", "a4b5c6d7e8f9_fix_per_bank_vector_index_type.py", - "b8c9d0e1f2a3_vchord_cosine_opclass.py", "d5e6f7a8b9c0_add_bank_internal_id_and_per_bank_hnsw.py", "n9i0j1k2l3m4_learnings_and_pinned_reflections.py", ] - for migration in changed_migrations: + for migration in frozen_migrations: text = (migration_dir / migration).read_text() assert "hindsight_api._vector_index" not in text From 73dfcf8f64a7cb73131ae33f35c19a0305ff1109 Mon Sep 17 00:00:00 2001 From: Byeonghoon Yoo Date: Tue, 26 May 2026 10:29:04 +0900 Subject: [PATCH 3/3] fix(api): address vchord migration review feedback - Wrap DROP canonical + RENAME temp in a server-side DO block so the swap is atomic; a crash between the two would otherwise leave the temp index as a valid orphan and the canonical name missing, with no recovery path on retry. - Drop the temp index at the top of each rebuild loop and assert pg_index.indisvalid after CREATE INDEX CONCURRENTLY, so a leftover INVALID index from a prior failed run can't be promoted into the canonical name. - Align the migration with the _pg_schema_prefix() convention used by other PG migrations, and normalize empty-string target_schema to NULL so COALESCE falls back to current_schema() instead of filtering on ''. - Narrow _init_connection's except Exception to asyncpg.PostgresError so real pool/connection bugs surface instead of being silently logged. - Document the vchordrq.probes 10/30 starting defaults and the indexdef.replace first-occurrence assumption. Co-Authored-By: Claude Opus 4.7 (1M context) --- .../hindsight_api/_vector_index.py | 14 +++- .../b8c9d0e1f2a3_vchord_cosine_opclass.py | 66 +++++++++++++++++-- .../hindsight_api/engine/memory_engine.py | 6 +- 3 files changed, 77 insertions(+), 9 deletions(-) diff --git a/hindsight-api-slim/hindsight_api/_vector_index.py b/hindsight-api-slim/hindsight_api/_vector_index.py index 169bd0088..e966a5747 100644 --- a/hindsight-api-slim/hindsight_api/_vector_index.py +++ b/hindsight-api-slim/hindsight_api/_vector_index.py @@ -50,10 +50,18 @@ # Per-backend ANN search-time tuning GUCs. Each entry is a tuple of # (guc_name, value) pairs the caller can apply with SET or SET LOCAL. # -# - pgvector exposes hnsw.ef_search. +# - pgvector exposes hnsw.ef_search. The 60 / 200 pair is unchanged from the +# pre-dispatcher code (internal benchmarks tuned around our embedding count +# and recall floor; see the link_utils / pool init call sites for the +# latency-vs-recall framing). # - vchord exposes vchordrq.probes (no default; see VectorChord issue #392) -# and vchordrq.epsilon (default 1.9). We only set probes for now and leave -# epsilon at its default; lowering it further is a separate trade-off. +# and vchordrq.epsilon (default 1.9). probes = 10 / 30 are starting +# defaults pending a workload-specific sweep — vchordrq's recall curve +# shape differs from HNSW's, so the pgvector numbers don't translate +# directly. Revisit with a per-cluster benchmark once we have production +# recall data; until then these are deliberately conservative on the +# high-recall path. We leave epsilon at its default; tightening it is a +# separate trade-off. # - pgvectorscale / pg_diskann / scann do not expose an equivalent per-statement # knob in the engine today, so the dispatcher returns no statements for them. _ANN_TUNING_LOW_LATENCY: dict[str, tuple[tuple[str, str], ...]] = { diff --git a/hindsight-api-slim/hindsight_api/alembic/versions/b8c9d0e1f2a3_vchord_cosine_opclass.py b/hindsight-api-slim/hindsight_api/alembic/versions/b8c9d0e1f2a3_vchord_cosine_opclass.py index 860a3ab84..605f3f462 100644 --- a/hindsight-api-slim/hindsight_api/alembic/versions/b8c9d0e1f2a3_vchord_cosine_opclass.py +++ b/hindsight-api-slim/hindsight_api/alembic/versions/b8c9d0e1f2a3_vchord_cosine_opclass.py @@ -38,6 +38,12 @@ depends_on: str | Sequence[str] | None = None +def _pg_schema_prefix() -> str: + """Schema-qualifier for raw SQL on PG (multi-tenant search_path).""" + schema = context.config.get_main_option("target_schema") + return f'"{schema}".' if schema else "" + + def _rebuild_vchordrq_indexes(old_ops: str, new_ops: str) -> None: """Rebuild vchordrq indexes using ``old_ops`` so they use ``new_ops``. @@ -47,19 +53,27 @@ def _rebuild_vchordrq_indexes(old_ops: str, new_ops: str) -> None: cannot run inside a transaction. """ bind = op.get_bind() - schema = context.config.get_main_option("target_schema") or "public" + # `or None` collapses both unset and explicit empty-string Alembic options + # into NULL so the COALESCE below falls back to current_schema() in either + # case. Without it, an empty-string option would filter on `schemaname = ''` + # and skip every real schema. + target_schema = context.config.get_main_option("target_schema") or None + prefix = _pg_schema_prefix() rows = bind.execute( text( "SELECT indexname, indexdef FROM pg_indexes " - "WHERE schemaname = :schema " + "WHERE schemaname = COALESCE(:target_schema, current_schema()) " "AND indexdef ILIKE '%vchordrq%' " "AND indexdef ILIKE :ops_like" ), - {"schema": schema, "ops_like": f"%{old_ops}%"}, + {"target_schema": target_schema, "ops_like": f"%{old_ops}%"}, ).fetchall() for idx_name, indexdef in rows: + # pg_get_indexdef() emits the canonical form `CREATE INDEX ON …`, + # so is the first textual occurrence — both substitutions below + # rely on that. new_def = indexdef.replace(old_ops, new_ops, 1) temp_name = f"{idx_name}__opclass_swap" new_def = new_def.replace(idx_name, temp_name, 1) @@ -69,9 +83,51 @@ def _rebuild_vchordrq_indexes(old_ops: str, new_ops: str) -> None: new_def, count=1, ) + + # CREATE INDEX CONCURRENTLY can leave the partial index as INVALID if a + # previous run errored (disk pressure, lock conflict, signal). Without + # this drop the CONCURRENTLY IF NOT EXISTS below would skip creation, + # then we'd drop the original and rename the broken index into its + # place — silently restoring the seq-scan bug this migration fixes. + op.execute(f'DROP INDEX IF EXISTS {prefix}"{temp_name}"') op.execute(new_def) - op.execute(f'DROP INDEX IF EXISTS "{schema}"."{idx_name}"') - op.execute(f'ALTER INDEX "{schema}"."{temp_name}" RENAME TO "{idx_name}"') + + # Even on a clean run CONCURRENTLY can finish with indisvalid = false + # (e.g. constraint violation during the second build scan). Refuse to + # promote in that case so we never alias an INVALID index over a working + # one. + is_valid = bind.execute( + text( + "SELECT i.indisvalid " + "FROM pg_class c " + "JOIN pg_index i ON c.oid = i.indexrelid " + "JOIN pg_namespace n ON c.relnamespace = n.oid " + "WHERE c.relname = :name " + " AND n.nspname = COALESCE(:target_schema, current_schema())" + ), + {"name": temp_name, "target_schema": target_schema}, + ).scalar() + if not is_valid: + raise RuntimeError( + f"vchordrq index rebuild produced an INVALID index ({temp_name}); " + "drop it manually and re-run the migration." + ) + + # DROP + RENAME atomically. A crash between the two would leave + # `temp_name` as a valid orphan and the canonical name missing — + # next run's `pg_indexes` filter (looking for vector_l2_ops) wouldn't + # find anything to recover from, so the index would stay gone. PG + # runs the DO block in its own server-side transaction, so either + # both succeed or both roll back. + op.execute( + f""" + DO $$ + BEGIN + DROP INDEX IF EXISTS {prefix}"{idx_name}"; + ALTER INDEX {prefix}"{temp_name}" RENAME TO "{idx_name}"; + END $$; + """ + ) def _pg_upgrade() -> None: diff --git a/hindsight-api-slim/hindsight_api/engine/memory_engine.py b/hindsight-api-slim/hindsight_api/engine/memory_engine.py index f7a4521d3..46d94a272 100644 --- a/hindsight-api-slim/hindsight_api/engine/memory_engine.py +++ b/hindsight-api-slim/hindsight_api/engine/memory_engine.py @@ -2057,7 +2057,11 @@ async def _init_connection(conn: asyncpg.Connection) -> None: for guc, value in ann_search_tuning_settings(configured_vector_extension(), kind="high_recall"): try: await conn.execute(f"SET {guc} = {value}") - except Exception: + except asyncpg.exceptions.PostgresError: + # Defensive net for env mis-config (e.g. extension configured + # for vchord but the cluster only has pgvector). Narrow to + # PostgresError so genuine bugs in the pool/conn layer surface + # instead of being silently logged at debug level. logger.debug("Could not set %s — extension may not support it", guc) # Server-side safety net for runaway queries. Migrations use a