From 2bee8e6b904d98b978ddb4507500fe089db83a08 Mon Sep 17 00:00:00 2001
From: Byeonghoon Yoo <bhyoo@bhyoo.com>
Date: Wed, 20 May 2026 17:23:22 +0900
Subject: [PATCH 1/3] =?UTF-8?q?fix(api):=20vchord=20ANN=20=E2=80=94=20use?=
 =?UTF-8?q?=20cosine=20opclass=20and=20dispatch=20tuning=20GUCs=20per=20ba?=
 =?UTF-8?q?ckend?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Closes #1667.

vchordrq operator classes are bound 1:1 to operators: vector_l2_ops only
matches `<->`, while every Hindsight ANN query uses `<=>` (cosine distance).
The previous vchord mapping used vector_l2_ops, so the planner ignored the
index entirely and fell back to a sequential scan + per-row cosine
computation. Separately, `SET LOCAL hnsw.ef_search = 60` (retain) and
`SET hnsw.ef_search = 200` (pool init) only exist in pgvector and silently
no-op'd under vchord, so the recall-vs-latency trade-off had never been
applied to vchord deployments at all.

This switches the vchord opclass to vector_cosine_ops (matching the
engine's `<=>` queries), updates the four historical migrations that
create vchord indexes inline so fresh installs land on cosine ops, and
adds an online migration that rebuilds any existing L2-ops vchordrq
indexes via CREATE INDEX CONCURRENTLY + drop + rename. Also introduces an
ann_search_tuning_settings dispatcher so link_utils and the pool init
pick the right GUC per backend (hnsw.ef_search for pgvector,
vchordrq.probes for vchord).

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
---
 .../hindsight_api/_vector_index.py            |  39 ++++++-
 .../versions/5a366d414dce_initial_schema.py   |   2 +-
 ...c6d7e8f9_fix_per_bank_vector_index_type.py |   2 +-
 .../b8c9d0e1f2a3_vchord_cosine_opclass.py     | 100 ++++++++++++++++++
 ..._add_bank_internal_id_and_per_bank_hnsw.py |   2 +-
 ...k2l3m4_learnings_and_pinned_reflections.py |   2 +-
 .../hindsight_api/engine/memory_engine.py     |  21 ++--
 .../hindsight_api/engine/retain/link_utils.py |  21 ++--
 hindsight-api-slim/tests/test_link_utils.py   |  25 +++--
 hindsight-api-slim/tests/test_vector_index.py |  27 +++++
 10 files changed, 211 insertions(+), 30 deletions(-)
 create mode 100644 hindsight-api-slim/hindsight_api/alembic/versions/b8c9d0e1f2a3_vchord_cosine_opclass.py

diff --git a/hindsight-api-slim/hindsight_api/_vector_index.py b/hindsight-api-slim/hindsight_api/_vector_index.py
index 6a19f7135..6e286bff3 100644
--- a/hindsight-api-slim/hindsight_api/_vector_index.py
+++ b/hindsight-api-slim/hindsight_api/_vector_index.py
@@ -34,7 +34,7 @@
     "pgvector": "USING hnsw (embedding vector_cosine_ops)",
     "pgvectorscale": "USING diskann (embedding vector_cosine_ops) WITH (num_neighbors = 50)",
     "pg_diskann": "USING diskann (embedding vector_cosine_ops) WITH (max_neighbors = 50)",
-    "vchord": "USING vchordrq (embedding vector_l2_ops)",
+    "vchord": "USING vchordrq (embedding vector_cosine_ops)",
     "scann": "USING scann (embedding cosine) WITH (mode = 'AUTO')",
 }
 
@@ -46,6 +46,24 @@
     "scann": "scann",
 }
 
+# Per-backend ANN search-time tuning GUCs. Each entry is a tuple of
+# (guc_name, value) pairs the caller can apply with SET or SET LOCAL.
+#
+# - pgvector exposes hnsw.ef_search.
+# - vchord exposes vchordrq.probes (no default; see VectorChord issue #392)
+#   and vchordrq.epsilon (default 1.9). We only set probes for now and leave
+#   epsilon at its default; lowering it further is a separate trade-off.
+# - pgvectorscale / pg_diskann / scann do not expose an equivalent per-statement
+#   knob in the engine today, so the dispatcher returns no statements for them.
+_ANN_TUNING_LOW_LATENCY: dict[str, tuple[tuple[str, str], ...]] = {
+    "pgvector": (("hnsw.ef_search", "60"),),
+    "vchord": (("vchordrq.probes", "10"),),
+}
+_ANN_TUNING_HIGH_RECALL: dict[str, tuple[tuple[str, str], ...]] = {
+    "pgvector": (("hnsw.ef_search", "200"),),
+    "vchord": (("vchordrq.probes", "30"),),
+}
+
 _EXTENSION_INSTALL_SQL = {
     "pgvector": ("CREATE EXTENSION IF NOT EXISTS vector",),
     "pgvectorscale": (
@@ -115,6 +133,25 @@ def should_defer_index_creation(ext: str, row_count: int) -> bool:
     return minimum_rows > 0 and row_count < minimum_rows
 
 
+def ann_search_tuning_settings(ext: str, *, kind: str) -> tuple[tuple[str, str], ...]:
+    """Return per-backend (guc_name, value) pairs for ANN search-time tuning.
+
+    ``kind`` is ``"low_latency"`` for retain-side link probing (smaller probe
+    count, lower recall, lower latency) and ``"high_recall"`` for connection
+    init in the pool (larger probe count, higher recall). Callers wrap each
+    pair with ``SET LOCAL`` or ``SET`` themselves so the same dispatcher works
+    for both transaction-scoped and session-scoped use. Returns an empty tuple
+    for backends without an equivalent knob.
+    """
+    if kind == "low_latency":
+        table = _ANN_TUNING_LOW_LATENCY
+    elif kind == "high_recall":
+        table = _ANN_TUNING_HIGH_RECALL
+    else:
+        raise ValueError(f"Unknown ANN tuning kind: {kind!r}")
+    return table.get(_normalize_resolved(ext), ())
+
+
 def uses_per_bank_vector_indexes(ext: str) -> bool:
     """Return whether the backend should create per-bank partial vector indexes."""
     return _normalize_resolved(ext) != "scann"
diff --git a/hindsight-api-slim/hindsight_api/alembic/versions/5a366d414dce_initial_schema.py b/hindsight-api-slim/hindsight_api/alembic/versions/5a366d414dce_initial_schema.py
index ea66f4d05..461b6e929 100644
--- a/hindsight-api-slim/hindsight_api/alembic/versions/5a366d414dce_initial_schema.py
+++ b/hindsight-api-slim/hindsight_api/alembic/versions/5a366d414dce_initial_schema.py
@@ -83,7 +83,7 @@ def _vector_index_using_clause(ext: str) -> str:
     if ext == "pg_diskann":
         return "USING diskann (embedding vector_cosine_ops) WITH (max_neighbors = 50)"
     if ext == "vchord":
-        return "USING vchordrq (embedding vector_l2_ops)"
+        return "USING vchordrq (embedding vector_cosine_ops)"
     if ext == "scann":
         return "USING scann (embedding cosine) WITH (mode = 'AUTO')"
     return "USING hnsw (embedding vector_cosine_ops)"
diff --git a/hindsight-api-slim/hindsight_api/alembic/versions/a4b5c6d7e8f9_fix_per_bank_vector_index_type.py b/hindsight-api-slim/hindsight_api/alembic/versions/a4b5c6d7e8f9_fix_per_bank_vector_index_type.py
index dc0a5ef20..ffa1dc6a3 100644
--- a/hindsight-api-slim/hindsight_api/alembic/versions/a4b5c6d7e8f9_fix_per_bank_vector_index_type.py
+++ b/hindsight-api-slim/hindsight_api/alembic/versions/a4b5c6d7e8f9_fix_per_bank_vector_index_type.py
@@ -63,7 +63,7 @@ def _vector_index_using_clause(ext: str) -> str:
     if ext == "pgvectorscale":
         return "USING diskann (embedding vector_cosine_ops) WITH (num_neighbors = 50)"
     if ext == "vchord":
-        return "USING vchordrq (embedding vector_l2_ops)"
+        return "USING vchordrq (embedding vector_cosine_ops)"
     if ext == "scann":
         return "USING scann (embedding cosine) WITH (mode = 'AUTO')"
     return "USING hnsw (embedding vector_cosine_ops)"
diff --git a/hindsight-api-slim/hindsight_api/alembic/versions/b8c9d0e1f2a3_vchord_cosine_opclass.py b/hindsight-api-slim/hindsight_api/alembic/versions/b8c9d0e1f2a3_vchord_cosine_opclass.py
new file mode 100644
index 000000000..dc2756a28
--- /dev/null
+++ b/hindsight-api-slim/hindsight_api/alembic/versions/b8c9d0e1f2a3_vchord_cosine_opclass.py
@@ -0,0 +1,100 @@
+"""Re-create vchord vector indexes with vector_cosine_ops
+
+Revision ID: b8c9d0e1f2a3
+Revises: 86f7a033d372
+Create Date: 2026-05-20
+
+vchordrq operator classes are bound 1:1 to operators in PostgreSQL:
+vector_l2_ops only matches ``<->``, while every Hindsight ANN query uses
+``<=>`` (cosine distance). The previous vchord mapping used vector_l2_ops,
+so vchord deployments could never use the index — every ANN query fell
+back to a sequential scan with per-row cosine computation.
+
+This migration finds any vchordrq index built with vector_l2_ops in the
+target schema and re-creates it with vector_cosine_ops, using
+``CREATE INDEX CONCURRENTLY`` so it can run online. It is a no-op when:
+
+* the configured vector extension is not vchord, or
+* no matching indexes exist (already on cosine ops).
+
+Only PostgreSQL is affected; the Oracle 23ai dialect uses its own native
+vector index and does not depend on this mapping.
+"""
+
+from __future__ import annotations
+
+import os
+import re
+from collections.abc import Sequence
+
+from alembic import context, op
+from sqlalchemy import text
+
+from hindsight_api.alembic._dialect import run_for_dialect
+
+revision: str = "b8c9d0e1f2a3"
+down_revision: str | Sequence[str] | None = "86f7a033d372"
+branch_labels: str | Sequence[str] | None = None
+depends_on: str | Sequence[str] | None = None
+
+
+def _configured_vector_extension() -> str:
+    return os.getenv("HINDSIGHT_API_VECTOR_EXTENSION", "pgvector").lower()
+
+
+def _rebuild_vchordrq_indexes(old_ops: str, new_ops: str) -> None:
+    """Rebuild vchordrq indexes using ``old_ops`` so they use ``new_ops``.
+
+    Each index is rebuilt with CREATE INDEX CONCURRENTLY under a fresh name,
+    then the old index is dropped and the new one renamed to take its place.
+    Must be called inside an ``autocommit_block()`` because CONCURRENTLY
+    cannot run inside a transaction.
+    """
+    bind = op.get_bind()
+    schema = context.config.get_main_option("target_schema") or "public"
+
+    rows = bind.execute(
+        text(
+            "SELECT indexname, indexdef FROM pg_indexes "
+            "WHERE schemaname = :schema "
+            "AND indexdef ILIKE '%vchordrq%' "
+            "AND indexdef ILIKE :ops_like"
+        ),
+        {"schema": schema, "ops_like": f"%{old_ops}%"},
+    ).fetchall()
+
+    for idx_name, indexdef in rows:
+        new_def = indexdef.replace(old_ops, new_ops, 1)
+        temp_name = f"{idx_name}__opclass_swap"
+        new_def = new_def.replace(idx_name, temp_name, 1)
+        new_def = re.sub(
+            r"^CREATE\s+INDEX\b",
+            "CREATE INDEX CONCURRENTLY IF NOT EXISTS",
+            new_def,
+            count=1,
+        )
+        op.execute(new_def)
+        op.execute(f'DROP INDEX IF EXISTS "{schema}"."{idx_name}"')
+        op.execute(f'ALTER INDEX "{schema}"."{temp_name}" RENAME TO "{idx_name}"')
+
+
+def _pg_upgrade() -> None:
+    if _configured_vector_extension() != "vchord":
+        return
+    with op.get_context().autocommit_block():
+        _rebuild_vchordrq_indexes("vector_l2_ops", "vector_cosine_ops")
+
+
+def _pg_downgrade() -> None:
+    if _configured_vector_extension() != "vchord":
+        return
+    with op.get_context().autocommit_block():
+        _rebuild_vchordrq_indexes("vector_cosine_ops", "vector_l2_ops")
+
+
+def upgrade() -> None:
+    run_for_dialect(pg=_pg_upgrade)
+
+
+def downgrade() -> None:
+    run_for_dialect(pg=_pg_downgrade)
diff --git a/hindsight-api-slim/hindsight_api/alembic/versions/d5e6f7a8b9c0_add_bank_internal_id_and_per_bank_hnsw.py b/hindsight-api-slim/hindsight_api/alembic/versions/d5e6f7a8b9c0_add_bank_internal_id_and_per_bank_hnsw.py
index d88a7a813..dc880730c 100644
--- a/hindsight-api-slim/hindsight_api/alembic/versions/d5e6f7a8b9c0_add_bank_internal_id_and_per_bank_hnsw.py
+++ b/hindsight-api-slim/hindsight_api/alembic/versions/d5e6f7a8b9c0_add_bank_internal_id_and_per_bank_hnsw.py
@@ -55,7 +55,7 @@ def _vector_index_using_clause(ext: str) -> str:
     if ext == "pgvectorscale":
         return "USING diskann (embedding vector_cosine_ops) WITH (num_neighbors = 50)"
     if ext == "vchord":
-        return "USING vchordrq (embedding vector_l2_ops)"
+        return "USING vchordrq (embedding vector_cosine_ops)"
     if ext == "scann":
         return "USING scann (embedding cosine) WITH (mode = 'AUTO')"
     return "USING hnsw (embedding vector_cosine_ops)"
diff --git a/hindsight-api-slim/hindsight_api/alembic/versions/n9i0j1k2l3m4_learnings_and_pinned_reflections.py b/hindsight-api-slim/hindsight_api/alembic/versions/n9i0j1k2l3m4_learnings_and_pinned_reflections.py
index 52a750eb9..717b2d1ee 100644
--- a/hindsight-api-slim/hindsight_api/alembic/versions/n9i0j1k2l3m4_learnings_and_pinned_reflections.py
+++ b/hindsight-api-slim/hindsight_api/alembic/versions/n9i0j1k2l3m4_learnings_and_pinned_reflections.py
@@ -87,7 +87,7 @@ def _vector_index_using_clause(ext: str) -> str:
     if ext == "pg_diskann":
         return "USING diskann (embedding vector_cosine_ops) WITH (max_neighbors = 50)"
     if ext == "vchord":
-        return "USING vchordrq (embedding vector_l2_ops)"
+        return "USING vchordrq (embedding vector_cosine_ops)"
     if ext == "scann":
         return "USING scann (embedding cosine) WITH (mode = 'AUTO')"
     return "USING hnsw (embedding vector_cosine_ops)"
diff --git a/hindsight-api-slim/hindsight_api/engine/memory_engine.py b/hindsight-api-slim/hindsight_api/engine/memory_engine.py
index bf204ee6c..252695255 100644
--- a/hindsight-api-slim/hindsight_api/engine/memory_engine.py
+++ b/hindsight-api-slim/hindsight_api/engine/memory_engine.py
@@ -13,6 +13,7 @@
 import contextvars
 import json
 import logging
+import os
 import time
 import uuid
 from collections.abc import Awaitable, Callable
@@ -24,6 +25,7 @@
 import httpx
 import tiktoken
 
+from .._vector_index import ann_search_tuning_settings
 from ..config import (
     DEFAULT_RECALL_CHUNKS_MAX_TOKENS,
     DEFAULT_RECALL_INCLUDE_CHUNKS,
@@ -2047,13 +2049,18 @@ async def verify_llm():
 
         # Per-connection initialization callback (PostgreSQL-specific for now)
         async def _init_connection(conn: asyncpg.Connection) -> None:
-            # SET (not SET LOCAL) so it persists for the connection lifetime.
-            # ef_search=200 improves HNSW recall quality for the per-fact_type
-            # semantic queries in retrieve_semantic_bm25_combined().
-            try:
-                await conn.execute("SET hnsw.ef_search = 200")
-            except Exception:
-                logger.debug("Could not set hnsw.ef_search — extension may not support it")
+            # SET (not SET LOCAL) so per-backend ANN tuning persists for the
+            # connection lifetime. Each backend exposes its own GUC: pgvector
+            # uses hnsw.ef_search, vchord uses vchordrq.probes. The dispatcher
+            # returns the right one for the configured extension, tuned for
+            # the higher recall the per-fact_type semantic queries in
+            # retrieve_semantic_bm25_combined() need.
+            ext = os.getenv("HINDSIGHT_API_VECTOR_EXTENSION", "pgvector").lower()
+            for guc, value in ann_search_tuning_settings(ext, kind="high_recall"):
+                try:
+                    await conn.execute(f"SET {guc} = {value}")
+                except Exception:
+                    logger.debug("Could not set %s — extension may not support it", guc)
 
             # Server-side safety net for runaway queries. Migrations use a
             # separate SQLAlchemy/psycopg2 engine, so long-running DDL is
diff --git a/hindsight-api-slim/hindsight_api/engine/retain/link_utils.py b/hindsight-api-slim/hindsight_api/engine/retain/link_utils.py
index 3c0b7e8c1..ef6d6a7cf 100644
--- a/hindsight-api-slim/hindsight_api/engine/retain/link_utils.py
+++ b/hindsight-api-slim/hindsight_api/engine/retain/link_utils.py
@@ -3,10 +3,12 @@
 """
 
 import logging
+import os
 import time
 from datetime import UTC, datetime, timedelta
 from uuid import UUID
 
+from ..._vector_index import ann_search_tuning_settings
 from ..memory_engine import fq_table
 from .types import EntityLink
 
@@ -701,16 +703,19 @@ async def compute_semantic_links_ann(
     # `relation "_ann_seeds" does not exist` on the second statement.
     #
     # Using ON COMMIT DROP + SET LOCAL also means we don't have to remember to
-    # manually drop the temp table or reset hnsw.ef_search — the transaction
-    # end handles both.
+    # manually drop the temp table or reset the per-backend ANN tuning GUC —
+    # the transaction end handles both.
     rows: list = []
     async with conn.transaction():
-        # Transaction-local ef_search. Default 400 is tuned for recall precision
-        # but at 164k units each HNSW probe takes 94ms. ef_search=60 gives 2.7ms
-        # per probe (35x faster) with sufficient accuracy for top-50 semantic
-        # link creation. SET LOCAL auto-reverts at commit, so we don't pollute
-        # the pool for subsequent recall queries.
-        await conn.execute("SET LOCAL hnsw.ef_search = 60")
+        # Transaction-local ANN tuning. Each supported backend exposes its own
+        # GUC (hnsw.ef_search on pgvector, vchordrq.probes on vchord); the
+        # dispatcher returns the right knob for the configured backend with a
+        # value tuned for top-50 semantic link creation (lower recall but much
+        # lower latency than the recall-side default). SET LOCAL auto-reverts
+        # at commit, so we don't pollute the pool for subsequent queries.
+        ext = os.getenv("HINDSIGHT_API_VECTOR_EXTENSION", "pgvector").lower()
+        for guc, value in ann_search_tuning_settings(ext, kind="low_latency"):
+            await conn.execute(f"SET LOCAL {guc} = {value}")
 
         t_setup = time_mod.time()
         await conn.execute("CREATE TEMP TABLE _ann_seeds (unit_id text, emb_text text, fact_type text) ON COMMIT DROP")
diff --git a/hindsight-api-slim/tests/test_link_utils.py b/hindsight-api-slim/tests/test_link_utils.py
index c152e4e58..b5f9d6511 100644
--- a/hindsight-api-slim/tests/test_link_utils.py
+++ b/hindsight-api-slim/tests/test_link_utils.py
@@ -509,11 +509,16 @@ async def test_no_manual_drop_or_truncate(self, mock_conn):
         )
 
     @pytest.mark.asyncio
-    async def test_uses_set_local_for_ef_search(self, mock_conn):
-        """hnsw.ef_search must be set with SET LOCAL so the change is scoped
-        to the transaction. Without SET LOCAL, the setting would leak onto
-        the pooled backend and affect subsequent recall queries that land
-        on the same backend."""
+    @pytest.mark.parametrize(
+        ("ext", "guc"),
+        [("pgvector", "hnsw.ef_search"), ("vchord", "vchordrq.probes")],
+    )
+    async def test_uses_set_local_for_ann_tuning(self, mock_conn, monkeypatch, ext, guc):
+        """The per-backend ANN tuning GUC must be set with SET LOCAL so the
+        change is scoped to the transaction. Without SET LOCAL, the setting
+        would leak onto the pooled backend and affect subsequent recall
+        queries that land on the same backend."""
+        monkeypatch.setenv("HINDSIGHT_API_VECTOR_EXTENSION", ext)
         emb = [0.1] * 384
         await compute_semantic_links_ann(
             conn=mock_conn,
@@ -524,11 +529,11 @@ async def test_uses_set_local_for_ef_search(self, mock_conn):
         )
 
         executed_sql = [call.args[0] for call in mock_conn.execute.call_args_list]
-        ef_statements = [s for s in executed_sql if "hnsw.ef_search" in s]
-        assert ef_statements, "ef_search must be tuned down for retain ANN"
-        for stmt in ef_statements:
+        tuning_statements = [s for s in executed_sql if guc in s]
+        assert tuning_statements, f"{guc} must be tuned for retain ANN under ext={ext}"
+        for stmt in tuning_statements:
             assert stmt.strip().startswith("SET LOCAL"), (
-                f"hnsw.ef_search must use SET LOCAL, got: {stmt}"
+                f"{guc} must use SET LOCAL, got: {stmt}"
             )
         # And there must not be a RESET — SET LOCAL handles it at commit.
-        assert not any("RESET hnsw.ef_search" in s for s in executed_sql)
+        assert not any(f"RESET {guc}" in s for s in executed_sql)
diff --git a/hindsight-api-slim/tests/test_vector_index.py b/hindsight-api-slim/tests/test_vector_index.py
index c88df872d..e37ca8b0e 100644
--- a/hindsight-api-slim/tests/test_vector_index.py
+++ b/hindsight-api-slim/tests/test_vector_index.py
@@ -2,6 +2,7 @@
 
 from hindsight_api._vector_index import (
     SCANN_MIN_ROWS_FOR_AUTO_INDEX,
+    ann_search_tuning_settings,
     bootstrap_extension,
     index_type_keyword,
     index_using_clause,
@@ -41,6 +42,13 @@ def test_index_using_clause_pgvector_matches_existing_clause():
     assert index_using_clause("pgvector") == "USING hnsw (embedding vector_cosine_ops)"
 
 
+def test_index_using_clause_vchord_uses_cosine_ops():
+    # vchordrq opclasses are bound 1:1 to operators in PostgreSQL; the engine
+    # uses `<=>` (cosine distance) everywhere, so the index must be declared
+    # with vector_cosine_ops or the planner falls back to a sequential scan.
+    assert index_using_clause("vchord") == "USING vchordrq (embedding vector_cosine_ops)"
+
+
 def test_index_type_keyword_scann_round_trips_pg_indexes_indexdef():
     keyword = index_type_keyword("scann")
     indexdef = "CREATE INDEX idx ON memory_units USING scann (embedding cosine) WITH (mode='AUTO')"
@@ -67,6 +75,24 @@ def test_scann_index_creation_defers_until_table_is_large_enough():
     assert not should_defer_index_creation("pgvector", 0)
 
 
+def test_ann_search_tuning_settings_pgvector_dispatches_hnsw_ef_search():
+    assert ann_search_tuning_settings("pgvector", kind="low_latency") == (("hnsw.ef_search", "60"),)
+    assert ann_search_tuning_settings("pgvector", kind="high_recall") == (("hnsw.ef_search", "200"),)
+
+
+def test_ann_search_tuning_settings_vchord_dispatches_vchordrq_probes():
+    # vchord doesn't recognize hnsw.ef_search; the dispatcher must route to
+    # the vchordrq equivalent (probes), otherwise the GUC silently does nothing.
+    assert ann_search_tuning_settings("vchord", kind="low_latency") == (("vchordrq.probes", "10"),)
+    assert ann_search_tuning_settings("vchord", kind="high_recall") == (("vchordrq.probes", "30"),)
+
+
+def test_ann_search_tuning_settings_returns_empty_for_backends_without_knob():
+    for ext in ("pgvectorscale", "pg_diskann", "scann"):
+        assert ann_search_tuning_settings(ext, kind="low_latency") == ()
+        assert ann_search_tuning_settings(ext, kind="high_recall") == ()
+
+
 def test_scann_does_not_use_per_bank_partial_indexes():
     assert not uses_per_bank_vector_indexes("scann")
     assert uses_per_bank_vector_indexes("pgvector")
@@ -79,6 +105,7 @@ def test_alembic_vector_migrations_freeze_vector_sql_locally():
     changed_migrations = [
         "5a366d414dce_initial_schema.py",
         "a4b5c6d7e8f9_fix_per_bank_vector_index_type.py",
+        "b8c9d0e1f2a3_vchord_cosine_opclass.py",
         "d5e6f7a8b9c0_add_bank_internal_id_and_per_bank_hnsw.py",
         "n9i0j1k2l3m4_learnings_and_pinned_reflections.py",
     ]

From 79ee0a347ec7f9351b1ac951c41bfb246f3736a1 Mon Sep 17 00:00:00 2001
From: Byeonghoon Yoo <bhyoo@bhyoo.com>
Date: Thu, 21 May 2026 09:31:29 +0900
Subject: [PATCH 2/3] refactor: route HINDSIGHT_API_VECTOR_EXTENSION through a
 shared helper

Per review on #1668: the env-var lookup that decides which vector backend
is configured was duplicated in three places (the new migration plus the
two runtime call sites in engine/retain/link_utils.py and
engine/memory_engine.py). Centralize the read + validation in
hindsight_api._vector_index.configured_vector_extension() so the default
value and the access mechanism live in one spot.

The new migration b8c9d0e1f2a3_vchord_cosine_opclass now imports the
shared helper instead of inlining its own. The four legacy vchord
migrations stay frozen (they keep their inline helpers); the frozen-state
test is narrowed to that legacy set so future vchord migrations can opt
into the shared helper on a per-migration basis.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
---
 .../hindsight_api/_vector_index.py            | 13 ++++++++
 .../b8c9d0e1f2a3_vchord_cosine_opclass.py     | 10 ++-----
 .../hindsight_api/engine/memory_engine.py     |  6 ++--
 .../hindsight_api/engine/retain/link_utils.py |  6 ++--
 hindsight-api-slim/tests/test_vector_index.py | 30 +++++++++++++++++--
 5 files changed, 47 insertions(+), 18 deletions(-)

diff --git a/hindsight-api-slim/hindsight_api/_vector_index.py b/hindsight-api-slim/hindsight_api/_vector_index.py
index 6e286bff3..169bd0088 100644
--- a/hindsight-api-slim/hindsight_api/_vector_index.py
+++ b/hindsight-api-slim/hindsight_api/_vector_index.py
@@ -3,6 +3,7 @@
 from __future__ import annotations
 
 import logging
+import os
 
 from sqlalchemy import text
 from sqlalchemy.engine import Connection
@@ -85,6 +86,18 @@
 }
 
 
+def configured_vector_extension() -> str:
+    """Return the user-configured vector backend extension.
+
+    Reads ``HINDSIGHT_API_VECTOR_EXTENSION`` (default ``"pgvector"``) and
+    validates it via :func:`validate_extension`. This is the single source of
+    truth for runtime code that needs to dispatch behaviour by vector backend;
+    callers should prefer this over reading the env var directly, so the
+    default value and the lookup mechanism live in one place.
+    """
+    return validate_extension(os.getenv("HINDSIGHT_API_VECTOR_EXTENSION", "pgvector"))
+
+
 def validate_extension(name: str) -> str:
     """Return a normalized configurable vector extension name or raise.
 
diff --git a/hindsight-api-slim/hindsight_api/alembic/versions/b8c9d0e1f2a3_vchord_cosine_opclass.py b/hindsight-api-slim/hindsight_api/alembic/versions/b8c9d0e1f2a3_vchord_cosine_opclass.py
index dc2756a28..860a3ab84 100644
--- a/hindsight-api-slim/hindsight_api/alembic/versions/b8c9d0e1f2a3_vchord_cosine_opclass.py
+++ b/hindsight-api-slim/hindsight_api/alembic/versions/b8c9d0e1f2a3_vchord_cosine_opclass.py
@@ -23,13 +23,13 @@
 
 from __future__ import annotations
 
-import os
 import re
 from collections.abc import Sequence
 
 from alembic import context, op
 from sqlalchemy import text
 
+from hindsight_api._vector_index import configured_vector_extension
 from hindsight_api.alembic._dialect import run_for_dialect
 
 revision: str = "b8c9d0e1f2a3"
@@ -38,10 +38,6 @@
 depends_on: str | Sequence[str] | None = None
 
 
-def _configured_vector_extension() -> str:
-    return os.getenv("HINDSIGHT_API_VECTOR_EXTENSION", "pgvector").lower()
-
-
 def _rebuild_vchordrq_indexes(old_ops: str, new_ops: str) -> None:
     """Rebuild vchordrq indexes using ``old_ops`` so they use ``new_ops``.
 
@@ -79,14 +75,14 @@ def _rebuild_vchordrq_indexes(old_ops: str, new_ops: str) -> None:
 
 
 def _pg_upgrade() -> None:
-    if _configured_vector_extension() != "vchord":
+    if configured_vector_extension() != "vchord":
         return
     with op.get_context().autocommit_block():
         _rebuild_vchordrq_indexes("vector_l2_ops", "vector_cosine_ops")
 
 
 def _pg_downgrade() -> None:
-    if _configured_vector_extension() != "vchord":
+    if configured_vector_extension() != "vchord":
         return
     with op.get_context().autocommit_block():
         _rebuild_vchordrq_indexes("vector_cosine_ops", "vector_l2_ops")
diff --git a/hindsight-api-slim/hindsight_api/engine/memory_engine.py b/hindsight-api-slim/hindsight_api/engine/memory_engine.py
index 252695255..f7a4521d3 100644
--- a/hindsight-api-slim/hindsight_api/engine/memory_engine.py
+++ b/hindsight-api-slim/hindsight_api/engine/memory_engine.py
@@ -13,7 +13,6 @@
 import contextvars
 import json
 import logging
-import os
 import time
 import uuid
 from collections.abc import Awaitable, Callable
@@ -25,7 +24,7 @@
 import httpx
 import tiktoken
 
-from .._vector_index import ann_search_tuning_settings
+from .._vector_index import ann_search_tuning_settings, configured_vector_extension
 from ..config import (
     DEFAULT_RECALL_CHUNKS_MAX_TOKENS,
     DEFAULT_RECALL_INCLUDE_CHUNKS,
@@ -2055,8 +2054,7 @@ async def _init_connection(conn: asyncpg.Connection) -> None:
             # returns the right one for the configured extension, tuned for
             # the higher recall the per-fact_type semantic queries in
             # retrieve_semantic_bm25_combined() need.
-            ext = os.getenv("HINDSIGHT_API_VECTOR_EXTENSION", "pgvector").lower()
-            for guc, value in ann_search_tuning_settings(ext, kind="high_recall"):
+            for guc, value in ann_search_tuning_settings(configured_vector_extension(), kind="high_recall"):
                 try:
                     await conn.execute(f"SET {guc} = {value}")
                 except Exception:
diff --git a/hindsight-api-slim/hindsight_api/engine/retain/link_utils.py b/hindsight-api-slim/hindsight_api/engine/retain/link_utils.py
index ef6d6a7cf..8b6952bd7 100644
--- a/hindsight-api-slim/hindsight_api/engine/retain/link_utils.py
+++ b/hindsight-api-slim/hindsight_api/engine/retain/link_utils.py
@@ -3,12 +3,11 @@
 """
 
 import logging
-import os
 import time
 from datetime import UTC, datetime, timedelta
 from uuid import UUID
 
-from ..._vector_index import ann_search_tuning_settings
+from ..._vector_index import ann_search_tuning_settings, configured_vector_extension
 from ..memory_engine import fq_table
 from .types import EntityLink
 
@@ -713,8 +712,7 @@ async def compute_semantic_links_ann(
         # value tuned for top-50 semantic link creation (lower recall but much
         # lower latency than the recall-side default). SET LOCAL auto-reverts
         # at commit, so we don't pollute the pool for subsequent queries.
-        ext = os.getenv("HINDSIGHT_API_VECTOR_EXTENSION", "pgvector").lower()
-        for guc, value in ann_search_tuning_settings(ext, kind="low_latency"):
+        for guc, value in ann_search_tuning_settings(configured_vector_extension(), kind="low_latency"):
             await conn.execute(f"SET LOCAL {guc} = {value}")
 
         t_setup = time_mod.time()
diff --git a/hindsight-api-slim/tests/test_vector_index.py b/hindsight-api-slim/tests/test_vector_index.py
index e37ca8b0e..6834d76d8 100644
--- a/hindsight-api-slim/tests/test_vector_index.py
+++ b/hindsight-api-slim/tests/test_vector_index.py
@@ -4,6 +4,7 @@
     SCANN_MIN_ROWS_FOR_AUTO_INDEX,
     ann_search_tuning_settings,
     bootstrap_extension,
+    configured_vector_extension,
     index_type_keyword,
     index_using_clause,
     pg_extension_name,
@@ -93,6 +94,24 @@ def test_ann_search_tuning_settings_returns_empty_for_backends_without_knob():
         assert ann_search_tuning_settings(ext, kind="high_recall") == ()
 
 
+def test_configured_vector_extension_defaults_to_pgvector(monkeypatch):
+    monkeypatch.delenv("HINDSIGHT_API_VECTOR_EXTENSION", raising=False)
+    assert configured_vector_extension() == "pgvector"
+
+
+def test_configured_vector_extension_reads_env_and_lowercases(monkeypatch):
+    monkeypatch.setenv("HINDSIGHT_API_VECTOR_EXTENSION", "VChord")
+    assert configured_vector_extension() == "vchord"
+
+
+def test_configured_vector_extension_rejects_unknown_value(monkeypatch):
+    import pytest
+
+    monkeypatch.setenv("HINDSIGHT_API_VECTOR_EXTENSION", "bogus")
+    with pytest.raises(ValueError):
+        configured_vector_extension()
+
+
 def test_scann_does_not_use_per_bank_partial_indexes():
     assert not uses_per_bank_vector_indexes("scann")
     assert uses_per_bank_vector_indexes("pgvector")
@@ -101,16 +120,21 @@ def test_scann_does_not_use_per_bank_partial_indexes():
 
 
 def test_alembic_vector_migrations_freeze_vector_sql_locally():
+    # Pre-existing vchord-related migrations keep their vector SQL frozen
+    # (inline helpers, no import from hindsight_api._vector_index) so their
+    # historical behaviour cannot drift when _vector_index.py is updated.
+    # New vchord migrations are reviewed for this trade-off individually and
+    # may import the shared helpers when they want to share the canonical
+    # source of truth (see b8c9d0e1f2a3_vchord_cosine_opclass.py).
     migration_dir = Path(__file__).resolve().parent.parent / "hindsight_api/alembic/versions"
-    changed_migrations = [
+    frozen_migrations = [
         "5a366d414dce_initial_schema.py",
         "a4b5c6d7e8f9_fix_per_bank_vector_index_type.py",
-        "b8c9d0e1f2a3_vchord_cosine_opclass.py",
         "d5e6f7a8b9c0_add_bank_internal_id_and_per_bank_hnsw.py",
         "n9i0j1k2l3m4_learnings_and_pinned_reflections.py",
     ]
 
-    for migration in changed_migrations:
+    for migration in frozen_migrations:
         text = (migration_dir / migration).read_text()
         assert "hindsight_api._vector_index" not in text
 

From 73dfcf8f64a7cb73131ae33f35c19a0305ff1109 Mon Sep 17 00:00:00 2001
From: Byeonghoon Yoo <bhyoo@bhyoo.com>
Date: Tue, 26 May 2026 10:29:04 +0900
Subject: [PATCH 3/3] fix(api): address vchord migration review feedback

- Wrap DROP canonical + RENAME temp in a server-side DO block so the swap
  is atomic; a crash between the two would otherwise leave the temp index
  as a valid orphan and the canonical name missing, with no recovery path
  on retry.
- Drop the temp index at the top of each rebuild loop and assert
  pg_index.indisvalid after CREATE INDEX CONCURRENTLY, so a leftover
  INVALID index from a prior failed run can't be promoted into the
  canonical name.
- Align the migration with the _pg_schema_prefix() convention used by
  other PG migrations, and normalize empty-string target_schema to NULL
  so COALESCE falls back to current_schema() instead of filtering on ''.
- Narrow _init_connection's except Exception to asyncpg.PostgresError so
  real pool/connection bugs surface instead of being silently logged.
- Document the vchordrq.probes 10/30 starting defaults and the
  indexdef.replace first-occurrence assumption.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
---
 .../hindsight_api/_vector_index.py            | 14 +++-
 .../b8c9d0e1f2a3_vchord_cosine_opclass.py     | 66 +++++++++++++++++--
 .../hindsight_api/engine/memory_engine.py     |  6 +-
 3 files changed, 77 insertions(+), 9 deletions(-)

diff --git a/hindsight-api-slim/hindsight_api/_vector_index.py b/hindsight-api-slim/hindsight_api/_vector_index.py
index 169bd0088..e966a5747 100644
--- a/hindsight-api-slim/hindsight_api/_vector_index.py
+++ b/hindsight-api-slim/hindsight_api/_vector_index.py
@@ -50,10 +50,18 @@
 # Per-backend ANN search-time tuning GUCs. Each entry is a tuple of
 # (guc_name, value) pairs the caller can apply with SET or SET LOCAL.
 #
-# - pgvector exposes hnsw.ef_search.
+# - pgvector exposes hnsw.ef_search. The 60 / 200 pair is unchanged from the
+#   pre-dispatcher code (internal benchmarks tuned around our embedding count
+#   and recall floor; see the link_utils / pool init call sites for the
+#   latency-vs-recall framing).
 # - vchord exposes vchordrq.probes (no default; see VectorChord issue #392)
-#   and vchordrq.epsilon (default 1.9). We only set probes for now and leave
-#   epsilon at its default; lowering it further is a separate trade-off.
+#   and vchordrq.epsilon (default 1.9). probes = 10 / 30 are starting
+#   defaults pending a workload-specific sweep — vchordrq's recall curve
+#   shape differs from HNSW's, so the pgvector numbers don't translate
+#   directly. Revisit with a per-cluster benchmark once we have production
+#   recall data; until then these are deliberately conservative on the
+#   high-recall path. We leave epsilon at its default; tightening it is a
+#   separate trade-off.
 # - pgvectorscale / pg_diskann / scann do not expose an equivalent per-statement
 #   knob in the engine today, so the dispatcher returns no statements for them.
 _ANN_TUNING_LOW_LATENCY: dict[str, tuple[tuple[str, str], ...]] = {
diff --git a/hindsight-api-slim/hindsight_api/alembic/versions/b8c9d0e1f2a3_vchord_cosine_opclass.py b/hindsight-api-slim/hindsight_api/alembic/versions/b8c9d0e1f2a3_vchord_cosine_opclass.py
index 860a3ab84..605f3f462 100644
--- a/hindsight-api-slim/hindsight_api/alembic/versions/b8c9d0e1f2a3_vchord_cosine_opclass.py
+++ b/hindsight-api-slim/hindsight_api/alembic/versions/b8c9d0e1f2a3_vchord_cosine_opclass.py
@@ -38,6 +38,12 @@
 depends_on: str | Sequence[str] | None = None
 
 
+def _pg_schema_prefix() -> str:
+    """Schema-qualifier for raw SQL on PG (multi-tenant search_path)."""
+    schema = context.config.get_main_option("target_schema")
+    return f'"{schema}".' if schema else ""
+
+
 def _rebuild_vchordrq_indexes(old_ops: str, new_ops: str) -> None:
     """Rebuild vchordrq indexes using ``old_ops`` so they use ``new_ops``.
 
@@ -47,19 +53,27 @@ def _rebuild_vchordrq_indexes(old_ops: str, new_ops: str) -> None:
     cannot run inside a transaction.
     """
     bind = op.get_bind()
-    schema = context.config.get_main_option("target_schema") or "public"
+    # `or None` collapses both unset and explicit empty-string Alembic options
+    # into NULL so the COALESCE below falls back to current_schema() in either
+    # case. Without it, an empty-string option would filter on `schemaname = ''`
+    # and skip every real schema.
+    target_schema = context.config.get_main_option("target_schema") or None
+    prefix = _pg_schema_prefix()
 
     rows = bind.execute(
         text(
             "SELECT indexname, indexdef FROM pg_indexes "
-            "WHERE schemaname = :schema "
+            "WHERE schemaname = COALESCE(:target_schema, current_schema()) "
             "AND indexdef ILIKE '%vchordrq%' "
             "AND indexdef ILIKE :ops_like"
         ),
-        {"schema": schema, "ops_like": f"%{old_ops}%"},
+        {"target_schema": target_schema, "ops_like": f"%{old_ops}%"},
     ).fetchall()
 
     for idx_name, indexdef in rows:
+        # pg_get_indexdef() emits the canonical form `CREATE INDEX <name> ON …`,
+        # so <name> is the first textual occurrence — both substitutions below
+        # rely on that.
         new_def = indexdef.replace(old_ops, new_ops, 1)
         temp_name = f"{idx_name}__opclass_swap"
         new_def = new_def.replace(idx_name, temp_name, 1)
@@ -69,9 +83,51 @@ def _rebuild_vchordrq_indexes(old_ops: str, new_ops: str) -> None:
             new_def,
             count=1,
         )
+
+        # CREATE INDEX CONCURRENTLY can leave the partial index as INVALID if a
+        # previous run errored (disk pressure, lock conflict, signal). Without
+        # this drop the CONCURRENTLY IF NOT EXISTS below would skip creation,
+        # then we'd drop the original and rename the broken index into its
+        # place — silently restoring the seq-scan bug this migration fixes.
+        op.execute(f'DROP INDEX IF EXISTS {prefix}"{temp_name}"')
         op.execute(new_def)
-        op.execute(f'DROP INDEX IF EXISTS "{schema}"."{idx_name}"')
-        op.execute(f'ALTER INDEX "{schema}"."{temp_name}" RENAME TO "{idx_name}"')
+
+        # Even on a clean run CONCURRENTLY can finish with indisvalid = false
+        # (e.g. constraint violation during the second build scan). Refuse to
+        # promote in that case so we never alias an INVALID index over a working
+        # one.
+        is_valid = bind.execute(
+            text(
+                "SELECT i.indisvalid "
+                "FROM pg_class c "
+                "JOIN pg_index i ON c.oid = i.indexrelid "
+                "JOIN pg_namespace n ON c.relnamespace = n.oid "
+                "WHERE c.relname = :name "
+                "  AND n.nspname = COALESCE(:target_schema, current_schema())"
+            ),
+            {"name": temp_name, "target_schema": target_schema},
+        ).scalar()
+        if not is_valid:
+            raise RuntimeError(
+                f"vchordrq index rebuild produced an INVALID index ({temp_name}); "
+                "drop it manually and re-run the migration."
+            )
+
+        # DROP + RENAME atomically. A crash between the two would leave
+        # `temp_name` as a valid orphan and the canonical name missing —
+        # next run's `pg_indexes` filter (looking for vector_l2_ops) wouldn't
+        # find anything to recover from, so the index would stay gone. PG
+        # runs the DO block in its own server-side transaction, so either
+        # both succeed or both roll back.
+        op.execute(
+            f"""
+            DO $$
+            BEGIN
+                DROP INDEX IF EXISTS {prefix}"{idx_name}";
+                ALTER INDEX {prefix}"{temp_name}" RENAME TO "{idx_name}";
+            END $$;
+            """
+        )
 
 
 def _pg_upgrade() -> None:
diff --git a/hindsight-api-slim/hindsight_api/engine/memory_engine.py b/hindsight-api-slim/hindsight_api/engine/memory_engine.py
index f7a4521d3..46d94a272 100644
--- a/hindsight-api-slim/hindsight_api/engine/memory_engine.py
+++ b/hindsight-api-slim/hindsight_api/engine/memory_engine.py
@@ -2057,7 +2057,11 @@ async def _init_connection(conn: asyncpg.Connection) -> None:
             for guc, value in ann_search_tuning_settings(configured_vector_extension(), kind="high_recall"):
                 try:
                     await conn.execute(f"SET {guc} = {value}")
-                except Exception:
+                except asyncpg.exceptions.PostgresError:
+                    # Defensive net for env mis-config (e.g. extension configured
+                    # for vchord but the cluster only has pgvector). Narrow to
+                    # PostgresError so genuine bugs in the pool/conn layer surface
+                    # instead of being silently logged at debug level.
                     logger.debug("Could not set %s — extension may not support it", guc)
 
             # Server-side safety net for runaway queries. Migrations use a