From 8259d6ad9aceb9466ba9d59d32081428228224b8 Mon Sep 17 00:00:00 2001 From: ai-ag2026 <261867348+ai-ag2026@users.noreply.github.com> Date: Thu, 21 May 2026 16:44:09 +0200 Subject: [PATCH 1/2] fix: avoid retrying invalid embedding dimensions --- .../hindsight_api/engine/memory_engine.py | 40 ++++++++++++++----- .../test_integrity_violation_not_retried.py | 24 ++++++++++- 2 files changed, 51 insertions(+), 13 deletions(-) diff --git a/hindsight-api-slim/hindsight_api/engine/memory_engine.py b/hindsight-api-slim/hindsight_api/engine/memory_engine.py index 4f633e91b..99251be2d 100644 --- a/hindsight-api-slim/hindsight_api/engine/memory_engine.py +++ b/hindsight-api-slim/hindsight_api/engine/memory_engine.py @@ -226,6 +226,30 @@ def _is_oracledb_integrity_error(e: Exception) -> bool: return isinstance(e, oracledb.IntegrityError) +def _is_invalid_embedding_dimension_error(e: Exception) -> bool: + """Return True for deterministic embedding-dimension failures. + + These errors come from either PR #1670's preflight validation + ("embedding 0 has dimension 0; expected 384") or from pgvector itself + ("different vector dimensions 384 and 0"). Retrying the same poisoned + embedding response only burns worker slots; a fresh retain request or a + fixed embedding backend is required. + """ + message = str(e).lower() + return "different vector dimensions" in message or ( + "embedding" in message and "dimension" in message and "expected" in message + ) + + +def _is_non_retryable_task_error(e: Exception) -> bool: + """Classify deterministic task failures that should skip worker retry.""" + return ( + isinstance(e, asyncpg.exceptions.IntegrityConstraintViolationError) + or _is_oracledb_integrity_error(e) + or _is_invalid_embedding_dimension_error(e) + ) + + class Budget(str, Enum): """Budget levels for recall/reflect operations.""" @@ -1236,17 +1260,11 @@ async def execute_task(self, task_dict: dict[str, Any]): logger.error(f"Not retrying task {task_type} (non-retryable), marking as failed") if operation_id: await self._mark_operation_failed(operation_id, str(e), error_traceback) - elif isinstance(e, asyncpg.exceptions.IntegrityConstraintViolationError) or ( - _is_oracledb_integrity_error(e) - ): - # Non-retryable: deterministic integrity violations (PG or Oracle) - # (UniqueViolationError, ForeignKeyViolationError, CheckViolationError, - # NotNullViolationError, ExclusionViolationError / ORA-00001, ORA-02291, etc.) - # will never succeed on retry — the offending row state is already committed. - # Retrying just burns worker capacity. See vectorize-io/hindsight#980. - logger.error( - f"Not retrying task {task_type} (integrity violation, deterministic): {type(e).__name__}" - ) + elif _is_non_retryable_task_error(e): + # Non-retryable: deterministic task failures (integrity violations, + # invalid embedding dimensions, etc.) will not succeed by rerunning + # the same payload. Retrying just burns worker capacity. + logger.error(f"Not retrying task {task_type} (deterministic failure): {type(e).__name__}") if task_type == "consolidation" and operation_id: await self._fire_consolidation_webhook( bank_id=task_dict.get("bank_id", ""), diff --git a/hindsight-api-slim/tests/test_integrity_violation_not_retried.py b/hindsight-api-slim/tests/test_integrity_violation_not_retried.py index c31d720f7..0922fa54d 100644 --- a/hindsight-api-slim/tests/test_integrity_violation_not_retried.py +++ b/hindsight-api-slim/tests/test_integrity_violation_not_retried.py @@ -151,13 +151,33 @@ async def test_foreign_key_violation_also_not_retried(memory): await pool.execute("DELETE FROM banks WHERE bank_id = $1", bank_id) +@pytest.mark.parametrize( + "message", + [ + "embedding 0 has dimension 0; expected 384", + "different vector dimensions 384 and 0", + ], +) +def test_invalid_embedding_dimension_error_is_non_retryable(message): + """Embedding dimension mismatches are deterministic and must not be retried. + + PR #1670 validates empty/mismatched embedding vectors before pgvector writes. + pgvector may also raise its own dimension-mismatch error if an invalid vector + reaches the database layer. In both cases, rerunning the same poisoned + embedding response only burns worker slots; a fresh retain request or fixed + embedding backend is required. + """ + from hindsight_api.engine.memory_engine import _is_non_retryable_task_error + + assert _is_non_retryable_task_error(RuntimeError(message)) is True + + @pytest.mark.asyncio async def test_non_integrity_error_still_retried(memory): """ Sanity check: non-integrity errors (network errors, timeouts, value errors) should STILL use the existing retry path — i.e., raise RetryTaskAt when - ``_retry_count < 3``. Only integrity violations are the new non-retryable - class. + ``_retry_count < 3``. Only deterministic task errors are non-retryable. """ bank_id = f"test-worker-{uuid.uuid4().hex[:8]}" operation_id = uuid.uuid4() From 37e3accd85da3be68f468accc6d630c4ab33b13d Mon Sep 17 00:00:00 2001 From: ai-ag2026 <261867348+ai-ag2026@users.noreply.github.com> Date: Fri, 22 May 2026 22:24:31 +0200 Subject: [PATCH 2/2] chore: refresh generated provider docs --- .../engine/providers/openai_compatible_llm.py | 22 +++++++++++-------- .../references/developer/models.md | 1 + skills/hindsight-docs/references/faq.md | 1 + 3 files changed, 15 insertions(+), 9 deletions(-) diff --git a/hindsight-api-slim/hindsight_api/engine/providers/openai_compatible_llm.py b/hindsight-api-slim/hindsight_api/engine/providers/openai_compatible_llm.py index 9bb6fdba0..0f02949d5 100644 --- a/hindsight-api-slim/hindsight_api/engine/providers/openai_compatible_llm.py +++ b/hindsight-api-slim/hindsight_api/engine/providers/openai_compatible_llm.py @@ -306,15 +306,19 @@ def __init__( self.api_key = "local" # Validate API key for cloud providers - if self.provider in ( - "openai", - "groq", - "minimax", - "deepseek", - "openrouter", - "zai", - "opencode-go", - ) and not self.api_key: + if ( + self.provider + in ( + "openai", + "groq", + "minimax", + "deepseek", + "openrouter", + "zai", + "opencode-go", + ) + and not self.api_key + ): raise ValueError(f"API key is required for {self.provider}") # Service tier configuration (from config, not env vars) diff --git a/skills/hindsight-docs/references/developer/models.md b/skills/hindsight-docs/references/developer/models.md index b50e625a4..51278726a 100644 --- a/skills/hindsight-docs/references/developer/models.md +++ b/skills/hindsight-docs/references/developer/models.md @@ -30,6 +30,7 @@ Used for fact extraction, entity resolution, mental model consolidation, and ans - MiniMax - DeepSeek - z.ai +- opencode-go - Volcano Engine - OpenRouter - OpenAI Codex diff --git a/skills/hindsight-docs/references/faq.md b/skills/hindsight-docs/references/faq.md index 8b8b96a62..f4f8f2aca 100644 --- a/skills/hindsight-docs/references/faq.md +++ b/skills/hindsight-docs/references/faq.md @@ -76,6 +76,7 @@ Browse all supported integrations in the Integrations Hub. - MiniMax - DeepSeek - z.ai +- opencode-go - Volcano Engine - OpenRouter - OpenAI Codex