diff --git a/hindsight-api-slim/hindsight_api/engine/consolidation/prompts.py b/hindsight-api-slim/hindsight_api/engine/consolidation/prompts.py index 342402589..3feac947b 100644 --- a/hindsight-api-slim/hindsight_api/engine/consolidation/prompts.py +++ b/hindsight-api-slim/hindsight_api/engine/consolidation/prompts.py @@ -1,5 +1,7 @@ """Prompts for the consolidation engine.""" +from hindsight_api.engine.prompt_utils import escape_for_prompt + # Default mission when no bank-specific mission is set _DEFAULT_MISSION = "Track every detail: names, numbers, dates, places, and relationships. Prefer specifics over abstractions, never generalise." @@ -90,11 +92,11 @@ def build_batch_consolidation_prompt( The mission defines *what* to track (customisable per bank). Processing rules and output format are always present regardless of mission. """ - mission = observations_mission or _DEFAULT_MISSION + mission = escape_for_prompt(observations_mission or _DEFAULT_MISSION) capacity_section = "" if observation_capacity_note: - capacity_section = f"\n\n## CAPACITY CONSTRAINT\n{observation_capacity_note}" + capacity_section = f"\n\n## CAPACITY CONSTRAINT\n{escape_for_prompt(observation_capacity_note)}" return ( "You are a memory consolidation system. Synthesize facts into observations " diff --git a/hindsight-api-slim/hindsight_api/engine/prompt_utils.py b/hindsight-api-slim/hindsight_api/engine/prompt_utils.py new file mode 100644 index 000000000..6933e8bcd --- /dev/null +++ b/hindsight-api-slim/hindsight_api/engine/prompt_utils.py @@ -0,0 +1,23 @@ +"""Shared utilities for prompt assembly.""" + +import re + +_LONE_OPEN_BRACE = re.compile(r"(? str: + """Double any lone ``{`` / ``}`` so the text survives ``str.format`` untouched. + + Prompt templates are often passed through ``str.format`` to substitute real + placeholders like ``{facts_text}``. Any literal braces in caller-supplied + text — e.g. a bank mission that contains JSON examples — would otherwise be + interpreted as format keys and raise ``KeyError``. + + Idempotent: text that already contains escaped ``{{`` / ``}}`` pairs is + left as-is. Only lone braces (not adjacent to another brace of the same + kind) are doubled. + """ + text = _LONE_OPEN_BRACE.sub("{{", text) + text = _LONE_CLOSE_BRACE.sub("}}", text) + return text diff --git a/hindsight-api-slim/hindsight_api/engine/reflect/prompts.py b/hindsight-api-slim/hindsight_api/engine/reflect/prompts.py index 8c8feb84d..5341ba448 100644 --- a/hindsight-api-slim/hindsight_api/engine/reflect/prompts.py +++ b/hindsight-api-slim/hindsight_api/engine/reflect/prompts.py @@ -515,7 +515,9 @@ def build_final_prompt( def build_final_system_prompt(mission: str | None = None) -> str: """Build the final synthesis system prompt, using mission as role when set.""" - role_section = mission.strip() if mission else _DEFAULT_FINAL_ROLE + from hindsight_api.engine.prompt_utils import escape_for_prompt + + role_section = escape_for_prompt(mission.strip()) if mission else _DEFAULT_FINAL_ROLE return _FINAL_SYSTEM_PROMPT_BASE.format(role_section=role_section) diff --git a/hindsight-api-slim/hindsight_api/engine/retain/fact_extraction.py b/hindsight-api-slim/hindsight_api/engine/retain/fact_extraction.py index 4506a94b7..44a39c0ee 100644 --- a/hindsight-api-slim/hindsight_api/engine/retain/fact_extraction.py +++ b/hindsight-api-slim/hindsight_api/engine/retain/fact_extraction.py @@ -888,13 +888,16 @@ def _build_extraction_prompt_and_schema(config) -> tuple[str, type]: extract_causal_links = config.retain_extract_causal_links # Build retain_mission section if set - injected before the mode-specific guidelines + # Escape braces so user-supplied text survives str.format() on the prompt template. + from hindsight_api.engine.prompt_utils import escape_for_prompt + retain_mission = getattr(config, "retain_mission", None) if retain_mission: retain_mission_section = ( f"══════════════════════════════════════════════════════════════════════════\n" f"FOCUS — What to retain for this bank\n" f"══════════════════════════════════════════════════════════════════════════\n\n" - f"{retain_mission}\n\n" + f"{escape_for_prompt(retain_mission)}\n\n" ) else: retain_mission_section = "" @@ -910,7 +913,7 @@ def _build_extraction_prompt_and_schema(config) -> tuple[str, type]: base_prompt = CUSTOM_FACT_EXTRACTION_PROMPT prompt = base_prompt.format( retain_mission_section=retain_mission_section, - custom_instructions=config.retain_custom_instructions, + custom_instructions=escape_for_prompt(config.retain_custom_instructions), ) elif extraction_mode == "verbose": prompt = VERBOSE_FACT_EXTRACTION_PROMPT.format( diff --git a/hindsight-api-slim/tests/test_prompt_brace_escape.py b/hindsight-api-slim/tests/test_prompt_brace_escape.py new file mode 100644 index 000000000..e9dc17dd4 --- /dev/null +++ b/hindsight-api-slim/tests/test_prompt_brace_escape.py @@ -0,0 +1,151 @@ +"""Tests for brace escaping in prompt builders across all modules. + +User-supplied text (missions, custom instructions) may contain literal braces +(e.g. JSON examples). These must survive ``str.format()`` without crashing. +""" + +import pytest + +from hindsight_api.engine.prompt_utils import escape_for_prompt + + +# --------------------------------------------------------------------------- +# Unit tests for the shared escape helper +# --------------------------------------------------------------------------- + + +class TestEscapeForPrompt: + def test_lone_braces_doubled(self): + assert escape_for_prompt("{x}") == "{{x}}" + + def test_already_escaped_left_alone(self): + assert escape_for_prompt("{{x}}") == "{{x}}" + + def test_idempotent(self): + once = escape_for_prompt('{"dedup": true}') + twice = escape_for_prompt(once) + assert once == twice + + def test_plain_text_unchanged(self): + assert escape_for_prompt("no braces here") == "no braces here" + + def test_mixed_lone_and_escaped(self): + assert escape_for_prompt("{x} and {{y}}") == "{{x}} and {{y}}" + + @pytest.mark.parametrize( + "text", + ["{single}", "}}weird{{", "trailing {", "leading }", ""], + ) + def test_edge_cases_do_not_crash(self, text): + result = escape_for_prompt(text) + # Double-escaped text must survive .format() with no placeholders + result.format() + + +# --------------------------------------------------------------------------- +# Consolidation prompt +# --------------------------------------------------------------------------- + + +class TestConsolidationBraceSafety: + def test_mission_with_json_renders(self): + from hindsight_api.engine.consolidation.prompts import ( + build_batch_consolidation_prompt, + ) + + mission = '{"dedup": true, "merge": true}' + prompt = build_batch_consolidation_prompt(observations_mission=mission) + rendered = prompt.format(facts_text="", observations_text="") + assert mission in rendered + + def test_capacity_note_with_braces_renders(self): + from hindsight_api.engine.consolidation.prompts import ( + build_batch_consolidation_prompt, + ) + + note = "Use shape {limit, used}" + prompt = build_batch_consolidation_prompt( + observations_mission="m", observation_capacity_note=note + ) + rendered = prompt.format(facts_text="", observations_text="") + assert "{limit, used}" in rendered + + +# --------------------------------------------------------------------------- +# Reflect prompt +# --------------------------------------------------------------------------- + + +class TestReflectBraceSafety: + def test_mission_with_json_renders(self): + from hindsight_api.engine.reflect.prompts import build_final_system_prompt + + mission = '{"role": "admin", "scope": "all"}' + prompt = build_final_system_prompt(mission=mission) + # The prompt has no remaining format placeholders, so it should be + # a plain string that contains the original mission text. + assert mission in prompt + + def test_mission_with_lone_braces_renders(self): + from hindsight_api.engine.reflect.prompts import build_final_system_prompt + + mission = "Track {entity} changes" + prompt = build_final_system_prompt(mission=mission) + assert "{entity}" in prompt + + +# --------------------------------------------------------------------------- +# Retain / fact extraction prompt +# --------------------------------------------------------------------------- + + +class TestRetainBraceSafety: + def _make_config(self, **overrides): + """Minimal config-like object for _build_extraction_prompt_and_schema.""" + from types import SimpleNamespace + + defaults = { + "retain_extraction_mode": "concise", + "retain_extract_causal_links": False, + "retain_mission": None, + "retain_custom_instructions": None, + "retain_taxonomy": None, + } + defaults.update(overrides) + return SimpleNamespace(**defaults) + + def test_retain_mission_with_json(self): + from hindsight_api.engine.retain.fact_extraction import ( + _build_extraction_prompt_and_schema, + ) + + config = self._make_config(retain_mission='{"focus": "compliance"}') + prompt, _ = _build_extraction_prompt_and_schema(config) + # prompt is already fully rendered (no remaining placeholders) + assert '{"focus": "compliance"}' in prompt + + def test_custom_instructions_with_braces(self): + from hindsight_api.engine.retain.fact_extraction import ( + _build_extraction_prompt_and_schema, + ) + + config = self._make_config( + retain_extraction_mode="custom", + retain_custom_instructions="Output as {key: value} pairs", + ) + prompt, _ = _build_extraction_prompt_and_schema(config) + assert "{key: value}" in prompt + + def test_both_mission_and_custom_with_braces(self): + from hindsight_api.engine.retain.fact_extraction import ( + _build_extraction_prompt_and_schema, + ) + + config = self._make_config( + retain_extraction_mode="custom", + retain_mission='{"scope": "all"}', + retain_custom_instructions="Format: {k: v}", + ) + prompt, _ = _build_extraction_prompt_and_schema(config) + assert '{"scope": "all"}' in prompt + assert "{k: v}" in prompt diff --git a/skills/hindsight-docs/references/developer/configuration.md b/skills/hindsight-docs/references/developer/configuration.md index 7446491cb..c2a3feee1 100644 --- a/skills/hindsight-docs/references/developer/configuration.md +++ b/skills/hindsight-docs/references/developer/configuration.md @@ -611,7 +611,7 @@ Google's `gemini-embedding-001` produces 3072 dimensions natively but supports c | Variable | Description | Default | |----------|-------------|---------| -| `HINDSIGHT_API_RERANKER_PROVIDER` | Provider: `local`, `tei`, `cohere`, `openrouter`, `zeroentropy`, `siliconflow`, `google`, `flashrank`, `litellm`, `litellm-sdk`, `jina-mlx`, or `rrf` | `local` | +| `HINDSIGHT_API_RERANKER_PROVIDER` | Provider: `local`, `tei`, `cohere`, `openrouter`, `zeroentropy`, `siliconflow`, `alibaba`, `google`, `flashrank`, `litellm`, `litellm-sdk`, `jina-mlx`, or `rrf` | `local` | | `HINDSIGHT_API_RERANKER_LOCAL_MODEL` | Model for local provider | `cross-encoder/ms-marco-MiniLM-L-6-v2` | | `HINDSIGHT_API_RERANKER_LOCAL_MAX_CONCURRENT` | Max concurrent local reranking (prevents CPU thrashing under load) | `4` | | `HINDSIGHT_API_RERANKER_LOCAL_TRUST_REMOTE_CODE` | Allow loading models with custom code (security risk, disabled by default) | `false` | @@ -641,6 +641,8 @@ Google's `gemini-embedding-001` produces 3072 dimensions natively but supports c | `HINDSIGHT_API_RERANKER_SILICONFLOW_API_KEY` | SiliconFlow API key for reranking | - | | `HINDSIGHT_API_RERANKER_SILICONFLOW_MODEL` | SiliconFlow rerank model (e.g., `BAAI/bge-reranker-v2-m3`) | `BAAI/bge-reranker-v2-m3` | | `HINDSIGHT_API_RERANKER_SILICONFLOW_BASE_URL` | Base URL for the SiliconFlow `/rerank` endpoint | `https://api.siliconflow.cn/v1` | +| `HINDSIGHT_API_RERANKER_ALIBABA_API_KEY` | Alibaba Cloud DashScope API key for reranking | - | +| `HINDSIGHT_API_RERANKER_ALIBABA_MODEL` | DashScope rerank model | `qwen3-rerank` | | `HINDSIGHT_API_RERANKER_GOOGLE_PROJECT_ID` | Google Cloud project ID for Discovery Engine reranking (falls back to `HINDSIGHT_API_LLM_VERTEXAI_PROJECT_ID`) | - | | `HINDSIGHT_API_RERANKER_GOOGLE_MODEL` | Google Discovery Engine ranking model | `semantic-ranker-default-004` | | `HINDSIGHT_API_RERANKER_GOOGLE_SERVICE_ACCOUNT_KEY` | Path to service account JSON key (falls back to `HINDSIGHT_API_LLM_VERTEXAI_SERVICE_ACCOUNT_KEY`). If unset, uses ADC. | - | @@ -704,6 +706,11 @@ export HINDSIGHT_API_RERANKER_SILICONFLOW_API_KEY=your-api-key export HINDSIGHT_API_RERANKER_SILICONFLOW_MODEL=BAAI/bge-reranker-v2-m3 # export HINDSIGHT_API_RERANKER_SILICONFLOW_BASE_URL=https://api.siliconflow.cn/v1 # default +# Alibaba Cloud DashScope - qwen3-rerank via Cohere-compatible /reranks endpoint +export HINDSIGHT_API_RERANKER_PROVIDER=alibaba +export HINDSIGHT_API_RERANKER_ALIBABA_API_KEY=your-dashscope-api-key # or set DASHSCOPE_API_KEY +export HINDSIGHT_API_RERANKER_ALIBABA_MODEL=qwen3-rerank # default, can omit + # LiteLLM proxy - unified gateway for multiple reranking providers (requires running LiteLLM proxy server) export HINDSIGHT_API_RERANKER_PROVIDER=litellm export HINDSIGHT_API_RERANKER_LITELLM_API_BASE=http://localhost:4000 diff --git a/skills/hindsight-docs/references/developer/models.md b/skills/hindsight-docs/references/developer/models.md index 51278726a..89a3af713 100644 --- a/skills/hindsight-docs/references/developer/models.md +++ b/skills/hindsight-docs/references/developer/models.md @@ -513,6 +513,7 @@ Reranks initial search results to improve precision. | `cohere` | Cohere rerank API | Production, high quality | | `zeroentropy` | ZeroEntropy rerank API (zerank-2) | Production, state-of-the-art accuracy | | `siliconflow` | SiliconFlow rerank API (Cohere-compatible `/rerank` endpoint) | Users in China or anyone on SiliconFlow's platform | +| `alibaba` | Alibaba Cloud DashScope rerank API (qwen3-rerank) | Users on Alibaba Cloud / DashScope | | `tei` | HuggingFace Text Embeddings Inference | Production, self-hosted | | `flashrank` | FlashRank (lightweight, fast) | Resource-constrained environments | | `litellm` | LiteLLM proxy (unified gateway) | Multi-provider setups | @@ -550,6 +551,14 @@ SiliconFlow hosts a range of open-weight rerankers behind a Cohere-compatible `/ | `BAAI/bge-reranker-v2-m3` | Multilingual, strong default | | `Qwen/Qwen3-Reranker-8B` | Larger, higher accuracy | +### Alibaba Cloud Models + +Alibaba Cloud DashScope exposes `qwen3-rerank` via a Cohere-compatible `/reranks` endpoint: + +| Model | Use Case | +|-------|----------| +| `qwen3-rerank` | 100+ languages, default | + ### LiteLLM Supported Providers LiteLLM supports multiple reranking providers via the `/rerank` endpoint: @@ -595,6 +604,11 @@ export HINDSIGHT_API_RERANKER_PROVIDER=siliconflow export HINDSIGHT_API_RERANKER_SILICONFLOW_API_KEY=your-api-key export HINDSIGHT_API_RERANKER_SILICONFLOW_MODEL=BAAI/bge-reranker-v2-m3 # default, can omit +# Alibaba Cloud DashScope (qwen3-rerank) +export HINDSIGHT_API_RERANKER_PROVIDER=alibaba +export HINDSIGHT_API_RERANKER_ALIBABA_API_KEY=your-dashscope-api-key +export HINDSIGHT_API_RERANKER_ALIBABA_MODEL=qwen3-rerank # default, can omit + # TEI (self-hosted) export HINDSIGHT_API_RERANKER_PROVIDER=tei export HINDSIGHT_API_RERANKER_TEI_URL=http://localhost:8081