Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
@@ -1,5 +1,7 @@
"""Prompts for the consolidation engine."""

from hindsight_api.engine.prompt_utils import escape_for_prompt

# Default mission when no bank-specific mission is set
_DEFAULT_MISSION = "Track every detail: names, numbers, dates, places, and relationships. Prefer specifics over abstractions, never generalise."

Expand Down Expand Up @@ -90,11 +92,11 @@ def build_batch_consolidation_prompt(
The mission defines *what* to track (customisable per bank).
Processing rules and output format are always present regardless of mission.
"""
mission = observations_mission or _DEFAULT_MISSION
mission = escape_for_prompt(observations_mission or _DEFAULT_MISSION)

capacity_section = ""
if observation_capacity_note:
capacity_section = f"\n\n## CAPACITY CONSTRAINT\n{observation_capacity_note}"
capacity_section = f"\n\n## CAPACITY CONSTRAINT\n{escape_for_prompt(observation_capacity_note)}"

return (
"You are a memory consolidation system. Synthesize facts into observations "
Expand Down
23 changes: 23 additions & 0 deletions hindsight-api-slim/hindsight_api/engine/prompt_utils.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
"""Shared utilities for prompt assembly."""

import re

# A maximal run of identical braces, e.g. "{", "{{" or "}}}".
_BRACE_RUN = re.compile(r"\{+|\}+")


def _escape_brace_run(match: "re.Match[str]") -> str:
    """Return the replacement for one brace run: doubled if odd-length, else unchanged."""
    run = match.group()
    # An odd-length run always leaves one unpaired brace for ``str.format`` to
    # choke on; doubling the whole run makes it render back to the original.
    return run * 2 if len(run) % 2 else run


def escape_for_prompt(text: str) -> str:
    """Escape literal braces so ``text`` can be embedded in a ``str.format`` template.

    Prompt templates are often passed through ``str.format`` to substitute real
    placeholders like ``{facts_text}``. Any literal braces in caller-supplied
    text that has been spliced into such a template (e.g. a bank mission that
    contains JSON examples) would otherwise be parsed as format fields and
    raise ``KeyError`` / ``ValueError``.

    Only use this for text that becomes part of the *template*. Values passed
    as ``str.format`` arguments are never rescanned, so escaping them would
    make the doubled braces show up verbatim in the output.

    Each maximal run of identical braces is handled as a unit:

    * odd length (``{``, ``{{{``, ...): the run is doubled, so it renders back
      to exactly the original run;
    * even length (``{{``, ``}}``, ...): the run is assumed to be already
      escaped and is left as-is.

    Leaving even runs alone keeps the function idempotent. The trade-off is
    that a literally intended even run, such as the ``}}`` closing nested
    JSON, renders with half as many braces.

    Args:
        text: Caller-supplied text to embed in a format template.

    Returns:
        ``text`` with every odd-length brace run doubled.
    """
    return _BRACE_RUN.sub(_escape_brace_run, text)
4 changes: 3 additions & 1 deletion hindsight-api-slim/hindsight_api/engine/reflect/prompts.py
Original file line number Diff line number Diff line change
Expand Up @@ -515,7 +515,9 @@ def build_final_prompt(

def build_final_system_prompt(mission: str | None = None) -> str:
    """Build the final synthesis system prompt, using mission as role when set.

    Args:
        mission: Optional mission text. Surrounding whitespace is stripped;
            ``None``, empty, or whitespace-only input selects
            ``_DEFAULT_FINAL_ROLE`` instead.

    Returns:
        ``_FINAL_SYSTEM_PROMPT_BASE`` rendered with the chosen role section.
    """
    role_section = (mission.strip() if mission else "") or _DEFAULT_FINAL_ROLE
    # The role is passed to ``str.format`` as an argument value, and format
    # never rescans substituted values. Literal braces in the mission are
    # therefore already safe, and escaping them here would leave doubled
    # braces in the rendered prompt.
    # NOTE(review): if any caller runs the returned prompt through
    # ``str.format`` again, the escaping belongs at that call site - confirm.
    return _FINAL_SYSTEM_PROMPT_BASE.format(role_section=role_section)


Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -888,13 +888,16 @@ def _build_extraction_prompt_and_schema(config) -> tuple[str, type]:
extract_causal_links = config.retain_extract_causal_links

# Build retain_mission section if set - injected before the mode-specific guidelines
# Escape braces so user-supplied text survives str.format() on the prompt template.
from hindsight_api.engine.prompt_utils import escape_for_prompt

retain_mission = getattr(config, "retain_mission", None)
if retain_mission:
retain_mission_section = (
f"══════════════════════════════════════════════════════════════════════════\n"
f"FOCUS — What to retain for this bank\n"
f"══════════════════════════════════════════════════════════════════════════\n\n"
f"{retain_mission}\n\n"
f"{escape_for_prompt(retain_mission)}\n\n"
)
else:
retain_mission_section = ""
Expand All @@ -910,7 +913,7 @@ def _build_extraction_prompt_and_schema(config) -> tuple[str, type]:
base_prompt = CUSTOM_FACT_EXTRACTION_PROMPT
prompt = base_prompt.format(
retain_mission_section=retain_mission_section,
custom_instructions=config.retain_custom_instructions,
custom_instructions=escape_for_prompt(config.retain_custom_instructions),
)
elif extraction_mode == "verbose":
prompt = VERBOSE_FACT_EXTRACTION_PROMPT.format(
Expand Down
151 changes: 151 additions & 0 deletions hindsight-api-slim/tests/test_prompt_brace_escape.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,151 @@
"""Tests for brace escaping in prompt builders across all modules.

User-supplied text (missions, custom instructions) may contain literal braces
(e.g. JSON examples). These must survive ``str.format()`` without crashing.
"""

import pytest

from hindsight_api.engine.prompt_utils import escape_for_prompt


# ---------------------------------------------------------------------------
# Unit tests for the shared escape helper
# ---------------------------------------------------------------------------


class TestEscapeForPrompt:
    """Unit checks for the shared ``escape_for_prompt`` helper."""

    def test_lone_braces_doubled(self):
        """A single ``{`` / ``}`` pair comes back doubled."""
        escaped = escape_for_prompt("{x}")
        assert escaped == "{{x}}"

    def test_already_escaped_left_alone(self):
        """Input that is already doubled passes through unchanged."""
        escaped = escape_for_prompt("{{x}}")
        assert escaped == "{{x}}"

    def test_idempotent(self):
        """Escaping an already-escaped result changes nothing."""
        first_pass = escape_for_prompt('{"dedup": true}')
        second_pass = escape_for_prompt(first_pass)
        assert first_pass == second_pass

    def test_plain_text_unchanged(self):
        """Text without braces is returned as-is."""
        escaped = escape_for_prompt("no braces here")
        assert escaped == "no braces here"

    def test_mixed_lone_and_escaped(self):
        """Lone braces are doubled while doubled ones in the same text are kept."""
        escaped = escape_for_prompt("{x} and {{y}}")
        assert escaped == "{{x}} and {{y}}"

    @pytest.mark.parametrize(
        "text",
        ["{single}", "}}weird{{", "trailing {", "leading }", ""],
    )
    def test_edge_cases_do_not_crash(self, text):
        """Escaped text must be accepted by a bare ``.format()`` call."""
        escaped = escape_for_prompt(text)
        # No placeholders are supplied, so any brace left unescaped would raise here.
        escaped.format()


# ---------------------------------------------------------------------------
# Consolidation prompt
# ---------------------------------------------------------------------------


class TestConsolidationBraceSafety:
    """The consolidation prompt template must still render when user text holds braces."""

    def test_mission_with_json_renders(self):
        """A JSON-shaped mission survives the later ``.format()`` call intact."""
        from hindsight_api.engine.consolidation.prompts import (
            build_batch_consolidation_prompt,
        )

        json_mission = '{"dedup": true, "merge": true}'
        template = build_batch_consolidation_prompt(observations_mission=json_mission)
        output = template.format(facts_text="<facts>", observations_text="<obs>")
        assert json_mission in output

    def test_capacity_note_with_braces_renders(self):
        """Braces inside the capacity note survive the later ``.format()`` call."""
        from hindsight_api.engine.consolidation.prompts import (
            build_batch_consolidation_prompt,
        )

        capacity_note = "Use shape {limit, used}"
        template = build_batch_consolidation_prompt(
            observations_mission="m", observation_capacity_note=capacity_note
        )
        output = template.format(facts_text="<facts>", observations_text="<obs>")
        assert "{limit, used}" in output


# ---------------------------------------------------------------------------
# Reflect prompt
# ---------------------------------------------------------------------------


class TestReflectBraceSafety:
    """The reflect system prompt must build when the mission contains braces.

    NOTE(review): both checks are substring assertions, so they also pass if
    the braces come out doubled (``{x}`` is a substring of ``{{x}}``).
    """

    def test_mission_with_json_renders(self):
        """A JSON-shaped mission appears in the built prompt."""
        from hindsight_api.engine.reflect.prompts import build_final_system_prompt

        json_mission = '{"role": "admin", "scope": "all"}'
        system_prompt = build_final_system_prompt(mission=json_mission)
        # The builder returns an already-rendered string, so the mission text
        # is looked up in it directly.
        assert json_mission in system_prompt

    def test_mission_with_lone_braces_renders(self):
        """A placeholder-looking ``{entity}`` appears in the built prompt."""
        from hindsight_api.engine.reflect.prompts import build_final_system_prompt

        system_prompt = build_final_system_prompt(mission="Track {entity} changes")
        assert "{entity}" in system_prompt


# ---------------------------------------------------------------------------
# Retain / fact extraction prompt
# ---------------------------------------------------------------------------


class TestRetainBraceSafety:
    """The fact-extraction prompt must build when retain settings contain braces.

    NOTE(review): the checks are substring assertions, so they also pass if
    the braces come out doubled (``{x}`` is a substring of ``{{x}}``).
    """

    def _make_config(self, **overrides):
        """Minimal config-like object for _build_extraction_prompt_and_schema."""
        from types import SimpleNamespace

        baseline = {
            "retain_extraction_mode": "concise",
            "retain_extract_causal_links": False,
            "retain_mission": None,
            "retain_custom_instructions": None,
            "retain_taxonomy": None,
        }
        # Keyword overrides win over the baseline values.
        return SimpleNamespace(**{**baseline, **overrides})

    def test_retain_mission_with_json(self):
        """A JSON-shaped retain mission appears in the built prompt."""
        from hindsight_api.engine.retain.fact_extraction import (
            _build_extraction_prompt_and_schema,
        )

        cfg = self._make_config(retain_mission='{"focus": "compliance"}')
        rendered, _schema = _build_extraction_prompt_and_schema(cfg)
        # The builder returns an already-rendered prompt.
        assert '{"focus": "compliance"}' in rendered

    def test_custom_instructions_with_braces(self):
        """Braces in custom instructions appear in the built prompt (custom mode)."""
        from hindsight_api.engine.retain.fact_extraction import (
            _build_extraction_prompt_and_schema,
        )

        cfg = self._make_config(
            retain_extraction_mode="custom",
            retain_custom_instructions="Output as {key: value} pairs",
        )
        rendered, _schema = _build_extraction_prompt_and_schema(cfg)
        assert "{key: value}" in rendered

    def test_both_mission_and_custom_with_braces(self):
        """Mission and custom instructions may both carry braces at once."""
        from hindsight_api.engine.retain.fact_extraction import (
            _build_extraction_prompt_and_schema,
        )

        cfg = self._make_config(
            retain_extraction_mode="custom",
            retain_mission='{"scope": "all"}',
            retain_custom_instructions="Format: {k: v}",
        )
        rendered, _schema = _build_extraction_prompt_and_schema(cfg)
        assert '{"scope": "all"}' in rendered
        assert "{k: v}" in rendered
9 changes: 8 additions & 1 deletion skills/hindsight-docs/references/developer/configuration.md
Original file line number Diff line number Diff line change
Expand Up @@ -611,7 +611,7 @@ Google's `gemini-embedding-001` produces 3072 dimensions natively but supports c

| Variable | Description | Default |
|----------|-------------|---------|
| `HINDSIGHT_API_RERANKER_PROVIDER` | Provider: `local`, `tei`, `cohere`, `openrouter`, `zeroentropy`, `siliconflow`, `google`, `flashrank`, `litellm`, `litellm-sdk`, `jina-mlx`, or `rrf` | `local` |
| `HINDSIGHT_API_RERANKER_PROVIDER` | Provider: `local`, `tei`, `cohere`, `openrouter`, `zeroentropy`, `siliconflow`, `alibaba`, `google`, `flashrank`, `litellm`, `litellm-sdk`, `jina-mlx`, or `rrf` | `local` |
| `HINDSIGHT_API_RERANKER_LOCAL_MODEL` | Model for local provider | `cross-encoder/ms-marco-MiniLM-L-6-v2` |
| `HINDSIGHT_API_RERANKER_LOCAL_MAX_CONCURRENT` | Max concurrent local reranking (prevents CPU thrashing under load) | `4` |
| `HINDSIGHT_API_RERANKER_LOCAL_TRUST_REMOTE_CODE` | Allow loading models with custom code (security risk, disabled by default) | `false` |
Expand Down Expand Up @@ -641,6 +641,8 @@ Google's `gemini-embedding-001` produces 3072 dimensions natively but supports c
| `HINDSIGHT_API_RERANKER_SILICONFLOW_API_KEY` | SiliconFlow API key for reranking | - |
| `HINDSIGHT_API_RERANKER_SILICONFLOW_MODEL` | SiliconFlow rerank model (e.g., `BAAI/bge-reranker-v2-m3`) | `BAAI/bge-reranker-v2-m3` |
| `HINDSIGHT_API_RERANKER_SILICONFLOW_BASE_URL` | Base URL for the SiliconFlow `/rerank` endpoint | `https://api.siliconflow.cn/v1` |
| `HINDSIGHT_API_RERANKER_ALIBABA_API_KEY` | Alibaba Cloud DashScope API key for reranking (falls back to `DASHSCOPE_API_KEY`) | - |
| `HINDSIGHT_API_RERANKER_ALIBABA_MODEL` | DashScope rerank model | `qwen3-rerank` |
| `HINDSIGHT_API_RERANKER_GOOGLE_PROJECT_ID` | Google Cloud project ID for Discovery Engine reranking (falls back to `HINDSIGHT_API_LLM_VERTEXAI_PROJECT_ID`) | - |
| `HINDSIGHT_API_RERANKER_GOOGLE_MODEL` | Google Discovery Engine ranking model | `semantic-ranker-default-004` |
| `HINDSIGHT_API_RERANKER_GOOGLE_SERVICE_ACCOUNT_KEY` | Path to service account JSON key (falls back to `HINDSIGHT_API_LLM_VERTEXAI_SERVICE_ACCOUNT_KEY`). If unset, uses ADC. | - |
Expand Down Expand Up @@ -704,6 +706,11 @@ export HINDSIGHT_API_RERANKER_SILICONFLOW_API_KEY=your-api-key
export HINDSIGHT_API_RERANKER_SILICONFLOW_MODEL=BAAI/bge-reranker-v2-m3
# export HINDSIGHT_API_RERANKER_SILICONFLOW_BASE_URL=https://api.siliconflow.cn/v1 # default

# Alibaba Cloud DashScope - qwen3-rerank via Cohere-compatible /reranks endpoint
export HINDSIGHT_API_RERANKER_PROVIDER=alibaba
export HINDSIGHT_API_RERANKER_ALIBABA_API_KEY=your-dashscope-api-key # or set DASHSCOPE_API_KEY
export HINDSIGHT_API_RERANKER_ALIBABA_MODEL=qwen3-rerank # default, can omit

# LiteLLM proxy - unified gateway for multiple reranking providers (requires running LiteLLM proxy server)
export HINDSIGHT_API_RERANKER_PROVIDER=litellm
export HINDSIGHT_API_RERANKER_LITELLM_API_BASE=http://localhost:4000
Expand Down
14 changes: 14 additions & 0 deletions skills/hindsight-docs/references/developer/models.md
Original file line number Diff line number Diff line change
Expand Up @@ -513,6 +513,7 @@ Reranks initial search results to improve precision.
| `cohere` | Cohere rerank API | Production, high quality |
| `zeroentropy` | ZeroEntropy rerank API (zerank-2) | Production, state-of-the-art accuracy |
| `siliconflow` | SiliconFlow rerank API (Cohere-compatible `/rerank` endpoint) | Users in China or anyone on SiliconFlow's platform |
| `alibaba` | Alibaba Cloud DashScope rerank API (qwen3-rerank) | Users on Alibaba Cloud / DashScope |
| `tei` | HuggingFace Text Embeddings Inference | Production, self-hosted |
| `flashrank` | FlashRank (lightweight, fast) | Resource-constrained environments |
| `litellm` | LiteLLM proxy (unified gateway) | Multi-provider setups |
Expand Down Expand Up @@ -550,6 +551,14 @@ SiliconFlow hosts a range of open-weight rerankers behind a Cohere-compatible `/
| `BAAI/bge-reranker-v2-m3` | Multilingual, strong default |
| `Qwen/Qwen3-Reranker-8B` | Larger, higher accuracy |

### Alibaba Cloud Models

Alibaba Cloud DashScope exposes `qwen3-rerank` via a Cohere-compatible `/reranks` endpoint:

| Model | Use Case |
|-------|----------|
| `qwen3-rerank` | 100+ languages, default |

### LiteLLM Supported Providers

LiteLLM supports multiple reranking providers via the `/rerank` endpoint:
Expand Down Expand Up @@ -595,6 +604,11 @@ export HINDSIGHT_API_RERANKER_PROVIDER=siliconflow
export HINDSIGHT_API_RERANKER_SILICONFLOW_API_KEY=your-api-key
export HINDSIGHT_API_RERANKER_SILICONFLOW_MODEL=BAAI/bge-reranker-v2-m3 # default, can omit

# Alibaba Cloud DashScope (qwen3-rerank)
export HINDSIGHT_API_RERANKER_PROVIDER=alibaba
export HINDSIGHT_API_RERANKER_ALIBABA_API_KEY=your-dashscope-api-key
export HINDSIGHT_API_RERANKER_ALIBABA_MODEL=qwen3-rerank # default, can omit

# TEI (self-hosted)
export HINDSIGHT_API_RERANKER_PROVIDER=tei
export HINDSIGHT_API_RERANKER_TEI_URL=http://localhost:8081
Expand Down
Loading