Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
15 changes: 15 additions & 0 deletions hindsight-api-slim/hindsight_api/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -168,6 +168,7 @@ def normalize_config_dict(config: dict[str, Any]) -> dict[str, Any]:
ENV_RETAIN_LLM_MAX_BACKOFF = "HINDSIGHT_API_RETAIN_LLM_MAX_BACKOFF"
ENV_RETAIN_LLM_TIMEOUT = "HINDSIGHT_API_RETAIN_LLM_TIMEOUT"
ENV_RETAIN_LLM_LITELLMROUTER_CONFIG = "HINDSIGHT_API_RETAIN_LLM_LITELLMROUTER_CONFIG"
# Retain-scope extra request body. HindsightConfig.from_env() passes the raw
# value through json.loads and stores None when the variable is unset or empty.
ENV_RETAIN_LLM_EXTRA_BODY = "HINDSIGHT_API_RETAIN_LLM_EXTRA_BODY"

ENV_REFLECT_LLM_PROVIDER = "HINDSIGHT_API_REFLECT_LLM_PROVIDER"
ENV_REFLECT_LLM_API_KEY = "HINDSIGHT_API_REFLECT_LLM_API_KEY"
Expand All @@ -179,6 +180,7 @@ def normalize_config_dict(config: dict[str, Any]) -> dict[str, Any]:
ENV_REFLECT_LLM_MAX_BACKOFF = "HINDSIGHT_API_REFLECT_LLM_MAX_BACKOFF"
ENV_REFLECT_LLM_TIMEOUT = "HINDSIGHT_API_REFLECT_LLM_TIMEOUT"
ENV_REFLECT_LLM_LITELLMROUTER_CONFIG = "HINDSIGHT_API_REFLECT_LLM_LITELLMROUTER_CONFIG"
# Reflect-scope extra request body. HindsightConfig.from_env() passes the raw
# value through json.loads and stores None when the variable is unset or empty.
ENV_REFLECT_LLM_EXTRA_BODY = "HINDSIGHT_API_REFLECT_LLM_EXTRA_BODY"

ENV_CONSOLIDATION_LLM_PROVIDER = "HINDSIGHT_API_CONSOLIDATION_LLM_PROVIDER"
ENV_CONSOLIDATION_LLM_API_KEY = "HINDSIGHT_API_CONSOLIDATION_LLM_API_KEY"
Expand All @@ -190,6 +192,7 @@ def normalize_config_dict(config: dict[str, Any]) -> dict[str, Any]:
ENV_CONSOLIDATION_LLM_MAX_BACKOFF = "HINDSIGHT_API_CONSOLIDATION_LLM_MAX_BACKOFF"
ENV_CONSOLIDATION_LLM_TIMEOUT = "HINDSIGHT_API_CONSOLIDATION_LLM_TIMEOUT"
ENV_CONSOLIDATION_LLM_LITELLMROUTER_CONFIG = "HINDSIGHT_API_CONSOLIDATION_LLM_LITELLMROUTER_CONFIG"
# Consolidation-scope extra request body. HindsightConfig.from_env() passes the
# raw value through json.loads and stores None when the variable is unset or empty.
ENV_CONSOLIDATION_LLM_EXTRA_BODY = "HINDSIGHT_API_CONSOLIDATION_LLM_EXTRA_BODY"

ENV_EMBEDDINGS_PROVIDER = "HINDSIGHT_API_EMBEDDINGS_PROVIDER"
ENV_EMBEDDINGS_LOCAL_MODEL = "HINDSIGHT_API_EMBEDDINGS_LOCAL_MODEL"
Expand Down Expand Up @@ -927,6 +930,7 @@ class HindsightConfig:
retain_llm_max_backoff: float | None
retain_llm_timeout: float | None
retain_llm_litellmrouter_config: dict | None
retain_llm_extra_body: dict | None

reflect_llm_provider: str | None
reflect_llm_api_key: str | None
Expand All @@ -938,6 +942,7 @@ class HindsightConfig:
reflect_llm_max_backoff: float | None
reflect_llm_timeout: float | None
reflect_llm_litellmrouter_config: dict | None
reflect_llm_extra_body: dict | None

consolidation_llm_provider: str | None
consolidation_llm_api_key: str | None
Expand All @@ -949,6 +954,7 @@ class HindsightConfig:
consolidation_llm_max_backoff: float | None
consolidation_llm_timeout: float | None
consolidation_llm_litellmrouter_config: dict | None
consolidation_llm_extra_body: dict | None

# Embeddings
embeddings_provider: str
Expand Down Expand Up @@ -1469,6 +1475,9 @@ def from_env(cls) -> "HindsightConfig":
else None,
retain_llm_timeout=float(os.getenv(ENV_RETAIN_LLM_TIMEOUT)) if os.getenv(ENV_RETAIN_LLM_TIMEOUT) else None,
retain_llm_litellmrouter_config=_parse_llm_router_config(ENV_RETAIN_LLM_LITELLMROUTER_CONFIG),
retain_llm_extra_body=json.loads(os.getenv(ENV_RETAIN_LLM_EXTRA_BODY))
if os.getenv(ENV_RETAIN_LLM_EXTRA_BODY)
else None,
reflect_llm_provider=os.getenv(ENV_REFLECT_LLM_PROVIDER) or None,
reflect_llm_api_key=os.getenv(ENV_REFLECT_LLM_API_KEY) or None,
reflect_llm_model=os.getenv(ENV_REFLECT_LLM_MODEL)
Expand All @@ -1494,6 +1503,9 @@ def from_env(cls) -> "HindsightConfig":
if os.getenv(ENV_REFLECT_LLM_TIMEOUT)
else None,
reflect_llm_litellmrouter_config=_parse_llm_router_config(ENV_REFLECT_LLM_LITELLMROUTER_CONFIG),
reflect_llm_extra_body=json.loads(os.getenv(ENV_REFLECT_LLM_EXTRA_BODY))
if os.getenv(ENV_REFLECT_LLM_EXTRA_BODY)
else None,
consolidation_llm_provider=os.getenv(ENV_CONSOLIDATION_LLM_PROVIDER) or None,
consolidation_llm_api_key=os.getenv(ENV_CONSOLIDATION_LLM_API_KEY) or None,
consolidation_llm_model=os.getenv(ENV_CONSOLIDATION_LLM_MODEL)
Expand All @@ -1519,6 +1531,9 @@ def from_env(cls) -> "HindsightConfig":
if os.getenv(ENV_CONSOLIDATION_LLM_TIMEOUT)
else None,
consolidation_llm_litellmrouter_config=_parse_llm_router_config(ENV_CONSOLIDATION_LLM_LITELLMROUTER_CONFIG),
consolidation_llm_extra_body=json.loads(os.getenv(ENV_CONSOLIDATION_LLM_EXTRA_BODY))
if os.getenv(ENV_CONSOLIDATION_LLM_EXTRA_BODY)
else None,
# Embeddings
embeddings_provider=os.getenv(ENV_EMBEDDINGS_PROVIDER, DEFAULT_EMBEDDINGS_PROVIDER),
embeddings_local_model=os.getenv(ENV_EMBEDDINGS_LOCAL_MODEL, DEFAULT_EMBEDDINGS_LOCAL_MODEL),
Expand Down
6 changes: 3 additions & 3 deletions hindsight-api-slim/hindsight_api/engine/memory_engine.py
Original file line number Diff line number Diff line change
Expand Up @@ -573,7 +573,7 @@ def __init__(
api_key=retain_api_key,
base_url=retain_base_url,
model=retain_model,
extra_body=config.llm_extra_body,
extra_body=config.retain_llm_extra_body or config.llm_extra_body,
default_headers=config.llm_default_headers,
litellmrouter_config=config.retain_llm_litellmrouter_config or config.llm_litellmrouter_config,
Comment on lines +576 to 578
)
Expand All @@ -597,7 +597,7 @@ def __init__(
api_key=reflect_api_key,
base_url=reflect_base_url,
model=reflect_model,
extra_body=config.llm_extra_body,
extra_body=config.reflect_llm_extra_body or config.llm_extra_body,
default_headers=config.llm_default_headers,
litellmrouter_config=config.reflect_llm_litellmrouter_config or config.llm_litellmrouter_config,
)
Expand All @@ -621,7 +621,7 @@ def __init__(
api_key=consolidation_api_key,
base_url=consolidation_base_url,
model=consolidation_model,
extra_body=config.llm_extra_body,
extra_body=config.consolidation_llm_extra_body or config.llm_extra_body,
default_headers=config.llm_default_headers,
litellmrouter_config=config.consolidation_llm_litellmrouter_config or config.llm_litellmrouter_config,
)
Expand Down
18 changes: 17 additions & 1 deletion hindsight-api-slim/hindsight_api/engine/reflect/agent.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@
import asyncio
import json
import logging
import os
import re
import time
from typing import TYPE_CHECKING, Any, Awaitable, Callable
Expand Down Expand Up @@ -562,7 +563,22 @@ def _log_completion(answer: str, iterations: int, forced: bool = False):
if include_recall:
forced_sequence.append("recall")

if iteration < len(forced_sequence):
# Escape hatch for inference engines whose tool-call parser breaks
# when tool_choice forces a specific function name. vLLM (as of v0.20.2
# / nightly 2026-05-13) has open issue #35936 + #33965: forced
# tool_choice ("required" and named-function modes) bypasses the
# configured --tool-call-parser and uses JSON-only validation. For
# models that emit XML-style tool calls (Qwen3 family with qwen3_coder
# parser), this silently returns tool_calls=[] while finish_reason
# still reports "tool_calls". Set
# HINDSIGHT_API_REFLECT_DISABLE_FORCED_TOOL_CHOICE=true to fall back
# to tool_choice="auto" for every iteration; the model still calls
# retrieval tools when given factual queries, just without API-level
# forcing.
disable_forced = os.getenv(
"HINDSIGHT_API_REFLECT_DISABLE_FORCED_TOOL_CHOICE", ""
).lower() in ("true", "1", "yes")
if not disable_forced and iteration < len(forced_sequence):
iter_tool_choice: str | dict = {"type": "function", "function": {"name": forced_sequence[iteration]}}
else:
iter_tool_choice = "auto"
Expand Down
104 changes: 104 additions & 0 deletions hindsight-api-slim/tests/test_per_scope_llm_extra_body.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,104 @@
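"""Tests for per-scope ``HINDSIGHT_API_<SCOPE>_LLM_EXTRA_BODY`` env vars.

Verifies, at config-parse level:
- Each scope's extra_body env var parses as JSON into the ``HindsightConfig`` dataclass.
- Setting only the global ``HINDSIGHT_API_LLM_EXTRA_BODY`` leaves every per-scope
  field ``None``; the "per-scope value beats the global, unset per-scope falls
  back to the global" resolution happens at engine init and is not exercised here.
- Unset global + unset per-scope = None on every field (no extra_body merged at call time).
- Malformed JSON in a per-scope var raises ``json.JSONDecodeError`` when the config is loaded.
"""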

import json
import os

import pytest


@pytest.fixture
def reset_env_extra_body():
    """Isolate the extra_body env vars for one test, then put everything back.

    Before the test body runs, the fixture records the current value of every
    variable it touches, installs the minimal settings that let ``get_config()``
    succeed without a real LLM provider, removes all four ``*_LLM_EXTRA_BODY``
    variables and clears the config cache. After the test, the recorded values
    are restored (variables that were absent are removed again) and the cache
    is cleared once more.
    """
    from hindsight_api.config import clear_config_cache

    # Global + per-scope extra_body variables; tests opt in by setting them.
    extra_body_vars = (
        "HINDSIGHT_API_LLM_EXTRA_BODY",
        "HINDSIGHT_API_RETAIN_LLM_EXTRA_BODY",
        "HINDSIGHT_API_REFLECT_LLM_EXTRA_BODY",
        "HINDSIGHT_API_CONSOLIDATION_LLM_EXTRA_BODY",
    )
    # Required to make get_config() succeed without a real LLM provider.
    bootstrap = {
        "HINDSIGHT_API_SKIP_LLM_VERIFICATION": "true",
        "HINDSIGHT_API_LAZY_RERANKER": "true",
        "HINDSIGHT_API_LLM_PROVIDER": "mock",
        "HINDSIGHT_API_LLM_MODEL": "default-model",
    }

    snapshot = {name: os.environ.get(name) for name in (*extra_body_vars, *bootstrap)}

    os.environ.update(bootstrap)
    for name in extra_body_vars:
        os.environ.pop(name, None)
    clear_config_cache()

    yield

    for name, previous in snapshot.items():
        if previous is not None:
            os.environ[name] = previous
        else:
            os.environ.pop(name, None)
    clear_config_cache()


class TestPerScopeLLMExtraBody:
    """Config-parse behaviour of the global and per-scope extra_body env vars."""

    def test_unset_yields_none_on_all_scopes(self, reset_env_extra_body):
        """No env var set: the global and all three per-scope fields are None."""
        from hindsight_api.config import get_config

        cfg = get_config()
        for field_name in (
            "llm_extra_body",
            "retain_llm_extra_body",
            "reflect_llm_extra_body",
            "consolidation_llm_extra_body",
        ):
            assert getattr(cfg, field_name) is None

    def test_per_scope_env_parses_as_json(self, reset_env_extra_body):
        """Each per-scope env var is decoded from JSON into its own config field."""
        from hindsight_api.config import clear_config_cache, get_config

        # (env var, config attribute, payload) for every scope under test.
        cases = [
            (
                "HINDSIGHT_API_RETAIN_LLM_EXTRA_BODY",
                "retain_llm_extra_body",
                {"temperature": 0.6, "top_p": 0.8},
            ),
            (
                "HINDSIGHT_API_REFLECT_LLM_EXTRA_BODY",
                "reflect_llm_extra_body",
                {"chat_template_kwargs": {"enable_thinking": True}},
            ),
            (
                "HINDSIGHT_API_CONSOLIDATION_LLM_EXTRA_BODY",
                "consolidation_llm_extra_body",
                {"chat_template_kwargs": {"enable_thinking": False}, "presence_penalty": 0.0},
            ),
        ]
        for env_name, _, payload in cases:
            os.environ[env_name] = json.dumps(payload)
        clear_config_cache()

        cfg = get_config()
        for _, field_name, payload in cases:
            assert getattr(cfg, field_name) == payload

    def test_global_only_leaves_per_scope_none(self, reset_env_extra_body):
        """Only the global var set: per-scope fields are not populated from it."""
        from hindsight_api.config import clear_config_cache, get_config

        global_body = {"temperature": 0.7}
        os.environ["HINDSIGHT_API_LLM_EXTRA_BODY"] = json.dumps(global_body)
        clear_config_cache()

        cfg = get_config()
        assert cfg.llm_extra_body == global_body
        # Per-scope fields stay None — fallback happens at engine init, not parse time.
        for field_name in (
            "retain_llm_extra_body",
            "reflect_llm_extra_body",
            "consolidation_llm_extra_body",
        ):
            assert getattr(cfg, field_name) is None

    def test_invalid_json_raises_at_parse_time(self, reset_env_extra_body):
        """Malformed JSON in a per-scope var surfaces as JSONDecodeError from get_config()."""
        from hindsight_api.config import clear_config_cache, get_config

        os.environ["HINDSIGHT_API_RETAIN_LLM_EXTRA_BODY"] = "{not valid json"
        clear_config_cache()
        with pytest.raises(json.JSONDecodeError):
            get_config()
Loading