diff --git a/hindsight-api-slim/hindsight_api/mcp_tools.py b/hindsight-api-slim/hindsight_api/mcp_tools.py index 5a2fd0a5d..213ff09fd 100644 --- a/hindsight-api-slim/hindsight_api/mcp_tools.py +++ b/hindsight-api-slim/hindsight_api/mcp_tools.py @@ -922,6 +922,7 @@ async def reflect( response_schema: dict | None = None, tags: list[str] | None = None, tags_match: str = "any", + include_based_on: bool = False, bank_id: str | None = None, ) -> str: """ @@ -951,6 +952,7 @@ async def reflect( response_schema: Optional JSON schema for structured output. When provided, the response includes a 'structured_output' field. tags: Optional tags to filter memories by (e.g., ['project:alpha']) tags_match: How to match tags - 'any' (match any tag) or 'all' (match all tags). Default: 'any' + include_based_on: Include source facts used for synthesis. Defaults to false because broad reflections can exceed MCP client result limits. bank_id: Optional bank to reflect in (defaults to session bank). Use for cross-bank operations. """ try: @@ -978,6 +980,8 @@ async def reflect( reflect_result = await memory.reflect_async(**reflect_kwargs) result_data = json.loads(reflect_result.model_dump_json(indent=2)) + if not include_based_on: + result_data.pop("based_on", None) if response_schema is not None and hasattr(reflect_result, "structured_output"): result_data["structured_output"] = reflect_result.structured_output return json.dumps(result_data, indent=2) @@ -999,6 +1003,7 @@ async def reflect( response_schema: dict | None = None, tags: list[str] | None = None, tags_match: str = "any", + include_based_on: bool = False, ) -> dict: """ Generate thoughtful analysis by synthesizing stored memories with the bank's personality. @@ -1027,6 +1032,7 @@ async def reflect( response_schema: Optional JSON schema for structured output. When provided, the response includes a 'structured_output' field. tags: Optional tags to filter memories by (e.g., ['project:alpha']) tags_match: How to match tags - 'any' (match any tag) or 'all' (match all tags). Default: 'any' + include_based_on: Include source facts used for synthesis. Defaults to false because broad reflections can exceed MCP client result limits. """ try: target_bank = config.bank_id_resolver() @@ -1053,6 +1059,8 @@ async def reflect( reflect_result = await memory.reflect_async(**reflect_kwargs) result_data = reflect_result.model_dump() + if not include_based_on: + result_data.pop("based_on", None) if response_schema is not None and hasattr(reflect_result, "structured_output"): result_data["structured_output"] = reflect_result.structured_output return result_data diff --git a/hindsight-api-slim/tests/test_mcp_tools.py b/hindsight-api-slim/tests/test_mcp_tools.py index e4a158be4..3725a7b92 100644 --- a/hindsight-api-slim/tests/test_mcp_tools.py +++ b/hindsight-api-slim/tests/test_mcp_tools.py @@ -90,7 +90,10 @@ def test_with_none_timestamp(self): "trigger": {"interval": "daily"}, "last_refreshed_at": "2026-01-01T00:00:00", "created_at": "2026-01-01T00:00:00", - "reflect_response": {"text": "Prefers Python", "based_on": {"world_facts": [{"id": "f1", "text": "Python is popular"}]}}, + "reflect_response": { + "text": "Prefers Python", + "based_on": {"world_facts": [{"id": "f1", "text": "Python is popular"}]}, + }, }, { "id": "mm-2", @@ -148,11 +151,23 @@ async def _get_mental_model(**kwargs): # Retain/recall/reflect memory.retain_batch_async = AsyncMock() memory.submit_async_retain = AsyncMock(return_value={"operation_id": "op-retain"}) - memory.recall_async = AsyncMock(return_value=MagicMock(model_dump_json=lambda indent=None: '{"results": []}', model_dump=lambda: {"results": []})) - memory.reflect_async = AsyncMock(return_value=MagicMock(model_dump_json=lambda indent=None: '{"text": "reflection"}', model_dump=lambda: {"text": "reflection"}, structured_output=None)) + memory.recall_async = AsyncMock( + return_value=MagicMock( + model_dump_json=lambda indent=None: '{"results": []}', model_dump=lambda: {"results": []} + ) + ) + memory.reflect_async = AsyncMock( + return_value=MagicMock( + model_dump_json=lambda indent=None: '{"text": "reflection"}', + model_dump=lambda: {"text": "reflection"}, + structured_output=None, + ) + ) # Directive methods - memory.list_directives = AsyncMock(return_value=[{"id": "dir-1", "name": "Be concise", "content": "Keep responses short"}]) + memory.list_directives = AsyncMock( + return_value=[{"id": "dir-1", "name": "Be concise", "content": "Keep responses short"}] + ) memory.create_directive = AsyncMock(return_value={"id": "dir-new", "name": "Test", "content": "Test content"}) memory.delete_directive = AsyncMock(return_value=True) @@ -540,9 +555,7 @@ async def test_get_detail_metadata_only_has_core_fields(self, mcp_server_with_me assert "trigger" not in parsed async def test_get_detail_single_bank_content(self, mcp_server_single_bank, mock_memory): - result = await _tools(mcp_server_single_bank)["get_mental_model"].fn( - mental_model_id="mm-1", detail="content" - ) + result = await _tools(mcp_server_single_bank)["get_mental_model"].fn(mental_model_id="mm-1", detail="content") assert isinstance(result, dict) assert "content" in result assert "reflect_response" not in result @@ -1038,6 +1051,78 @@ async def test_reflect_without_tags_no_tags_in_kwargs(self, mock_memory): call_kwargs = mock_memory.reflect_async.call_args.kwargs assert "tags" not in call_kwargs + async def test_reflect_omits_based_on_by_default_multi_bank(self, mock_memory): + based_on = {"world": [{"id": "fact-1", "text": "large provenance payload"}]} + mock_memory.reflect_async = AsyncMock( + return_value=MagicMock( + model_dump_json=lambda indent=None: json.dumps( + {"text": "reflection", "based_on": based_on}, indent=indent + ), + model_dump=lambda: {"text": "reflection", "based_on": based_on}, + structured_output=None, + ) + ) + + mcp = _make_mcp_server(mock_memory, {"reflect"}) + result = await _tools(mcp)["reflect"].fn(query="test") + + parsed = json.loads(result) + assert parsed == {"text": "reflection"} + assert "include_based_on" not in mock_memory.reflect_async.call_args.kwargs + + async def test_reflect_can_include_based_on_multi_bank(self, mock_memory): + based_on = {"world": [{"id": "fact-1", "text": "large provenance payload"}]} + mock_memory.reflect_async = AsyncMock( + return_value=MagicMock( + model_dump_json=lambda indent=None: json.dumps( + {"text": "reflection", "based_on": based_on}, indent=indent + ), + model_dump=lambda: {"text": "reflection", "based_on": based_on}, + structured_output=None, + ) + ) + + mcp = _make_mcp_server(mock_memory, {"reflect"}) + result = await _tools(mcp)["reflect"].fn(query="test", include_based_on=True) + + parsed = json.loads(result) + assert parsed["based_on"] == based_on + + async def test_reflect_omits_based_on_by_default_single_bank(self, mock_memory): + based_on = {"world": [{"id": "fact-1", "text": "large provenance payload"}]} + mock_memory.reflect_async = AsyncMock( + return_value=MagicMock( + model_dump_json=lambda indent=None: json.dumps( + {"text": "reflection", "based_on": based_on}, indent=indent + ), + model_dump=lambda: {"text": "reflection", "based_on": based_on}, + structured_output=None, + ) + ) + + mcp = _make_mcp_server(mock_memory, {"reflect"}, include_bank_id=False) + result = await _tools(mcp)["reflect"].fn(query="test") + + assert result == {"text": "reflection"} + assert "include_based_on" not in mock_memory.reflect_async.call_args.kwargs + + async def test_reflect_can_include_based_on_single_bank(self, mock_memory): + based_on = {"world": [{"id": "fact-1", "text": "large provenance payload"}]} + mock_memory.reflect_async = AsyncMock( + return_value=MagicMock( + model_dump_json=lambda indent=None: json.dumps( + {"text": "reflection", "based_on": based_on}, indent=indent + ), + model_dump=lambda: {"text": "reflection", "based_on": based_on}, + structured_output=None, + ) + ) + + mcp = _make_mcp_server(mock_memory, {"reflect"}, include_bank_id=False) + result = await _tools(mcp)["reflect"].fn(query="test", include_based_on=True) + + assert result["based_on"] == based_on + @pytest.mark.asyncio class TestMentalModelTrigger: @@ -1059,9 +1144,7 @@ async def test_create_default_trigger_false(self, mock_memory): async def test_update_with_trigger(self, mock_memory): mcp = _make_mcp_server(mock_memory, {"update_mental_model"}) - await _tools(mcp)["update_mental_model"].fn( - mental_model_id="mm-1", trigger_refresh_after_consolidation=True - ) + await _tools(mcp)["update_mental_model"].fn(mental_model_id="mm-1", trigger_refresh_after_consolidation=True) call_kwargs = mock_memory.update_mental_model.call_args.kwargs assert call_kwargs["trigger"] == {"refresh_after_consolidation": True} @@ -1442,26 +1525,26 @@ async def test_update_bank_mission_maps_to_reflect_mission(self, mock_memory): async def test_update_bank_config_reflect_mission_takes_precedence_over_mission(self, mock_memory): """When both mission and config_updates.reflect_mission are provided, config wins.""" mcp = _make_mcp_server(mock_memory, {"update_bank"}, include_bank_id=True) - await _tools(mcp)["update_bank"].fn( - mission="old", config_updates={"reflect_mission": "new"} - ) + await _tools(mcp)["update_bank"].fn(mission="old", config_updates={"reflect_mission": "new"}) config_call = mock_memory._config_resolver.update_bank_config.call_args assert config_call.args[1]["reflect_mission"] == "new" async def test_update_bank_multiple_config_fields(self, mock_memory): """Multiple config fields can be set in a single config_updates dict.""" mcp = _make_mcp_server(mock_memory, {"update_bank"}, include_bank_id=True) - await _tools(mcp)["update_bank"].fn(config_updates={ - "retain_mission": "Extract technical decisions", - "disposition_skepticism": 5, - "disposition_literalism": 1, - "disposition_empathy": 4, - "enable_observations": True, - "observations_mission": "Focus on preferences", - "retain_extraction_mode": "custom", - "retain_custom_instructions": "Extract only action items", - "retain_chunk_size": 2000, - }) + await _tools(mcp)["update_bank"].fn( + config_updates={ + "retain_mission": "Extract technical decisions", + "disposition_skepticism": 5, + "disposition_literalism": 1, + "disposition_empathy": 4, + "enable_observations": True, + "observations_mission": "Focus on preferences", + "retain_extraction_mode": "custom", + "retain_custom_instructions": "Extract only action items", + "retain_chunk_size": 2000, + } + ) config_call = mock_memory._config_resolver.update_bank_config.call_args updates = config_call.args[1] assert updates["retain_mission"] == "Extract technical decisions" @@ -1508,9 +1591,7 @@ async def test_update_bank_config_updates_single_bank(self, mock_memory): async def test_update_bank_with_bank_id_override(self, mock_memory): """bank_id override routes config update to the correct bank.""" mcp = _make_mcp_server(mock_memory, {"update_bank"}, include_bank_id=True) - await _tools(mcp)["update_bank"].fn( - config_updates={"reflect_mission": "Test"}, bank_id="other-bank" - ) + await _tools(mcp)["update_bank"].fn(config_updates={"reflect_mission": "Test"}, bank_id="other-bank") config_call = mock_memory._config_resolver.update_bank_config.call_args assert config_call.args[0] == "other-bank" @@ -1527,11 +1608,13 @@ async def test_update_bank_config_resolver_validation_error(self, mock_memory): async def test_update_bank_any_configurable_field(self, mock_memory): """Any field in _CONFIGURABLE_FIELDS is accepted (future-proof).""" mcp = _make_mcp_server(mock_memory, {"update_bank"}, include_bank_id=True) - await _tools(mcp)["update_bank"].fn(config_updates={ - "recall_budget_fixed_low": 100, - "consolidation_llm_batch_size": 8, - "entity_labels": ["PERSON", "ORG"], - }) + await _tools(mcp)["update_bank"].fn( + config_updates={ + "recall_budget_fixed_low": 100, + "consolidation_llm_batch_size": 8, + "entity_labels": ["PERSON", "ORG"], + } + ) updates = mock_memory._config_resolver.update_bank_config.call_args.args[1] assert updates["recall_budget_fixed_low"] == 100 assert updates["consolidation_llm_batch_size"] == 8 @@ -1578,6 +1661,7 @@ async def test_list_tags_empty(self, mock_memory): result = await _tools(mcp)["list_tags"].fn() assert '"items": []' in result or "[]" in result + # ========================================================================= # Bank-Level Tool Filtering Tests # =========================================================================