vectorize-io · benfrank241 · May 22, 2026 · May 20, 2026 · May 20, 2026 · May 22, 2026
diff --git a/hindsight-api-slim/hindsight_api/mcp_tools.py b/hindsight-api-slim/hindsight_api/mcp_tools.py
@@ -922,6 +922,7 @@ async def reflect(
             response_schema: dict | None = None,
             tags: list[str] | None = None,
             tags_match: str = "any",
+            include_based_on: bool = False,
             bank_id: str | None = None,
         ) -> str:
             """
@@ -951,6 +952,7 @@ async def reflect(
                 response_schema: Optional JSON schema for structured output. When provided, the response includes a 'structured_output' field.
                 tags: Optional tags to filter memories by (e.g., ['project:alpha'])
                 tags_match: How to match tags - 'any' (match any tag) or 'all' (match all tags). Default: 'any'
+                include_based_on: Include source facts used for synthesis. Defaults to false because broad reflections can exceed MCP client result limits.
                 bank_id: Optional bank to reflect in (defaults to session bank). Use for cross-bank operations.
             """
             try:
@@ -978,6 +980,8 @@ async def reflect(
                 reflect_result = await memory.reflect_async(**reflect_kwargs)
 
                 result_data = json.loads(reflect_result.model_dump_json(indent=2))
+                if not include_based_on:
+                    result_data.pop("based_on", None)
                 if response_schema is not None and hasattr(reflect_result, "structured_output"):
                     result_data["structured_output"] = reflect_result.structured_output
                 return json.dumps(result_data, indent=2)
@@ -999,6 +1003,7 @@ async def reflect(
             response_schema: dict | None = None,
             tags: list[str] | None = None,
             tags_match: str = "any",
+            include_based_on: bool = False,
         ) -> dict:
             """
             Generate thoughtful analysis by synthesizing stored memories with the bank's personality.
@@ -1027,6 +1032,7 @@ async def reflect(
                 response_schema: Optional JSON schema for structured output. When provided, the response includes a 'structured_output' field.
                 tags: Optional tags to filter memories by (e.g., ['project:alpha'])
                 tags_match: How to match tags - 'any' (match any tag) or 'all' (match all tags). Default: 'any'
+                include_based_on: Include source facts used for synthesis. Defaults to false because broad reflections can exceed MCP client result limits.
             """
             try:
                 target_bank = config.bank_id_resolver()
@@ -1053,6 +1059,8 @@ async def reflect(
                 reflect_result = await memory.reflect_async(**reflect_kwargs)
 
                 result_data = reflect_result.model_dump()
+                if not include_based_on:
+                    result_data.pop("based_on", None)
                 if response_schema is not None and hasattr(reflect_result, "structured_output"):
                     result_data["structured_output"] = reflect_result.structured_output
                 return result_data

diff --git a/hindsight-api-slim/tests/test_mcp_tools.py b/hindsight-api-slim/tests/test_mcp_tools.py
@@ -90,7 +90,10 @@ def test_with_none_timestamp(self):
         "trigger": {"interval": "daily"},
         "last_refreshed_at": "2026-01-01T00:00:00",
         "created_at": "2026-01-01T00:00:00",
-        "reflect_response": {"text": "Prefers Python", "based_on": {"world_facts": [{"id": "f1", "text": "Python is popular"}]}},
+        "reflect_response": {
+            "text": "Prefers Python",
+            "based_on": {"world_facts": [{"id": "f1", "text": "Python is popular"}]},
+        },
     },
     {
         "id": "mm-2",
@@ -148,11 +151,23 @@ async def _get_mental_model(**kwargs):
     # Retain/recall/reflect
     memory.retain_batch_async = AsyncMock()
     memory.submit_async_retain = AsyncMock(return_value={"operation_id": "op-retain"})
-    memory.recall_async = AsyncMock(return_value=MagicMock(model_dump_json=lambda indent=None: '{"results": []}', model_dump=lambda: {"results": []}))
-    memory.reflect_async = AsyncMock(return_value=MagicMock(model_dump_json=lambda indent=None: '{"text": "reflection"}', model_dump=lambda: {"text": "reflection"}, structured_output=None))
+    memory.recall_async = AsyncMock(
+        return_value=MagicMock(
+            model_dump_json=lambda indent=None: '{"results": []}', model_dump=lambda: {"results": []}
+        )
+    )
+    memory.reflect_async = AsyncMock(
+        return_value=MagicMock(
+            model_dump_json=lambda indent=None: '{"text": "reflection"}',
+            model_dump=lambda: {"text": "reflection"},
+            structured_output=None,
+        )
+    )
 
     # Directive methods
-    memory.list_directives = AsyncMock(return_value=[{"id": "dir-1", "name": "Be concise", "content": "Keep responses short"}])
+    memory.list_directives = AsyncMock(
+        return_value=[{"id": "dir-1", "name": "Be concise", "content": "Keep responses short"}]
+    )
     memory.create_directive = AsyncMock(return_value={"id": "dir-new", "name": "Test", "content": "Test content"})
     memory.delete_directive = AsyncMock(return_value=True)
 
@@ -540,9 +555,7 @@ async def test_get_detail_metadata_only_has_core_fields(self, mcp_server_with_me
         assert "trigger" not in parsed
 
     async def test_get_detail_single_bank_content(self, mcp_server_single_bank, mock_memory):
-        result = await _tools(mcp_server_single_bank)["get_mental_model"].fn(
-            mental_model_id="mm-1", detail="content"
-        )
+        result = await _tools(mcp_server_single_bank)["get_mental_model"].fn(mental_model_id="mm-1", detail="content")
         assert isinstance(result, dict)
         assert "content" in result
         assert "reflect_response" not in result
@@ -1038,6 +1051,78 @@ async def test_reflect_without_tags_no_tags_in_kwargs(self, mock_memory):
         call_kwargs = mock_memory.reflect_async.call_args.kwargs
         assert "tags" not in call_kwargs
 
+    async def test_reflect_omits_based_on_by_default_multi_bank(self, mock_memory):
+        based_on = {"world": [{"id": "fact-1", "text": "large provenance payload"}]}
+        mock_memory.reflect_async = AsyncMock(
+            return_value=MagicMock(
+                model_dump_json=lambda indent=None: json.dumps(
+                    {"text": "reflection", "based_on": based_on}, indent=indent
+                ),
+                model_dump=lambda: {"text": "reflection", "based_on": based_on},
+                structured_output=None,
+            )
+        )
+
+        mcp = _make_mcp_server(mock_memory, {"reflect"})
+        result = await _tools(mcp)["reflect"].fn(query="test")
+
+        parsed = json.loads(result)
+        assert parsed == {"text": "reflection"}
+        assert "include_based_on" not in mock_memory.reflect_async.call_args.kwargs
+
+    async def test_reflect_can_include_based_on_multi_bank(self, mock_memory):
+        based_on = {"world": [{"id": "fact-1", "text": "large provenance payload"}]}
+        mock_memory.reflect_async = AsyncMock(
+            return_value=MagicMock(
+                model_dump_json=lambda indent=None: json.dumps(
+                    {"text": "reflection", "based_on": based_on}, indent=indent
+                ),
+                model_dump=lambda: {"text": "reflection", "based_on": based_on},
+                structured_output=None,
+            )
+        )
+
+        mcp = _make_mcp_server(mock_memory, {"reflect"})
+        result = await _tools(mcp)["reflect"].fn(query="test", include_based_on=True)
+
+        parsed = json.loads(result)
+        assert parsed["based_on"] == based_on
+
+    async def test_reflect_omits_based_on_by_default_single_bank(self, mock_memory):
+        based_on = {"world": [{"id": "fact-1", "text": "large provenance payload"}]}
+        mock_memory.reflect_async = AsyncMock(
+            return_value=MagicMock(
+                model_dump_json=lambda indent=None: json.dumps(
+                    {"text": "reflection", "based_on": based_on}, indent=indent
+                ),
+                model_dump=lambda: {"text": "reflection", "based_on": based_on},
+                structured_output=None,
+            )
+        )
+
+        mcp = _make_mcp_server(mock_memory, {"reflect"}, include_bank_id=False)
+        result = await _tools(mcp)["reflect"].fn(query="test")
+
+        assert result == {"text": "reflection"}
+        assert "include_based_on" not in mock_memory.reflect_async.call_args.kwargs
+
+    async def test_reflect_can_include_based_on_single_bank(self, mock_memory):
+        based_on = {"world": [{"id": "fact-1", "text": "large provenance payload"}]}
+        mock_memory.reflect_async = AsyncMock(
+            return_value=MagicMock(
+                model_dump_json=lambda indent=None: json.dumps(
+                    {"text": "reflection", "based_on": based_on}, indent=indent
+                ),
+                model_dump=lambda: {"text": "reflection", "based_on": based_on},
+                structured_output=None,
+            )
+        )
+
+        mcp = _make_mcp_server(mock_memory, {"reflect"}, include_bank_id=False)
+        result = await _tools(mcp)["reflect"].fn(query="test", include_based_on=True)
+
+        assert result["based_on"] == based_on
+
 
 @pytest.mark.asyncio
 class TestMentalModelTrigger:
@@ -1059,9 +1144,7 @@ async def test_create_default_trigger_false(self, mock_memory):
 
     async def test_update_with_trigger(self, mock_memory):
         mcp = _make_mcp_server(mock_memory, {"update_mental_model"})
-        await _tools(mcp)["update_mental_model"].fn(
-            mental_model_id="mm-1", trigger_refresh_after_consolidation=True
-        )
+        await _tools(mcp)["update_mental_model"].fn(mental_model_id="mm-1", trigger_refresh_after_consolidation=True)
         call_kwargs = mock_memory.update_mental_model.call_args.kwargs
         assert call_kwargs["trigger"] == {"refresh_after_consolidation": True}
 
@@ -1442,26 +1525,26 @@ async def test_update_bank_mission_maps_to_reflect_mission(self, mock_memory):
     async def test_update_bank_config_reflect_mission_takes_precedence_over_mission(self, mock_memory):
         """When both mission and config_updates.reflect_mission are provided, config wins."""
         mcp = _make_mcp_server(mock_memory, {"update_bank"}, include_bank_id=True)
-        await _tools(mcp)["update_bank"].fn(
-            mission="old", config_updates={"reflect_mission": "new"}
-        )
+        await _tools(mcp)["update_bank"].fn(mission="old", config_updates={"reflect_mission": "new"})
         config_call = mock_memory._config_resolver.update_bank_config.call_args
         assert config_call.args[1]["reflect_mission"] == "new"
 
     async def test_update_bank_multiple_config_fields(self, mock_memory):
         """Multiple config fields can be set in a single config_updates dict."""
         mcp = _make_mcp_server(mock_memory, {"update_bank"}, include_bank_id=True)
-        await _tools(mcp)["update_bank"].fn(config_updates={
-            "retain_mission": "Extract technical decisions",
-            "disposition_skepticism": 5,
-            "disposition_literalism": 1,
-            "disposition_empathy": 4,
-            "enable_observations": True,
-            "observations_mission": "Focus on preferences",
-            "retain_extraction_mode": "custom",
-            "retain_custom_instructions": "Extract only action items",
-            "retain_chunk_size": 2000,
-        })
+        await _tools(mcp)["update_bank"].fn(
+            config_updates={
+                "retain_mission": "Extract technical decisions",
+                "disposition_skepticism": 5,
+                "disposition_literalism": 1,
+                "disposition_empathy": 4,
+                "enable_observations": True,
+                "observations_mission": "Focus on preferences",
+                "retain_extraction_mode": "custom",
+                "retain_custom_instructions": "Extract only action items",
+                "retain_chunk_size": 2000,
+            }
+        )
         config_call = mock_memory._config_resolver.update_bank_config.call_args
         updates = config_call.args[1]
         assert updates["retain_mission"] == "Extract technical decisions"
@@ -1508,9 +1591,7 @@ async def test_update_bank_config_updates_single_bank(self, mock_memory):
     async def test_update_bank_with_bank_id_override(self, mock_memory):
         """bank_id override routes config update to the correct bank."""
         mcp = _make_mcp_server(mock_memory, {"update_bank"}, include_bank_id=True)
-        await _tools(mcp)["update_bank"].fn(
-            config_updates={"reflect_mission": "Test"}, bank_id="other-bank"
-        )
+        await _tools(mcp)["update_bank"].fn(config_updates={"reflect_mission": "Test"}, bank_id="other-bank")
         config_call = mock_memory._config_resolver.update_bank_config.call_args
         assert config_call.args[0] == "other-bank"
 
@@ -1527,11 +1608,13 @@ async def test_update_bank_config_resolver_validation_error(self, mock_memory):
     async def test_update_bank_any_configurable_field(self, mock_memory):
         """Any field in _CONFIGURABLE_FIELDS is accepted (future-proof)."""
         mcp = _make_mcp_server(mock_memory, {"update_bank"}, include_bank_id=True)
-        await _tools(mcp)["update_bank"].fn(config_updates={
-            "recall_budget_fixed_low": 100,
-            "consolidation_llm_batch_size": 8,
-            "entity_labels": ["PERSON", "ORG"],
-        })
+        await _tools(mcp)["update_bank"].fn(
+            config_updates={
+                "recall_budget_fixed_low": 100,
+                "consolidation_llm_batch_size": 8,
+                "entity_labels": ["PERSON", "ORG"],
+            }
+        )
         updates = mock_memory._config_resolver.update_bank_config.call_args.args[1]
         assert updates["recall_budget_fixed_low"] == 100
         assert updates["consolidation_llm_batch_size"] == 8
@@ -1578,6 +1661,7 @@ async def test_list_tags_empty(self, mock_memory):
         result = await _tools(mcp)["list_tags"].fn()
         assert '"items": []' in result or "[]" in result
 
+
 # =========================================================================
 # Bank-Level Tool Filtering Tests
 # =========================================================================