Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

(WIP) "Reasoning", "Deep ReSearch", "document_query", collapsible msgs, thought summaries #350

Closed
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
78 changes: 62 additions & 16 deletions agent.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,9 @@
import python.helpers.log as Log
from python.helpers.dirty_json import DirtyJson
from python.helpers.defer import DeferredTask
from python.helpers import settings
from python.helpers.settings import Settings
from models import ModelProvider
from typing import Callable


Expand Down Expand Up @@ -45,6 +48,8 @@ def __init__(
self.paused = paused
self.streaming_agent = streaming_agent
self.task: DeferredTask | None = None
self.reasoning = "auto" # Changed default from "off" to "auto"
self.deep_search = False
AgentContext._counter += 1
self.no = AgentContext._counter

Expand Down Expand Up @@ -152,6 +157,7 @@ class ModelConfig:
limit_input: int = 0
limit_output: int = 0
vision: bool = False
reasoning: bool = False
kwargs: dict = field(default_factory=dict)


Expand Down Expand Up @@ -188,6 +194,7 @@ class AgentConfig:
class UserMessage:
    """A single user-originated message handed to the agent loop."""

    # plain text of the user's message
    message: str
    # file paths / attachment identifiers included with the message
    attachments: list[str]
    # optional system-level notes injected alongside the user message
    # (plain `list` is the idiomatic default_factory; `list[str]` as a
    # factory works but is unconventional and adds nothing at runtime)
    system_message: list[str] = field(default_factory=list)


class LoopData:
Expand Down Expand Up @@ -242,7 +249,10 @@ def __init__(
self.history = history.History(self)
self.last_user_message: history.Message | None = None
self.intervention: UserMessage | None = None
self.data = {} # free data object all the tools can use
self.data = {
"chat_model_reasoning_tokens": 0,
"chat_model_reasoning_effort": "none",
} # free data object all the tools can use

async def monologue(self):
while True:
Expand All @@ -263,27 +273,33 @@ async def monologue(self):
try:
# prepare LLM chain (model, system, history)
prompt = await self.prepare_prompt(loop_data=self.loop_data)
heading = f"{self.agent_name}: Thinking"

if not self.get_data('thinking_topic'):
self.set_data('thinking_topic', "...")

# output that the agent is starting
PrintStyle(
bold=True,
font_color="green",
padding=True,
background_color="white",
).print(f"{self.agent_name}: Generating")
).print(f"{heading}: {self.get_data('thinking_topic') or '...'}")
log = self.context.log.log(
type="agent", heading=f"{self.agent_name}: Generating"
type="agent", heading=f"{heading}: {self.get_data('thinking_topic') or '...'}"
)

async def stream_callback(chunk: str, full: str):
nonlocal heading
# output the agent response stream
if chunk:
printer.stream(chunk)
self.log_from_stream(full, log)
self.log_from_stream(full, log, heading)

# store as last context window content
self.set_data(Agent.DATA_NAME_CTX_WINDOW, prompt.format())

self.log_from_stream('', log, heading)
agent_response = await self.call_chat_model(
prompt, callback=stream_callback
)
Expand Down Expand Up @@ -429,10 +445,10 @@ def read_prompt(self, file: str, **kwargs) -> str:
prompt = files.remove_code_fences(prompt)
return prompt

def get_data(self, field: str) -> Any:
    """Return the value stored under *field* in the agent's free-form data dict.

    Returns None when the key is absent (never raises KeyError).
    """
    return self.data.get(field, None)

def set_data(self, field: str, value: Any):
    """Store *value* under *field* in the agent's free-form data dict."""
    self.data[field] = value

def hist_add_message(self, ai: bool, content: history.MessageContent):
Expand All @@ -449,12 +465,14 @@ async def hist_add_user_message(
"fw.intervention.md",
message=message.message,
attachments=message.attachments,
system_message=message.system_message,
)
else:
content = self.parse_prompt(
"fw.user_message.md",
message=message.message,
attachments=message.attachments,
system_message=message.system_message,
)

# remove empty attachments from template
Expand Down Expand Up @@ -491,13 +509,33 @@ def concat_messages(
return self.history.output_text(human_label="user", ai_label="assistant")

def get_chat_model(self):
    """Build the chat model, enabling native reasoning when the provider supports it.

    Reasoning token budget and effort are read from agent data, where
    reasoning_tool.execute() sets them; call_chat_model() resets them to
    their defaults (0 / "none") after each model call.
    """
    reasoning_tokens = self.get_data("chat_model_reasoning_tokens") or 0
    reasoning_effort = self.get_data("chat_model_reasoning_effort") or "none"

    # `current` instead of `set`: do not shadow the builtin
    current: Settings = settings.get_settings()
    ctx_length = int(current["chat_model_ctx_length"])
    # portion of the context window reserved for model output
    # (history fraction is what the settings expose; output gets the rest)
    ctx_output = int((1.0 - float(current["chat_model_ctx_history"])) * ctx_length)

    kwargs = dict(self.config.chat_model.kwargs)
    # native reasoning only supported for these 3 providers for now
    if self.config.chat_model.provider in (
        ModelProvider.ANTHROPIC,
        ModelProvider.OPENAI,
        ModelProvider.OPENROUTER,
    ):
        # update() after copying so a stale user-supplied max_tokens cannot
        # collide with the explicit keyword (the duplicated explicit-kwargs
        # form would raise TypeError on any key overlap)
        kwargs.update(
            max_tokens=ctx_output,
            chat_model_reasoning_tokens=reasoning_tokens,
            chat_model_reasoning_effort=reasoning_effort,
        )

    return models.get_model(
        models.ModelType.CHAT,
        self.config.chat_model.provider,
        self.config.chat_model.name,
        **kwargs,
    )
def get_utility_model(self):
return models.get_model(
models.ModelType.CHAT,
Expand Down Expand Up @@ -554,7 +592,7 @@ async def call_chat_model(
):
response = ""

# model class
# model class - the inner functions handle the reasoning tokens and effort
model = self.get_chat_model()

# rate limiter
Expand All @@ -570,6 +608,11 @@ async def call_chat_model(
if callback:
await callback(content, response)

# reset reasoning config until next call to reasoning_tool
# these were set by reasoning_tool.execute()
self.set_data("chat_model_reasoning_tokens", 0)
self.set_data("chat_model_reasoning_effort", "none")

return response

async def rate_limiter(
Expand Down Expand Up @@ -648,15 +691,18 @@ async def process_tools(self, msg: str):
type="error", content=f"{self.agent_name}: Message misformat"
)

def log_from_stream(self, stream: str, logItem: Log.LogItem, heading: str):
    """Parse the partial JSON response stream and update the log item.

    If the (possibly incomplete) stream already parses as a dict, its
    "topic" key — when present — is stored as 'thinking_topic' so the UI
    heading can show what the agent is currently reasoning about.
    Never raises: a malformed stream must not crash the agent loop.
    """
    try:
        if len(stream) < 25:
            return  # too short to contain parseable JSON; no reason to try
        response = DirtyJson.parse_string(stream)
        if isinstance(response, dict):
            if "topic" in response:
                self.set_data("thinking_topic", response["topic"])
            # log structured content once the stream parses as a dict
            logItem.update(
                heading=f"{heading}: {self.get_data('thinking_topic') or '...'}",
                content=stream,
                kvps=response,
            )
    except Exception:
        # intentionally best-effort: a log/parse error must not break the loop
        pass

def get_tool(self, name: str, args: dict, message: str, **kwargs):
Expand Down
35 changes: 19 additions & 16 deletions docs/architecture.md
Original file line number Diff line number Diff line change
Expand Up @@ -30,9 +30,9 @@ This architecture ensures:
- Flexible deployment options for advanced users

> [!NOTE]
> The legacy approach of running Agent Zero directly on the host system (using Python, Conda, etc.)
> is still possible but requires Remote Function Calling (RFC) configuration through the Settings
> page. See [Full Binaries Installation](installation.md#in-depth-guide-for-full-binaries-installation)
> The legacy approach of running Agent Zero directly on the host system (using Python, Conda, etc.)
> is still possible but requires Remote Function Calling (RFC) configuration through the Settings
> page. See [Full Binaries Installation](installation.md#in-depth-guide-for-full-binaries-installation)
> for detailed instructions.

## Implementation Details
Expand Down Expand Up @@ -74,7 +74,7 @@ This architecture ensures:
| `run_ui.py` | Web UI launcher |

> [!NOTE]
> When using the Docker runtime container, these directories are mounted
> When using the Docker runtime container, these directories are mounted
> within the `/a0` volume for data persistence until the container is restarted or deleted.

## Core Components
Expand All @@ -90,8 +90,11 @@ Communication flows between agents through messages, which are structured accord

| Argument | Description |
| --- | --- |
| `Observations:` | The agent's observations of the world |
| `Thoughts:` | The agent's Chain of Thought and planning process |
| `Reflection:` | The agent's reflection on its thoughts and plans |
| `Tool name:` | The specific tool used by the agent |
| `Tool arguments:` | The arguments passed to the tool |
| `Responses or queries:` | Results, feedback or queries from tools or other agents |

#### Interaction Flow
Expand Down Expand Up @@ -122,7 +125,7 @@ Agent Zero comes with a set of built-in tools designed to help agents perform ta
| knowledge_tool | Enables agents to retrieve information from memory, knowledge base or online external sources |
| response_tool | Allows agents to output a response |
| memory_tool | Enables agents to save, load, delete and forget information from memory |
| webpage_content_tool | Enables agents to fetch and analyze the text content of webpages |
| document_query | Enables agents to fetch and analyze the text content of webpages and local documents |

#### Knowledge Tool
The `knowledge_tool` uses SearXNG to search the web and retrieve information. It can also search the local knowledge base and memory for relevant information. The tool returns a summary of the information, which can be used by the agent to make decisions or answer questions.
Expand All @@ -140,8 +143,8 @@ The integration provides access to various types of content, including images, v
In cases where SearXNG might not return satisfactory results, Agent Zero can be configured to fall back on other sources or methods, ensuring that users always have access to information.

> [!NOTE]
> The Knowledge Tool is designed to work seamlessly with both online searches through
> SearXNG and local knowledge base queries, providing a comprehensive information
> The Knowledge Tool is designed to work seamlessly with both online searches through
> SearXNG and local knowledge base queries, providing a comprehensive information
> retrieval system.

#### Custom Tools
Expand All @@ -153,8 +156,8 @@ Users can create custom tools to extend Agent Zero's capabilities. Custom tools
4. Follow existing patterns for consistency

> [!NOTE]
> Tools are always present in system prompt, so you should keep them to minimum.
> To save yourself some tokens, use the [Instruments module](#adding-instruments)
> Tools are always present in system prompt, so you should keep them to minimum.
> To save yourself some tokens, use the [Instruments module](#adding-instruments)
> to call custom scripts or functions.

### 3. Memory System
Expand Down Expand Up @@ -198,7 +201,7 @@ By dynamically adjusting context windows and summarizing past interactions, Agen
> To maximize the effectiveness of context summarization, users should provide clear and specific instructions during interactions. This helps Agent Zero understand which details are most important to retain.

### 4. Prompts
The `prompts` directory contains various Markdown files that control agent behavior and communication. The most important file is `agent.system.main.md`, which acts as a central hub, referencing other prompt files.
The `prompts` directory contains various Markdown files that control agent behavior and communication. The most important file is `agent.system.main.md` (or `deepsearch.system.main.md`), which acts as a central hub, referencing other prompt files.

#### Core Prompt Files
| Prompt File | Description |
Expand All @@ -225,7 +228,7 @@ The `prompts` directory contains various Markdown files that control agent behav
4. Select your custom prompts in the Settings page (Agent Config section)

#### Dynamic Behavior System
- **Behavior Adjustment**:
- **Behavior Adjustment**:
- Agents can modify their behavior in real-time based on user instructions
- Behavior changes are automatically integrated into the system prompt
- Behavioral rules are merged intelligently, avoiding duplicates and conflicts
Expand All @@ -249,8 +252,8 @@ The `prompts` directory contains various Markdown files that control agent behav
- Changes are applied without disrupting other components
- Maintains separation between core functionality and behavioral rules

> [!NOTE]
> You can customize any of these files. Agent Zero will use the files in your custom `prompts_subdir`
> [!NOTE]
> You can customize any of these files. Agent Zero will use the files in your custom `prompts_subdir`
> if they exist, otherwise, it will fall back to the files in `prompts/default`.

> [!TIP]
Expand All @@ -265,7 +268,7 @@ Knowledge refers to the user-provided information and data that agents can lever
- Automatically imported and indexed
- Expandable format support

- **Knowledge Base**:
- **Knowledge Base**:
- Can include PDFs, databases, books, documentation
- `/docs` folder automatically added
- Used for answering questions and decision-making
Expand Down Expand Up @@ -308,6 +311,6 @@ Extensions can be found in `python/extensions` directory:
4. Ensure compatibility with main system
5. Test thoroughly before deployment

> [!NOTE]
> [!NOTE]
> Consider contributing valuable custom components to the main repository.
> See [Contributing](contribution.md) for more information.
> See [Contributing](contribution.md) for more information.
1 change: 1 addition & 0 deletions initialize.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@ def initialize():
provider=models.ModelProvider[current_settings["chat_model_provider"]],
name=current_settings["chat_model_name"],
ctx_length=current_settings["chat_model_ctx_length"],
reasoning=current_settings["chat_model_reasoning"],
limit_requests=current_settings["chat_model_rl_requests"],
limit_input=current_settings["chat_model_rl_input"],
limit_output=current_settings["chat_model_rl_output"],
Expand Down
Loading