Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

(WIP) "Reasoning", "Deep ReSearch", "document_query", collapsible msgs, thought summaries #350

Closed
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
78 changes: 62 additions & 16 deletions agent.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,9 @@
import python.helpers.log as Log
from python.helpers.dirty_json import DirtyJson
from python.helpers.defer import DeferredTask
from python.helpers import settings
from python.helpers.settings import Settings
from models import ModelProvider
from typing import Callable


Expand Down Expand Up @@ -45,6 +48,8 @@ def __init__(
self.paused = paused
self.streaming_agent = streaming_agent
self.task: DeferredTask | None = None
self.reasoning = "auto" # Changed default from "off" to "auto"
self.deep_search = False
AgentContext._counter += 1
self.no = AgentContext._counter

Expand Down Expand Up @@ -152,6 +157,7 @@ class ModelConfig:
limit_input: int = 0
limit_output: int = 0
vision: bool = False
reasoning: bool = False
kwargs: dict = field(default_factory=dict)


Expand Down Expand Up @@ -188,6 +194,7 @@ class AgentConfig:
class UserMessage:
    """A single user-originated message handed to the agent loop."""

    # plain text of the user's message
    message: str
    # file paths / attachment identifiers included with the message
    attachments: list[str]
    # optional system-level notes injected alongside the user message
    # (plain `list` is the idiomatic default_factory; `list[str]` as a
    # factory works but is unconventional and adds nothing at runtime)
    system_message: list[str] = field(default_factory=list)


class LoopData:
Expand Down Expand Up @@ -242,7 +249,10 @@ def __init__(
self.history = history.History(self)
self.last_user_message: history.Message | None = None
self.intervention: UserMessage | None = None
self.data = {} # free data object all the tools can use
self.data = {
"chat_model_reasoning_tokens": 0,
"chat_model_reasoning_effort": "none",
} # free data object all the tools can use

async def monologue(self):
while True:
Expand All @@ -263,27 +273,33 @@ async def monologue(self):
try:
# prepare LLM chain (model, system, history)
prompt = await self.prepare_prompt(loop_data=self.loop_data)
heading = f"{self.agent_name}: Thinking"

if not self.get_data('thinking_topic'):
self.set_data('thinking_topic', "...")

# output that the agent is starting
PrintStyle(
bold=True,
font_color="green",
padding=True,
background_color="white",
).print(f"{self.agent_name}: Generating")
).print(f"{heading}: {self.get_data('thinking_topic') or '...'}")
log = self.context.log.log(
type="agent", heading=f"{self.agent_name}: Generating"
type="agent", heading=f"{heading}: {self.get_data('thinking_topic') or '...'}"
)

async def stream_callback(chunk: str, full: str):
nonlocal heading
# output the agent response stream
if chunk:
printer.stream(chunk)
self.log_from_stream(full, log)
self.log_from_stream(full, log, heading)

# store as last context window content
self.set_data(Agent.DATA_NAME_CTX_WINDOW, prompt.format())

self.log_from_stream('', log, heading)
agent_response = await self.call_chat_model(
prompt, callback=stream_callback
)
Expand Down Expand Up @@ -429,10 +445,10 @@ def read_prompt(self, file: str, **kwargs) -> str:
prompt = files.remove_code_fences(prompt)
return prompt

def get_data(self, field: str) -> Any:
    """Return the value stored under *field* in the agent's free-form data dict.

    Returns None when the key is absent (never raises KeyError).
    """
    return self.data.get(field, None)

def set_data(self, field: str, value: Any):
    """Store *value* under *field* in the agent's free-form data dict."""
    self.data[field] = value

def hist_add_message(self, ai: bool, content: history.MessageContent):
Expand All @@ -449,12 +465,14 @@ async def hist_add_user_message(
"fw.intervention.md",
message=message.message,
attachments=message.attachments,
system_message=message.system_message,
)
else:
content = self.parse_prompt(
"fw.user_message.md",
message=message.message,
attachments=message.attachments,
system_message=message.system_message,
)

# remove empty attachments from template
Expand Down Expand Up @@ -491,13 +509,33 @@ def concat_messages(
return self.history.output_text(human_label="user", ai_label="assistant")

def get_chat_model(self):
    """Build the chat model, enabling native reasoning when the provider supports it.

    Reasoning token budget and effort are read from agent data, where
    reasoning_tool.execute() sets them; call_chat_model() resets them to
    their defaults (0 / "none") after each model call.
    """
    reasoning_tokens = self.get_data("chat_model_reasoning_tokens") or 0
    reasoning_effort = self.get_data("chat_model_reasoning_effort") or "none"

    # `current` instead of `set`: do not shadow the builtin
    current: Settings = settings.get_settings()
    ctx_length = int(current["chat_model_ctx_length"])
    # portion of the context window reserved for model output
    # (history fraction is what the settings expose; output gets the rest)
    ctx_output = int((1.0 - float(current["chat_model_ctx_history"])) * ctx_length)

    kwargs = dict(self.config.chat_model.kwargs)
    # native reasoning only supported for these 3 providers for now
    if self.config.chat_model.provider in (
        ModelProvider.ANTHROPIC,
        ModelProvider.OPENAI,
        ModelProvider.OPENROUTER,
    ):
        # update() after copying so a stale user-supplied max_tokens cannot
        # collide with the explicit keyword (the duplicated explicit-kwargs
        # form would raise TypeError on any key overlap)
        kwargs.update(
            max_tokens=ctx_output,
            chat_model_reasoning_tokens=reasoning_tokens,
            chat_model_reasoning_effort=reasoning_effort,
        )

    return models.get_model(
        models.ModelType.CHAT,
        self.config.chat_model.provider,
        self.config.chat_model.name,
        **kwargs,
    )
def get_utility_model(self):
return models.get_model(
models.ModelType.CHAT,
Expand Down Expand Up @@ -554,7 +592,7 @@ async def call_chat_model(
):
response = ""

# model class
# model class - the inner functions handle the reasoning tokens and effort
model = self.get_chat_model()

# rate limiter
Expand All @@ -570,6 +608,11 @@ async def call_chat_model(
if callback:
await callback(content, response)

# reset reasoning config until next call to reasoning_tool
# these were set by reasoning_tool.execute()
self.set_data("chat_model_reasoning_tokens", 0)
self.set_data("chat_model_reasoning_effort", "none")

return response

async def rate_limiter(
Expand Down Expand Up @@ -648,15 +691,18 @@ async def process_tools(self, msg: str):
type="error", content=f"{self.agent_name}: Message misformat"
)

def log_from_stream(self, stream: str, logItem: Log.LogItem, heading: str):
    """Parse the partial JSON response stream and update the log item.

    If the (possibly incomplete) stream already parses as a dict, its
    "topic" key — when present — is stored as 'thinking_topic' so the UI
    heading can show what the agent is currently reasoning about.
    Never raises: a malformed stream must not crash the agent loop.
    """
    try:
        if len(stream) < 25:
            return  # too short to contain parseable JSON; no reason to try
        response = DirtyJson.parse_string(stream)
        if isinstance(response, dict):
            if "topic" in response:
                self.set_data("thinking_topic", response["topic"])
            # log structured content once the stream parses as a dict
            logItem.update(
                heading=f"{heading}: {self.get_data('thinking_topic') or '...'}",
                content=stream,
                kvps=response,
            )
    except Exception:
        # intentionally best-effort: a log/parse error must not break the loop
        pass

def get_tool(self, name: str, args: dict, message: str, **kwargs):
Expand Down
35 changes: 19 additions & 16 deletions docs/architecture.md
Original file line number Diff line number Diff line change
Expand Up @@ -30,9 +30,9 @@ This architecture ensures:
- Flexible deployment options for advanced users

> [!NOTE]
> The legacy approach of running Agent Zero directly on the host system (using Python, Conda, etc.)
> is still possible but requires Remote Function Calling (RFC) configuration through the Settings
> page. See [Full Binaries Installation](installation.md#in-depth-guide-for-full-binaries-installation)
> The legacy approach of running Agent Zero directly on the host system (using Python, Conda, etc.)
> is still possible but requires Remote Function Calling (RFC) configuration through the Settings
> page. See [Full Binaries Installation](installation.md#in-depth-guide-for-full-binaries-installation)
> for detailed instructions.

## Implementation Details
Expand Down Expand Up @@ -74,7 +74,7 @@ This architecture ensures:
| `run_ui.py` | Web UI launcher |

> [!NOTE]
> When using the Docker runtime container, these directories are mounted
> When using the Docker runtime container, these directories are mounted
> within the `/a0` volume for data persistence until the container is restarted or deleted.

## Core Components
Expand All @@ -90,8 +90,11 @@ Communication flows between agents through messages, which are structured accord

| Argument | Description |
| --- | --- |
| `Observations:` | The agent's observations of the world |
| `Thoughts:` | The agent's Chain of Thought and planning process |
| `Reflection:` | The agent's reflection on its thoughts and plans |
| `Tool name:` | The specific tool used by the agent |
| `Tool arguments:` | The arguments passed to the tool |
| `Responses or queries:` | Results, feedback or queries from tools or other agents |

#### Interaction Flow
Expand Down Expand Up @@ -122,7 +125,7 @@ Agent Zero comes with a set of built-in tools designed to help agents perform ta
| knowledge_tool | Enables agents to retrieve information from memory, knowledge base or online external sources |
| response_tool | Allows agents to output a response |
| memory_tool | Enables agents to save, load, delete and forget information from memory |
| webpage_content_tool | Enables agents to fetch and analyze the text content of webpages |
| document_query | Enables agents to fetch and analyze the text content of webpages and local documents |

#### Knowledge Tool
The `knowledge_tool` uses SearXNG to search the web and retrieve information. It can also search the local knowledge base and memory for relevant information. The tool returns a summary of the information, which can be used by the agent to make decisions or answer questions.
Expand All @@ -140,8 +143,8 @@ The integration provides access to various types of content, including images, v
In cases where SearXNG might not return satisfactory results, Agent Zero can be configured to fall back on other sources or methods, ensuring that users always have access to information.

> [!NOTE]
> The Knowledge Tool is designed to work seamlessly with both online searches through
> SearXNG and local knowledge base queries, providing a comprehensive information
> The Knowledge Tool is designed to work seamlessly with both online searches through
> SearXNG and local knowledge base queries, providing a comprehensive information
> retrieval system.

#### Custom Tools
Expand All @@ -153,8 +156,8 @@ Users can create custom tools to extend Agent Zero's capabilities. Custom tools
4. Follow existing patterns for consistency

> [!NOTE]
> Tools are always present in system prompt, so you should keep them to minimum.
> To save yourself some tokens, use the [Instruments module](#adding-instruments)
> Tools are always present in system prompt, so you should keep them to minimum.
> To save yourself some tokens, use the [Instruments module](#adding-instruments)
> to call custom scripts or functions.

### 3. Memory System
Expand Down Expand Up @@ -198,7 +201,7 @@ By dynamically adjusting context windows and summarizing past interactions, Agen
> To maximize the effectiveness of context summarization, users should provide clear and specific instructions during interactions. This helps Agent Zero understand which details are most important to retain.

### 4. Prompts
The `prompts` directory contains various Markdown files that control agent behavior and communication. The most important file is `agent.system.main.md`, which acts as a central hub, referencing other prompt files.
The `prompts` directory contains various Markdown files that control agent behavior and communication. The most important file is `agent.system.main.md` (or `deepsearch.system.main.md`), which acts as a central hub, referencing other prompt files.

#### Core Prompt Files
| Prompt File | Description |
Expand All @@ -225,7 +228,7 @@ The `prompts` directory contains various Markdown files that control agent behav
4. Select your custom prompts in the Settings page (Agent Config section)

#### Dynamic Behavior System
- **Behavior Adjustment**:
- **Behavior Adjustment**:
- Agents can modify their behavior in real-time based on user instructions
- Behavior changes are automatically integrated into the system prompt
- Behavioral rules are merged intelligently, avoiding duplicates and conflicts
Expand All @@ -249,8 +252,8 @@ The `prompts` directory contains various Markdown files that control agent behav
- Changes are applied without disrupting other components
- Maintains separation between core functionality and behavioral rules

> [!NOTE]
> You can customize any of these files. Agent Zero will use the files in your custom `prompts_subdir`
> [!NOTE]
> You can customize any of these files. Agent Zero will use the files in your custom `prompts_subdir`
> if they exist, otherwise, it will fall back to the files in `prompts/default`.

> [!TIP]
Expand All @@ -265,7 +268,7 @@ Knowledge refers to the user-provided information and data that agents can lever
- Automatically imported and indexed
- Expandable format support

- **Knowledge Base**:
- **Knowledge Base**:
- Can include PDFs, databases, books, documentation
- `/docs` folder automatically added
- Used for answering questions and decision-making
Expand Down Expand Up @@ -308,6 +311,6 @@ Extensions can be found in `python/extensions` directory:
4. Ensure compatibility with main system
5. Test thoroughly before deployment

> [!NOTE]
> [!NOTE]
> Consider contributing valuable custom components to the main repository.
> See [Contributing](contribution.md) for more information.
> See [Contributing](contribution.md) for more information.
1 change: 1 addition & 0 deletions initialize.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@ def initialize():
provider=models.ModelProvider[current_settings["chat_model_provider"]],
name=current_settings["chat_model_name"],
ctx_length=current_settings["chat_model_ctx_length"],
reasoning=current_settings["chat_model_reasoning"],
limit_requests=current_settings["chat_model_rl_requests"],
limit_input=current_settings["chat_model_rl_input"],
limit_output=current_settings["chat_model_rl_output"],
Expand Down
Loading