Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
20 changes: 19 additions & 1 deletion config.example.toml
Original file line number Diff line number Diff line change
Expand Up @@ -87,4 +87,22 @@ headless = false
default_timeout = 300000
# Viewport dimensions
viewport_width = 1280
viewport_height = 720
viewport_height = 720

# Environment variables for browser agent stability tuning:
# These can be set in .env or shell environment for operational adjustments.
#
# TEMPO_BROWSER_USE_LLM_TIMEOUT: LLM response timeout in seconds (default: model-derived)
# - For GPT-5 models, automatically raised to minimum 120s to prevent timeouts
# - Example: TEMPO_BROWSER_USE_LLM_TIMEOUT=180
#
# TEMPO_BROWSER_USE_MAX_FAILURES: Max consecutive failures before abort (default: 3)
# - For GPT-5 models, enforces minimum of 3 to handle intermittent issues
# - Example: TEMPO_BROWSER_USE_MAX_FAILURES=5
#
# TEMPO_BROWSER_USE_MAX_HISTORY_ITEMS: Cap on conversation history items (default: unlimited)
# - Use to reduce context size and prevent LLM timeouts on long tasks
# - Example: TEMPO_BROWSER_USE_MAX_HISTORY_ITEMS=50
#
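# TEMPO_BROWSER_USE_MAX_COMPLETION_TOKENS: Max tokens for LLM response (default: model-derived)
# - Must be an integer: unlike the variables above, a non-integer value is not
#   skipped with a warning; it raises ValueError when the browser LLM is created
# - Example: TEMPO_BROWSER_USE_MAX_COMPLETION_TOKENS=4096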
57 changes: 57 additions & 0 deletions tempo/agents/browser.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
by parent agents to handle web interaction subtasks.
"""

import logging
import os
from typing import TYPE_CHECKING, Any

Expand All @@ -21,6 +22,35 @@
from tempo.tools.network_capture import NetworkRequestCapture


logger = logging.getLogger(__name__)


def _parse_env_int(env_var: str, default: int | None = None) -> int | None:
"""
Parse an integer from an environment variable with validation.

Args:
env_var: Name of the environment variable
default: Default value if not set

Returns:
Parsed integer value, or default if not set

Logs a warning and returns default if the value is not a valid integer.
"""
value = os.getenv(env_var)
if value is None:
return default
try:
return int(value)
except ValueError:
logger.warning(
f"Invalid value for {env_var}: '{value}' is not an integer. "
f"Using default: {default}"
)
return default


@register_agent
class BrowserAgent(BaseAgentDefinition):
"""
Expand Down Expand Up @@ -167,11 +197,38 @@ def create_browser_use_agent(
task_description = f"{self.task}\n\nStarting URL: {self.url}"

# Create browser-use Agent
# Allow runtime overrides for stability testing (Debug Mode / ops tuning).
# Defaults preserve browser-use behavior (model-derived llm_timeout, max_failures=3).
llm_timeout_val = _parse_env_int("TEMPO_BROWSER_USE_LLM_TIMEOUT")
max_failures_val = _parse_env_int("TEMPO_BROWSER_USE_MAX_FAILURES", default=3)
max_history_items_val = _parse_env_int("TEMPO_BROWSER_USE_MAX_HISTORY_ITEMS")

# Evidence-based default: gpt-5.* can exceed 60s when prompts get large
# (we observed CancelledError + 60s timeout). Apply guardrails for gpt-5 models.
model_name = str(getattr(llm, "model", "")).lower()
if "gpt-5" in model_name:
# Guardrail: enforce minimum timeout that avoids known cancellations.
if llm_timeout_val is None or llm_timeout_val < 120:
logger.info(
f"Applying GPT-5 guardrail: llm_timeout={120}s "
f"(was {llm_timeout_val or 'unset'})"
)
llm_timeout_val = 120
# Guardrail: avoid overly-low failure caps which exacerbate intermittent provider issues.
if max_failures_val < 3:
logger.info(
f"Applying GPT-5 guardrail: max_failures={3} (was {max_failures_val})"
)
max_failures_val = 3

agent = Agent(
task=task_description,
llm=llm,
browser=browser,
tools=shared_tools, # Use the shared Tools instance
llm_timeout=llm_timeout_val,
max_failures=max_failures_val,
max_history_items=max_history_items_val,
)

return agent, browser, shared_tools
Expand Down
4 changes: 4 additions & 0 deletions tempo/infra/browser_use_llm.py
Original file line number Diff line number Diff line change
Expand Up @@ -51,6 +51,8 @@ def create_browser_use_llm(
# Prefer OpenAI direct if available; fallback to OpenRouter.
openai_key = os.getenv("OPENAI_API_KEY")
openrouter_key = os.getenv("OPENROUTER_API_KEY")
max_completion_tokens_env = os.getenv("TEMPO_BROWSER_USE_MAX_COMPLETION_TOKENS")
max_completion_tokens = int(max_completion_tokens_env) if max_completion_tokens_env else None

if openai_key:
from browser_use.llm.openai.chat import ChatOpenAI as BrowserUseChatOpenAI
Expand All @@ -59,6 +61,7 @@ def create_browser_use_llm(
model=model,
api_key=openai_key,
temperature=0.2,
max_completion_tokens=max_completion_tokens,
)

if openrouter_key:
Expand All @@ -70,6 +73,7 @@ def create_browser_use_llm(
model=openrouter_model,
api_key=openrouter_key,
temperature=0.2,
max_completion_tokens=max_completion_tokens,
default_headers={
"HTTP-Referer": "https://github.com/your-repo/tempo-sec",
"X-Title": "tempo-sec browser agent",
Expand Down
1 change: 0 additions & 1 deletion tempo/prompts/templates/browser_base.txt
Original file line number Diff line number Diff line change
Expand Up @@ -126,7 +126,6 @@ Use `get_network_requests` to inspect XHR/fetch requests made by the web applica
- Bypassing WAF restrictions by replaying legitimate browser requests

**Parameters:**
- `filter_type`: Filter by "xhr", "fetch", or "all" (default: both xhr and fetch)
- `include_bodies`: Set to `true` to include request/response bodies (useful for API analysis)
- `max_requests`: Maximum requests to return (default: 30)

Expand Down
3 changes: 3 additions & 0 deletions tempo/tools/browser_task_tool.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,10 +4,13 @@
Tool for parent agents to delegate browser tasks to a specialized browser agent.
"""

import logging
from typing import TYPE_CHECKING

from tempo.tools.base import Tool, ToolCategory, ToolResult, ToolSchema

logger = logging.getLogger(__name__)

if TYPE_CHECKING:
from tempo.infra.container import ContainerManager
from tempo.infra.llm import LLMClient
Expand Down
86 changes: 17 additions & 69 deletions tempo/tools/browser_use_adapter.py
Original file line number Diff line number Diff line change
Expand Up @@ -251,71 +251,23 @@ def shell(
return result.output

def _register_filesystem_tools(self):
"""Register file operations tools."""
from tempo.tools.filesystem import FileReadTool, FileWriteTool

file_read_tool = FileReadTool(self.container)
file_write_tool = FileWriteTool(self.container)

@self.tools.action("Read the contents of a file")
def file_read(
path: str,
encoding: str = "utf-8",
max_bytes: int = 0,
) -> str:
"""
Read file contents.

Args:
path: Path to the file
encoding: Text encoding (default: utf-8)
max_bytes: Maximum bytes to read (0 = unlimited)

Returns:
File contents
"""
max_bytes_param = max_bytes if max_bytes > 0 else None
result = _run_async(
file_read_tool.execute(
path=path,
encoding=encoding,
max_bytes=max_bytes_param,
)
)
if not result.success:
return f"Error: {result.error}"
return result.output

@self.tools.action("Write content to a file (creates or overwrites)")
def file_write(
path: str,
content: str,
append: bool = False,
encoding: str = "utf-8",
) -> str:
"""
Write to a file.

Args:
path: Path to the file
content: Content to write
append: Append instead of overwrite
encoding: Text encoding (default: utf-8)

Returns:
Success confirmation
"""
result = _run_async(
file_write_tool.execute(
path=path,
content=content,
append=append,
encoding=encoding,
)
)
if not result.success:
return f"Error: {result.error}"
return result.output
"""Register file operations tools.

NOTE: Removed - browser-use already provides built-in file system tools:
- write_file: Write to files in browser-use's temp filesystem
- read_file: Read files from browser-use's temp filesystem
- replace_file: Replace strings in files

The tempo-sec file_read/file_write tools were duplicates that operated
on the Docker container filesystem, but the browser agent should use
browser-use's built-in tools for consistency with its todo.md and
other file-based workflows.

For Docker container file operations, the shell tool can be used:
- shell(command="cat /path/to/file") for reading
- shell(command="echo 'content' > /path/to/file") for writing
"""
pass # No custom filesystem tools - use browser-use built-ins

def _register_email_tools(self):
"""Register email verification tools."""
Expand Down Expand Up @@ -393,7 +345,6 @@ def _register_network_tools(self):
"Get captured network requests (XHR/fetch) from the current page"
)
def get_network_requests(
filter_type: str = "",
include_bodies: bool = False,
max_requests: int = 30,
) -> str:
Expand All @@ -409,7 +360,6 @@ def get_network_requests(
saved file path to access complete request history if output is truncated.

Args:
filter_type: Filter by type: "xhr", "fetch", "all" (default: xhr+fetch)
include_bodies: Include request/response bodies (use for API analysis)
max_requests: Maximum requests to return (default: 30)

Expand All @@ -420,14 +370,12 @@ def get_network_requests(
if include_bodies:
output, saved_file = _run_async(
network_capture.get_requests_async(
filter_type=filter_type,
include_bodies=include_bodies,
max_requests=max_requests,
)
)
else:
output, saved_file = network_capture.get_requests(
filter_type=filter_type,
include_bodies=include_bodies,
max_requests=max_requests,
)
Expand Down
12 changes: 0 additions & 12 deletions tempo/tools/network_capture.py
Original file line number Diff line number Diff line change
Expand Up @@ -373,15 +373,13 @@ def _save_requests_to_file(self) -> str | None:

def get_requests(
self,
filter_type: str = "",
include_bodies: bool = False,
max_requests: int = 50,
) -> tuple[str, str | None]:
"""
Get formatted summary of captured requests.

Args:
filter_type: Filter by resource type (xhr, fetch, all, or empty for default)
include_bodies: Include request/response bodies
max_requests: Maximum number of requests to return

Expand All @@ -391,13 +389,6 @@ def get_requests(
# Determine which requests to include
requests_to_show = list(self._requests)

if filter_type:
filter_type_upper = filter_type.upper()
if filter_type_upper != "ALL":
requests_to_show = [
r for r in requests_to_show if r.resource_type == filter_type_upper
]

# Limit to max_requests (most recent)
if len(requests_to_show) > max_requests:
requests_to_show = requests_to_show[-max_requests:]
Expand Down Expand Up @@ -438,7 +429,6 @@ def get_requests(

async def get_requests_async(
self,
filter_type: str = "",
include_bodies: bool = False,
max_requests: int = 50,
) -> tuple[str, str | None]:
Expand All @@ -449,7 +439,6 @@ async def get_requests_async(
via CDP before formatting the output.

Args:
filter_type: Filter by resource type (xhr, fetch, all, or empty for default)
include_bodies: Include request/response bodies (triggers async fetch)
max_requests: Maximum number of requests to return

Expand All @@ -462,7 +451,6 @@ async def get_requests_async(

# Delegate to sync method for formatting
return self.get_requests(
filter_type=filter_type,
include_bodies=include_bodies,
max_requests=max_requests,
)
Expand Down
Loading