diff --git a/config.example.toml b/config.example.toml
index af4acf6..1e5f9da 100644
--- a/config.example.toml
+++ b/config.example.toml
@@ -87,4 +87,22 @@ headless = false
 default_timeout = 300000
 # Viewport dimensions
 viewport_width = 1280
-viewport_height = 720
\ No newline at end of file
+viewport_height = 720
+
+# Environment variables for browser agent stability tuning:
+# These can be set in .env or shell environment for operational adjustments.
+#
+# TEMPO_BROWSER_USE_LLM_TIMEOUT: LLM response timeout in seconds (default: model-derived)
+#   - For GPT-5 models, automatically raised to minimum 120s to prevent timeouts
+#   - Example: TEMPO_BROWSER_USE_LLM_TIMEOUT=180
+#
+# TEMPO_BROWSER_USE_MAX_FAILURES: Max consecutive failures before abort (default: 3)
+#   - For GPT-5 models, enforces minimum of 3 to handle intermittent issues
+#   - Example: TEMPO_BROWSER_USE_MAX_FAILURES=5
+#
+# TEMPO_BROWSER_USE_MAX_HISTORY_ITEMS: Cap on conversation history items (default: unlimited)
+#   - Use to reduce context size and prevent LLM timeouts on long tasks
+#   - Example: TEMPO_BROWSER_USE_MAX_HISTORY_ITEMS=50
+#
+# TEMPO_BROWSER_USE_MAX_COMPLETION_TOKENS: Max tokens for LLM response (default: model-derived)
+#   - Example: TEMPO_BROWSER_USE_MAX_COMPLETION_TOKENS=4096
diff --git a/tempo/agents/browser.py b/tempo/agents/browser.py
index 2a4a29b..35bb2ed 100644
--- a/tempo/agents/browser.py
+++ b/tempo/agents/browser.py
@@ -5,6 +5,7 @@
 by parent agents to handle web interaction subtasks.
 """
 
+import logging
 import os
 from typing import TYPE_CHECKING, Any
 
@@ -21,6 +22,35 @@
     from tempo.tools.network_capture import NetworkRequestCapture
 
 
+logger = logging.getLogger(__name__)
+
+
+def _parse_env_int(env_var: str, default: int | None = None) -> int | None:
+    """
+    Parse an integer from an environment variable with validation.
+
+    Args:
+        env_var: Name of the environment variable
+        default: Default value if not set
+
+    Returns:
+        Parsed integer value, or default if unset or blank
+
+    Logs a warning and returns default if the value is not a valid integer.
+    """
+    value = os.getenv(env_var)
+    if value is None or not value.strip():
+        return default
+    try:
+        return int(value)
+    except ValueError:
+        logger.warning(
+            f"Invalid value for {env_var}: '{value}' is not an integer. "
+            f"Using default: {default}"
+        )
+        return default
+
+
 @register_agent
 class BrowserAgent(BaseAgentDefinition):
     """
@@ -167,11 +197,38 @@ def create_browser_use_agent(
         task_description = f"{self.task}\n\nStarting URL: {self.url}"
 
         # Create browser-use Agent
+        # Allow runtime overrides for stability testing (Debug Mode / ops tuning).
+        # Defaults preserve browser-use behavior (model-derived llm_timeout, max_failures=3).
+        llm_timeout_val = _parse_env_int("TEMPO_BROWSER_USE_LLM_TIMEOUT")
+        max_failures_val = _parse_env_int("TEMPO_BROWSER_USE_MAX_FAILURES", default=3)
+        max_history_items_val = _parse_env_int("TEMPO_BROWSER_USE_MAX_HISTORY_ITEMS")
+
+        # Evidence-based default: gpt-5.* can exceed 60s when prompts get large
+        # (we observed CancelledError + 60s timeout). Apply guardrails for gpt-5 models.
+        model_name = str(getattr(llm, "model", "")).lower()
+        if "gpt-5" in model_name:
+            # Guardrail: enforce minimum timeout that avoids known cancellations.
+            if llm_timeout_val is None or llm_timeout_val < 120:
+                logger.info(
+                    f"Applying GPT-5 guardrail: llm_timeout={120}s "
+                    f"(was {'unset' if llm_timeout_val is None else llm_timeout_val})"
+                )
+                llm_timeout_val = 120
+            # Guardrail: avoid overly-low failure caps which exacerbate intermittent provider issues.
+            if max_failures_val < 3:
+                logger.info(
+                    f"Applying GPT-5 guardrail: max_failures={3} (was {max_failures_val})"
+                )
+                max_failures_val = 3
+
         agent = Agent(
             task=task_description,
             llm=llm,
             browser=browser,
             tools=shared_tools,  # Use the shared Tools instance
+            llm_timeout=llm_timeout_val,
+            max_failures=max_failures_val,
+            max_history_items=max_history_items_val,
         )
 
         return agent, browser, shared_tools
diff --git a/tempo/infra/browser_use_llm.py b/tempo/infra/browser_use_llm.py
index 27f0716..5ea1ad0 100644
--- a/tempo/infra/browser_use_llm.py
+++ b/tempo/infra/browser_use_llm.py
@@ -51,6 +51,21 @@ def create_browser_use_llm(
     # Prefer OpenAI direct if available; fallback to OpenRouter.
     openai_key = os.getenv("OPENAI_API_KEY")
     openrouter_key = os.getenv("OPENROUTER_API_KEY")
+    max_completion_tokens_env = os.getenv("TEMPO_BROWSER_USE_MAX_COMPLETION_TOKENS")
+    max_completion_tokens: int | None = None
+    if max_completion_tokens_env and max_completion_tokens_env.strip():
+        try:
+            max_completion_tokens = int(max_completion_tokens_env)
+        except ValueError:
+            # Warn and fall back (like the other TEMPO_BROWSER_USE_* knobs) rather
+            # than letting a typo in the environment abort LLM construction.
+            import logging
+
+            logging.getLogger(__name__).warning(
+                "Invalid value for TEMPO_BROWSER_USE_MAX_COMPLETION_TOKENS: %r is not "
+                "an integer. Using default: None",
+                max_completion_tokens_env,
+            )
 
     if openai_key:
         from browser_use.llm.openai.chat import ChatOpenAI as BrowserUseChatOpenAI
@@ -59,6 +74,7 @@ def create_browser_use_llm(
             model=model,
             api_key=openai_key,
             temperature=0.2,
+            max_completion_tokens=max_completion_tokens,
         )
 
     if openrouter_key:
@@ -70,6 +86,7 @@ def create_browser_use_llm(
             model=openrouter_model,
             api_key=openrouter_key,
             temperature=0.2,
+            max_completion_tokens=max_completion_tokens,
             default_headers={
                 "HTTP-Referer": "https://github.com/your-repo/tempo-sec",
                 "X-Title": "tempo-sec browser agent",
diff --git a/tempo/prompts/templates/browser_base.txt b/tempo/prompts/templates/browser_base.txt
index 585e105..c1ff5ca 100644
--- a/tempo/prompts/templates/browser_base.txt
+++ b/tempo/prompts/templates/browser_base.txt
@@ -126,7 +126,6 @@ Use `get_network_requests` to inspect XHR/fetch requests made by the web applica
 - Bypassing WAF restrictions by replaying legitimate browser requests
 
 **Parameters:**
-- `filter_type`: Filter by "xhr", "fetch", or "all" (default: both xhr and fetch)
 - `include_bodies`: Set to `true` to include request/response bodies (useful for API analysis)
 - `max_requests`: Maximum requests to return (default: 30)
 
diff --git a/tempo/tools/browser_task_tool.py b/tempo/tools/browser_task_tool.py
index 2d401fa..c3f4b9e 100644
--- a/tempo/tools/browser_task_tool.py
+++ b/tempo/tools/browser_task_tool.py
@@ -4,10 +4,13 @@
 Tool for parent agents to delegate browser tasks to a specialized browser agent.
 """
 
+import logging
 from typing import TYPE_CHECKING
 
 from tempo.tools.base import Tool, ToolCategory, ToolResult, ToolSchema
 
+logger = logging.getLogger(__name__)
+
 if TYPE_CHECKING:
     from tempo.infra.container import ContainerManager
     from tempo.infra.llm import LLMClient
diff --git a/tempo/tools/browser_use_adapter.py b/tempo/tools/browser_use_adapter.py
index 6a6a4d1..d6e83b4 100644
--- a/tempo/tools/browser_use_adapter.py
+++ b/tempo/tools/browser_use_adapter.py
@@ -251,71 +251,23 @@ def shell(
             return result.output
 
     def _register_filesystem_tools(self):
-        """Register file operations tools."""
-        from tempo.tools.filesystem import FileReadTool, FileWriteTool
-
-        file_read_tool = FileReadTool(self.container)
-        file_write_tool = FileWriteTool(self.container)
-
-        @self.tools.action("Read the contents of a file")
-        def file_read(
-            path: str,
-            encoding: str = "utf-8",
-            max_bytes: int = 0,
-        ) -> str:
-            """
-            Read file contents.
-
-            Args:
-                path: Path to the file
-                encoding: Text encoding (default: utf-8)
-                max_bytes: Maximum bytes to read (0 = unlimited)
-
-            Returns:
-                File contents
-            """
-            max_bytes_param = max_bytes if max_bytes > 0 else None
-            result = _run_async(
-                file_read_tool.execute(
-                    path=path,
-                    encoding=encoding,
-                    max_bytes=max_bytes_param,
-                )
-            )
-            if not result.success:
-                return f"Error: {result.error}"
-            return result.output
-
-        @self.tools.action("Write content to a file (creates or overwrites)")
-        def file_write(
-            path: str,
-            content: str,
-            append: bool = False,
-            encoding: str = "utf-8",
-        ) -> str:
-            """
-            Write to a file.
-
-            Args:
-                path: Path to the file
-                content: Content to write
-                append: Append instead of overwrite
-                encoding: Text encoding (default: utf-8)
-
-            Returns:
-                Success confirmation
-            """
-            result = _run_async(
-                file_write_tool.execute(
-                    path=path,
-                    content=content,
-                    append=append,
-                    encoding=encoding,
-                )
-            )
-            if not result.success:
-                return f"Error: {result.error}"
-            return result.output
+        """Register file operations tools.
+
+        NOTE: Removed - browser-use already provides built-in file system tools:
+        - write_file: Write to files in browser-use's temp filesystem
+        - read_file: Read files from browser-use's temp filesystem
+        - replace_file: Replace strings in files
+
+        The tempo-sec file_read/file_write tools were duplicates that operated
+        on the Docker container filesystem, but the browser agent should use
+        browser-use's built-in tools for consistency with its todo.md and
+        other file-based workflows.
+
+        For Docker container file operations, the shell tool can be used:
+        - shell(command="cat /path/to/file") for reading
+        - shell(command="echo 'content' > /path/to/file") for writing
+        """
+        pass  # No custom filesystem tools - use browser-use built-ins
 
     def _register_email_tools(self):
         """Register email verification tools."""
@@ -393,7 +345,6 @@ def _register_network_tools(self):
             "Get captured network requests (XHR/fetch) from the current page"
         )
         def get_network_requests(
-            filter_type: str = "",
             include_bodies: bool = False,
             max_requests: int = 30,
         ) -> str:
@@ -409,7 +360,6 @@ def get_network_requests(
             saved file path to access complete request history if output is truncated.
 
             Args:
-                filter_type: Filter by type: "xhr", "fetch", "all" (default: xhr+fetch)
                 include_bodies: Include request/response bodies (use for API analysis)
                 max_requests: Maximum requests to return (default: 30)
 
@@ -420,14 +370,12 @@ def get_network_requests(
             if include_bodies:
                 output, saved_file = _run_async(
                     network_capture.get_requests_async(
-                        filter_type=filter_type,
                         include_bodies=include_bodies,
                         max_requests=max_requests,
                     )
                 )
             else:
                 output, saved_file = network_capture.get_requests(
-                    filter_type=filter_type,
                     include_bodies=include_bodies,
                     max_requests=max_requests,
                 )
diff --git a/tempo/tools/network_capture.py b/tempo/tools/network_capture.py
index eaf2398..782b5d8 100644
--- a/tempo/tools/network_capture.py
+++ b/tempo/tools/network_capture.py
@@ -373,7 +373,6 @@ def _save_requests_to_file(self) -> str | None:
 
     def get_requests(
         self,
-        filter_type: str = "",
         include_bodies: bool = False,
         max_requests: int = 50,
     ) -> tuple[str, str | None]:
@@ -381,7 +380,6 @@ def get_requests(
         Get formatted summary of captured requests.
 
         Args:
-            filter_type: Filter by resource type (xhr, fetch, all, or empty for default)
             include_bodies: Include request/response bodies
             max_requests: Maximum number of requests to return
 
@@ -391,13 +389,6 @@ def get_requests(
         # Determine which requests to include
         requests_to_show = list(self._requests)
 
-        if filter_type:
-            filter_type_upper = filter_type.upper()
-            if filter_type_upper != "ALL":
-                requests_to_show = [
-                    r for r in requests_to_show if r.resource_type == filter_type_upper
-                ]
-
         # Limit to max_requests (most recent)
         if len(requests_to_show) > max_requests:
             requests_to_show = requests_to_show[-max_requests:]
@@ -438,7 +429,6 @@ def get_requests(
 
     async def get_requests_async(
         self,
-        filter_type: str = "",
         include_bodies: bool = False,
         max_requests: int = 50,
     ) -> tuple[str, str | None]:
@@ -449,7 +439,6 @@ async def get_requests_async(
         via CDP before formatting the output.
 
         Args:
-            filter_type: Filter by resource type (xhr, fetch, all, or empty for default)
             include_bodies: Include request/response bodies (triggers async fetch)
             max_requests: Maximum number of requests to return
 
@@ -462,7 +451,6 @@ async def get_requests_async(
 
         # Delegate to sync method for formatting
         return self.get_requests(
-            filter_type=filter_type,
             include_bodies=include_bodies,
             max_requests=max_requests,
         )