nwang783 · nwang783 · Jan 19, 2026 · Jan 17, 2026 · Jan 19, 2026 · Jan 19, 2026
diff --git a/config.example.toml b/config.example.toml
@@ -87,4 +87,22 @@ headless = false
 default_timeout = 300000
 # Viewport dimensions
 viewport_width = 1280
-viewport_height = 720
+viewport_height = 720
+
+# Environment variables for browser agent stability tuning:
+# These can be set in .env or shell environment for operational adjustments.
+#
+# TEMPO_BROWSER_USE_LLM_TIMEOUT: LLM response timeout in seconds (default: model-derived)
+#   - For GPT-5 models, automatically raised to a minimum of 120s to prevent timeouts
+#   - Example: TEMPO_BROWSER_USE_LLM_TIMEOUT=180
+#
+# TEMPO_BROWSER_USE_MAX_FAILURES: Max consecutive failures before abort (default: 3)
+#   - For GPT-5 models, values below 3 are raised to 3 to tolerate intermittent provider issues
+#   - Example: TEMPO_BROWSER_USE_MAX_FAILURES=5
+#
+# TEMPO_BROWSER_USE_MAX_HISTORY_ITEMS: Cap on conversation history items (default: unlimited)
+#   - Use to reduce context size and prevent LLM timeouts on long tasks
+#   - Example: TEMPO_BROWSER_USE_MAX_HISTORY_ITEMS=50
+#
+# TEMPO_BROWSER_USE_MAX_COMPLETION_TOKENS: Max tokens for LLM response (default: model-derived)
+#   - Example: TEMPO_BROWSER_USE_MAX_COMPLETION_TOKENS=4096
diff --git a/tempo/agents/browser.py b/tempo/agents/browser.py
@@ -5,6 +5,7 @@
 by parent agents to handle web interaction subtasks.
 """
 
+import logging
 import os
 from typing import TYPE_CHECKING, Any
 
@@ -21,6 +22,35 @@
     from tempo.tools.network_capture import NetworkRequestCapture
 
 
+logger = logging.getLogger(__name__)
+
+
+def _parse_env_int(env_var: str, default: int | None = None) -> int | None:
+    """
+    Parse an integer from an environment variable with validation.
+
+    Args:
+        env_var: Name of the environment variable
+        default: Value returned when the variable is unset, blank, or invalid
+
+    Returns:
+        Parsed integer; default if unset, blank, or invalid (invalid logs a warning)
+    """
+    value = os.getenv(env_var)
+    if value is None or not value.strip():
+        return default
+    try:
+        return int(value)
+    except ValueError:
+        logger.warning(
+            "Invalid value for %s: '%s' is not an integer. Using default: %s",
+            env_var,
+            value,
+            default,
+        )
+        return default
+
+
 @register_agent
 class BrowserAgent(BaseAgentDefinition):
     """
@@ -167,11 +197,38 @@ def create_browser_use_agent(
             task_description = f"{self.task}\n\nStarting URL: {self.url}"
 
         # Create browser-use Agent
+        # Allow runtime overrides for stability testing (Debug Mode / ops tuning).
+        # Defaults preserve browser-use behavior (model-derived llm_timeout, max_failures=3).
+        llm_timeout_val = _parse_env_int("TEMPO_BROWSER_USE_LLM_TIMEOUT")
+        max_failures_val = _parse_env_int("TEMPO_BROWSER_USE_MAX_FAILURES", default=3)
+        max_history_items_val = _parse_env_int("TEMPO_BROWSER_USE_MAX_HISTORY_ITEMS")
+
+        # Evidence-based default: gpt-5.* can exceed 60s when prompts get large
+        # (we observed CancelledError + 60s timeout). Apply guardrails for gpt-5 models.
+        model_name = str(getattr(llm, "model", "")).lower()
+        if "gpt-5" in model_name:
+            # Guardrail: enforce minimum timeout that avoids known cancellations.
+            if llm_timeout_val is None or llm_timeout_val < 120:
+                logger.info(
+                    f"Applying GPT-5 guardrail: llm_timeout={120}s "
+                    f"(was {llm_timeout_val or 'unset'})"
+                )
+                llm_timeout_val = 120
+            # Guardrail: avoid overly-low failure caps which exacerbate intermittent provider issues.
+            if max_failures_val < 3:
+                logger.info(
+                    f"Applying GPT-5 guardrail: max_failures={3} (was {max_failures_val})"
+                )
+                max_failures_val = 3
+
         agent = Agent(
             task=task_description,
             llm=llm,
             browser=browser,
             tools=shared_tools,  # Use the shared Tools instance
+            llm_timeout=llm_timeout_val,
+            max_failures=max_failures_val,
+            max_history_items=max_history_items_val,
         )
 
         return agent, browser, shared_tools

diff --git a/tempo/infra/browser_use_llm.py b/tempo/infra/browser_use_llm.py
@@ -51,6 +51,8 @@ def create_browser_use_llm(
     # Prefer OpenAI direct if available; fallback to OpenRouter.
     openai_key = os.getenv("OPENAI_API_KEY")
     openrouter_key = os.getenv("OPENROUTER_API_KEY")
+    max_completion_tokens_env = os.getenv("TEMPO_BROWSER_USE_MAX_COMPLETION_TOKENS")
+    max_completion_tokens = int(max_completion_tokens_env) if max_completion_tokens_env else None
 
     if openai_key:
         from browser_use.llm.openai.chat import ChatOpenAI as BrowserUseChatOpenAI
@@ -59,6 +61,7 @@ def create_browser_use_llm(
             model=model,
             api_key=openai_key,
             temperature=0.2,
+            max_completion_tokens=max_completion_tokens,
         )
 
     if openrouter_key:
@@ -70,6 +73,7 @@ def create_browser_use_llm(
             model=openrouter_model,
             api_key=openrouter_key,
             temperature=0.2,
+            max_completion_tokens=max_completion_tokens,
             default_headers={
                 "HTTP-Referer": "https://github.com/your-repo/tempo-sec",
                 "X-Title": "tempo-sec browser agent",

diff --git a/tempo/prompts/templates/browser_base.txt b/tempo/prompts/templates/browser_base.txt
@@ -126,7 +126,6 @@ Use `get_network_requests` to inspect XHR/fetch requests made by the web applica
 - Bypassing WAF restrictions by replaying legitimate browser requests
 
 **Parameters:**
-- `filter_type`: Filter by "xhr", "fetch", or "all" (default: both xhr and fetch)
 - `include_bodies`: Set to `true` to include request/response bodies (useful for API analysis)
 - `max_requests`: Maximum requests to return (default: 30)
 

diff --git a/tempo/tools/browser_task_tool.py b/tempo/tools/browser_task_tool.py
@@ -4,10 +4,13 @@
 Tool for parent agents to delegate browser tasks to a specialized browser agent.
 """
 
+import logging
 from typing import TYPE_CHECKING
 
 from tempo.tools.base import Tool, ToolCategory, ToolResult, ToolSchema
 
+logger = logging.getLogger(__name__)
+
 if TYPE_CHECKING:
     from tempo.infra.container import ContainerManager
     from tempo.infra.llm import LLMClient

diff --git a/tempo/tools/browser_use_adapter.py b/tempo/tools/browser_use_adapter.py
@@ -251,71 +251,23 @@ def shell(
             return result.output
 
     def _register_filesystem_tools(self):
-        """Register file operations tools."""
-        from tempo.tools.filesystem import FileReadTool, FileWriteTool
-
-        file_read_tool = FileReadTool(self.container)
-        file_write_tool = FileWriteTool(self.container)
-
-        @self.tools.action("Read the contents of a file")
-        def file_read(
-            path: str,
-            encoding: str = "utf-8",
-            max_bytes: int = 0,
-        ) -> str:
-            """
-            Read file contents.
-
-            Args:
-                path: Path to the file
-                encoding: Text encoding (default: utf-8)
-                max_bytes: Maximum bytes to read (0 = unlimited)
-
-            Returns:
-                File contents
-            """
-            max_bytes_param = max_bytes if max_bytes > 0 else None
-            result = _run_async(
-                file_read_tool.execute(
-                    path=path,
-                    encoding=encoding,
-                    max_bytes=max_bytes_param,
-                )
-            )
-            if not result.success:
-                return f"Error: {result.error}"
-            return result.output
-
-        @self.tools.action("Write content to a file (creates or overwrites)")
-        def file_write(
-            path: str,
-            content: str,
-            append: bool = False,
-            encoding: str = "utf-8",
-        ) -> str:
-            """
-            Write to a file.
-
-            Args:
-                path: Path to the file
-                content: Content to write
-                append: Append instead of overwrite
-                encoding: Text encoding (default: utf-8)
-
-            Returns:
-                Success confirmation
-            """
-            result = _run_async(
-                file_write_tool.execute(
-                    path=path,
-                    content=content,
-                    append=append,
-                    encoding=encoding,
-                )
-            )
-            if not result.success:
-                return f"Error: {result.error}"
-            return result.output
+        """Register file operations tools (intentionally a no-op).
+
+        NOTE: Removed - browser-use already provides built-in file system tools:
+        - write_file: Write to files in browser-use's temp filesystem
+        - read_file: Read files from browser-use's temp filesystem
+        - replace_file: Replace strings in files
+
+        The tempo-sec file_read/file_write tools were duplicates that operated
+        on the Docker container filesystem, but the browser agent should use
+        browser-use's built-in tools for consistency with its todo.md and
+        other file-based workflows.
+
+        For Docker container file operations, the shell tool can be used:
+        - shell(command="cat /path/to/file") for reading
+        - shell(command="echo 'content' > /path/to/file") for writing
+        """
+        pass  # No custom filesystem tools - use browser-use built-ins
 
     def _register_email_tools(self):
         """Register email verification tools."""
@@ -393,7 +345,6 @@ def _register_network_tools(self):
             "Get captured network requests (XHR/fetch) from the current page"
         )
         def get_network_requests(
-            filter_type: str = "",
             include_bodies: bool = False,
             max_requests: int = 30,
         ) -> str:
@@ -409,7 +360,6 @@ def get_network_requests(
             saved file path to access complete request history if output is truncated.
 
             Args:
-                filter_type: Filter by type: "xhr", "fetch", "all" (default: xhr+fetch)
                 include_bodies: Include request/response bodies (use for API analysis)
                 max_requests: Maximum requests to return (default: 30)
 
@@ -420,14 +370,12 @@ def get_network_requests(
             if include_bodies:
                 output, saved_file = _run_async(
                     network_capture.get_requests_async(
-                        filter_type=filter_type,
                         include_bodies=include_bodies,
                         max_requests=max_requests,
                     )
                 )
             else:
                 output, saved_file = network_capture.get_requests(
-                    filter_type=filter_type,
                     include_bodies=include_bodies,
                     max_requests=max_requests,
                 )

diff --git a/tempo/tools/network_capture.py b/tempo/tools/network_capture.py
@@ -373,15 +373,13 @@ def _save_requests_to_file(self) -> str | None:
 
     def get_requests(
         self,
-        filter_type: str = "",
         include_bodies: bool = False,
         max_requests: int = 50,
     ) -> tuple[str, str | None]:
         """
         Get formatted summary of captured requests.
 
         Args:
-            filter_type: Filter by resource type (xhr, fetch, all, or empty for default)
             include_bodies: Include request/response bodies
             max_requests: Maximum number of requests to return
 
@@ -391,13 +389,6 @@ def get_requests(
         # Determine which requests to include
         requests_to_show = list(self._requests)
 
-        if filter_type:
-            filter_type_upper = filter_type.upper()
-            if filter_type_upper != "ALL":
-                requests_to_show = [
-                    r for r in requests_to_show if r.resource_type == filter_type_upper
-                ]
-
         # Limit to max_requests (most recent)
         if len(requests_to_show) > max_requests:
             requests_to_show = requests_to_show[-max_requests:]
@@ -438,7 +429,6 @@ def get_requests(
 
     async def get_requests_async(
         self,
-        filter_type: str = "",
         include_bodies: bool = False,
         max_requests: int = 50,
     ) -> tuple[str, str | None]:
@@ -449,7 +439,6 @@ async def get_requests_async(
         via CDP before formatting the output.
 
         Args:
-            filter_type: Filter by resource type (xhr, fetch, all, or empty for default)
             include_bodies: Include request/response bodies (triggers async fetch)
             max_requests: Maximum number of requests to return
 
@@ -462,7 +451,6 @@ async def get_requests_async(
 
         # Delegate to sync method for formatting
         return self.get_requests(
-            filter_type=filter_type,
             include_bodies=include_bodies,
             max_requests=max_requests,
         )