Add docker sandbox integration

Dishwasha · Dishwasha · commit 1030fffacd7a · 2025-11-27T13:41:53.000-05:00
diff --git a/libs/deepagents-cli/README.md b/libs/deepagents-cli/README.md
@@ -47,7 +47,7 @@ deepagents --agent mybot
 deepagents --auto-approve
 
 # Execute code in a remote sandbox
-deepagents --sandbox modal        # or runloop, daytona
+deepagents --sandbox modal        # or runloop, daytona, docker
 deepagents --sandbox-id dbx_123   # reuse existing sandbox
 ```
 
diff --git a/libs/deepagents-cli/deepagents_cli/agent.py b/libs/deepagents-cli/deepagents_cli/agent.py
@@ -96,7 +96,7 @@ def get_system_prompt(assistant_id: str, sandbox_type: str | None = None) -> str
 
     Args:
         assistant_id: The agent identifier for path references
-        sandbox_type: Type of sandbox provider ("modal", "runloop", "daytona").
+        sandbox_type: Type of sandbox provider ("modal", "runloop", "daytona", "docker").
                      If None, agent is operating in local mode.
 
     Returns:
@@ -339,7 +339,7 @@ def create_agent_with_config(
         tools: Additional tools to provide to agent
         sandbox: Optional sandbox backend for remote execution (e.g., ModalBackend).
                  If None, uses local filesystem + shell.
-        sandbox_type: Type of sandbox provider ("modal", "runloop", "daytona")
+        sandbox_type: Type of sandbox provider ("modal", "runloop", "daytona", "docker")
 
     Returns:
         2-tuple of graph and backend
diff --git a/libs/deepagents-cli/deepagents_cli/integrations/docker.py b/libs/deepagents-cli/deepagents_cli/integrations/docker.py
@@ -0,0 +1,113 @@
+"""Docker sandbox backend implementation."""
+
+from __future__ import annotations
+
+from deepagents.backends.protocol import (
+    ExecuteResponse,
+    FileDownloadResponse,
+    FileUploadResponse,
+)
+from deepagents.backends.sandbox import BaseSandbox
+
+import io
+import tarfile
+
+
+class DockerBackend(BaseSandbox):
+    """Docker backend implementation conforming to SandboxBackendProtocol.
+
+    Commands run via ``exec_run``; files move via ``get_archive``/``put_archive``.
+    """
+
+    def __init__(self, sandbox: Sandbox) -> None:
+        """Initialize the DockerBackend with a Docker container handle.
+
+        Args:
+            sandbox: Docker SDK container object that commands are executed in.
+        """
+        self._sandbox = sandbox
+        self._timeout: int = 30 * 60  # 30 mins
+
+    @property
+    def id(self) -> str:
+        """Unique identifier for the sandbox backend."""
+        return self._sandbox.id
+
+    def execute(
+        self,
+        command: str,
+    ) -> ExecuteResponse:
+        """Execute a shell command in the sandbox and return ExecuteResponse.
+
+        Args:
+            command: Full shell command string to execute.
+
+        Returns:
+            ExecuteResponse with combined output and exit code (never truncated).
+        """
+        # A plain string is not run through a shell by exec_run, so pipes,
+        # redirects and ``&&`` would otherwise arrive as literal arguments.
+        result = self._sandbox.exec_run(cmd=["/bin/sh", "-c", command], user="root", workdir="/root")
+        output = result.output.decode("utf-8", errors="replace") if result.output else ""
+        return ExecuteResponse(output=output, exit_code=result.exit_code, truncated=False)
+
+    def download_files(self, paths: list[str]) -> list[FileDownloadResponse]:
+        """Download multiple files from the Docker sandbox via ``get_archive``.
+
+        Args:
+            paths: List of file paths to download.
+
+        Returns:
+            List of FileDownloadResponse objects, one per input path, in input
+            order. A failed path has ``content=None`` and an error code.
+        """
+        from docker.errors import APIError, NotFound
+
+        responses: list[FileDownloadResponse] = []
+        for path in paths:
+            # Handle each path on its own so one failure cannot drop the rest.
+            content, error = None, None
+            try:
+                stream, stat = self._sandbox.get_archive(path)
+                with tarfile.open(fileobj=io.BytesIO(b"".join(stream)), mode="r") as tar:
+                    member = tar.extractfile(stat["name"])
+                    if member is None:  # directory or other non-regular entry
+                        error = "is_directory"
+                    else:
+                        content = member.read()
+            except NotFound:
+                error = "file_not_found"
+            except (APIError, KeyError, tarfile.TarError):
+                error = "invalid_path"  # NOTE(review): closest known error code - confirm
+            responses.append(FileDownloadResponse(path=path, content=content, error=error))
+        return responses
+
+    def upload_files(self, files: list[tuple[str, bytes]]) -> list[FileUploadResponse]:
+        """Upload multiple files to the Docker sandbox via ``put_archive``.
+
+        Args:
+            files: List of (path, content) tuples to upload.
+
+        Returns:
+            List of FileUploadResponse objects, one per input file, in input order.
+        """
+        from docker.errors import APIError
+
+        responses: list[FileUploadResponse] = []
+        for path, content in files:
+            # put_archive extracts into a directory: name the member, target the parent.
+            parent, _, name = path.rpartition("/")
+            info = tarfile.TarInfo(name)
+            info.size = len(content)
+            buffer = io.BytesIO()
+            with tarfile.open(fileobj=buffer, mode="w") as tar:
+                tar.addfile(info, io.BytesIO(content))
+            error = None
+            try:
+                self._sandbox.exec_run(cmd=["mkdir", "-p", parent or "/"], user="root")
+                # Send the bytes, not the stream: its position is at the end.
+                self._sandbox.put_archive(parent or "/", buffer.getvalue())
+            except APIError:
+                error = "permission_denied"  # NOTE(review): closest known error code - confirm
+            responses.append(FileUploadResponse(path=path, error=error))
+        return responses
diff --git a/libs/deepagents-cli/deepagents_cli/integrations/sandbox_factory.py b/libs/deepagents-cli/deepagents_cli/integrations/sandbox_factory.py
@@ -266,10 +266,113 @@ def create_daytona_sandbox(
             console.print(f"[yellow]⚠ Cleanup failed: {e}[/yellow]")
 
 
+@contextmanager
+def create_docker_sandbox(
+    *, sandbox_id: str | None = None, setup_script_path: str | None = None
+) -> Generator[SandboxBackendProtocol, None, None]:
+    """Create or connect to a Docker sandbox.
+
+    Args:
+        sandbox_id: Optional existing container ID to reuse (a reused container is not removed on exit)
+        setup_script_path: Optional path to setup script to run after sandbox starts
+
+    Yields:
+        DockerBackend wrapping the running container
+
+    Raises:
+        ImportError: Docker SDK not installed
+        SystemExit: Container creation/connection failed (exit status 1)
+        FileNotFoundError: Setup script not found
+        RuntimeError: Setup script failed
+    """
+    import docker
+
+    from deepagents_cli.integrations.docker import DockerBackend
+
+    sandbox_exists = sandbox_id is not None
+    action = "Connecting to" if sandbox_exists else "Starting"  # hoisted: nested quotes need 3.12+
+    console.print(f"[yellow]{action} Docker sandbox...[/yellow]")
+
+    # Connect to the Docker daemon configured in the environment
+    client = docker.from_env()
+
+    image_name = "python:3.12-slim"
+    try:
+        container = client.containers.get(sandbox_id) if sandbox_exists else client.containers.run(
+            image_name,
+            command="tail -f /dev/null",  # Keep container running
+            detach=True,
+            environment={"HOME": os.path.expanduser('~')},
+            tty=True,
+            mem_limit="512m",
+            cpu_quota=50000,  # Limits CPU usage (e.g., 50% of one core)
+            pids_limit=100,   # Limit number of processes
+            # Network access and a writable root filesystem stay enabled so setup scripts can install packages
+            network_mode="bridge",
+            read_only=False,
+            tmpfs={"/tmp": "rw,size=64m,noexec,nodev,nosuid"}, # Writable /tmp
+            volumes={
+                os.path.expanduser('~/.deepagents'): {"bind": os.path.expanduser('~/.deepagents'), 'mode': 'rw'},
+                os.getcwd(): {"bind": "/workspace", 'mode': 'rw'},
+                os.getcwd() + "/.deepagents": {"bind": os.getcwd() + "/.deepagents", 'mode': 'rw'}, # Needed for project skills to work
+            },
+        )
+    except docker.errors.ImageNotFound as e:
+        print(f"Error: The specified image '{image_name}' was not found.")
+        print(f"Details: {e}")
+        raise SystemExit(1) from e
+    except docker.errors.NotFound as e:
+        # Must come before APIError, which is NotFound's base class
+        print("Container not found or not running.")
+        raise SystemExit(1) from e
+    except docker.errors.ContainerError as e:
+        # Only raised when detach is False; kept as a guard. The captured output lives on e.stderr.
+        logs = e.stderr.decode("utf-8", errors="replace") if isinstance(e.stderr, bytes) else e.stderr
+        print(f"Error: The container exited with a non-zero exit code ({e.exit_status}).")
+        print(f"Command run: {e.command}")
+        print(f"Container logs: {logs}")
+        print(f"Details: {e}")
+        raise SystemExit(1) from e
+    except docker.errors.APIError as e:
+        # This covers other server-related errors, like connection issues or permission problems.
+        print("Error: A Docker API error occurred.")
+        print(f"Details: {e}")
+        raise SystemExit(1) from e
+    except Exception as e:
+        print(f"An unexpected error occurred: {e}")
+        raise SystemExit(1) from e
+
+    sandbox_id = container.id
+
+    backend = DockerBackend(container)
+    console.print(f"[green]✓ Docker sandbox ready: {backend.id}[/green]")
+
+    try:
+        # Run the setup script inside the try so a failing script still triggers cleanup
+        if setup_script_path:
+            _run_sandbox_setup(backend, setup_script_path)
+        yield backend
+    finally:
+        if not sandbox_exists:
+            console.print(f"[dim]Terminating Docker sandbox {sandbox_id}...[/dim]")
+            try:
+                container.stop(timeout=5)
+                container.remove(force=True)
+            except docker.errors.NotFound:
+                print(f"Container {sandbox_id} already removed.")
+            except docker.errors.APIError as e:
+                print(f"Error during container cleanup {sandbox_id}: {e}")
+            except Exception as e:
+                console.print(f"[yellow]⚠ Cleanup failed: {e}[/yellow]")
+            else:
+                console.print(f"[dim]✓ Docker sandbox {sandbox_id} terminated[/dim]")
+
+
 _PROVIDER_TO_WORKING_DIR = {
     "modal": "/workspace",
     "runloop": "/home/user",
     "daytona": "/home/daytona",
+    "docker": "/workspace",
 }
 
 
@@ -278,6 +381,7 @@ def create_daytona_sandbox(
     "modal": create_modal_sandbox,
     "runloop": create_runloop_sandbox,
     "daytona": create_daytona_sandbox,
+    "docker": create_docker_sandbox,
 }
 
 
@@ -294,7 +398,7 @@ def create_sandbox(
     the appropriate provider-specific context manager.
 
     Args:
-        provider: Sandbox provider ("modal", "runloop", "daytona")
+        provider: Sandbox provider ("modal", "runloop", "daytona", "docker")
         sandbox_id: Optional existing sandbox ID to reuse
         setup_script_path: Optional path to setup script to run after sandbox starts
 
@@ -318,7 +422,7 @@ def get_available_sandbox_types() -> list[str]:
     """Get list of available sandbox provider types.
 
     Returns:
-        List of sandbox type names (e.g., ["modal", "runloop", "daytona"])
+        List of sandbox type names (e.g., ["modal", "runloop", "daytona", "docker"])
     """
     return list(_SANDBOX_PROVIDERS.keys())
 
@@ -327,7 +431,7 @@ def get_default_working_dir(provider: str) -> str:
     """Get the default working directory for a given sandbox provider.
 
     Args:
-        provider: Sandbox provider name ("modal", "runloop", "daytona")
+        provider: Sandbox provider name ("modal", "runloop", "daytona", "docker")
 
     Returns:
         Default working directory path as string
diff --git a/libs/deepagents-cli/deepagents_cli/main.py b/libs/deepagents-cli/deepagents_cli/main.py
@@ -109,7 +109,7 @@ def parse_args():
     )
     parser.add_argument(
         "--sandbox",
-        choices=["none", "modal", "daytona", "runloop"],
+        choices=["none", "modal", "daytona", "runloop", "docker"],
         default="none",
         help="Remote sandbox for code execution (default: none - local only)",
     )
@@ -144,7 +144,7 @@ async def simple_cli(
 
     Args:
         backend: Backend for file operations (CompositeBackend)
-        sandbox_type: Type of sandbox being used (e.g., "modal", "runloop", "daytona").
+        sandbox_type: Type of sandbox being used (e.g., "modal", "runloop", "daytona", "docker").
                      If None, running in local mode.
         sandbox_id: ID of the active sandbox
         setup_script_path: Path to setup script that was run (if any)
@@ -329,7 +329,7 @@ async def main(
     Args:
         assistant_id: Agent identifier for memory storage
         session_state: Session state with auto-approve settings
-        sandbox_type: Type of sandbox ("none", "modal", "runloop", "daytona")
+        sandbox_type: Type of sandbox ("none", "modal", "runloop", "daytona", "docker")
         sandbox_id: Optional existing sandbox ID to reuse
         setup_script_path: Optional path to setup script to run in sandbox
     """
diff --git a/libs/deepagents-cli/deepagents_cli/ui.py b/libs/deepagents-cli/deepagents_cli/ui.py
@@ -556,7 +556,7 @@ def show_help() -> None:
     console.print("  --agent NAME                  Agent identifier (default: agent)")
     console.print("  --auto-approve                Auto-approve tool usage without prompting")
     console.print(
-        "  --sandbox TYPE                Remote sandbox for execution (modal, runloop, daytona)"
+        "  --sandbox TYPE                Remote sandbox for execution (modal, runloop, daytona, docker)"
     )
     console.print("  --sandbox-id ID               Reuse existing sandbox (skips creation/cleanup)")
     console.print()
diff --git a/libs/deepagents-cli/pyproject.toml b/libs/deepagents-cli/pyproject.toml
@@ -18,6 +18,7 @@ dependencies = [
   "markdownify>=0.13.0",
   "langchain>=1.0.7",
   "runloop-api-client>=0.69.0",
+  "docker>=7.1.0",
 ]
 
 [project.scripts]
diff --git a/libs/deepagents-cli/tests/integration_tests/test_sandbox_factory.py b/libs/deepagents-cli/tests/integration_tests/test_sandbox_factory.py
@@ -1,6 +1,6 @@
 """Test sandbox integrations with upload/download functionality.
 
-This module tests sandbox backends (RunLoop, Daytona, Modal) with support for
+This module tests sandbox backends (RunLoop, Daytona, Modal, Docker) with support for
 optional sandbox reuse to reduce test execution time.
 
 Set REUSE_SANDBOX=1 environment variable to reuse sandboxes across tests within
@@ -320,3 +320,13 @@ def sandbox(self) -> Iterator[BaseSandbox]:
         """Provide a Modal sandbox instance."""
         with create_sandbox("modal") as sandbox:
             yield sandbox
+
+
+# class TestDockerIntegration(BaseSandboxIntegrationTest):
+#     """Test Docker backend integration."""
+
+#     @pytest.fixture(scope="class")
+#     def sandbox(self) -> Iterator[BaseSandbox]:
+#         """Provide a Docker sandbox instance."""
+#         with create_sandbox("docker") as sandbox:
+#             yield sandbox
diff --git a/libs/deepagents-cli/uv.lock b/libs/deepagents-cli/uv.lock

Original file line number	Diff line number	Diff line change
`@@ -556,7 +556,7 @@ def show_help() -> None:`
`556`	`556`	`console.print(" --agent NAME Agent identifier (default: agent)")`
`557`	`557`	`console.print(" --auto-approve Auto-approve tool usage without prompting")`
`558`	`558`	`console.print(`
`559`		`- " --sandbox TYPE Remote sandbox for execution (modal, runloop, daytona)"`
	`559`	`+ " --sandbox TYPE Remote sandbox for execution (modal, runloop, daytona, docker)"`
`560`	`560`	`)`
`561`	`561`	`console.print(" --sandbox-id ID Reuse existing sandbox (skips creation/cleanup)")`
`562`	`562`	`console.print()`
Original file line number	Diff line number	Diff line change
`@@ -18,6 +18,7 @@ dependencies = [`
`18`	`18`	`"markdownify>=0.13.0",`
`19`	`19`	`"langchain>=1.0.7",`
`20`	`20`	`"runloop-api-client>=0.69.0",`
	`21`	`+ "docker>=7.1.0",`
`21`	`22`	`]`
`22`	`23`
`23`	`24`	`[project.scripts]`