diff --git a/docs/source/en/tutorials/secure_code_execution.md b/docs/source/en/tutorials/secure_code_execution.md index a8429b6c7..8e1d924ba 100644 --- a/docs/source/en/tutorials/secure_code_execution.md +++ b/docs/source/en/tutorials/secure_code_execution.md @@ -30,8 +30,8 @@ By default, the `CodeAgent` runs LLM-generated code in your environment. This is inherently risky, LLM-generated code could be harmful to your environment. Malicious code execution can occur in several ways: -- **Plain LLM error:** LLMs are still far from perfect and may unintentionally generate harmful commands while attempting to be helpful. While this risk is low, instances have been observed where an LLM attempted to execute potentially dangerous code. -- **Supply chain attack:** Running an untrusted or compromised LLM could expose a system to harmful code generation. While this risk is extremely low when using well-known models on secure inference infrastructure, it remains a theoretical possibility. +- **Plain LLM error:** LLMs are still far from perfect and may unintentionally generate harmful commands while attempting to be helpful. While this risk is low, instances have been observed where an LLM attempted to execute potentially dangerous code. +- **Supply chain attack:** Running an untrusted or compromised LLM could expose a system to harmful code generation. While this risk is extremely low when using well-known models on secure inference infrastructure, it remains a theoretical possibility. - **Prompt injection:** an agent browsing the web could arrive on a malicious website that contains harmful instructions, thus injecting an attack into the agent's memory - **Exploitation of publicly accessible agents:** Agents exposed to the public can be misused by malicious actors to execute harmful code. Attackers may craft adversarial inputs to exploit the agent's execution capabilities, leading to unintended consequences. 
Once malicious code is executed, whether accidentally or intentionally, it can damage the file system, exploit local or cloud-based resources, abuse API services, and even compromise network security. @@ -102,10 +102,10 @@ These safeguards make our interpreter safer. We have used it on a diversity of use cases, without ever observing any damage to the environment. > [!WARNING] -> It's important to understand that no local python sandbox can ever be completely secure. While our interpreter provides significant safety improvements over the standard Python interpreter, it is still possible for a determined attacker or a fine-tuned malicious LLM to find vulnerabilities and potentially harm your environment. -> +> It's important to understand that no local Python sandbox can ever be completely secure. While our interpreter provides significant safety improvements over the standard Python interpreter, it is still possible for a determined attacker or a fine-tuned malicious LLM to find vulnerabilities and potentially harm your environment. +> > For example, if you've allowed packages like `Pillow` to process images, the LLM could generate code that creates thousands of large image files to fill your hard drive. Other advanced escape techniques might exploit deeper vulnerabilities in authorized packages. -> +> > Running LLM-generated code in your local environment always carries some inherent risk. The only way to run LLM-generated code with truly robust security isolation is to use remote execution options like E2B or Docker, as detailed below. The risk of a malicious attack is low when using well-known LLMs from trusted inference providers, but it is not zero. 
@@ -454,6 +454,10 @@ agent = CodeAgent(model=InferenceClientModel(), tools=[], executor_type="wasm") agent.run("Can you give me the 100th Fibonacci number?") ``` +> [!TIP] +> Using the agent as a context manager (with the `with` statement) ensures that the WebAssembly Deno sandbox is cleaned up immediately after the agent completes its task. +> Alternatively, it is possible to manually call the agent's `cleanup()` method. + ### Best practices for sandboxes These key practices apply to Blaxel, E2B, and Docker sandboxes: @@ -481,7 +485,7 @@ These key practices apply to Blaxel, E2B, and Docker sandboxes: As illustrated in the diagram earlier, both sandboxing approaches have different security implications: ### Approach 1: Running just the code snippets in a sandbox -- **Pros**: +- **Pros**: - Easier to set up with a simple parameter (`executor_type="blaxel"`, `executor_type="e2b"`, or `executor_type="docker"`) - No need to transfer API keys to the sandbox - Better protection for your local environment diff --git a/src/smolagents/remote_executors.py b/src/smolagents/remote_executors.py index 4da779bb3..ceb1f8b0a 100644 --- a/src/smolagents/remote_executors.py +++ b/src/smolagents/remote_executors.py @@ -21,6 +21,7 @@ import pickle import re import secrets +import socket import subprocess import tempfile import time @@ -823,6 +824,13 @@ def __del__(self): pass # Silently ignore errors during cleanup +def find_free_port() -> int: + """Find an available port by binding to port 0 and letting the OS choose.""" + with closing(socket.socket(socket.AF_INET, socket.SOCK_STREAM)) as s: + s.bind(("", 0)) + return s.getsockname()[1] + + class WasmExecutor(RemotePythonExecutor): """ Remote Python code executor in a sandboxed WebAssembly environment powered by Pyodide and Deno. 
@@ -860,21 +868,38 @@ def __init__( self.deno_path = deno_path self.timeout = timeout + # Find an available port before setting up permissions + self.port = find_free_port() + # Default minimal permissions needed if deno_permissions is None: # Use minimal permissions for Deno execution - home_dir = os.getenv("HOME") + # Get the actual Deno directory from Deno itself + try: + result = subprocess.run( + [deno_path, "info", "--json"], + capture_output=True, + text=True, + check=True, + ) + deno_info = json.loads(result.stdout) + deno_cache_dir = deno_info.get("denoDir") + if not deno_cache_dir: + raise RuntimeError("Could not determine Deno cache directory from 'deno info --json'") + except (subprocess.SubprocessError, json.JSONDecodeError, KeyError) as e: + raise RuntimeError(f"Failed to get Deno cache directory: {e}") + deno_permissions = [ "allow-net=" + ",".join( [ - "0.0.0.0:8000", # allow requests to the local server + f"0.0.0.0:{self.port}", # allow requests to the local server "cdn.jsdelivr.net:443", # allow loading pyodide packages "pypi.org:443,files.pythonhosted.org:443", # allow pyodide install packages from PyPI ] ), - f"allow-read={home_dir}/.cache/deno", - f"allow-write={home_dir}/.cache/deno", + f"allow-read={deno_cache_dir}", + f"allow-write={deno_cache_dir}", ] self.deno_permissions = [f"--{perm}" for perm in deno_permissions] @@ -890,9 +915,10 @@ def _create_deno_runner(self): self.runner_dir = tempfile.mkdtemp(prefix="pyodide_deno_") self.runner_path = os.path.join(self.runner_dir, "pyodide_runner.js") - # Create the JavaScript runner file + # Create the JavaScript runner file with dynamic port + js_code = self.JS_CODE.format(port=self.port) with open(self.runner_path, "w") as f: - f.write(self.JS_CODE) + f.write(js_code) # Start the Deno server self._start_deno_server() @@ -909,23 +935,32 @@ def _start_deno_server(self): text=True, ) - # Wait for the server to start - time.sleep(2) # Give the server time to start + # Set the server URL with 
dynamic port + self.server_url = f"http://localhost:{self.port}" - # Check if the server started successfully - if self.server_process.poll() is not None: - stderr = self.server_process.stderr.read() - raise RuntimeError(f"Failed to start Deno server: {stderr}") + # Poll until server is ready (more robust than fixed sleep) + max_retries = 20 + retry_delay = 0.5 - self.server_url = "http://localhost:8000" # TODO: Another port? + for _ in range(max_retries): + # Check if process died + if self.server_process.poll() is not None: + stderr = self.server_process.stderr.read() + raise RuntimeError(f"Failed to start Deno server: {stderr}") - # Test the connection - try: - response = requests.get(self.server_url) - if response.status_code != 200: - raise RuntimeError(f"Server responded with status code {response.status_code}: {response.text}") - except requests.RequestException as e: - raise RuntimeError(f"Failed to connect to Deno server: {e}") + # Try to connect to the server + try: + response = requests.get(self.server_url, timeout=1) + if response.status_code == 200: + self.logger.log(f"Deno server started on port {self.port}", level=LogLevel.INFO) + return + except requests.RequestException: + # Server not ready yet, wait and retry + time.sleep(retry_delay) + + # If we get here, server didn't start in time + self.server_process.terminate() + raise RuntimeError(f"Server did not start within {max_retries * retry_delay} seconds") def run_code_raise_errors(self, code: str) -> CodeOutput: """ @@ -1026,14 +1061,42 @@ def delete(self): JS_CODE = dedent("""\ // pyodide_runner.js - Runs Python code in Pyodide within Deno - import { serve } from "https://deno.land/std/http/server.ts"; - import { loadPyodide } from "npm:pyodide"; + import {{ serve }} from "https://deno.land/std/http/server.ts"; + import {{ loadPyodide }} from "npm:pyodide"; // Initialize Pyodide instance const pyodidePromise = loadPyodide(); + // Filter out modules that are already importable in Pyodide (stdlib 
or preloaded) + async function getMissingPackages(pyodide, packages) {{ + if (!packages || packages.length === 0) {{ + return []; + }} + + globalThis.__smol_packages_to_check = packages; + try {{ + const missingJson = pyodide.runPython(` +import importlib.util +import json + +from js import __smol_packages_to_check as _packages + +def _needs_install(name): + try: + return importlib.util.find_spec(name) is None + except ModuleNotFoundError: + return True + +json.dumps([name for name in _packages.to_py() if _needs_install(name)]) +`); + return JSON.parse(missingJson); + }} finally {{ + delete globalThis.__smol_packages_to_check; + }} + }} + // Function to execute Python code and return the result - async function executePythonCode(code) { + async function executePythonCode(code) {{ const pyodide = await pyodidePromise; // Create a capture for stdout @@ -1048,82 +1111,86 @@ def delete(self): let error = null; let stdout = ""; - try { + try {{ // Execute the code result = await pyodide.runPythonAsync(code); // Get captured stdout stdout = pyodide.runPython("sys.stdout.getvalue()"); - } catch (e) { - error = { + }} catch (e) {{ + error = {{ name: e.constructor.name, message: e.message, stack: e.stack - }; + }}; // Extract Python exception details - if (e.constructor.name === "PythonError") { + if (e.constructor.name === "PythonError") {{ // Get the Python exception type from the error message: at the end of the traceback const errorMatch = e.message.match(/\\n([^:]+Exception): /); - if (errorMatch) { + if (errorMatch) {{ error.pythonExceptionType = errorMatch[1].split(".").pop(); - } + }} // If the error is a FinalAnswerException, extract its the encoded value - if (error.pythonExceptionType === "FinalAnswerException") { + if (error.pythonExceptionType === "FinalAnswerException") {{ // Extract the base64 encoded value from the error message const valueMatch = e.message.match(/FinalAnswerException: (.*?)(?:\\n|$)/); - if (valueMatch) { + if (valueMatch) {{ 
error.pythonExceptionValue = valueMatch[1]; - } - } - } - } + }} + }} + }} + }} - return { + return {{ result, stdout, error - }; - } + }}; + }} // Start a simple HTTP server to receive code execution requests - //const port = 8765; - //console.log(`Starting Pyodide server on port ${port}`); - - serve(async (req) => { - if (req.method === "POST") { - try { + serve(async (req) => {{ + if (req.method === "POST") {{ + try {{ const body = await req.json(); - const { code, packages = [] } = body; + const {{ code, packages = [] }} = body; // Load any requested packages - if (packages && packages.length > 0) { + if (packages && packages.length > 0) {{ const pyodide = await pyodidePromise; - //await pyodide.loadPackagesFromImports(code); - await pyodide.loadPackage("micropip"); - const micropip = pyodide.pyimport("micropip"); - try { - await micropip.install(packages); - } catch (e) { - console.error(`Failed to load package ${pkg}: ${e.message}`); - } - } + const packagesToInstall = await getMissingPackages(pyodide, packages); + + if (packagesToInstall.length > 0) {{ + await pyodide.loadPackage("micropip"); + const micropip = pyodide.pyimport("micropip"); + try {{ + await micropip.install(packagesToInstall, {{ keep_going: true }}); // keep going after first error, report list of errors at the end + }} catch (e) {{ + console.error(`Failed to load packages ${{packagesToInstall.join(", ")}}: ${{e.message}}`); + return new Response(JSON.stringify({{ error: e.message }}), {{ + status: 500, + headers: {{ "Content-Type": "application/json" }} + }}); + }} + }} + }} const result = await executePythonCode(code); - return new Response(JSON.stringify(result), { - headers: { "Content-Type": "application/json" } - }); - } catch (e) { - return new Response(JSON.stringify({ error: e.message }), { + return new Response(JSON.stringify(result), {{ + headers: {{ "Content-Type": "application/json" }} + }}); + }} catch (e) {{ + return new Response(JSON.stringify({{ error: e.message }}), {{ 
status: 500, - headers: { "Content-Type": "application/json" } - }); - } - } - - return new Response("Pyodide-Deno Executor is running. Send POST requests with code to execute.", { - headers: { "Content-Type": "text/plain" } - }); - }); + headers: {{ "Content-Type": "application/json" }} + }}); + }} + }} + + return new Response("Pyodide-Deno Executor is running. Send POST requests with code to execute.", {{ + headers: {{ "Content-Type": "text/plain" }} + }}); + }}, {{ port: {port} }}); """) diff --git a/tests/test_remote_executors.py b/tests/test_remote_executors.py index d9d63723d..a9047a952 100644 --- a/tests/test_remote_executors.py +++ b/tests/test_remote_executors.py @@ -446,9 +446,14 @@ def test_wasm_executor_instantiation(self): patch("subprocess.Popen") as mock_popen, patch("requests.get") as mock_get, patch("time.sleep"), + patch("smolagents.remote_executors.find_free_port", return_value=12345), ): # Configure mocks - mock_run.return_value.returncode = 0 + mock_version_run = MagicMock() + mock_version_run.returncode = 0 + mock_info_run = MagicMock() + mock_info_run.stdout = '{"denoDir": "/tmp/deno"}' + mock_run.side_effect = [mock_version_run, mock_info_run] mock_process = MagicMock() mock_process.poll.return_value = None mock_popen.return_value = mock_process @@ -465,9 +470,9 @@ def test_wasm_executor_instantiation(self): assert "pandas" in executor.installed_packages # Verify Deno was checked - assert mock_run.call_count == 1 - assert mock_run.call_args.args[0][0] == "deno" - assert mock_run.call_args.args[0][1] == "--version" + assert mock_run.call_count == 2 + assert mock_run.call_args_list[0].args[0][:2] == ["deno", "--version"] + assert mock_run.call_args_list[1].args[0][:3] == ["deno", "info", "--json"] # Verify server was started assert mock_popen.call_count == 1