diff --git a/mason.py b/mason.py
index 3c236e2d86..250a57af1f 100644
--- a/mason.py
+++ b/mason.py
@@ -254,6 +254,8 @@ def get_env_vars(
         "WANDB_API_KEY",
         "BEAKER_TOKEN",
         "OPENAI_API_KEY",
+        # Needed for tool use scripts.
+        "OPEN_INSTRUCT_TOOL_API_KEY",
         # litellm expects these env vars
         "AZURE_API_KEY",
         "AZURE_API_BASE",
diff --git a/open_instruct/tool_utils/Dockerfile b/open_instruct/tool_utils/Dockerfile
index 96fdc3e329..dba5595655 100644
--- a/open_instruct/tool_utils/Dockerfile
+++ b/open_instruct/tool_utils/Dockerfile
@@ -4,14 +4,19 @@ FROM python:3.10-slim
 # Set working directory in container
 WORKDIR /app
 
+# Install uv
+COPY --from=ghcr.io/astral-sh/uv:0.8.8 /uv /bin/uv
+
 # Copy requirements first to leverage Docker cache
-COPY requirements.txt requirements.txt
+COPY open_instruct/tool_utils/requirements.txt requirements.txt
 
-# Install dependencies
-RUN pip install --no-cache-dir -r requirements.txt
+# Install dependencies using uv
+RUN uv pip install --system --no-cache -r requirements.txt
 
-# Copy the rest of the application
-COPY . .
+# Copy the tool server files
+COPY open_instruct/__init__.py open_instruct/__init__.py
+COPY open_instruct/logger_utils.py open_instruct/logger_utils.py
+COPY open_instruct/tool_utils/tool_server.py tool_server.py
 
 # Create cache directory for code execution
 RUN mkdir -p cache && chmod 777 cache
@@ -23,4 +28,4 @@ ENV PYTHONUNBUFFERED=1
 EXPOSE 8080
 
 # Command to run the application
-CMD ["python", "tool_server.py"]
\ No newline at end of file
+CMD ["uv", "run", "--no-project", "tool_server.py"]
\ No newline at end of file
diff --git a/open_instruct/tool_utils/test_tools.py b/open_instruct/tool_utils/test_tools.py
index a71151e69f..87050efbae 100644
--- a/open_instruct/tool_utils/test_tools.py
+++ b/open_instruct/tool_utils/test_tools.py
@@ -60,15 +60,19 @@ class TestPythonCodeTool(unittest.TestCase):
     @classmethod
     def setUpClass(cls):
         """Start the tool server for tests."""
-        # Start the server in a subprocess
+        import os
+
+        env = os.environ.copy()
+        env.pop("OPEN_INSTRUCT_TOOL_API_KEY", None)
+
         cls.server_process = subprocess.Popen(
             ["uv", "run", "uvicorn", "tool_server:app", "--host", "0.0.0.0", "--port", "1212"],
             cwd="open_instruct/tool_utils",
             stdout=subprocess.PIPE,
             stderr=subprocess.PIPE,
-            start_new_session=True,  # Create new process group
+            start_new_session=True,
+            env=env,
         )
-        # Wait for server to start
         time.sleep(3)
 
         cls.api_endpoint = "http://localhost:1212/execute"
diff --git a/open_instruct/tool_utils/tool_server.py b/open_instruct/tool_utils/tool_server.py
index 9142f19c3d..23edc6b334 100644
--- a/open_instruct/tool_utils/tool_server.py
+++ b/open_instruct/tool_utils/tool_server.py
@@ -3,8 +3,19 @@
 
 This script sets up a FastAPI server that allows users to execute Python code snippets
 
+# API Key Authentication
+
+The server authenticates requests with an API key. Set the OPEN_INSTRUCT_TOOL_API_KEY environment variable (if it is unset, key validation is disabled and the server logs a warning):
+
+```bash
+export OPEN_INSTRUCT_TOOL_API_KEY="your-api-key-here"
+```
+
+When running locally:
+```bash
 cd open_instruct/tool_utils
-PREIMPORT_PKGS=pandas,numpy,sympy,time,math,networkx uv run uvicorn tool_server:app --host 0.0.0.0 --port 1212
+OPEN_INSTRUCT_TOOL_API_KEY="your-api-key-here" PREIMPORT_PKGS=pandas,numpy,sympy,time,math,networkx uv run uvicorn tool_server:app --host 0.0.0.0 --port 1212
+```
 
 ```bash
 docker build -t tool-server .
@@ -16,8 +27,8 @@ docker build -t ghcr.io/allenai/open-instruct/python-code-executor -f open_instruct/tool_utils/Dockerfile .
docker push ghcr.io/allenai/open-instruct/python-code-executor -# Run the server -docker run -p 1212:8080 tool-server +# Run the server (pass API key via environment variable) +docker run -p 1212:8080 -e OPEN_INSTRUCT_TOOL_API_KEY="your-api-key-here" tool-server # gcloud run deploy: gcloud run deploy open-instruct-tool-server --project ai2-allennlp --region us-central1 --source . @@ -39,25 +50,31 @@ 1) the timeout works 2) the timeout in the first curl does not block the second curl +All requests now require the X-API-Key header: + ``` curl -X POST https://open-instruct-tool-server-10554368204.us-central1.run.app/execute \ -H "Content-Type: application/json" \ + -H "X-API-Key: $OPEN_INSTRUCT_TOOL_API_KEY" \ -d '{"code": "import time;time.sleep(4)", "timeout": 3}' \ -w '\nTotal time: %{time_total}s\n' curl -X POST https://open-instruct-tool-server-10554368204.us-central1.run.app/execute \ -H "Content-Type: application/json" \ + -H "X-API-Key: $OPEN_INSTRUCT_TOOL_API_KEY" \ -d '{"code": "print(1)", "timeout": 3}' \ -w '\nTotal time: %{time_total}s\n' curl -X POST https://open-instruct-tool-server-10554368204.us-central1.run.app/execute \ -H "Content-Type: application/json" \ + -H "X-API-Key: $OPEN_INSTRUCT_TOOL_API_KEY" \ -d '{"code": "import sympy", "timeout": 3}' \ -w '\nTotal time: %{time_total}s\n' curl -X POST https://open-instruct-tool-server-10554368204.us-central1.run.app/execute \ -H "Content-Type: application/json" \ + -H "X-API-Key: $OPEN_INSTRUCT_TOOL_API_KEY" \ -d '{"code": "import sympy", "timeout": 3}' \ -w '\nTotal time: %{time_total}s\n' ``` @@ -80,7 +97,7 @@ from contextlib import redirect_stderr, redirect_stdout from typing import Optional -from fastapi import FastAPI +from fastapi import Depends, FastAPI, Header, HTTPException from pydantic import BaseModel from open_instruct import logger_utils @@ -202,11 +219,30 @@ class CodeResponse(BaseModel): success: bool +############################################################################### +# API Key Authentication +############################################################################### +EXPECTED_API_KEY = os.getenv("OPEN_INSTRUCT_TOOL_API_KEY") + + +async def verify_api_key(x_api_key: str = Header(None, alias="X-API-Key")): + if not EXPECTED_API_KEY: + logger.warning("OPEN_INSTRUCT_TOOL_API_KEY not set - API key validation disabled") + return + if not x_api_key: + logger.warning("Missing API key in request") + raise HTTPException(status_code=401, detail="Missing API key") + if x_api_key != EXPECTED_API_KEY: + logger.warning("Invalid API key attempt") + raise HTTPException(status_code=401, detail="Invalid API key") + return x_api_key + + ############################################################################### # Endpoints ############################################################################### @app.post("/execute", response_model=CodeResponse) -async def execute_code(req: CodeRequest): # noqa: D401 +async def execute_code(req: CodeRequest, api_key: str = Depends(verify_api_key)): # noqa: D401 global process_pool # noqa: PLW0603 # Log input (truncate to 200 chars to avoid huge logs) @@ -244,4 +280,21 @@ async def execute_code(req: CodeRequest): # noqa: D401 @app.get("/") async def root(): # noqa: D401 - return {"message": "Python Code Executor API — POST /execute {code, timeout}"} + host = os.getenv("HOST", "http://localhost:1212") + + examples = f"""Python Code Executor API + +Example usage: + +curl -X POST {host}/execute \\ + -H "Content-Type: application/json" \\ + -H "X-API-Key: 
$OPEN_INSTRUCT_TOOL_API_KEY" \\ + -d '{{"code": "print(1 + 1)", "timeout": 3}}' + +curl -X POST {host}/execute \\ + -H "Content-Type: application/json" \\ + -H "X-API-Key: $OPEN_INSTRUCT_TOOL_API_KEY" \\ + -d '{{"code": "import sympy; print(sympy.__version__)", "timeout": 3}}' +""" + + return {"message": examples} diff --git a/open_instruct/tool_utils/tools.py b/open_instruct/tool_utils/tools.py index efd60456ed..6720499ac6 100644 --- a/open_instruct/tool_utils/tools.py +++ b/open_instruct/tool_utils/tools.py @@ -35,8 +35,9 @@ class PythonCodeTool(Tool): """@vwxyzjn: I recommend using something like a FastAPI for this kind of stuff; 1) you won't accidentally block the main vLLM process and 2) way easier to parallelize via load balancing.""" - def __init__(self, api_endpoint: str, *args, **kwargs): + def __init__(self, api_endpoint: str, api_key: str = None, *args, **kwargs): self.api_endpoint = api_endpoint + self.api_key = api_key super().__init__(*args, **kwargs) def __call__(self, prompt: str) -> ToolOutput: @@ -79,17 +80,21 @@ def find_sum_of_a(): timeout_seconds = 3 start_time = time.time() try: - # Call the FastAPI endpoint to execute the code with client-side timeout + headers = {"Content-Type": "application/json"} + if self.api_key: + headers["X-API-Key"] = self.api_key + response = requests.post( self.api_endpoint, - json={"code": code, "timeout": timeout_seconds}, # Server-side timeout (keeping this) - timeout=timeout_seconds, # Client-side timeout + json={"code": code, "timeout": timeout_seconds}, + headers=headers, + timeout=timeout_seconds, ) - # Parse the response + response.raise_for_status() + result = response.json() - # Process the API response output = result["output"] error = result.get("error") or "" diff --git a/requirements.txt b/requirements.txt index d9295277ec..c428c8e2a1 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,14 +1,14 @@ # This file was autogenerated by uv via the following command: # uv export --format requirements-txt --no-hashes --all-extras --no-emit-project -absl-py==2.3.0 +absl-py==2.3.1 # via tensorboard -accelerate==1.8.1 +accelerate==1.10.1 # via # open-instruct # peft aiohappyeyeballs==2.6.1 # via aiohttp -aiohttp==3.12.9 +aiohttp==3.13.0 # via # aiohttp-cors # fsspec @@ -17,15 +17,13 @@ aiohttp==3.12.9 # vllm aiohttp-cors==0.8.1 # via ray -aiosignal==1.3.2 +aiosignal==1.4.0 # via aiohttp -airportsdata==20250523 - # via outlines annotated-types==0.7.0 # via pydantic antlr4-python3-runtime==4.11.0 # via open-instruct -anyio==4.9.0 +anyio==4.11.0 # via # httpx # openai @@ -33,35 +31,37 @@ anyio==4.9.0 # watchfiles astor==0.8.1 # via depyf -async-timeout==5.0.1 ; python_full_version < '3.11' - # via aiohttp -attrs==25.3.0 +attrs==25.4.0 # via # aiohttp # jsonschema # referencing babel==2.17.0 # via mkdocs-material -backrefs==5.8 +backrefs==5.9 # via mkdocs-material -beaker-py==1.36.4 -bitsandbytes==0.46.0 ; sys_platform != 'darwin' +beaker-py==2.5.1 +bitsandbytes==0.48.1 ; sys_platform != 'darwin' # via open-instruct -blake3==1.0.5 +blake3==1.0.8 # via vllm -cachetools==5.5.2 +cachetools==6.2.1 # via # google-auth # vllm -certifi==2025.4.26 +cbor2==5.7.0 + # via vllm +certifi==2025.10.5 # via # httpcore # httpx # requests # sentry-sdk -cffi==1.17.1 ; implementation_name == 'pypy' - # via pyzmq -charset-normalizer==3.4.2 +cffi==2.0.0 + # via + # pyzmq + # soundfile +charset-normalizer==3.4.4 # via requests click==8.2.1 # via @@ -74,9 +74,7 @@ click==8.2.1 # uvicorn # wandb cloudpickle==3.1.1 - # via - # outlines - # vllm + # via vllm 
colorama==0.4.6 # via # click @@ -87,60 +85,58 @@ colorama==0.4.6 # pytest # tqdm # uvicorn -colorful==0.5.6 +colorful==0.5.7 # via ray -compressed-tensors==0.10.1 +compressed-tensors==0.11.0 # via vllm -contourpy==1.3.2 +contourpy==1.3.3 # via matplotlib -cupy-cuda12x==13.4.1 ; sys_platform != 'darwin' +cupy-cuda12x==13.6.0 ; sys_platform != 'darwin' # via ray cycler==0.12.1 # via matplotlib -datasets==4.0.0 +datasets==4.2.0 # via open-instruct -debugpy==1.8.14 +debugpy==1.8.17 # via open-instruct deepspeed==0.15.4 # via open-instruct -depyf==0.18.0 +depyf==0.19.0 # via vllm -dill==0.3.8 +dill==0.4.0 # via # datasets # depyf # multiprocess diskcache==5.6.3 - # via outlines -distlib==0.3.9 + # via vllm +distlib==0.4.0 # via virtualenv distro==1.9.0 # via openai -dnspython==2.7.0 +dnspython==2.8.0 # via email-validator -docker==7.1.0 - # via beaker-py docker-pycreds==0.4.0 # via wandb einops==0.8.1 # via # flash-attn # vllm -email-validator==2.2.0 - # via fastapi -exceptiongroup==1.3.0 ; python_full_version < '3.11' +email-validator==2.3.0 # via - # anyio - # pytest -fastapi==0.115.12 + # fastapi + # pydantic +fastapi==0.119.0 # via # open-instruct # vllm -fastapi-cli==0.0.7 +fastapi-cli==0.0.13 # via fastapi +fastapi-cloud-cli==0.3.1 + # via fastapi-cli fastrlock==0.8.3 ; sys_platform != 'darwin' # via cupy-cuda12x -filelock==3.18.0 +filelock==3.20.0 # via # datasets # huggingface-hub @@ -149,40 +145,40 @@ filelock==3.18.0 # transformers # virtualenv # vllm -flash-attn==2.8.0.post2 ; sys_platform != 'darwin' +flash-attn==2.8.3 ; sys_platform != 'darwin' # via open-instruct -fonttools==4.58.1 +fonttools==4.60.1 # via matplotlib -frozenlist==1.6.2 +frozendict==2.4.6 + # via compressed-tensors +frozenlist==1.8.0 # via # aiohttp # aiosignal -fsspec==2025.3.0 +fsspec==2025.9.0 # via # datasets # huggingface-hub # torch -gguf==0.17.0 +gguf==0.17.1 # via vllm ghp-import==2.1.0 # via mkdocs gitdb==4.0.12 # via gitpython -gitpython==3.1.44 +gitpython==3.1.45 # via wandb -google-api-core==2.25.0 +google-api-core==2.26.0 # via opencensus -google-auth==2.39.0 +google-auth==2.41.1 # via google-api-core +google-crc32c==1.7.1 + # via beaker-py googleapis-common-protos==1.70.0 - # via - # google-api-core - # opentelemetry-exporter-otlp-proto-grpc - # opentelemetry-exporter-otlp-proto-http -grpcio==1.72.1 + # via google-api-core +grpcio==1.75.1 # via # beaker-py - # opentelemetry-exporter-otlp-proto-grpc # ray # tensorboard h11==0.16.0 @@ -191,28 +187,29 @@ h11==0.16.0 # uvicorn hf-transfer==0.1.9 # via open-instruct -hf-xet==1.1.3 +hf-xet==1.1.10 ; platform_machine == 'aarch64' or platform_machine == 'amd64' or platform_machine == 'arm64' or platform_machine == 'x86_64' # via huggingface-hub hjson==3.1.0 # via deepspeed httpcore==1.0.9 # via httpx -httptools==0.6.4 +httptools==0.7.1 # via uvicorn httpx==0.28.1 # via + # datasets # fastapi + # fastapi-cloud-cli # litellm # openai -huggingface-hub==0.32.4 +huggingface-hub==0.35.3 # via # accelerate # datasets # peft # tokenizers # transformers - # vllm -idna==3.10 +idna==3.11 # via # anyio # email-validator @@ -221,57 +218,50 @@ idna==3.10 # yarl immutabledict==1.2.0 # via open-instruct -importlib-metadata==8.0.0 +importlib-metadata==8.7.0 # via # litellm # opentelemetry-api iniconfig==2.1.0 # via pytest interegular==0.3.3 - # via - # lm-format-enforcer - # outlines - # outlines-core + # via lm-format-enforcer jinja2==3.1.6 # via # fastapi # litellm # mkdocs # mkdocs-material - # outlines + # mlx-lm # torch -jiter==0.10.0 +jiter==0.11.0 # via openai 
-joblib==1.5.1 +joblib==1.5.2 # via nltk -jsonschema==4.24.0 +jsonschema==4.25.1 # via # litellm # mistral-common - # outlines - # outlines-core # ray -jsonschema-specifications==2025.4.1 +jsonschema-specifications==2025.9.1 # via jsonschema -kiwisolver==1.4.8 +kiwisolver==1.4.9 # via matplotlib langdetect==1.0.9 # via open-instruct lark==1.2.2 - # via - # outlines - # vllm -liger-kernel==0.5.10 ; sys_platform != 'darwin' + # via vllm +liger-kernel==0.6.2 ; sys_platform != 'darwin' # via open-instruct -litellm==1.72.0 +litellm==1.75.0 # via open-instruct -llguidance==0.7.27 ; platform_machine == 'aarch64' or platform_machine == 'arm64' or platform_machine == 'x86_64' +llguidance==0.7.30 ; platform_machine == 'aarch64' or platform_machine == 'arm64' or platform_machine == 'x86_64' # via vllm llvmlite==0.44.0 # via numba -lm-format-enforcer==0.10.11 +lm-format-enforcer==0.11.3 # via vllm -markdown==3.8 +markdown==3.9 # via # markdown-include # mkdocs @@ -279,14 +269,14 @@ markdown==3.8 # pymdown-extensions # tensorboard markdown-include==0.8.1 -markdown-it-py==3.0.0 +markdown-it-py==4.0.0 # via rich -markupsafe==3.0.2 +markupsafe==3.0.3 # via # jinja2 # mkdocs # werkzeug -matplotlib==3.10.3 +matplotlib==3.10.7 # via open-instruct mdurl==0.1.2 # via markdown-it-py @@ -294,41 +284,43 @@ mergedeep==1.3.4 # via # mkdocs # mkdocs-get-deps -mistral-common==1.5.6 +mistral-common==1.8.5 # via vllm mkdocs==1.6.1 # via mkdocs-material mkdocs-get-deps==0.2.0 # via mkdocs -mkdocs-material==9.6.14 +mkdocs-material==9.6.22 mkdocs-material-extensions==1.3.1 # via mkdocs-material +mlx==0.29.2 ; platform_machine == 'arm64' and sys_platform == 'darwin' + # via mlx-lm +mlx-lm==0.28.2 ; platform_machine == 'arm64' and sys_platform == 'darwin' + # via xgrammar +mlx-metal==0.29.2 ; platform_machine == 'arm64' and sys_platform == 'darwin' + # via mlx mpmath==1.3.0 # via sympy -msgpack==1.1.0 +msgpack==1.1.2 # via # deepspeed # ray msgspec==0.19.0 # via vllm -multidict==6.4.4 +multidict==6.7.0 # via # aiohttp # yarl multiprocess==0.70.16 # via datasets -nest-asyncio==1.6.0 - # via outlines -networkx==3.4.2 ; python_full_version < '3.11' - # via torch -networkx==3.5 ; python_full_version >= '3.11' +networkx==3.5 # via torch -ninja==1.11.1.4 +ninja==1.13.0 # via # deepspeed # vllm # xgrammar -nltk==3.9.1 +nltk==3.9.2 # via open-instruct numba==0.61.2 # via vllm @@ -343,63 +335,68 @@ numpy==1.26.4 # gguf # matplotlib # mistral-common + # mlx-lm # numba # open-instruct # opencv-python-headless - # outlines # pandas # peft # scipy + # soundfile + # soxr # tensorboard # torchvision # transformers # vllm # xformers -nvidia-cublas-cu12==12.8.3.14 ; platform_machine == 'x86_64' and sys_platform == 'linux' + # xgrammar +nvidia-cublas-cu12==12.8.4.1 ; platform_machine == 'x86_64' and sys_platform == 'linux' # via # nvidia-cudnn-cu12 # nvidia-cusolver-cu12 # torch -nvidia-cuda-cupti-cu12==12.8.57 ; platform_machine == 'x86_64' and sys_platform == 'linux' +nvidia-cuda-cupti-cu12==12.8.90 ; platform_machine == 'x86_64' and sys_platform == 'linux' # via torch -nvidia-cuda-nvrtc-cu12==12.8.61 ; platform_machine == 'x86_64' and sys_platform == 'linux' +nvidia-cuda-nvrtc-cu12==12.8.93 ; platform_machine == 'x86_64' and sys_platform == 'linux' # via torch -nvidia-cuda-runtime-cu12==12.8.57 ; platform_machine == 'x86_64' and sys_platform == 'linux' +nvidia-cuda-runtime-cu12==12.8.90 ; platform_machine == 'x86_64' and sys_platform == 'linux' # via torch -nvidia-cudnn-cu12==9.7.1.26 ; platform_machine == 'x86_64' and sys_platform == 
'linux' +nvidia-cudnn-cu12==9.10.2.21 ; platform_machine == 'x86_64' and sys_platform == 'linux' # via torch -nvidia-cufft-cu12==11.3.3.41 ; platform_machine == 'x86_64' and sys_platform == 'linux' +nvidia-cufft-cu12==11.3.3.83 ; platform_machine == 'x86_64' and sys_platform == 'linux' # via torch -nvidia-cufile-cu12==1.13.0.11 ; platform_machine == 'x86_64' and sys_platform == 'linux' +nvidia-cufile-cu12==1.13.1.3 ; platform_machine == 'x86_64' and sys_platform == 'linux' # via torch -nvidia-curand-cu12==10.3.9.55 ; platform_machine == 'x86_64' and sys_platform == 'linux' +nvidia-curand-cu12==10.3.9.90 ; platform_machine == 'x86_64' and sys_platform == 'linux' # via torch -nvidia-cusolver-cu12==11.7.2.55 ; platform_machine == 'x86_64' and sys_platform == 'linux' +nvidia-cusolver-cu12==11.7.3.90 ; platform_machine == 'x86_64' and sys_platform == 'linux' # via torch -nvidia-cusparse-cu12==12.5.7.53 ; platform_machine == 'x86_64' and sys_platform == 'linux' +nvidia-cusparse-cu12==12.5.8.93 ; platform_machine == 'x86_64' and sys_platform == 'linux' # via # nvidia-cusolver-cu12 # torch -nvidia-cusparselt-cu12==0.6.3 ; platform_machine == 'x86_64' and sys_platform == 'linux' +nvidia-cusparselt-cu12==0.7.1 ; platform_machine == 'x86_64' and sys_platform == 'linux' # via torch -nvidia-ml-py==12.575.51 +nvidia-ml-py==13.580.82 # via nvitop -nvidia-nccl-cu12==2.26.2 ; platform_machine == 'x86_64' and sys_platform == 'linux' +nvidia-nccl-cu12==2.27.3 ; platform_machine == 'x86_64' and sys_platform == 'linux' # via torch -nvidia-nvjitlink-cu12==12.8.61 ; platform_machine == 'x86_64' and sys_platform == 'linux' +nvidia-nvjitlink-cu12==12.8.93 ; platform_machine == 'x86_64' and sys_platform == 'linux' # via # nvidia-cufft-cu12 # nvidia-cusolver-cu12 # nvidia-cusparse-cu12 # torch -nvidia-nvtx-cu12==12.8.55 ; platform_machine == 'x86_64' and sys_platform == 'linux' +nvidia-nvtx-cu12==12.8.90 ; platform_machine == 'x86_64' and sys_platform == 'linux' # via torch -nvitop==1.5.1 +nvitop==1.5.3 # via open-instruct -openai==1.84.0 +openai==2.3.0 # via # litellm # vllm +openai-harmony==0.0.4 + # via vllm opencensus==0.11.4 # via ray opencensus-context==0.1.3 @@ -408,45 +405,28 @@ opencv-python-headless==4.11.0.86 # via # mistral-common # vllm -opentelemetry-api==1.36.0 +opentelemetry-api==1.37.0 # via - # opentelemetry-exporter-otlp-proto-grpc - # opentelemetry-exporter-otlp-proto-http + # opentelemetry-exporter-prometheus # opentelemetry-sdk # opentelemetry-semantic-conventions - # vllm -opentelemetry-exporter-otlp==1.36.0 - # via vllm -opentelemetry-exporter-otlp-proto-common==1.36.0 - # via - # opentelemetry-exporter-otlp-proto-grpc - # opentelemetry-exporter-otlp-proto-http -opentelemetry-exporter-otlp-proto-grpc==1.36.0 - # via opentelemetry-exporter-otlp -opentelemetry-exporter-otlp-proto-http==1.36.0 - # via opentelemetry-exporter-otlp -opentelemetry-proto==1.36.0 - # via - # opentelemetry-exporter-otlp-proto-common - # opentelemetry-exporter-otlp-proto-grpc - # opentelemetry-exporter-otlp-proto-http -opentelemetry-sdk==1.36.0 - # via - # opentelemetry-exporter-otlp-proto-grpc - # opentelemetry-exporter-otlp-proto-http - # vllm -opentelemetry-semantic-conventions==0.57b0 +opentelemetry-exporter-prometheus==0.58b0 + # via ray +opentelemetry-proto==1.37.0 + # via ray +opentelemetry-sdk==1.37.0 + # via + # opentelemetry-exporter-prometheus + # ray +opentelemetry-semantic-conventions==0.58b0 # via opentelemetry-sdk -opentelemetry-semantic-conventions-ai==0.4.9 - # via vllm -outlines==0.1.11 
+outlines-core==0.2.11 # via vllm -outlines-core==0.1.26 - # via outlines packaging==25.0 # via # accelerate # beaker-py + # bitsandbytes # datasets # deepspeed # huggingface-hub @@ -461,36 +441,38 @@ packaging==25.0 # transformers paginate==0.5.7 # via mkdocs-material -pandas==2.2.3 +pandas==2.3.3 # via datasets parameterized==0.9.0 -partial-json-parser==0.2.1.1.post5 +partial-json-parser==0.2.1.1.post6 # via vllm pathspec==0.12.1 # via mkdocs -peft==0.15.2 +peft==0.17.1 # via open-instruct -pillow==11.2.1 +pillow==12.0.0 # via # matplotlib # mistral-common + # tensorboard # torchvision # vllm -platformdirs==4.3.8 +platformdirs==4.5.0 # via # mkdocs-get-deps # virtualenv # wandb pluggy==1.6.0 # via pytest -prometheus-client==0.22.1 +prometheus-client==0.23.1 # via + # opentelemetry-exporter-prometheus # prometheus-fastapi-instrumentator # ray # vllm prometheus-fastapi-instrumentator==7.1.0 # via vllm -propcache==0.3.1 +propcache==0.4.1 # via # aiohttp # yarl @@ -501,13 +483,14 @@ protobuf==5.29.5 # beaker-py # google-api-core # googleapis-common-protos + # mlx-lm # opentelemetry-proto # proto-plus # ray # tensorboard # vllm # wandb -psutil==7.0.0 +psutil==7.1.0 # via # accelerate # deepspeed @@ -519,9 +502,9 @@ py-cpuinfo==9.0.0 # via # deepspeed # vllm -py-spy==0.4.0 +py-spy==0.4.1 # via ray -pyarrow==20.0.0 +pyarrow==21.0.0 # via datasets pyasn1==0.6.1 # via @@ -529,55 +512,58 @@ pyasn1==0.6.1 # rsa pyasn1-modules==0.4.2 # via google-auth +pybase64==1.4.2 + # via vllm pycountry==24.6.1 - # via outlines -pycparser==2.22 ; implementation_name == 'pypy' + # via pydantic-extra-types +pycparser==2.23 ; implementation_name != 'PyPy' # via cffi -pydantic==2.11.5 +pydantic==2.12.2 # via - # beaker-py # compressed-tensors # deepspeed # fastapi + # fastapi-cloud-cli # litellm # lm-format-enforcer # mistral-common # open-instruct # openai - # outlines + # openai-harmony + # pydantic-extra-types # ray # vllm # xgrammar -pydantic-core==2.33.2 +pydantic-core==2.41.4 # via pydantic -pygments==2.19.1 +pydantic-extra-types==2.10.6 + # via mistral-common +pygments==2.19.2 # via # mkdocs-material # pytest # rich -pymdown-extensions==10.15 +pymdown-extensions==10.16.1 # via mkdocs-material -pyparsing==3.2.3 +pyparsing==3.2.5 # via matplotlib -pytest==8.4.0 +pytest==8.4.2 python-dateutil==2.9.0.post0 # via # ghp-import # matplotlib # pandas -python-dotenv==1.1.0 +python-dotenv==1.1.1 # via # litellm # uvicorn -python-json-logger==3.3.0 +python-json-logger==4.0.0 # via vllm python-multipart==0.0.20 # via fastapi pytz==2025.2 # via pandas -pywin32==310 ; sys_platform == 'win32' - # via docker -pyyaml==6.0.2 +pyyaml==6.0.3 # via # accelerate # beaker-py @@ -587,6 +573,7 @@ pyyaml==6.0.2 # lm-format-enforcer # mkdocs # mkdocs-get-deps + # mlx-lm # peft # pymdown-extensions # pyyaml-env-tag @@ -597,71 +584,70 @@ pyyaml==6.0.2 # wandb pyyaml-env-tag==1.1 # via mkdocs -pyzmq==26.4.0 +pyzmq==27.1.0 # via vllm -ray==2.46.0 +ray==2.50.0 # via # open-instruct # vllm -referencing==0.36.2 +referencing==0.37.0 # via # jsonschema # jsonschema-specifications - # outlines -regex==2024.11.6 +regex==2025.9.18 # via # nltk # tiktoken # transformers # vllm -requests==2.32.3 +requests==2.32.5 # via # beaker-py # datasets - # docker # google-api-core # huggingface-hub # mistral-common # mkdocs-material # open-instruct - # opentelemetry-exporter-otlp-proto-http - # outlines # ray # tiktoken # transformers # vllm # wandb -rich==13.9.4 +rich==14.2.0 # via - # beaker-py # rich-toolkit # typer -rich-toolkit==0.14.7 - # via fastapi-cli 
-rpds-py==0.25.1 +rich-toolkit==0.15.1 + # via + # fastapi-cli + # fastapi-cloud-cli +rignore==0.7.1 + # via fastapi-cloud-cli +rpds-py==0.27.1 # via # jsonschema # referencing -rsa==4.7.2 +rsa==4.9.1 # via google-auth -ruff==0.12.0 -safetensors==0.5.3 +ruff==0.14.0 +safetensors==0.6.2 # via # accelerate # peft # transformers -scipy==1.15.3 +scipy==1.16.2 # via vllm -sentencepiece==0.2.0 +sentencepiece==0.2.1 + # via vllm +sentry-sdk==2.42.0 + # via + # fastapi-cloud-cli + # wandb +setproctitle==1.3.7 # via - # gguf - # mistral-common # vllm - # xgrammar -sentry-sdk==2.29.1 - # via wandb -setproctitle==1.3.6 - # via wandb + # wandb setuptools==79.0.1 # via # open-instruct @@ -678,9 +664,8 @@ six==1.17.0 # langdetect # opencensus # python-dateutil - # tensorboard # vllm -smart-open==7.1.0 +smart-open==7.3.1 # via ray smmap==5.0.2 # via gitdb @@ -688,42 +673,42 @@ sniffio==1.3.1 # via # anyio # openai -starlette==0.46.2 +soundfile==0.13.1 + # via mistral-common +soxr==1.0.0 + # via mistral-common +starlette==0.48.0 # via # fastapi # prometheus-fastapi-instrumentator sympy==1.14.0 # via torch -tensorboard==2.19.0 +tensorboard==2.20.0 # via open-instruct tensorboard-data-server==0.7.2 # via tensorboard -tiktoken==0.9.0 +tiktoken==0.12.0 # via # litellm # mistral-common # vllm - # xgrammar -tokenizers==0.21.1 +tokenizers==0.22.1 # via # litellm # transformers # vllm -tomli==2.2.1 ; python_full_version < '3.11' - # via pytest -torch==2.7.0 ; sys_platform == 'darwin' +torch==2.8.0 ; sys_platform == 'darwin' # via # accelerate # compressed-tensors # deepspeed # open-instruct - # outlines # peft # torchaudio # torchvision # vllm # xgrammar -torch==2.7.0+cu128 ; sys_platform != 'darwin' +torch==2.8.0+cu128 ; sys_platform != 'darwin' # via # accelerate # bitsandbytes @@ -732,16 +717,15 @@ torch==2.7.0+cu128 ; sys_platform != 'darwin' # flash-attn # liger-kernel # open-instruct - # outlines # peft # torchaudio # torchvision # vllm # xformers # xgrammar -torchaudio==2.7.0 +torchaudio==2.8.0 # via vllm -torchvision==0.22.0 +torchvision==0.23.0 # via vllm tqdm==4.67.1 # via @@ -751,74 +735,74 @@ tqdm==4.67.1 # huggingface-hub # nltk # openai - # outlines # peft # transformers # vllm -transformers==4.52.4 +transformers==4.57.1 # via # compressed-tensors + # mlx-lm # open-instruct # peft # vllm # xgrammar -triton==3.3.0 ; sys_platform != 'darwin' +triton==3.4.0 ; sys_platform != 'darwin' # via # liger-kernel # torch # xgrammar -typer==0.16.0 - # via fastapi-cli -typing-extensions==4.14.0 +typer==0.19.2 + # via + # fastapi-cli + # fastapi-cloud-cli +typing-extensions==4.15.0 # via + # aiosignal # anyio - # exceptiongroup # fastapi + # grpcio # huggingface-hub # mistral-common - # multidict # openai # opentelemetry-api - # opentelemetry-exporter-otlp-proto-grpc - # opentelemetry-exporter-otlp-proto-http # opentelemetry-sdk # opentelemetry-semantic-conventions - # outlines # pydantic # pydantic-core + # pydantic-extra-types # referencing - # rich # rich-toolkit + # starlette # torch # typer # typing-inspection - # uvicorn # vllm -typing-inspection==0.4.1 + # xgrammar +typing-inspection==0.4.2 # via pydantic tzdata==2025.2 # via pandas -urllib3==2.4.0 +urllib3==2.5.0 # via - # docker # requests # sentry-sdk -uvicorn==0.34.3 +uvicorn==0.37.0 # via # fastapi # fastapi-cli + # fastapi-cloud-cli # open-instruct uvloop==0.21.0 ; platform_python_implementation != 'PyPy' and sys_platform != 'cygwin' and sys_platform != 'win32' # via uvicorn -virtualenv==20.31.2 +virtualenv==20.35.3 # via ray -vllm==0.9.1 
+vllm==0.11.0
     # via open-instruct
 wandb==0.18.1
     # via open-instruct
 watchdog==6.0.0
     # via mkdocs
-watchfiles==1.0.5
+watchfiles==1.1.1
     # via
     #   uvicorn
     #   vllm
@@ -828,15 +812,15 @@ werkzeug==3.1.3
     # via tensorboard
 windows-curses==2.4.1 ; sys_platform == 'win32'
     # via nvitop
-wrapt==1.17.2
+wrapt==1.17.3
     # via smart-open
-xformers==0.0.30 ; platform_machine == 'x86_64' and sys_platform == 'linux'
+xformers==0.0.32.post1 ; platform_machine == 'x86_64' and sys_platform == 'linux'
     # via vllm
-xgrammar==0.1.19 ; platform_machine == 'aarch64' or platform_machine == 'x86_64'
+xgrammar==0.1.25 ; platform_machine == 'aarch64' or platform_machine == 'arm64' or platform_machine == 'x86_64'
     # via vllm
-xxhash==3.5.0
+xxhash==3.6.0
     # via datasets
-yarl==1.20.0
+yarl==1.22.0
     # via aiohttp
-zipp==3.22.0
+zipp==3.23.0
     # via importlib-metadata
diff --git a/scripts/rebuild_tool_server.sh b/scripts/rebuild_tool_server.sh
new file mode 100755
index 0000000000..af567a51ce
--- /dev/null
+++ b/scripts/rebuild_tool_server.sh
@@ -0,0 +1,84 @@
+#!/bin/bash
+set -e
+
+cleanup() {
+    if [ -n "$SERVER_PID" ]; then
+        echo "Stopping local server (PID: $SERVER_PID)..."
+        kill $SERVER_PID 2>/dev/null || true
+    fi
+}
+
+trap cleanup EXIT
+
+if [[ -n "$(git status --porcelain 2>/dev/null)" ]]; then
+    echo "Error: Uncommitted changes detected. Please commit or stash before running."
+    echo "------- git status (short) -------"
+    git status --short
+    exit 1
+fi
+
+echo "Building Docker image for ghcr.io..."
+docker build -t ghcr.io/allenai/open-instruct/python-code-executor -t tool-server -f open_instruct/tool_utils/Dockerfile .
+
+echo "Starting server locally on port 1212..."
+docker run -p 1212:8080 -e OPEN_INSTRUCT_TOOL_API_KEY="$OPEN_INSTRUCT_TOOL_API_KEY" tool-server &
+SERVER_PID=$!
+
+echo ""
+echo "========================================="
+echo "Server started! (PID: $SERVER_PID)"
+echo "========================================="
+echo ""
+echo "USAGE INSTRUCTIONS:"
+echo "Test the server with the following commands to verify:"
+echo "1) The timeout works correctly"
+echo "2) The timeout in the first curl does not block the second curl"
+echo ""
+echo "Test 1 - This should time out after 3 seconds:"
+echo "curl -X POST http://localhost:1212/execute \\"
+echo "  -H \"Content-Type: application/json\" \\"
+echo "  -H \"X-API-Key: \$OPEN_INSTRUCT_TOOL_API_KEY\" \\"
+echo "  -d '{\"code\": \"import time;time.sleep(4)\", \"timeout\": 3}' \\"
+echo "  -w '\\nTotal time: %{time_total}s\\n'"
+echo ""
+echo "Test 2 - This should complete quickly:"
+echo "curl -X POST http://localhost:1212/execute \\"
+echo "  -H \"Content-Type: application/json\" \\"
+echo "  -H \"X-API-Key: \$OPEN_INSTRUCT_TOOL_API_KEY\" \\"
+echo "  -d '{\"code\": \"print(1)\", \"timeout\": 3}' \\"
+echo "  -w '\\nTotal time: %{time_total}s\\n'"
+echo ""
+echo "========================================="
+echo ""
+
+read -p "Do you want to deploy to Google Cloud Run? (y/n) " -n 1 -r
+echo
+if [[ $REPLY =~ ^[Yy]$ ]]; then
+    echo "Pushing Docker image to ghcr.io..."
+    docker push ghcr.io/allenai/open-instruct/python-code-executor
+
+    echo "Deploying to Google Cloud Run..."
+    gcloud run deploy open-instruct-tool-server --project ai2-allennlp --region us-central1 --source .
+fi
+
+if [ -n "$BEAKER_TOKEN" ]; then
+    echo ""
+    read -p "BEAKER_TOKEN detected. Do you want to deploy to Beaker? (y/n) " -n 1 -r
+    echo
+    if [[ $REPLY =~ ^[Yy]$ ]]; then
+        echo "Deploying to Beaker..."
+ beaker_user=$(beaker account whoami --format json | jq -r '.[0].name') + beaker image delete $beaker_user/tool-server || true + beaker image create tool-server -n tool-server -w ai2/$beaker_user + uv run python mason.py \ + --cluster ai2/phobos-cirrascale \ + --workspace ai2/scaling-rl \ + --image $beaker_user/tool-server --pure_docker_mode \ + --priority high \ + --budget ai2/oe-adapt \ + --gpus 0 -- python tool_server.py + fi +fi + +echo "" +echo "Local server will be stopped automatically when script exits."
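For reference, the curl examples above translate directly to a Python client. A minimal sketch, assuming a local `docker run` on port 1212 as in the docstring; the endpoint URL and payload mirror the `/execute` examples and are not part of the diff:

```python
import os

import requests

# Assumed local endpoint, matching the `docker run -p 1212:8080 ... tool-server` example.
API_ENDPOINT = "http://localhost:1212/execute"

headers = {"Content-Type": "application/json"}
api_key = os.getenv("OPEN_INSTRUCT_TOOL_API_KEY")
if api_key:
    # Mirrors PythonCodeTool: the X-API-Key header is only attached when a key is configured.
    headers["X-API-Key"] = api_key

response = requests.post(
    API_ENDPOINT,
    json={"code": "print(1 + 1)", "timeout": 3},  # same payload shape as the curl tests
    headers=headers,
    timeout=3,  # client-side timeout, as in tools.py
)
response.raise_for_status()
result = response.json()  # CodeResponse fields: output, error, success
print(result["output"], result["success"])
```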
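The `verify_api_key` dependency can also be exercised without a live server via FastAPI's `TestClient`. A sketch, assuming it is run the same way as test_tools.py (from `open_instruct/tool_utils` with the project environment); since EXPECTED_API_KEY is read at module import time, the env var must be set before `tool_server` is imported:

```python
import os

# verify_api_key reads EXPECTED_API_KEY at module import, so set the key first.
os.environ["OPEN_INSTRUCT_TOOL_API_KEY"] = "test-key"

from fastapi.testclient import TestClient

import tool_server  # assumes cwd is open_instruct/tool_utils

with TestClient(tool_server.app) as client:
    payload = {"code": "print(1)", "timeout": 3}

    # Missing header: rejected by the dependency before any code executes.
    assert client.post("/execute", json=payload).status_code == 401

    # Wrong key: also rejected with 401 ("Invalid API key").
    bad = client.post("/execute", json=payload, headers={"X-API-Key": "wrong-key"})
    assert bad.status_code == 401
```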
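The two-curl timeout check that rebuild_tool_server.sh prints can likewise be scripted, which makes the non-blocking claim assertable rather than eyeballed. A sketch under the same assumptions (local server on port 1212, key exported in the environment):

```python
import os
import time
from concurrent.futures import ThreadPoolExecutor

import requests

ENDPOINT = "http://localhost:1212/execute"  # assumed local docker run, as above
HEADERS = {"X-API-Key": os.environ.get("OPEN_INSTRUCT_TOOL_API_KEY", "")}


def timed_post(code: str) -> float:
    """POST a snippet with a 3s server-side timeout; return wall-clock seconds."""
    start = time.time()
    requests.post(ENDPOINT, json={"code": code, "timeout": 3}, headers=HEADERS, timeout=10)
    return time.time() - start


with ThreadPoolExecutor(max_workers=2) as pool:
    slow = pool.submit(timed_post, "import time;time.sleep(4)")  # should be cut off at ~3s
    fast = pool.submit(timed_post, "print(1)")
    # If the executor pool is healthy, the fast request returns well before the slow one.
    print(f"slow: {slow.result():.1f}s  fast: {fast.result():.1f}s")
```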