diff --git a/mason.py b/mason.py
index 3c236e2d86..250a57af1f 100644
--- a/mason.py
+++ b/mason.py
@@ -254,6 +254,8 @@ def get_env_vars(
         "WANDB_API_KEY",
         "BEAKER_TOKEN",
         "OPENAI_API_KEY",
+        # Needed for tool use scripts.
+        "OPEN_INSTRUCT_TOOL_API_KEY",
         # litellm expects these env vars
         "AZURE_API_KEY",
         "AZURE_API_BASE",
diff --git a/open_instruct/tool_utils/Dockerfile b/open_instruct/tool_utils/Dockerfile
index 96fdc3e329..dba5595655 100644
--- a/open_instruct/tool_utils/Dockerfile
+++ b/open_instruct/tool_utils/Dockerfile
@@ -4,14 +4,19 @@ FROM python:3.10-slim
 # Set working directory in container
 WORKDIR /app
 
+# Install uv
+COPY --from=ghcr.io/astral-sh/uv:0.8.8 /uv /bin/uv
+
 # Copy requirements first to leverage Docker cache
-COPY requirements.txt requirements.txt
+COPY open_instruct/tool_utils/requirements.txt requirements.txt
 
-# Install dependencies
-RUN pip install --no-cache-dir -r requirements.txt
+# Install dependencies using uv
+RUN uv pip install --system --no-cache -r requirements.txt
 
-# Copy the rest of the application
-COPY . .
+# Copy the tool server files
+COPY open_instruct/__init__.py open_instruct/__init__.py
+COPY open_instruct/logger_utils.py open_instruct/logger_utils.py
+COPY open_instruct/tool_utils/tool_server.py tool_server.py
 
 # Create cache directory for code execution
 RUN mkdir -p cache && chmod 777 cache
@@ -23,4 +28,4 @@ ENV PYTHONUNBUFFERED=1
 EXPOSE 8080
 
 # Command to run the application
-CMD ["python", "tool_server.py"]
\ No newline at end of file
+CMD ["uv", "run", "--no-project", "tool_server.py"]
\ No newline at end of file
diff --git a/open_instruct/tool_utils/test_tools.py b/open_instruct/tool_utils/test_tools.py
index a71151e69f..87050efbae 100644
--- a/open_instruct/tool_utils/test_tools.py
+++ b/open_instruct/tool_utils/test_tools.py
@@ -60,15 +60,19 @@ class TestPythonCodeTool(unittest.TestCase):
     @classmethod
     def setUpClass(cls):
         """Start the tool server for tests."""
-        # Start the server in a subprocess
+        import os
+
+        env = os.environ.copy()
+        env.pop("OPEN_INSTRUCT_TOOL_API_KEY", None)
+
         cls.server_process = subprocess.Popen(
             ["uv", "run", "uvicorn", "tool_server:app", "--host", "0.0.0.0", "--port", "1212"],
             cwd="open_instruct/tool_utils",
             stdout=subprocess.PIPE,
             stderr=subprocess.PIPE,
-            start_new_session=True,  # Create new process group
+            start_new_session=True,
+            env=env,
         )
-        # Wait for server to start
         time.sleep(3)
 
         cls.api_endpoint = "http://localhost:1212/execute"
diff --git a/open_instruct/tool_utils/tool_server.py b/open_instruct/tool_utils/tool_server.py
index 9142f19c3d..23edc6b334 100644
--- a/open_instruct/tool_utils/tool_server.py
+++ b/open_instruct/tool_utils/tool_server.py
@@ -3,8 +3,19 @@
 
 This script sets up a FastAPI server that allows users to execute Python code snippets
 
+# API Key Authentication
+
+The server authenticates requests with an API key. Set the OPEN_INSTRUCT_TOOL_API_KEY environment variable (if it is unset, key validation is disabled and the server logs a warning):
+
+```bash
+export OPEN_INSTRUCT_TOOL_API_KEY="your-api-key-here"
+```
+
+When running locally:
+```bash
 cd open_instruct/tool_utils
-PREIMPORT_PKGS=pandas,numpy,sympy,time,math,networkx uv run uvicorn tool_server:app --host 0.0.0.0 --port 1212
+OPEN_INSTRUCT_TOOL_API_KEY="your-api-key-here" PREIMPORT_PKGS=pandas,numpy,sympy,time,math,networkx uv run uvicorn tool_server:app --host 0.0.0.0 --port 1212
+```
 
 ```bash
 docker build -t tool-server .
@@ -16,8 +27,8 @@ docker build -t ghcr.io/allenai/open-instruct/python-code-executor -f open_instruct/tool_utils/Dockerfile .
docker push ghcr.io/allenai/open-instruct/python-code-executor -# Run the server -docker run -p 1212:8080 tool-server +# Run the server (pass API key via environment variable) +docker run -p 1212:8080 -e OPEN_INSTRUCT_TOOL_API_KEY="your-api-key-here" tool-server # gcloud run deploy: gcloud run deploy open-instruct-tool-server --project ai2-allennlp --region us-central1 --source . @@ -39,25 +50,31 @@ 1) the timeout works 2) the timeout in the first curl does not block the second curl +All requests now require the X-API-Key header: + ``` curl -X POST https://open-instruct-tool-server-10554368204.us-central1.run.app/execute \ -H "Content-Type: application/json" \ + -H "X-API-Key: $OPEN_INSTRUCT_TOOL_API_KEY" \ -d '{"code": "import time;time.sleep(4)", "timeout": 3}' \ -w '\nTotal time: %{time_total}s\n' curl -X POST https://open-instruct-tool-server-10554368204.us-central1.run.app/execute \ -H "Content-Type: application/json" \ + -H "X-API-Key: $OPEN_INSTRUCT_TOOL_API_KEY" \ -d '{"code": "print(1)", "timeout": 3}' \ -w '\nTotal time: %{time_total}s\n' curl -X POST https://open-instruct-tool-server-10554368204.us-central1.run.app/execute \ -H "Content-Type: application/json" \ + -H "X-API-Key: $OPEN_INSTRUCT_TOOL_API_KEY" \ -d '{"code": "import sympy", "timeout": 3}' \ -w '\nTotal time: %{time_total}s\n' curl -X POST https://open-instruct-tool-server-10554368204.us-central1.run.app/execute \ -H "Content-Type: application/json" \ + -H "X-API-Key: $OPEN_INSTRUCT_TOOL_API_KEY" \ -d '{"code": "import sympy", "timeout": 3}' \ -w '\nTotal time: %{time_total}s\n' ``` @@ -80,7 +97,7 @@ from contextlib import redirect_stderr, redirect_stdout from typing import Optional -from fastapi import FastAPI +from fastapi import Depends, FastAPI, Header, HTTPException from pydantic import BaseModel from open_instruct import logger_utils @@ -202,11 +219,30 @@ class CodeResponse(BaseModel): success: bool +############################################################################### +# API Key Authentication +############################################################################### +EXPECTED_API_KEY = os.getenv("OPEN_INSTRUCT_TOOL_API_KEY") + + +async def verify_api_key(x_api_key: str = Header(None, alias="X-API-Key")): + if not EXPECTED_API_KEY: + logger.warning("OPEN_INSTRUCT_TOOL_API_KEY not set - API key validation disabled") + return + if not x_api_key: + logger.warning("Missing API key in request") + raise HTTPException(status_code=401, detail="Missing API key") + if x_api_key != EXPECTED_API_KEY: + logger.warning("Invalid API key attempt") + raise HTTPException(status_code=401, detail="Invalid API key") + return x_api_key + + ############################################################################### # Endpoints ############################################################################### @app.post("/execute", response_model=CodeResponse) -async def execute_code(req: CodeRequest): # noqa: D401 +async def execute_code(req: CodeRequest, api_key: str = Depends(verify_api_key)): # noqa: D401 global process_pool # noqa: PLW0603 # Log input (truncate to 200 chars to avoid huge logs) @@ -244,4 +280,21 @@ async def execute_code(req: CodeRequest): # noqa: D401 @app.get("/") async def root(): # noqa: D401 - return {"message": "Python Code Executor API — POST /execute {code, timeout}"} + host = os.getenv("HOST", "http://localhost:1212") + + examples = f"""Python Code Executor API + +Example usage: + +curl -X POST {host}/execute \\ + -H "Content-Type: application/json" \\ + -H "X-API-Key: 
$OPEN_INSTRUCT_TOOL_API_KEY" \\ + -d '{{"code": "print(1 + 1)", "timeout": 3}}' + +curl -X POST {host}/execute \\ + -H "Content-Type: application/json" \\ + -H "X-API-Key: $OPEN_INSTRUCT_TOOL_API_KEY" \\ + -d '{{"code": "import sympy; print(sympy.__version__)", "timeout": 3}}' +""" + + return {"message": examples} diff --git a/open_instruct/tool_utils/tools.py b/open_instruct/tool_utils/tools.py index efd60456ed..6720499ac6 100644 --- a/open_instruct/tool_utils/tools.py +++ b/open_instruct/tool_utils/tools.py @@ -35,8 +35,9 @@ class PythonCodeTool(Tool): """@vwxyzjn: I recommend using something like a FastAPI for this kind of stuff; 1) you won't accidentally block the main vLLM process and 2) way easier to parallelize via load balancing.""" - def __init__(self, api_endpoint: str, *args, **kwargs): + def __init__(self, api_endpoint: str, api_key: str = None, *args, **kwargs): self.api_endpoint = api_endpoint + self.api_key = api_key super().__init__(*args, **kwargs) def __call__(self, prompt: str) -> ToolOutput: @@ -79,17 +80,21 @@ def find_sum_of_a(): timeout_seconds = 3 start_time = time.time() try: - # Call the FastAPI endpoint to execute the code with client-side timeout + headers = {"Content-Type": "application/json"} + if self.api_key: + headers["X-API-Key"] = self.api_key + response = requests.post( self.api_endpoint, - json={"code": code, "timeout": timeout_seconds}, # Server-side timeout (keeping this) - timeout=timeout_seconds, # Client-side timeout + json={"code": code, "timeout": timeout_seconds}, + headers=headers, + timeout=timeout_seconds, ) - # Parse the response + response.raise_for_status() + result = response.json() - # Process the API response output = result["output"] error = result.get("error") or "" diff --git a/requirements.txt b/requirements.txt index d9295277ec..c428c8e2a1 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,14 +1,14 @@ # This file was autogenerated by uv via the following command: # uv export --format requirements-txt --no-hashes --all-extras --no-emit-project -absl-py==2.3.0 +absl-py==2.3.1 # via tensorboard -accelerate==1.8.1 +accelerate==1.10.1 # via # open-instruct # peft aiohappyeyeballs==2.6.1 # via aiohttp -aiohttp==3.12.9 +aiohttp==3.13.0 # via # aiohttp-cors # fsspec @@ -17,15 +17,13 @@ aiohttp==3.12.9 # vllm aiohttp-cors==0.8.1 # via ray -aiosignal==1.3.2 +aiosignal==1.4.0 # via aiohttp -airportsdata==20250523 - # via outlines annotated-types==0.7.0 # via pydantic antlr4-python3-runtime==4.11.0 # via open-instruct -anyio==4.9.0 +anyio==4.11.0 # via # httpx # openai @@ -33,35 +31,37 @@ anyio==4.9.0 # watchfiles astor==0.8.1 # via depyf -async-timeout==5.0.1 ; python_full_version < '3.11' - # via aiohttp -attrs==25.3.0 +attrs==25.4.0 # via # aiohttp # jsonschema # referencing babel==2.17.0 # via mkdocs-material -backrefs==5.8 +backrefs==5.9 # via mkdocs-material -beaker-py==1.36.4 -bitsandbytes==0.46.0 ; sys_platform != 'darwin' +beaker-py==2.5.1 +bitsandbytes==0.48.1 ; sys_platform != 'darwin' # via open-instruct -blake3==1.0.5 +blake3==1.0.8 # via vllm -cachetools==5.5.2 +cachetools==6.2.1 # via # google-auth # vllm -certifi==2025.4.26 +cbor2==5.7.0 + # via vllm +certifi==2025.10.5 # via # httpcore # httpx # requests # sentry-sdk -cffi==1.17.1 ; implementation_name == 'pypy' - # via pyzmq -charset-normalizer==3.4.2 +cffi==2.0.0 + # via + # pyzmq + # soundfile +charset-normalizer==3.4.4 # via requests click==8.2.1 # via @@ -74,9 +74,7 @@ click==8.2.1 # uvicorn # wandb cloudpickle==3.1.1 - # via - # outlines - # vllm + # via vllm 
colorama==0.4.6 # via # click @@ -87,60 +85,58 @@ colorama==0.4.6 # pytest # tqdm # uvicorn -colorful==0.5.6 +colorful==0.5.7 # via ray -compressed-tensors==0.10.1 +compressed-tensors==0.11.0 # via vllm -contourpy==1.3.2 +contourpy==1.3.3 # via matplotlib -cupy-cuda12x==13.4.1 ; sys_platform != 'darwin' +cupy-cuda12x==13.6.0 ; sys_platform != 'darwin' # via ray cycler==0.12.1 # via matplotlib -datasets==4.0.0 +datasets==4.2.0 # via open-instruct -debugpy==1.8.14 +debugpy==1.8.17 # via open-instruct deepspeed==0.15.4 # via open-instruct -depyf==0.18.0 +depyf==0.19.0 # via vllm -dill==0.3.8 +dill==0.4.0 # via # datasets # depyf # multiprocess diskcache==5.6.3 - # via outlines -distlib==0.3.9 + # via vllm +distlib==0.4.0 # via virtualenv distro==1.9.0 # via openai -dnspython==2.7.0 +dnspython==2.8.0 # via email-validator -docker==7.1.0 - # via beaker-py docker-pycreds==0.4.0 # via wandb einops==0.8.1 # via # flash-attn # vllm -email-validator==2.2.0 - # via fastapi -exceptiongroup==1.3.0 ; python_full_version < '3.11' +email-validator==2.3.0 # via - # anyio - # pytest -fastapi==0.115.12 + # fastapi + # pydantic +fastapi==0.119.0 # via # open-instruct # vllm -fastapi-cli==0.0.7 +fastapi-cli==0.0.13 # via fastapi +fastapi-cloud-cli==0.3.1 + # via fastapi-cli fastrlock==0.8.3 ; sys_platform != 'darwin' # via cupy-cuda12x -filelock==3.18.0 +filelock==3.20.0 # via # datasets # huggingface-hub @@ -149,40 +145,40 @@ filelock==3.18.0 # transformers # virtualenv # vllm -flash-attn==2.8.0.post2 ; sys_platform != 'darwin' +flash-attn==2.8.3 ; sys_platform != 'darwin' # via open-instruct -fonttools==4.58.1 +fonttools==4.60.1 # via matplotlib -frozenlist==1.6.2 +frozendict==2.4.6 + # via compressed-tensors +frozenlist==1.8.0 # via # aiohttp # aiosignal -fsspec==2025.3.0 +fsspec==2025.9.0 # via # datasets # huggingface-hub # torch -gguf==0.17.0 +gguf==0.17.1 # via vllm ghp-import==2.1.0 # via mkdocs gitdb==4.0.12 # via gitpython -gitpython==3.1.44 +gitpython==3.1.45 # via wandb -google-api-core==2.25.0 +google-api-core==2.26.0 # via opencensus -google-auth==2.39.0 +google-auth==2.41.1 # via google-api-core +google-crc32c==1.7.1 + # via beaker-py googleapis-common-protos==1.70.0 - # via - # google-api-core - # opentelemetry-exporter-otlp-proto-grpc - # opentelemetry-exporter-otlp-proto-http -grpcio==1.72.1 + # via google-api-core +grpcio==1.75.1 # via # beaker-py - # opentelemetry-exporter-otlp-proto-grpc # ray # tensorboard h11==0.16.0 @@ -191,28 +187,29 @@ h11==0.16.0 # uvicorn hf-transfer==0.1.9 # via open-instruct -hf-xet==1.1.3 +hf-xet==1.1.10 ; platform_machine == 'aarch64' or platform_machine == 'amd64' or platform_machine == 'arm64' or platform_machine == 'x86_64' # via huggingface-hub hjson==3.1.0 # via deepspeed httpcore==1.0.9 # via httpx -httptools==0.6.4 +httptools==0.7.1 # via uvicorn httpx==0.28.1 # via + # datasets # fastapi + # fastapi-cloud-cli # litellm # openai -huggingface-hub==0.32.4 +huggingface-hub==0.35.3 # via # accelerate # datasets # peft # tokenizers # transformers - # vllm -idna==3.10 +idna==3.11 # via # anyio # email-validator @@ -221,57 +218,50 @@ idna==3.10 # yarl immutabledict==1.2.0 # via open-instruct -importlib-metadata==8.0.0 +importlib-metadata==8.7.0 # via # litellm # opentelemetry-api iniconfig==2.1.0 # via pytest interegular==0.3.3 - # via - # lm-format-enforcer - # outlines - # outlines-core + # via lm-format-enforcer jinja2==3.1.6 # via # fastapi # litellm # mkdocs # mkdocs-material - # outlines + # mlx-lm # torch -jiter==0.10.0 +jiter==0.11.0 # via openai 
-joblib==1.5.1 +joblib==1.5.2 # via nltk -jsonschema==4.24.0 +jsonschema==4.25.1 # via # litellm # mistral-common - # outlines - # outlines-core # ray -jsonschema-specifications==2025.4.1 +jsonschema-specifications==2025.9.1 # via jsonschema -kiwisolver==1.4.8 +kiwisolver==1.4.9 # via matplotlib langdetect==1.0.9 # via open-instruct lark==1.2.2 - # via - # outlines - # vllm -liger-kernel==0.5.10 ; sys_platform != 'darwin' + # via vllm +liger-kernel==0.6.2 ; sys_platform != 'darwin' # via open-instruct -litellm==1.72.0 +litellm==1.75.0 # via open-instruct -llguidance==0.7.27 ; platform_machine == 'aarch64' or platform_machine == 'arm64' or platform_machine == 'x86_64' +llguidance==0.7.30 ; platform_machine == 'aarch64' or platform_machine == 'arm64' or platform_machine == 'x86_64' # via vllm llvmlite==0.44.0 # via numba -lm-format-enforcer==0.10.11 +lm-format-enforcer==0.11.3 # via vllm -markdown==3.8 +markdown==3.9 # via # markdown-include # mkdocs @@ -279,14 +269,14 @@ markdown==3.8 # pymdown-extensions # tensorboard markdown-include==0.8.1 -markdown-it-py==3.0.0 +markdown-it-py==4.0.0 # via rich -markupsafe==3.0.2 +markupsafe==3.0.3 # via # jinja2 # mkdocs # werkzeug -matplotlib==3.10.3 +matplotlib==3.10.7 # via open-instruct mdurl==0.1.2 # via markdown-it-py @@ -294,41 +284,43 @@ mergedeep==1.3.4 # via # mkdocs # mkdocs-get-deps -mistral-common==1.5.6 +mistral-common==1.8.5 # via vllm mkdocs==1.6.1 # via mkdocs-material mkdocs-get-deps==0.2.0 # via mkdocs -mkdocs-material==9.6.14 +mkdocs-material==9.6.22 mkdocs-material-extensions==1.3.1 # via mkdocs-material +mlx==0.29.2 ; platform_machine == 'arm64' and sys_platform == 'darwin' + # via mlx-lm +mlx-lm==0.28.2 ; platform_machine == 'arm64' and sys_platform == 'darwin' + # via xgrammar +mlx-metal==0.29.2 ; platform_machine == 'arm64' and sys_platform == 'darwin' + # via mlx mpmath==1.3.0 # via sympy -msgpack==1.1.0 +msgpack==1.1.2 # via # deepspeed # ray msgspec==0.19.0 # via vllm -multidict==6.4.4 +multidict==6.7.0 # via # aiohttp # yarl multiprocess==0.70.16 # via datasets -nest-asyncio==1.6.0 - # via outlines -networkx==3.4.2 ; python_full_version < '3.11' - # via torch -networkx==3.5 ; python_full_version >= '3.11' +networkx==3.5 # via torch -ninja==1.11.1.4 +ninja==1.13.0 # via # deepspeed # vllm # xgrammar -nltk==3.9.1 +nltk==3.9.2 # via open-instruct numba==0.61.2 # via vllm @@ -343,63 +335,68 @@ numpy==1.26.4 # gguf # matplotlib # mistral-common + # mlx-lm # numba # open-instruct # opencv-python-headless - # outlines # pandas # peft # scipy + # soundfile + # soxr # tensorboard # torchvision # transformers # vllm # xformers -nvidia-cublas-cu12==12.8.3.14 ; platform_machine == 'x86_64' and sys_platform == 'linux' + # xgrammar +nvidia-cublas-cu12==12.8.4.1 ; platform_machine == 'x86_64' and sys_platform == 'linux' # via # nvidia-cudnn-cu12 # nvidia-cusolver-cu12 # torch -nvidia-cuda-cupti-cu12==12.8.57 ; platform_machine == 'x86_64' and sys_platform == 'linux' +nvidia-cuda-cupti-cu12==12.8.90 ; platform_machine == 'x86_64' and sys_platform == 'linux' # via torch -nvidia-cuda-nvrtc-cu12==12.8.61 ; platform_machine == 'x86_64' and sys_platform == 'linux' +nvidia-cuda-nvrtc-cu12==12.8.93 ; platform_machine == 'x86_64' and sys_platform == 'linux' # via torch -nvidia-cuda-runtime-cu12==12.8.57 ; platform_machine == 'x86_64' and sys_platform == 'linux' +nvidia-cuda-runtime-cu12==12.8.90 ; platform_machine == 'x86_64' and sys_platform == 'linux' # via torch -nvidia-cudnn-cu12==9.7.1.26 ; platform_machine == 'x86_64' and sys_platform == 
'linux' +nvidia-cudnn-cu12==9.10.2.21 ; platform_machine == 'x86_64' and sys_platform == 'linux' # via torch -nvidia-cufft-cu12==11.3.3.41 ; platform_machine == 'x86_64' and sys_platform == 'linux' +nvidia-cufft-cu12==11.3.3.83 ; platform_machine == 'x86_64' and sys_platform == 'linux' # via torch -nvidia-cufile-cu12==1.13.0.11 ; platform_machine == 'x86_64' and sys_platform == 'linux' +nvidia-cufile-cu12==1.13.1.3 ; platform_machine == 'x86_64' and sys_platform == 'linux' # via torch -nvidia-curand-cu12==10.3.9.55 ; platform_machine == 'x86_64' and sys_platform == 'linux' +nvidia-curand-cu12==10.3.9.90 ; platform_machine == 'x86_64' and sys_platform == 'linux' # via torch -nvidia-cusolver-cu12==11.7.2.55 ; platform_machine == 'x86_64' and sys_platform == 'linux' +nvidia-cusolver-cu12==11.7.3.90 ; platform_machine == 'x86_64' and sys_platform == 'linux' # via torch -nvidia-cusparse-cu12==12.5.7.53 ; platform_machine == 'x86_64' and sys_platform == 'linux' +nvidia-cusparse-cu12==12.5.8.93 ; platform_machine == 'x86_64' and sys_platform == 'linux' # via # nvidia-cusolver-cu12 # torch -nvidia-cusparselt-cu12==0.6.3 ; platform_machine == 'x86_64' and sys_platform == 'linux' +nvidia-cusparselt-cu12==0.7.1 ; platform_machine == 'x86_64' and sys_platform == 'linux' # via torch -nvidia-ml-py==12.575.51 +nvidia-ml-py==13.580.82 # via nvitop -nvidia-nccl-cu12==2.26.2 ; platform_machine == 'x86_64' and sys_platform == 'linux' +nvidia-nccl-cu12==2.27.3 ; platform_machine == 'x86_64' and sys_platform == 'linux' # via torch -nvidia-nvjitlink-cu12==12.8.61 ; platform_machine == 'x86_64' and sys_platform == 'linux' +nvidia-nvjitlink-cu12==12.8.93 ; platform_machine == 'x86_64' and sys_platform == 'linux' # via # nvidia-cufft-cu12 # nvidia-cusolver-cu12 # nvidia-cusparse-cu12 # torch -nvidia-nvtx-cu12==12.8.55 ; platform_machine == 'x86_64' and sys_platform == 'linux' +nvidia-nvtx-cu12==12.8.90 ; platform_machine == 'x86_64' and sys_platform == 'linux' # via torch -nvitop==1.5.1 +nvitop==1.5.3 # via open-instruct -openai==1.84.0 +openai==2.3.0 # via # litellm # vllm +openai-harmony==0.0.4 + # via vllm opencensus==0.11.4 # via ray opencensus-context==0.1.3 @@ -408,45 +405,28 @@ opencv-python-headless==4.11.0.86 # via # mistral-common # vllm -opentelemetry-api==1.36.0 +opentelemetry-api==1.37.0 # via - # opentelemetry-exporter-otlp-proto-grpc - # opentelemetry-exporter-otlp-proto-http + # opentelemetry-exporter-prometheus # opentelemetry-sdk # opentelemetry-semantic-conventions - # vllm -opentelemetry-exporter-otlp==1.36.0 - # via vllm -opentelemetry-exporter-otlp-proto-common==1.36.0 - # via - # opentelemetry-exporter-otlp-proto-grpc - # opentelemetry-exporter-otlp-proto-http -opentelemetry-exporter-otlp-proto-grpc==1.36.0 - # via opentelemetry-exporter-otlp -opentelemetry-exporter-otlp-proto-http==1.36.0 - # via opentelemetry-exporter-otlp -opentelemetry-proto==1.36.0 - # via - # opentelemetry-exporter-otlp-proto-common - # opentelemetry-exporter-otlp-proto-grpc - # opentelemetry-exporter-otlp-proto-http -opentelemetry-sdk==1.36.0 - # via - # opentelemetry-exporter-otlp-proto-grpc - # opentelemetry-exporter-otlp-proto-http - # vllm -opentelemetry-semantic-conventions==0.57b0 +opentelemetry-exporter-prometheus==0.58b0 + # via ray +opentelemetry-proto==1.37.0 + # via ray +opentelemetry-sdk==1.37.0 + # via + # opentelemetry-exporter-prometheus + # ray +opentelemetry-semantic-conventions==0.58b0 # via opentelemetry-sdk -opentelemetry-semantic-conventions-ai==0.4.9 - # via vllm -outlines==0.1.11 
+outlines-core==0.2.11 # via vllm -outlines-core==0.1.26 - # via outlines packaging==25.0 # via # accelerate # beaker-py + # bitsandbytes # datasets # deepspeed # huggingface-hub @@ -461,36 +441,38 @@ packaging==25.0 # transformers paginate==0.5.7 # via mkdocs-material -pandas==2.2.3 +pandas==2.3.3 # via datasets parameterized==0.9.0 -partial-json-parser==0.2.1.1.post5 +partial-json-parser==0.2.1.1.post6 # via vllm pathspec==0.12.1 # via mkdocs -peft==0.15.2 +peft==0.17.1 # via open-instruct -pillow==11.2.1 +pillow==12.0.0 # via # matplotlib # mistral-common + # tensorboard # torchvision # vllm -platformdirs==4.3.8 +platformdirs==4.5.0 # via # mkdocs-get-deps # virtualenv # wandb pluggy==1.6.0 # via pytest -prometheus-client==0.22.1 +prometheus-client==0.23.1 # via + # opentelemetry-exporter-prometheus # prometheus-fastapi-instrumentator # ray # vllm prometheus-fastapi-instrumentator==7.1.0 # via vllm -propcache==0.3.1 +propcache==0.4.1 # via # aiohttp # yarl @@ -501,13 +483,14 @@ protobuf==5.29.5 # beaker-py # google-api-core # googleapis-common-protos + # mlx-lm # opentelemetry-proto # proto-plus # ray # tensorboard # vllm # wandb -psutil==7.0.0 +psutil==7.1.0 # via # accelerate # deepspeed @@ -519,9 +502,9 @@ py-cpuinfo==9.0.0 # via # deepspeed # vllm -py-spy==0.4.0 +py-spy==0.4.1 # via ray -pyarrow==20.0.0 +pyarrow==21.0.0 # via datasets pyasn1==0.6.1 # via @@ -529,55 +512,58 @@ pyasn1==0.6.1 # rsa pyasn1-modules==0.4.2 # via google-auth +pybase64==1.4.2 + # via vllm pycountry==24.6.1 - # via outlines -pycparser==2.22 ; implementation_name == 'pypy' + # via pydantic-extra-types +pycparser==2.23 ; implementation_name != 'PyPy' # via cffi -pydantic==2.11.5 +pydantic==2.12.2 # via - # beaker-py # compressed-tensors # deepspeed # fastapi + # fastapi-cloud-cli # litellm # lm-format-enforcer # mistral-common # open-instruct # openai - # outlines + # openai-harmony + # pydantic-extra-types # ray # vllm # xgrammar -pydantic-core==2.33.2 +pydantic-core==2.41.4 # via pydantic -pygments==2.19.1 +pydantic-extra-types==2.10.6 + # via mistral-common +pygments==2.19.2 # via # mkdocs-material # pytest # rich -pymdown-extensions==10.15 +pymdown-extensions==10.16.1 # via mkdocs-material -pyparsing==3.2.3 +pyparsing==3.2.5 # via matplotlib -pytest==8.4.0 +pytest==8.4.2 python-dateutil==2.9.0.post0 # via # ghp-import # matplotlib # pandas -python-dotenv==1.1.0 +python-dotenv==1.1.1 # via # litellm # uvicorn -python-json-logger==3.3.0 +python-json-logger==4.0.0 # via vllm python-multipart==0.0.20 # via fastapi pytz==2025.2 # via pandas -pywin32==310 ; sys_platform == 'win32' - # via docker -pyyaml==6.0.2 +pyyaml==6.0.3 # via # accelerate # beaker-py @@ -587,6 +573,7 @@ pyyaml==6.0.2 # lm-format-enforcer # mkdocs # mkdocs-get-deps + # mlx-lm # peft # pymdown-extensions # pyyaml-env-tag @@ -597,71 +584,70 @@ pyyaml==6.0.2 # wandb pyyaml-env-tag==1.1 # via mkdocs -pyzmq==26.4.0 +pyzmq==27.1.0 # via vllm -ray==2.46.0 +ray==2.50.0 # via # open-instruct # vllm -referencing==0.36.2 +referencing==0.37.0 # via # jsonschema # jsonschema-specifications - # outlines -regex==2024.11.6 +regex==2025.9.18 # via # nltk # tiktoken # transformers # vllm -requests==2.32.3 +requests==2.32.5 # via # beaker-py # datasets - # docker # google-api-core # huggingface-hub # mistral-common # mkdocs-material # open-instruct - # opentelemetry-exporter-otlp-proto-http - # outlines # ray # tiktoken # transformers # vllm # wandb -rich==13.9.4 +rich==14.2.0 # via - # beaker-py # rich-toolkit # typer -rich-toolkit==0.14.7 - # via fastapi-cli 
-rpds-py==0.25.1 +rich-toolkit==0.15.1 + # via + # fastapi-cli + # fastapi-cloud-cli +rignore==0.7.1 + # via fastapi-cloud-cli +rpds-py==0.27.1 # via # jsonschema # referencing -rsa==4.7.2 +rsa==4.9.1 # via google-auth -ruff==0.12.0 -safetensors==0.5.3 +ruff==0.14.0 +safetensors==0.6.2 # via # accelerate # peft # transformers -scipy==1.15.3 +scipy==1.16.2 # via vllm -sentencepiece==0.2.0 +sentencepiece==0.2.1 + # via vllm +sentry-sdk==2.42.0 + # via + # fastapi-cloud-cli + # wandb +setproctitle==1.3.7 # via - # gguf - # mistral-common # vllm - # xgrammar -sentry-sdk==2.29.1 - # via wandb -setproctitle==1.3.6 - # via wandb + # wandb setuptools==79.0.1 # via # open-instruct @@ -678,9 +664,8 @@ six==1.17.0 # langdetect # opencensus # python-dateutil - # tensorboard # vllm -smart-open==7.1.0 +smart-open==7.3.1 # via ray smmap==5.0.2 # via gitdb @@ -688,42 +673,42 @@ sniffio==1.3.1 # via # anyio # openai -starlette==0.46.2 +soundfile==0.13.1 + # via mistral-common +soxr==1.0.0 + # via mistral-common +starlette==0.48.0 # via # fastapi # prometheus-fastapi-instrumentator sympy==1.14.0 # via torch -tensorboard==2.19.0 +tensorboard==2.20.0 # via open-instruct tensorboard-data-server==0.7.2 # via tensorboard -tiktoken==0.9.0 +tiktoken==0.12.0 # via # litellm # mistral-common # vllm - # xgrammar -tokenizers==0.21.1 +tokenizers==0.22.1 # via # litellm # transformers # vllm -tomli==2.2.1 ; python_full_version < '3.11' - # via pytest -torch==2.7.0 ; sys_platform == 'darwin' +torch==2.8.0 ; sys_platform == 'darwin' # via # accelerate # compressed-tensors # deepspeed # open-instruct - # outlines # peft # torchaudio # torchvision # vllm # xgrammar -torch==2.7.0+cu128 ; sys_platform != 'darwin' +torch==2.8.0+cu128 ; sys_platform != 'darwin' # via # accelerate # bitsandbytes @@ -732,16 +717,15 @@ torch==2.7.0+cu128 ; sys_platform != 'darwin' # flash-attn # liger-kernel # open-instruct - # outlines # peft # torchaudio # torchvision # vllm # xformers # xgrammar -torchaudio==2.7.0 +torchaudio==2.8.0 # via vllm -torchvision==0.22.0 +torchvision==0.23.0 # via vllm tqdm==4.67.1 # via @@ -751,74 +735,74 @@ tqdm==4.67.1 # huggingface-hub # nltk # openai - # outlines # peft # transformers # vllm -transformers==4.52.4 +transformers==4.57.1 # via # compressed-tensors + # mlx-lm # open-instruct # peft # vllm # xgrammar -triton==3.3.0 ; sys_platform != 'darwin' +triton==3.4.0 ; sys_platform != 'darwin' # via # liger-kernel # torch # xgrammar -typer==0.16.0 - # via fastapi-cli -typing-extensions==4.14.0 +typer==0.19.2 + # via + # fastapi-cli + # fastapi-cloud-cli +typing-extensions==4.15.0 # via + # aiosignal # anyio - # exceptiongroup # fastapi + # grpcio # huggingface-hub # mistral-common - # multidict # openai # opentelemetry-api - # opentelemetry-exporter-otlp-proto-grpc - # opentelemetry-exporter-otlp-proto-http # opentelemetry-sdk # opentelemetry-semantic-conventions - # outlines # pydantic # pydantic-core + # pydantic-extra-types # referencing - # rich # rich-toolkit + # starlette # torch # typer # typing-inspection - # uvicorn # vllm -typing-inspection==0.4.1 + # xgrammar +typing-inspection==0.4.2 # via pydantic tzdata==2025.2 # via pandas -urllib3==2.4.0 +urllib3==2.5.0 # via - # docker # requests # sentry-sdk -uvicorn==0.34.3 +uvicorn==0.37.0 # via # fastapi # fastapi-cli + # fastapi-cloud-cli # open-instruct uvloop==0.21.0 ; platform_python_implementation != 'PyPy' and sys_platform != 'cygwin' and sys_platform != 'win32' # via uvicorn -virtualenv==20.31.2 +virtualenv==20.35.3 # via ray -vllm==0.9.1 
+vllm==0.11.0
     # via open-instruct
 wandb==0.18.1
     # via open-instruct
 watchdog==6.0.0
     # via mkdocs
-watchfiles==1.0.5
+watchfiles==1.1.1
     # via
     #   uvicorn
     #   vllm
@@ -828,15 +812,15 @@ werkzeug==3.1.3
     # via tensorboard
 windows-curses==2.4.1 ; sys_platform == 'win32'
     # via nvitop
-wrapt==1.17.2
+wrapt==1.17.3
     # via smart-open
-xformers==0.0.30 ; platform_machine == 'x86_64' and sys_platform == 'linux'
+xformers==0.0.32.post1 ; platform_machine == 'x86_64' and sys_platform == 'linux'
     # via vllm
-xgrammar==0.1.19 ; platform_machine == 'aarch64' or platform_machine == 'x86_64'
+xgrammar==0.1.25 ; platform_machine == 'aarch64' or platform_machine == 'arm64' or platform_machine == 'x86_64'
     # via vllm
-xxhash==3.5.0
+xxhash==3.6.0
     # via datasets
-yarl==1.20.0
+yarl==1.22.0
     # via aiohttp
-zipp==3.22.0
+zipp==3.23.0
     # via importlib-metadata
diff --git a/scripts/rebuild_tool_server.sh b/scripts/rebuild_tool_server.sh
new file mode 100755
index 0000000000..af567a51ce
--- /dev/null
+++ b/scripts/rebuild_tool_server.sh
@@ -0,0 +1,84 @@
+#!/bin/bash
+set -e
+
+cleanup() {
+    if [ -n "$SERVER_PID" ]; then
+        echo "Stopping local server (PID: $SERVER_PID)..."
+        kill $SERVER_PID 2>/dev/null || true
+    fi
+}
+
+trap cleanup EXIT
+
+if [[ -n "$(git status --porcelain 2>/dev/null)" ]]; then
+    echo "Error: Uncommitted changes detected. Please commit or stash before running."
+    echo "------- git status (short) -------"
+    git status --short
+    exit 1
+fi
+
+echo "Building Docker image for ghcr.io..."
+docker build -t ghcr.io/allenai/open-instruct/python-code-executor -t tool-server -f open_instruct/tool_utils/Dockerfile .
+
+echo "Starting server locally on port 1212..."
+docker run -p 1212:8080 -e OPEN_INSTRUCT_TOOL_API_KEY="$OPEN_INSTRUCT_TOOL_API_KEY" tool-server &
+SERVER_PID=$!
+
+echo ""
+echo "========================================="
+echo "Server started! (PID: $SERVER_PID)"
+echo "========================================="
+echo ""
+echo "USAGE INSTRUCTIONS:"
+echo "Test the server with the following commands to verify:"
+echo "1) The timeout works correctly"
+echo "2) The timeout in the first curl does not block the second curl"
+echo ""
+echo "Test 1 - This should time out after 3 seconds:"
+echo "curl -X POST http://localhost:1212/execute \\"
+echo "  -H \"Content-Type: application/json\" \\"
+echo "  -H \"X-API-Key: \$OPEN_INSTRUCT_TOOL_API_KEY\" \\"
+echo "  -d '{\"code\": \"import time;time.sleep(4)\", \"timeout\": 3}' \\"
+echo "  -w '\\nTotal time: %{time_total}s\\n'"
+echo ""
+echo "Test 2 - This should complete quickly:"
+echo "curl -X POST http://localhost:1212/execute \\"
+echo "  -H \"Content-Type: application/json\" \\"
+echo "  -H \"X-API-Key: \$OPEN_INSTRUCT_TOOL_API_KEY\" \\"
+echo "  -d '{\"code\": \"print(1)\", \"timeout\": 3}' \\"
+echo "  -w '\\nTotal time: %{time_total}s\\n'"
+echo ""
+echo "========================================="
+echo ""
+
+read -p "Do you want to deploy to Google Cloud Run? (y/n) " -n 1 -r
+echo
+if [[ $REPLY =~ ^[Yy]$ ]]; then
+    echo "Pushing Docker image to ghcr.io..."
+    docker push ghcr.io/allenai/open-instruct/python-code-executor
+
+    echo "Deploying to Google Cloud Run..."
+    gcloud run deploy open-instruct-tool-server --project ai2-allennlp --region us-central1 --source .
+fi
+
+if [ -n "$BEAKER_TOKEN" ]; then
+    echo ""
+    read -p "BEAKER_TOKEN detected. Do you want to deploy to Beaker? (y/n) " -n 1 -r
+    echo
+    if [[ $REPLY =~ ^[Yy]$ ]]; then
+        echo "Deploying to Beaker..."
+ beaker_user=$(beaker account whoami --format json | jq -r '.[0].name') + beaker image delete $beaker_user/tool-server || true + beaker image create tool-server -n tool-server -w ai2/$beaker_user + uv run python mason.py \ + --cluster ai2/phobos-cirrascale \ + --workspace ai2/scaling-rl \ + --image $beaker_user/tool-server --pure_docker_mode \ + --priority high \ + --budget ai2/oe-adapt \ + --gpus 0 -- python tool_server.py + fi +fi + +echo "" +echo "Local server will be stopped automatically when script exits."
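For reference, the curl examples above translate directly to a Python client. A minimal sketch, assuming a local `docker run` on port 1212 as in the docstring; the endpoint URL and payload mirror the `/execute` examples and are not part of the diff:

```python
import os

import requests

# Assumed local endpoint, matching the `docker run -p 1212:8080 ... tool-server` example.
API_ENDPOINT = "http://localhost:1212/execute"

headers = {"Content-Type": "application/json"}
api_key = os.getenv("OPEN_INSTRUCT_TOOL_API_KEY")
if api_key:
    # Mirrors PythonCodeTool: the X-API-Key header is only attached when a key is configured.
    headers["X-API-Key"] = api_key

response = requests.post(
    API_ENDPOINT,
    json={"code": "print(1 + 1)", "timeout": 3},  # same payload shape as the curl tests
    headers=headers,
    timeout=3,  # client-side timeout, as in tools.py
)
response.raise_for_status()
result = response.json()  # CodeResponse fields: output, error, success
print(result["output"], result["success"])
```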
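The `verify_api_key` dependency can also be exercised without a live server via FastAPI's `TestClient`. A sketch, assuming it is run the same way as test_tools.py (from `open_instruct/tool_utils` with the project environment); since EXPECTED_API_KEY is read at module import time, the env var must be set before `tool_server` is imported:

```python
import os

# verify_api_key reads EXPECTED_API_KEY at module import, so set the key first.
os.environ["OPEN_INSTRUCT_TOOL_API_KEY"] = "test-key"

from fastapi.testclient import TestClient

import tool_server  # assumes cwd is open_instruct/tool_utils

with TestClient(tool_server.app) as client:
    payload = {"code": "print(1)", "timeout": 3}

    # Missing header: rejected by the dependency before any code executes.
    assert client.post("/execute", json=payload).status_code == 401

    # Wrong key: also rejected with 401 ("Invalid API key").
    bad = client.post("/execute", json=payload, headers={"X-API-Key": "wrong-key"})
    assert bad.status_code == 401
```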
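The two-curl timeout check that rebuild_tool_server.sh prints can likewise be scripted, which makes the non-blocking claim assertable rather than eyeballed. A sketch under the same assumptions (local server on port 1212, key exported in the environment):

```python
import os
import time
from concurrent.futures import ThreadPoolExecutor

import requests

ENDPOINT = "http://localhost:1212/execute"  # assumed local docker run, as above
HEADERS = {"X-API-Key": os.environ.get("OPEN_INSTRUCT_TOOL_API_KEY", "")}


def timed_post(code: str) -> float:
    """POST a snippet with a 3s server-side timeout; return wall-clock seconds."""
    start = time.time()
    requests.post(ENDPOINT, json={"code": code, "timeout": 3}, headers=HEADERS, timeout=10)
    return time.time() - start


with ThreadPoolExecutor(max_workers=2) as pool:
    slow = pool.submit(timed_post, "import time;time.sleep(4)")  # should be cut off at ~3s
    fast = pool.submit(timed_post, "print(1)")
    # If the executor pool is healthy, the fast request returns well before the slow one.
    print(f"slow: {slow.result():.1f}s  fast: {fast.result():.1f}s")
```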