diff --git a/.cursor/BUGBOT.md b/.cursor/BUGBOT.md index 2986c86ad..3319ce820 100644 --- a/.cursor/BUGBOT.md +++ b/.cursor/BUGBOT.md @@ -77,3 +77,13 @@ When these files change, verify and update any affected skill files: - `skills/brainstorm/SKILL.md` If workflow-relevant changes are detected without matching skill updates, request that the author update the impacted skills before merge. + +## Environment Rollout Logic + +Do not request library utilities solely because two or more environments contain similar message, state, or rollout-loop data manipulation. A few explicit lines inside an environment are often the clearest and most discoverable implementation. + +In particular, do not suggest moving small helpers for selecting messages, extracting text from `state`, or juggling rollout-local fields into hidden library modules. Buried helpers are not easily discoverable by end users, clutter the public API when promoted, and make the docs responsible for enumerating every three-line convenience function. + +Prefer explicit environment-local code unless the repeated logic is a framework contract, fixes a correctness bug at the boundary, or is already part of documented user-facing API. Do not ask authors to create one-off private helpers for simple rollout logic; if a few lines are used once, they should usually stay inline at the call site. + +Helpers are acceptable when the logic is reused in multiple places, is a taskset-bound object that forms part of the environment contract, or is complex enough to deserve a named secondary module. Excess reliance on buried rollout-loop helpers should be treated as non-idiomatic and a code smell. diff --git a/.github/workflows/README.md b/.github/workflows/README.md index 139334df3..db2046855 100644 --- a/.github/workflows/README.md +++ b/.github/workflows/README.md @@ -5,7 +5,7 @@ This directory contains automated workflows for the verifiers project. ## Workflows ### 1. 
Style (`style.yml`) -**Purpose**: Code style checking using ruff and ty. +**Purpose**: Code style checking using ruff, ty, and Semgrep policy rules. **Triggers**: - Pull requests (opened, synchronized, reopened) @@ -14,7 +14,8 @@ This directory contains automated workflows for the verifiers project. **What it does**: - Runs ruff for linting and formatting checks - Runs ty type checks with `uv run ty check verifiers` -- Uses configuration from `pyproject.toml` +- Runs Semgrep policy checks through pre-commit's isolated hook environment. +- Uses configuration from `pyproject.toml`, `.pre-commit-config.yaml`, and `.semgrep/verifiers.yml` ### 2. Test (`test.yml`) **Purpose**: Comprehensive testing with coverage reports. @@ -47,6 +48,9 @@ To run checks locally the same way they run in CI: # Ty parity with CI (Python 3.13 target configured in `pyproject.toml`) uv run ty check verifiers +# Verifiers-specific policy lint +env PYTHONWARNINGS=ignore::SyntaxWarning uv run pre-commit run semgrep-v1-policy --all-files + # Tests uv sync uv run pytest tests/ -v diff --git a/.github/workflows/style.yml b/.github/workflows/style.yml index 13920cf20..c50434b13 100644 --- a/.github/workflows/style.yml +++ b/.github/workflows/style.yml @@ -40,3 +40,19 @@ jobs: run: uv sync - name: Run ty run: uv run ty check verifiers + semgrep: + name: Semgrep + runs-on: ubuntu-latest + steps: + - name: Checkout repository + uses: actions/checkout@v4 + - name: Set up Python + uses: actions/setup-python@v6 + with: + python-version: "3.13" + - name: Install uv + uses: astral-sh/setup-uv@v7 + - name: Install dependencies + run: uv sync --group dev --group policy + - name: Run Semgrep policy checks + run: env PYTHONWARNINGS=ignore::SyntaxWarning uv run pre-commit run semgrep-v1-policy --config .pre-commit-config.yaml --all-files diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 107afb082..e74ceb56f 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -15,6 +15,11 @@ 
repos: entry: uv run ruff format language: system types_or: [python, pyi] + - id: semgrep-v1-policy + name: Semgrep v1 policy + entry: uv run --group policy semgrep --metrics=off --disable-version-check --config .semgrep/verifiers.yml --error --quiet + language: system + pass_filenames: false - id: sync-agents-md name: Sync AGENTS.md from docs entry: uv run python scripts/sync.py diff --git a/.semgrep/verifiers.yml b/.semgrep/verifiers.yml new file mode 100644 index 000000000..2485f4b5d --- /dev/null +++ b/.semgrep/verifiers.yml @@ -0,0 +1,84 @@ +rules: + - id: verifiers-no-future-annotations + languages: [python] + severity: ERROR + message: Do not use `from __future__ import annotations`; quote only the specific forward references that need it. + pattern: from __future__ import annotations + + - id: verifiers-v1-config-param-one-type + languages: [python] + severity: ERROR + message: Public v1 `config` parameters must be one concrete config type or `None`; keep raw mappings at explicit config-loader boundaries. + paths: + include: + - /verifiers/v1/**/*.py + - /environments/**/*.py + exclude: + - /verifiers/v1/config.py + - /verifiers/v1/utils/**/*.py + patterns: + - pattern: | + def $FUNC(..., config: $ANNOT = None, ...): + ... + - metavariable-regex: + metavariable: $ANNOT + regex: "(Any|ConfigMap|Mapping\\[str, object\\]|dict\\[str, object\\]|.*\\|.*\\|.*)" + + - id: verifiers-no-private-framework-classes + languages: [python] + severity: ERROR + message: Do not define leading-underscore classes in framework code; use a clear public name or a module-level value/function instead. + paths: + include: + - /verifiers/**/*.py + pattern-regex: "^\\s*class _[A-Za-z]" + + - id: verifiers-no-raw-any-v1 + languages: [python] + severity: ERROR + message: Do not use raw `Any` in v1 or environment code; use a precise type or a named boundary alias in verifiers.v1.types. 
+ paths: + include: + - /verifiers/v1/**/*.py + - /environments/**/*.py + exclude: + - /verifiers/v1/types.py + - /environments/openenv_*/proj/**/*.py + pattern-regex: "\\bAny\\b" + + - id: verifiers-no-raw-object-containers-v1 + languages: [python] + severity: ERROR + message: Do not spell broad object containers in v1 or environment code; use the named boundary types ConfigMap, ConfigData, Handler, GroupHandler, Objects, TaskRow, ProgramMap, or a narrower type. + paths: + include: + - /verifiers/v1/**/*.py + - /environments/**/*.py + exclude: + - /verifiers/v1/types.py + - /verifiers/v1/utils/object_utils.py + - /verifiers/v1/utils/task_freeze_utils.py + - /environments/openenv_*/proj/**/*.py + pattern-regex: "(?x)(\\b(?:Mapping|MutableMapping|dict|list|Sequence|Iterable|Callable|Awaitable|tuple)\\[[^\\n\\]]*\\bobject\\b|\\bcast\\([^\\n)]*\\bobject\\b)" + + - id: verifiers-no-raw-mapping-annotations-v1 + languages: [python] + severity: ERROR + message: Do not use raw Mapping annotations in v1 or environment code; prefer dict or a named v1 boundary type. Keep Mapping only for isinstance checks or explicit aliases. + paths: + include: + - /verifiers/v1/**/*.py + - /environments/**/*.py + exclude: + - /verifiers/v1/types.py + - /environments/openenv_*/proj/**/*.py + pattern-regex: "\\b(?:Mapping|MutableMapping)\\[" + + - id: verifiers-get-messages-typed + languages: [python] + severity: ERROR + message: "`get_messages` must return typed `Message` objects, not raw dictionaries." + paths: + include: + - /verifiers/utils/message_utils.py + pattern-regex: "def\\s+get_messages\\b[\\s\\S]*?->\\s*list\\s*\\[\\s*dict\\b" diff --git a/AGENTS.md b/AGENTS.md index 07953cc47..2346dba1d 100644 --- a/AGENTS.md +++ b/AGENTS.md @@ -13,6 +13,18 @@ These points are direct restatements of Verifiers docs so agents can follow the - Use `ToolEnv`/`MCPEnv` for stateless tools and `StatefulToolEnv` when per-rollout state must persist (sandbox/session/db handles). 
(See `docs/environments.md`.) - If external API keys are required, validate them in `load_environment()` with `vf.ensure_keys(...)` so failures are explicit and early. (See `docs/environments.md`.) +## Style Rules + +Use these rules when shaping user-facing Verifiers APIs, configs, and environment files. + +- Prefer Verifiers-native interfaces over stdlib-pure plumbing in user code. A stdlib-pure expression that forces every environment to write path manipulation, import-resource handling, ad hoc discovery, or boilerplate constants is a style bug; put that logic behind a Verifiers abstraction instead. +- Keep user-facing APIs incredibly minimal and elegant. The best surface is usually golfy but intuitive: one obvious field, one obvious constructor, and no redundant knobs unless there is a concrete long-term reason. +- Use Pydantic config models wherever structured configuration is needed. Pydantic is always acceptable and preferred over loose dictionaries when it clarifies the contract. +- Prefer strict, narrow types. Use `object`, broad unions, or untyped mappings only at explicit framework boundaries where arbitrary user values are genuinely part of the contract. +- Basic environments should fit in a few dozen self-contained, idiomatic lines: import `verifiers`, define `load_environment`, pipe bindings/config through constructors, and keep policy values in config subclasses or literal constructor kwargs when needed. +- Environment modules should not define global helper functions. Put reusable logic in well-named utility modules, taskset/harness classes, toolsets, or small local classes owned by the abstraction. Rare exceptions are process-level handles, such as a lock or semaphore, when that is the only reasonable way to enforce the intended runtime control. +- Additional code should have a clear home. Do not hide utilities at the bottom of files or scatter one-off helpers through environment entrypoints. 
+ ## Repository Development Notes Use this guidance when contributing to the `verifiers` repository itself. @@ -22,6 +34,9 @@ Use this guidance when contributing to the `verifiers` repository itself. - Keep changes aligned with documented architecture (`verifiers/`, `environments/`, `configs/`, `tests/`, `docs/`) and update docs when behavior changes. (See `docs/development.md`.) - Prefer a single clear path over maintaining parallel approaches by default; if two options exist, preserve both only when there is an explicit long-term reason. - Aggressively deprecate/remove inferior paths when they are not part of an intended multi-option contract, especially in repo-internal development workflows. +- Treat broad dynamic mappings as explicit framework boundaries, not casual public API types. Use a named domain alias or typed Pydantic field for legitimate arbitrary payloads such as task rows, protocol messages, sandbox/program specs, and `objects`/binding-style config; do not expose raw `Mapping[str, object]` in user-facing signatures unless that looseness is the point of the abstraction. +- If a user request conflicts with repository style, formatting, or API-quality guidelines, push back instead of implementing the literal request. Identify a comparable request or explicit guideline relaxation that preserves clean, maintainable, modular code across the current request and adjacent future use cases; implement that plan, then explain the decision process and tradeoffs directly to the user. +- Before v0.2.0, breaking backward compatibility inside v1 Taskset/Harness APIs is acceptable and encouraged when it improves the core design. Preserve v0 multi-turn environment compatibility unless the user explicitly asks for a v0 migration. - Treat public configuration and docs as part of the API. Keep TOML shapes consistent across eval, GEPA, RL, and Hosted Training; normalize legacy inputs at the ingestion boundary instead of spreading compatibility branches through examples. 
- For v1 Taskset/Harness work, make the taskset own task data, task tools, user behavior, metrics, rewards, and task-specific configuration. Use the base `vf.Harness` unless the harness really owns a reusable execution mechanism. - When renaming or deleting an environment/module path, update package metadata, README/docs references, tests, build includes, and generated AGENTS output in the same change. diff --git a/README.md b/README.md index b5db203bd..40f64d333 100644 --- a/README.md +++ b/README.md @@ -135,7 +135,7 @@ For new environments with reusable tasksets, toolsets, custom programs, or custom harnesses, use the v1 Taskset/Harness path: ```python # my_env.py -import verifiers.v1 as vf +import verifiers as vf def source(): yield { @@ -151,8 +151,7 @@ async def contains_answer(task, state) -> float: def load_taskset(config: vf.TasksetConfig | None = None): return vf.Taskset(source=source, rewards=[contains_answer], config=config) -def load_environment(config: vf.EnvConfig | None = None) -> vf.Env: - config = config or vf.EnvConfig() +def load_environment(config: vf.EnvConfig) -> vf.Env: return vf.Env(taskset=load_taskset(config=config.taskset)) ``` If no harness is passed, `vf.Env` uses the base endpoint-backed harness. See @@ -164,7 +163,7 @@ harness with: ```python env = vf.Env( - taskset=vf.HarborTaskset(tasks="/path/to/harbor/tasks"), + taskset=vf.HarborTaskset(), harness=vf.OpenCode(), ) ``` diff --git a/assets/agents/common_best_practices.md b/assets/agents/common_best_practices.md index da8020eaa..587e7684f 100644 --- a/assets/agents/common_best_practices.md +++ b/assets/agents/common_best_practices.md @@ -8,3 +8,15 @@ These points are direct restatements of Verifiers docs so agents can follow the - For new taskset/harness environments, use the v1 `vf.Env` / `vf.Taskset` / `vf.Harness` format. 
Treat [BYO Harness](docs/byo-harness.md) as the canonical authoring guide for reusable tasksets, reusable harnesses, framework programs, endpoint interception, and sandboxed Python/command programs. - Use `ToolEnv`/`MCPEnv` for stateless tools and `StatefulToolEnv` when per-rollout state must persist (sandbox/session/db handles). (See `docs/environments.md`.) - If external API keys are required, validate them in `load_environment()` with `vf.ensure_keys(...)` so failures are explicit and early. (See `docs/environments.md`.) + +## Style Rules + +Use these rules when shaping user-facing Verifiers APIs, configs, and environment files. + +- Prefer Verifiers-native interfaces over stdlib-pure plumbing in user code. A stdlib-pure expression that forces every environment to write path manipulation, import-resource handling, ad hoc discovery, or boilerplate constants is a style bug; put that logic behind a Verifiers abstraction instead. +- Keep user-facing APIs incredibly minimal and elegant. The best surface is usually golfy but intuitive: one obvious field, one obvious constructor, and no redundant knobs unless there is a concrete long-term reason. +- Use Pydantic config models wherever structured configuration is needed. Pydantic is always acceptable and preferred over loose dictionaries when it clarifies the contract. +- Prefer strict, narrow types. Use `object`, broad unions, or untyped mappings only at explicit framework boundaries where arbitrary user values are genuinely part of the contract. +- Basic environments should fit in a few dozen self-contained, idiomatic lines: import `verifiers`, define `load_environment`, pipe bindings/config through constructors, and keep policy values in config subclasses or literal constructor kwargs when needed. +- Environment modules should not define global helper functions. Put reusable logic in well-named utility modules, taskset/harness classes, toolsets, or small local classes owned by the abstraction. 
Rare exceptions are process-level handles, such as a lock or semaphore, when that is the only reasonable way to enforce the intended runtime control. +- Additional code should have a clear home. Do not hide utilities at the bottom of files or scatter one-off helpers through environment entrypoints. diff --git a/assets/agents/repo_development_best_practices.md b/assets/agents/repo_development_best_practices.md index 1631f8bcc..fa6d350a9 100644 --- a/assets/agents/repo_development_best_practices.md +++ b/assets/agents/repo_development_best_practices.md @@ -7,6 +7,9 @@ Use this guidance when contributing to the `verifiers` repository itself. - Keep changes aligned with documented architecture (`verifiers/`, `environments/`, `configs/`, `tests/`, `docs/`) and update docs when behavior changes. (See `docs/development.md`.) - Prefer a single clear path over maintaining parallel approaches by default; if two options exist, preserve both only when there is an explicit long-term reason. - Aggressively deprecate/remove inferior paths when they are not part of an intended multi-option contract, especially in repo-internal development workflows. +- Treat broad dynamic mappings as explicit framework boundaries, not casual public API types. Use a named domain alias or typed Pydantic field for legitimate arbitrary payloads such as task rows, protocol messages, sandbox/program specs, and `objects`/binding-style config; do not expose raw `Mapping[str, object]` in user-facing signatures unless that looseness is the point of the abstraction. +- If a user request conflicts with repository style, formatting, or API-quality guidelines, push back instead of implementing the literal request. Identify a comparable request or explicit guideline relaxation that preserves clean, maintainable, modular code across the current request and adjacent future use cases; implement that plan, then explain the decision process and tradeoffs directly to the user. 
+- Before v0.2.0, breaking backward compatibility inside v1 Taskset/Harness APIs is acceptable and encouraged when it improves the core design. Preserve v0 multi-turn environment compatibility unless the user explicitly asks for a v0 migration. - Treat public configuration and docs as part of the API. Keep TOML shapes consistent across eval, GEPA, RL, and Hosted Training; normalize legacy inputs at the ingestion boundary instead of spreading compatibility branches through examples. - For v1 Taskset/Harness work, make the taskset own task data, task tools, user behavior, metrics, rewards, and task-specific configuration. Use the base `vf.Harness` unless the harness really owns a reusable execution mechanism. - When renaming or deleting an environment/module path, update package metadata, README/docs references, tests, build includes, and generated AGENTS output in the same change. diff --git a/assets/lab/AGENTS.md b/assets/lab/AGENTS.md index bc46006a5..301cfd6d2 100644 --- a/assets/lab/AGENTS.md +++ b/assets/lab/AGENTS.md @@ -15,6 +15,18 @@ These points are direct restatements of Verifiers docs so agents can follow the - Use `ToolEnv`/`MCPEnv` for stateless tools and `StatefulToolEnv` when per-rollout state must persist (sandbox/session/db handles). (See `docs/environments.md`.) - If external API keys are required, validate them in `load_environment()` with `vf.ensure_keys(...)` so failures are explicit and early. (See `docs/environments.md`.) +## Style Rules + +Use these rules when shaping user-facing Verifiers APIs, configs, and environment files. + +- Prefer Verifiers-native interfaces over stdlib-pure plumbing in user code. A stdlib-pure expression that forces every environment to write path manipulation, import-resource handling, ad hoc discovery, or boilerplate constants is a style bug; put that logic behind a Verifiers abstraction instead. +- Keep user-facing APIs incredibly minimal and elegant. 
The best surface is usually golfy but intuitive: one obvious field, one obvious constructor, and no redundant knobs unless there is a concrete long-term reason. +- Use Pydantic config models wherever structured configuration is needed. Pydantic is always acceptable and preferred over loose dictionaries when it clarifies the contract. +- Prefer strict, narrow types. Use `object`, broad unions, or untyped mappings only at explicit framework boundaries where arbitrary user values are genuinely part of the contract. +- Basic environments should fit in a few dozen self-contained, idiomatic lines: import `verifiers`, define `load_environment`, pipe bindings/config through constructors, and keep policy values in config subclasses or literal constructor kwargs when needed. +- Environment modules should not define global helper functions. Put reusable logic in well-named utility modules, taskset/harness classes, toolsets, or small local classes owned by the abstraction. Rare exceptions are process-level handles, such as a lock or semaphore, when that is the only reasonable way to enforce the intended runtime control. +- Additional code should have a clear home. Do not hide utilities at the bottom of files or scatter one-off helpers through environment entrypoints. + ## End-User Lab Workspace Notes Use this guidance in projects created via `prime lab setup`. diff --git a/assets/lab/environments/AGENTS.md b/assets/lab/environments/AGENTS.md index 14095d742..60f640210 100644 --- a/assets/lab/environments/AGENTS.md +++ b/assets/lab/environments/AGENTS.md @@ -6,7 +6,7 @@ This file mirrors the "Environments" documentation page. --- -This guide walks through building environments in Verifiers, from simple single-turn tasks to complex multi-turn agents with tools. See [Overview](overview.md) for how to initialize a new environment template. For new taskset/harness environments, use the v1 `Taskset`/`Harness` format documented in [BYO Harness](byo-harness.md). 
+This guide walks through building environments in Verifiers, from simple single-turn tasks to complex multi-turn agents with tools. See [Overview](overview.md) for how to initialize a new environment template. For reusable taskset/harness environments, see [BYO Harness](byo-harness.md). ## Table of Contents @@ -279,7 +279,9 @@ rubric = vf.Rubric(funcs=[correct_answer, diversity_bonus]) ### Shared Objects -Beyond rollout data, reward functions can request static objects that live within the Rubric class. These are stored in the Rubric's `class_objects` dictionary, and can be added after initialization via `add_class_object()`: +In rubric environments, reward functions can request static helper objects that +live within the Rubric class. These are stored in the Rubric's `class_objects` +dictionary, and can be added after initialization via `add_class_object()`: ```python rubric = vf.Rubric(funcs=[my_reward_func]) @@ -290,23 +292,13 @@ async def my_reward_func(completion, my_helper) -> float: return await my_helper.score(completion) ``` -Two common types of shared objects are **parsers** and **judges**. +For taskset/harness environments, use taskset-owned `objects` and `bindings` as +shown in [BYO Harness](byo-harness.md#shared-dependencies). -Parsers encapsulate logic for extracting structured content from model responses. When passed to a rubric, the parser is automatically available to reward functions: - -```python -parser = vf.XMLParser(["reasoning", "answer"]) -rubric = vf.Rubric(funcs=[my_reward_func], parser=parser) - -async def my_reward_func(completion, parser) -> float: - parsed = parser.parse_answer(completion) - # parsed.reasoning, parsed.answer available - ... -``` - -Parsers can also be passed to environments, where they are often used during rollouts to validate or extract content. This allows parsing logic to be shared between the environment's interaction loop and the rubric's reward functions. 
- -Judges are used for tasks where deterministic evaluation is impractical, and an LLM is used to score responses. **JudgeRubric** is a built-in class which stores an LLM client inside the rubric, and provides a `judge` callable to reward functions for scoring responses: +Judges are used for tasks where deterministic evaluation is impractical, and an +LLM is used to score responses. **JudgeRubric** stores an LLM client inside the +rubric, and provides a `judge` callable to reward +functions for scoring responses: ```python judge_rubric = vf.JudgeRubric( @@ -537,26 +529,39 @@ The `env_response` method is an abstract method that must be overridden by all ` ```python class MyGameEnv(vf.MultiTurnEnv): + def __init__(self, dataset, rubric, extract_action): + super().__init__(dataset=dataset, rubric=rubric) + self.extract_action = extract_action + async def env_response(self, messages: vf.Messages, state: vf.State) -> vf.Messages: """Generate the environment's response after each model turn.""" - parsed = self.parser.parse(messages) - action = parsed.action + action = self.extract_action(messages) feedback = process_action(action) return [{"role": "user", "content": feedback}] -async def correct_action(parser, completion, answer) -> float: - parsed = parser.parse(completion) - return 1.0 if parsed.action == answer else 0.0 +class ActionExtractor: + def __call__(self, messages: vf.Messages) -> str: + text = messages[-1]["content"] if messages else "" + return str(text).strip() + + +async def correct_action(extract_action, completion, answer) -> float: + return 1.0 if extract_action(completion) == answer else 0.0 def load_environment(): - parser = vf.XMLParser(fields=["action"]) - rubric = vf.Rubric(funcs=[correct_action], parser=parser) - return MyGameEnv(dataset=dataset, rubric=rubric, parser=parser) + extract_action = ActionExtractor() + rubric = vf.Rubric(funcs=[correct_action]) + rubric.add_class_object("extract_action", extract_action) + return 
MyGameEnv(dataset=dataset, rubric=rubric, extract_action=extract_action) ``` -`env_response` receives the full conversation history thus far (and `state`) and returns a list of _new_ messages to append. When a parser is passed to the environment, it becomes available as `self.parser`. Passing the same parser to the rubric makes it available to reward functions by name. For tool environments, `env_response` typically executes tool calls and returns results. For games or other custom protocols, this might involve parsing structured output (as above) and returning state updates or feedback. +`env_response` receives the full conversation history thus far (and `state`) and +returns a list of _new_ messages to append. For tool environments, +`env_response` typically executes tool calls and returns results. For games or +other custom protocols, this might involve extracting structured output and +returning state updates or feedback. Several other methods can optionally be overridden for more control in complex custom environments: @@ -893,7 +898,7 @@ Supported third-party environment integrations include: - **`BrowserEnv`** — unified browser automation via [Browserbase](https://browserbase.com) with DOM and CUA modes - **`OpenEnvEnv`** — wraps OpenEnv gym and MCP contracts using Prime Sandboxes with prebuilt images referenced from `.build.json` -These require additional dependencies installed via extras (e.g., `uv add 'verifiers[ta]'` for TextArena, `uv add 'verifiers[browser]'` for BrowserEnv, `uv add 'verifiers[openenv]'` for OpenEnvEnv). For OpenEnv environments, build the bundled project image with `prime env build ` before evaluation or training. +These require additional dependencies installed via extras (e.g., `uv add 'verifiers[ta]'` for TextArena, `uv add 'verifiers[browser]'` for BrowserEnv). 
OpenEnvEnv uses the base Verifiers install; the bundled OpenEnv project under `proj/` owns its server dependencies and must be built with `uv run vf-build ` before evaluation or training. Newer and more experimental environment classes include: @@ -910,7 +915,7 @@ Newer and more experimental environment classes include: timeouts=SandboxTimeouts(read_file=30.0, extract=180.0, poll=120.0), ) ``` -- **V1 `vf.Env` / `vf.Taskset` / `vf.Harness`** — preferred taskset/harness pattern for composing task data and program execution without subclassing. Use this for new environments that need reusable tasksets, reusable harnesses, config-driven metrics, rewards, toolsets, users, endpoint interception, or sandboxed Python/command programs. `vf.Taskset` owns train/eval rows, prompt shaping, setup/update/reward hooks, and toolsets. `vf.Harness` owns the framework program, endpoint proxy, model controls, sandbox options, and runtime hooks. `vf.Env` wires them into the standard evaluation and training surface. +- **`vf.Env` / `vf.Taskset` / `vf.Harness`** — preferred taskset/harness pattern for composing task data and program execution without subclassing. Use this for environments that need reusable tasksets, reusable harnesses, config-driven metrics, rewards, toolsets, users, endpoint interception, or sandboxed Python/command programs. `vf.Taskset` owns train/eval rows, prompt shaping, setup/update/reward hooks, and toolsets. `vf.Harness` owns the framework program, endpoint proxy, model controls, sandbox options, and runtime hooks. `vf.Env` wires them into the standard evaluation and training surface. - **`SWEDebugEnv`** — no-agent debugger for SWE-style `SandboxTaskSet` instances. It creates the task sandbox, optionally runs `taskset.setup(state)`, performs one debug step (`none`, `gold_patch`, `command`, or `script`), and optionally runs the task tests and scorer. 
It records setup, sandbox creation, gold patch, debug command, and test timings in state for validation and timing investigations. - **`HarborEnv`** — loads Harbor-format agent benchmark tasks - **`RLMEnv`** — implements [Recursive Language Models](https://alexzhang13.github.io/blog/2025/rlm/) for unbounded context processing via REPL-based decomposition and recursive sub-LLM calls diff --git a/docs/byo-harness.md b/docs/byo-harness.md index 34097af5c..e1bc99145 100644 --- a/docs/byo-harness.md +++ b/docs/byo-harness.md @@ -33,7 +33,7 @@ LangChain, OpenAI Agents, CLI, or base harness should consume those tools from runtime state instead of constructing its own copy. ```python -import verifiers.v1 as vf +import verifiers as vf def source(): @@ -54,8 +54,7 @@ def load_taskset(config: vf.TasksetConfig | None = None): return vf.Taskset(source=source, rewards=[contains_answer], config=config) -def load_environment(config: vf.EnvConfig | None = None): - config = config or vf.EnvConfig() +def load_environment(config: vf.EnvConfig): return vf.Env(taskset=load_taskset(config=config.taskset)) ``` @@ -67,7 +66,7 @@ zero-argument loader so imports and constructors stay cheap. ```python from datasets import load_dataset -import verifiers.v1 as vf +import verifiers as vf class GSM8KTasksetConfig(vf.TasksetConfig): @@ -100,6 +99,74 @@ Do not use a top-level string `task` field for routing. v1 tasksets serialize the full task payload through `info["task"]` for worker compatibility, and environment routing uses `info["env_id"]`. 
+## Shared Dependencies + +Shared dependencies live on the taskset and are injected into named lifecycle or +scoring functions through bindings: + +```python +import re +import verifiers as vf + + +class AnswerExtractor: + def __init__(self): + self.pattern = re.compile(r"<answer>(.*?)</answer>", re.DOTALL) + + def __call__(self, completion: list[dict[str, object]]) -> str: + message = vf.get_messages(completion, role="assistant")[-1] + text = str(message.content or "") + match = self.pattern.search(text) + return "" if match is None else match.group(1).strip() + + +@vf.reward +async def exact(task, state, extract_answer) -> float: + response = extract_answer(state.get("completion") or []) + return float(response == task["answer"]) + + +def load_environment(config: vf.EnvConfig) -> vf.Env: + return vf.Env( + taskset=vf.Taskset( + source=source, + rewards=[exact], + objects={"extract_answer": AnswerExtractor}, + bindings={"exact.extract_answer": "objects.extract_answer"}, + config=config.taskset, + ) + ) +``` + +`objects` values are instances or zero-argument factories. Factories are lazy +and resolve once per taskset runtime. Bindings keep the reward signature explicit +without moving shared dependencies into global state. + +## Message Access + +Taskset/harness environments expose one transcript selector: + +```python +messages = vf.get_messages(state.get("completion") or [], role="assistant") +response = str(messages[-1].content or "") if messages else "" + +assistant_turns = len(vf.get_messages(state.get("completion") or [], role="assistant")) +``` + +Use `vf.get_messages(...)` to get the transcript as typed message objects, +optionally filtered by role. Index or slice the returned list with ordinary +Python. The helper does not parse answers; task-specific extraction belongs in +ordinary Python or a taskset-bound object. + +Keep rollout-loop data manipulation explicit. 
A few lines that read +`state["completion"]`, select messages, inspect task fields, or build a prompt +should usually be written directly where they are used, not hidden behind a +library helper or a one-off private function. Helpers are appropriate when the +logic is reused in multiple places, when a taskset-bound object is part of the +environment contract, or when complex behavior belongs in a named secondary +module. Do not create buried `utils` imports just to avoid three clear lines in +a reward, update, setup, or program function. + ## Task Controls Tasks can request rollout behavior through top-level serializable fields: @@ -141,7 +208,7 @@ the eval/training worker. Use `task.program` when a taskset owns files or environment variables that a reusable harness should consume. The taskset cannot change the harness command -or tool interface; duplicate keys across the taskset and harness fail. +or tool channel; duplicate keys across the taskset and harness fail. ## Toolsets @@ -162,8 +229,10 @@ taskset = vf.Taskset(source=source, toolsets=[toolset]) ``` Bindings inject hidden arguments that the model does not see. Common binding -roots are `task.*`, `state.*`, and `tools.*`. Tool and user callables can also -bind `objects.*` from their own private dependency factories. +roots are `task.*`, `state.*`, and `tools.*`. Tasksets, toolsets, and users can +also bind `objects.*` from their own private dependency factories. +String binding sources are always framework paths. Use a callable source for +literal string values so misspelled paths fail during setup. Custom harness programs can adapt taskset-owned tools through `state.get_tools()`. 
That keeps the same taskset reusable across the base harness, a third-party @@ -211,8 +280,7 @@ def load_harness(config: vf.HarnessConfig | None = None): ) -def load_environment(config: vf.EnvConfig | None = None): - config = config or vf.EnvConfig() +def load_environment(config: vf.EnvConfig): return vf.Env( taskset=load_taskset(config=config.taskset), harness=load_harness(config=config.harness), @@ -233,10 +301,12 @@ All model calls go through the v1 interception endpoint so trajectory capture, tool forwarding, and protocol translation share one path. Sandbox command programs can request the resolved tools as an MCP server with -`program={"command": [...], "sandbox": True, "tools": "mcp"}`. Python programs +`program={"command": [...], "sandbox": True, "channels": "mcp"}`. Python programs receive callable tool handles by default, or can set -`program={"sandbox": True, "tools": "callable"}` when the base loop is moved -into a sandbox. +`program={"sandbox": True, "channels": "callable"}` when the base loop is moved +into a sandbox. `program.channels` supports only the generic `callable` and `mcp` +channels. Harness-specific tool carriers, such as RLM skill uploads, should +live on the taskset upload directory contract or the harness config. For sandboxed `program.fn` refs, v1 resolves the owning local package from the resolved module root: single-file modules use `pyproject.toml` in the same @@ -269,20 +339,29 @@ config surface; do not subclass `Env` just to bypass inference. Packaged CLI harnesses should use the same boundary. These implementations live under `verifiers.v1.packages` while the v1 surface stabilizes, and are -re-exported through `verifiers.v1`. `CLIHarness` is the generic command wrapper; -`OpenCode`, `Pi`, `MiniSWEAgent`, and `RLM` are bundled leaf wrappers: +re-exported through `verifiers.v1`. 
`OpenCode`, `Pi`, `MiniSWEAgent`, and `RLM` +are bundled `Harness` leaf wrappers for common command-line agents: ```python def load_environment(): return vf.Env( - taskset=vf.HarborTaskset(tasks="/path/to/harbor/tasks"), + taskset=vf.HarborTaskset(), harness=vf.OpenCode(), ) ``` -`HarborTaskset` owns Harbor task loading, sandbox overrides, task uploads, and -test scoring. CLI harnesses own CLI installation/config/run behavior and work -with any taskset that supplies a prompt. +`HarborTaskset()` loads Harbor-format task directories from the environment +package's reserved `tasks/` directory. `HarborTaskset(dataset="owner/name")` +fetches a Harbor Hub dataset. The taskset owns Harbor task loading, sandbox +overrides, task uploads, and test scoring. CLI harnesses own CLI +installation/config/run behavior and work with any taskset that supplies a +prompt. +Tasksets can expose package-owned upload directories with `get_upload_dirs()`. +The base `Taskset` discovers a sibling `skills/` directory by default, and +`RLM` uploads that directory to `/rlm/skills` unless `skills=` is passed +explicitly to the harness. +Use `RLMConfig` in `env.harness` for RLM-specific settings such as +`rlm_repo_ref`, `rlm_tools`, `rlm_max_turns`, and `summarize_at_tokens`. ## Setup, Updates, Signals, And Cleanup @@ -308,14 +387,16 @@ async def best_of_n(tasks, states) -> list[float]: ... ``` -Rollout signals accept `task, state`, plus any Toolset-bound hidden args. Group -signals accept exactly `tasks, states` and return one value per state. Setup -functions use `@vf.setup` and run before the program body; update functions use -`@vf.update` and run before scoring; cleanup functions use `@vf.cleanup` and run -after scoring; teardown functions use `@vf.teardown`. +Rollout signals can request framework args such as `task`, `state`, +`completion`, and `prompt`, plus hidden args supplied by taskset or toolset +bindings. 
Group signals can request `tasks`, `states`, and bound hidden args, +and must return one value per state. Setup functions use `@vf.setup` and run +before the program body; update functions use `@vf.update` and run before +scoring; cleanup functions use `@vf.cleanup` and run after scoring; teardown +functions use `@vf.teardown`. For sandbox command/Python programs, program files, directories, setup commands, -state handoff, and tool-interface setup are framework setup contributions with +state handoff, and channel setup are framework setup contributions with fixed priorities. User `@vf.setup(priority=...)` handlers can intentionally run before or after those built-ins without adding new lifecycle hooks. @@ -334,8 +415,7 @@ The recommended loader takes one `config` object and routes its `taskset` and `harness` sections: ```python -def load_environment(config: vf.EnvConfig | None = None): - config = config or vf.EnvConfig() +def load_environment(config: vf.EnvConfig): return vf.Env( taskset=load_taskset(config=config.taskset), harness=load_harness(config=config.harness), @@ -361,44 +441,47 @@ max_turns = 4 weight = 0.5 ``` -For concise v0-style named args, pass typed child config objects as defaults. -Explicit `taskset`/`harness` sections stay the most specific source and override -those defaults. +For concise named args, define one typed args object and pass it as `args`. +`EnvConfig.args` is intentionally user-defined; environment packages decide how +those args flow into taskset and harness construction. ```python +class MyEnvArgsConfig(vf.Config): + split: str = "train" + max_turns: int = 10 + + class MyTasksetConfig(vf.TasksetConfig): split: str = "train" -def load_taskset( - split: str | None = None, - config: vf.TasksetConfig | None = None, -): - config = MyTasksetConfig(config, split=split) +def load_taskset(config: vf.TasksetConfig | None = None): + config = MyTasksetConfig(config) ... 
-def load_harness( - max_turns: int | None = None, - config: vf.HarnessConfig | None = None, -): - config = vf.HarnessConfig(config, max_turns=max_turns) +def load_harness(config: vf.HarnessConfig | None = None): + config = vf.HarnessConfig(config) ... def load_environment( - config: vf.EnvConfig | None = None, + config: vf.EnvConfig, split: str = "train", max_turns: int = 10, ): config = vf.EnvConfig( config, - taskset=MyTasksetConfig(split=split), - harness=vf.HarnessConfig(max_turns=max_turns), + args=MyEnvArgsConfig(split=split, max_turns=max_turns), ) + args = MyEnvArgsConfig(config.args) return vf.Env( - taskset=load_taskset(config=config.taskset), - harness=load_harness(config=config.harness), + taskset=load_taskset( + config=MyTasksetConfig(config.taskset, split=args.split) + ), + harness=load_harness( + config=vf.HarnessConfig(config.harness, max_turns=args.max_turns) + ), ) ``` @@ -427,6 +510,18 @@ tools = ["my_env.tools:search"] bindings = { "search.index" = "objects.index" } ``` +Taskset and harness sections can import a base config with `config` and then +overlay local fields. Collection fields extend the imported config. + +```toml +[env.harness] +config = "my_env.configs:load_another_harness_config" + +[[env.harness.rewards]] +fn = "my_env.rewards:new_reward_func" +weight = 0 +``` + Callable config uses `fn = "module:callable"` when metadata is needed: ```toml @@ -441,14 +536,14 @@ The callable name is always its Python function name. Use creating a new signal. 
For command harnesses, keep endpoint and tool registration under the requested -`program.tools` interface: +`program.channels` channel: ```toml [env.harness.program] command = ["my-cli", "run", "--config", "/tmp/my-cli.json"] sandbox = true -[env.harness.program.tools] +[env.harness.program.channels] mcp = { fn = "my_env.cli:write_cli_config" } [env.harness.program.bindings] diff --git a/docs/environments.md b/docs/environments.md index 00247f98b..7b1e3a9d5 100644 --- a/docs/environments.md +++ b/docs/environments.md @@ -1,6 +1,6 @@ # Environments -This guide walks through building environments in Verifiers, from simple single-turn tasks to complex multi-turn agents with tools. See [Overview](overview.md) for how to initialize a new environment template. For new taskset/harness environments, use the v1 `Taskset`/`Harness` format documented in [BYO Harness](byo-harness.md). +This guide walks through building environments in Verifiers, from simple single-turn tasks to complex multi-turn agents with tools. See [Overview](overview.md) for how to initialize a new environment template. For reusable taskset/harness environments, see [BYO Harness](byo-harness.md). ## Table of Contents @@ -273,7 +273,9 @@ rubric = vf.Rubric(funcs=[correct_answer, diversity_bonus]) ### Shared Objects -Beyond rollout data, reward functions can request static objects that live within the Rubric class. These are stored in the Rubric's `class_objects` dictionary, and can be added after initialization via `add_class_object()`: +In rubric environments, reward functions can request static helper objects that +live within the Rubric class. 
These are stored in the Rubric's `class_objects` +dictionary, and can be added after initialization via `add_class_object()`: ```python rubric = vf.Rubric(funcs=[my_reward_func]) @@ -284,23 +286,13 @@ async def my_reward_func(completion, my_helper) -> float: return await my_helper.score(completion) ``` -Two common types of shared objects are **parsers** and **judges**. +For taskset/harness environments, use taskset-owned `objects` and `bindings` as +shown in [BYO Harness](byo-harness.md#shared-dependencies). -Parsers encapsulate logic for extracting structured content from model responses. When passed to a rubric, the parser is automatically available to reward functions: - -```python -parser = vf.XMLParser(["reasoning", "answer"]) -rubric = vf.Rubric(funcs=[my_reward_func], parser=parser) - -async def my_reward_func(completion, parser) -> float: - parsed = parser.parse_answer(completion) - # parsed.reasoning, parsed.answer available - ... -``` - -Parsers can also be passed to environments, where they are often used during rollouts to validate or extract content. This allows parsing logic to be shared between the environment's interaction loop and the rubric's reward functions. - -Judges are used for tasks where deterministic evaluation is impractical, and an LLM is used to score responses. **JudgeRubric** is a built-in class which stores an LLM client inside the rubric, and provides a `judge` callable to reward functions for scoring responses: +Judges are used for tasks where deterministic evaluation is impractical, and an +LLM is used to score responses. 
**JudgeRubric** stores an LLM client inside the +rubric, and provides a `judge` callable to reward +functions for scoring responses: ```python judge_rubric = vf.JudgeRubric( @@ -531,26 +523,39 @@ The `env_response` method is an abstract method that must be overridden by all ` ```python class MyGameEnv(vf.MultiTurnEnv): + def __init__(self, dataset, rubric, extract_action): + super().__init__(dataset=dataset, rubric=rubric) + self.extract_action = extract_action + async def env_response(self, messages: vf.Messages, state: vf.State) -> vf.Messages: """Generate the environment's response after each model turn.""" - parsed = self.parser.parse(messages) - action = parsed.action + action = self.extract_action(messages) feedback = process_action(action) return [{"role": "user", "content": feedback}] -async def correct_action(parser, completion, answer) -> float: - parsed = parser.parse(completion) - return 1.0 if parsed.action == answer else 0.0 +class ActionExtractor: + def __call__(self, messages: vf.Messages) -> str: + text = messages[-1]["content"] if messages else "" + return str(text).strip() + + +async def correct_action(extract_action, completion, answer) -> float: + return 1.0 if extract_action(completion) == answer else 0.0 def load_environment(): - parser = vf.XMLParser(fields=["action"]) - rubric = vf.Rubric(funcs=[correct_action], parser=parser) - return MyGameEnv(dataset=dataset, rubric=rubric, parser=parser) + extract_action = ActionExtractor() + rubric = vf.Rubric(funcs=[correct_action]) + rubric.add_class_object("extract_action", extract_action) + return MyGameEnv(dataset=dataset, rubric=rubric, extract_action=extract_action) ``` -`env_response` receives the full conversation history thus far (and `state`) and returns a list of _new_ messages to append. When a parser is passed to the environment, it becomes available as `self.parser`. Passing the same parser to the rubric makes it available to reward functions by name. 
For tool environments, `env_response` typically executes tool calls and returns results. For games or other custom protocols, this might involve parsing structured output (as above) and returning state updates or feedback. +`env_response` receives the full conversation history thus far (and `state`) and +returns a list of _new_ messages to append. For tool environments, +`env_response` typically executes tool calls and returns results. For games or +other custom protocols, this might involve extracting structured output and +returning state updates or feedback. Several other methods can optionally be overridden for more control in complex custom environments: @@ -887,7 +892,7 @@ Supported third-party environment integrations include: - **`BrowserEnv`** — unified browser automation via [Browserbase](https://browserbase.com) with DOM and CUA modes - **`OpenEnvEnv`** — wraps OpenEnv gym and MCP contracts using Prime Sandboxes with prebuilt images referenced from `.build.json` -These require additional dependencies installed via extras (e.g., `uv add 'verifiers[ta]'` for TextArena, `uv add 'verifiers[browser]'` for BrowserEnv, `uv add 'verifiers[openenv]'` for OpenEnvEnv). For OpenEnv environments, build the bundled project image with `prime env build ` before evaluation or training. +These require additional dependencies installed via extras (e.g., `uv add 'verifiers[ta]'` for TextArena, `uv add 'verifiers[browser]'` for BrowserEnv). OpenEnvEnv uses the base Verifiers install; the bundled OpenEnv project under `proj/` owns its server dependencies and must be built with `uv run vf-build <env-id>` before evaluation or training.
Newer and more experimental environment classes include: @@ -904,7 +909,7 @@ Newer and more experimental environment classes include: timeouts=SandboxTimeouts(read_file=30.0, extract=180.0, poll=120.0), ) ``` -- **V1 `vf.Env` / `vf.Taskset` / `vf.Harness`** — preferred taskset/harness pattern for composing task data and program execution without subclassing. Use this for new environments that need reusable tasksets, reusable harnesses, config-driven metrics, rewards, toolsets, users, endpoint interception, or sandboxed Python/command programs. `vf.Taskset` owns train/eval rows, prompt shaping, setup/update/reward hooks, and toolsets. `vf.Harness` owns the framework program, endpoint proxy, model controls, sandbox options, and runtime hooks. `vf.Env` wires them into the standard evaluation and training surface. +- **`vf.Env` / `vf.Taskset` / `vf.Harness`** — preferred taskset/harness pattern for composing task data and program execution without subclassing. Use this for environments that need reusable tasksets, reusable harnesses, config-driven metrics, rewards, toolsets, users, endpoint interception, or sandboxed Python/command programs. `vf.Taskset` owns train/eval rows, prompt shaping, setup/update/reward hooks, and toolsets. `vf.Harness` owns the framework program, endpoint proxy, model controls, sandbox options, and runtime hooks. `vf.Env` wires them into the standard evaluation and training surface. - **`SWEDebugEnv`** — no-agent debugger for SWE-style `SandboxTaskSet` instances. It creates the task sandbox, optionally runs `taskset.setup(state)`, performs one debug step (`none`, `gold_patch`, `command`, or `script`), and optionally runs the task tests and scorer. It records setup, sandbox creation, gold patch, debug command, and test timings in state for validation and timing investigations. 
- **`HarborEnv`** — loads Harbor-format agent benchmark tasks - **`RLMEnv`** — implements [Recursive Language Models](https://alexzhang13.github.io/blog/2025/rlm/) for unbounded context processing via REPL-based decomposition and recursive sub-LLM calls diff --git a/docs/overview.md b/docs/overview.md index 6c6dc8986..50f9ed62e 100644 --- a/docs/overview.md +++ b/docs/overview.md @@ -80,7 +80,7 @@ For new environments with reusable tasksets, toolsets, custom programs, or custom harnesses, use the v1 Taskset/Harness path: ```python # my_env.py -import verifiers.v1 as vf +import verifiers as vf def source(): yield { @@ -96,8 +96,7 @@ async def contains_answer(task, state) -> float: def load_taskset(config: vf.TasksetConfig | None = None): return vf.Taskset(source=source, rewards=[contains_answer], config=config) -def load_environment(config: vf.EnvConfig | None = None) -> vf.Env: - config = config or vf.EnvConfig() +def load_environment(config: vf.EnvConfig) -> vf.Env: return vf.Env(taskset=load_taskset(config=config.taskset)) ``` If no harness is passed, `vf.Env` uses the base endpoint-backed harness. See @@ -109,7 +108,7 @@ CLI harness with: ```python env = vf.Env( - taskset=vf.HarborTaskset(tasks="/path/to/harbor/tasks"), + taskset=vf.HarborTaskset(), harness=vf.OpenCode(), ) ``` diff --git a/docs/reference.md b/docs/reference.md index 5a4da7c88..8b9697607 100644 --- a/docs/reference.md +++ b/docs/reference.md @@ -504,11 +504,11 @@ Persistent Python REPL in sandbox. Extends `SandboxEnv`. 
class OpenEnvEnv(MultiTurnEnv): def __init__( self, - openenv_project: str | Path, + openenv_project: str | Path | None = None, num_train_examples: int = 100, num_eval_examples: int = 50, seed: int = 0, - prompt_renderer: Callable[..., ChatMessages] | None = None, + prompt_renderer: Callable[..., Messages] | None = None, max_turns: int = -1, rubric: Rubric | None = None, **kwargs, @@ -738,15 +738,13 @@ class Toolset: bindings=None, objects=None, write: bool = False, - scope: Literal["rollout", "group", "global"] = "rollout", + scope: Literal["rollout", "group", "global"] | None = None, sandbox=None, stops=(), setups=(), updates=(), - metrics=(), - rewards=(), - advantages=(), cleanups=(), + teardowns=(), config: ToolsetConfig | Mapping[str, object] | None = None, ): ... @@ -754,9 +752,11 @@ class MCPTool: def __init__(command: str, args=None, env=None, cwd: str | None = None): ... ``` -Toolsets package callable tools, MCP servers, private dependency factories, and -hidden bindings. `objects.*` bindings are private to the owning toolset/user and -are not directly accessible from state. +Toolsets package callable tools, MCP servers, private dependency factories, +hidden bindings, and tool-owned lifecycle handlers. `objects.*` bindings are +private to the owning toolset/user and are not directly accessible from state. +String binding sources are framework paths; literal strings should be bound via +callable sources. #### v1 Config Models @@ -1009,6 +1009,7 @@ class Config(BaseModel): ) -> Self: ... class EnvConfig(Config): + args: object | None = None taskset: object | None = None harness: object | None = None diff --git a/environments/AGENTS.md b/environments/AGENTS.md index 2c6a3b814..35b3393a6 100644 --- a/environments/AGENTS.md +++ b/environments/AGENTS.md @@ -6,7 +6,7 @@ This file mirrors the "Environments" documentation page. --- -This guide walks through building environments in Verifiers, from simple single-turn tasks to complex multi-turn agents with tools. 
See [Overview](overview.md) for how to initialize a new environment template. For new taskset/harness environments, use the v1 `Taskset`/`Harness` format documented in [BYO Harness](byo-harness.md). +This guide walks through building environments in Verifiers, from simple single-turn tasks to complex multi-turn agents with tools. See [Overview](overview.md) for how to initialize a new environment template. For reusable taskset/harness environments, see [BYO Harness](byo-harness.md). ## Table of Contents @@ -279,7 +279,9 @@ rubric = vf.Rubric(funcs=[correct_answer, diversity_bonus]) ### Shared Objects -Beyond rollout data, reward functions can request static objects that live within the Rubric class. These are stored in the Rubric's `class_objects` dictionary, and can be added after initialization via `add_class_object()`: +In rubric environments, reward functions can request static helper objects that +live within the Rubric class. These are stored in the Rubric's `class_objects` +dictionary, and can be added after initialization via `add_class_object()`: ```python rubric = vf.Rubric(funcs=[my_reward_func]) @@ -290,23 +292,13 @@ async def my_reward_func(completion, my_helper) -> float: return await my_helper.score(completion) ``` -Two common types of shared objects are **parsers** and **judges**. +For taskset/harness environments, use taskset-owned `objects` and `bindings` as +shown in [BYO Harness](byo-harness.md#shared-dependencies). -Parsers encapsulate logic for extracting structured content from model responses. When passed to a rubric, the parser is automatically available to reward functions: - -```python -parser = vf.XMLParser(["reasoning", "answer"]) -rubric = vf.Rubric(funcs=[my_reward_func], parser=parser) - -async def my_reward_func(completion, parser) -> float: - parsed = parser.parse_answer(completion) - # parsed.reasoning, parsed.answer available - ... 
-``` - -Parsers can also be passed to environments, where they are often used during rollouts to validate or extract content. This allows parsing logic to be shared between the environment's interaction loop and the rubric's reward functions. - -Judges are used for tasks where deterministic evaluation is impractical, and an LLM is used to score responses. **JudgeRubric** is a built-in class which stores an LLM client inside the rubric, and provides a `judge` callable to reward functions for scoring responses: +Judges are used for tasks where deterministic evaluation is impractical, and an +LLM is used to score responses. **JudgeRubric** stores an LLM client inside the +rubric, and provides a `judge` callable to reward +functions for scoring responses: ```python judge_rubric = vf.JudgeRubric( @@ -537,26 +529,39 @@ The `env_response` method is an abstract method that must be overridden by all ` ```python class MyGameEnv(vf.MultiTurnEnv): + def __init__(self, dataset, rubric, extract_action): + super().__init__(dataset=dataset, rubric=rubric) + self.extract_action = extract_action + async def env_response(self, messages: vf.Messages, state: vf.State) -> vf.Messages: """Generate the environment's response after each model turn.""" - parsed = self.parser.parse(messages) - action = parsed.action + action = self.extract_action(messages) feedback = process_action(action) return [{"role": "user", "content": feedback}] -async def correct_action(parser, completion, answer) -> float: - parsed = parser.parse(completion) - return 1.0 if parsed.action == answer else 0.0 +class ActionExtractor: + def __call__(self, messages: vf.Messages) -> str: + text = messages[-1]["content"] if messages else "" + return str(text).strip() + + +async def correct_action(extract_action, completion, answer) -> float: + return 1.0 if extract_action(completion) == answer else 0.0 def load_environment(): - parser = vf.XMLParser(fields=["action"]) - rubric = vf.Rubric(funcs=[correct_action], 
parser=parser) - return MyGameEnv(dataset=dataset, rubric=rubric, parser=parser) + extract_action = ActionExtractor() + rubric = vf.Rubric(funcs=[correct_action]) + rubric.add_class_object("extract_action", extract_action) + return MyGameEnv(dataset=dataset, rubric=rubric, extract_action=extract_action) ``` -`env_response` receives the full conversation history thus far (and `state`) and returns a list of _new_ messages to append. When a parser is passed to the environment, it becomes available as `self.parser`. Passing the same parser to the rubric makes it available to reward functions by name. For tool environments, `env_response` typically executes tool calls and returns results. For games or other custom protocols, this might involve parsing structured output (as above) and returning state updates or feedback. +`env_response` receives the full conversation history thus far (and `state`) and +returns a list of _new_ messages to append. For tool environments, +`env_response` typically executes tool calls and returns results. For games or +other custom protocols, this might involve extracting structured output and +returning state updates or feedback. Several other methods can optionally be overridden for more control in complex custom environments: @@ -893,7 +898,7 @@ Supported third-party environment integrations include: - **`BrowserEnv`** — unified browser automation via [Browserbase](https://browserbase.com) with DOM and CUA modes - **`OpenEnvEnv`** — wraps OpenEnv gym and MCP contracts using Prime Sandboxes with prebuilt images referenced from `.build.json` -These require additional dependencies installed via extras (e.g., `uv add 'verifiers[ta]'` for TextArena, `uv add 'verifiers[browser]'` for BrowserEnv, `uv add 'verifiers[openenv]'` for OpenEnvEnv). For OpenEnv environments, build the bundled project image with `prime env build ` before evaluation or training. 
+These require additional dependencies installed via extras (e.g., `uv add 'verifiers[ta]'` for TextArena, `uv add 'verifiers[browser]'` for BrowserEnv). OpenEnvEnv uses the base Verifiers install; the bundled OpenEnv project under `proj/` owns its server dependencies and must be built with `uv run vf-build <env-id>` before evaluation or training. Newer and more experimental environment classes include: @@ -910,7 +915,7 @@ Newer and more experimental environment classes include: timeouts=SandboxTimeouts(read_file=30.0, extract=180.0, poll=120.0), ) ``` -- **V1 `vf.Env` / `vf.Taskset` / `vf.Harness`** — preferred taskset/harness pattern for composing task data and program execution without subclassing. Use this for new environments that need reusable tasksets, reusable harnesses, config-driven metrics, rewards, toolsets, users, endpoint interception, or sandboxed Python/command programs. `vf.Taskset` owns train/eval rows, prompt shaping, setup/update/reward hooks, and toolsets. `vf.Harness` owns the framework program, endpoint proxy, model controls, sandbox options, and runtime hooks. `vf.Env` wires them into the standard evaluation and training surface. +- **`vf.Env` / `vf.Taskset` / `vf.Harness`** — preferred taskset/harness pattern for composing task data and program execution without subclassing. Use this for environments that need reusable tasksets, reusable harnesses, config-driven metrics, rewards, toolsets, users, endpoint interception, or sandboxed Python/command programs. `vf.Taskset` owns train/eval rows, prompt shaping, setup/update/reward hooks, and toolsets. `vf.Harness` owns the framework program, endpoint proxy, model controls, sandbox options, and runtime hooks. `vf.Env` wires them into the standard evaluation and training surface. - **`SWEDebugEnv`** — no-agent debugger for SWE-style `SandboxTaskSet` instances.
It creates the task sandbox, optionally runs `taskset.setup(state)`, performs one debug step (`none`, `gold_patch`, `command`, or `script`), and optionally runs the task tests and scorer. It records setup, sandbox creation, gold patch, debug command, and test timings in state for validation and timing investigations. - **`HarborEnv`** — loads Harbor-format agent benchmark tasks - **`RLMEnv`** — implements [Recursive Language Models](https://alexzhang13.github.io/blog/2025/rlm/) for unbounded context processing via REPL-based decomposition and recursive sub-LLM calls diff --git a/environments/README.md b/environments/README.md index 503d65b37..7e4e991d7 100644 --- a/environments/README.md +++ b/environments/README.md @@ -44,7 +44,7 @@ This folder contains installable example environments that showcase common usage - **RLMEnv (Recursive Language Model)** - **rlm_secrets**: Puzzle environment testing RLM functionality including root-level tools, sub-LLM tool use, and file operations. - - **hello_rlm_v1**: v1 sandboxed RLM-style CLI program with endpoint interception and metrics collection. + - **hello_rlm_v1**: v1 `vf.RLM` harness example with endpoint interception and metrics collection. - **V1 Taskset/Harness** - **dspy_rlm**: DSPy RLM harness on GSM8K through `vf.Env`; DSPy uses the V1 interception endpoint from rollout state. 
diff --git a/environments/alphabet_sort/alphabet_sort_v1.py b/environments/alphabet_sort/alphabet_sort_v1.py index 61fa83277..7b51674f8 100644 --- a/environments/alphabet_sort/alphabet_sort_v1.py +++ b/environments/alphabet_sort/alphabet_sort_v1.py @@ -1,5 +1,3 @@ -from __future__ import annotations - import difflib import json import logging @@ -8,7 +6,7 @@ from datasets import Dataset, load_dataset -import verifiers.v1 as vf +import verifiers as vf logger = logging.getLogger(__name__) @@ -206,7 +204,7 @@ def score_response( def eval_turn( - completion: list[dict], + completion: list[vf.ConfigData], turn_num: int, state: dict, similarity_power: int, @@ -217,7 +215,8 @@ def eval_turn( return 0.0 expected = ground_truths[turn_num - 1] assistant_msgs = [ - str(m.get("content") or "") for m in completion if m.get("role") == "assistant" + str(message.content or "") + for message in vf.get_messages(completion, role="assistant") ] if len(assistant_msgs) < turn_num: return 0.0 @@ -269,7 +268,7 @@ async def weighted_reward(task, state) -> float: async def alphabet_user(task, state, transcript) -> list[dict[str, str]]: - assistant_count = len([m for m in transcript if m.get("role") == "assistant"]) + assistant_count = len(vf.get_messages(transcript, role="assistant")) follow_ups = state["info"]["follow_ups"] if assistant_count <= 0 or assistant_count > len(follow_ups): return [] diff --git a/environments/bfcl_v3/README.md b/environments/bfcl_v3/README.md index 2c1ffae20..2483d59e0 100644 --- a/environments/bfcl_v3/README.md +++ b/environments/bfcl_v3/README.md @@ -4,7 +4,7 @@ Berkeley Function Calling Leaderboard v3 on the v1 Taskset/Harness runtime. ```bash prime env install bfcl-v3 --from-repo -prime eval run bfcl-v3 -a '{"test_categories": ["simple_python"]}' +prime eval run bfcl-v3 -a '{"test_category": "simple_python"}' ``` Single-turn categories use task-local schema-backed toolsets. 
Multi-turn diff --git a/environments/bfcl_v3/bfcl_v3.py b/environments/bfcl_v3/bfcl_v3.py index 3308a43bc..da07de2cf 100644 --- a/environments/bfcl_v3/bfcl_v3.py +++ b/environments/bfcl_v3/bfcl_v3.py @@ -1,21 +1,28 @@ -from __future__ import annotations - import json import re from collections.abc import Mapping, Sequence -from typing import Any, cast - -import verifiers as vf0 -import verifiers.v1 as vf -from verifiers.types import AssistantMessage, MessageContent, Messages, Tool, ToolCall -from verifiers.utils.eval_utils import quiet_datasets -from verifiers.utils.message_utils import normalize_messages +from typing import cast + +import verifiers as vf +from verifiers.types import ( + AssistantMessage, + MessageContent, + Messages, + Tool, + ToolCall, + ToolMessage, + UserMessage, +) +from verifiers.utils.message_utils import message_role, normalize_messages from verifiers.v1.utils.endpoint_utils import assistant_completion_from_messages from verifiers.v1.utils.json_utils import json_args +from verifiers.v1.types import ConfigMap BFCL_TOOLSET_REF = "bfcl_v3:load_bfcl_toolset" _BFCL_PATCHED = False +BFCLRawMessage = str | ConfigMap +BFCLRawTurn = str | ConfigMap | Sequence[BFCLRawMessage] | None class BFCLTasksetConfig(vf.TasksetConfig): @@ -77,7 +84,7 @@ def bfcl_tool_defs(functions: object) -> list[Tool]: Tool( name=str(function["name"]), description=str(function.get("description") or ""), - parameters=dict(cast(Mapping[str, object], function["parameters"])), + parameters=dict(cast(ConfigMap, function["parameters"])), strict=False, ) ) @@ -92,12 +99,14 @@ def __init__(self, tool_def: Tool): self.tool_def = tool_def async def __call__(self, state: vf.State, **arguments: object) -> str: - calls = cast(list[object], state.setdefault("bfcl_executed_tool_calls", [])) + calls = cast( + list[vf.ConfigData], state.setdefault("bfcl_executed_tool_calls", []) + ) calls.append({self.name: arguments}) return "recorded" -def load_bfcl_toolset(task: Mapping[str, object]) -> 
vf.Toolset: +def load_bfcl_toolset(task: ConfigMap) -> vf.Toolset: return vf.Toolset( tools=[ BFCLSchemaTool(tool_def) @@ -106,15 +115,15 @@ def load_bfcl_toolset(task: Mapping[str, object]) -> vf.Toolset: ) -def bfcl_functions(task: Mapping[str, object]) -> object: +def bfcl_functions(task: ConfigMap) -> object: return task.get("function_with_hints") or task["function"] -def bfcl_missed_function(task: Mapping[str, object]) -> Mapping[str, object]: +def bfcl_missed_function(task: ConfigMap) -> ConfigMap: value = task.get("missed_function_with_hints") or task.get("missed_function") or {} if not isinstance(value, Mapping): raise TypeError("BFCL missed_function must be a mapping.") - return cast(Mapping[str, object], value) + return cast(ConfigMap, value) def build_source(test_category: str, examples_per_category: int = -1): @@ -148,13 +157,13 @@ def source(): test_category, entry, hinted_entry, - cast(Mapping[str, object] | None, ground_truth), + cast(ConfigMap | None, ground_truth), ) if is_multi_turn(test_category): max_steps = maximum_step_limit() row["max_steps_per_turn"] = max_steps row["max_turns"] = ( - len(cast(Sequence[object], row["question"])) * max_steps + len(cast(Sequence[BFCLRawTurn], row["question"])) * max_steps ) else: row["max_turns"] = 1 @@ -167,15 +176,15 @@ def source(): def bfcl_row( test_category: str, - entry: Mapping[str, object], - hinted_entry: Mapping[str, object], - ground_truth: Mapping[str, object] | None, -) -> dict[str, object]: - question = cast(list[object], entry["question"]) + entry: ConfigMap, + hinted_entry: ConfigMap, + ground_truth: ConfigMap | None, +) -> vf.ConfigData: + question = cast(list[BFCLRawTurn], entry["question"]) first_turn_system_prompt, first_turn_prompt = split_system_prompt( normalize_turn(question[0]) ) - row: dict[str, object] = { + row: vf.ConfigData = { "task_id": str(entry["id"]), "id": str(entry["id"]), "category": test_category, @@ -205,20 +214,20 @@ def bfcl_row( return row -def normalize_turn(value: 
object) -> list[dict[str, object]]: +def normalize_turn(value: object) -> list[vf.ConfigData]: if value is None: return [] if isinstance(value, str): return [{"role": "user", "content": value}] if isinstance(value, Mapping): - return [dict(cast(Mapping[str, object], value))] + return [dict(cast(ConfigMap, value))] if isinstance(value, Sequence): messages = [] for item in value: if isinstance(item, str): messages.append({"role": "user", "content": item}) elif isinstance(item, Mapping): - messages.append(dict(cast(Mapping[str, object], item))) + messages.append(dict(cast(ConfigMap, item))) else: raise TypeError(f"Unsupported BFCL message item: {type(item).__name__}") return messages @@ -226,8 +235,8 @@ def normalize_turn(value: object) -> list[dict[str, object]]: def split_system_prompt( - messages: Sequence[Mapping[str, object]], -) -> tuple[list[dict[str, object]], list[dict[str, object]]]: + messages: Sequence[ConfigMap], +) -> tuple[list[vf.ConfigData], list[vf.ConfigData]]: system_prompt = [] prompt = [] for message in messages: @@ -243,33 +252,24 @@ def maximum_step_limit() -> int: return cast(int, MAXIMUM_STEP_LIMIT) -def model_name(state: Mapping[str, object]) -> str: +def model_name(state: ConfigMap) -> str: runtime = state.get("runtime") or {} if isinstance(runtime, Mapping): - runtime_map = cast(Mapping[str, object], runtime) + runtime_map = cast(ConfigMap, runtime) model = runtime_map.get("model") if isinstance(model, str) and model: return model return "unknown" -def assistant_tool_calls(state: Mapping[str, object]) -> list[ToolCall]: +def assistant_tool_calls(state: ConfigMap) -> list[ToolCall]: completion = state.get("completion") or [] if not isinstance(completion, Sequence): return [] - for message in reversed(completion): - if message_role(message) != "assistant": - continue - return parse_tool_calls(message) - return [] - - -def message_role(message: object) -> str | None: - if isinstance(message, Mapping): - message_map = cast(Mapping[str, 
object], message) - role = message_map.get("role") - return str(role) if role is not None else None - return getattr(message, "role", None) + messages = vf.get_messages(completion, role="assistant") + if not messages: + return [] + return parse_tool_calls(messages[-1]) def parse_tool_calls(message: object) -> list[ToolCall]: @@ -277,7 +277,7 @@ def parse_tool_calls(message: object) -> list[ToolCall]: return list(message.tool_calls or []) raw_tool_calls: object if isinstance(message, Mapping): - message_map = cast(Mapping[str, object], message) + message_map = cast(ConfigMap, message) raw_tool_calls = message_map.get("tool_calls") or [] else: raw_tool_calls = getattr(message, "tool_calls", []) or [] @@ -290,10 +290,10 @@ def parse_tool_calls(message: object) -> list[ToolCall]: continue if not isinstance(raw_call, Mapping): continue - raw_call = cast(Mapping[str, object], raw_call) + raw_call = cast(ConfigMap, raw_call) function = raw_call.get("function") if isinstance(function, Mapping): - function_map = cast(Mapping[str, object], function) + function_map = cast(ConfigMap, function) name = str(function_map.get("name") or "") arguments = function_map.get("arguments") or "{}" else: @@ -313,7 +313,7 @@ def parse_tool_calls(message: object) -> list[ToolCall]: return calls -def convert_to_gorilla(tool_calls: list[ToolCall]) -> list[dict[str, object]]: +def convert_to_gorilla(tool_calls: list[ToolCall]) -> list[vf.ConfigData]: decoded_output = [] for tool_call in tool_calls: decoded_output.append({tool_call.name: json_args(tool_call.arguments)}) @@ -334,7 +334,7 @@ def json_clone(value: object) -> object: @vf.reward(weight=1.0) -async def bfcl_reward(task: Mapping[str, object], state: vf.State) -> float: +async def bfcl_reward(task: ConfigMap, state: vf.State) -> float: patch_bfcl_eval() from bfcl_eval.utils import is_multi_turn, is_relevance_or_irrelevance @@ -346,7 +346,7 @@ async def bfcl_reward(task: Mapping[str, object], state: vf.State) -> float: return 
ast_reward(task, state) -def relevance_reward(task: Mapping[str, object], state: Mapping[str, object]) -> float: +def relevance_reward(task: ConfigMap, state: ConfigMap) -> float: patch_bfcl_eval() from bfcl_eval.utils import is_empty_output @@ -361,7 +361,7 @@ def relevance_reward(task: Mapping[str, object], state: Mapping[str, object]) -> return float(contain_func_call) -def ast_reward(task: Mapping[str, object], state: Mapping[str, object]) -> float: +def ast_reward(task: ConfigMap, state: ConfigMap) -> float: patch_bfcl_eval() from bfcl_eval.constants.enums import Language from bfcl_eval.eval_checker.ast_eval.ast_checker import ast_checker @@ -397,7 +397,7 @@ def ast_reward(task: Mapping[str, object], state: Mapping[str, object]) -> float return float(bool(checker_result["valid"])) -def multi_turn_reward(task: Mapping[str, object], state: Mapping[str, object]) -> float: +def multi_turn_reward(task: ConfigMap, state: ConfigMap) -> float: patch_bfcl_eval() from bfcl_eval.eval_checker.multi_turn_eval.multi_turn_checker import ( multi_turn_checker, @@ -468,7 +468,7 @@ async def bfcl_multi_turn_program( ] prompt_messages = [message.model_dump(exclude_none=True) for message in messages] - def sync_completion() -> list[dict[str, object]]: + def sync_completion() -> list[vf.ConfigData]: rendered_messages = [ message.model_dump(exclude_none=True) for message in messages ] @@ -479,10 +479,10 @@ def sync_completion() -> list[dict[str, object]]: category = str(task["category"]) tool_defs = bfcl_tool_defs(bfcl_functions(task)) - next_prompts = list(cast(Sequence[object], task["question"]))[1:] + next_prompts = list(cast(Sequence[list[vf.ConfigData]], task["question"]))[1:] holdout_function = bfcl_missed_function(task) - initial_config = cast(dict[Any, Any], json_clone(task.get("initial_config") or {})) - involved_classes = cast(list[Any], json_clone(task["involved_classes"])) + initial_config = cast(vf.ConfigData, json_clone(task.get("initial_config") or {})) + 
involved_classes = cast(list[str], json_clone(task["involved_classes"])) max_steps_per_turn = int(task.get("max_steps_per_turn") or maximum_step_limit()) turn_idx = 0 steps_per_turn = 0 @@ -533,7 +533,7 @@ def sync_completion() -> list[dict[str, object]]: ) for execution_result, tool_call in zip(execution_results, tool_calls): messages.append( - vf0.ToolMessage( + ToolMessage( tool_call_id=tool_call.id, content=cast(MessageContent, execution_result), ) @@ -556,7 +556,7 @@ def sync_completion() -> list[dict[str, object]]: if next_prompt: raise ValueError("BFCL holdout turns must not include user messages.") messages.append( - vf0.UserMessage(content=DEFAULT_USER_PROMPT_FOR_ADDITIONAL_FUNCTION_FC) + UserMessage(content=DEFAULT_USER_PROMPT_FOR_ADDITIONAL_FUNCTION_FC) ) else: messages.extend(normalize_messages(cast(Messages, next_prompt))) @@ -604,44 +604,30 @@ def load_harness( return vf.Harness(config=config) -def load_v1_environment( - test_category: str = "simple_python", - examples_per_category: int = -1, - config: vf.EnvConfig | None = None, -) -> vf.Env: - config = vf.EnvConfig( - config, - taskset=BFCLTasksetConfig( - test_category=test_category, - examples_per_category=examples_per_category, - ), - harness=BFCLHarnessConfig(test_category=test_category), - ) - return vf.Env( - taskset=load_taskset(config=config.taskset), - harness=load_harness(config=config.harness), - ) - - def load_environment( + config: vf.EnvConfig, + *, + test_category: str = "simple_python", test_categories: list[str] | None = None, examples_per_category: int = -1, - **kwargs: object, -) -> vf0.Environment: - patch_bfcl_eval() - from bfcl_eval.utils import parse_test_category_argument - - categories = parse_test_category_argument(test_categories or ["all"]) - with quiet_datasets(): - envs = cast( - list[vf0.Environment], - [ - load_v1_environment( - test_category=category, - examples_per_category=examples_per_category, - config=kwargs.get("config"), - ) - for category in categories - ], 
+) -> vf.Env | vf.EnvGroup: + categories = [test_category] if test_categories is None else test_categories + envs: list[vf.Env] = [] + for category in categories: + category_config = vf.EnvConfig( + config, + taskset=BFCLTasksetConfig( + test_category=category, + examples_per_category=examples_per_category, + ), + harness=BFCLHarnessConfig(test_category=category), + ) + envs.append( + vf.Env( + taskset=load_taskset(config=category_config.taskset), + harness=load_harness(config=category_config.harness), + ) ) - return vf0.EnvGroup(envs=envs, env_names=categories) + if test_categories is not None: + return vf.EnvGroup(envs=envs, env_names=categories) + return envs[0] diff --git a/environments/dspy_flights/dspy_flights.py b/environments/dspy_flights/dspy_flights.py index 499c69067..9d811a785 100644 --- a/environments/dspy_flights/dspy_flights.py +++ b/environments/dspy_flights/dspy_flights.py @@ -1,15 +1,12 @@ -from __future__ import annotations - import asyncio import functools import random import string -from collections.abc import Callable, Mapping -from typing import Any +from collections.abc import Mapping from pydantic import BaseModel -import verifiers.v1 as vf +import verifiers as vf PROGRAM_SANDBOX = { "image": "python:3.11-slim", @@ -140,10 +137,10 @@ def source(): def row( example_id: int, user_request: str, - expected: dict[str, object], - initial_itineraries: dict[str, dict[str, object]] | None = None, - ) -> dict[str, object]: - task: dict[str, object] = { + expected: vf.ConfigData, + initial_itineraries: dict[str, vf.ConfigData] | None = None, + ) -> vf.ConfigData: + task: vf.ConfigData = { "example_id": example_id, "user_request": user_request, "prompt": [{"role": "user", "content": user_request}], @@ -252,7 +249,7 @@ def itinerary(confirmation_number: str, user_name: str, flight_id: str) -> Itine def build_airline_tools( task, -) -> tuple[list[Callable[..., object]], dict[str, Mapping[str, BaseModel]]]: +) -> tuple[list[vf.Handler], dict[str, 
dict[str, BaseModel]]]: users = user_database() flights = flight_database() itineraries = { @@ -336,7 +333,7 @@ def file_ticket(user_request: str, user_profile: UserProfile): ) return ticket_id - tools: list[Callable[..., object]] = [ + tools: list[vf.Handler] = [ async_tool(fetch_flight_info), async_tool(fetch_itinerary), async_tool(pick_flight), @@ -345,14 +342,14 @@ def file_ticket(user_request: str, user_profile: UserProfile): async_tool(get_user_info), async_tool(file_ticket), ] - databases: dict[str, Mapping[str, BaseModel]] = { + databases: dict[str, dict[str, BaseModel]] = { "itinerary_database": itineraries, "ticket_database": tickets, } return tools, databases -def async_tool(fn: Callable[..., object]) -> Callable[..., Any]: +def async_tool(fn: vf.Handler) -> vf.Handler: @functools.wraps(fn) async def wrapped(*args: object, **kwargs: object) -> object: return await asyncio.to_thread(fn, *args, **kwargs) @@ -360,7 +357,7 @@ async def wrapped(*args: object, **kwargs: object) -> object: return wrapped -def dump_database(database: Mapping[str, BaseModel]) -> dict[str, dict[str, object]]: +def dump_database(database: dict[str, BaseModel]) -> dict[str, vf.ConfigData]: return {key: value.model_dump() for key, value in database.items()} @@ -434,8 +431,7 @@ def load_harness(config: vf.HarnessConfig | None = None): ) -def load_environment(config: vf.EnvConfig | None = None): - config = config or vf.EnvConfig() +def load_environment(config: vf.EnvConfig): return vf.Env( taskset=load_taskset(config=config.taskset), harness=load_harness(config=config.harness), diff --git a/environments/dspy_rlm/dspy_rlm.py b/environments/dspy_rlm/dspy_rlm.py index fa58129da..165717a00 100644 --- a/environments/dspy_rlm/dspy_rlm.py +++ b/environments/dspy_rlm/dspy_rlm.py @@ -1,9 +1,6 @@ -from __future__ import annotations - import re -from collections.abc import Mapping -import verifiers.v1 as vf +import verifiers as vf from verifiers.utils.data_utils import load_example_dataset @@ 
-19,8 +16,16 @@ async def run_dspy_rlm_program(task: vf.Task, state: vf.State) -> vf.State: ) with dspy.context(lm=lm): + question = task.get("question") + if question is not None: + query = str(question) + else: + query = "" + prompt = task.get("prompt") + if isinstance(prompt, list) and prompt: + query = str(vf.get_messages(prompt)[-1].content or "") rlm = dspy.RLM("query -> answer", max_iterations=10) - result = await rlm.aforward(query=task_question(task)) + result = await rlm.aforward(query=query) final_output = str(result.answer) state["agent_result"] = final_output @@ -33,31 +38,6 @@ def load_rows(split: str, num_examples: int): return load_example_dataset("gsm8k", split=split, n=n) -def task_question(task: vf.Task) -> str: - question = task.get("question") - if question is not None: - return str(question) - prompt = task.get("prompt") - if isinstance(prompt, list) and prompt: - last_message = prompt[-1] - if isinstance(last_message, Mapping): - return str(last_message.get("content") or "") - return "" - - -def completion_text(state: vf.State) -> str: - agent_result = state.get("agent_result") - if agent_result is not None: - return str(agent_result) - completion = state.get("completion") - if isinstance(completion, list) and completion: - last_message = completion[-1] - if isinstance(last_message, Mapping): - return str(last_message.get("content") or "") - return str(getattr(last_message, "content", last_message) or "") - return "" - - def extract_dspy_answer(text: str) -> str: match = re.search(r"SUBMIT\((.+?)\)", text) if match: @@ -87,7 +67,18 @@ def answers_match(agent_answer: str, answer: str) -> float: def answer_reward(task: vf.Task, state: vf.State) -> float: """Check if the agent's final output contains the correct answer.""" - agent_answer = extract_dspy_answer(completion_text(state)) + result = state.get("agent_result") + if result is not None: + text = str(result) + else: + completion = state.get("completion") + messages = [] + if 
isinstance(completion, list): + messages = vf.get_messages(completion, role="assistant") or vf.get_messages( + completion + ) + text = str(messages[-1].content or "") if messages else "" + agent_answer = extract_dspy_answer(text) if not agent_answer: return 0.0 return answers_match(agent_answer, str(task.get("answer", ""))) @@ -112,12 +103,11 @@ def load_harness(config: vf.HarnessConfig | None = None) -> vf.Harness: def load_environment( + config: vf.EnvConfig, num_train_examples: int = 50, num_eval_examples: int = 20, - config: vf.EnvConfig | None = None, ) -> vf.Env: """Load the DSPy RLM V1 taskset/harness example environment.""" - config = config or vf.EnvConfig() return vf.Env( taskset=load_taskset( num_train_examples=num_train_examples, diff --git a/environments/hello_group_reward_v1/hello_group_reward_v1.py b/environments/hello_group_reward_v1/hello_group_reward_v1.py index cf1295764..3a5d5e9de 100644 --- a/environments/hello_group_reward_v1/hello_group_reward_v1.py +++ b/environments/hello_group_reward_v1/hello_group_reward_v1.py @@ -1,10 +1,8 @@ -from __future__ import annotations - from collections.abc import Mapping from difflib import SequenceMatcher from statistics import mean -import verifiers.v1 as vf +import verifiers as vf SYSTEM_PROMPT = """\ @@ -28,7 +26,7 @@ def group_reward_task( near: str, partial: str, wrong: str, -) -> dict[str, object]: +) -> vf.ConfigData: return { "task_id": task_id, "question": question, @@ -42,7 +40,7 @@ def group_reward_task( } -TASKS: list[dict[str, object]] = [ +TASKS: list[vf.ConfigData] = [ group_reward_task( "distributed-systems", "Describe v1 verifiers in one short phrase.", @@ -340,7 +338,8 @@ def load_harness( def load_environment( num_examples: int = -1, - config: vf.EnvConfig | None = None, + *, + config: vf.EnvConfig, ) -> vf.Env: config = vf.EnvConfig( config, @@ -354,6 +353,7 @@ def load_environment( def load_v1_environment( num_examples: int = -1, - config: vf.EnvConfig | None = None, + *, + config: 
vf.EnvConfig, ) -> vf.Env: return load_environment(num_examples=num_examples, config=config) diff --git a/environments/hello_mcp_harbor/hello_mcp_harbor.py b/environments/hello_mcp_harbor/hello_mcp_harbor.py index 9b9364822..eff810386 100644 --- a/environments/hello_mcp_harbor/hello_mcp_harbor.py +++ b/environments/hello_mcp_harbor/hello_mcp_harbor.py @@ -1,5 +1,3 @@ -from __future__ import annotations - import json import logging from pathlib import Path diff --git a/environments/hello_parallel_sandbox_v1/hello_parallel_sandbox_v1.py b/environments/hello_parallel_sandbox_v1/hello_parallel_sandbox_v1.py index e33f9f4b7..160e87539 100644 --- a/environments/hello_parallel_sandbox_v1/hello_parallel_sandbox_v1.py +++ b/environments/hello_parallel_sandbox_v1/hello_parallel_sandbox_v1.py @@ -1,13 +1,10 @@ -from __future__ import annotations - import asyncio import json -from collections.abc import Mapping -import verifiers.v1 as vf +import verifiers as vf +from verifiers.v1.types import ConfigMap from verifiers.v1.utils.judge_utils import ( clamp_float, - completion_text, parse_judge_json, truncate_command_record, truncate_text, @@ -52,7 +49,7 @@ {"score": 0.0-1.0, "reason": "..."} """ -TASKS: list[dict[str, object]] = [ +TASKS: list[vf.ConfigData] = [ { "task_id": "exact-token", "answer": "prime-v1-shared-sandbox", @@ -156,7 +153,8 @@ async def bash(command: str, sandbox, state) -> str: @vf.update(priority=10) async def parallel_sandbox_audit(task, state) -> None: - response = completion_text(state.get("completion")) + messages = vf.get_messages(state.get("completion") or [], role="assistant") + response = str(messages[-1].content or "") if messages else "" audit_specs = [ ( "file_audit", @@ -197,14 +195,19 @@ async def run_audit( for label, system_prompt, prompt in audit_specs ) ) - state["parallel_audits"] = [ - { - "name": label, - "findings": completion_text(audit_state.get("completion")), - "trajectory_id": audit_state.get("trajectory_id"), - } - for label, 
audit_state in audit_states - ] + state["parallel_audits"] = [] + for label, audit_state in audit_states: + messages = vf.get_messages( + audit_state.get("completion") or [], role="assistant" + ) + findings = str(messages[-1].content or "") if messages else "" + state["parallel_audits"].append( + { + "name": label, + "findings": findings, + "trajectory_id": audit_state.get("trajectory_id"), + } + ) @vf.reward(weight=1.0) @@ -225,7 +228,8 @@ async def sandbox_stage_score(task, state) -> float: system_prompt=REWARD_JUDGE_SYSTEM_PROMPT, max_turns=2, ).run(judge_task, judge_state) - judge_text = completion_text(judge_state.get("completion")) + messages = vf.get_messages(judge_state.get("completion") or [], role="assistant") + judge_text = str(messages[-1].content or "") if messages else "" parsed = parse_judge_json(judge_text) score = clamp_float(parsed.get("score", 0.0)) state["reward_judge"] = { @@ -258,7 +262,7 @@ async def update_audits(task, state) -> float: return float(len(audits) if isinstance(audits, list) else 0) -def file_audit_prompt(task: Mapping[str, object], response: str) -> str: +def file_audit_prompt(task: ConfigMap, response: str) -> str: return ( "Task instruction:\n" f"{task['instruction']}\n\n" @@ -279,7 +283,7 @@ def file_audit_prompt(task: Mapping[str, object], response: str) -> str: ) -def command_audit_prompt(task: Mapping[str, object]) -> str: +def command_audit_prompt(task: ConfigMap) -> str: return ( "Task instruction:\n" f"{task['instruction']}\n\n" @@ -298,14 +302,16 @@ def command_audit_prompt(task: Mapping[str, object]) -> str: ) -def reward_prompt(task: Mapping[str, object], state: Mapping[str, object]) -> str: +def reward_prompt(task: ConfigMap, state: ConfigMap) -> str: + messages = vf.get_messages(state.get("completion") or [], role="assistant") + response = str(messages[-1].content or "") if messages else "" return ( "Task instruction:\n" f"{task['instruction']}\n\n" "Expected answer text:\n" f"{task['answer']}\n\n" "Assistant 
final answer:\n" - f"{completion_text(state.get('completion'))}\n\n" + f"{response}\n\n" "Update-stage audit findings:\n" f"{json.dumps(state.get('parallel_audits', []), indent=2)}\n\n" "Call bash to inspect `/tmp/answer.txt` directly, then score whether " @@ -360,7 +366,7 @@ def load_harness( ) -> vf.Harness: config = ParallelSandboxHarnessConfig(config, max_turns=max_turns) return vf.Harness( - program={"sandbox": True, "tools": "callable"}, + program={"sandbox": True, "channels": "callable"}, sandbox=PROGRAM_SANDBOX, max_turns=config.max_turns, config=config, @@ -370,7 +376,8 @@ def load_harness( def load_environment( num_examples: int = -1, max_turns: int = 4, - config: vf.EnvConfig | None = None, + *, + config: vf.EnvConfig, ) -> vf.Env: config = vf.EnvConfig( config, @@ -386,7 +393,8 @@ def load_environment( def load_v1_environment( num_examples: int = -1, max_turns: int = 4, - config: vf.EnvConfig | None = None, + *, + config: vf.EnvConfig, ) -> vf.Env: return load_environment( num_examples=num_examples, diff --git a/environments/hello_rlm_v1/hello_rlm_v1.py b/environments/hello_rlm_v1/hello_rlm_v1.py index 5a54f4f5c..b0f599881 100644 --- a/environments/hello_rlm_v1/hello_rlm_v1.py +++ b/environments/hello_rlm_v1/hello_rlm_v1.py @@ -1,30 +1,4 @@ -from __future__ import annotations - -import hashlib -import json -import random -import shlex -from collections.abc import Callable, Mapping -from pathlib import Path -from typing import Any - -import verifiers.v1 as vf -from verifiers.envs.experimental.utils.git_checkout_cache import ( - resolve_git_checkout, - validate_git_checkout, -) - -DEFAULT_RLM_REPO_URL = "github.com/PrimeIntellect-ai/rlm-harness.git" -DEFAULT_RLM_REF = "main" -DEFAULT_RLM_MAX_TURNS = 100 -DEFAULT_RLM_EXEC_TIMEOUT = 300 -DEFAULT_RLM_MAX_DEPTH = 0 -DEFAULT_APPEND_TO_SYSTEM_PROMPT_PATH = "/task/append_to_system_prompt.txt" -DEFAULT_RLM_CHECKOUT_PATH = "/tmp/rlm-checkout" -DEFAULT_RLM_LOCAL_CHECKOUT_CACHE_ROOT = ( - Path.home() / ".cache" / 
"verifiers" / "rlm-checkouts" -) -REQUIRED_RLM_CHECKOUT_FILES = ("install.sh", "pyproject.toml") +import verifiers as vf @vf.reward(weight=1.0) @@ -33,21 +7,6 @@ async def exact_answer(task, state) -> float: return float(str(task["answer"]).lower() in stdout.lower()) -@vf.metric -async def rlm_sub_llm_call_count(task, state) -> float: - return float(rlm_metric(state, "sub_llm_call_count")) - - -@vf.metric -async def rlm_sub_llm_total_turns(task, state) -> float: - return float(rlm_metric(state, "sub_llm_total_turns")) - - -@vf.metric -async def rlm_sub_llm_total_tool_calls(task, state) -> float: - return float(rlm_metric(state, "sub_llm_total_tool_calls")) - - def source(): return [ { @@ -101,195 +60,13 @@ def load_taskset(config: vf.TasksetConfig | None = None): ) -def load_harness( - config: vf.HarnessConfig | None = None, - workdir: str = "/workspace", - instruction_path: str = "/task/instruction.md", - rlm_repo_url: str = DEFAULT_RLM_REPO_URL, - rlm_ref: str = DEFAULT_RLM_REF, - rlm_max_turns: int = DEFAULT_RLM_MAX_TURNS, - rlm_exec_timeout: int = DEFAULT_RLM_EXEC_TIMEOUT, - rlm_max_depth: int = DEFAULT_RLM_MAX_DEPTH, - summarize_at_tokens: int | tuple[int, int] | list[int] | None = None, - include_sub_rlm_trajectories: bool = False, - append_to_system_prompt: str = "", - local_checkout: str | Path | None = None, - gh_token: str | None = None, - rlm_tools: list[str] | None = None, - rlm_env: Mapping[str, str] | None = None, -): - harness_config = vf.HarnessConfig(config) - if not include_sub_rlm_trajectories: - harness_config.keep_trajectory_step = keep_only_parent_rlm_steps - tool_names = list(rlm_tools) if rlm_tools is not None else ["ipython"] - summarize_resolver = build_summarize_resolver(summarize_at_tokens) - env = { - "PATH": "/root/.local/bin:/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin", - "OPENAI_MODEL": "runtime.model", - "RLM_MODEL": "runtime.model", - "RLM_TOOLS": ",".join(tool_names), - "RLM_MAX_TURNS": str(rlm_max_turns), - 
"RLM_EXEC_TIMEOUT": str(rlm_exec_timeout), - "RLM_MAX_DEPTH": str(rlm_max_depth), - **dict(rlm_env or {}), - } - if summarize_resolver is not None: - env["RLM_SUMMARIZE_AT_TOKENS"] = summarize_resolver - - return vf.Harness( - sandbox={ - "image": "python:3.11-slim", - "workdir": workdir, - "cpu_cores": 1, - "memory_gb": 2, - "disk_size_gb": 5, - "network_access": True, - "timeout_minutes": 60, - "command_timeout": max(rlm_exec_timeout + 120, 600), - }, - program={ - "sandbox": True, - "dirs": { - DEFAULT_RLM_CHECKOUT_PATH: rlm_checkout_loader( - local_checkout=local_checkout, - rlm_repo_url=rlm_repo_url, - rlm_ref=rlm_ref, - gh_token=gh_token, - ) - }, - "files": { - instruction_path: "task.question", - DEFAULT_APPEND_TO_SYSTEM_PROMPT_PATH: append_to_system_prompt, - }, - "setup": [ - "apt-get update && apt-get install -y --no-install-recommends " - "ca-certificates curl git && rm -rf /var/lib/apt/lists/*", - build_install_command(), - ], - "command": ["bash", "-lc", build_run_script(instruction_path, workdir)], - "env": env, - "artifacts": { - "rlm_metrics": { - "path": f"{workdir}/.rlm/sessions/*/meta.json", - "format": "json", - "key": "metrics", - } - }, - }, - metrics=[ - rlm_sub_llm_call_count, - rlm_sub_llm_total_turns, - rlm_sub_llm_total_tool_calls, - ], - config=harness_config, - ) +def load_harness(config: vf.RLMConfig | None = None): + return vf.RLM(config=config) -def load_environment(config: vf.EnvConfig | None = None): - config = config or vf.EnvConfig() +def load_environment(config: vf.EnvConfig): + harness_config = None if config.harness is None else vf.RLMConfig(config.harness) return vf.Env( taskset=load_taskset(config=config.taskset), - harness=load_harness(config=config.harness), + harness=load_harness(config=harness_config), ) - - -def build_install_command() -> str: - script = f""" -set -eo pipefail -export RLM_CHECKOUT_PATH={shlex.quote(DEFAULT_RLM_CHECKOUT_PATH)} -test -f "$RLM_CHECKOUT_PATH/install.sh" -bash 
"$RLM_CHECKOUT_PATH/install.sh" -""" - return f"bash -lc {shlex.quote(script)}" - - -def build_run_script(instruction_path: str, workdir: str) -> str: - return f""" -set -eo pipefail -export PATH="$HOME/.local/bin:$PATH" -export RLM_MODEL="${{RLM_MODEL:-$OPENAI_MODEL}}" -export OPENAI_API_KEY="${{OPENAI_API_KEY:-intercepted}}" -export RLM_APPEND_TO_SYSTEM_PROMPT="$(cat {shlex.quote(DEFAULT_APPEND_TO_SYSTEM_PROMPT_PATH)} 2>/dev/null || true)" -cd "${{AGENT_WORKDIR:-{workdir}}}" -rlm "$(cat {shlex.quote(instruction_path)})" -""" - - -def rlm_checkout_loader( - local_checkout: str | Path | None, - rlm_repo_url: str, - rlm_ref: str, - gh_token: str | None, -) -> Callable[[], Path]: - checkout: Path | None = None - - def load() -> Path: - nonlocal checkout - if checkout is not None: - return checkout - if local_checkout is not None: - checkout = validate_git_checkout( - Path(local_checkout), - required_files=REQUIRED_RLM_CHECKOUT_FILES, - ) - else: - checkout = resolve_git_checkout( - repo_url=rlm_repo_url, - ref=rlm_ref, - cache_root=DEFAULT_RLM_LOCAL_CHECKOUT_CACHE_ROOT, - gh_token=gh_token, - required_files=REQUIRED_RLM_CHECKOUT_FILES, - ) - return checkout - - return load - - -def keep_only_parent_rlm_steps(step, state, headers) -> bool: - return str(headers.get("x-rlm-depth", "0")) == "0" - - -def rlm_metric(state: Mapping[str, Any], key: str) -> float: - artifacts = state.get("artifacts") - if not isinstance(artifacts, Mapping): - return 0.0 - metrics = artifacts.get("rlm_metrics") - if not isinstance(metrics, Mapping): - return 0.0 - return float(metrics.get(key, 0.0) or 0.0) - - -def build_summarize_resolver( - value: int | tuple[int, int] | list[int] | None, -) -> Callable[..., str | None] | None: - if value is None: - return None - if isinstance(value, bool): - raise ValueError("summarize_at_tokens must be an int or (lo, hi) pair") - if isinstance(value, int): - if value <= 0: - raise ValueError("summarize_at_tokens must be positive") - - def 
fixed_threshold(state): - _ = state - return str(value) - - return fixed_threshold - if isinstance(value, (tuple, list)): - if len(value) != 2: - raise ValueError("summarize_at_tokens pair must have 2 elements") - lo, hi = int(value[0]), int(value[1]) - if lo <= 0 or hi <= 0 or lo > hi: - raise ValueError("summarize_at_tokens pair must satisfy 0 < lo <= hi") - - def sampled_threshold(state): - return str(draw_threshold(state, lo, hi)) - - return sampled_threshold - raise ValueError("summarize_at_tokens must be int, (lo, hi), or None") - - -def draw_threshold(state: Mapping[str, Any], lo: int, hi: int) -> int: - prompt = json.dumps(state.get("prompt"), sort_keys=True, default=str) - digest = hashlib.sha256(prompt.encode("utf-8")).hexdigest() - return random.Random(int(digest[:16], 16)).randint(lo, hi) diff --git a/environments/hello_self_judge_v1/hello_self_judge_v1.py b/environments/hello_self_judge_v1/hello_self_judge_v1.py index 773928c5e..c710c4a61 100644 --- a/environments/hello_self_judge_v1/hello_self_judge_v1.py +++ b/environments/hello_self_judge_v1/hello_self_judge_v1.py @@ -1,13 +1,11 @@ -from __future__ import annotations - import json from collections.abc import Mapping from typing import cast -import verifiers.v1 as vf +import verifiers as vf +from verifiers.v1.types import ConfigMap from verifiers.v1.utils.judge_utils import ( clamp_float, - completion_text, parse_judge_json, truncate_command_record, truncate_text, @@ -51,7 +49,7 @@ """ -TASKS: list[dict[str, object]] = [ +TASKS: list[vf.ConfigData] = [ { "task_id": "example-domains", "question": ( @@ -194,7 +192,7 @@ async def self_consistency_score(task, state) -> float: updated = state.get("update_judge") if not isinstance(updated, Mapping): return 0.0 - updated = cast(Mapping[str, object], updated) + updated = cast(ConfigMap, updated) findings = str(updated.get("findings") or "") if not findings: return 0.0 @@ -216,7 +214,8 @@ async def self_consistency_score(task, state) -> float: max_turns=1, 
).run(judge_task, judge_state) - judge_text = completion_text(judge_state.get("completion")) + messages = vf.get_messages(judge_state.get("completion") or [], role="assistant") + judge_text = str(messages[-1].content or "") if messages else "" parsed = parse_judge_json(judge_text) score = clamp_float(parsed.get("score", 0.0)) state["judge"] = { @@ -229,7 +228,8 @@ async def self_consistency_score(task, state) -> float: @vf.update(priority=10) async def sandbox_judge(task, state) -> None: - response = completion_text(state.get("completion")) + messages = vf.get_messages(state.get("completion") or [], role="assistant") + response = str(messages[-1].content or "") if messages else "" judge_task = vf.Task( { "prompt": [ @@ -254,15 +254,17 @@ async def sandbox_judge(task, state) -> None: ).run(judge_task, judge_state) judge_bash_outputs = state.get("bash_tool_outputs", [])[bash_output_start:] + messages = vf.get_messages(judge_state.get("completion") or [], role="assistant") + findings = str(messages[-1].content or "") if messages else "" state["update_judge"] = { - "findings": completion_text(judge_state.get("completion")), + "findings": findings, "trajectory_id": judge_state["trajectory_id"], "bash_calls": len(judge_bash_outputs), } state["sandbox_report"] = judge_bash_outputs -def update_prompt(task: Mapping[str, object], response: str) -> str: +def update_prompt(task: ConfigMap, response: str) -> str: return ( "Task:\n" f"{task['question']}\n\n" @@ -288,7 +290,7 @@ def update_prompt(task: Mapping[str, object], response: str) -> str: ) -def score_prompt(task: Mapping[str, object], findings: str) -> str: +def score_prompt(task: ConfigMap, findings: str) -> str: return ( "Task:\n" f"{task['question']}\n\n" @@ -369,7 +371,8 @@ def load_harness( def load_environment( num_examples: int = -1, max_turns: int = 8, - config: vf.EnvConfig | None = None, + *, + config: vf.EnvConfig, ) -> vf.Env: config = vf.EnvConfig( config, @@ -385,7 +388,8 @@ def load_environment( def 
load_v1_environment( num_examples: int = -1, max_turns: int = 8, - config: vf.EnvConfig | None = None, + *, + config: vf.EnvConfig, ) -> vf.Env: return load_environment( num_examples=num_examples, diff --git a/environments/hello_subagent_v1/hello_subagent_v1.py b/environments/hello_subagent_v1/hello_subagent_v1.py index 2e2d0084b..a5076530a 100644 --- a/environments/hello_subagent_v1/hello_subagent_v1.py +++ b/environments/hello_subagent_v1/hello_subagent_v1.py @@ -1,7 +1,4 @@ -from __future__ import annotations - -import verifiers.v1 as vf -from verifiers.v1.utils.judge_utils import completion_text +import verifiers as vf async def ask_subagent(name: str, harness, state) -> str: @@ -20,7 +17,8 @@ async def ask_subagent(name: str, harness, state) -> str: ).freeze() child_state = state.for_task(task, borrow="model") child_state = await harness.run(task, child_state) - answer = completion_text(child_state.get("completion")).strip() + messages = vf.get_messages(child_state.get("completion") or [], role="assistant") + answer = str(messages[-1].content or "").strip() if messages else "" state.setdefault("subagent_calls", []).append({"name": name, "answer": answer}) return answer @@ -32,7 +30,9 @@ async def subagent_calls(task, state) -> float: @vf.reward(weight=1.0) async def exact_answer(task, state) -> float: - return float(completion_text(state.get("completion")).strip() == task["answer"]) + messages = vf.get_messages(state.get("completion") or [], role="assistant") + answer = str(messages[-1].content or "").strip() if messages else "" + return float(answer == task["answer"]) NAME_GROUPS = [ @@ -67,7 +67,8 @@ def load_child_harness(): def load_toolset(): return vf.Toolset( tools=[ask_subagent], - bindings={"ask_subagent.harness": load_child_harness()}, + objects={"harness": load_child_harness}, + bindings={"ask_subagent.harness": "objects.harness"}, scope="rollout", ) @@ -93,8 +94,7 @@ def load_harness(config: vf.HarnessConfig | None = None): ) -def 
load_environment(config: vf.EnvConfig | None = None): - config = config or vf.EnvConfig() +def load_environment(config: vf.EnvConfig): return vf.Env( taskset=load_taskset(config=config.taskset), harness=load_harness(config=config.harness), diff --git a/environments/langchain_deep_agents_wikispeedia/langchain_deep_agents_wikispeedia.py b/environments/langchain_deep_agents_wikispeedia/langchain_deep_agents_wikispeedia.py index 79146da28..3aa57e9c3 100644 --- a/environments/langchain_deep_agents_wikispeedia/langchain_deep_agents_wikispeedia.py +++ b/environments/langchain_deep_agents_wikispeedia/langchain_deep_agents_wikispeedia.py @@ -1,16 +1,19 @@ -from __future__ import annotations - import asyncio +import json from collections.abc import Awaitable, Callable, Iterator, Mapping, Sequence -from typing import cast +from typing import Protocol, cast from datasets import Dataset -import verifiers.v1 as vf -from verifiers.v1.utils.prompt_utils import state_system_prompt_text +import verifiers as vf from wiki_graph import WikiGraph, WikiPair, load_wiki_graph +class AgentMessage(Protocol): + role: str + content: object + + def system_prompt(allow_go_back: bool = True) -> str: backtracking = ( "Use `go_back` to undo your last click." 
@@ -129,18 +132,17 @@ async def agent_timeout(task: vf.Task, state: vf.State) -> float: def iter_tool_calls(state: vf.State) -> Iterator[str]: completion = state.get("completion") or [] - for msg in completion: - if not isinstance(msg, Mapping): - continue - tool_calls = msg.get("tool_calls") + messages = ( + vf.get_messages(completion, role="assistant") + if isinstance(completion, list) + else [] + ) + for msg in messages: + tool_calls = msg.tool_calls if not isinstance(tool_calls, list): continue for tool_call in tool_calls: - if not isinstance(tool_call, Mapping): - continue - name = tool_call.get("name") - if isinstance(name, str): - yield name + yield tool_call.name def count_tool_calls(state: vf.State, name: str | None = None) -> int: @@ -189,25 +191,25 @@ async def total_tool_calls(task: vf.Task, state: vf.State) -> float: async def assistant_turns(task: vf.Task, state: vf.State) -> float: completion = state.get("completion") or [] - count = sum( - 1 - for msg in completion - if isinstance(msg, Mapping) and msg.get("role") == "assistant" + return float( + len(vf.get_messages(completion, role="assistant")) + if isinstance(completion, list) + else 0 ) - return float(count) async def invalid_link_rate(task: vf.Task, state: vf.State) -> float: clicks = 0 invalid = 0 completion = state.get("completion") or [] - for msg in completion: - if not isinstance(msg, Mapping): - continue - if msg.get("role") != "tool" or msg.get("name") != "click_link": + messages = ( + vf.get_messages(completion, role="tool") if isinstance(completion, list) else [] + ) + for msg in messages: + if getattr(msg, "name", None) != "click_link": continue clicks += 1 - content = msg.get("content", "") + content = msg.content if isinstance(content, str) and "is not a valid link" in content: invalid += 1 return float(invalid / clicks) if clicks else 0.0 @@ -233,7 +235,7 @@ def build_dataset( f"Your mission: {source} >> {target}\n\n" f"Here is the starting article:\n\n{starting}" ) - info: 
dict[str, object] = { + info: vf.ConfigData = { "source": source, "target": target, "shortest_path": dist, @@ -254,7 +256,9 @@ def build_dataset( return Dataset.from_list(records) -def serialize_agent_completion(messages: Sequence[object]) -> list[dict[str, object]]: +def serialize_agent_completion( + messages: Sequence[AgentMessage | vf.ConfigMap], +) -> list[vf.ConfigData]: role_aliases = { "human": "user", "ai": "assistant", @@ -262,7 +266,7 @@ def serialize_agent_completion(messages: Sequence[object]) -> list[dict[str, obj "system": "system", } call_names: dict[str, str] = {} - serialized: list[dict[str, object]] = [] + serialized: list[vf.ConfigData] = [] for message in messages: if isinstance(message, Mapping): payload = dict(message) @@ -282,7 +286,7 @@ def serialize_agent_completion(messages: Sequence[object]) -> list[dict[str, obj ) raw_role = payload.get("role") or payload.get("type") or "assistant" role = role_aliases.get(str(raw_role), str(raw_role)) - item: dict[str, object] = { + item: vf.ConfigData = { "role": role, "content": payload.get("content", ""), } @@ -299,6 +303,14 @@ def serialize_agent_completion(messages: Sequence[object]) -> list[dict[str, obj ) if isinstance(tool_id, str) and isinstance(name, str): call_names[tool_id] = name + arguments = tool_call_payload.get("arguments") + if not isinstance(arguments, str): + args = tool_call_payload.get("args", {}) + try: + arguments = json.dumps(args if args is not None else {}) + except (TypeError, ValueError): + arguments = str(args) + tool_call_payload["arguments"] = arguments normalized_tool_calls.append(tool_call_payload) item["tool_calls"] = normalized_tool_calls name = payload.get("name") @@ -367,12 +379,19 @@ async def run_langchain_deep_agents_wikispeedia_program( ) runtime_tools = state.get_tools() nav_tools = langchain_navigation_tools(runtime_tools) + state_system_prompt = "" + system_prompt_messages = state.get("system_prompt") + if isinstance(system_prompt_messages, list): + 
state_system_prompt = "\n\n".join( + str(message.content or "") + for message in vf.get_messages(system_prompt_messages) + ) agent = create_deep_agent( model=model, tools=nav_tools, - system_prompt=state_system_prompt_text(task, state) or SYSTEM_PROMPT, + system_prompt=state_system_prompt or SYSTEM_PROMPT, ) - prompt = str(cast(list[dict[str, object]], state["prompt"])[-1]["content"]) + prompt = str(cast(list[vf.ConfigData], state["prompt"])[-1]["content"]) recursion_limit = state.get_max_turns(max_turns) invoke_config = ( {"recursion_limit": recursion_limit} if recursion_limit > 0 else None @@ -504,6 +523,7 @@ def load_harness( def load_environment( + config: vf.EnvConfig, cache_dir: str | None = None, min_path_length: int = 3, max_path_length: int = 6, @@ -517,10 +537,8 @@ def load_environment( timeout_seconds: float = 1200.0, efficiency_weight: float = 0.0, stratify_path_length: bool = True, - config: vf.EnvConfig | None = None, ) -> vf.Env: """Load the v1 Wikispeedia taskset with a LangChain Deep Agents harness.""" - config = config or vf.EnvConfig() return vf.Env( taskset=load_taskset( diff --git a/environments/langchain_deep_agents_wikispeedia/wiki_graph.py b/environments/langchain_deep_agents_wikispeedia/wiki_graph.py index 33f5b6d95..9848bad39 100644 --- a/environments/langchain_deep_agents_wikispeedia/wiki_graph.py +++ b/environments/langchain_deep_agents_wikispeedia/wiki_graph.py @@ -1,7 +1,5 @@ """Wikispeedia article graph: download, parse, and query the SNAP dataset.""" -from __future__ import annotations - import logging import os import random @@ -185,13 +183,13 @@ def _load_distance_matrix( WikiPair = tuple[str, str, int] -def load_wiki_graph(cache_dir: str | Path | None = None) -> WikiGraph: +def load_wiki_graph(cache_dir: str | Path | None = None) -> "WikiGraph": cache_key = str(Path(cache_dir).expanduser()) if cache_dir is not None else "" return cached_wiki_graph(cache_key) @lru_cache(maxsize=None) -def cached_wiki_graph(cache_key: str) -> 
WikiGraph: +def cached_wiki_graph(cache_key: str) -> "WikiGraph": cache_dir = Path(cache_key) if cache_key else None return WikiGraph.load(cache_dir=cache_dir) @@ -278,7 +276,7 @@ def __init__( self._name_lookup: dict[str, str] = {name.lower(): name for name in articles} @classmethod - def load(cls, cache_dir: Path | None = None) -> WikiGraph: + def load(cls, cache_dir: Path | None = None) -> "WikiGraph": cache_dir = cache_dir or DEFAULT_CACHE_DIR graph_dir, articles_dir = _ensure_data(cache_dir) diff --git a/environments/math_python/math_python_v1.py b/environments/math_python/math_python_v1.py index 6531cc7b8..9e2cbd4a2 100644 --- a/environments/math_python/math_python_v1.py +++ b/environments/math_python/math_python_v1.py @@ -1,10 +1,8 @@ -from __future__ import annotations - import json from math_verify import parse, verify -import verifiers.v1 as vf +import verifiers as vf from verifiers.errors import SandboxError from verifiers.utils.data_utils import extract_boxed_answer, load_example_dataset @@ -60,11 +58,8 @@ async def python(code: str, sandbox, session) -> str: @vf.reward(weight=1.0) async def correct_answer(task, state) -> float: completion = state.get("completion") or [] - response_text = "" - for message in reversed(completion): - if message.get("role") == "assistant": - response_text = str(message.get("content") or "") - break + messages = vf.get_messages(completion, role="assistant") + response_text = str(messages[-1].content or "") if messages else "" response = extract_boxed_answer(response_text) answer = str(task["answer"]) if not response or len(response) > 50_000: diff --git a/environments/mcp_search_env/mcp_search_env.py b/environments/mcp_search_env/mcp_search_env.py index 9b8820b37..c800dbb9f 100644 --- a/environments/mcp_search_env/mcp_search_env.py +++ b/environments/mcp_search_env/mcp_search_env.py @@ -1,13 +1,12 @@ -from __future__ import annotations - -from collections.abc import Iterable, Mapping +from collections.abc import Iterable 
from pathlib import Path +import sys from typing import cast from datasets import Dataset from pydantic import Field -import verifiers.v1 as vf +import verifiers as vf SYSTEM_PROMPT = "Use the available MCP tools to answer the question." @@ -66,9 +65,9 @@ class MCPSearchTasksetConfig(vf.TasksetConfig): - mcp_servers: list[dict[str, object]] | None = None + mcp_servers: list[vf.ConfigData] | None = None max_turns: int = 6 - examples: list[dict[str, object]] = Field( + examples: list[vf.ConfigData] = Field( default_factory=lambda: [dict(example) for example in DEFAULT_EXAMPLES] ) @@ -77,11 +76,11 @@ class MCPSearchTaskset(vf.Taskset): config_type = MCPSearchTasksetConfig -def default_mcp_servers() -> list[dict[str, object]]: +def default_mcp_servers() -> list[vf.ConfigData]: return [ { "name": "records", - "command": "python", + "command": sys.executable, "args": [str(Path(__file__).with_name("mcp_server.py"))], "description": "Synthetic search-record MCP server", }, @@ -93,13 +92,13 @@ def default_dataset() -> Dataset: def source( - dataset: Iterable[Mapping[str, object]] | None = None, + dataset: Iterable[vf.ConfigMap] | None = None, *, max_turns: int = 6, ): rows = dataset if dataset is not None else default_dataset() for index, row in enumerate(rows): - row = cast(Mapping[str, object], row) + row = cast(vf.ConfigMap, row) question = str(row["question"]) yield { **dict(row), @@ -109,30 +108,34 @@ def source( } -def mcp_tool_from_config(config: Mapping[str, object]) -> vf.MCPTool: +def mcp_tool_from_config(config: vf.ConfigMap) -> vf.MCPTool: return vf.MCPTool( command=str(config["command"]), - args=[str(arg) for arg in cast(Iterable[object], config.get("args") or [])], + args=[ + str(arg) + for arg in cast( + Iterable[str | int | float | bool], config.get("args") or [] + ) + ], env=cast(dict[str, str] | None, config.get("env")), cwd=cast(str | None, config.get("cwd")), ) -def response_text(state: vf.State) -> str: - completion = state.get("completion") or [] - for 
message in reversed(completion): - if message.get("role") == "assistant": - return str(message.get("content") or "") - return "" - - @vf.reward(weight=1.0) async def exact_title_reward(task: vf.Task, state: vf.State) -> float: - return float(str(task["answer"]).lower() in response_text(state).lower()) + completion = state.get("completion") or [] + messages = ( + vf.get_messages(completion, role="assistant") + if isinstance(completion, list) + else [] + ) + response = str(messages[-1].content or "") if messages else "" + return float(str(task["answer"]).lower() in response.lower()) def load_toolset( - mcp_servers: Iterable[Mapping[str, object]] | None = None, + mcp_servers: Iterable[vf.ConfigMap] | None = None, config: vf.ToolsetConfig | None = None, ) -> vf.Toolset: servers = mcp_servers or default_mcp_servers() @@ -143,17 +146,17 @@ def load_toolset( def load_taskset( - config: vf.TasksetConfig | Mapping[str, object] | None = None, - dataset: Iterable[Mapping[str, object]] | None = None, - mcp_servers: Iterable[Mapping[str, object]] | None = None, + config: MCPSearchTasksetConfig | None = None, + dataset: Iterable[vf.ConfigMap] | None = None, + mcp_servers: Iterable[vf.ConfigMap] | None = None, max_turns: int | None = None, ) -> MCPSearchTaskset: - taskset_overrides: dict[str, object] = {} + taskset_overrides: vf.ConfigData = {} if mcp_servers is not None: taskset_overrides["mcp_servers"] = [dict(server) for server in mcp_servers] if max_turns is not None: taskset_overrides["max_turns"] = max_turns - taskset_config = MCPSearchTasksetConfig.from_config(config, **taskset_overrides) + taskset_config = MCPSearchTasksetConfig(config, **taskset_overrides) return MCPSearchTaskset( source=lambda: source( dataset if dataset is not None else taskset_config.examples, @@ -166,31 +169,35 @@ def load_taskset( ) -def load_harness(config: vf.HarnessConfig | Mapping[str, object] | None = None): +def load_harness(config: vf.HarnessConfig | None = None): return 
vf.Harness(config=config) def load_environment( - config: vf.EnvConfig | Mapping[str, object] | None = None, - dataset: Iterable[Mapping[str, object]] | None = None, - mcp_servers: Iterable[Mapping[str, object]] | None = None, + config: vf.EnvConfig, + dataset: Iterable[vf.ConfigMap] | None = None, + mcp_servers: Iterable[vf.ConfigMap] | None = None, max_turns: int | None = None, ) -> vf.Env: - taskset_overrides: dict[str, object] = {} + taskset_overrides: vf.ConfigData = {} if mcp_servers is not None: taskset_overrides["mcp_servers"] = [dict(server) for server in mcp_servers] if max_turns is not None: taskset_overrides["max_turns"] = max_turns - config = vf.EnvConfig.from_config( + config = vf.EnvConfig( config, - taskset=MCPSearchTasksetConfig.from_config(**taskset_overrides), + taskset=MCPSearchTasksetConfig(**taskset_overrides), + ) + taskset_config = ( + None if config.taskset is None else MCPSearchTasksetConfig(config.taskset) + ) + harness_config = ( + None if config.harness is None else vf.HarnessConfig(config.harness) ) return vf.Env( taskset=load_taskset( - config=cast(vf.TasksetConfig | Mapping[str, object] | None, config.taskset), + config=taskset_config, dataset=dataset, ), - harness=load_harness( - config=cast(vf.HarnessConfig | Mapping[str, object] | None, config.harness) - ), + harness=load_harness(config=harness_config), ) diff --git a/environments/nested_harness_v1/nested_harness_v1.py b/environments/nested_harness_v1/nested_harness_v1.py index 3e3128224..2cb013ba5 100644 --- a/environments/nested_harness_v1/nested_harness_v1.py +++ b/environments/nested_harness_v1/nested_harness_v1.py @@ -1,6 +1,4 @@ -from __future__ import annotations - -import verifiers.v1 as vf +import verifiers as vf class NestedHarnessConfig(vf.HarnessConfig): @@ -102,8 +100,7 @@ def load_harness(config: NestedHarnessConfig | None = None): ) -def load_environment(config: vf.EnvConfig | None = None): - config = config or vf.EnvConfig() +def load_environment(config: 
vf.EnvConfig): return vf.Env( taskset=load_taskset(config=config.taskset), harness=load_harness(config=config.harness), diff --git a/environments/openai_agents_env/openai_agents_env.py b/environments/openai_agents_env/openai_agents_env.py index 2b7d1baa9..f9382ccfe 100644 --- a/environments/openai_agents_env/openai_agents_env.py +++ b/environments/openai_agents_env/openai_agents_env.py @@ -1,9 +1,6 @@ -from __future__ import annotations - import re -from collections.abc import Mapping -import verifiers.v1 as vf +import verifiers as vf from verifiers.utils.data_utils import load_example_dataset ANSWER_RE = re.compile(r"^\s*ANSWER\s*:?\s*(.+?)\s*$", re.IGNORECASE) @@ -49,7 +46,16 @@ async def run_openai_agents_program(task: vf.Task, state: vf.State) -> vf.State: tools=[function_tool(calculate)], ) - result = await Runner.run(agent, input=task_question(task)) + question = task.get("question") + if question is not None: + query = str(question) + else: + query = "" + prompt = task.get("prompt") + if isinstance(prompt, list) and prompt: + query = str(vf.get_messages(prompt)[-1].content or "") + + result = await Runner.run(agent, input=query) final_output = str(result.final_output) state["agent_result"] = final_output state["completion"] = [{"role": "assistant", "content": final_output}] @@ -61,31 +67,6 @@ def load_rows(split: str, num_examples: int): return load_example_dataset("gsm8k", split=split, n=n) -def task_question(task: vf.Task) -> str: - question = task.get("question") - if question is not None: - return str(question) - prompt = task.get("prompt") - if isinstance(prompt, list) and prompt: - last_message = prompt[-1] - if isinstance(last_message, Mapping): - return str(last_message.get("content") or "") - return "" - - -def completion_text(state: vf.State) -> str: - agent_result = state.get("agent_result") - if agent_result is not None: - return str(agent_result) - completion = state.get("completion") - if isinstance(completion, list) and completion: - 
last_message = completion[-1] - if isinstance(last_message, Mapping): - return str(last_message.get("content") or "") - return str(getattr(last_message, "content", last_message) or "") - return "" - - def extract_answer(text: str) -> str: for line in reversed(text.splitlines()): match = ANSWER_RE.match(line) @@ -105,7 +86,18 @@ def answers_match(agent_answer: str, answer: str) -> float: def answer_reward(task: vf.Task, state: vf.State) -> float: """Check if the agent's final output contains the correct answer.""" - agent_answer = extract_answer(completion_text(state)) + result = state.get("agent_result") + if result is not None: + text = str(result) + else: + completion = state.get("completion") + messages = [] + if isinstance(completion, list): + messages = vf.get_messages(completion, role="assistant") or vf.get_messages( + completion + ) + text = str(messages[-1].content or "") if messages else "" + agent_answer = extract_answer(text) if not agent_answer: return 0.0 return answers_match(agent_answer, str(task.get("answer", ""))) @@ -130,12 +122,11 @@ def load_harness(config: vf.HarnessConfig | None = None) -> vf.Harness: def load_environment( + config: vf.EnvConfig, num_train_examples: int = 50, num_eval_examples: int = 20, - config: vf.EnvConfig | None = None, ) -> vf.Env: """Load the OpenAI Agents SDK V1 taskset/harness example environment.""" - config = config or vf.EnvConfig() return vf.Env( taskset=load_taskset( num_train_examples=num_train_examples, diff --git a/environments/opencode_harbor/README.md b/environments/opencode_harbor/README.md index 9eb148fae..292912c7b 100644 --- a/environments/opencode_harbor/README.md +++ b/environments/opencode_harbor/README.md @@ -35,21 +35,19 @@ Notes: | Arg | Type | Default | Description | | --- | ---- | ------- | ----------- | -| `tasks` | str | bundled `tasks/` | Local Harbor task directory or dataset directory. | | `task_names` | list[str] | `null` | Explicit Harbor task names to run. 
| -| `dataset` | str | `null` | `terminal-bench-sample` or `terminal-bench` task selection. | +| `dataset` | str | `null` | Harbor Hub dataset id. Defaults to bundled `tasks/`. | OpenCode settings belong under the v1 harness config: ```toml [env.harness] max_turns = 4 -disabled_tools = ["webfetch", "question"] agent_workdir = "/app" ``` -By default, this environment uses `vf.OpenCode` with only `webfetch` and -`question` disabled. Set `env.harness.disabled_tools` to override that list. +This environment does not set a custom disabled-tool list. It inherits the +`vf.OpenCodeConfig` defaults. ### Metrics Summarize key metrics your rubric emits and how they’re interpreted. diff --git a/environments/opencode_harbor/opencode_harbor.py b/environments/opencode_harbor/opencode_harbor.py index 4eaa6f8b9..7b83167d9 100644 --- a/environments/opencode_harbor/opencode_harbor.py +++ b/environments/opencode_harbor/opencode_harbor.py @@ -1,203 +1,8 @@ -from __future__ import annotations +import verifiers as vf -from collections.abc import Mapping -from pathlib import Path -from typing import cast -import verifiers.v1 as vf - -_TASKS_DIR = Path(__file__).parent / "tasks" -_DEFAULT_DISABLED_TOOLS = ["webfetch", "question"] - -TERMINAL_BENCH_SAMPLE_TASKS = [ - "build-cython-ext", - "chess-best-move", - "configure-git-webserver", - "fix-code-vulnerability", - "log-summary-date-ranges", - "polyglot-c-py", - "qemu-alpine-ssh", - "qemu-startup", - "regex-log", - "sqlite-with-gcov", -] - - -def load_taskset( - config: vf.HarborTasksetConfig | Mapping[str, object] | None = None, - tasks: str | Path | None = None, - task_names: list[str] | None = None, - dataset: str | None = None, - docker_image: str | None = None, - cpu_cores: float | None = None, - memory_gb: float | None = None, - disk_size_gb: float | None = None, - timeout_minutes: int | None = None, - agent_timeout_seconds: float | None = None, - verifier_timeout_seconds: float | None = None, - workdir: str | None = None, - task_dir: 
str | None = None, - scope: str | None = None, - env: dict[str, object] | None = None, -) -> vf.HarborTaskset: - config = vf.HarborTasksetConfig.from_config( - config, - **_taskset_overrides( - tasks=tasks, - task_names=task_names, - docker_image=docker_image, - cpu_cores=cpu_cores, - memory_gb=memory_gb, - disk_size_gb=disk_size_gb, - timeout_minutes=timeout_minutes, - agent_timeout_seconds=agent_timeout_seconds, - verifier_timeout_seconds=verifier_timeout_seconds, - workdir=workdir, - task_dir=task_dir, - scope=scope, - env=env, - ), - ) - tasks_root = tasks if tasks is not None else config.tasks or _TASKS_DIR - selected_task_names = _dataset_task_names( - dataset=dataset, - tasks_root=tasks_root, - task_names=config.task_names, - ) - return vf.HarborTaskset( - tasks=cast(str | Path, tasks_root), - task_names=selected_task_names, - config=config, - ) - - -def load_harness( - config: vf.OpenCodeConfig | Mapping[str, object] | None = None, -) -> vf.OpenCode: - if _has_disabled_tools(config): - return vf.OpenCode(config=config) - return vf.OpenCode(config=config, disabled_tools=list(_DEFAULT_DISABLED_TOOLS)) - - -def _has_disabled_tools( - config: vf.OpenCodeConfig | Mapping[str, object] | None, -) -> bool: - if isinstance(config, vf.OpenCodeConfig): - return "disabled_tools" in config.model_fields_set - return isinstance(config, Mapping) and "disabled_tools" in config - - -def load_environment( - config: vf.EnvConfig | Mapping[str, object] | None = None, - tasks: str | Path | None = None, - task_names: list[str] | None = None, - dataset: str | None = None, - docker_image: str | None = None, - cpu_cores: float | None = None, - memory_gb: float | None = None, - disk_size_gb: float | None = None, - timeout_minutes: int | None = None, - agent_timeout_seconds: float | None = None, - verifier_timeout_seconds: float | None = None, - workdir: str | None = None, - task_dir: str | None = None, - scope: str | None = None, - env: dict[str, object] | None = None, -) -> vf.Env: 
- config = vf.EnvConfig.from_config( - config, - taskset=vf.HarborTasksetConfig.from_config( - **_taskset_overrides( - tasks=tasks, - task_names=task_names, - docker_image=docker_image, - cpu_cores=cpu_cores, - memory_gb=memory_gb, - disk_size_gb=disk_size_gb, - timeout_minutes=timeout_minutes, - agent_timeout_seconds=agent_timeout_seconds, - verifier_timeout_seconds=verifier_timeout_seconds, - workdir=workdir, - task_dir=task_dir, - scope=scope, - env=env, - ) - ), - ) +def load_environment(config: vf.EnvConfig) -> vf.Env: return vf.Env( - taskset=load_taskset( - config=cast( - vf.HarborTasksetConfig | Mapping[str, object] | None, config.taskset - ), - dataset=dataset, - ), - harness=load_harness( - config=cast(vf.OpenCodeConfig | Mapping[str, object] | None, config.harness) - ), + taskset=vf.HarborTaskset(config=config.taskset), + harness=vf.OpenCode(config=config.harness), ) - - -def _taskset_overrides( - *, - tasks: str | Path | None = None, - task_names: list[str] | None = None, - docker_image: str | None = None, - cpu_cores: float | None = None, - memory_gb: float | None = None, - disk_size_gb: float | None = None, - timeout_minutes: int | None = None, - agent_timeout_seconds: float | None = None, - verifier_timeout_seconds: float | None = None, - workdir: str | None = None, - task_dir: str | None = None, - scope: str | None = None, - env: dict[str, object] | None = None, -) -> dict[str, object]: - overrides: dict[str, object] = {} - if tasks is not None: - overrides["tasks"] = str(tasks) - if task_names is not None: - overrides["task_names"] = task_names - if docker_image is not None: - overrides["docker_image"] = docker_image - if cpu_cores is not None: - overrides["cpu_cores"] = cpu_cores - if memory_gb is not None: - overrides["memory_gb"] = memory_gb - if disk_size_gb is not None: - overrides["disk_size_gb"] = disk_size_gb - if timeout_minutes is not None: - overrides["timeout_minutes"] = timeout_minutes - if agent_timeout_seconds is not None: - 
overrides["agent_timeout_seconds"] = agent_timeout_seconds - if verifier_timeout_seconds is not None: - overrides["verifier_timeout_seconds"] = verifier_timeout_seconds - if workdir is not None: - overrides["workdir"] = workdir - if task_dir is not None: - overrides["task_dir"] = task_dir - if scope is not None: - overrides["scope"] = scope - if env is not None: - overrides["env"] = env - return overrides - - -def _dataset_task_names( - dataset: str | None, - tasks_root: object, - task_names: list[str] | None, -) -> list[str] | None: - if dataset is None: - return task_names - if task_names: - raise ValueError("Cannot specify both 'dataset' and 'task_names'.") - if dataset == "terminal-bench-sample": - return TERMINAL_BENCH_SAMPLE_TASKS - if dataset == "terminal-bench": - return [ - path.name - for path in sorted(Path(str(tasks_root)).iterdir()) - if path.is_dir() - ] - raise ValueError("dataset must be 'terminal-bench' or 'terminal-bench-sample'.") diff --git a/environments/openenv_echo/README.md b/environments/openenv_echo/README.md index 51c626b21..29ea51aea 100644 --- a/environments/openenv_echo/README.md +++ b/environments/openenv_echo/README.md @@ -19,7 +19,7 @@ ### Task - **Type**: Tool use, multi-turn. -- **Parser**: Default `Parser` (no special formatting). +- **Output format**: MCP tool calls. - **Rubric overview**: `OpenEnvEpisodicSumRubric` sums per-step rewards; `MultiTurnMonitorRubric` tracks turn count. ### Quickstart @@ -56,8 +56,7 @@ Notes: - If your environments directory is not `./environments`, run: `uv run vf-build openenv-echo -p /path/to/environments` - If you customize the bundled OpenEnv project, rerun `uv run vf-build openenv-echo` (the `proj/.build.json` manifest is updated). -- `openenv_echo.py` defines `render_echo_prompt()` and passes it via `prompt_renderer` -to keep the initial MCP prompt concise. +- `openenv_echo.py` defines `EchoPromptRenderer` and passes it via `prompt_renderer` to keep the initial MCP prompt concise. 
### Troubleshooting diff --git a/environments/openenv_echo/openenv_echo.py b/environments/openenv_echo/openenv_echo.py index 40fa27513..70ac16764 100644 --- a/environments/openenv_echo/openenv_echo.py +++ b/environments/openenv_echo/openenv_echo.py @@ -1,46 +1,48 @@ -from typing import Any - import verifiers as vf from verifiers.types import Messages, UserMessage from verifiers.utils.message_utils import normalize_messages -def render_echo_prompt( - observation: Any, - *, - action_schema: dict[str, Any] | None = None, - context: str = "reset", -) -> Messages: - if not isinstance(observation, dict): - raise RuntimeError( - f"openenv-echo prompt renderer expected dict observation, got {type(observation).__name__}." - ) - - messages = observation.get("messages") - if isinstance(messages, list) and messages: - try: - return normalize_messages( - messages, field_name="openenv-echo observation messages" +class EchoPromptRenderer: + def __call__( + self, + observation: object, + *, + action_schema: vf.ConfigData | None = None, + context: str = "reset", + ) -> Messages: + if not isinstance(observation, dict): + raise RuntimeError( + f"openenv-echo prompt renderer expected dict observation, got {type(observation).__name__}." ) - except TypeError as e: - raise RuntimeError(str(e)) from e - - prompt = observation.get("prompt") - if isinstance(prompt, str) and prompt.strip(): - return [UserMessage(content=prompt)] - - if context == "reset" and isinstance(action_schema, dict): - return [ - UserMessage( - content=( - "You are connected to an OpenEnv MCP environment. " - "Call at least one tool before your final response. " - "Action contract: call_tool(tool_name: str, arguments: object)." 
+ + messages = observation.get("messages") + if isinstance(messages, list) and messages: + try: + return normalize_messages( + messages, field_name="openenv-echo observation messages" ) - ) - ] + except TypeError as e: + raise RuntimeError(str(e)) from e + + prompt = observation.get("prompt") + if isinstance(prompt, str) and prompt.strip(): + return [UserMessage(content=prompt)] + + if context == "reset" and isinstance(action_schema, dict): + return [ + UserMessage( + content=( + "You are connected to an OpenEnv MCP environment. " + "Call at least one tool before your final response. " + "Action contract: call_tool(tool_name: str, arguments: object)." + ) + ) + ] - raise RuntimeError("openenv-echo observation did not include a renderable prompt.") + raise RuntimeError( + "openenv-echo observation did not include a renderable prompt." + ) def load_environment( @@ -52,5 +54,5 @@ def load_environment( num_train_examples=num_train_examples, num_eval_examples=num_eval_examples, seed=seed, - prompt_renderer=render_echo_prompt, + prompt_renderer=EchoPromptRenderer(), ) diff --git a/environments/openenv_echo/proj/pyproject.toml b/environments/openenv_echo/proj/pyproject.toml index 2f45c6bf2..cb556083f 100644 --- a/environments/openenv_echo/proj/pyproject.toml +++ b/environments/openenv_echo/proj/pyproject.toml @@ -15,7 +15,7 @@ description = "Echo Environment for OpenEnv - simple test environment that echoe requires-python = ">=3.10" dependencies = [ # Core OpenEnv dependencies (required for server functionality) - "openenv-core[core]==0.2.1", + "openenv-core>=0.3.0", "fastapi>=0.115.0", "pydantic>=2.0.0", "uvicorn>=0.24.0", diff --git a/environments/openenv_textarena/README.md b/environments/openenv_textarena/README.md index a52050ca4..7950e1fb8 100644 --- a/environments/openenv_textarena/README.md +++ b/environments/openenv_textarena/README.md @@ -19,7 +19,7 @@ ### Task - **Type**: Multi-turn gym interaction. -- **Parser**: Default `Parser` (no special formatting). 
+- **Output format**: Game actions. - **Rubric overview**: `OpenEnvEpisodicSumRubric` sums per-step rewards; `MultiTurnMonitorRubric` tracks turn count. ### Quickstart @@ -48,5 +48,4 @@ prime eval run openenv-textarena - Upstream TextArena app defaults to `TEXTARENA_ENV_ID=Wordle-v0`. - To use another game, set environment variables in the OpenEnv project/server config before building. -- `openenv_textarena.py` defines `render_textarena_prompt()` and passes it via -`prompt_renderer` so observations are rendered as useful game messages. +- `openenv_textarena.py` defines `TextArenaPromptRenderer` and passes it via `prompt_renderer` so observations are rendered as useful game messages. diff --git a/environments/openenv_textarena/openenv_textarena.py b/environments/openenv_textarena/openenv_textarena.py index c79e5d0ef..04a385cac 100644 --- a/environments/openenv_textarena/openenv_textarena.py +++ b/environments/openenv_textarena/openenv_textarena.py @@ -1,66 +1,64 @@ import re -from typing import Any import verifiers as vf from verifiers.types import Messages, UserMessage -_TEXTARENA_ENV_ID_RE = re.compile(r"^[A-Za-z0-9_-]+-v\d+$") +class TextArenaPromptRenderer: + env_id_pattern = re.compile(r"^[A-Za-z0-9_-]+-v\d+$") + def __call__( + self, + observation: object, + *, + context: str = "reset", + ) -> Messages: + if not isinstance(observation, dict): + raise RuntimeError( + f"openenv-textarena prompt renderer expected dict observation, got {type(observation).__name__}." 
+ ) -def _message_text_from_observation(observation: dict[str, Any]) -> str | None: - raw_messages = observation.get("messages") - if not isinstance(raw_messages, list): - return None - for item in reversed(raw_messages): - if isinstance(item, dict): - content = item.get("content") - if isinstance(content, str) and content.strip(): - return content.strip() - return None - + message_text = self.message_text(observation) + prompt_text = self.prompt_text(observation) -def _prompt_text_from_observation(observation: dict[str, Any]) -> str | None: - prompt = observation.get("prompt") - if not isinstance(prompt, str): - return None - value = prompt.strip() - if not value: - return None - # TextArena sometimes falls back to env id like "Wordle-v0", which is not - # a useful model prompt for subsequent turns. - if _TEXTARENA_ENV_ID_RE.fullmatch(value): - return None - return value + if context == "step": + if message_text is not None: + return [UserMessage(content=message_text)] + if prompt_text is not None: + return [UserMessage(content=prompt_text)] + else: + if prompt_text is not None: + return [UserMessage(content=prompt_text)] + if message_text is not None: + return [UserMessage(content=message_text)] - -def render_textarena_prompt( - observation: Any, - *, - context: str = "reset", -) -> Messages: - if not isinstance(observation, dict): raise RuntimeError( - f"openenv-textarena prompt renderer expected dict observation, got {type(observation).__name__}." + "openenv-textarena observation did not include renderable prompt text." 
) - message_text = _message_text_from_observation(observation) - prompt_text = _prompt_text_from_observation(observation) - - if context == "step": - if message_text is not None: - return [UserMessage(content=message_text)] - if prompt_text is not None: - return [UserMessage(content=prompt_text)] - else: - if prompt_text is not None: - return [UserMessage(content=prompt_text)] - if message_text is not None: - return [UserMessage(content=message_text)] + def message_text(self, observation: vf.ConfigData) -> str | None: + raw_messages = observation.get("messages") + if not isinstance(raw_messages, list): + return None + for item in reversed(raw_messages): + if isinstance(item, dict): + content = item.get("content") + if isinstance(content, str) and content.strip(): + return content.strip() + return None - raise RuntimeError( - "openenv-textarena observation did not include renderable prompt text." - ) + def prompt_text(self, observation: vf.ConfigData) -> str | None: + prompt = observation.get("prompt") + if not isinstance(prompt, str): + return None + value = prompt.strip() + if not value: + return None + # TextArena sometimes falls back to env id like "Wordle-v0", which is + # not a useful model prompt for subsequent turns. + if self.env_id_pattern.fullmatch(value): + return None + return value def load_environment( @@ -72,5 +70,5 @@ def load_environment( num_train_examples=num_train_examples, num_eval_examples=num_eval_examples, seed=seed, - prompt_renderer=render_textarena_prompt, + prompt_renderer=TextArenaPromptRenderer(), ) diff --git a/environments/openenv_textarena/proj/client.py b/environments/openenv_textarena/proj/client.py index 3d18743fc..95ac2de26 100644 --- a/environments/openenv_textarena/proj/client.py +++ b/environments/openenv_textarena/proj/client.py @@ -11,8 +11,6 @@ over HTTP. 
""" -from __future__ import annotations - from typing import Any, Dict from openenv.core.client_types import StepResult diff --git a/environments/openenv_textarena/proj/models.py b/environments/openenv_textarena/proj/models.py index 9e745b11c..7e7965128 100644 --- a/environments/openenv_textarena/proj/models.py +++ b/environments/openenv_textarena/proj/models.py @@ -11,8 +11,6 @@ with TextArena game environments (e.g., Wordle-v0). """ -from __future__ import annotations - from pydantic import BaseModel, Field from typing import Any, Dict, List, Optional diff --git a/environments/openenv_textarena/proj/pyproject.toml b/environments/openenv_textarena/proj/pyproject.toml index e33e5d55c..e173345b6 100644 --- a/environments/openenv_textarena/proj/pyproject.toml +++ b/environments/openenv_textarena/proj/pyproject.toml @@ -15,7 +15,7 @@ description = "TextArena environment for OpenEnv" requires-python = ">=3.10" dependencies = [ # Core OpenEnv dependencies (required for server functionality) - "openenv-core[core]==0.2.1", + "openenv-core>=0.3.0", "fastapi>=0.115.0", "pydantic>=2.0.0", "uvicorn>=0.24.0", @@ -47,4 +47,3 @@ server = "textarena_env.server.app:main" # Explicitly list packages - "textarena_env" maps to current dir packages = ["textarena_env", "textarena_env.server"] package-dir = {"textarena_env" = ".", "textarena_env.server" = "server"} - diff --git a/environments/openenv_textarena/proj/rewards.py b/environments/openenv_textarena/proj/rewards.py index e3c783c1e..66fedf0d6 100644 --- a/environments/openenv_textarena/proj/rewards.py +++ b/environments/openenv_textarena/proj/rewards.py @@ -1,7 +1,5 @@ """Reward provider utilities for TextArena environments.""" -from __future__ import annotations - import re from typing import Dict, List, Protocol, Tuple diff --git a/environments/openenv_textarena/proj/server/app.py b/environments/openenv_textarena/proj/server/app.py index 83d381895..e4692781b 100644 --- a/environments/openenv_textarena/proj/server/app.py +++ 
b/environments/openenv_textarena/proj/server/app.py @@ -6,8 +6,6 @@ """FastAPI application entrypoint for the TextArena environment.""" -from __future__ import annotations - import os from openenv.core.env_server.http_server import create_app diff --git a/environments/openenv_textarena/proj/server/environment.py b/environments/openenv_textarena/proj/server/environment.py index 27b115d1e..a3aa03549 100644 --- a/environments/openenv_textarena/proj/server/environment.py +++ b/environments/openenv_textarena/proj/server/environment.py @@ -6,8 +6,6 @@ """Server implementation for the generic TextArena environment.""" -from __future__ import annotations - import sys from typing import Any, Dict, Iterable, List, Optional from uuid import uuid4 diff --git a/environments/reverse_text/reverse_text_v1.py b/environments/reverse_text/reverse_text_v1.py index 98209cd55..09fd9a722 100644 --- a/environments/reverse_text/reverse_text_v1.py +++ b/environments/reverse_text/reverse_text_v1.py @@ -1,18 +1,27 @@ -from __future__ import annotations - +import re from difflib import SequenceMatcher from datasets import load_dataset -import verifiers.v1 as vf -from verifiers.parsers.xml_parser import XMLParser +import verifiers as vf + + +class TagExtractor: + def __init__(self, tag: str): + self.pattern = re.compile(rf"<{tag}>(.*?)</{tag}>", re.DOTALL) -parser = XMLParser(["reversed_text"], answer_field="reversed_text") + def __call__(self, completion: list[vf.ConfigData]) -> str: + messages = vf.get_messages(completion, role="assistant") + if not messages: + return "" + message = messages[-1] + match = self.pattern.search(str(message.content or "")) + return match.group(1).strip() if match else "" @vf.reward(weight=1.0) -async def lcs_reward_func(task, state) -> float: - response = parser.parse_answer(state.get("completion") or []) or "" +async def lcs_reward_func(task, state, extract_reversed_text) -> float: + response = extract_reversed_text(state.get("completion") or []) answer =
str(task["answer"]) return SequenceMatcher(None, response, answer).ratio() @@ -56,6 +65,10 @@ def load_taskset( source=build_source(dataset_name, dataset_split), system_prompt=system_prompt, rewards=[lcs_reward_func], + objects={"extract_reversed_text": lambda: TagExtractor("reversed_text")}, + bindings={ + "lcs_reward_func.extract_reversed_text": "objects.extract_reversed_text" + }, config=config, ) diff --git a/environments/rlm_swe_v1/README.md b/environments/rlm_swe_v1/README.md index a607e6bb1..1a6fc1c8f 100644 --- a/environments/rlm_swe_v1/README.md +++ b/environments/rlm_swe_v1/README.md @@ -3,9 +3,23 @@ v1 RLM coding environment using the R2E-Gym SWE taskset and `vf.RLM` harness. ```python -from rlm_swe_v1 import load_environment +import verifiers as vf -env = load_environment() +env = vf.load_environment("rlm-swe-v1") +``` + +Tune the taskset and harness through typed v1 config objects: + +```python +import verifiers as vf +from rlm_swe_v1 import RlmSweTasksetConfig, load_environment + +env = load_environment( + config=vf.EnvConfig( + taskset=RlmSweTasksetConfig(timeout_minutes=90), + harness=vf.RLMConfig(rlm_repo_ref="main", rlm_tools=["bash", "edit"]), + ) +) ``` The taskset is fully implemented in this environment package on the v1 stack. 
diff --git a/environments/rlm_swe_v1/rlm_swe_v1.py b/environments/rlm_swe_v1/rlm_swe_v1.py index 02a3ecaec..c4a9da2fb 100644 --- a/environments/rlm_swe_v1/rlm_swe_v1.py +++ b/environments/rlm_swe_v1/rlm_swe_v1.py @@ -1,18 +1,17 @@ -from __future__ import annotations - import json import logging import os import re import shlex -from collections.abc import Mapping +from collections.abc import Iterable, Mapping from pathlib import Path -from typing import Any +from typing import Protocol, cast from datasets import load_dataset from pydantic import Field -import verifiers.v1 as vf +import verifiers as vf +from verifiers.v1.types import ConfigMap, ProgramOptionMap logger = logging.getLogger(__name__) @@ -32,7 +31,38 @@ class RlmSweTasksetConfig(vf.TasksetConfig): ds_keep_in_memory: bool = True timeout_minutes: int | None = None hide_tests_from_agent: bool = True - env: dict[str, object] = Field(default_factory=dict) + env: vf.ConfigData = Field(default_factory=dict) + + +class SandboxCommandResult(Protocol): + exit_code: int + stdout: str | None + stderr: str | None + + +class R2ESandbox(Protocol): + id: str + + async def execute( + self, + command: str, + working_dir: str | None = None, + timeout: int = 90, + ) -> SandboxCommandResult: ... + + async def download_file( + self, remote_path: str, local_path: str, timeout: int = 300 + ) -> None: ... + + async def upload_file( + self, remote_path: str, local_path: str, timeout: int = 300 + ) -> None: ... + + async def upload_bytes(self, remote_path: str, data: bytes, name: str) -> None: ... + + async def run_background_job( + self, command: str, timeout: int, working_dir: str + ) -> SandboxCommandResult: ... 
class R2ESWETaskset(vf.Taskset): @@ -48,9 +78,8 @@ def __init__( ds_keep_in_memory: bool | None = None, timeout_minutes: int | None = None, hide_tests_from_agent: bool | None = None, - env: Mapping[str, object] | None = None, - config: RlmSweTasksetConfig | Mapping[str, object] | None = None, - **kwargs: Any, + env: ProgramOptionMap | None = None, + config: RlmSweTasksetConfig | None = None, ): config = RlmSweTasksetConfig(config) self.dataset_name = dataset_name or config.dataset_name @@ -80,16 +109,15 @@ def __init__( source=self.load_rows, taskset_id="swe/r2e", config=config, - **kwargs, ) - def load_rows(self) -> list[dict[str, Any]]: - rows: list[dict[str, Any]] = [] + def load_rows(self) -> list[vf.ConfigData]: + rows: list[vf.ConfigData] = [] for index, row in enumerate(self.load_dataset_rows()): row = dict(row) info = dict(row["info"]) instruction = str(info["problem_statement"]) - task_row: dict[str, Any] = { + task_row: vf.ConfigData = { "example_id": index, "task_id": info.get("instance_id") or index, "question": row.get("question", instruction), @@ -103,7 +131,7 @@ def load_rows(self) -> list[dict[str, Any]]: rows.append(task_row) return rows - def load_dataset_rows(self): + def load_dataset_rows(self) -> Iterable[ConfigMap]: dataset_kwargs = dict( num_proc=self.ds_num_proc, keep_in_memory=self.ds_keep_in_memory, @@ -121,14 +149,17 @@ def load_dataset_rows(self): lambda row: row.get("repo_name") not in filter_set, **dataset_kwargs, ) - return dataset.map( - process_r2e_example, - remove_columns=dataset.column_names, - **dataset_kwargs, + return cast( + Iterable[ConfigMap], + dataset.map( + process_r2e_example, + remove_columns=dataset.column_names, + **dataset_kwargs, + ), ) - def sandbox_config(self, info: Mapping[str, object]) -> dict[str, object]: - config: dict[str, object] = { + def sandbox_config(self, info: ConfigMap) -> vf.ConfigData: + config: vf.ConfigData = { "image": f"{REGISTRY_PREFIX}/{info['docker_image']}", "cpu_cores": 4, "memory_gb": 
4, @@ -168,7 +199,9 @@ async def setup_r2e_sandbox(self, task, state, sandbox=None) -> None: state.setdefault("test_timeout", timeout_minutes * 60) await self.setup_sandbox(sandbox, state) - async def setup_sandbox(self, sandbox: Any, state: dict[str, Any]) -> None: + async def setup_sandbox( + self, sandbox: R2ESandbox, state: vf.MutableConfigMap + ) -> None: async def exec_checked( command: str, working_dir: str | None = None, timeout: int = 90 ): @@ -248,8 +281,8 @@ async def solved(self, task, state) -> float: async def run_tests( self, - sandbox: Any, - state: dict[str, Any], + sandbox: R2ESandbox, + state: vf.MutableConfigMap, test_timeout: int, ) -> str: local_archive_path = state.get("r2e_tests_archive_local_path") @@ -288,7 +321,7 @@ async def run_tests( ) return result.stdout or "" - def calculate_reward(self, test_output: str, info: Mapping[str, object]) -> float: + def calculate_reward(self, test_output: str, info: ConfigMap) -> float: parsed = parse_log_pytest(test_output) parsed = decolor_dict_keys(parsed) expected_raw = info["expected_output_json"] @@ -307,7 +340,9 @@ def calculate_reward(self, test_output: str, info: Mapping[str, object]) -> floa return 0.0 return 1.0 - async def apply_gold_patch(self, sandbox: Any, state) -> None: + async def apply_gold_patch( + self, sandbox: R2ESandbox, state: vf.MutableConfigMap + ) -> None: info = state["info"] patch = extract_gold_patch( str(info["parsed_commit_content"]), @@ -331,7 +366,7 @@ async def apply_gold_patch(self, sandbox: Any, state) -> None: f"git apply failed: exit_code={result.exit_code} stderr={stderr}" ) - async def validate_instance(self, state) -> bool: + async def validate_instance(self, state: vf.MutableConfigMap) -> bool: sandbox = state["_rlm_swe_sandbox"] await self.apply_gold_patch(sandbox, state) test_output = await self.run_tests( @@ -350,7 +385,7 @@ async def cleanup_r2e_state(self, task, state) -> None: state.pop("_rlm_swe_sandbox", None) -def process_r2e_example(row: Mapping[str, 
Any]) -> dict[str, Any]: +def process_r2e_example(row: ConfigMap) -> vf.ConfigData: info = dict(row) info.setdefault("instance_id", row.get("commit_hash")) info.setdefault("repo", row.get("repo_name")) @@ -375,7 +410,7 @@ def parse_log_pytest(log: str | None) -> dict[str, str]: return test_status_map -def decolor_dict_keys(values: Mapping[str, str]) -> dict[str, str]: +def decolor_dict_keys(values: dict[str, str]) -> dict[str, str]: return {re.sub(r"\u001b\[\d+m", "", key): value for key, value in values.items()} @@ -454,120 +489,37 @@ def extract_gold_patch( def load_taskset( - config: RlmSweTasksetConfig | Mapping[str, object] | None = None, - dataset_name: str | None = None, - repo_path: str | None = None, - alt_path: str | None = None, - filter_repos: list[str] | None = None, - ds_num_proc: int | None = None, - ds_keep_in_memory: bool | None = None, - timeout_minutes: int | None = None, - hide_tests_from_agent: bool | None = None, - env: Mapping[str, object] | None = None, + config: RlmSweTasksetConfig | None = None, ) -> R2ESWETaskset: - return R2ESWETaskset( - dataset_name=dataset_name, - repo_path=repo_path, - alt_path=alt_path, - filter_repos=filter_repos, - ds_num_proc=ds_num_proc, - ds_keep_in_memory=ds_keep_in_memory, - timeout_minutes=timeout_minutes, - hide_tests_from_agent=hide_tests_from_agent, - env=env, - config=config, - ) + return R2ESWETaskset(config=config) def load_harness( - config: vf.HarnessConfig | None = None, - workdir: str = DEFAULT_REPO_PATH, - gh_token: str | None = None, - rlm_tools: list[str] | None = None, - skills: str | Path | None = None, - **rlm_kwargs: Any, + config: vf.RLMConfig | None = None, + taskset: R2ESWETaskset | None = None, ) -> vf.RLM: - token = gh_token or os.environ.get("GH_TOKEN") - tools = list(rlm_tools if rlm_tools is not None else DEFAULT_RLM_TOOLS) + user_config = vf.RLMConfig(config) + config = vf.RLMConfig( + vf.RLMConfig(workdir=DEFAULT_REPO_PATH, rlm_tools=list(DEFAULT_RLM_TOOLS)), + 
**user_config.model_dump(exclude_unset=True, exclude_none=True), + ) + if taskset is not None: + config = vf.RLMConfig( + config, + workdir=taskset.repo_path, + gh_token=config.gh_token or os.environ.get("GH_TOKEN"), + env_vars={**taskset.get_env_vars(), **config.env_vars}, + ) return vf.RLM( - workdir=workdir, - gh_token=token, - rlm_tools=tools, - skills=skills, config=config, - **rlm_kwargs, ) -def load_environment( - config: vf.EnvConfig | None = None, - dataset_name: str | None = None, - repo_path: str | None = None, - alt_path: str | None = None, - filter_repos: list[str] | None = None, - ds_num_proc: int | None = None, - ds_keep_in_memory: bool | None = None, - timeout_minutes: int | None = None, - hide_tests_from_agent: bool | None = None, - env: Mapping[str, object] | None = None, - instruction_path: str | None = None, - rlm_repo_url: str | None = None, - rlm_ref: str | None = None, - rlm_max_turns: int | None = None, - rlm_exec_timeout: int | None = None, - rlm_max_depth: int | None = None, - summarize_at_tokens: int | tuple[int, int] | list[int] | None = None, - include_sub_rlm_trajectories: bool | None = None, - append_to_system_prompt: str | None = None, - local_checkout: str | Path | None = None, - gh_token: str | None = None, - rlm_tools: list[str] | None = None, - rlm_env: dict[str, object] | None = None, - skills: str | Path | None = None, -) -> vf.Env: - config = vf.EnvConfig( - config, - taskset=RlmSweTasksetConfig( - dataset_name=dataset_name or DEFAULT_DATASET_NAME, - repo_path=repo_path or DEFAULT_REPO_PATH, - alt_path=alt_path or DEFAULT_ALT_PATH, - filter_repos=filter_repos, - ds_num_proc=ds_num_proc, - ds_keep_in_memory=ds_keep_in_memory - if ds_keep_in_memory is not None - else True, - timeout_minutes=timeout_minutes, - hide_tests_from_agent=hide_tests_from_agent - if hide_tests_from_agent is not None - else True, - env=dict(env or {}), - ), - ) - taskset = load_taskset(config=config.taskset) - merged_rlm_env = {**taskset.get_env_vars(), 
**dict(rlm_env or {})} - rlm_kwargs = { - key: value - for key, value in { - "instruction_path": instruction_path, - "rlm_repo_url": rlm_repo_url, - "rlm_ref": rlm_ref, - "rlm_max_turns": rlm_max_turns, - "rlm_exec_timeout": rlm_exec_timeout, - "rlm_max_depth": rlm_max_depth, - "summarize_at_tokens": summarize_at_tokens, - "include_sub_rlm_trajectories": include_sub_rlm_trajectories, - "append_to_system_prompt": append_to_system_prompt, - "local_checkout": local_checkout, - "rlm_env": merged_rlm_env, - }.items() - if value is not None - } - harness = load_harness( - config=config.harness, - workdir=taskset.repo_path, - gh_token=gh_token, - rlm_tools=rlm_tools, - skills=skills, - **rlm_kwargs, +def load_environment(config: vf.EnvConfig) -> vf.Env: + taskset_config = ( + None if config.taskset is None else RlmSweTasksetConfig(config.taskset) ) + harness_config = None if config.harness is None else vf.RLMConfig(config.harness) + taskset = load_taskset(config=taskset_config) + harness = load_harness(config=harness_config, taskset=taskset) return vf.Env(taskset=taskset, harness=harness) diff --git a/environments/tau2_bench_v1/tau2_bench_v1.py b/environments/tau2_bench_v1/tau2_bench_v1.py index 4e8b7dfbe..257c80bcd 100644 --- a/environments/tau2_bench_v1/tau2_bench_v1.py +++ b/environments/tau2_bench_v1/tau2_bench_v1.py @@ -1,20 +1,19 @@ -from __future__ import annotations - import asyncio import json import os import shutil import subprocess import uuid -from collections.abc import Callable, Mapping +from collections.abc import Callable from copy import deepcopy from datetime import datetime, timedelta from pathlib import Path -from typing import Any, cast +from typing import cast import verifiers as core_vf -import verifiers.v1 as vf +import verifiers as vf from verifiers.types import Tool +from verifiers.v1.types import ConfigMap from tau2.agent.llm_agent import AGENT_INSTRUCTION, SYSTEM_PROMPT, LLMAgent from tau2.agent.llm_agent import is_valid_agent_history_message 
@@ -64,7 +63,7 @@ def download_tau2_data() -> None: shutil.rmtree(temp_dir) -def tau_msg_to_vf_dict(message: Message) -> dict[str, object]: +def tau_msg_to_vf_dict(message: Message) -> vf.ConfigData: if isinstance(message, AssistantMessage): if message.tool_calls: return core_vf.AssistantMessage( @@ -93,11 +92,11 @@ def tau_msg_to_vf_dict(message: Message) -> dict[str, object]: raise ValueError(f"Unknown tau2 message type: {type(message)}") -def dump_tau_message(message: Message) -> dict[str, object]: - return cast(dict[str, object], message.model_dump(mode="json", exclude_none=True)) +def dump_tau_message(message: Message) -> vf.ConfigData: + return cast(vf.ConfigData, message.model_dump(mode="json", exclude_none=True)) -def load_tau_message(payload: Mapping[str, object]) -> Message: +def load_tau_message(payload: ConfigMap) -> Message: role = payload.get("role") if role == "assistant": return AssistantMessage.model_validate(payload) @@ -114,9 +113,9 @@ class Tau2Session: def __init__( self, domain: str, - task_payload: Mapping[str, object], + task_payload: ConfigMap, user_model: str, - user_args: Mapping[str, object], + user_args: ConfigMap, max_steps: int, max_errors: int, ): @@ -127,7 +126,7 @@ def __init__( self.max_steps = max_steps self.max_errors = max_errors self.ready = False - self.initial_prompt_messages: list[dict[str, object]] = [] + self.initial_prompt_messages: list[vf.ConfigData] = [] self.recorded_assistant_messages = 0 self.pending_agent_tool_calls: list[ToolCall] = [] self.num_assistant_tool_calls = 0 @@ -268,11 +267,11 @@ def init_from_history(self, message_history: list[Message]) -> None: async def record_assistant_from_state(self, state: vf.State) -> None: completion = state.get("completion") or [] - assistant_messages = [ - message - for message in completion - if isinstance(message, Mapping) and message.get("role") == "assistant" - ] + assistant_messages = ( + vf.get_messages(completion, role="assistant") + if isinstance(completion, list) + 
else [] + ) for message in assistant_messages[self.recorded_assistant_messages :]: assistant_message = assistant_from_openai_message(message) self.agent_state.messages.append(assistant_message) @@ -299,7 +298,7 @@ async def record_assistant_from_state(self, state: vf.State) -> None: self.render_state(state) async def call_agent_tool( - self, name: str, arguments: Mapping[str, object], state: vf.State + self, name: str, arguments: ConfigMap, state: vf.State ) -> str: await self.record_assistant_from_state(state) tool_call = self.pop_pending_tool_call(name, arguments) @@ -315,9 +314,7 @@ async def call_agent_tool( self.render_state(state) return tool_message.content or "" - def pop_pending_tool_call( - self, name: str, arguments: Mapping[str, object] - ) -> ToolCall: + def pop_pending_tool_call(self, name: str, arguments: ConfigMap) -> ToolCall: for index, tool_call in enumerate(self.pending_agent_tool_calls): if tool_call.name == name: return self.pending_agent_tool_calls.pop(index) @@ -328,7 +325,7 @@ def pop_pending_tool_call( requestor="assistant", ) - async def user_messages(self, state: vf.State) -> list[dict[str, object]]: + async def user_messages(self, state: vf.State) -> list[vf.ConfigData]: await self.record_assistant_from_state(state) if self.done: self.render_state(state) @@ -337,8 +334,8 @@ async def user_messages(self, state: vf.State) -> list[dict[str, object]]: self.render_state(state) return messages - async def advance_until_agent(self, state: vf.State) -> list[dict[str, object]]: - messages: list[dict[str, object]] = [] + async def advance_until_agent(self, state: vf.State) -> list[vf.ConfigData]: + messages: list[vf.ConfigData] = [] while not (self.done or self.to_role == Role.AGENT): if self.to_role == Role.USER: user_message, self.user_state = await asyncio.to_thread( @@ -429,7 +426,7 @@ def render_state(self, state: vf.State) -> None: def make_tau2_setup( session_factory: Callable[..., Tau2Session], -) -> Callable[..., object]: +) -> 
vf.Handler: @vf.setup(priority=100) async def tau2_setup(task: vf.Task, state: vf.State) -> None: runtime = state.runtime_state() @@ -449,37 +446,28 @@ async def tau2_setup(task: vf.Task, state: vf.State) -> None: return tau2_setup -def assistant_from_openai_message(message: Mapping[str, object]) -> AssistantMessage: +def assistant_from_openai_message(message: vf.AssistantMessage) -> AssistantMessage: tool_calls = [] - for raw_tool_call in cast(list[object], message.get("tool_calls") or []): - if isinstance(raw_tool_call, str): - raw_tool_call = json.loads(raw_tool_call) - if not isinstance(raw_tool_call, Mapping): - raise TypeError("tau2 tool calls must be mappings.") - function = raw_tool_call.get("function") or {} - if function is not None and not isinstance(function, Mapping): - raise TypeError("tau2 tool call function payload must be a mapping.") - function = cast(Mapping[str, object], function) - arguments = function.get("arguments", raw_tool_call.get("arguments", "{}")) + for raw_tool_call in message.tool_calls or []: + arguments = raw_tool_call.arguments if isinstance(arguments, str): parsed_arguments = json.loads(arguments or "{}") else: parsed_arguments = arguments - name = function.get("name") or raw_tool_call.get("name") or "" tool_calls.append( ToolCall( - id=str(raw_tool_call.get("id") or f"call_{uuid.uuid4().hex[:8]}"), - name=str(name), - arguments=cast(dict[str, object], parsed_arguments), + id=raw_tool_call.id or f"call_{uuid.uuid4().hex[:8]}", + name=raw_tool_call.name, + arguments=cast(vf.ConfigData, parsed_arguments), requestor="assistant", ) ) - content = message.get("content") + content = message.content return AssistantMessage( role="assistant", content=content if isinstance(content, str) and content else None, tool_calls=tool_calls or None, - raw_data=dict(message), + raw_data=message.model_dump(exclude_none=True), ) @@ -516,7 +504,7 @@ def source(domain: str, max_turns: int): def load_session_factory( domain: str, user_model: str, - 
user_args: Mapping[str, object], + user_args: ConfigMap, max_steps: int, max_errors: int, ) -> Callable[..., Tau2Session]: @@ -531,7 +519,7 @@ def load_session(task, state) -> Tau2Session: task_info = json.loads(task_info) session = Tau2Session( domain=domain, - task_payload=cast(Mapping[str, object], task_info), + task_payload=cast(ConfigMap, task_info), user_model=user_model, user_args=user_args, max_steps=max_steps, @@ -543,17 +531,17 @@ def load_session(task, state) -> Tau2Session: return load_session -def make_tau2_tool(name: str, schema: Mapping[str, object]) -> Callable[..., object]: +def make_tau2_tool(name: str, schema: ConfigMap) -> vf.Handler: async def tool(session: Tau2Session, state, **arguments) -> str: return await session.call_agent_tool(name, arguments, state) - function_schema = cast(Mapping[str, object], schema["function"]) + function_schema = cast(ConfigMap, schema["function"]) tool.__name__ = name tool.__doc__ = str(function_schema.get("description") or "") tool.tool_def = Tool( name=name, description=str(function_schema.get("description") or ""), - parameters=cast(dict[str, Any], function_schema.get("parameters") or {}), + parameters=cast(vf.ConfigData, function_schema.get("parameters") or {}), strict=False, ) return tool @@ -562,7 +550,7 @@ async def tool(session: Tau2Session, state, **arguments) -> str: def load_toolset( domain: str = "telecom", user_model: str = DEFAULT_USER_MODEL, - user_args: Mapping[str, object] = DEFAULT_LLM_ARGS_USER, + user_args: ConfigMap = DEFAULT_LLM_ARGS_USER, user_base_url: str = DEFAULT_USER_BASE_URL, user_api_key_var: str = DEFAULT_USER_API_KEY_VAR, max_steps: int = DEFAULT_MAX_STEPS, @@ -575,7 +563,7 @@ def load_toolset( schemas = [tool.openai_schema for tool in environment.get_tools()] tools = [ make_tau2_tool( - str(cast(Mapping[str, object], schema["function"])["name"]), + str(cast(ConfigMap, schema["function"])["name"]), schema, ) for schema in schemas @@ -597,8 +585,8 @@ def load_toolset( def 
tau2_user_args( - user_args: Mapping[str, object], user_base_url: str, user_api_key_var: str -) -> dict[str, object]: + user_args: ConfigMap, user_base_url: str, user_api_key_var: str +) -> vf.ConfigData: return { **dict(user_args), "api_base": user_base_url, @@ -606,17 +594,17 @@ def tau2_user_args( } -async def tau2_user(session: Tau2Session, state, transcript) -> list[dict[str, object]]: +async def tau2_user(session: Tau2Session, state, transcript) -> list[vf.ConfigData]: _ = transcript return await session.user_messages(state) @vf.reward(weight=1.0) async def tau2_reward(task, state) -> float: - tau2_state = cast(Mapping[str, object], state["tau2"]) + tau2_state = cast(ConfigMap, state["tau2"]) messages = [ - load_tau_message(cast(Mapping[str, object], message)) - for message in cast(list[object], tau2_state["messages"]) + load_tau_message(cast(ConfigMap, message)) + for message in cast(list[ConfigMap], tau2_state["messages"]) ] termination = tau2_state.get("termination_reason") if isinstance(termination, str): @@ -680,7 +668,7 @@ async def tau2_num_user_tool_calls(task, state) -> float: class Tau2TasksetConfig(vf.TasksetConfig): domain: str = "telecom" user_model: str = DEFAULT_USER_MODEL - user_args: dict[str, object] | None = None + user_args: vf.ConfigData | None = None user_base_url: str = DEFAULT_USER_BASE_URL user_api_key_var: str = DEFAULT_USER_API_KEY_VAR max_steps: int = DEFAULT_MAX_STEPS @@ -696,13 +684,13 @@ def __init__( domain: str | None = None, *, user_model: str | None = None, - user_args: Mapping[str, object] | None = None, + user_args: ConfigMap | None = None, user_base_url: str | None = None, user_api_key_var: str | None = None, max_steps: int | None = None, max_errors: int | None = None, max_turns: int | None = None, - config: vf.TasksetConfig | Mapping[str, object] | None = None, + config: Tau2TasksetConfig | None = None, ): config = Tau2TasksetConfig( config, @@ -762,13 +750,13 @@ def load_taskset( domain: str | None = None, *, 
user_model: str | None = None, - user_args: Mapping[str, object] | None = None, + user_args: ConfigMap | None = None, user_base_url: str | None = None, user_api_key_var: str | None = None, max_steps: int | None = None, max_errors: int | None = None, max_turns: int | None = None, - config: vf.TasksetConfig | Mapping[str, object] | None = None, + config: Tau2TasksetConfig | None = None, ) -> Tau2Taskset: return Tau2Taskset( domain=domain, @@ -783,17 +771,17 @@ def load_taskset( ) -def load_v1_environment( +def load_environment( domain: str = "telecom", *, user_model: str = DEFAULT_USER_MODEL, - user_args: Mapping[str, object] | None = None, + user_args: ConfigMap | None = None, user_base_url: str = DEFAULT_USER_BASE_URL, user_api_key_var: str = DEFAULT_USER_API_KEY_VAR, max_steps: int = DEFAULT_MAX_STEPS, max_errors: int = DEFAULT_MAX_ERRORS, max_turns: int = DEFAULT_MAX_STEPS, - config: vf.EnvConfig | None = None, + config: vf.EnvConfig, ) -> vf.Env: config = vf.EnvConfig( config, @@ -808,32 +796,11 @@ def load_v1_environment( max_turns=max_turns, ), ) - taskset = load_taskset( - config=config.taskset, + taskset_config = ( + None if config.taskset is None else Tau2TasksetConfig(config.taskset) ) - return vf.Env(taskset=taskset, harness=vf.Harness(config=config.harness)) - - -def load_environment( - domain: str = "telecom", - *, - user_model: str = DEFAULT_USER_MODEL, - user_args: Mapping[str, object] | None = None, - user_base_url: str = DEFAULT_USER_BASE_URL, - user_api_key_var: str = DEFAULT_USER_API_KEY_VAR, - max_steps: int = DEFAULT_MAX_STEPS, - max_errors: int = DEFAULT_MAX_ERRORS, - max_turns: int = DEFAULT_MAX_STEPS, - config: vf.EnvConfig | None = None, -) -> vf.Env: - return load_v1_environment( - domain=domain, - user_model=user_model, - user_args=user_args, - user_base_url=user_base_url, - user_api_key_var=user_api_key_var, - max_steps=max_steps, - max_errors=max_errors, - max_turns=max_turns, - config=config, + harness_config = ( + None if 
config.harness is None else vf.HarnessConfig(config.harness) ) + taskset = load_taskset(config=taskset_config) + return vf.Env(taskset=taskset, harness=vf.Harness(config=harness_config)) diff --git a/environments/wiki_search/wiki_search_v1.py b/environments/wiki_search/wiki_search_v1.py index f290c6ead..ac4190873 100644 --- a/environments/wiki_search/wiki_search_v1.py +++ b/environments/wiki_search/wiki_search_v1.py @@ -1,5 +1,3 @@ -from __future__ import annotations - import asyncio import os from typing import cast @@ -10,7 +8,7 @@ from datasets import load_dataset from openai import AsyncOpenAI -import verifiers.v1 as vf +import verifiers as vf CHROMA_DB_DIR = ".chroma_db" _chroma_semaphore: asyncio.Semaphore | None = None @@ -54,7 +52,7 @@ def load_wiki( embed_model: str, embed_base_url: str, embed_api_key_var: str, -) -> dict[str, object]: +) -> vf.ConfigData: page_id_to_title: dict[str, str] = {} page_id_to_content: dict[str, str] = {} corpus = load_dataset(corpus_dataset, split=corpus_split) @@ -206,11 +204,8 @@ def judge_reward_factory( @vf.reward(weight=1.0) async def judge_reward_func(task, state) -> float: completion = state.get("completion") or [] - response = "" - for message in reversed(completion): - if message.get("role") == "assistant": - response = str(message.get("content") or "") - break + messages = vf.get_messages(completion, role="assistant") + response = str(messages[-1].content or "") if messages else "" prompt = JUDGE_PROMPT.format( question=task["question"], answer=task["answer"], @@ -242,7 +237,7 @@ def load_toolset( embed_api_key_var: str = "OPENAI_API_KEY", config=None, ): - def load_wiki_index() -> dict[str, object]: + def load_wiki_index() -> vf.ConfigData: return load_wiki( corpus_dataset=corpus_dataset, corpus_split=corpus_split, diff --git a/pyproject.toml b/pyproject.toml index c365ccd88..fcbf27566 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -68,13 +68,15 @@ dev = [ "ipywidgets", "reasoning-gym", "textarena", - 
"openenv-core[core]==0.2.1", "stagehand>=3.0.0", "aiohttp>=3.9.0", "python-dotenv>=1.0.0", "nltk", "renderers>=0.1.8.dev0", ] +policy = [ + "semgrep>=1.150.0", +] [project.optional-dependencies] rg = [ @@ -84,14 +86,14 @@ ta = [ "textarena", "nltk", ] -openenv = [ - "openenv-core[core]==0.2.1", -] browser = [ "stagehand>=3.0.0", "aiohttp>=3.9.0", "python-dotenv>=1.0.0", ] +openenv = [ + "openenv-core>=0.3.0", +] renderers = [ "renderers>=0.1.8.dev0", ] @@ -111,7 +113,12 @@ rl = [ [tool.uv] preview = true required-version = ">=0.11.1" - +conflicts = [ + [ + { extra = "openenv" }, + { group = "policy" }, + ], +] [[tool.uv.index]] name = "pypi" url = "https://pypi.org/simple" @@ -123,6 +130,7 @@ exclude-newer = "7 days" prime-tunnel = false prime-sandboxes = false renderers = false +openenv-core = false [tool.uv.extra-build-dependencies] flash-attn = [{ requirement = "torch", match-runtime = true }] @@ -130,6 +138,11 @@ flash-attn = [{ requirement = "torch", match-runtime = true }] [tool.uv.extra-build-variables] flash-attn = { FLASH_ATTENTION_SKIP_CUDA_BUILD = "TRUE" } +[tool.ruff] +exclude = [ + ".semgrep", +] + [project.scripts] vf-eval = "verifiers.scripts.eval:main" vf-gepa = "verifiers.scripts.gepa:main" diff --git a/scripts/sync.py b/scripts/sync.py index 0d2402ea5..ce388c649 100644 --- a/scripts/sync.py +++ b/scripts/sync.py @@ -1,8 +1,6 @@ #!/usr/bin/env python3 """Compile AGENTS.md / CLAUDE.md files from modular docs sources.""" -from __future__ import annotations - from pathlib import Path ROOT = Path(__file__).parent.parent diff --git a/skills/browse-environments/SKILL.md b/skills/browse-environments/SKILL.md index 42255a27a..eec60ee23 100644 --- a/skills/browse-environments/SKILL.md +++ b/skills/browse-environments/SKILL.md @@ -61,7 +61,7 @@ prime eval run name -m openai/gpt-4.1-mini -n 5 ```bash prime env install reverse-text --from-repo ``` -4. 
For v1 Taskset + Harness examples, inspect the environment package for `load_taskset`, `load_harness`, and the `load_environment(...)` wrapper that returns a `vf.Environment` for published eval and training runs. +4. For v1 Taskset + Harness examples, inspect the environment package for `load_environment(config: vf.EnvConfig) -> vf.Env` and the direct `Taskset`/`Harness` wiring. Do not assume `load_taskset` or `load_harness` wrappers exist unless they encode real reusable wiring. ## Anti-Patterns 1. Do not recommend building from scratch if a strong ecosystem option exists. diff --git a/skills/create-environments/SKILL.md b/skills/create-environments/SKILL.md index 99bfa2988..8b5de68ee 100644 --- a/skills/create-environments/SKILL.md +++ b/skills/create-environments/SKILL.md @@ -43,8 +43,16 @@ prime env install math-python --from-repo - `StatefulToolEnv` for per-rollout resources. - `CliAgentEnv` for running agent binaries in sandboxes with API interception. Override `get_sandbox_resources(state)` for per-instance resources, `build_env_vars(state)` for custom env vars. - V1 `vf.Env` with `vf.Taskset`/`vf.Harness` for the current taskset/harness environment pattern that separates the task collection from the rollout runner. Use this for new taskset/harness work that needs config-driven metrics, rewards, toolsets, user functions, endpoint interception, or sandboxed Python/command programs. Framework programs should build clients from `state.get_endpoint_config(api="chat")`. -3. Implement `load_environment(...) -> vf.Environment` with explicit arguments. -4. Add `pyproject.toml` defaults in `[tool.verifiers.eval]` only when stable. +3. For v1, import `verifiers as vf` and implement `load_environment(config: vf.EnvConfig) -> vf.Env`. Treat `config` as required and typed; the loader is responsible for passing an `EnvConfig`. +4. For v0 environments, keep the existing `vf.Environment` patterns and preserve v0 compatibility. +5. 
Add `pyproject.toml` defaults in `[tool.verifiers.eval]` only when stable. + +### V1 Authoring Rules +1. Keep v1 environment entrypoints tiny: `import verifiers as vf`, define `load_environment(config: vf.EnvConfig) -> vf.Env`, and wire `Taskset`/`Harness` constructors directly. +2. Use `Taskset(objects=..., bindings=...)` for shared taskset dependencies such as extractors, clients, or format checkers. Do not introduce v1 Parser/Rubric wrappers; parsing is ordinary Python or a bound object. +3. Use `vf.get_messages(state.get("completion") or [], role="assistant")` when reading state completions. The helper returns typed message objects and should not receive `None`. +4. Use `program.channels` for v1 program protocol/channel selection. Do not use stale `program.tools` terminology. +5. Avoid no-op `load_taskset`/`load_harness` wrappers. Add named helpers only when they encode real reusable wiring. ### V1 Taskset/Harness Shape 1. Put task data, task-owned tools, user behavior, metrics, rewards, and task-specific configuration on the `Taskset`. diff --git a/skills/train-with-environments/SKILL.md b/skills/train-with-environments/SKILL.md index 0b0b9dd52..79d7b09b3 100644 --- a/skills/train-with-environments/SKILL.md +++ b/skills/train-with-environments/SKILL.md @@ -36,7 +36,7 @@ prime lab setup --prime-rl prime env install my-env prime eval run my-env -m openai/gpt-4.1-mini -n 20 -r 3 -s ``` -2. For v1 Taskset + Harness environments, verify the package still exposes `load_environment(...) -> vf.Environment`; trainers interact with the same environment boundary even when the implementation is BYO Harness internally. +2. For v1 Taskset + Harness environments, verify the package exposes `load_environment(config: vf.EnvConfig) -> vf.Env`; trainers interact with the same environment boundary even when the implementation is BYO Harness internally. 3. Confirm reward diversity exists at baseline. 4. Start with conservative run length and inspect samples early. 
diff --git a/tests/test_context_token_metrics.py b/tests/test_context_token_metrics.py index e0d18c90f..37bb8e69c 100644 --- a/tests/test_context_token_metrics.py +++ b/tests/test_context_token_metrics.py @@ -5,10 +5,9 @@ using the last trajectory step. """ -from unittest.mock import MagicMock - import pytest +from verifiers.types import Response, ResponseMessage, Usage from verifiers.utils.usage_utils import compute_context_token_metrics @@ -20,12 +19,39 @@ USER = {"role": "user", "content": "hi"} -def _make_response(prompt_tokens: int, completion_tokens: int) -> MagicMock: - response = MagicMock() - response.usage = MagicMock( - prompt_tokens=prompt_tokens, completion_tokens=completion_tokens +def _make_response(prompt_tokens: int, completion_tokens: int) -> Response: + return Response( + id="test", + created=0, + model="test", + usage=Usage( + prompt_tokens=prompt_tokens, + reasoning_tokens=0, + completion_tokens=completion_tokens, + total_tokens=prompt_tokens + completion_tokens, + ), + message=ResponseMessage( + role="assistant", + content="", + finish_reason="stop", + is_truncated=False, + ), + ) + + +def _make_response_without_usage() -> Response: + return Response( + id="test", + created=0, + model="test", + usage=None, + message=ResponseMessage( + role="assistant", + content="", + finish_reason="stop", + is_truncated=False, + ), ) - return response def _asst(i: int) -> dict: @@ -115,13 +141,11 @@ def test_skips_none_responses_for_last_step(self): assert metrics["final_input_tokens"] == 230 - 50 def test_skips_responses_without_usage(self): - """Responses with no .usage attribute are skipped entirely.""" - no_usage = MagicMock() - no_usage.usage = None + """Responses with usage=None are skipped entirely.""" trajectory = [ {"response": _make_response(100, 20)}, {"response": _make_response(200, 30)}, - {"response": no_usage}, # last step, but no usage + {"response": _make_response_without_usage()}, ] metrics = compute_context_token_metrics(trajectory) # 
Should use step 1 (last with usage): total = 230 @@ -130,11 +154,9 @@ def test_skips_responses_without_usage(self): def test_all_responses_lack_usage(self): """If no response has usage data, return zeros.""" - no_usage = MagicMock() - no_usage.usage = None trajectory = [ - {"response": no_usage}, - {"response": no_usage}, + {"response": _make_response_without_usage()}, + {"response": _make_response_without_usage()}, ] metrics = compute_context_token_metrics(trajectory) assert metrics["final_output_tokens"] == 0 diff --git a/tests/test_environment_extra.py b/tests/test_environment_extra.py index 83502ee17..73150c2f7 100644 --- a/tests/test_environment_extra.py +++ b/tests/test_environment_extra.py @@ -9,8 +9,6 @@ - make_dataset tool call sanitization """ -from __future__ import annotations - import asyncio import json from unittest.mock import AsyncMock diff --git a/tests/test_gym_env.py b/tests/test_gym_env.py index 5ef8aba1d..70773e024 100644 --- a/tests/test_gym_env.py +++ b/tests/test_gym_env.py @@ -1,5 +1,3 @@ -from __future__ import annotations - import re from typing import Any diff --git a/tests/test_harbor_env_mcp.py b/tests/test_harbor_env_mcp.py index 9d5a3e84e..22d9c2efd 100644 --- a/tests/test_harbor_env_mcp.py +++ b/tests/test_harbor_env_mcp.py @@ -1,5 +1,3 @@ -from __future__ import annotations - import asyncio import time from typing import Any diff --git a/tests/test_langchain_deep_agents_wikispeedia.py b/tests/test_langchain_deep_agents_wikispeedia.py index 57387719f..bec58212f 100644 --- a/tests/test_langchain_deep_agents_wikispeedia.py +++ b/tests/test_langchain_deep_agents_wikispeedia.py @@ -57,7 +57,7 @@ def test_wikispeedia_loads_as_v1_taskset_harness( ) -> None: module = load_module(monkeypatch) - env = module.load_environment(train_size=1, eval_size=1) + env = module.load_environment(config=vf.EnvConfig(), train_size=1, eval_size=1) assert isinstance(env, vf.Env) assert isinstance(env.taskset, vf.Taskset) @@ -157,6 +157,7 @@ async def 
test_wikispeedia_tools_resolve_through_v1_runtime( wiki = make_small_wiki(module) monkeypatch.setattr(module, "load_wiki_graph", lambda cache_dir=None: wiki) env = module.load_environment( + config=vf.EnvConfig(), train_size=2, eval_size=1, min_path_length=1, @@ -248,6 +249,12 @@ class FakeAgent: async def ainvoke(self, payload, config=None): raise GraphRecursionError("recursion limit") + created_system_prompts = [] + + def fake_create_deep_agent(**kwargs): + created_system_prompts.append(kwargs["system_prompt"]) + return FakeAgent() + fake_deepagents = types.ModuleType("deepagents") fake_langchain_openai = types.ModuleType("langchain_openai") fake_langgraph = types.ModuleType("langgraph") @@ -255,7 +262,7 @@ async def ainvoke(self, payload, config=None): fake_langchain_core = types.ModuleType("langchain_core") fake_tools_module = types.ModuleType("langchain_core.tools") - fake_deepagents.create_deep_agent = lambda **kwargs: FakeAgent() + fake_deepagents.create_deep_agent = fake_create_deep_agent fake_langchain_openai.ChatOpenAI = FakeChatOpenAI fake_langgraph_errors.GraphRecursionError = GraphRecursionError fake_langgraph.errors = fake_langgraph_errors @@ -276,12 +283,16 @@ async def ainvoke(self, payload, config=None): { "info": {"source": "A"}, "prompt": [{"role": "user", "content": "start"}], - "system_prompt": [{"role": "system", "content": "prompt"}], + "system_prompt": [ + {"role": "user", "content": "first prompt chunk"}, + {"role": "system", "content": "second prompt chunk"}, + ], } ) result = await program({}, state) + assert created_system_prompts == ["first prompt chunk\n\nsecond prompt chunk"] assert result["agent_timeout"] is True assert result["stop_reason"] == "agent_recursion_limit" assert result["agent_completion"] == [] @@ -298,7 +309,7 @@ async def test_wikispeedia_tool_metrics_use_agent_completion( { "role": "assistant", "content": "", - "tool_calls": [{"id": "call_1", "name": "click_link"}], + "tool_calls": [{"id": "call_1", "name": 
"click_link", "arguments": "{}"}], }, { "role": "tool", diff --git a/tests/test_lean_task.py b/tests/test_lean_task.py index b9beadf92..ce73186db 100644 --- a/tests/test_lean_task.py +++ b/tests/test_lean_task.py @@ -1,7 +1,5 @@ """Tests for ``LeanTaskSet`` lean-guard wrapping and reward enforcement.""" -from __future__ import annotations - from dataclasses import dataclass import pytest diff --git a/tests/test_mcp_search_env.py b/tests/test_mcp_search_env.py index 74a561a97..bb23884fd 100644 --- a/tests/test_mcp_search_env.py +++ b/tests/test_mcp_search_env.py @@ -1,10 +1,9 @@ -from __future__ import annotations - import importlib.util import inspect from pathlib import Path from typing import Any +import pytest import verifiers.v1 as vf @@ -27,7 +26,7 @@ def _load_mcp_search_module() -> Any: def test_mcp_search_env_is_v1_only() -> None: module = _load_mcp_search_module() - env = module.load_environment(max_turns=4) + env = module.load_environment(config=vf.EnvConfig(), max_turns=4) assert isinstance(env, vf.Env) assert isinstance(env.taskset, vf.Taskset) @@ -59,3 +58,18 @@ def test_mcp_search_taskset_accepts_v1_taskset_config() -> None: assert env.taskset.config.max_turns == 3 assert all(row["max_turns"] == 3 for row in rows) + + +@pytest.mark.asyncio +async def test_mcp_search_reward_handles_missing_assistant() -> None: + module = _load_mcp_search_module() + + task = vf.Task({"answer": "expected"}) + assert await module.exact_title_reward(task, vf.State({"completion": []})) == 0.0 + assert ( + await module.exact_title_reward( + task, + vf.State({"completion": [{"role": "user", "content": "expected"}]}), + ) + == 0.0 + ) diff --git a/tests/test_message_utils.py b/tests/test_message_utils.py index f71cf6744..18f91909a 100644 --- a/tests/test_message_utils.py +++ b/tests/test_message_utils.py @@ -1,5 +1,9 @@ -from verifiers.types import AssistantMessage -from verifiers.utils.message_utils import from_raw_message, normalize_messages +from verifiers.types import 
AssistantMessage, UserMessage +from verifiers.utils.message_utils import ( + from_raw_message, + get_messages, + normalize_messages, +) def test_from_raw_message_normalizes_oai_tool_calls(): @@ -55,3 +59,30 @@ def test_normalize_messages_accepts_oai_tool_call_dicts(): assert assistant.tool_calls[0].id == "call_2" assert assistant.tool_calls[0].name == "lookup" assert assistant.tool_calls[0].arguments == '{"q": "hello"}' + + +def test_get_messages_returns_typed_messages(): + messages = get_messages( + [ + {"role": "user", "content": "question"}, + {"role": "assistant", "content": "answer"}, + ] + ) + + assert isinstance(messages[0], UserMessage) + assert isinstance(messages[1], AssistantMessage) + assert messages[-1].content == "answer" + + +def test_get_messages_filters_by_role_with_typed_return(): + messages = get_messages( + [ + {"role": "user", "content": "question"}, + {"role": "assistant", "content": "answer"}, + ], + role="assistant", + ) + + assert len(messages) == 1 + assert isinstance(messages[0], AssistantMessage) + assert messages[0].content == "answer" diff --git a/tests/test_nemorl_client.py b/tests/test_nemorl_client.py index 76d848f47..89a721ae9 100644 --- a/tests/test_nemorl_client.py +++ b/tests/test_nemorl_client.py @@ -1,5 +1,3 @@ -from __future__ import annotations - from unittest.mock import AsyncMock, patch import pytest diff --git a/tests/test_opencode_harbor.py b/tests/test_opencode_harbor.py index d0b48bd76..d35d341f1 100644 --- a/tests/test_opencode_harbor.py +++ b/tests/test_opencode_harbor.py @@ -1,11 +1,8 @@ -from __future__ import annotations - import importlib.util +import sys from pathlib import Path from typing import Any, cast -import pytest - import verifiers.v1 as vf @@ -23,6 +20,7 @@ def _load_opencode_module() -> Any: assert spec.loader is not None module = importlib.util.module_from_spec(spec) + sys.modules[spec.name] = module spec.loader.exec_module(module) return module @@ -30,33 +28,32 @@ def _load_opencode_module() -> Any: 
def test_load_environment_uses_v1_taskset_and_harness() -> None: module = _load_opencode_module() - env = module.load_environment() + env = module.load_environment(config=vf.EnvConfig()) assert isinstance(env, vf.Env) assert isinstance(env.taskset, vf.HarborTaskset) assert isinstance(env.harness, vf.OpenCode) assert isinstance(env.harness.config, vf.OpenCodeConfig) assert not hasattr(module, "OpenCodeHarborHarnessConfig") - assert Path(env.taskset.tasks) == Path(module.__file__).parent / "tasks" + assert not hasattr(module, "TERMINAL_BENCH_SAMPLE_TASKS") + assert env.taskset.resolve_tasks_root() == Path(module.__file__).parent / "tasks" assert env.harness.config.max_turns == 4 - assert env.harness.config.disabled_tools == ["webfetch", "question"] + assert env.harness.config.disabled_tools == vf.OpenCodeConfig().disabled_tools + assert "webfetch" in env.harness.config.disabled_tools + assert "question" in env.harness.config.disabled_tools program = cast(dict[str, object], env.harness.program) - mcp_setup = cast(dict[str, object], program["tools"])["mcp"] + mcp_setup = cast(dict[str, object], program["channels"])["mcp"] assert '"webfetch": false' in cast(str, mcp_setup) assert '"question": false' in cast(str, mcp_setup) - assert '"read": false' not in cast(str, mcp_setup) -def test_load_environment_accepts_v1_taskset_and_harness_config( - tmp_path: Path, -) -> None: +def test_load_environment_accepts_v1_taskset_and_harness_config() -> None: module = _load_opencode_module() env = module.load_environment( config=vf.EnvConfig( taskset={ - "tasks": str(tmp_path), "task_names": ["task-a"], "cpu_cores": 1.5, }, @@ -68,7 +65,7 @@ def test_load_environment_accepts_v1_taskset_and_harness_config( ) ) - assert Path(env.taskset.tasks) == tmp_path + assert env.taskset.resolve_tasks_root() == Path(module.__file__).parent / "tasks" assert env.taskset.task_names == ["task-a"] assert env.taskset.cpu_cores == 1.5 assert env.harness.config.agent_workdir == "/workspace" @@ -76,25 +73,14 
@@ def test_load_environment_accepts_v1_taskset_and_harness_config( program = cast(dict[str, object], env.harness.program) command = cast(list[object], program["command"]) - mcp_setup = cast(dict[str, object], program["tools"])["mcp"] + mcp_setup = cast(dict[str, object], program["channels"])["mcp"] assert "/workspace" in cast(str, command[2]) assert '"webfetch": false' in cast(str, mcp_setup) assert '"question": false' not in cast(str, mcp_setup) -def test_dataset_shortcuts_select_task_names() -> None: - module = _load_opencode_module() - - env = module.load_environment(dataset="terminal-bench-sample") - - assert env.taskset.task_names == module.TERMINAL_BENCH_SAMPLE_TASKS - - -def test_dataset_rejects_explicit_task_names() -> None: +def test_pyproject_does_not_define_unsupported_harness_defaults() -> None: module = _load_opencode_module() + pyproject = Path(module.__file__).parent / "pyproject.toml" - with pytest.raises(ValueError, match="dataset.*task_names"): - module.load_environment( - dataset="terminal-bench-sample", - task_names=["hello-world"], - ) + assert "[tool.verifiers.harness]" not in pyproject.read_text() diff --git a/tests/test_openenv_client.py b/tests/test_openenv_client.py new file mode 100644 index 000000000..000f26d0b --- /dev/null +++ b/tests/test_openenv_client.py @@ -0,0 +1,162 @@ +from typing import Any + +import verifiers as vf +from verifiers.envs.integrations import openenv_env +from verifiers.types import UserMessage + + +class StepResult: + def __init__( + self, observation: dict[str, object], reward: float | None, done: bool + ): + self.observation = observation + self.reward = reward + self.done = done + + +class FakeGenericEnvClient: + instances: list["FakeGenericEnvClient"] = [] + + def __init__(self, base_url: str): + self.base_url = base_url + self.connected = False + self.closed = False + self.reset_seeds: list[int] = [] + self.step_actions: list[dict[str, object]] = [] + FakeGenericEnvClient.instances.append(self) + + async 
def connect(self) -> None: + self.connected = True + + async def reset(self, *, seed: int) -> StepResult: + self.reset_seeds.append(seed) + return StepResult({"prompt": f"seed-{seed}"}, reward=None, done=False) + + async def step(self, action: dict[str, object]) -> StepResult: + self.step_actions.append(action) + return StepResult({"prompt": "next"}, reward=1.0, done=True) + + async def close(self) -> None: + self.closed = True + + +class FakeMCPToolClient: + instances: list["FakeMCPToolClient"] = [] + + def __init__(self, base_url: str): + self.base_url = base_url + self.connected = False + self.closed = False + self.step_actions: list[Any] = [] + FakeMCPToolClient.instances.append(self) + + async def connect(self) -> None: + self.connected = True + + async def reset(self, *, seed: int) -> StepResult: + return StepResult({"prompt": f"mcp-{seed}"}, reward=None, done=False) + + async def list_tools(self) -> list[dict[str, object]]: + return [ + { + "name": "echo", + "description": "Echo a message", + "input_schema": {"type": "object", "properties": {}}, + } + ] + + async def step(self, action: Any) -> StepResult: + self.step_actions.append(action) + return StepResult({"result": {"data": "ok"}}, reward=1.0, done=True) + + async def close(self) -> None: + self.closed = True + + +class FakeCallToolAction: + def __init__(self, tool_name: str, arguments: dict[str, object]): + self.tool_name = tool_name + self.arguments = arguments + + +def render_prompt(observation: Any, **kwargs: Any): + assert isinstance(observation, dict) + return [UserMessage(content=str(observation["prompt"]))] + + +async def test_openenv_uses_public_async_generic_client(monkeypatch): + FakeGenericEnvClient.instances.clear() + monkeypatch.setattr(openenv_env, "GenericEnvClient", FakeGenericEnvClient) + env = vf.OpenEnvEnv( + num_train_examples=1, + num_eval_examples=0, + prompt_renderer=render_prompt, + ) + + async def create_server(): + return openenv_env.OpenEnvServer( + sandbox_id="sandbox", + 
exposure_id="exposure", + base_url="http://localhost:8000", + port=8000, + contract="gym", + ) + + async def fetch_action_schema(base_url: str) -> dict[str, object]: + return {"type": "object", "properties": {}} + + monkeypatch.setattr(env, "_create_server", create_server) + monkeypatch.setattr(env, "_fetch_action_schema", fetch_action_schema) + + state = vf.State({"info": {"seed": 7}, "trajectory": []}) + await env.setup_state(state) + + assert state["prompt"] == [UserMessage(content="seed-7")] + assert len(FakeGenericEnvClient.instances) == 1 + client = FakeGenericEnvClient.instances[0] + assert client.base_url == "http://localhost:8000" + assert client.connected is True + assert client.reset_seeds == [7] + + +async def test_openenv_uses_public_async_mcp_client(monkeypatch): + FakeMCPToolClient.instances.clear() + monkeypatch.setattr(openenv_env, "MCPToolClient", FakeMCPToolClient) + monkeypatch.setattr(openenv_env, "CallToolAction", FakeCallToolAction) + env = vf.OpenEnvEnv( + num_train_examples=1, + num_eval_examples=0, + prompt_renderer=render_prompt, + ) + + async def create_server(): + return openenv_env.OpenEnvServer( + sandbox_id="sandbox", + exposure_id="exposure", + base_url="http://localhost:8000", + port=8000, + contract="mcp", + ) + + async def fetch_action_schema(base_url: str) -> dict[str, object]: + return { + "type": "object", + "properties": {"type": {"enum": ["list_tools", "call_tool"]}}, + } + + monkeypatch.setattr(env, "_create_server", create_server) + monkeypatch.setattr(env, "_fetch_action_schema", fetch_action_schema) + + state = vf.State({"info": {"seed": 9}, "trajectory": []}) + await env.setup_state(state) + result = await env._mcp_step_tool( + state["openenv_mcp_client"], "echo", {"message": "hi"} + ) + + assert state["prompt"] == [UserMessage(content="mcp-9")] + assert state["tool_defs"][0].name == "echo" + assert result.reward == 1.0 + client = FakeMCPToolClient.instances[0] + action = client.step_actions[0] + assert action.tool_name 
== "echo" + assert action.arguments == {"message": "hi"} diff --git a/tests/test_renderer_e2e.py b/tests/test_renderer_e2e.py index 8948357b3..046253a8d 100644 --- a/tests/test_renderer_e2e.py +++ b/tests/test_renderer_e2e.py @@ -17,8 +17,6 @@ are exercised. Tokenizers come from the local HF cache; no network. """ -from __future__ import annotations - import logging from typing import Any diff --git a/tests/test_save_utils.py b/tests/test_save_utils.py index 940eacf2f..fc8da4c69 100644 --- a/tests/test_save_utils.py +++ b/tests/test_save_utils.py @@ -15,7 +15,7 @@ from openai import OpenAI from pydantic import BaseModel -from verifiers.types import ClientConfig +from verifiers.types import ClientConfig, Response, ResponseMessage, Usage from verifiers.utils.metric_utils import ( EnvMetrics, ErrorRateMetric, @@ -28,14 +28,13 @@ from verifiers.utils.save_utils import ( GenerateOutputsBuilder, _delta_intermediate_mm_data, - extract_usage_tokens, load_outputs, make_serializable, save_new_outputs, states_to_outputs, validate_resume_metadata, ) -from verifiers.utils.usage_utils import StateUsageTracker +from verifiers.utils.usage_utils import StateUsageTracker, response_usage_tokens # Test models for make_serializable tests @@ -49,6 +48,26 @@ class NestedModel(BaseModel): tags: list[str] +def make_response(prompt_tokens: int, completion_tokens: int) -> Response: + return Response( + id="test", + created=0, + model="test", + usage=Usage( + prompt_tokens=prompt_tokens, + reasoning_tokens=0, + completion_tokens=completion_tokens, + total_tokens=prompt_tokens + completion_tokens, + ), + message=ResponseMessage( + role="assistant", + content="", + finish_reason="stop", + is_truncated=False, + ), + ) + + class TestSerialization: def test_serialize_simple_pydantic_model(self): model = SimpleModel(name="test", value=42) @@ -181,43 +200,12 @@ def test_generate_outputs_builder_serializes_endpoint_configs_base_url(self): class TestSavingResults: - def 
test_extract_usage_tokens_prompt_completion(self): - response = type( - "Response", - (), - { - "usage": { - "prompt_tokens": 10, - "completion_tokens": 5, - "input_tokens": 999, - "output_tokens": 999, - } - }, - )() - input_tokens, output_tokens = extract_usage_tokens(response) + def test_response_usage_tokens_prompt_completion(self): + response = make_response(prompt_tokens=10, completion_tokens=5) + input_tokens, output_tokens = response_usage_tokens(response) assert input_tokens == 10 assert output_tokens == 5 - def test_extract_usage_tokens_input_output(self): - response = type( - "Response", - (), - {"usage": {"input_tokens": 8, "output_tokens": 3}}, - )() - input_tokens, output_tokens = extract_usage_tokens(response) - assert input_tokens == 8 - assert output_tokens == 3 - - def test_extract_usage_tokens_invalid_values(self): - response = type( - "Response", - (), - {"usage": {"prompt_tokens": "bad", "completion_tokens": object()}}, - )() - input_tokens, output_tokens = extract_usage_tokens(response) - assert input_tokens == 0 - assert output_tokens == 0 - def test_state_with_tracker_and_no_usage_does_not_emit_token_usage( self, make_state ): @@ -229,6 +217,22 @@ def test_state_with_tracker_and_no_usage_does_not_emit_token_usage( output = states_to_outputs([state], state_columns=[])[0] assert "token_usage" not in output + def test_state_with_empty_tracker_falls_back_to_trajectory_usage(self, make_state): + state = make_state() + tracker = StateUsageTracker() + state["usage_tracker"] = tracker + state["usage"] = tracker.usage + state["trajectory"] = [{"response": make_response(10, 5)}] + + output = states_to_outputs([state], state_columns=[])[0] + + assert output["token_usage"] == { + "input_tokens": 10.0, + "output_tokens": 5.0, + "final_input_tokens": 10, + "final_output_tokens": 5, + } + def test_states_to_outputs(self, make_state): states = [ make_state( diff --git a/tests/test_setup_script.py b/tests/test_setup_script.py index ceff7473d..fb71cc80b 
100644 --- a/tests/test_setup_script.py +++ b/tests/test_setup_script.py @@ -1,5 +1,3 @@ -from __future__ import annotations - import sys import pytest diff --git a/tests/test_v1_bfcl.py b/tests/test_v1_bfcl.py index f98e9c69b..f44037132 100644 --- a/tests/test_v1_bfcl.py +++ b/tests/test_v1_bfcl.py @@ -1,9 +1,12 @@ -from __future__ import annotations - import importlib.util from pathlib import Path from types import ModuleType +import pytest + +import verifiers as root_vf +import verifiers.v1 as vf + def load_bfcl_module() -> ModuleType: path = Path(__file__).parents[1] / "environments" / "bfcl_v3" / "bfcl_v3.py" @@ -53,3 +56,80 @@ def test_bfcl_row_preserves_hinted_holdout_functions() -> None: assert row["function_with_hints"] == [{"name": "hinted"}] assert row["missed_function"] == {"1": [{"name": "plain_holdout"}]} assert row["missed_function_with_hints"] == {"1": [{"name": "hinted_holdout"}]} + + +def test_bfcl_empty_completion_has_no_tool_calls() -> None: + bfcl = load_bfcl_module() + + assert bfcl.assistant_tool_calls({"completion": []}) == [] + assert ( + bfcl.assistant_tool_calls( + {"completion": [{"role": "user", "content": "no assistant"}]} + ) + == [] + ) + + +def test_bfcl_public_loader_is_v1_only(monkeypatch: pytest.MonkeyPatch) -> None: + bfcl = load_bfcl_module() + seen_taskset_config: vf.TasksetConfig | None = None + seen_harness_config: vf.HarnessConfig | None = None + + def fake_taskset(config: vf.TasksetConfig | None = None) -> vf.Taskset: + nonlocal seen_taskset_config + seen_taskset_config = config + return vf.Taskset(source=[], config=config) + + def fake_harness(config: vf.HarnessConfig | None = None) -> vf.Harness: + nonlocal seen_harness_config + seen_harness_config = config + return vf.Harness(config=config) + + monkeypatch.setattr(bfcl, "load_taskset", fake_taskset) + monkeypatch.setattr(bfcl, "load_harness", fake_harness) + + env = bfcl.load_environment( + config=vf.EnvConfig(), + test_category="simple_python", + 
examples_per_category=0, + ) + + assert isinstance(env, vf.Env) + assert isinstance(seen_taskset_config, bfcl.BFCLTasksetConfig) + assert isinstance(seen_harness_config, bfcl.BFCLHarnessConfig) + assert seen_taskset_config.test_category == "simple_python" + assert seen_taskset_config.examples_per_category == 0 + assert seen_harness_config.test_category == "simple_python" + assert not hasattr(bfcl, "load_v1_environment") + + +def test_bfcl_loader_supports_category_groups( + monkeypatch: pytest.MonkeyPatch, +) -> None: + bfcl = load_bfcl_module() + seen_taskset_categories = [] + seen_harness_categories = [] + + def fake_taskset(config: vf.TasksetConfig | None = None) -> vf.Taskset: + assert isinstance(config, bfcl.BFCLTasksetConfig) + seen_taskset_categories.append(config.test_category) + return vf.Taskset(source=[{"question": "q", "answer": "a"}], config=config) + + def fake_harness(config: vf.HarnessConfig | None = None) -> vf.Harness: + assert isinstance(config, bfcl.BFCLHarnessConfig) + seen_harness_categories.append(config.test_category) + return vf.Harness(config=config) + + monkeypatch.setattr(bfcl, "load_taskset", fake_taskset) + monkeypatch.setattr(bfcl, "load_harness", fake_harness) + + env = bfcl.load_environment( + config=vf.EnvConfig(), + test_categories=["simple_python", "simple_java"], + examples_per_category=0, + ) + + assert isinstance(env, root_vf.EnvGroup) + assert env.env_names == ["simple_python", "simple_java"] + assert seen_taskset_categories == ["simple_python", "simple_java"] + assert seen_harness_categories == ["simple_python", "simple_java"] diff --git a/tests/test_v1_config_extension.py b/tests/test_v1_config_extension.py index f89c3e28a..e6b4db29a 100644 --- a/tests/test_v1_config_extension.py +++ b/tests/test_v1_config_extension.py @@ -1,5 +1,3 @@ -from __future__ import annotations - import importlib import sys import types @@ -10,6 +8,7 @@ import verifiers as vf from verifiers.v1 import ( + Config, Env, EnvConfig, Harness, @@ -159,6 
+158,31 @@ async def update_from_binding( state["expected"] = expected +@vf.update(stage="group") +async def group_update_from_binding( + tasks: list[Mapping[str, object]], states: list[dict[str, object]], expected: str +) -> None: + _ = tasks + for state in states: + state["group_expected"] = expected + + +@vf.reward +async def reward_from_binding( + task: Mapping[str, object], state: dict[str, object], expected: str +) -> float: + _ = state + return float(task.get("answer") == expected) + + +@vf.reward(stage="group") +async def group_reward_from_binding( + tasks: list[Mapping[str, object]], states: list[dict[str, object]], expected: str +) -> list[float]: + _ = tasks + return [float(state.get("answer") == expected) for state in states] + + async def colliding_tool(value: str, token: str) -> str: return f"{token}:{value}" @@ -263,12 +287,19 @@ async def setup_aware_program( def config_toolset(prefix: str = "cfg") -> Toolset: + def prefix_value() -> str: + return prefix + return Toolset( tools=[config_tool], - bindings={"config_tool.prefix": prefix}, + bindings={"config_tool.prefix": prefix_value}, ) +def load_another_harness_config() -> HarnessConfig: + return HarnessConfig(max_turns=6, rewards=[config_reward]) + + ref_module = types.ModuleType(REF_MODULE) setattr(ref_module, "source_loader", source_loader) setattr(ref_module, "eval_source_loader", eval_source_loader) @@ -293,6 +324,7 @@ def config_toolset(prefix: str = "cfg") -> Toolset: setattr(ref_module, "sandbox_user", sandbox_user) setattr(ref_module, "config_program", config_program) setattr(ref_module, "setup_aware_program", setup_aware_program) +setattr(ref_module, "load_another_harness_config", load_another_harness_config) sys.modules[REF_MODULE] = ref_module @@ -409,16 +441,21 @@ def test_env_capabilities_follow_group_lifecycle_handlers() -> None: assert not group_cleanup_env.provides_advantages -def test_group_lifecycle_handlers_reject_extra_args() -> None: +@pytest.mark.asyncio +async def 
test_group_lifecycle_handlers_require_bound_extra_args() -> None: @vf.update(stage="group") async def bad_group_update(tasks, states, extra) -> None: _ = tasks, states, extra - with pytest.raises(ValueError, match="exactly tasks and states"): - Env( - taskset=Taskset(source=source_loader, updates=[bad_group_update]), - harness=Harness(program=config_program), - ) + env = Env( + taskset=Taskset(source=source_loader, updates=[bad_group_update]), + harness=Harness(program=config_program), + ) + task = Task({"prompt": [{"role": "user", "content": "hi"}]}).freeze() + state = State.for_task(task) + + with pytest.raises(TypeError, match="extra"): + await env.harness.runtime.update_group([task], [state]) def test_env_capabilities_follow_custom_taskset_init_group() -> None: @@ -462,7 +499,10 @@ def test_harness_config_extends_constructor_surface() -> None: assert harness.program is config_program assert harness.config.max_turns == 3 - assert harness.metrics == [config_metric] + assert [metric.__name__ for metric in harness.metrics] == [ + "num_turns", + "config_metric", + ] assert harness.rewards == [config_reward] assert harness.advantages == [config_advantage] assert harness.setups == [config_setup] @@ -483,6 +523,14 @@ def test_harness_owns_default_render_completion_update() -> None: ) +def test_harness_owns_default_num_turns_metric() -> None: + harness = Harness(program=config_program) + + assert any( + signal["name"] == "num_turns" for signal in harness.runtime.rollout_signals + ) + + @pytest.mark.asyncio async def test_update_config_runs_before_rollout_scoring() -> None: harness = Harness( @@ -553,8 +601,11 @@ async def test_group_update_config_runs_before_group_scoring() -> None: def test_lifecycle_fields_are_framework_managed() -> None: + assert vf.State is State + task = Task({"prompt": [{"role": "user", "content": "hi"}]}).freeze() - state = State.for_task(task) + state = vf.State.for_task(task) + assert state.uses_v1_contract is True for key, value in { 
"is_completed": True, @@ -562,8 +613,7 @@ def test_lifecycle_fields_are_framework_managed() -> None: "is_truncated": True, "error": {"message": "boom"}, }.items(): - with pytest.raises(RuntimeError, match="framework-managed"): - State({key: value}) + assert State({key: value})[key] == value with pytest.raises(RuntimeError, match="framework-managed"): state[key] = value with pytest.raises(RuntimeError, match="framework-managed"): @@ -572,8 +622,16 @@ def test_lifecycle_fields_are_framework_managed() -> None: state.setdefault(key, value) with pytest.raises(RuntimeError, match="framework-managed"): state.pop(key) + state["user_field"] = "ok" + assert state.popitem() == ("user_field", "ok") + + protected_only = State()._enable_v1_contract() + protected_only._set_completed(False) + protected_only._set_stop_condition(None, overwrite=True) + protected_only._set_truncated(False, overwrite=True) + protected_only._set_error(None) with pytest.raises(RuntimeError, match="framework-managed"): - state.popitem() + protected_only.popitem() with pytest.raises(RuntimeError, match="framework-managed"): state.clear() @@ -604,7 +662,9 @@ def test_toolsets_config_accepts_addressable_map_and_fn_tables() -> None: assert set(taskset.named_toolsets) == {"direct", "configured"} assert taskset.toolsets[0].tools == (direct_tool,) - assert taskset.toolsets[1].bindings == {"config_tool.prefix": "from_config"} + prefix = taskset.toolsets[1].bindings["config_tool.prefix"] + assert callable(prefix) + assert prefix() == "from_config" @pytest.mark.asyncio @@ -695,6 +755,21 @@ async def test_tool_bindings_inject_owner_private_objects() -> None: assert await state.get_tools()["object_tool"](value="alpha") == "alpha" +def test_binding_strings_must_be_framework_paths() -> None: + with pytest.raises(ValueError, match="Binding string sources"): + Toolset(tools=[config_tool], bindings={"config_tool.prefix": "literal"}) + + +def test_binding_sources_reject_direct_objects() -> None: + with 
pytest.raises(TypeError, match="framework path or callable"): + Toolset(tools=[config_tool], bindings={"config_tool.prefix": object()}) + + +def test_toolset_binding_keys_must_target_callable_args() -> None: + with pytest.raises(ValueError, match="callable.arg"): + Toolset(tools=[config_tool], bindings={"prefix": "task.answer"}) + + @pytest.mark.asyncio async def test_rollout_handlers_receive_bound_hidden_args() -> None: harness = Harness( @@ -715,6 +790,89 @@ async def test_rollout_handlers_receive_bound_hidden_args() -> None: assert state["expected"] == "ok" +@pytest.mark.asyncio +async def test_harness_handlers_receive_bound_hidden_args() -> None: + harness = Harness( + updates=[update_from_binding], + bindings={"update_from_binding.expected": "task.answer"}, + ) + task = Task( + {"prompt": [{"role": "user", "content": "hi"}], "answer": "ok"} + ).freeze() + state = await harness.setup_state(task, State.for_task(task)) + + await harness.runtime.update_rollout(task, state) + + assert state["expected"] == "ok" + + +@pytest.mark.asyncio +async def test_taskset_handlers_receive_bound_hidden_args() -> None: + taskset = Taskset( + updates=[update_from_binding], + bindings={"update_from_binding.expected": "task.answer"}, + ) + harness = Harness() + harness.attach_taskset(taskset) + task = Task( + {"prompt": [{"role": "user", "content": "hi"}], "answer": "ok"} + ).freeze() + state = await harness.setup_state(task, State.for_task(task)) + + await harness.runtime.update_rollout(task, state) + + assert state["expected"] == "ok" + + +@pytest.mark.asyncio +async def test_group_handlers_receive_bound_hidden_args() -> None: + harness = Harness( + updates=[group_update_from_binding], + bindings={"group_update_from_binding.expected": "tasks.0.answer"}, + ) + task = Task( + {"prompt": [{"role": "user", "content": "hi"}], "answer": "ok"} + ).freeze() + state = State.for_task(task) + + await harness.runtime.update_group([task], [state]) + + assert state["group_expected"] == "ok" + + 
+@pytest.mark.asyncio +async def test_signals_receive_bound_hidden_args() -> None: + harness = Harness( + rewards=[reward_from_binding], + bindings={"reward_from_binding.expected": "task.answer"}, + ) + task = Task( + {"prompt": [{"role": "user", "content": "hi"}], "answer": "ok"} + ).freeze() + state = await harness.setup_state(task, State.for_task(task)) + + await harness.runtime.score_rollout(task, state) + + assert state["reward"] == 1.0 + assert state["metrics"]["reward_from_binding"] == 1.0 + + +@pytest.mark.asyncio +async def test_group_signals_receive_bound_hidden_args() -> None: + harness = Harness( + rewards=[group_reward_from_binding], + bindings={"group_reward_from_binding.expected": "states.0.answer"}, + ) + task = Task({"prompt": [{"role": "user", "content": "hi"}]}).freeze() + state = State.for_task(task) + state["answer"] = "ok" + + await harness.runtime.score_group([task], [state]) + + assert state["reward"] == 1.0 + assert state["metrics"]["group_reward_from_binding"] == 1.0 + + @pytest.mark.asyncio async def test_object_bindings_are_private_to_callable_tools() -> None: harness = Harness( @@ -1076,6 +1234,67 @@ class LocalTasksetConfig(TasksetConfig): assert default_config.taskset.split == "kwarg" +def test_env_config_args_supplies_typed_top_level_args() -> None: + class LocalArgsConfig(Config): + split: str = "train" + max_turns: int = 4 + + config = EnvConfig( + {"args": {"max_turns": 7}}, + args=LocalArgsConfig(split="args"), + ) + + assert isinstance(config.args, LocalArgsConfig) + assert config.args.split == "args" + assert config.args.max_turns == 7 + + +def test_env_config_args_accepts_arbitrary_user_args() -> None: + config = EnvConfig(args={"k1": "v1", "k2": "v2"}) + + assert config.args == {"k1": "v1", "k2": "v2"} + + +def test_env_config_harness_section_extends_imported_config() -> None: + config = EnvConfig( + { + "harness": { + "config": ref("load_another_harness_config"), + "rewards": [{"fn": ref("updated_reward"), "weight": 0}], + } 
+ } + ) + harness = Harness(config=config.harness) + + assert harness.config.max_turns == 6 + assert [reward.__name__ for reward in harness.rewards] == [ + "config_reward", + "updated_reward", + ] + assert getattr(harness.rewards[1], "reward_weight") == 0.0 + + +def test_harness_config_normalizes_program_mapping() -> None: + config = HarnessConfig( + program={ + "command": ["echo", "ok"], + "sandbox": {"packages": "numpy"}, + "channels": {"mcp": True}, + } + ) + + assert config.program == { + "command": ["echo", "ok"], + "sandbox": {"packages": ["numpy"]}, + "channels": {"mcp": True}, + } + + +def test_harness_config_rejects_unknown_program_tool_interface() -> None: + with pytest.raises(ValueError, match="unknown channel"): + HarnessConfig(program={"command": ["echo"], "channels": {"ptc": True}}) + + def test_load_environment_coerces_typed_env_config_arg( monkeypatch: pytest.MonkeyPatch, ) -> None: @@ -1083,10 +1302,9 @@ def test_load_environment_coerces_typed_env_config_arg( module = types.ModuleType(module_name) seen: dict[str, object] = {} - def load_environment(split: str = "train", config: EnvConfig | None = None) -> Env: + def load_environment(split: str = "train", *, config: EnvConfig) -> Env: seen["split"] = split seen["config"] = config - config = config or EnvConfig() return Env( taskset=Taskset(source=source_loader, config=config.taskset), harness=Harness(config=config.harness), @@ -1117,6 +1335,29 @@ def load_environment(split: str = "train", config: EnvConfig | None = None) -> E } +def test_load_environment_supplies_default_typed_env_config( + monkeypatch: pytest.MonkeyPatch, +) -> None: + module_name = "default_typed_env_config" + module = types.ModuleType(module_name) + seen: dict[str, object] = {} + + def load_environment(config: EnvConfig) -> Env: + seen["config"] = config + return Env( + taskset=Taskset(source=source_loader, config=config.taskset), + harness=Harness(config=config.harness), + ) + + module.load_environment = load_environment + 
monkeypatch.setitem(sys.modules, module_name, module) + + env = vf.load_environment("default-typed-env-config") + + assert isinstance(seen["config"], EnvConfig) + assert env.env_args == {} + + def test_load_environment_leaves_untyped_config_arg_as_kwargs( monkeypatch: pytest.MonkeyPatch, ) -> None: @@ -1165,21 +1406,12 @@ class LocalHarnessConfig(HarnessConfig): assert explicit_config.max_turns == 10 -def test_config_field_name_is_allowed_in_typed_configs() -> None: +def test_config_field_name_is_reserved_for_config_refs() -> None: class LocalTasksetConfig(TasksetConfig): config: dict[str, object] | None = None - loaded_config = LocalTasksetConfig.from_config({"config": {"mode": "loaded"}}) - direct_config = LocalTasksetConfig(config={"mode": "direct"}) - merged_config = LocalTasksetConfig( - {"taskset_id": "local"}, - config={"mode": "merged"}, - ) - - assert loaded_config.config == {"mode": "loaded"} - assert direct_config.config == {"mode": "direct"} - assert merged_config.taskset_id == "local" - assert merged_config.config == {"mode": "merged"} + with pytest.raises(TypeError, match="reserves the 'config' field"): + LocalTasksetConfig.from_config({"config": {"mode": "loaded"}}) @pytest.mark.parametrize( @@ -1230,7 +1462,7 @@ def test_reference_v1_harness_loaders_preserve_child_defaults() -> None: assert self_judge.load_harness().config.max_turns == 8 -def test_bfcl_v1_loader_preserves_mapping_config_sections( +def test_bfcl_loader_preserves_mapping_config_sections( monkeypatch: pytest.MonkeyPatch, ) -> None: module = importlib.import_module("environments.bfcl_v3.bfcl_v3") @@ -1249,7 +1481,7 @@ def fake_harness(config: object = None, **kwargs: object) -> Harness: monkeypatch.setattr(module, "load_taskset", fake_taskset) monkeypatch.setattr(module, "load_harness", fake_harness) - env = module.load_v1_environment( + env = module.load_environment( config=EnvConfig( taskset={"taskset_id": "bfcl-env-args"}, harness={"model": "bfcl-model"}, @@ -1275,7 +1507,7 @@ def 
fake_taskset(config: object = None) -> Taskset: monkeypatch.setattr(module, "load_taskset", fake_taskset) - env = module.load_v1_environment( + env = module.load_environment( config=EnvConfig( taskset={"max_turns": 7}, harness={"model": "configured-model", "max_turns": 3}, @@ -1393,7 +1625,11 @@ def test_self_judge_loader_projects_shortcuts_to_child_configs() -> None: taskset = module.load_taskset(num_examples=2) harness = module.load_harness(max_turns=3) - shortcut_env = module.load_environment(num_examples=2, max_turns=3) + shortcut_env = module.load_environment( + num_examples=2, + max_turns=3, + config=EnvConfig(), + ) override_env = module.load_environment( num_examples=2, max_turns=3, @@ -1422,7 +1658,9 @@ def test_subagent_loader_keeps_child_harness_internal( ) assert env.harness.config.model == "parent" - child_harness = env.harness.toolsets[0].bindings["ask_subagent.harness"] + toolset = env.harness.toolsets[0] + assert toolset.bindings["ask_subagent.harness"] == "objects.harness" + child_harness = toolset.objects["harness"]() assert child_harness.config.model is None @@ -1499,9 +1737,9 @@ def test_configs_load_from_toml_sections(tmp_path) -> None: assert taskset.source is source_loader assert getattr(taskset.rewards[0], "__name__") == "config_reward" assert getattr(taskset.rewards[0], "reward_weight") == 0.5 - assert taskset.named_toolsets["configured"].bindings == { - "config_tool.prefix": "toml" - } + prefix = taskset.named_toolsets["configured"].bindings["config_tool.prefix"] + assert callable(prefix) + assert prefix() == "toml" assert harness.program == {"fn": ref("config_program")} assert callable(harness._program) assert harness.config.max_turns == 7 diff --git a/tests/test_v1_empty_completions.py b/tests/test_v1_empty_completions.py new file mode 100644 index 000000000..ebed809e7 --- /dev/null +++ b/tests/test_v1_empty_completions.py @@ -0,0 +1,57 @@ +import importlib.util +from pathlib import Path +from types import ModuleType + +import pytest + + 
+def load_env_module(name: str, filename: str) -> ModuleType: + module_path = Path(__file__).parents[1] / "environments" / name / filename + spec = importlib.util.spec_from_file_location(f"test_{name}", module_path) + assert spec is not None + assert spec.loader is not None + module = importlib.util.module_from_spec(spec) + spec.loader.exec_module(module) + return module + + +def test_dspy_rlm_empty_completion_scores_zero() -> None: + module = load_env_module("dspy_rlm", "dspy_rlm.py") + + assert module.answer_reward({"answer": "4"}, {"completion": []}) == 0.0 + + +def test_openai_agents_empty_completion_scores_zero() -> None: + module = load_env_module("openai_agents_env", "openai_agents_env.py") + + assert module.answer_reward({"answer": "4"}, {"completion": []}) == 0.0 + + +@pytest.mark.asyncio +async def test_math_python_empty_completion_scores_zero() -> None: + module = load_env_module("math_python", "math_python_v1.py") + + assert await module.correct_answer({"answer": "4"}, {"completion": []}) == 0.0 + + +@pytest.mark.asyncio +async def test_hello_subagent_missing_completion_scores_zero() -> None: + module = load_env_module("hello_subagent_v1", "hello_subagent_v1.py") + + assert ( + await module.exact_answer({"answer": "hello alice"}, {"completion": None}) + == 0.0 + ) + + +def test_hello_parallel_reward_prompt_allows_missing_completion() -> None: + module = load_env_module( + "hello_parallel_sandbox_v1", "hello_parallel_sandbox_v1.py" + ) + + prompt = module.reward_prompt( + {"instruction": "write an answer", "answer": "done"}, + {"completion": None}, + ) + + assert "Assistant final answer:\n\n" in prompt diff --git a/tests/test_v1_example_counts.py b/tests/test_v1_example_counts.py index 9985eb113..c1c401b4f 100644 --- a/tests/test_v1_example_counts.py +++ b/tests/test_v1_example_counts.py @@ -1,5 +1,3 @@ -from __future__ import annotations - import importlib from collections.abc import Iterable, Mapping from pathlib import Path diff --git 
a/tests/test_v1_group_reward_env.py b/tests/test_v1_group_reward_env.py index 82bea58c2..8280523b0 100644 --- a/tests/test_v1_group_reward_env.py +++ b/tests/test_v1_group_reward_env.py @@ -1,9 +1,8 @@ -from __future__ import annotations - from typing import cast import pytest +import verifiers.v1 as vf from verifiers.clients import Client from verifiers.types import RolloutInput @@ -12,7 +11,7 @@ @pytest.mark.asyncio async def test_hello_group_reward_v1_scores_full_group_lifecycle() -> None: - env = load_environment(num_examples=1) + env = load_environment(num_examples=1, config=vf.EnvConfig()) assert env.requires_group_rollouts assert env.provides_advantages diff --git a/tests/test_v1_harbor_cli.py b/tests/test_v1_harbor_cli.py index cec97b2cc..86f0c4f04 100644 --- a/tests/test_v1_harbor_cli.py +++ b/tests/test_v1_harbor_cli.py @@ -1,10 +1,11 @@ -from __future__ import annotations - +import importlib import json import sys import types from pathlib import Path +from types import ModuleType from typing import cast +from uuid import uuid4 import pytest @@ -41,10 +42,38 @@ def write_harbor_task(root: Path, name: str = "task-a") -> Path: return task_dir -def test_harbor_taskset_loads_local_tasks_with_program_patch(tmp_path: Path) -> None: - write_harbor_task(tmp_path) +def write_harbor_package(tmp_path: Path, monkeypatch: pytest.MonkeyPatch) -> ModuleType: + package_name = f"harbor_pkg_{uuid4().hex}" + package_dir = tmp_path / package_name + tasks_root = package_dir / "tasks" + tasks_root.mkdir(parents=True) + (package_dir / "__init__.py").write_text( + """ +import verifiers.v1 as vf + + +def load_taskset(**kwargs): + return vf.HarborTaskset(**kwargs) + + +def load_env(): + return vf.Env(taskset=vf.HarborTaskset(), harness=vf.OpenCode()) +""".lstrip() + ) + monkeypatch.syspath_prepend(str(tmp_path)) + importlib.invalidate_caches() + module = importlib.import_module(package_name) + setattr(module, "tasks_root", tasks_root) + return module + - taskset = 
vf.HarborTaskset(tasks=tmp_path) +def test_harbor_taskset_loads_package_tasks_with_program_patch( + tmp_path: Path, monkeypatch: pytest.MonkeyPatch +) -> None: + package = write_harbor_package(tmp_path, monkeypatch) + write_harbor_task(cast(Path, getattr(package, "tasks_root"))) + + taskset = getattr(package, "load_taskset")() task = next(iter(taskset)) assert task["taskset_id"] == "harbor" @@ -65,18 +94,27 @@ def test_harbor_taskset_loads_local_tasks_with_program_patch(tmp_path: Path) -> assert task["program"]["env"]["AGENT_WORKDIR"] == "/app" -def test_harbor_taskset_accepts_single_task_dir(tmp_path: Path) -> None: - task_dir = write_harbor_task(tmp_path, "only-task") +def test_harbor_taskset_rejects_malformed_package_task( + tmp_path: Path, monkeypatch: pytest.MonkeyPatch +) -> None: + package = write_harbor_package(tmp_path, monkeypatch) + bad_task = cast(Path, getattr(package, "tasks_root")) / "bad-task" + bad_task.mkdir() + (bad_task / "task.toml").write_text('version = "1.0"') - taskset = vf.HarborTaskset(tasks=task_dir) + taskset = getattr(package, "load_taskset")() - assert [task["task_name"] for task in taskset] == ["only-task"] + with pytest.raises(ValueError, match="Malformed Harbor task"): + list(taskset) -def test_harbor_taskset_constructs_env_with_opencode(tmp_path: Path) -> None: - write_harbor_task(tmp_path) +def test_harbor_taskset_constructs_env_with_opencode( + tmp_path: Path, monkeypatch: pytest.MonkeyPatch +) -> None: + package = write_harbor_package(tmp_path, monkeypatch) + write_harbor_task(cast(Path, getattr(package, "tasks_root"))) - env = vf.Env(taskset=vf.HarborTaskset(tasks=tmp_path), harness=vf.OpenCode()) + env = getattr(package, "load_env")() row = env.get_dataset()[0] task = env.taskset.to_task(row) @@ -176,7 +214,7 @@ def test_opencode_config_owns_opencode_harness_fields() -> None: ) program = cast(dict[str, object], harness.program) command = cast(list[object], program["command"]) - mcp_setup = cast(dict[str, object], 
program["tools"])["mcp"] + mcp_setup = cast(dict[str, object], program["channels"])["mcp"] setup = cast(str, program["setup"]) assert harness.config.agent_workdir == "/workspace" @@ -217,15 +255,18 @@ def test_pi_harness_writes_intercepted_model_and_mcp_config() -> None: def test_task_program_merges_into_command_program_without_collisions() -> None: - harness = vf.CLIHarness( - command=["tool"], - sandbox=True, - files={"/harness.txt": "harness"}, - setup="echo harness", - tools={"mcp": "echo harness tools"}, - env={"HARNESS": "1"}, - artifacts={"log": {"path": "/logs/harness.log", "format": "text"}}, - program={"args": ["--base"]}, + harness = vf.Harness( + program={ + "command": ["tool"], + "sandbox": True, + "files": {"/harness.txt": "harness"}, + "setup": "echo harness", + "channels": {"mcp": "echo harness tools"}, + "env": {"HARNESS": "1"}, + "artifacts": {"log": {"path": "/logs/harness.log", "format": "text"}}, + "args": ["--base"], + }, + sandbox={"image": "python:3.11-slim"}, ) task = vf.Task( { @@ -249,7 +290,7 @@ def test_task_program_merges_into_command_program_without_collisions() -> None: "/task/instruction.md": "task", } assert program["setup"] == ["echo harness", "echo task"] - assert program["tools"] == {"mcp": "echo harness tools"} + assert program["channels"] == {"mcp": "echo harness tools"} assert program["env"] == {"HARNESS": "1", "TASK": "1"} assert program["args"] == ["--base", "--task"] assert program["artifacts"] == { @@ -259,7 +300,10 @@ def test_task_program_merges_into_command_program_without_collisions() -> None: def test_task_program_rejects_harness_owned_keys() -> None: - harness = vf.CLIHarness(command=["tool"], sandbox=True) + harness = vf.Harness( + program={"command": ["tool"], "sandbox": True}, + sandbox={"image": "python:3.11-slim"}, + ) task = vf.Task({"prompt": [], "program": {"command": ["other"]}}).freeze() with pytest.raises(ValueError, match="task.program can only define"): @@ -269,8 +313,13 @@ def 
test_task_program_rejects_harness_owned_keys() -> None: def test_task_program_rejects_colliding_upload_paths() -> None: - harness = vf.CLIHarness( - command=["tool"], sandbox=True, files={"/task/instruction.md": "harness"} + harness = vf.Harness( + program={ + "command": ["tool"], + "sandbox": True, + "files": {"/task/instruction.md": "harness"}, + }, + sandbox={"image": "python:3.11-slim"}, ) task = vf.Task( {"prompt": [], "program": {"files": {"/task/instruction.md": "task"}}} diff --git a/tests/test_v1_mini_swe_agent.py b/tests/test_v1_mini_swe_agent.py index ffff0d049..c29ee54e4 100644 --- a/tests/test_v1_mini_swe_agent.py +++ b/tests/test_v1_mini_swe_agent.py @@ -1,8 +1,10 @@ -from __future__ import annotations - +import importlib from pathlib import Path +from types import ModuleType from typing import Any, cast +from uuid import uuid4 +import pytest import verifiers.v1 as vf @@ -33,11 +35,32 @@ def write_harbor_task(root: Path) -> Path: return task_dir +def write_harbor_package(tmp_path: Path, monkeypatch: pytest.MonkeyPatch) -> ModuleType: + package_name = f"mini_swe_harbor_pkg_{uuid4().hex}" + package_dir = tmp_path / package_name + tasks_root = package_dir / "tasks" + tasks_root.mkdir(parents=True) + (package_dir / "__init__.py").write_text( + """ +import verifiers.v1 as vf + + +def load_env(): + return vf.Env(taskset=vf.HarborTaskset(), harness=vf.MiniSWEAgent()) +""".lstrip() + ) + monkeypatch.syspath_prepend(str(tmp_path)) + importlib.invalidate_caches() + module = importlib.import_module(package_name) + setattr(module, "tasks_root", tasks_root) + return module + + def test_mini_swe_agent_builds_sandbox_program(): harness = vf.MiniSWEAgent(system_prompt="Use tests.", agent_workdir="/app") program = cast(dict[str, Any], harness.program) - assert isinstance(harness, vf.CLIHarness) + assert isinstance(harness, vf.Harness) assert program["sandbox"] is not False assert "OPENAI_MODEL" in cast(dict[str, object], program["env"]) assert "apt-get -o 
Acquire::Retries=3 update" in cast(str, program["setup"]) @@ -47,10 +70,13 @@ def test_mini_swe_agent_builds_sandbox_program(): assert "mini_swe_agent_log" in cast(dict[str, object], program["artifacts"]) -def test_mini_swe_agent_composes_with_harbor_taskset(tmp_path: Path): - write_harbor_task(tmp_path) +def test_mini_swe_agent_composes_with_harbor_taskset( + tmp_path: Path, monkeypatch: pytest.MonkeyPatch +): + package = write_harbor_package(tmp_path, monkeypatch) + write_harbor_task(cast(Path, getattr(package, "tasks_root"))) - env = vf.Env(taskset=vf.HarborTaskset(tasks=tmp_path), harness=vf.MiniSWEAgent()) + env = getattr(package, "load_env")() row = env.get_dataset()[0] task = env.taskset.to_task(row) diff --git a/tests/test_v1_rlm_swe.py b/tests/test_v1_rlm_swe.py index 3382f9b84..6c4cb2ed2 100644 --- a/tests/test_v1_rlm_swe.py +++ b/tests/test_v1_rlm_swe.py @@ -1,5 +1,5 @@ -from __future__ import annotations - +import sys +import types from collections.abc import Mapping from pathlib import Path @@ -22,7 +22,7 @@ def test_rlm_harness_builds_sandbox_program_without_eager_checkout(): artifacts = as_mapping(program["artifacts"]) setup = program["setup"] - assert isinstance(harness, vf.CLIHarness) + assert isinstance(harness, vf.Harness) assert program["sandbox"] is not False assert isinstance(setup, list) assert "apt-get -o Acquire::Retries=3 update" in setup[0] @@ -31,6 +31,26 @@ def test_rlm_harness_builds_sandbox_program_without_eager_checkout(): assert "rlm_metrics" in artifacts +def test_rlm_harness_accepts_typed_config_surface(): + harness = vf.RLM( + config=vf.RLMConfig( + local_checkout="/tmp/checkout", + rlm_tools=["bash", "edit"], + rlm_max_turns=7, + rlm_exec_timeout=11, + env_vars={"CUSTOM": "1"}, + ) + ) + program = as_mapping(harness.program) + program_env = as_mapping(program["env"]) + + assert harness.config.rlm_tools == ["bash", "edit"] + assert program_env["RLM_TOOLS"] == "bash,edit" + assert program_env["RLM_MAX_TURNS"] == "7" + assert 
program_env["RLM_EXEC_TIMEOUT"] == "11" + assert program_env["CUSTOM"] == "1" + + def test_rlm_harness_can_upload_skills(tmp_path: Path): skills = tmp_path / "skills" (skills / "edit").mkdir(parents=True) @@ -43,6 +63,67 @@ def test_rlm_harness_can_upload_skills(tmp_path: Path): assert dirs["/rlm/skills"] == skills +def test_rlm_harness_uploads_taskset_skills_by_default(tmp_path: Path): + skills = tmp_path / "taskset-skills" + skills.mkdir() + (skills / "SKILL.md").write_text("---\nname: taskset\n---\n") + + class SkillTaskset(vf.Taskset): + def get_upload_dirs(self): + return {"skills": skills} + + env = vf.Env( + taskset=SkillTaskset(source=[]), + harness=vf.RLM(local_checkout="/tmp/checkout"), + ) + program = as_mapping(env.harness.program) + dirs = as_mapping(program["dirs"]) + + assert dirs["/rlm/skills"] == skills + + +def test_taskset_discovers_sibling_skills_dir_by_default( + tmp_path: Path, monkeypatch: pytest.MonkeyPatch +) -> None: + module_name = "skill_taskset_module" + module_file = tmp_path / f"{module_name}.py" + skills = tmp_path / "skills" + module_file.write_text("") + skills.mkdir() + (skills / "SKILL.md").write_text("---\nname: sibling\n---\n") + module = types.ModuleType(module_name) + module.__file__ = str(module_file) + module.__package__ = "" + monkeypatch.setitem(sys.modules, module_name, module) + skill_taskset_type = type( + "SkillTaskset", (vf.Taskset,), {"__module__": module_name} + ) + + taskset = skill_taskset_type(source=[]) + + assert taskset.get_upload_dirs() == {"skills": skills} + + +def test_rlm_harness_explicit_skills_override_taskset_skills(tmp_path: Path): + taskset_skills = tmp_path / "taskset-skills" + explicit_skills = tmp_path / "explicit-skills" + taskset_skills.mkdir() + explicit_skills.mkdir() + + class SkillTaskset(vf.Taskset): + def get_upload_dirs(self): + return {"skills": taskset_skills} + + env = vf.Env( + taskset=SkillTaskset(source=[]), + harness=vf.RLM(local_checkout="/tmp/checkout", skills=explicit_skills), 
+ ) + program = as_mapping(env.harness.program) + dirs = as_mapping(program["dirs"]) + + assert dirs["/rlm/skills"] == explicit_skills + + def test_rlm_swe_environment_uses_v1_r2e_taskset(monkeypatch): calls: dict[str, object] = {} @@ -54,11 +135,17 @@ def fake_load_dataset(dataset_name: str, **kwargs: object) -> Dataset: monkeypatch.setattr(rlm_swe_v1, "load_dataset", fake_load_dataset) env = rlm_swe_v1.load_environment( - dataset_name="fake-r2e", - local_checkout="/tmp/checkout", - timeout_minutes=30, - env={"CUSTOM": "1", "PATH": "/task/bin"}, - rlm_env={"CALLER": "1", "PATH": "/caller/bin"}, + config=vf.EnvConfig( + taskset=rlm_swe_v1.RlmSweTasksetConfig( + dataset_name="fake-r2e", + timeout_minutes=30, + env={"CUSTOM": "1", "PATH": "/task/bin"}, + ), + harness=vf.RLMConfig( + local_checkout="/tmp/checkout", + env_vars={"CALLER": "1", "PATH": "/caller/bin"}, + ), + ), ) task = next(iter(env.taskset)) program = as_mapping(env.harness.program) @@ -103,7 +190,9 @@ async def test_rlm_swe_taskset_setup_and_reward(monkeypatch): monkeypatch.setattr( rlm_swe_v1, "load_dataset", lambda *args, **kwargs: fake_r2e_dataset() ) - taskset = rlm_swe_v1.load_taskset(timeout_minutes=30) + taskset = rlm_swe_v1.load_taskset( + config=rlm_swe_v1.RlmSweTasksetConfig(timeout_minutes=30) + ) task = next(iter(taskset)) state = vf.State.for_task(task) sandbox = FakeSandbox() @@ -144,8 +233,10 @@ async def fake_run_tests( @pytest.mark.asyncio async def test_rlm_swe_run_tests_quotes_env_values(): taskset = rlm_swe_v1.load_taskset( - hide_tests_from_agent=False, - env={"SAFE": "two words; $(echo nope)", "QUOTE": "it's ok"}, + config=rlm_swe_v1.RlmSweTasksetConfig( + hide_tests_from_agent=False, + env={"SAFE": "two words; $(echo nope)", "QUOTE": "it's ok"}, + ) ) sandbox = RecordingSandbox() @@ -160,7 +251,9 @@ async def test_rlm_swe_run_tests_quotes_env_values(): def test_rlm_swe_get_env_vars_uses_configured_repo_path(): - taskset = rlm_swe_v1.load_taskset(repo_path="/workspace/repo") + 
taskset = rlm_swe_v1.load_taskset( + config=rlm_swe_v1.RlmSweTasksetConfig(repo_path="/workspace/repo") + ) path = taskset.get_env_vars()["PATH"] diff --git a/tests/test_v1_runtime_lifecycle.py b/tests/test_v1_runtime_lifecycle.py index 0795d4d45..9350a3bd0 100644 --- a/tests/test_v1_runtime_lifecycle.py +++ b/tests/test_v1_runtime_lifecycle.py @@ -1,5 +1,3 @@ -from __future__ import annotations - import asyncio import json import shlex @@ -18,6 +16,7 @@ from verifiers.types import ClientConfig from verifiers.types import Response, ResponseMessage, ToolCall from verifiers.types import Tool +from verifiers.types import Usage from verifiers.v1.runtime import Runtime from verifiers.v1.utils.endpoint_utils import endpoint_api_key from verifiers.v1.utils import mcp_utils @@ -198,12 +197,15 @@ async def finish_tool(answer: str, state) -> str: def fake_response( - content: str | None = None, tool_calls: list[ToolCall] | None = None + content: str | None = None, + tool_calls: list[ToolCall] | None = None, + usage: Usage | None = None, ) -> Response: return Response( id="fake", created=0, model="fake", + usage=usage, message=ResponseMessage( role="assistant", content=content, @@ -529,6 +531,33 @@ def test_model_client_default_keys_are_rollout_local() -> None: assert len(runtime.model_clients) == 2 +@pytest.mark.asyncio +async def test_v1_records_default_metrics_usage_and_timing() -> None: + usage = Usage( + prompt_tokens=11, + reasoning_tokens=0, + completion_tokens=7, + total_tokens=18, + ) + harness = vf.Harness( + client=cast( + Client, + FakeModelClient([fake_response(content="ok", usage=usage)]), + ), + model="fake-model", + ) + task = vf.Task({"prompt": [{"role": "user", "content": "hi"}]}).freeze() + + state = await harness.run(task) + + assert state["metrics"]["num_turns"] == 1.0 + assert state["token_usage"] == {"input_tokens": 11.0, "output_tokens": 7.0} + assert state["usage"] == state["token_usage"] + assert state["timing"]["total"] > 0.0 + assert 
state["timing"]["generation"]["duration"] > 0.0 + assert state["timing"]["model"]["duration"] > 0.0 + + def test_v1_state_does_not_copy_task_answer_to_top_level() -> None: task = vf.Task({"answer": "gold"}).freeze() state = vf.State.for_task(task) @@ -1081,20 +1110,20 @@ def test_sandbox_program_patch_cannot_set_lifecycle_fields() -> None: assert state["error"] == {"message": "handled"} -def test_program_tools_mcp_injects_proxy_into_sandbox_program() -> None: +def test_program_channels_mcp_injects_proxy_into_sandbox_program() -> None: harness = vf.Harness( - program={"sandbox": True, "command": ["true"], "tools": "mcp"}, + program={"sandbox": True, "command": ["true"], "channels": "mcp"}, sandbox={"image": "python:3.11-slim"}, ) state = vf.State.for_task(vf.Task({}).freeze()) state["endpoint_root_url"] = "http://127.0.0.1:1/rollout/test" program = harness.prepare_sandbox_program( - {"sandbox": True, "command": ["true"], "tools": "mcp"}, state + {"sandbox": True, "command": ["true"], "channels": "mcp"}, state ) sandbox = harness.prepare_sandbox_config( {"image": "python:3.11-slim"}, - {"sandbox": True, "command": ["true"], "tools": "mcp"}, + {"sandbox": True, "command": ["true"], "channels": "mcp"}, ) files = cast(dict[str, str], program["files"]) @@ -1112,29 +1141,34 @@ def test_program_tools_mcp_injects_proxy_into_sandbox_program() -> None: assert "requests" in packages -def test_program_tools_mcp_requires_sandbox_command() -> None: +def test_program_channels_mcp_requires_sandbox_command() -> None: with pytest.raises(ValueError, match="requires program.sandbox"): - vf.Harness(program={"command": ["true"], "tools": "mcp"}) + vf.Harness(program={"command": ["true"], "channels": "mcp"}) -def test_program_tools_callable_rejects_command_programs() -> None: - with pytest.raises(ValueError, match="program.tools='callable'"): - vf.Harness(program={"command": ["true"], "tools": "callable"}) +def test_program_channels_callable_rejects_command_programs() -> None: + with 
pytest.raises(ValueError, match="program.channels='callable'"): + vf.Harness(program={"command": ["true"], "channels": "callable"}) @pytest.mark.asyncio -async def test_program_tools_mcp_setup_uses_bindings_after_setup_before_command( +async def test_program_channels_mcp_setup_uses_bindings_after_setup_before_command( monkeypatch: pytest.MonkeyPatch, ) -> None: install_fake_sandboxes(monkeypatch) install_fake_endpoint_tunnel(monkeypatch) - harness = vf.CLIHarness( - command=["python", "-c", "print('ok')"], - sandbox=True, - setup="echo setup", - tools={"mcp": configure_cli_endpoint}, - bindings={"configure_cli_endpoint.endpoint_config": endpoint_config_binding}, + harness = vf.Harness( + program={ + "command": ["python", "-c", "print('ok')"], + "sandbox": True, + "setup": "echo setup", + "channels": {"mcp": configure_cli_endpoint}, + "bindings": { + "configure_cli_endpoint.endpoint_config": endpoint_config_binding + }, + }, + sandbox={"image": "python:3.11-slim"}, model="bound-model", ) task = vf.Task({"prompt": [{"role": "user", "content": "hi"}]}).freeze() @@ -1155,10 +1189,13 @@ async def test_rollout_setup_receives_program_sandbox_before_program_setup( install_fake_sandboxes(monkeypatch) install_fake_endpoint_tunnel(monkeypatch) - harness = vf.CLIHarness( - command=["true"], - sandbox=True, - setup="echo program-setup", + harness = vf.Harness( + program={ + "command": ["true"], + "sandbox": True, + "setup": "echo program-setup", + }, + sandbox={"image": "python:3.11-slim"}, setups=[early_sandbox_lifecycle_setup, sandbox_lifecycle_setup], ) task = vf.Task({"prompt": [{"role": "user", "content": "hi"}]}).freeze() @@ -1213,7 +1250,10 @@ async def test_task_command_uses_background_job( install_fake_sandboxes(monkeypatch) install_fake_endpoint_tunnel(monkeypatch) - harness = vf.CLIHarness(command=["sleep", "120"], sandbox=True) + harness = vf.Harness( + program={"command": ["sleep", "120"], "sandbox": True}, + sandbox={"image": "python:3.11-slim", "workdir": 
"/app"}, + ) task = vf.Task( { "prompt": [{"role": "user", "content": "hi"}], @@ -1227,21 +1267,24 @@ async def test_task_command_uses_background_job( @pytest.mark.asyncio -async def test_program_tools_mcp_setup_accepts_config_ref_mappings( +async def test_program_channels_mcp_setup_accepts_config_ref_mappings( monkeypatch: pytest.MonkeyPatch, ) -> None: install_fake_sandboxes(monkeypatch) install_fake_endpoint_tunnel(monkeypatch) - harness = vf.CLIHarness( - command=["true"], - sandbox=True, - tools={"mcp": [{"fn": program_ref("configure_cli_endpoint_ref")}]}, - bindings={ - "configure_cli_endpoint_ref.endpoint_config": { - "fn": program_ref("endpoint_config_binding_ref") - } + harness = vf.Harness( + program={ + "command": ["true"], + "sandbox": True, + "channels": {"mcp": [{"fn": program_ref("configure_cli_endpoint_ref")}]}, + "bindings": { + "configure_cli_endpoint_ref.endpoint_config": { + "fn": program_ref("endpoint_config_binding_ref") + } + }, }, + sandbox={"image": "python:3.11-slim"}, model="toml-model", ) task = vf.Task({"prompt": [{"role": "user", "content": "hi"}]}).freeze() @@ -1254,22 +1297,28 @@ async def test_program_tools_mcp_setup_accepts_config_ref_mappings( def test_program_bindings_must_match_owned_callables() -> None: with pytest.raises(ValueError, match="does not match a callable"): - vf.CLIHarness( - command=["true"], - sandbox=True, - bindings={"missing.value": "task.value"}, + vf.Harness( + program={ + "command": ["true"], + "sandbox": True, + "bindings": {"missing.value": "task.value"}, + }, + sandbox={"image": "python:3.11-slim"}, ) def test_program_setup_is_not_a_binding_target() -> None: with pytest.raises(ValueError, match="setup callables cannot use"): - vf.CLIHarness( - command=["true"], - sandbox=True, - setup=configure_cli_endpoint, - bindings={ - "configure_cli_endpoint.endpoint_config": endpoint_config_binding + vf.Harness( + program={ + "command": ["true"], + "sandbox": True, + "setup": configure_cli_endpoint, + "bindings": { 
+ "configure_cli_endpoint.endpoint_config": endpoint_config_binding + }, }, + sandbox={"image": "python:3.11-slim"}, ) @@ -1336,7 +1385,7 @@ async def test_real_sandbox_base_program_calls_host_callable_tool() -> None: harness = vf.Harness( client=cast(Client, client), model="fake", - program={"sandbox": True, "tools": "callable"}, + program={"sandbox": True, "channels": "callable"}, sandbox={ "image": "python:3.11-slim", "scope": "group", @@ -1379,7 +1428,7 @@ async def test_real_sandbox_command_program_uses_mcp_tool_proxy() -> None: program={ "sandbox": True, "command": ["python", "/tmp/call_mcp.py"], - "tools": "mcp", + "channels": "mcp", "files": {"/tmp/call_mcp.py": REAL_MCP_PROXY_SCRIPT}, }, sandbox={ diff --git a/tests/test_v1_scoring_functions.py b/tests/test_v1_scoring_functions.py index c84a21aab..03c7ceb5e 100644 --- a/tests/test_v1_scoring_functions.py +++ b/tests/test_v1_scoring_functions.py @@ -1,5 +1,3 @@ -from __future__ import annotations - from collections.abc import Mapping from typing import Any, cast @@ -99,13 +97,16 @@ def test_signal_name_collisions_hard_fail() -> None: collect_signals(taskset_signals, harness_signals) -def test_group_signal_requires_plural_args_only() -> None: +@pytest.mark.asyncio +async def test_group_signal_reports_unresolved_required_args() -> None: @vf.metric(stage="group") async def bad_group_metric(task: dict, state: dict) -> float: return 0.0 - with pytest.raises(ValueError, match="tasks and states"): - build_signals(metrics=[bad_group_metric]) + signals = build_signals(metrics=[bad_group_metric]) + + with pytest.raises(TypeError, match="metric signal 'bad_group_metric'.*task"): + await score_group(signals, [{"answer": "a"}], [{"answer": "a"}]) @pytest.mark.asyncio diff --git a/tests/test_v1_taskset_bindings.py b/tests/test_v1_taskset_bindings.py new file mode 100644 index 000000000..d8d3f5d03 --- /dev/null +++ b/tests/test_v1_taskset_bindings.py @@ -0,0 +1,188 @@ +import re + +import pytest + +import verifiers.v1 as 
vf + + +def source_rows() -> list[dict[str, object]]: + return [ + { + "prompt": [{"role": "user", "content": "reply ok"}], + "answer": "ok", + } + ] + + +class Prefixer: + def __init__(self, prefix: str): + self.prefix = prefix + + def __call__(self, value: str) -> str: + return f"{self.prefix}{value}" + + +class TagExtractor: + def __init__(self, tag: str): + self.pattern = re.compile(rf"<{tag}>(.*?)", re.DOTALL) + + def __call__(self, completion: list[dict[str, object]]) -> str: + message = vf.get_messages(completion, role="assistant")[-1] + match = self.pattern.search(str(message.content or "")) + return "" if match is None else match.group(1).strip() + + +@vf.reward +async def prefix_reward(state, prefixer) -> float: + state["prefixed"] = prefixer("ok") + return 1.0 + + +@vf.reward +async def framework_state_reward(state) -> float: + state["framework_state_seen"] = True + return 1.0 + + +@vf.setup +async def setup_with_override(state, token) -> None: + state["token"] = token + + +@vf.reward +async def missing_binding_reward(state, extractor) -> float: + _ = state, extractor + return 0.0 + + +@vf.reward +async def extracted_answer_reward(task, state, extract_answer) -> float: + response = extract_answer(state.get("completion") or []) + return float(response == task["answer"]) + + +async def score_taskset(taskset: vf.Taskset) -> vf.State: + env = vf.Env(taskset=taskset, harness=vf.Harness()) + task = next(iter(taskset)) + state = await env.harness.setup_state(task, vf.State.for_task(task)) + await env.harness.runtime.score_rollout(task, state) + return state + + +@pytest.mark.asyncio +async def test_taskset_object_binding_resolves_instance() -> None: + taskset = vf.Taskset( + source=source_rows, + rewards=[prefix_reward], + objects={"prefixer": Prefixer("inst:")}, + bindings={"prefix_reward.prefixer": "objects.prefixer"}, + ) + + state = await score_taskset(taskset) + + assert state["prefixed"] == "inst:ok" + assert state["reward"] == 1.0 + + 
+@pytest.mark.asyncio +async def test_taskset_object_factory_is_lazy_and_resolved_once() -> None: + calls = 0 + + def make_prefixer() -> Prefixer: + nonlocal calls + calls += 1 + return Prefixer("factory:") + + taskset = vf.Taskset( + source=source_rows, + rewards=[prefix_reward], + objects={"prefixer": make_prefixer}, + bindings={"prefix_reward.prefixer": "objects.prefixer"}, + ) + env = vf.Env(taskset=taskset, harness=vf.Harness()) + task = next(iter(taskset)) + state = await env.harness.setup_state(task, vf.State.for_task(task)) + + await env.harness.runtime.score_rollout(task, state) + await env.harness.runtime.score_rollout(task, state) + + assert calls == 1 + assert state["prefixed"] == "factory:ok" + + +@pytest.mark.asyncio +async def test_framework_args_win_over_taskset_bindings() -> None: + taskset = vf.Taskset( + source=source_rows, + rewards=[framework_state_reward], + bindings={"framework_state_reward.state": "objects.missing"}, + ) + + state = await score_taskset(taskset) + + assert state["framework_state_seen"] is True + + +@pytest.mark.asyncio +async def test_caller_kwargs_win_over_taskset_bindings_for_handlers() -> None: + taskset = vf.Taskset( + source=source_rows, + setups=[setup_with_override], + objects={"token": "bound"}, + bindings={"setup_with_override.token": "objects.token"}, + ) + env = vf.Env(taskset=taskset, harness=vf.Harness()) + task = next(iter(taskset)) + state = vf.State.for_task(task) + + await env.harness.runtime.run_rollout_handlers( + [setup_with_override], task=task, state=state, token="explicit" + ) + + assert state["token"] == "explicit" + + +@pytest.mark.asyncio +async def test_missing_taskset_binding_error_names_signal_and_arg() -> None: + taskset = vf.Taskset(source=source_rows, rewards=[missing_binding_reward]) + + with pytest.raises( + TypeError, + match="reward signal 'missing_binding_reward'.*extractor", + ): + await score_taskset(taskset) + + +@pytest.mark.asyncio +async def 
test_taskset_config_map_round_trips_objects_and_bindings() -> None: + config = vf.TasksetConfig( + objects={"prefixer": Prefixer("config:")}, + bindings={"prefix_reward.prefixer": "objects.prefixer"}, + ) + taskset = vf.Taskset( + source=source_rows, + rewards=[prefix_reward], + config=config, + ) + + state = await score_taskset(taskset) + + assert state["prefixed"] == "config:ok" + + +@pytest.mark.asyncio +async def test_taskset_bindings_support_shared_extractor_pattern() -> None: + taskset = vf.Taskset( + source=source_rows, + rewards=[extracted_answer_reward], + objects={"extract_answer": lambda: TagExtractor("answer")}, + bindings={"extracted_answer_reward.extract_answer": "objects.extract_answer"}, + ) + env = vf.Env(taskset=taskset, harness=vf.Harness()) + task = next(iter(taskset)) + state = await env.harness.setup_state(task, vf.State.for_task(task)) + state["completion"] = [{"role": "assistant", "content": "ok"}] + + await env.harness.runtime.score_rollout(task, state) + + assert state["reward"] == 1.0 diff --git a/uv.lock b/uv.lock index b91d479f2..11a8be1be 100644 --- a/uv.lock +++ b/uv.lock @@ -13,6 +13,10 @@ resolution-markers = [ "python_full_version == '3.11.*' and sys_platform != 'emscripten' and sys_platform != 'win32'", "python_full_version < '3.11'", ] +conflicts = [[ + { package = "verifiers", extra = "openenv" }, + { package = "verifiers", group = "policy" }, +]] [options] @@ -20,6 +24,7 @@ resolution-markers = [ prime-tunnel = false prime-sandboxes = false renderers = false +openenv-core = false [[package]] name = "accelerate" @@ -76,7 +81,7 @@ source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "aiohappyeyeballs" }, { name = "aiosignal" }, - { name = "async-timeout", marker = "python_full_version < '3.11'" }, + { name = "async-timeout", marker = "python_full_version < '3.11' or (extra == 'extra-9-verifiers-openenv' and extra == 'group-9-verifiers-policy')" }, { name = "attrs" }, { name = "frozenlist" }, { name = 
"multidict" }, @@ -170,7 +175,7 @@ version = "1.4.0" source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "frozenlist" }, - { name = "typing-extensions", marker = "python_full_version < '3.13'" }, + { name = "typing-extensions", marker = "python_full_version < '3.13' or (extra == 'extra-9-verifiers-openenv' and extra == 'group-9-verifiers-policy')" }, ] sdist = { url = "https://files.pythonhosted.org/packages/61/62/06741b579156360248d1ec624842ad0edf697050bbaf7c3e46394e106ad1/aiosignal-1.4.0.tar.gz", hash = "sha256:f47eecd9468083c2029cc99945502cb7708b082c232f9aca65da147157b251c7", size = 25007, upload-time = "2025-07-03T22:54:43.528Z" } wheels = [ @@ -228,9 +233,9 @@ name = "anyio" version = "4.13.0" source = { registry = "https://pypi.org/simple" } dependencies = [ - { name = "exceptiongroup", marker = "python_full_version < '3.11'" }, + { name = "exceptiongroup", marker = "python_full_version < '3.11' or (extra == 'extra-9-verifiers-openenv' and extra == 'group-9-verifiers-policy')" }, { name = "idna" }, - { name = "typing-extensions", marker = "python_full_version < '3.13'" }, + { name = "typing-extensions", marker = "python_full_version < '3.13' or (extra == 'extra-9-verifiers-openenv' and extra == 'group-9-verifiers-policy')" }, ] sdist = { url = "https://files.pythonhosted.org/packages/19/14/2c5dd9f512b66549ae92767a9c7b330ae88e1932ca57876909410251fe13/anyio-4.13.0.tar.gz", hash = "sha256:334b70e641fd2221c1505b3890c69882fe4a2df910cba14d97019b90b24439dc", size = 231622, upload-time = "2026-03-24T12:59:09.671Z" } wheels = [ @@ -296,17 +301,57 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/64/b4/17d4b0b2a2dc85a6df63d1157e028ed19f90d4cd97c36717afef2bc2f395/attrs-26.1.0-py3-none-any.whl", hash = "sha256:c647aa4a12dfbad9333ca4e71fe62ddc36f4e63b2d260a37a8b83d2f043ac309", size = 67548, upload-time = "2026-03-19T14:22:23.645Z" }, ] +[[package]] +name = "audioop-lts" +version = "0.2.2" +source = { registry = 
"https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/38/53/946db57842a50b2da2e0c1e34bd37f36f5aadba1a929a3971c5d7841dbca/audioop_lts-0.2.2.tar.gz", hash = "sha256:64d0c62d88e67b98a1a5e71987b7aa7b5bcffc7dcee65b635823dbdd0a8dbbd0", size = 30686, upload-time = "2025-08-05T16:43:17.409Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/de/d4/94d277ca941de5a507b07f0b592f199c22454eeaec8f008a286b3fbbacd6/audioop_lts-0.2.2-cp313-abi3-macosx_10_13_universal2.whl", hash = "sha256:fd3d4602dc64914d462924a08c1a9816435a2155d74f325853c1f1ac3b2d9800", size = 46523, upload-time = "2025-08-05T16:42:20.836Z" }, + { url = "https://files.pythonhosted.org/packages/f8/5a/656d1c2da4b555920ce4177167bfeb8623d98765594af59702c8873f60ec/audioop_lts-0.2.2-cp313-abi3-macosx_10_13_x86_64.whl", hash = "sha256:550c114a8df0aafe9a05442a1162dfc8fec37e9af1d625ae6060fed6e756f303", size = 27455, upload-time = "2025-08-05T16:42:22.283Z" }, + { url = "https://files.pythonhosted.org/packages/1b/83/ea581e364ce7b0d41456fb79d6ee0ad482beda61faf0cab20cbd4c63a541/audioop_lts-0.2.2-cp313-abi3-macosx_11_0_arm64.whl", hash = "sha256:9a13dc409f2564de15dd68be65b462ba0dde01b19663720c68c1140c782d1d75", size = 26997, upload-time = "2025-08-05T16:42:23.849Z" }, + { url = "https://files.pythonhosted.org/packages/b8/3b/e8964210b5e216e5041593b7d33e97ee65967f17c282e8510d19c666dab4/audioop_lts-0.2.2-cp313-abi3-manylinux1_x86_64.manylinux_2_28_x86_64.manylinux_2_5_x86_64.whl", hash = "sha256:51c916108c56aa6e426ce611946f901badac950ee2ddaf302b7ed35d9958970d", size = 85844, upload-time = "2025-08-05T16:42:25.208Z" }, + { url = "https://files.pythonhosted.org/packages/c7/2e/0a1c52faf10d51def20531a59ce4c706cb7952323b11709e10de324d6493/audioop_lts-0.2.2-cp313-abi3-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:47eba38322370347b1c47024defbd36374a211e8dd5b0dcbce7b34fdb6f8847b", size = 85056, upload-time = "2025-08-05T16:42:26.559Z" }, + { url = 
"https://files.pythonhosted.org/packages/75/e8/cd95eef479656cb75ab05dfece8c1f8c395d17a7c651d88f8e6e291a63ab/audioop_lts-0.2.2-cp313-abi3-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:ba7c3a7e5f23e215cb271516197030c32aef2e754252c4c70a50aaff7031a2c8", size = 93892, upload-time = "2025-08-05T16:42:27.902Z" }, + { url = "https://files.pythonhosted.org/packages/5c/1e/a0c42570b74f83efa5cca34905b3eef03f7ab09fe5637015df538a7f3345/audioop_lts-0.2.2-cp313-abi3-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:def246fe9e180626731b26e89816e79aae2276f825420a07b4a647abaa84becc", size = 96660, upload-time = "2025-08-05T16:42:28.9Z" }, + { url = "https://files.pythonhosted.org/packages/50/d5/8a0ae607ca07dbb34027bac8db805498ee7bfecc05fd2c148cc1ed7646e7/audioop_lts-0.2.2-cp313-abi3-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:e160bf9df356d841bb6c180eeeea1834085464626dc1b68fa4e1d59070affdc3", size = 79143, upload-time = "2025-08-05T16:42:29.929Z" }, + { url = "https://files.pythonhosted.org/packages/12/17/0d28c46179e7910bfb0bb62760ccb33edb5de973052cb2230b662c14ca2e/audioop_lts-0.2.2-cp313-abi3-musllinux_1_2_aarch64.whl", hash = "sha256:4b4cd51a57b698b2d06cb9993b7ac8dfe89a3b2878e96bc7948e9f19ff51dba6", size = 84313, upload-time = "2025-08-05T16:42:30.949Z" }, + { url = "https://files.pythonhosted.org/packages/84/ba/bd5d3806641564f2024e97ca98ea8f8811d4e01d9b9f9831474bc9e14f9e/audioop_lts-0.2.2-cp313-abi3-musllinux_1_2_ppc64le.whl", hash = "sha256:4a53aa7c16a60a6857e6b0b165261436396ef7293f8b5c9c828a3a203147ed4a", size = 93044, upload-time = "2025-08-05T16:42:31.959Z" }, + { url = "https://files.pythonhosted.org/packages/f9/5e/435ce8d5642f1f7679540d1e73c1c42d933331c0976eb397d1717d7f01a3/audioop_lts-0.2.2-cp313-abi3-musllinux_1_2_riscv64.whl", hash = "sha256:3fc38008969796f0f689f1453722a0f463da1b8a6fbee11987830bfbb664f623", size = 78766, upload-time = "2025-08-05T16:42:33.302Z" }, + { url = 
"https://files.pythonhosted.org/packages/ae/3b/b909e76b606cbfd53875693ec8c156e93e15a1366a012f0b7e4fb52d3c34/audioop_lts-0.2.2-cp313-abi3-musllinux_1_2_s390x.whl", hash = "sha256:15ab25dd3e620790f40e9ead897f91e79c0d3ce65fe193c8ed6c26cffdd24be7", size = 87640, upload-time = "2025-08-05T16:42:34.854Z" }, + { url = "https://files.pythonhosted.org/packages/30/e7/8f1603b4572d79b775f2140d7952f200f5e6c62904585d08a01f0a70393a/audioop_lts-0.2.2-cp313-abi3-musllinux_1_2_x86_64.whl", hash = "sha256:03f061a1915538fd96272bac9551841859dbb2e3bf73ebe4a23ef043766f5449", size = 86052, upload-time = "2025-08-05T16:42:35.839Z" }, + { url = "https://files.pythonhosted.org/packages/b5/96/c37846df657ccdda62ba1ae2b6534fa90e2e1b1742ca8dcf8ebd38c53801/audioop_lts-0.2.2-cp313-abi3-win32.whl", hash = "sha256:3bcddaaf6cc5935a300a8387c99f7a7fbbe212a11568ec6cf6e4bc458c048636", size = 26185, upload-time = "2025-08-05T16:42:37.04Z" }, + { url = "https://files.pythonhosted.org/packages/34/a5/9d78fdb5b844a83da8a71226c7bdae7cc638861085fff7a1d707cb4823fa/audioop_lts-0.2.2-cp313-abi3-win_amd64.whl", hash = "sha256:a2c2a947fae7d1062ef08c4e369e0ba2086049a5e598fda41122535557012e9e", size = 30503, upload-time = "2025-08-05T16:42:38.427Z" }, + { url = "https://files.pythonhosted.org/packages/34/25/20d8fde083123e90c61b51afb547bb0ea7e77bab50d98c0ab243d02a0e43/audioop_lts-0.2.2-cp313-abi3-win_arm64.whl", hash = "sha256:5f93a5db13927a37d2d09637ccca4b2b6b48c19cd9eda7b17a2e9f77edee6a6f", size = 24173, upload-time = "2025-08-05T16:42:39.704Z" }, + { url = "https://files.pythonhosted.org/packages/58/a7/0a764f77b5c4ac58dc13c01a580f5d32ae8c74c92020b961556a43e26d02/audioop_lts-0.2.2-cp313-cp313t-macosx_10_13_universal2.whl", hash = "sha256:73f80bf4cd5d2ca7814da30a120de1f9408ee0619cc75da87d0641273d202a09", size = 47096, upload-time = "2025-08-05T16:42:40.684Z" }, + { url = 
"https://files.pythonhosted.org/packages/aa/ed/ebebedde1a18848b085ad0fa54b66ceb95f1f94a3fc04f1cd1b5ccb0ed42/audioop_lts-0.2.2-cp313-cp313t-macosx_10_13_x86_64.whl", hash = "sha256:106753a83a25ee4d6f473f2be6b0966fc1c9af7e0017192f5531a3e7463dce58", size = 27748, upload-time = "2025-08-05T16:42:41.992Z" }, + { url = "https://files.pythonhosted.org/packages/cb/6e/11ca8c21af79f15dbb1c7f8017952ee8c810c438ce4e2b25638dfef2b02c/audioop_lts-0.2.2-cp313-cp313t-macosx_11_0_arm64.whl", hash = "sha256:fbdd522624141e40948ab3e8cdae6e04c748d78710e9f0f8d4dae2750831de19", size = 27329, upload-time = "2025-08-05T16:42:42.987Z" }, + { url = "https://files.pythonhosted.org/packages/84/52/0022f93d56d85eec5da6b9da6a958a1ef09e80c39f2cc0a590c6af81dcbb/audioop_lts-0.2.2-cp313-cp313t-manylinux1_x86_64.manylinux_2_28_x86_64.manylinux_2_5_x86_64.whl", hash = "sha256:143fad0311e8209ece30a8dbddab3b65ab419cbe8c0dde6e8828da25999be911", size = 92407, upload-time = "2025-08-05T16:42:44.336Z" }, + { url = "https://files.pythonhosted.org/packages/87/1d/48a889855e67be8718adbc7a01f3c01d5743c325453a5e81cf3717664aad/audioop_lts-0.2.2-cp313-cp313t-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:dfbbc74ec68a0fd08cfec1f4b5e8cca3d3cd7de5501b01c4b5d209995033cde9", size = 91811, upload-time = "2025-08-05T16:42:45.325Z" }, + { url = "https://files.pythonhosted.org/packages/98/a6/94b7213190e8077547ffae75e13ed05edc488653c85aa5c41472c297d295/audioop_lts-0.2.2-cp313-cp313t-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:cfcac6aa6f42397471e4943e0feb2244549db5c5d01efcd02725b96af417f3fe", size = 100470, upload-time = "2025-08-05T16:42:46.468Z" }, + { url = "https://files.pythonhosted.org/packages/e9/e9/78450d7cb921ede0cfc33426d3a8023a3bda755883c95c868ee36db8d48d/audioop_lts-0.2.2-cp313-cp313t-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:752d76472d9804ac60f0078c79cdae8b956f293177acd2316cd1e15149aee132", 
size = 103878, upload-time = "2025-08-05T16:42:47.576Z" }, + { url = "https://files.pythonhosted.org/packages/4f/e2/cd5439aad4f3e34ae1ee852025dc6aa8f67a82b97641e390bf7bd9891d3e/audioop_lts-0.2.2-cp313-cp313t-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:83c381767e2cc10e93e40281a04852facc4cd9334550e0f392f72d1c0a9c5753", size = 84867, upload-time = "2025-08-05T16:42:49.003Z" }, + { url = "https://files.pythonhosted.org/packages/68/4b/9d853e9076c43ebba0d411e8d2aa19061083349ac695a7d082540bad64d0/audioop_lts-0.2.2-cp313-cp313t-musllinux_1_2_aarch64.whl", hash = "sha256:c0022283e9556e0f3643b7c3c03f05063ca72b3063291834cca43234f20c60bb", size = 90001, upload-time = "2025-08-05T16:42:50.038Z" }, + { url = "https://files.pythonhosted.org/packages/58/26/4bae7f9d2f116ed5593989d0e521d679b0d583973d203384679323d8fa85/audioop_lts-0.2.2-cp313-cp313t-musllinux_1_2_ppc64le.whl", hash = "sha256:a2d4f1513d63c795e82948e1305f31a6d530626e5f9f2605408b300ae6095093", size = 99046, upload-time = "2025-08-05T16:42:51.111Z" }, + { url = "https://files.pythonhosted.org/packages/b2/67/a9f4fb3e250dda9e9046f8866e9fa7d52664f8985e445c6b4ad6dfb55641/audioop_lts-0.2.2-cp313-cp313t-musllinux_1_2_riscv64.whl", hash = "sha256:c9c8e68d8b4a56fda8c025e538e639f8c5953f5073886b596c93ec9b620055e7", size = 84788, upload-time = "2025-08-05T16:42:52.198Z" }, + { url = "https://files.pythonhosted.org/packages/70/f7/3de86562db0121956148bcb0fe5b506615e3bcf6e63c4357a612b910765a/audioop_lts-0.2.2-cp313-cp313t-musllinux_1_2_s390x.whl", hash = "sha256:96f19de485a2925314f5020e85911fb447ff5fbef56e8c7c6927851b95533a1c", size = 94472, upload-time = "2025-08-05T16:42:53.59Z" }, + { url = "https://files.pythonhosted.org/packages/f1/32/fd772bf9078ae1001207d2df1eef3da05bea611a87dd0e8217989b2848fa/audioop_lts-0.2.2-cp313-cp313t-musllinux_1_2_x86_64.whl", hash = "sha256:e541c3ef484852ef36545f66209444c48b28661e864ccadb29daddb6a4b8e5f5", size = 92279, upload-time = "2025-08-05T16:42:54.632Z" }, + { url = 
"https://files.pythonhosted.org/packages/4f/41/affea7181592ab0ab560044632571a38edaf9130b84928177823fbf3176a/audioop_lts-0.2.2-cp313-cp313t-win32.whl", hash = "sha256:d5e73fa573e273e4f2e5ff96f9043858a5e9311e94ffefd88a3186a910c70917", size = 26568, upload-time = "2025-08-05T16:42:55.627Z" }, + { url = "https://files.pythonhosted.org/packages/28/2b/0372842877016641db8fc54d5c88596b542eec2f8f6c20a36fb6612bf9ee/audioop_lts-0.2.2-cp313-cp313t-win_amd64.whl", hash = "sha256:9191d68659eda01e448188f60364c7763a7ca6653ed3f87ebb165822153a8547", size = 30942, upload-time = "2025-08-05T16:42:56.674Z" }, + { url = "https://files.pythonhosted.org/packages/ee/ca/baf2b9cc7e96c179bb4a54f30fcd83e6ecb340031bde68f486403f943768/audioop_lts-0.2.2-cp313-cp313t-win_arm64.whl", hash = "sha256:c174e322bb5783c099aaf87faeb240c8d210686b04bd61dfd05a8e5a83d88969", size = 24603, upload-time = "2025-08-05T16:42:57.571Z" }, +] + [[package]] name = "authlib" -version = "1.7.0" +version = "1.7.2" source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "cryptography" }, { name = "joserfc" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/d9/82/4d0603f30c1b4629b1f091bb266b0d7986434891d6940a8c87f8098db24e/authlib-1.7.0.tar.gz", hash = "sha256:b3e326c9aa9cc3ea95fe7d89fd880722d3608da4d00e8a27e061e64b48d801d5", size = 175890, upload-time = "2026-04-18T11:00:28.559Z" } +sdist = { url = "https://files.pythonhosted.org/packages/36/98/7d93f30d029643c0275dbc0bd6d5a6f670661ee6c9a94d93af7ab4887600/authlib-1.7.2.tar.gz", hash = "sha256:2cea25fefcd4e7173bdf1372c0afc265c8034b23a8cd5dcb6a9164b826c64231", size = 176511, upload-time = "2026-05-06T08:10:23.116Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/ca/48/c954218b2a250e23f178f10167c4173fecb5a75d2c206f0a67ba58006c26/authlib-1.7.0-py2.py3-none-any.whl", hash = "sha256:e36817afb02f6f0b6bf55f150782499ddd6ddf44b402bb055d3263cc65ac9ae0", size = 258779, upload-time = "2026-04-18T11:00:26.64Z" }, + { url = 
"https://files.pythonhosted.org/packages/fb/95/adcb68e20c34162e9135f370d6e31737719c2b6f94bc953fe7ed1f10fe21/authlib-1.7.2-py2.py3-none-any.whl", hash = "sha256:3e1faedc9d87e7d56a164eca3ccb6ace0d61b94abe83e92242f8dc8bba9b4a9f", size = 259548, upload-time = "2026-05-06T08:10:21.436Z" }, ] [[package]] @@ -349,7 +394,7 @@ name = "blake3" version = "1.0.8" source = { registry = "https://pypi.org/simple" } dependencies = [ - { name = "typing-extensions", marker = "python_full_version < '3.12'" }, + { name = "typing-extensions", marker = "python_full_version < '3.12' or (extra == 'extra-9-verifiers-openenv' and extra == 'group-9-verifiers-policy')" }, ] sdist = { url = "https://files.pythonhosted.org/packages/75/aa/abcd75e9600987a0bc6cfe9b6b2ff3f0e2cb08c170addc6e76035b5c4cb3/blake3-1.0.8.tar.gz", hash = "sha256:513cc7f0f5a7c035812604c2c852a0c1468311345573de647e310aca4ab165ba", size = 117308, upload-time = "2025-10-14T06:47:48.83Z" } wheels = [ @@ -415,6 +460,72 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/d8/fc/923e25ac9cadfff1cd20038bcc0854d0f98061eb6bc78e42c43615f5982d/blake3-1.0.8-cp313-cp313t-win_amd64.whl", hash = "sha256:3cec94ed5676821cf371e9c9d25a41b4f3ebdb5724719b31b2749653b7cc1dfa", size = 215369, upload-time = "2025-10-14T06:46:39.054Z" }, ] +[[package]] +name = "boltons" +version = "21.0.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/ad/1f/6c0608d86e0fc77c982a2923ece80eef85f091f2332fc13cbce41d70d502/boltons-21.0.0.tar.gz", hash = "sha256:65e70a79a731a7fe6e98592ecfb5ccf2115873d01dbc576079874629e5c90f13", size = 180201, upload-time = "2021-05-17T01:20:17.802Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/f7/a7/1a31561d10a089fcb46fe286766dd4e053a12f6e23b4fd1c26478aff2475/boltons-21.0.0-py2.py3-none-any.whl", hash = "sha256:b9bb7b58b2b420bbe11a6025fdef6d3e5edc9f76a42fb467afe7ca212ef9948b", size = 193723, upload-time = "2021-05-17T01:20:20.023Z" }, +] + 
+[[package]] +name = "bracex" +version = "2.6" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/63/9a/fec38644694abfaaeca2798b58e276a8e61de49e2e37494ace423395febc/bracex-2.6.tar.gz", hash = "sha256:98f1347cd77e22ee8d967a30ad4e310b233f7754dbf31ff3fceb76145ba47dc7", size = 26642, upload-time = "2025-06-22T19:12:31.254Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/9d/2a/9186535ce58db529927f6cf5990a849aa9e052eea3e2cfefe20b9e1802da/bracex-2.6-py3-none-any.whl", hash = "sha256:0b0049264e7340b3ec782b5cb99beb325f36c3782a32e36e876452fd49a09952", size = 11508, upload-time = "2025-06-22T19:12:29.781Z" }, +] + +[[package]] +name = "brotli" +version = "1.2.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/f7/16/c92ca344d646e71a43b8bb353f0a6490d7f6e06210f8554c8f874e454285/brotli-1.2.0.tar.gz", hash = "sha256:e310f77e41941c13340a95976fe66a8a95b01e783d430eeaf7a2f87e0a57dd0a", size = 7388632, upload-time = "2025-11-05T18:39:42.86Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/64/10/a090475284fc4a71aed40a96f32e44a7fe5bda39687353dd977720b211b6/brotli-1.2.0-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:3b90b767916ac44e93a8e28ce6adf8d551e43affb512f2377c732d486ac6514e", size = 863089, upload-time = "2025-11-05T18:38:01.181Z" }, + { url = "https://files.pythonhosted.org/packages/03/41/17416630e46c07ac21e378c3464815dd2e120b441e641bc516ac32cc51d2/brotli-1.2.0-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:6be67c19e0b0c56365c6a76e393b932fb0e78b3b56b711d180dd7013cb1fd984", size = 445442, upload-time = "2025-11-05T18:38:02.434Z" }, + { url = "https://files.pythonhosted.org/packages/24/31/90cc06584deb5d4fcafc0985e37741fc6b9717926a78674bbb3ce018957e/brotli-1.2.0-cp310-cp310-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = 
"sha256:0bbd5b5ccd157ae7913750476d48099aaf507a79841c0d04a9db4415b14842de", size = 1532658, upload-time = "2025-11-05T18:38:03.588Z" }, + { url = "https://files.pythonhosted.org/packages/62/17/33bf0c83bcbc96756dfd712201d87342732fad70bb3472c27e833a44a4f9/brotli-1.2.0-cp310-cp310-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:3f3c908bcc404c90c77d5a073e55271a0a498f4e0756e48127c35d91cf155947", size = 1631241, upload-time = "2025-11-05T18:38:04.582Z" }, + { url = "https://files.pythonhosted.org/packages/48/10/f47854a1917b62efe29bc98ac18e5d4f71df03f629184575b862ef2e743b/brotli-1.2.0-cp310-cp310-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:1b557b29782a643420e08d75aea889462a4a8796e9a6cf5621ab05a3f7da8ef2", size = 1424307, upload-time = "2025-11-05T18:38:05.587Z" }, + { url = "https://files.pythonhosted.org/packages/e4/b7/f88eb461719259c17483484ea8456925ee057897f8e64487d76e24e5e38d/brotli-1.2.0-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:81da1b229b1889f25adadc929aeb9dbc4e922bd18561b65b08dd9343cfccca84", size = 1488208, upload-time = "2025-11-05T18:38:06.613Z" }, + { url = "https://files.pythonhosted.org/packages/26/59/41bbcb983a0c48b0b8004203e74706c6b6e99a04f3c7ca6f4f41f364db50/brotli-1.2.0-cp310-cp310-musllinux_1_2_ppc64le.whl", hash = "sha256:ff09cd8c5eec3b9d02d2408db41be150d8891c5566addce57513bf546e3d6c6d", size = 1597574, upload-time = "2025-11-05T18:38:07.838Z" }, + { url = "https://files.pythonhosted.org/packages/8e/e6/8c89c3bdabbe802febb4c5c6ca224a395e97913b5df0dff11b54f23c1788/brotli-1.2.0-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:a1778532b978d2536e79c05dac2d8cd857f6c55cd0c95ace5b03740824e0e2f1", size = 1492109, upload-time = "2025-11-05T18:38:08.816Z" }, + { url = "https://files.pythonhosted.org/packages/ed/9a/4b19d4310b2dbd545c0c33f176b0528fa68c3cd0754e34b2f2bcf56548ae/brotli-1.2.0-cp310-cp310-win32.whl", hash = 
"sha256:b232029d100d393ae3c603c8ffd7e3fe6f798c5e28ddca5feabb8e8fdb732997", size = 334461, upload-time = "2025-11-05T18:38:10.729Z" }, + { url = "https://files.pythonhosted.org/packages/ac/39/70981d9f47705e3c2b95c0847dfa3e7a37aa3b7c6030aedc4873081ed005/brotli-1.2.0-cp310-cp310-win_amd64.whl", hash = "sha256:ef87b8ab2704da227e83a246356a2b179ef826f550f794b2c52cddb4efbd0196", size = 369035, upload-time = "2025-11-05T18:38:11.827Z" }, + { url = "https://files.pythonhosted.org/packages/7a/ef/f285668811a9e1ddb47a18cb0b437d5fc2760d537a2fe8a57875ad6f8448/brotli-1.2.0-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:15b33fe93cedc4caaff8a0bd1eb7e3dab1c61bb22a0bf5bdfdfd97cd7da79744", size = 863110, upload-time = "2025-11-05T18:38:12.978Z" }, + { url = "https://files.pythonhosted.org/packages/50/62/a3b77593587010c789a9d6eaa527c79e0848b7b860402cc64bc0bc28a86c/brotli-1.2.0-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:898be2be399c221d2671d29eed26b6b2713a02c2119168ed914e7d00ceadb56f", size = 445438, upload-time = "2025-11-05T18:38:14.208Z" }, + { url = "https://files.pythonhosted.org/packages/cd/e1/7fadd47f40ce5549dc44493877db40292277db373da5053aff181656e16e/brotli-1.2.0-cp311-cp311-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:350c8348f0e76fff0a0fd6c26755d2653863279d086d3aa2c290a6a7251135dd", size = 1534420, upload-time = "2025-11-05T18:38:15.111Z" }, + { url = "https://files.pythonhosted.org/packages/12/8b/1ed2f64054a5a008a4ccd2f271dbba7a5fb1a3067a99f5ceadedd4c1d5a7/brotli-1.2.0-cp311-cp311-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:2e1ad3fda65ae0d93fec742a128d72e145c9c7a99ee2fcd667785d99eb25a7fe", size = 1632619, upload-time = "2025-11-05T18:38:16.094Z" }, + { url = "https://files.pythonhosted.org/packages/89/5a/7071a621eb2d052d64efd5da2ef55ecdac7c3b0c6e4f9d519e9c66d987ef/brotli-1.2.0-cp311-cp311-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = 
"sha256:40d918bce2b427a0c4ba189df7a006ac0c7277c180aee4617d99e9ccaaf59e6a", size = 1426014, upload-time = "2025-11-05T18:38:17.177Z" }, + { url = "https://files.pythonhosted.org/packages/26/6d/0971a8ea435af5156acaaccec1a505f981c9c80227633851f2810abd252a/brotli-1.2.0-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:2a7f1d03727130fc875448b65b127a9ec5d06d19d0148e7554384229706f9d1b", size = 1489661, upload-time = "2025-11-05T18:38:18.41Z" }, + { url = "https://files.pythonhosted.org/packages/f3/75/c1baca8b4ec6c96a03ef8230fab2a785e35297632f402ebb1e78a1e39116/brotli-1.2.0-cp311-cp311-musllinux_1_2_ppc64le.whl", hash = "sha256:9c79f57faa25d97900bfb119480806d783fba83cd09ee0b33c17623935b05fa3", size = 1599150, upload-time = "2025-11-05T18:38:19.792Z" }, + { url = "https://files.pythonhosted.org/packages/0d/1a/23fcfee1c324fd48a63d7ebf4bac3a4115bdb1b00e600f80f727d850b1ae/brotli-1.2.0-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:844a8ceb8483fefafc412f85c14f2aae2fb69567bf2a0de53cdb88b73e7c43ae", size = 1493505, upload-time = "2025-11-05T18:38:20.913Z" }, + { url = "https://files.pythonhosted.org/packages/36/e5/12904bbd36afeef53d45a84881a4810ae8810ad7e328a971ebbfd760a0b3/brotli-1.2.0-cp311-cp311-win32.whl", hash = "sha256:aa47441fa3026543513139cb8926a92a8e305ee9c71a6209ef7a97d91640ea03", size = 334451, upload-time = "2025-11-05T18:38:21.94Z" }, + { url = "https://files.pythonhosted.org/packages/02/8b/ecb5761b989629a4758c394b9301607a5880de61ee2ee5fe104b87149ebc/brotli-1.2.0-cp311-cp311-win_amd64.whl", hash = "sha256:022426c9e99fd65d9475dce5c195526f04bb8be8907607e27e747893f6ee3e24", size = 369035, upload-time = "2025-11-05T18:38:22.941Z" }, + { url = "https://files.pythonhosted.org/packages/11/ee/b0a11ab2315c69bb9b45a2aaed022499c9c24a205c3a49c3513b541a7967/brotli-1.2.0-cp312-cp312-macosx_10_13_universal2.whl", hash = "sha256:35d382625778834a7f3061b15423919aa03e4f5da34ac8e02c074e4b75ab4f84", size = 861543, upload-time = "2025-11-05T18:38:24.183Z" }, + { url = 
"https://files.pythonhosted.org/packages/e1/2f/29c1459513cd35828e25531ebfcbf3e92a5e49f560b1777a9af7203eb46e/brotli-1.2.0-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:7a61c06b334bd99bc5ae84f1eeb36bfe01400264b3c352f968c6e30a10f9d08b", size = 444288, upload-time = "2025-11-05T18:38:25.139Z" }, + { url = "https://files.pythonhosted.org/packages/3d/6f/feba03130d5fceadfa3a1bb102cb14650798c848b1df2a808356f939bb16/brotli-1.2.0-cp312-cp312-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:acec55bb7c90f1dfc476126f9711a8e81c9af7fb617409a9ee2953115343f08d", size = 1528071, upload-time = "2025-11-05T18:38:26.081Z" }, + { url = "https://files.pythonhosted.org/packages/2b/38/f3abb554eee089bd15471057ba85f47e53a44a462cfce265d9bf7088eb09/brotli-1.2.0-cp312-cp312-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:260d3692396e1895c5034f204f0db022c056f9e2ac841593a4cf9426e2a3faca", size = 1626913, upload-time = "2025-11-05T18:38:27.284Z" }, + { url = "https://files.pythonhosted.org/packages/03/a7/03aa61fbc3c5cbf99b44d158665f9b0dd3d8059be16c460208d9e385c837/brotli-1.2.0-cp312-cp312-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:072e7624b1fc4d601036ab3f4f27942ef772887e876beff0301d261210bca97f", size = 1419762, upload-time = "2025-11-05T18:38:28.295Z" }, + { url = "https://files.pythonhosted.org/packages/21/1b/0374a89ee27d152a5069c356c96b93afd1b94eae83f1e004b57eb6ce2f10/brotli-1.2.0-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:adedc4a67e15327dfdd04884873c6d5a01d3e3b6f61406f99b1ed4865a2f6d28", size = 1484494, upload-time = "2025-11-05T18:38:29.29Z" }, + { url = "https://files.pythonhosted.org/packages/cf/57/69d4fe84a67aef4f524dcd075c6eee868d7850e85bf01d778a857d8dbe0a/brotli-1.2.0-cp312-cp312-musllinux_1_2_ppc64le.whl", hash = "sha256:7a47ce5c2288702e09dc22a44d0ee6152f2c7eda97b3c8482d826a1f3cfc7da7", size = 1593302, upload-time = "2025-11-05T18:38:30.639Z" }, + { url = 
"https://files.pythonhosted.org/packages/d5/3b/39e13ce78a8e9a621c5df3aeb5fd181fcc8caba8c48a194cd629771f6828/brotli-1.2.0-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:af43b8711a8264bb4e7d6d9a6d004c3a2019c04c01127a868709ec29962b6036", size = 1487913, upload-time = "2025-11-05T18:38:31.618Z" }, + { url = "https://files.pythonhosted.org/packages/62/28/4d00cb9bd76a6357a66fcd54b4b6d70288385584063f4b07884c1e7286ac/brotli-1.2.0-cp312-cp312-win32.whl", hash = "sha256:e99befa0b48f3cd293dafeacdd0d191804d105d279e0b387a32054c1180f3161", size = 334362, upload-time = "2025-11-05T18:38:32.939Z" }, + { url = "https://files.pythonhosted.org/packages/1c/4e/bc1dcac9498859d5e353c9b153627a3752868a9d5f05ce8dedd81a2354ab/brotli-1.2.0-cp312-cp312-win_amd64.whl", hash = "sha256:b35c13ce241abdd44cb8ca70683f20c0c079728a36a996297adb5334adfc1c44", size = 369115, upload-time = "2025-11-05T18:38:33.765Z" }, + { url = "https://files.pythonhosted.org/packages/6c/d4/4ad5432ac98c73096159d9ce7ffeb82d151c2ac84adcc6168e476bb54674/brotli-1.2.0-cp313-cp313-macosx_10_13_universal2.whl", hash = "sha256:9e5825ba2c9998375530504578fd4d5d1059d09621a02065d1b6bfc41a8e05ab", size = 861523, upload-time = "2025-11-05T18:38:34.67Z" }, + { url = "https://files.pythonhosted.org/packages/91/9f/9cc5bd03ee68a85dc4bc89114f7067c056a3c14b3d95f171918c088bf88d/brotli-1.2.0-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:0cf8c3b8ba93d496b2fae778039e2f5ecc7cff99df84df337ca31d8f2252896c", size = 444289, upload-time = "2025-11-05T18:38:35.6Z" }, + { url = "https://files.pythonhosted.org/packages/2e/b6/fe84227c56a865d16a6614e2c4722864b380cb14b13f3e6bef441e73a85a/brotli-1.2.0-cp313-cp313-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:c8565e3cdc1808b1a34714b553b262c5de5fbda202285782173ec137fd13709f", size = 1528076, upload-time = "2025-11-05T18:38:36.639Z" }, + { url = 
"https://files.pythonhosted.org/packages/55/de/de4ae0aaca06c790371cf6e7ee93a024f6b4bb0568727da8c3de112e726c/brotli-1.2.0-cp313-cp313-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:26e8d3ecb0ee458a9804f47f21b74845cc823fd1bb19f02272be70774f56e2a6", size = 1626880, upload-time = "2025-11-05T18:38:37.623Z" }, + { url = "https://files.pythonhosted.org/packages/5f/16/a1b22cbea436642e071adcaf8d4b350a2ad02f5e0ad0da879a1be16188a0/brotli-1.2.0-cp313-cp313-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:67a91c5187e1eec76a61625c77a6c8c785650f5b576ca732bd33ef58b0dff49c", size = 1419737, upload-time = "2025-11-05T18:38:38.729Z" }, + { url = "https://files.pythonhosted.org/packages/46/63/c968a97cbb3bdbf7f974ef5a6ab467a2879b82afbc5ffb65b8acbb744f95/brotli-1.2.0-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:4ecdb3b6dc36e6d6e14d3a1bdc6c1057c8cbf80db04031d566eb6080ce283a48", size = 1484440, upload-time = "2025-11-05T18:38:39.916Z" }, + { url = "https://files.pythonhosted.org/packages/06/9d/102c67ea5c9fc171f423e8399e585dabea29b5bc79b05572891e70013cdd/brotli-1.2.0-cp313-cp313-musllinux_1_2_ppc64le.whl", hash = "sha256:3e1b35d56856f3ed326b140d3c6d9db91740f22e14b06e840fe4bb1923439a18", size = 1593313, upload-time = "2025-11-05T18:38:41.24Z" }, + { url = "https://files.pythonhosted.org/packages/9e/4a/9526d14fa6b87bc827ba1755a8440e214ff90de03095cacd78a64abe2b7d/brotli-1.2.0-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:54a50a9dad16b32136b2241ddea9e4df159b41247b2ce6aac0b3276a66a8f1e5", size = 1487945, upload-time = "2025-11-05T18:38:42.277Z" }, + { url = "https://files.pythonhosted.org/packages/5b/e8/3fe1ffed70cbef83c5236166acaed7bb9c766509b157854c80e2f766b38c/brotli-1.2.0-cp313-cp313-win32.whl", hash = "sha256:1b1d6a4efedd53671c793be6dd760fcf2107da3a52331ad9ea429edf0902f27a", size = 334368, upload-time = "2025-11-05T18:38:43.345Z" }, + { url = 
"https://files.pythonhosted.org/packages/ff/91/e739587be970a113b37b821eae8097aac5a48e5f0eca438c22e4c7dd8648/brotli-1.2.0-cp313-cp313-win_amd64.whl", hash = "sha256:b63daa43d82f0cdabf98dee215b375b4058cce72871fd07934f179885aad16e8", size = 369116, upload-time = "2025-11-05T18:38:44.609Z" }, +] + [[package]] name = "cachetools" version = "7.0.6" @@ -510,7 +621,7 @@ name = "cffi" version = "2.0.0" source = { registry = "https://pypi.org/simple" } dependencies = [ - { name = "pycparser", marker = "implementation_name != 'PyPy'" }, + { name = "pycparser", marker = "implementation_name != 'PyPy' or (extra == 'extra-9-verifiers-openenv' and extra == 'group-9-verifiers-policy')" }, ] sdist = { url = "https://files.pythonhosted.org/packages/eb/56/b1ba7935a17738ae8453301356628e8147c79dbb825bcbc73dc7401f9846/cffi-2.0.0.tar.gz", hash = "sha256:44d1b5909021139fe36001ae048dbdde8214afa20200eda0f64c068cac5d5529", size = 523588, upload-time = "2025-09-08T23:24:04.541Z" } wheels = [ @@ -655,14 +766,26 @@ sdist = { url = "https://files.pythonhosted.org/packages/93/09/7d04d7581ae3bb8b5 [[package]] name = "click" -version = "8.3.3" +version = "8.1.8" source = { registry = "https://pypi.org/simple" } dependencies = [ - { name = "colorama", marker = "sys_platform == 'win32'" }, + { name = "colorama", marker = "sys_platform == 'win32' or (extra == 'extra-9-verifiers-openenv' and extra == 'group-9-verifiers-policy')" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/bb/63/f9e1ea081ce35720d8b92acde70daaedace594dc93b693c869e0d5910718/click-8.3.3.tar.gz", hash = "sha256:398329ad4837b2ff7cbe1dd166a4c0f8900c3ca3a218de04466f38f6497f18a2", size = 328061, upload-time = "2026-04-22T15:11:27.506Z" } +sdist = { url = "https://files.pythonhosted.org/packages/b9/2e/0090cbf739cee7d23781ad4b89a9894a41538e4fcf4c31dcdd705b78eb8b/click-8.1.8.tar.gz", hash = "sha256:ed53c9d8990d83c2a27deae68e4ee337473f6330c040a31d4225c9574d16096a", size = 226593, upload-time = "2024-12-21T18:38:44.339Z" } wheels 
= [ - { url = "https://files.pythonhosted.org/packages/ae/44/c1221527f6a71a01ec6fbad7fa78f1d50dfa02217385cf0fa3eec7087d59/click-8.3.3-py3-none-any.whl", hash = "sha256:a2bf429bb3033c89fa4936ffb35d5cb471e3719e1f3c8a7c3fff0b8314305613", size = 110502, upload-time = "2026-04-22T15:11:25.044Z" }, + { url = "https://files.pythonhosted.org/packages/7e/d4/7ebdbd03970677812aac39c869717059dbb71a4cfc033ca6e5221787892c/click-8.1.8-py3-none-any.whl", hash = "sha256:63c132bbbed01578a06712a2d1f497bb62d9c1c0d329b7903a866228027263b2", size = 98188, upload-time = "2024-12-21T18:38:41.666Z" }, +] + +[[package]] +name = "click-option-group" +version = "0.5.9" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "click" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/ef/ff/d291d66595b30b83d1cb9e314b2c9be7cfc7327d4a0d40a15da2416ea97b/click_option_group-0.5.9.tar.gz", hash = "sha256:f94ed2bc4cf69052e0f29592bd1e771a1789bd7bfc482dd0bc482134aff95823", size = 22222, upload-time = "2025-10-09T09:38:01.474Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/75/45/54bb2d8d4138964a94bef6e9afe48b0be4705ba66ac442ae7d8a8dc4ffef/click_option_group-0.5.9-py3-none-any.whl", hash = "sha256:ad2599248bd373e2e19bec5407967c3eec1d0d4fc4a5e77b08a0481e75991080", size = 11553, upload-time = "2025-10-09T09:38:00.066Z" }, ] [[package]] @@ -709,15 +832,15 @@ wheels = [ [[package]] name = "connect-python" -version = "0.9.0" +version = "0.8.1" source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "protobuf" }, { name = "pyqwest" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/74/fc/0e4798c53e2754f5de36ecf4d198706cb23711d603df6c008f6e7b5b21ae/connect_python-0.9.0.tar.gz", hash = "sha256:a188ec843b0f5953b7e1b88061af50ad91c9aaa2e982d7a89a63ae5c1fff932e", size = 46094, upload-time = "2026-03-19T02:40:42.279Z" } +sdist = { url = 
"https://files.pythonhosted.org/packages/8f/8d/e04954dacc3c32c2f858a115b2b989ed6cccefc3835fb440bb12dcb468c2/connect_python-0.8.1.tar.gz", hash = "sha256:0d36c9cfe050661f8f167afcfff8df622805a4f348961d0d8fab630c96f0bdab", size = 39292, upload-time = "2026-01-27T05:00:02.751Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/4c/15/5b42df2d9d34e5103f2b69e4f6a4aeb47c52589eaac8d53eb5b0a40eabaa/connect_python-0.9.0-py3-none-any.whl", hash = "sha256:896171fa7236d4e1557e3f7eee76daa8c9dd762f2c21662515f2060f1b542574", size = 63381, upload-time = "2026-03-19T02:40:40.743Z" }, + { url = "https://files.pythonhosted.org/packages/7d/e4/e36a6d0450493e2b940086ebe94394381803b2b09c97e8f8e34230bcaa99/connect_python-0.8.1-py3-none-any.whl", hash = "sha256:bfe31fb3d90c2a6715fc2996fa4bd5c98dfdccd81e47ac232c9d03aadd666be1", size = 54902, upload-time = "2026-01-27T05:00:01.18Z" }, ] [[package]] @@ -728,7 +851,7 @@ resolution-markers = [ "python_full_version < '3.11'", ] dependencies = [ - { name = "numpy", marker = "python_full_version < '3.11'" }, + { name = "numpy", marker = "python_full_version < '3.11' or (extra == 'extra-9-verifiers-openenv' and extra == 'group-9-verifiers-policy')" }, ] sdist = { url = "https://files.pythonhosted.org/packages/66/54/eb9bfc647b19f2009dd5c7f5ec51c4e6ca831725f1aea7a993034f483147/contourpy-1.3.2.tar.gz", hash = "sha256:b6945942715a034c671b7fc54f9588126b0b8bf23db2696e3ca8328f3ff0ab54", size = 13466130, upload-time = "2025-04-15T17:47:53.79Z" } wheels = [ @@ -806,7 +929,7 @@ resolution-markers = [ "python_full_version == '3.11.*' and sys_platform != 'emscripten' and sys_platform != 'win32'", ] dependencies = [ - { name = "numpy", marker = "python_full_version >= '3.11'" }, + { name = "numpy", marker = "python_full_version >= '3.11' or (extra == 'extra-9-verifiers-openenv' and extra == 'group-9-verifiers-policy')" }, ] sdist = { url = 
"https://files.pythonhosted.org/packages/58/01/1253e6698a07380cd31a736d248a3f2a50a7c88779a1813da27503cadc2a/contourpy-1.3.3.tar.gz", hash = "sha256:083e12155b210502d0bca491432bb04d56dc3432f95a979b429f2848c3dbe880", size = 13466174, upload-time = "2025-07-26T12:03:12.549Z" } wheels = [ @@ -946,7 +1069,8 @@ wheels = [ [package.optional-dependencies] toml = [ - { name = "tomli", marker = "python_full_version <= '3.11'" }, + { name = "tomli", version = "2.0.2", source = { registry = "https://pypi.org/simple" }, marker = "(python_full_version <= '3.11' and extra == 'group-9-verifiers-policy') or (extra == 'extra-9-verifiers-openenv' and extra == 'group-9-verifiers-policy')" }, + { name = "tomli", version = "2.4.1", source = { registry = "https://pypi.org/simple" }, marker = "(python_full_version <= '3.11' and extra == 'extra-9-verifiers-openenv') or (python_full_version <= '3.11' and extra != 'group-9-verifiers-policy') or (extra == 'extra-9-verifiers-openenv' and extra == 'group-9-verifiers-policy')" }, ] [[package]] @@ -954,8 +1078,8 @@ name = "cryptography" version = "46.0.7" source = { registry = "https://pypi.org/simple" } dependencies = [ - { name = "cffi", marker = "platform_python_implementation != 'PyPy'" }, - { name = "typing-extensions", marker = "python_full_version < '3.11'" }, + { name = "cffi", marker = "platform_python_implementation != 'PyPy' or (extra == 'extra-9-verifiers-openenv' and extra == 'group-9-verifiers-policy')" }, + { name = "typing-extensions", marker = "python_full_version < '3.11' or (extra == 'extra-9-verifiers-openenv' and extra == 'group-9-verifiers-policy')" }, ] sdist = { url = "https://files.pythonhosted.org/packages/47/93/ac8f3d5ff04d54bc814e961a43ae5b0b146154c89c61b47bb07557679b18/cryptography-46.0.7.tar.gz", hash = "sha256:e4cfd68c5f3e0bfdad0d38e023239b96a2fe84146481852dffbcca442c245aa5", size = 750652, upload-time = "2026-04-08T01:57:54.692Z" } wheels = [ @@ -1037,19 +1161,19 @@ wheels = [ [[package]] name = "cyclopts" -version 
= "4.11.0" +version = "4.11.2" source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "attrs" }, { name = "docstring-parser" }, { name = "rich" }, { name = "rich-rst" }, - { name = "tomli", marker = "python_full_version < '3.11'" }, + { name = "tomli", version = "2.4.1", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version < '3.11'" }, { name = "typing-extensions", marker = "python_full_version < '3.11'" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/f9/fa/eff8f1abae783bade9b5e9bafafd0040d4dbf51988f9384bfdc0326ba1fc/cyclopts-4.11.0.tar.gz", hash = "sha256:1ffcb9990dbd56b90da19980d31596de9e99019980a215a5d76cf88fe452e94d", size = 170690, upload-time = "2026-04-23T00:23:36.858Z" } +sdist = { url = "https://files.pythonhosted.org/packages/e4/f7/3ee212c1bc314551094fc8fda7b4b63c647ac5c32d06daa285d04d33edfc/cyclopts-4.11.2.tar.gz", hash = "sha256:8c9b77921660fa1ee52c150e2217ced672323efb3434e9b338077de1bc551ff4", size = 175935, upload-time = "2026-05-04T00:11:57.857Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/7c/37/197db187c260d24d4be1f09d427f59f3fb9a89bcf1354e23865c7bff7607/cyclopts-4.11.0-py3-none-any.whl", hash = "sha256:34318e3823b44b5baa754a5e37ec70a5c17dc81c65e4295ed70e17bc1aeae50d", size = 208494, upload-time = "2026-04-23T00:23:34.948Z" }, + { url = "https://files.pythonhosted.org/packages/23/18/4cedda786e7da429e7489549a9e5461530d4133130e541f25fb94f015776/cyclopts-4.11.2-py3-none-any.whl", hash = "sha256:838020120b939549ff7c8423aca29c86764b5dd1d8a5d7f3753a6327861f537b", size = 213537, upload-time = "2026-05-04T00:11:56.103Z" }, ] [[package]] @@ -1065,8 +1189,8 @@ dependencies = [ { name = "multiprocess" }, { name = "numpy" }, { name = "packaging" }, - { name = "pandas", version = "2.3.3", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version < '3.11'" }, - { name = "pandas", version = "3.0.2", source = { registry = "https://pypi.org/simple" }, marker = 
"python_full_version >= '3.11'" }, + { name = "pandas", version = "2.3.3", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version < '3.11' or (extra == 'extra-9-verifiers-openenv' and extra == 'group-9-verifiers-policy')" }, + { name = "pandas", version = "3.0.2", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version >= '3.11' or (extra == 'extra-9-verifiers-openenv' and extra == 'group-9-verifiers-policy')" }, { name = "pyarrow" }, { name = "pyyaml" }, { name = "requests" }, @@ -1239,14 +1363,11 @@ wheels = [ [[package]] name = "exceptiongroup" -version = "1.3.1" +version = "1.2.2" source = { registry = "https://pypi.org/simple" } -dependencies = [ - { name = "typing-extensions", marker = "python_full_version < '3.13'" }, -] -sdist = { url = "https://files.pythonhosted.org/packages/50/79/66800aadf48771f6b62f7eb014e352e5d06856655206165d775e675a02c9/exceptiongroup-1.3.1.tar.gz", hash = "sha256:8b412432c6055b0b7d14c310000ae93352ed6754f70fa8f7c34141f91c4e3219", size = 30371, upload-time = "2025-11-21T23:01:54.787Z" } +sdist = { url = "https://files.pythonhosted.org/packages/09/35/2495c4ac46b980e4ca1f6ad6db102322ef3ad2410b79fdde159a4b0f3b92/exceptiongroup-1.2.2.tar.gz", hash = "sha256:47c2edf7c6738fafb49fd34290706d1a1a2f4d1c6df275526b62cbb4aa5393cc", size = 28883, upload-time = "2024-07-12T22:26:00.161Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/8a/0e/97c33bf5009bdbac74fd2beace167cab3f978feb69cc36f1ef79360d6c4e/exceptiongroup-1.3.1-py3-none-any.whl", hash = "sha256:a7a39a3bd276781e98394987d3a5701d0c4edffb633bb7a5144577f82c773598", size = 16740, upload-time = "2025-11-21T23:01:53.443Z" }, + { url = "https://files.pythonhosted.org/packages/02/cc/b7e31358aac6ed1ef2bb790a9746ac2c69bcb3c8588b41616914eb106eaf/exceptiongroup-1.2.2-py3-none-any.whl", hash = "sha256:3111b9d131c238bec2f8f516e123e14ba243563fb135d3fe885990585aa7795b", size = 16453, upload-time = "2024-07-12T22:25:58.476Z" }, ] [[package]] 
@@ -1267,158 +1388,59 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/c1/ea/53f2148663b321f21b5a606bd5f191517cf40b7072c0497d3c92c4a13b1e/executing-2.2.1-py2.py3-none-any.whl", hash = "sha256:760643d3452b4d777d295bb167ccc74c64a81df23fb5e08eff250c425a4b2017", size = 28317, upload-time = "2025-09-01T09:48:08.5Z" }, ] +[[package]] +name = "face" +version = "26.0.0" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "boltons" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/24/4e/0e106b0ba486cc38c858fb5efe899002f2ec4765e0808b298d8e19a16efb/face-26.0.0.tar.gz", hash = "sha256:ae12136ff0052f124811f5319670a8d9d29b7d2caaaabe542813690967cc6bca", size = 49862, upload-time = "2026-02-14T00:17:12.576Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/63/1d/c2f7a4334f7501a3474766b5bc0948e8e0b0916217a54d092dd700a5ed3c/face-26.0.0-py3-none-any.whl", hash = "sha256:6ec9cf271d8ee2447f04b14264209a09ec9cbe8252255e61fb7ab6b154e300f9", size = 54825, upload-time = "2026-02-14T00:17:11.519Z" }, +] + [[package]] name = "fastapi" -version = "0.136.1" +version = "0.115.14" source = { registry = "https://pypi.org/simple" } dependencies = [ - { name = "annotated-doc" }, { name = "pydantic" }, { name = "starlette" }, { name = "typing-extensions" }, - { name = "typing-inspection" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/5d/45/c130091c2dfa061bbfe3150f2a5091ef1adf149f2a8d2ae769ecaf6e99a2/fastapi-0.136.1.tar.gz", hash = "sha256:7af665ad7acfa0a3baf8983d393b6b471b9da10ede59c60045f49fbc89a0fa7f", size = 397448, upload-time = "2026-04-23T16:49:44.046Z" } +sdist = { url = "https://files.pythonhosted.org/packages/ca/53/8c38a874844a8b0fa10dd8adf3836ac154082cf88d3f22b544e9ceea0a15/fastapi-0.115.14.tar.gz", hash = "sha256:b1de15cdc1c499a4da47914db35d0e4ef8f1ce62b624e94e0e5824421df99739", size = 296263, upload-time = "2025-06-26T15:29:08.21Z" } wheels = [ - { url = 
"https://files.pythonhosted.org/packages/5a/ff/2e4eca3ade2c22fe1dea7043b8ee9dabe47753349eb1b56a202de8af6349/fastapi-0.136.1-py3-none-any.whl", hash = "sha256:a6e9d7eeada96c93a4d69cb03836b44fa34e2854accb7244a1ece36cd4781c3f", size = 117683, upload-time = "2026-04-23T16:49:42.437Z" }, + { url = "https://files.pythonhosted.org/packages/53/50/b1222562c6d270fea83e9c9075b8e8600b8479150a18e4516a6138b980d1/fastapi-0.115.14-py3-none-any.whl", hash = "sha256:6c0c8bf9420bd58f565e585036d971872472b4f7d3f6c73b698e10cffdefb3ca", size = 95514, upload-time = "2025-06-26T15:29:06.49Z" }, ] [package.optional-dependencies] standard = [ { name = "email-validator" }, { name = "fastapi-cli", extra = ["standard"] }, - { name = "fastar" }, { name = "httpx" }, { name = "jinja2" }, - { name = "pydantic-extra-types" }, - { name = "pydantic-settings" }, { name = "python-multipart" }, { name = "uvicorn", extra = ["standard"] }, ] [[package]] name = "fastapi-cli" -version = "0.0.24" +version = "0.0.5" source = { registry = "https://pypi.org/simple" } dependencies = [ - { name = "rich-toolkit" }, - { name = "tomli", marker = "python_full_version < '3.11'" }, { name = "typer" }, { name = "uvicorn", extra = ["standard"] }, ] -sdist = { url = "https://files.pythonhosted.org/packages/6e/58/74797ae9e4610cfa0c6b34c8309096d3b20bb29be3b8b5fbf1004d10fa5f/fastapi_cli-0.0.24.tar.gz", hash = "sha256:1afc9c9e21d7ebc8a3ca5e31790cd8d837742be7e4f8b9236e99cb3451f0de00", size = 19043, upload-time = "2026-02-24T10:45:10.476Z" } +sdist = { url = "https://files.pythonhosted.org/packages/c5/f8/1ad5ce32d029aeb9117e9a5a9b3e314a8477525d60c12a9b7730a3c186ec/fastapi_cli-0.0.5.tar.gz", hash = "sha256:d30e1239c6f46fcb95e606f02cdda59a1e2fa778a54b64686b3ff27f6211ff9f", size = 15571, upload-time = "2024-08-02T05:48:13.16Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/c7/4b/68f9fe268e535d79c76910519530026a4f994ce07189ac0dded45c6af825/fastapi_cli-0.0.24-py3-none-any.whl", hash = 
"sha256:4a1f78ed798f106b4fee85ca93b85d8fe33c0a3570f775964d37edb80b8f0edc", size = 12304, upload-time = "2026-02-24T10:45:09.552Z" }, + { url = "https://files.pythonhosted.org/packages/24/ea/4b5011012ac925fe2f83b19d0e09cee9d324141ec7bf5e78bb2817f96513/fastapi_cli-0.0.5-py3-none-any.whl", hash = "sha256:e94d847524648c748a5350673546bbf9bcaeb086b33c24f2e82e021436866a46", size = 9489, upload-time = "2024-08-02T05:48:11.609Z" }, ] [package.optional-dependencies] standard = [ - { name = "fastapi-cloud-cli" }, - { name = "uvicorn", extra = ["standard"] }, -] - -[[package]] -name = "fastapi-cloud-cli" -version = "0.17.0" -source = { registry = "https://pypi.org/simple" } -dependencies = [ - { name = "fastar" }, - { name = "httpx" }, - { name = "pydantic", extra = ["email"] }, - { name = "rich-toolkit" }, - { name = "rignore" }, - { name = "sentry-sdk" }, - { name = "typer" }, { name = "uvicorn", extra = ["standard"] }, ] -sdist = { url = "https://files.pythonhosted.org/packages/67/79/66567c39c5fab6dbebf9e40b3a3fcb0e2ec359517c87a67434c76b06e60b/fastapi_cloud_cli-0.17.0.tar.gz", hash = "sha256:2b6c241b63427023bd1e23b3251f23234aba4b05428b245a050e92db1389823c", size = 47276, upload-time = "2026-04-15T13:17:56.402Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/50/31/fa442466bacadffec3d6611509d6ea391b6ca01b6ee0d4af835bfdea3483/fastapi_cloud_cli-0.17.0-py3-none-any.whl", hash = "sha256:b496e6998f037f572ab06a233ce257828b4c701488ce500b5c9d725e970a7cb1", size = 33936, upload-time = "2026-04-15T13:17:55.112Z" }, -] - -[[package]] -name = "fastar" -version = "0.11.0" -source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/03/0f/0aeb3fc50046617702acc0078b277b58367fd62eb727b9ec733ae0e8bbcc/fastar-0.11.0.tar.gz", hash = "sha256:aa7f100f7313c03fdb20f1385927ba95671071ba308ad0c1763fef295e1895ce", size = 70238, upload-time = "2026-04-13T17:11:17.143Z" } -wheels = [ - { url = 
"https://files.pythonhosted.org/packages/9b/4a/0d79fe52243a4130aa41d0a3a9eea22e00427db761e1a6782ee817c50222/fastar-0.11.0-cp310-cp310-macosx_10_12_x86_64.whl", hash = "sha256:e7c906ad371ca365591ebcb7630009923f3eceb20956814494d15591a78e9e46", size = 709786, upload-time = "2026-04-13T17:09:53.974Z" }, - { url = "https://files.pythonhosted.org/packages/9f/e4/77c94eaafc035e39f5ce5176e32743da4e3fe890f28790e708e53d8f75cd/fastar-0.11.0-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:6919497b35fa5bd978d2c26ee117cf1771b90ee5073f7518e44b9bc364b57715", size = 632127, upload-time = "2026-04-13T17:09:39.023Z" }, - { url = "https://files.pythonhosted.org/packages/3c/f6/97658dd992f4e45747d35adb24c0b100f6b6d451490685ae3fe8a3a2ee1b/fastar-0.11.0-cp310-cp310-manylinux_2_12_i686.manylinux2010_i686.whl", hash = "sha256:56b50206aeedd99e22b83289e6fb3ff8f7d7da4407d2419902e4716b4f90585a", size = 869608, upload-time = "2026-04-13T17:09:08.268Z" }, - { url = "https://files.pythonhosted.org/packages/e9/fc/81c1ec4d8146a437399e7b95631b51be312f323a9ce64569f932db6c3914/fastar-0.11.0-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:7a1811a69ae81d469720df0c8af3f84f834a93b5e4f8be0e0e8bde6a52fa11f2", size = 762925, upload-time = "2026-04-13T17:07:52.788Z" }, - { url = "https://files.pythonhosted.org/packages/b9/35/49baf480ecb197aea7ce2515c503a2f25061958dd3b4c98e98a3a11cdcc7/fastar-0.11.0-cp310-cp310-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:10486238c55589a3947c38f9cfb88a67d8a608eb8dddc722038237d0278a41d7", size = 759913, upload-time = "2026-04-13T17:08:07.324Z" }, - { url = "https://files.pythonhosted.org/packages/94/eb/946f1980267f2824efb7d7c518d47a49b89c0e9cd7c449301f5a7531558a/fastar-0.11.0-cp310-cp310-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:1555ef9992d368a6ec39092276990cef8d329c39a1d86ebd847eaa3b10efd472", size = 926054, upload-time = "2026-04-13T17:08:22.196Z" }, - { url = 
"https://files.pythonhosted.org/packages/0c/19/d5eb611085ce054382570d8d4e24a5e2ff23cd6d2404528a6643841d6059/fastar-0.11.0-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:b1f4aca0a9620b76988bbf6225cdea6678a392902444ca18bb8a51495b165a89", size = 818594, upload-time = "2026-04-13T17:08:52.366Z" }, - { url = "https://files.pythonhosted.org/packages/4a/52/18e8d55c0d3d917713f381cb2d0cb793da00c209c802e011d8dc72018cd5/fastar-0.11.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:75beeecac7d11a666a6c4a0b7f7e80842ae5cf523f2f890b99c78fc82b403545", size = 823005, upload-time = "2026-04-13T17:09:23.051Z" }, - { url = "https://files.pythonhosted.org/packages/2c/b4/0fecdcf33e5aaffe777b96a1c10a3204fe0b05bf18e971033a0bfedafc1c/fastar-0.11.0-cp310-cp310-manylinux_2_31_riscv64.whl", hash = "sha256:a08cdf5d16daa401c65c9c7493a18db7dc515c52155a17071ec7098bb07da9d3", size = 887115, upload-time = "2026-04-13T17:08:37.385Z" }, - { url = "https://files.pythonhosted.org/packages/08/f8/2a6ad1c2523eb72a4595a9331162fc67ce0f0aee3348728598026c516986/fastar-0.11.0-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:6e210375e5a7ba53586cbd6017aa417d2d2ceacbe8671682470281bd0a15e8ef", size = 973595, upload-time = "2026-04-13T17:10:09.258Z" }, - { url = "https://files.pythonhosted.org/packages/5c/a6/2aa48843228673feacc2b80876b8924e63ea9c5f5f607bd7a72416b86bae/fastar-0.11.0-cp310-cp310-musllinux_1_2_armv7l.whl", hash = "sha256:a2988eb2604b8e15670f355425e8c800e4dcd4edfbcbfe194397f8f17b7eb19e", size = 1036988, upload-time = "2026-04-13T17:10:26.133Z" }, - { url = "https://files.pythonhosted.org/packages/92/ac/3dd14b21c323e8484f47c910110d1d93139ba44621ac2c4c597dbe9fcdb7/fastar-0.11.0-cp310-cp310-musllinux_1_2_i686.whl", hash = "sha256:34abc857b46068fdf91d157bd0203bfd6791dc7a432d1ed180f5af6c2f5bcce9", size = 1078267, upload-time = "2026-04-13T17:10:43.645Z" }, - { url = 
"https://files.pythonhosted.org/packages/de/a1/3f89e58d6fa99160c9e7e17220c8ab5040b5cc017c4fac2356c6ed18453d/fastar-0.11.0-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:0d884be84e37a01053776395441fc960031974e0265801ce574efc3d05e0cdaf", size = 1032551, upload-time = "2026-04-13T17:11:00.667Z" }, - { url = "https://files.pythonhosted.org/packages/f6/ea/24dd3cfc2096933d7d2a80c926e79602cff1fa481124ed2165b60c1dd9ef/fastar-0.11.0-cp310-cp310-win32.whl", hash = "sha256:c721c1ad758e3e4c2c1fd9e96911a0fa58c0a6be5668f1bcfd0b741e72c7cb63", size = 456022, upload-time = "2026-04-13T17:11:41.859Z" }, - { url = "https://files.pythonhosted.org/packages/82/ef/6eb39ee9cdd59822d1c7337c4d28fdc948885bdf455af9e70efa9879e06f/fastar-0.11.0-cp310-cp310-win_amd64.whl", hash = "sha256:ba4180b7c3080f55f9035fdd7d8c39fe0e1485087a68ff615bb4784a10b8106b", size = 488392, upload-time = "2026-04-13T17:11:27.486Z" }, - { url = "https://files.pythonhosted.org/packages/11/7a/fb367bdaf4efa2c7952a45aeab2e87a564293ecffe150af673ec8edfda46/fastar-0.11.0-cp311-cp311-macosx_10_12_x86_64.whl", hash = "sha256:b82fd6f996e65a86f67a6bd64dd22ef3e8ae2dcaed0ae3b550e71f7e1bbb1df5", size = 709869, upload-time = "2026-04-13T17:09:55.62Z" }, - { url = "https://files.pythonhosted.org/packages/80/ff/b87efb0dcfd081c62c7c7601d7681dabe63103cd51fc16f8d57a1ab45961/fastar-0.11.0-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:27eed386fd0558e6daa29211111bbd7b740f7c7e881197f8a00ac7c0f3cdb1d7", size = 631668, upload-time = "2026-04-13T17:09:40.537Z" }, - { url = "https://files.pythonhosted.org/packages/24/7c/0ed6dd38b9adc04b3a8ec3b7045908e7c2170ba0ff6e6d2c51bc9fc770f3/fastar-0.11.0-cp311-cp311-manylinux_2_12_i686.manylinux2010_i686.whl", hash = "sha256:a6931bebc1d8e95ddeef55732c195449e6b44ef33aa31b325505097ed3b4d6aa", size = 869663, upload-time = "2026-04-13T17:09:09.78Z" }, - { url = 
"https://files.pythonhosted.org/packages/58/ce/8b7fb3f23855accebaaf2d2637eac7f261a7a5d936f861a172079f1ef511/fastar-0.11.0-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:891f72ce42a5e28a74fbd4d5fbf1a3ac1a1163d13cbc200cbd005fb0fabc54bd", size = 762938, upload-time = "2026-04-13T17:07:54.51Z" }, - { url = "https://files.pythonhosted.org/packages/07/cc/5491e2b677bb841f768e3aba052d0344338a5c78aa5d4c18b443831a8e8d/fastar-0.11.0-cp311-cp311-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:5b83c1f61f7017d6e1498568038f8745440cfc16ca2f697ec81bac83050108f6", size = 759232, upload-time = "2026-04-13T17:08:08.864Z" }, - { url = "https://files.pythonhosted.org/packages/4e/b7/643630bdbd179e41e9fae31c03b4cf6061dbf4d6fbbae8425d16eb12545d/fastar-0.11.0-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:db73a9b765a516e73983b25341e7b5e0189733878279e278b2295131b0e3a21e", size = 926271, upload-time = "2026-04-13T17:08:23.68Z" }, - { url = "https://files.pythonhosted.org/packages/09/5d/37ade50003b4540e0a53ef100f6692d7ab2ac1122d5acf39920cc09a3e8b/fastar-0.11.0-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:625827d52eb4e8fec942e0233f125ff8010fcf6a67c0a974a8e5f4666b771e3c", size = 818634, upload-time = "2026-04-13T17:08:54.268Z" }, - { url = "https://files.pythonhosted.org/packages/c3/ff/135d177de32cc1e837c99019e4643e6e79352bde49544d4ece5b5eebf56b/fastar-0.11.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:d7f5fd8fa21ec0a88296a38dc5d7fc35efd3b26d46a17b8b7c73c5563925ca15", size = 822755, upload-time = "2026-04-13T17:09:25.01Z" }, - { url = "https://files.pythonhosted.org/packages/27/cb/b835dbe76ceac7fa6105851468c259ffd06830eb9c029402e499d0ec153b/fastar-0.11.0-cp311-cp311-manylinux_2_31_riscv64.whl", hash = "sha256:8c15af91b8cd87ddf23ea55355ae513c1de3ab67178f26dad017c9e9c0af6096", size = 887101, upload-time = "2026-04-13T17:08:39.248Z" }, - { url = 
"https://files.pythonhosted.org/packages/9e/54/aa8289eb57fc550535470397cb051f5a58a7c89ca4de31d5502b916dd894/fastar-0.11.0-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:03a112395a8b0bff251423bd1564c012f0cc058ad8b6bd8fba96f3d7fc117e44", size = 973606, upload-time = "2026-04-13T17:10:10.98Z" }, - { url = "https://files.pythonhosted.org/packages/1f/fd/776d50a0897c01dc6bfd0926772ee913436fdae91b9affaf0a0cbd09f0a1/fastar-0.11.0-cp311-cp311-musllinux_1_2_armv7l.whl", hash = "sha256:f2994bb8f5f8c11eb12beae1e6e77a907173c9819236b8a4c8f0573652ceccce", size = 1036696, upload-time = "2026-04-13T17:10:28.502Z" }, - { url = "https://files.pythonhosted.org/packages/c8/f1/cf0f9b499fb37ac065c8a01ec642f96a3c5eb849c38ae983b59f3b3245e0/fastar-0.11.0-cp311-cp311-musllinux_1_2_i686.whl", hash = "sha256:dcf99e4b5973d842c7f19c776c3a83cdc0977d505edce6206438505c0456b517", size = 1078182, upload-time = "2026-04-13T17:10:45.318Z" }, - { url = "https://files.pythonhosted.org/packages/f8/9e/21e4701aec4a1123d4dc4d31578dc18875582b5710e4725f7ceb752a248b/fastar-0.11.0-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:29c9c386dc0d5dda78845a8e6b1480d26ab861c1e0b68f42ae5735cb70ca07f1", size = 1032336, upload-time = "2026-04-13T17:11:02.364Z" }, - { url = "https://files.pythonhosted.org/packages/ce/e2/5872b28c72c27ec1a00760eace6ff35f714f41ebbd5208cf016b12e29250/fastar-0.11.0-cp311-cp311-win32.whl", hash = "sha256:030b2580fc394f2c9b7890b6735810404e9b9ed5e0344db150b945965b5482b7", size = 457368, upload-time = "2026-04-13T17:11:43.528Z" }, - { url = "https://files.pythonhosted.org/packages/fd/6e/ce6832a16193eb4466f4108be8809c249b51cb1f89dd7894545700d079d5/fastar-0.11.0-cp311-cp311-win_amd64.whl", hash = "sha256:83ab57ae067969cd0b483ac3b6dccc4b595fc77f5c820760998648d4c42822b5", size = 488605, upload-time = "2026-04-13T17:11:29.161Z" }, - { url = "https://files.pythonhosted.org/packages/15/5a/9cfb80661cf38fd7b0889224beb7d2746784d4ade2a931ed9775a18d8602/fastar-0.11.0-cp311-cp311-win_arm64.whl", 
hash = "sha256:27b1a4cee2298b704de8151d310462ee7335ed036011ca9aa6e784b30b6c73a9", size = 464580, upload-time = "2026-04-13T17:11:18.583Z" }, - { url = "https://files.pythonhosted.org/packages/0f/06/a5773706afc8bd496769786590bbc56d2d0ee419a299cc12ea3f5717fcf3/fastar-0.11.0-cp312-cp312-macosx_10_12_x86_64.whl", hash = "sha256:3c51f1c2cdddbd1420d2897ace7738e36c65e17f6ae84e0bfe763f8d1068bb97", size = 708394, upload-time = "2026-04-13T17:09:57.269Z" }, - { url = "https://files.pythonhosted.org/packages/cc/a6/d5e2a4e48495616440a21eed07558219ca90243ad00b0502586f95bd4833/fastar-0.11.0-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:0d9d6b052baf5380baea866675dab6ccd04ec2460d12b1c46f10ce3f4ee6a820", size = 628417, upload-time = "2026-04-13T17:09:42.145Z" }, - { url = "https://files.pythonhosted.org/packages/ab/69/9816d69ac8265c9e50456637a487ccfb7a9c566efd9dbcd673df9c2558c2/fastar-0.11.0-cp312-cp312-manylinux_2_12_i686.manylinux2010_i686.whl", hash = "sha256:bd2f05666d4df7e14885b5c38fefd92a785917387513d33d837ff42ec143a22f", size = 863950, upload-time = "2026-04-13T17:09:11.506Z" }, - { url = "https://files.pythonhosted.org/packages/5b/0d/f88daad53aff2e754b6b5ff2a7113f72447a34f6ef17cc23ca99988117b7/fastar-0.11.0-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:c1e6e74aba1ae77ca4aedcaf1697cd413319f4c88a5ccbe5b42c709517c5097e", size = 760737, upload-time = "2026-04-13T17:07:55.958Z" }, - { url = "https://files.pythonhosted.org/packages/2f/a6/82ef4ecd969d50d92ed3ed9dbd8fe77faa24be5e5736f716edc9f4ce8d62/fastar-0.11.0-cp312-cp312-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:38ef77fe940bbc9b37a98bd838727f844b11731cd39358a2640ff864fb385086", size = 757603, upload-time = "2026-04-13T17:08:10.623Z" }, - { url = "https://files.pythonhosted.org/packages/03/35/50249f0d827251f8ac511495e2eacccebda80a00a0ad73e9615b8113b84f/fastar-0.11.0-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = 
"sha256:8955e61b32d6aff82c983217abf80933fd823b0e727586fc72f08043d996fd59", size = 923952, upload-time = "2026-04-13T17:08:25.526Z" }, - { url = "https://files.pythonhosted.org/packages/7b/d8/faee41659e9c379d906d24eaee6d6833ac8cfef0a5df480e5c2a8d3efb33/fastar-0.11.0-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:483532442cdb08fbff0169510224eae0836f2f672cea6aacb52847d90fefdc46", size = 816574, upload-time = "2026-04-13T17:08:56.076Z" }, - { url = "https://files.pythonhosted.org/packages/22/47/0448ea7992b997dad2bf004bfd98eca74b5858630eae080b50c7b17d9ddc/fastar-0.11.0-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:ef5a6071121e05d8287fc75bccb054bcbac8bb0501200a0c0a8feeace5303ea4", size = 819382, upload-time = "2026-04-13T17:09:26.66Z" }, - { url = "https://files.pythonhosted.org/packages/33/ef/0d63eb43586831b7a6f8b22c4d77125a7c594423af1f4f090fa9541b9b40/fastar-0.11.0-cp312-cp312-manylinux_2_31_riscv64.whl", hash = "sha256:e45e598af5afe8412197d4786efd6cf29be02e7d3d4f6a3461149eae5d7e94f1", size = 885254, upload-time = "2026-04-13T17:08:40.9Z" }, - { url = "https://files.pythonhosted.org/packages/01/25/edd584675d69e49a165052c3ee886df1c5d574f3e7d813c990306387c623/fastar-0.11.0-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:2e160919b1c47ddb8538e7e8eb4cd527281b40f0bf75110a75993838ef61f286", size = 971239, upload-time = "2026-04-13T17:10:12.997Z" }, - { url = "https://files.pythonhosted.org/packages/a5/37/e8bb24f506ba2b08fbaf36c5800e843bd4d542954e9331f00418e2d23349/fastar-0.11.0-cp312-cp312-musllinux_1_2_armv7l.whl", hash = "sha256:4bb4dc0fc8f7a6807febcebce8a2f3626ba4955a9263d81ecc630aad83be84c0", size = 1035185, upload-time = "2026-04-13T17:10:30.207Z" }, - { url = "https://files.pythonhosted.org/packages/9a/bf/be753736296338149ee4cb3e92e2b5423d6ba17c7b951d15218fd7e99bbf/fastar-0.11.0-cp312-cp312-musllinux_1_2_i686.whl", hash = "sha256:4ec95af56aa173f6e320e1183001bf108ba59beaf13edd1fc8200648db203588", size = 
1072191, upload-time = "2026-04-13T17:10:47.072Z" }, - { url = "https://files.pythonhosted.org/packages/d2/cd/a81c1aaafb5a22ce57c98ae22f39c89413ed53e4ee6e1b1444b0bd666a6c/fastar-0.11.0-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:136cf342735464091c39dc3708168f9fdeb9ebea40b1ead937c61afaf46143d9", size = 1028054, upload-time = "2026-04-13T17:11:04.293Z" }, - { url = "https://files.pythonhosted.org/packages/ec/88/1ce4eed3d70627c95f49ca017f6bbbf2ddcc4b0c601d293259de7689bc20/fastar-0.11.0-cp312-cp312-win32.whl", hash = "sha256:35f23c11b556cc4d3704587faacbc0037f7bdf6c4525cd1d09c70bda4b1c6809", size = 454198, upload-time = "2026-04-13T17:11:45.168Z" }, - { url = "https://files.pythonhosted.org/packages/8f/1d/26ce92f4331cd61a69840db9ca6115829805eec24f285481a854f578e917/fastar-0.11.0-cp312-cp312-win_amd64.whl", hash = "sha256:920bc56c3c0b8a8ca492904941d1883c1c947c858cd93343356c29122a38f44c", size = 486697, upload-time = "2026-04-13T17:11:31.084Z" }, - { url = "https://files.pythonhosted.org/packages/ed/96/e6eda4480559c69b05d466e7b5ea9170e81fef3795a73e059959a3258319/fastar-0.11.0-cp312-cp312-win_arm64.whl", hash = "sha256:395248faf89e8a6bd5dc1fd544c8465113b627cb6d7c8b296796b60ebea33593", size = 462591, upload-time = "2026-04-13T17:11:20.577Z" }, - { url = "https://files.pythonhosted.org/packages/c9/d6/3be260037e86fb694e88d47f583bac3a0188c99cee1a6b257ac26cb6b53c/fastar-0.11.0-cp313-cp313-macosx_10_12_x86_64.whl", hash = "sha256:33f544b08b4541b678e53749b4552a44720d96761fb79c172b005b1089c443ed", size = 707975, upload-time = "2026-04-13T17:09:58.866Z" }, - { url = "https://files.pythonhosted.org/packages/e1/cd/7867aefb1784662554a335f2952c75a50f0c70585ed0d2210d6cc15e5627/fastar-0.11.0-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:91c1c792447e4a642745f347ff9847c52af39633071c57ee67ed53c157fc3506", size = 628460, upload-time = "2026-04-13T17:09:43.776Z" }, - { url = 
"https://files.pythonhosted.org/packages/e5/2b/d11d84bdd5e0e377771b955755771e3460b290da5809cb78c1b735ee2228/fastar-0.11.0-cp313-cp313-manylinux_2_12_i686.manylinux2010_i686.whl", hash = "sha256:881247e6b6eaea59fc6569f9b61447aa6b9fc2ee864e048b4643d69c52745805", size = 863054, upload-time = "2026-04-13T17:09:13.048Z" }, - { url = "https://files.pythonhosted.org/packages/25/39/d3f428b318fa940b1b6e785b8d54fc895dfb5d5b945ef8d5442ffa904fb2/fastar-0.11.0-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:863b7929845c9fec92ef6c8d59579cf46af5136655e5342f8df5cebe46cab06c", size = 760247, upload-time = "2026-04-13T17:07:57.396Z" }, - { url = "https://files.pythonhosted.org/packages/9e/04/03949aee82aabb8ede06ac5a4a5579ffaf98a8fe59ce958494508ff15513/fastar-0.11.0-cp313-cp313-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:96b4a57df12bf3211662627a3ea29d62ecb314a2434a0d0843f9fc23e47536e5", size = 756512, upload-time = "2026-04-13T17:08:12.415Z" }, - { url = "https://files.pythonhosted.org/packages/3f/0c/2ca1ae0a3828ca51047962d932b80daca2522db73e8cb9d040cb6ebe28d5/fastar-0.11.0-cp313-cp313-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:ceef1c2c4df7b7b8ebd3f5d718bbf457b9bbdf25ce0bd07870211ec4fbd9aff4", size = 922183, upload-time = "2026-04-13T17:08:27.187Z" }, - { url = "https://files.pythonhosted.org/packages/65/68/7fe808b1f73a68e686f25434f538c6dc10ef4dfb3db0ace22cd861744bf8/fastar-0.11.0-cp313-cp313-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:b8e545918441910a779659d4759ad0eef349e935fbdb4668a666d3681567eb05", size = 816394, upload-time = "2026-04-13T17:08:57.657Z" }, - { url = "https://files.pythonhosted.org/packages/1f/17/07d086080f8a83b8d7966955e29bcdbd6a060f5bd949dc9d5abd3658cead/fastar-0.11.0-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:28095bb8f821e85fc2764e1a55f03e5e2876dee2abe7cd0ee9420d929905d643", size = 818983, upload-time = "2026-04-13T17:09:28.46Z" }, - { url 
= "https://files.pythonhosted.org/packages/fb/e2/2c4edf0910af2e814ff6d65b77a91196d472ca8a9fb2033bd983f6856caa/fastar-0.11.0-cp313-cp313-manylinux_2_31_riscv64.whl", hash = "sha256:0fafb95ecbe70f666a5e9b35dd63974ccdc9bb3d99ccdbd4014a823ec3e659b5", size = 884689, upload-time = "2026-04-13T17:08:42.763Z" }, - { url = "https://files.pythonhosted.org/packages/fa/ba/04fdcbd6558e60de4ced3b55230fac47675d181252582b2fcec3c74608e5/fastar-0.11.0-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:af48fed039b94016629dcdad1c95c90c486326dd068de2b0a4df419ee09b6821", size = 970677, upload-time = "2026-04-13T17:10:15.124Z" }, - { url = "https://files.pythonhosted.org/packages/df/b3/2b860a9658550167dbd5824c85e88d0b4b912bf493e42a6322544d6e483d/fastar-0.11.0-cp313-cp313-musllinux_1_2_armv7l.whl", hash = "sha256:74cd96163f39b8638ab4e8d49708ca887959672a22871d8170d01f067319533b", size = 1034026, upload-time = "2026-04-13T17:10:32.318Z" }, - { url = "https://files.pythonhosted.org/packages/b7/9b/fa42ea1188b144bac4b1b60753dfd449974a4d5eda132029ee7711569f94/fastar-0.11.0-cp313-cp313-musllinux_1_2_i686.whl", hash = "sha256:4e8b993cb5613bab495ed482810bedc0986633fcb9a3b55c37ec88e0d6714f6a", size = 1071147, upload-time = "2026-04-13T17:10:48.833Z" }, - { url = "https://files.pythonhosted.org/packages/95/c8/d2e501556dca9f1fbc9246111a31792fb49ad908fa4927f34938a97a3604/fastar-0.11.0-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:dfe39d91fc28e37e06162d94afe01050220edb7df554acb5b702b5503e564816", size = 1028377, upload-time = "2026-04-13T17:11:06.374Z" }, - { url = "https://files.pythonhosted.org/packages/db/33/5f11f23eca0a569cd052507bc45dda2e5468697f8665728d25be44120f7d/fastar-0.11.0-cp313-cp313-win32.whl", hash = "sha256:c5f63d4d99ff4bfb37c659982ec413358bdee747005348756cc50a04d412d989", size = 454089, upload-time = "2026-04-13T17:11:46.821Z" }, - { url = 
"https://files.pythonhosted.org/packages/da/2f/35ff03c939cba7a255a9132367873fec6c355fd06a7f84fedcbaf4c8129f/fastar-0.11.0-cp313-cp313-win_amd64.whl", hash = "sha256:8690ed1928d31ded3ada308e1086525fb3871f5fa81e1b69601a3f7774004583", size = 486312, upload-time = "2026-04-13T17:11:32.86Z" }, - { url = "https://files.pythonhosted.org/packages/ef/71/ee9246cbfcbfd4144558f35e7e9a306ffe0a7564730a5188c45f21d2dab8/fastar-0.11.0-cp313-cp313-win_arm64.whl", hash = "sha256:d977ded9d98a0719a305e0a4d5ee811f1d3e856d853a50acb8ae833c3cd6d5d2", size = 461975, upload-time = "2026-04-13T17:11:22.589Z" }, - { url = "https://files.pythonhosted.org/packages/cc/5c/9bbeffbf1905391446dd98aa520422ce7affde5c9a7c22d757cc5d7c1397/fastar-0.11.0-pp311-pypy311_pp73-macosx_10_12_x86_64.whl", hash = "sha256:1266d6a004f427b0d61bd6c7b544d84cc964691b2232c2f4d635a1b75f2f6d5e", size = 711644, upload-time = "2026-04-13T17:10:07.663Z" }, - { url = "https://files.pythonhosted.org/packages/7e/af/ae5cf39d4fb82d0c592705f5ec6db1b065be5265c151b108f86126ee8773/fastar-0.11.0-pp311-pypy311_pp73-macosx_11_0_arm64.whl", hash = "sha256:298a827ec04ade43733f6ca960d0faec38706aa1494175869ea7ea17f5bad5d3", size = 634371, upload-time = "2026-04-13T17:09:52.083Z" }, - { url = "https://files.pythonhosted.org/packages/7e/36/8d4569e26473c72ccb02d1c5df3ed710073f1c06eca09c26d52ea79fd815/fastar-0.11.0-pp311-pypy311_pp73-manylinux_2_12_i686.manylinux2010_i686.whl", hash = "sha256:8800e2387e463a0e5799416a1cbe72dd0fde7270a20e4bde684145e7878f6516", size = 870850, upload-time = "2026-04-13T17:09:21.439Z" }, - { url = "https://files.pythonhosted.org/packages/bf/46/724dc796e1756d3977970f820d30d59bb8cab8e3671b285f1d82ab513aec/fastar-0.11.0-pp311-pypy311_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:7496def0a2befd82d429cb004ef7ca831585cc887947bd6b9abb68a5ef852b0b", size = 764469, upload-time = "2026-04-13T17:08:05.638Z" }, - { url = 
"https://files.pythonhosted.org/packages/99/e3/74d6859e632e8fb9339a14f652fb9f800c2bd6aa53071e311c0be3fbab8b/fastar-0.11.0-pp311-pypy311_pp73-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:878eaf15463eb572e3538af7ca3a8534e5e279cf8196db902d24e5725c4af86e", size = 761375, upload-time = "2026-04-13T17:08:20.669Z" }, - { url = "https://files.pythonhosted.org/packages/a3/e7/cc70e2be5ef8731a7525552b1c35c1448cf9eae6a62cb3a56f12c1bf27ea/fastar-0.11.0-pp311-pypy311_pp73-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:0324ed1d1ef0186e1bbd843b17807d6d837d0906899d4c99378b02c5d86bdd9c", size = 928189, upload-time = "2026-04-13T17:08:35.663Z" }, - { url = "https://files.pythonhosted.org/packages/3c/33/c9a969e78dca323547276a6fee5f4f9588f7cd5ab45acec3778c67399589/fastar-0.11.0-pp311-pypy311_pp73-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:bdf9bd863205590beaf8ef6e66f315310196632180dceaf674985d01a876cac3", size = 820864, upload-time = "2026-04-13T17:09:06.366Z" }, - { url = "https://files.pythonhosted.org/packages/84/bd/6b9434b541fe55c125b5f2e017a565596a2d215aa09207e4555e4585064f/fastar-0.11.0-pp311-pypy311_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:59af8dbb683b24b90fb5b506de080faeab0a17a908e6c2a5d93a97260ed75d7b", size = 824060, upload-time = "2026-04-13T17:09:37.377Z" }, - { url = "https://files.pythonhosted.org/packages/24/8d/871d5f8cf4c6f13987119fb0a9ae8be131e34f2756c2524e9974adf33824/fastar-0.11.0-pp311-pypy311_pp73-manylinux_2_31_riscv64.whl", hash = "sha256:9f3df73a3c4292cfe15696cdf59cdb6c309ab59d30b34c733be13c6e32d9a264", size = 889217, upload-time = "2026-04-13T17:08:50.884Z" }, - { url = "https://files.pythonhosted.org/packages/d0/26/cca0fd2704f3ed20165e5613ed911549aef3aaf3b0b5b02fee0e8e23e6cc/fastar-0.11.0-pp311-pypy311_pp73-musllinux_1_2_aarch64.whl", hash = "sha256:aa3762cbb16e41a76b61f4a6914937a71aab3a7b6c2d82ca233bc686ebaf756b", size = 975418, upload-time = "2026-04-13T17:10:24.307Z" }, - { 
url = "https://files.pythonhosted.org/packages/99/94/8bbb0b13f5b6cbe2492f0b7cbba5103e6163976a3331466d010e781fa189/fastar-0.11.0-pp311-pypy311_pp73-musllinux_1_2_armv7l.whl", hash = "sha256:a8c7bc8ac74cb359bb546b199288c83236372d094b402e557c197e85527495cd", size = 1038492, upload-time = "2026-04-13T17:10:41.939Z" }, - { url = "https://files.pythonhosted.org/packages/ed/d3/5b7df222a30eac2822ffd00f82fd4c2ce84fba4b369d1e1a03732fd177fc/fastar-0.11.0-pp311-pypy311_pp73-musllinux_1_2_i686.whl", hash = "sha256:587cbd060a2699c5f66281081395bb4657b2b1e0eef5c206b1aabf740019d670", size = 1080210, upload-time = "2026-04-13T17:10:58.462Z" }, - { url = "https://files.pythonhosted.org/packages/ec/6d/56ef943ea524784598c035ccbd42e564e937da0438ae3f55f0e76cb95571/fastar-0.11.0-pp311-pypy311_pp73-musllinux_1_2_x86_64.whl", hash = "sha256:6a1c56957ac82408be37a3f63594bc83e0919e8760492a4475e542f9f1828778", size = 1034886, upload-time = "2026-04-13T17:11:15.617Z" }, -] [[package]] name = "fastmcp" @@ -1432,13 +1454,13 @@ dependencies = [ { name = "httpx" }, { name = "jsonref" }, { name = "jsonschema-path" }, - { name = "mcp" }, + { name = "mcp", version = "1.27.0", source = { registry = "https://pypi.org/simple" } }, { name = "openapi-pydantic" }, { name = "opentelemetry-api" }, { name = "packaging" }, { name = "platformdirs" }, - { name = "py-key-value-aio", extra = ["filetree", "keyring", "memory"] }, - { name = "pydantic", extra = ["email"] }, + { name = "py-key-value-aio", extra = ["filetree", "keyring", "memory"], marker = "extra == 'extra-9-verifiers-openenv'" }, + { name = "pydantic", extra = ["email"], marker = "extra == 'extra-9-verifiers-openenv'" }, { name = "pyperclip" }, { name = "python-dotenv" }, { name = "pyyaml" }, @@ -1693,6 +1715,88 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/f2/c5/a1bc0996af85757903cf2bf444a7824e68e0035ce63fb41d6f76f9def68b/gitpython-3.1.47-py3-none-any.whl", hash = 
"sha256:489f590edfd6d20571b2c0e72c6a6ac6915ee8b8cd04572330e3842207a78905", size = 209547, upload-time = "2026-04-22T02:44:41.271Z" }, ] +[[package]] +name = "glom" +version = "25.12.0" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "attrs" }, + { name = "boltons" }, + { name = "face" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/78/74/8387f95565ba7c30cd152a585b275ebb9a834d1d32782425c5d2fe0a102c/glom-25.12.0.tar.gz", hash = "sha256:1ae7da88be3693df40ad27bdf57a765a55c075c86c971bcddd67927403eb0069", size = 196128, upload-time = "2025-12-29T06:29:07.274Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/a7/e6/4129d9a3baa72d747533bb33376543ccadd9a7f9944e5a6e3ae2e245f5d6/glom-25.12.0-py3-none-any.whl", hash = "sha256:b9f21e77f71a6576a43864e85066b8cc3f0f778d0d50961563f8981377a6dcb1", size = 103295, upload-time = "2025-12-29T06:29:06.074Z" }, +] + +[[package]] +name = "googleapis-common-protos" +version = "1.74.0" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "protobuf" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/20/18/a746c8344152d368a5aac738d4c857012f2c5d1fd2eac7e17b647a7861bd/googleapis_common_protos-1.74.0.tar.gz", hash = "sha256:57971e4eeeba6aad1163c1f0fc88543f965bb49129b8bb55b2b7b26ecab084f1", size = 151254, upload-time = "2026-04-02T21:23:26.679Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/b6/b0/be5d3329badb9230b765de6eea66b73abd5944bdeb5afb3562ddcd80ae84/googleapis_common_protos-1.74.0-py3-none-any.whl", hash = "sha256:702216f78610bb510e3f12ac3cafd281b7ac45cc5d86e90ad87e4d301a3426b5", size = 300743, upload-time = "2026-04-02T21:22:49.108Z" }, +] + +[[package]] +name = "gradio" +version = "6.14.0" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "anyio" }, + { name = "audioop-lts", marker = "python_full_version >= '3.13'" }, + { name = "brotli" }, + { name = "fastapi" }, + { name = 
"gradio-client" }, + { name = "groovy" }, + { name = "hf-gradio" }, + { name = "httpx" }, + { name = "huggingface-hub" }, + { name = "jinja2" }, + { name = "markupsafe" }, + { name = "numpy" }, + { name = "orjson" }, + { name = "packaging" }, + { name = "pandas", version = "2.3.3", source = { registry = "https://pypi.org/simple" }, marker = "(python_full_version < '3.11' and extra == 'extra-9-verifiers-openenv') or (extra == 'extra-9-verifiers-openenv' and extra == 'group-9-verifiers-policy')" }, + { name = "pandas", version = "3.0.2", source = { registry = "https://pypi.org/simple" }, marker = "(python_full_version >= '3.11' and extra == 'extra-9-verifiers-openenv') or (extra == 'extra-9-verifiers-openenv' and extra == 'group-9-verifiers-policy')" }, + { name = "pillow" }, + { name = "pydantic" }, + { name = "pydub" }, + { name = "python-multipart" }, + { name = "pytz" }, + { name = "pyyaml" }, + { name = "safehttpx" }, + { name = "semantic-version" }, + { name = "starlette" }, + { name = "tomlkit" }, + { name = "typer" }, + { name = "typing-extensions" }, + { name = "uvicorn" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/de/bd/7d1544571de4566138e50c868b91bb79e38c998266896d38fed3d3a77898/gradio-6.14.0.tar.gz", hash = "sha256:4972ef7d01ac57472772624eb4e095767b6c8f3cd4846b7fea648e8034cda9f8", size = 36026409, upload-time = "2026-04-30T16:50:31.698Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/7e/e2/41f991b2e212b7afda3a9927676ebf8af302e5a2c632b330bd70bf2cf2c1/gradio-6.14.0-py3-none-any.whl", hash = "sha256:bb702f5ab643510d167bae54269ad6e985c2185174d388fe542cc5957f51f4fd", size = 19687959, upload-time = "2026-04-30T16:50:26.914Z" }, +] + +[[package]] +name = "gradio-client" +version = "2.5.0" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "fsspec" }, + { name = "httpx" }, + { name = "huggingface-hub" }, + { name = "packaging" }, + { name = "typing-extensions" }, +] +sdist = { url = 
"https://files.pythonhosted.org/packages/e8/e6/6b6029f5fe2ad7f1211105d530e34d991014c2cae463f9223033031cfc4f/gradio_client-2.5.0.tar.gz", hash = "sha256:4cde99bad62149595c30c90876ca2e405e3a13687ecf895474f3412cb476673d", size = 59013, upload-time = "2026-04-20T23:16:21.518Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/78/81/0a861b8e1ff42960139c6cd4c7dd591292fa09ea1ae2d87677441cba4c00/gradio_client-2.5.0-py3-none-any.whl", hash = "sha256:d43e2179c29076292a76485ad7ed2e6eaa19d14ac58283bd7f5beabfe4ca958c", size = 59952, upload-time = "2026-04-20T23:16:20.186Z" }, +] + [[package]] name = "griffelib" version = "2.0.2" @@ -1702,6 +1806,15 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/11/8c/c9138d881c79aa0ea9ed83cbd58d5ca75624378b38cee225dcf5c42cc91f/griffelib-2.0.2-py3-none-any.whl", hash = "sha256:925c857658fb1ba40c0772c37acbc2ab650bd794d9c1b9726922e36ea4117ea1", size = 142357, upload-time = "2026-03-27T11:34:46.275Z" }, ] +[[package]] +name = "groovy" +version = "0.1.2" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/52/36/bbdede67400277bef33d3ec0e6a31750da972c469f75966b4930c753218f/groovy-0.1.2.tar.gz", hash = "sha256:25c1dc09b3f9d7e292458aa762c6beb96ea037071bf5e917fc81fb78d2231083", size = 17325, upload-time = "2025-02-28T20:24:56.068Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/28/27/3d6dcadc8a3214d8522c1e7f6a19554e33659be44546d44a2f7572ac7d2a/groovy-0.1.2-py3-none-any.whl", hash = "sha256:7f7975bab18c729a257a8b1ae9dcd70b7cafb1720481beae47719af57c35fa64", size = 14090, upload-time = "2025-02-28T20:24:55.152Z" }, +] + [[package]] name = "h11" version = "0.16.0" @@ -1711,6 +1824,19 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/04/4b/29cac41a4d98d144bf5f6d33995617b185d14b22401f75ca86f384e87ff1/h11-0.16.0-py3-none-any.whl", hash = "sha256:63cf8bbe7522de3bf65932fda1d9c2772064ffb3dae62d55932da54b31cb6c86", size = 37515, upload-time = 
"2025-04-24T03:35:24.344Z" }, ] +[[package]] +name = "hf-gradio" +version = "0.4.1" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "gradio-client" }, + { name = "typer" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/ce/86/c9694b7cfada5780e75769e60dc161a161f4dd7fc91b61db5e3a3338bef9/hf_gradio-0.4.1.tar.gz", hash = "sha256:a017d942618f0d495a58ee4563047fa04bef614c00e0cb789a9a6d0633cffa7b", size = 6560, upload-time = "2026-04-22T14:01:32.334Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/30/2d/afff2ee87e75d8eb85c92bb8cf0e15b05c23c2ebd8fd8dec781d8601ed7f/hf_gradio-0.4.1-py3-none-any.whl", hash = "sha256:76b8cb8be6abe62d74c1ad2d35b42f0629db89aa9e1a8d033cecfe7c856eeab3", size = 4482, upload-time = "2026-04-17T19:53:31.827Z" }, +] + [[package]] name = "hf-xet" version = "1.4.3" @@ -1824,7 +1950,7 @@ source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "filelock" }, { name = "fsspec" }, - { name = "hf-xet", marker = "platform_machine == 'AMD64' or platform_machine == 'aarch64' or platform_machine == 'amd64' or platform_machine == 'arm64' or platform_machine == 'x86_64'" }, + { name = "hf-xet", marker = "platform_machine == 'AMD64' or platform_machine == 'aarch64' or platform_machine == 'amd64' or platform_machine == 'arm64' or platform_machine == 'x86_64' or (extra == 'extra-9-verifiers-openenv' and extra == 'group-9-verifiers-policy')" }, { name = "httpx" }, { name = "packaging" }, { name = "pyyaml" }, @@ -1890,11 +2016,11 @@ name = "ipykernel" version = "7.2.0" source = { registry = "https://pypi.org/simple" } dependencies = [ - { name = "appnope", marker = "sys_platform == 'darwin'" }, + { name = "appnope", marker = "sys_platform == 'darwin' or (extra == 'extra-9-verifiers-openenv' and extra == 'group-9-verifiers-policy')" }, { name = "comm" }, { name = "debugpy" }, - { name = "ipython", version = "8.39.0", source = { registry = "https://pypi.org/simple" }, marker = 
"python_full_version < '3.11'" }, - { name = "ipython", version = "9.13.0", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version >= '3.11'" }, + { name = "ipython", version = "8.39.0", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version < '3.11' or (extra == 'extra-9-verifiers-openenv' and extra == 'group-9-verifiers-policy')" }, + { name = "ipython", version = "9.13.0", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version >= '3.11' or (extra == 'extra-9-verifiers-openenv' and extra == 'group-9-verifiers-policy')" }, { name = "jupyter-client" }, { name = "jupyter-core" }, { name = "matplotlib-inline" }, @@ -1918,17 +2044,17 @@ resolution-markers = [ "python_full_version < '3.11'", ] dependencies = [ - { name = "colorama", marker = "python_full_version < '3.11' and sys_platform == 'win32'" }, - { name = "decorator", marker = "python_full_version < '3.11'" }, - { name = "exceptiongroup", marker = "python_full_version < '3.11'" }, - { name = "jedi", marker = "python_full_version < '3.11'" }, - { name = "matplotlib-inline", marker = "python_full_version < '3.11'" }, - { name = "pexpect", marker = "python_full_version < '3.11' and sys_platform != 'emscripten' and sys_platform != 'win32'" }, - { name = "prompt-toolkit", marker = "python_full_version < '3.11'" }, - { name = "pygments", marker = "python_full_version < '3.11'" }, - { name = "stack-data", marker = "python_full_version < '3.11'" }, - { name = "traitlets", marker = "python_full_version < '3.11'" }, - { name = "typing-extensions", marker = "python_full_version < '3.11'" }, + { name = "colorama", marker = "(python_full_version < '3.11' and sys_platform == 'win32') or (python_full_version >= '3.11' and extra == 'extra-9-verifiers-openenv' and extra == 'group-9-verifiers-policy') or (sys_platform != 'win32' and extra == 'extra-9-verifiers-openenv' and extra == 'group-9-verifiers-policy')" }, + { name = "decorator", marker = 
"python_full_version < '3.11' or (extra == 'extra-9-verifiers-openenv' and extra == 'group-9-verifiers-policy')" }, + { name = "exceptiongroup", marker = "python_full_version < '3.11' or (extra == 'extra-9-verifiers-openenv' and extra == 'group-9-verifiers-policy')" }, + { name = "jedi", marker = "python_full_version < '3.11' or (extra == 'extra-9-verifiers-openenv' and extra == 'group-9-verifiers-policy')" }, + { name = "matplotlib-inline", marker = "python_full_version < '3.11' or (extra == 'extra-9-verifiers-openenv' and extra == 'group-9-verifiers-policy')" }, + { name = "pexpect", marker = "(python_full_version < '3.11' and sys_platform != 'emscripten' and sys_platform != 'win32') or (python_full_version >= '3.11' and extra == 'extra-9-verifiers-openenv' and extra == 'group-9-verifiers-policy') or (sys_platform == 'emscripten' and extra == 'extra-9-verifiers-openenv' and extra == 'group-9-verifiers-policy') or (sys_platform == 'win32' and extra == 'extra-9-verifiers-openenv' and extra == 'group-9-verifiers-policy')" }, + { name = "prompt-toolkit", marker = "python_full_version < '3.11' or (extra == 'extra-9-verifiers-openenv' and extra == 'group-9-verifiers-policy')" }, + { name = "pygments", marker = "python_full_version < '3.11' or (extra == 'extra-9-verifiers-openenv' and extra == 'group-9-verifiers-policy')" }, + { name = "stack-data", marker = "python_full_version < '3.11' or (extra == 'extra-9-verifiers-openenv' and extra == 'group-9-verifiers-policy')" }, + { name = "traitlets", marker = "python_full_version < '3.11' or (extra == 'extra-9-verifiers-openenv' and extra == 'group-9-verifiers-policy')" }, + { name = "typing-extensions", marker = "python_full_version < '3.11' or (extra == 'extra-9-verifiers-openenv' and extra == 'group-9-verifiers-policy')" }, ] sdist = { url = "https://files.pythonhosted.org/packages/40/18/f8598d287006885e7136451fdea0755af4ebcbfe342836f24deefaed1164/ipython-8.39.0.tar.gz", hash = 
"sha256:4110ae96012c379b8b6db898a07e186c40a2a1ef5d57a7fa83166047d9da7624", size = 5513971, upload-time = "2026-03-27T10:02:13.94Z" } wheels = [ @@ -1951,18 +2077,18 @@ resolution-markers = [ "python_full_version == '3.11.*' and sys_platform != 'emscripten' and sys_platform != 'win32'", ] dependencies = [ - { name = "colorama", marker = "python_full_version >= '3.11' and sys_platform == 'win32'" }, - { name = "decorator", marker = "python_full_version >= '3.11'" }, - { name = "ipython-pygments-lexers", marker = "python_full_version >= '3.11'" }, - { name = "jedi", marker = "python_full_version >= '3.11'" }, - { name = "matplotlib-inline", marker = "python_full_version >= '3.11'" }, - { name = "pexpect", marker = "python_full_version >= '3.11' and sys_platform != 'emscripten' and sys_platform != 'win32'" }, - { name = "prompt-toolkit", marker = "python_full_version >= '3.11'" }, - { name = "psutil", marker = "python_full_version >= '3.11'" }, - { name = "pygments", marker = "python_full_version >= '3.11'" }, - { name = "stack-data", marker = "python_full_version >= '3.11'" }, - { name = "traitlets", marker = "python_full_version >= '3.11'" }, - { name = "typing-extensions", marker = "python_full_version == '3.11.*'" }, + { name = "colorama", marker = "(python_full_version >= '3.11' and sys_platform == 'win32') or (python_full_version < '3.11' and extra == 'extra-9-verifiers-openenv' and extra == 'group-9-verifiers-policy') or (sys_platform != 'win32' and extra == 'extra-9-verifiers-openenv' and extra == 'group-9-verifiers-policy')" }, + { name = "decorator", marker = "python_full_version >= '3.11' or (extra == 'extra-9-verifiers-openenv' and extra == 'group-9-verifiers-policy')" }, + { name = "ipython-pygments-lexers", marker = "python_full_version >= '3.11' or (extra == 'extra-9-verifiers-openenv' and extra == 'group-9-verifiers-policy')" }, + { name = "jedi", marker = "python_full_version >= '3.11' or (extra == 'extra-9-verifiers-openenv' and extra == 
'group-9-verifiers-policy')" }, + { name = "matplotlib-inline", marker = "python_full_version >= '3.11' or (extra == 'extra-9-verifiers-openenv' and extra == 'group-9-verifiers-policy')" }, + { name = "pexpect", marker = "(python_full_version >= '3.11' and sys_platform != 'emscripten' and sys_platform != 'win32') or (python_full_version < '3.11' and extra == 'extra-9-verifiers-openenv' and extra == 'group-9-verifiers-policy') or (sys_platform == 'emscripten' and extra == 'extra-9-verifiers-openenv' and extra == 'group-9-verifiers-policy') or (sys_platform == 'win32' and extra == 'extra-9-verifiers-openenv' and extra == 'group-9-verifiers-policy')" }, + { name = "prompt-toolkit", marker = "python_full_version >= '3.11' or (extra == 'extra-9-verifiers-openenv' and extra == 'group-9-verifiers-policy')" }, + { name = "psutil", marker = "python_full_version >= '3.11' or (extra == 'extra-9-verifiers-openenv' and extra == 'group-9-verifiers-policy')" }, + { name = "pygments", marker = "python_full_version >= '3.11' or (extra == 'extra-9-verifiers-openenv' and extra == 'group-9-verifiers-policy')" }, + { name = "stack-data", marker = "python_full_version >= '3.11' or (extra == 'extra-9-verifiers-openenv' and extra == 'group-9-verifiers-policy')" }, + { name = "traitlets", marker = "python_full_version >= '3.11' or (extra == 'extra-9-verifiers-openenv' and extra == 'group-9-verifiers-policy')" }, + { name = "typing-extensions", marker = "python_full_version == '3.11.*' or (extra == 'extra-9-verifiers-openenv' and extra == 'group-9-verifiers-policy')" }, ] sdist = { url = "https://files.pythonhosted.org/packages/cd/c4/87cda5842cf5c31837c06ddb588e11c3c35d8ece89b7a0108c06b8c9b00a/ipython-9.13.0.tar.gz", hash = "sha256:7e834b6afc99f020e3f05966ced34792f40267d64cb1ea9043886dab0dde5967", size = 4430549, upload-time = "2026-04-24T12:24:55.221Z" } wheels = [ @@ -1974,7 +2100,7 @@ name = "ipython-pygments-lexers" version = "1.1.1" source = { registry = "https://pypi.org/simple" } 
dependencies = [ - { name = "pygments", marker = "python_full_version >= '3.11'" }, + { name = "pygments", marker = "python_full_version >= '3.11' or (extra == 'extra-9-verifiers-openenv' and extra == 'group-9-verifiers-policy')" }, ] sdist = { url = "https://files.pythonhosted.org/packages/ef/4c/5dd1d8af08107f88c7f741ead7a40854b8ac24ddf9ae850afbcf698aa552/ipython_pygments_lexers-1.1.1.tar.gz", hash = "sha256:09c0138009e56b6854f9535736f4171d855c8c08a563a0dcd8022f78355c7e81", size = 8393, upload-time = "2025-01-17T11:24:34.505Z" } wheels = [ @@ -1987,8 +2113,8 @@ version = "8.1.8" source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "comm" }, - { name = "ipython", version = "8.39.0", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version < '3.11'" }, - { name = "ipython", version = "9.13.0", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version >= '3.11'" }, + { name = "ipython", version = "8.39.0", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version < '3.11' or (extra == 'extra-9-verifiers-openenv' and extra == 'group-9-verifiers-policy')" }, + { name = "ipython", version = "9.13.0", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version >= '3.11' or (extra == 'extra-9-verifiers-openenv' and extra == 'group-9-verifiers-policy')" }, { name = "jupyterlab-widgets" }, { name = "traitlets" }, { name = "widgetsnbextension" }, @@ -2154,14 +2280,14 @@ wheels = [ [[package]] name = "joserfc" -version = "1.6.4" +version = "1.6.5" source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "cryptography" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/de/c6/de8fdbdfa75c8ca04fead38a82d573df8a82906e984c349d58665f459558/joserfc-1.6.4.tar.gz", hash = "sha256:34ce5f499bfcc5e9ad4cc75077f9278ab3227b71da9aaf28f9ab705f8a560d3c", size = 231866, upload-time = "2026-04-13T13:15:40.632Z" } +sdist = { url = 
"https://files.pythonhosted.org/packages/3b/dc/5f768c2e391e9afabe5d18e3221346deb5fb6338565f1ccc9e7c6d7befdd/joserfc-1.6.5.tar.gz", hash = "sha256:1482a7db78fb4602e44ed89e51b599d052e091288c7c532c5b694e20149dec48", size = 231881, upload-time = "2026-05-06T04:58:13.408Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/b6/f7/210b27752e972edb36d239315b08d3eb6b14824cc4a590da2337d195260b/joserfc-1.6.4-py3-none-any.whl", hash = "sha256:3e4a22b509b41908989237a045e25c8308d5fd47ab96bdae2dd8057c6451003a", size = 70464, upload-time = "2026-04-13T13:15:39.259Z" }, + { url = "https://files.pythonhosted.org/packages/54/3b/ad1cb22e75c963b1f07c8a2329bf47227ce7e4361df5eb2fb101b2ce33ef/joserfc-1.6.5-py3-none-any.whl", hash = "sha256:e9878a0f8243fe7b95e11fdda81374ca9f7a689e302751579d3dfdeec559675e", size = 70464, upload-time = "2026-05-06T04:58:11.668Z" }, ] [[package]] @@ -2175,7 +2301,7 @@ wheels = [ [[package]] name = "jsonschema" -version = "4.26.0" +version = "4.25.1" source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "attrs" }, @@ -2183,23 +2309,23 @@ dependencies = [ { name = "referencing" }, { name = "rpds-py" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/b3/fc/e067678238fa451312d4c62bf6e6cf5ec56375422aee02f9cb5f909b3047/jsonschema-4.26.0.tar.gz", hash = "sha256:0c26707e2efad8aa1bfc5b7ce170f3fccc2e4918ff85989ba9ffa9facb2be326", size = 366583, upload-time = "2026-01-07T13:41:07.246Z" } +sdist = { url = "https://files.pythonhosted.org/packages/74/69/f7185de793a29082a9f3c7728268ffb31cb5095131a9c139a74078e27336/jsonschema-4.25.1.tar.gz", hash = "sha256:e4a9655ce0da0c0b67a085847e00a3a51449e1157f4f75e9fb5aa545e122eb85", size = 357342, upload-time = "2025-08-18T17:03:50.038Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/69/90/f63fb5873511e014207a475e2bb4e8b2e570d655b00ac19a9a0ca0a385ee/jsonschema-4.26.0-py3-none-any.whl", hash = "sha256:d489f15263b8d200f8387e64b4c3a75f06629559fb73deb8fdfb525f2dab50ce", size = 
90630, upload-time = "2026-01-07T13:41:05.306Z" }, + { url = "https://files.pythonhosted.org/packages/bf/9c/8c95d856233c1f82500c2450b8c68576b4cf1c871db3afac5c34ff84e6fd/jsonschema-4.25.1-py3-none-any.whl", hash = "sha256:3fba0169e345c7175110351d456342c364814cfcf3b964ba4587f22915230a63", size = 90040, upload-time = "2025-08-18T17:03:48.373Z" }, ] [[package]] name = "jsonschema-path" -version = "0.4.5" +version = "0.4.6" source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "pathable" }, { name = "pyyaml" }, { name = "referencing" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/5b/8a/7e6102f2b8bdc6705a9eb5294f8f6f9ccd3a8420e8e8e19671d1dd773251/jsonschema_path-0.4.5.tar.gz", hash = "sha256:c6cd7d577ae290c7defd4f4029e86fdb248ca1bd41a07557795b3c95e5144918", size = 15113, upload-time = "2026-03-03T09:56:46.87Z" } +sdist = { url = "https://files.pythonhosted.org/packages/01/86/cfee6dd25843bec0760f456599a4f7e7e40221a934b9229fda0662c859bc/jsonschema_path-0.4.6.tar.gz", hash = "sha256:c89eb635f4d497c9ac328eeff359c489755838806a7d033510a692e9576f5c4b", size = 15302, upload-time = "2026-04-27T18:57:08.412Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/04/d5/4e96c44f6c1ea3d812cf5391d81a4f5abaa540abf8d04ecd7f66e0ed11df/jsonschema_path-0.4.5-py3-none-any.whl", hash = "sha256:7d77a2c3f3ec569a40efe5c5f942c44c1af2a6f96fe0866794c9ef5b8f87fd65", size = 19368, upload-time = "2026-03-03T09:56:45.39Z" }, + { url = "https://files.pythonhosted.org/packages/6c/43/3d3065c05a04bb550c143bfbb8e4fd7022cd327e1082bf257bac74923783/jsonschema_path-0.4.6-py3-none-any.whl", hash = "sha256:451354b5311fa955c3144e6e4e255388c751c0121c5570ec5bb9291dd42d08c9", size = 19565, upload-time = "2026-04-27T18:57:06.792Z" }, ] [[package]] @@ -2420,7 +2546,9 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/b3/e1/694c89986fcae7777184fc8b22baa0976eba15a6847221763f6ad211fc1f/llguidance-0.7.30-cp39-abi3-macosx_10_12_x86_64.whl", hash = 
"sha256:c80af02c118d2b0526bcecaab389af2ed094537a069b0fc724cd2a2f2ba3990f", size = 3327974, upload-time = "2025-06-23T00:23:47.556Z" }, { url = "https://files.pythonhosted.org/packages/fd/77/ab7a548ae189dc23900fdd37803c115c2339b1223af9e8eb1f4329b5935a/llguidance-0.7.30-cp39-abi3-macosx_11_0_arm64.whl", hash = "sha256:00a256d532911d2cf5ba4ef63e182944e767dd2402f38d63002016bc37755958", size = 3210709, upload-time = "2025-06-23T00:23:45.872Z" }, { url = "https://files.pythonhosted.org/packages/9c/5b/6a166564b14f9f805f0ea01ec233a84f55789cb7eeffe1d6224ccd0e6cdd/llguidance-0.7.30-cp39-abi3-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:af8741c867e4bc7e42f7cdc68350c076b4edd0ca10ecefbde75f15a9f6bc25d0", size = 14867038, upload-time = "2025-06-23T00:23:39.571Z" }, + { url = "https://files.pythonhosted.org/packages/17/ec/69507bdb36767f9b6ff2e290660a9b5afdda0fb8a7903faa37f37c6c2a72/llguidance-0.7.30-cp39-abi3-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:f4a327a30dd37d86dd6347861ac8de3521fc1dbef9475296c06744e5b40ffc54", size = 15142936, upload-time = "2025-06-23T00:23:41.944Z" }, { url = "https://files.pythonhosted.org/packages/af/80/5a40b9689f17612434b820854cba9b8cabd5142072c491b5280fe5f7a35e/llguidance-0.7.30-cp39-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:9edc409b9decd6cffba5f5bf3b4fbd7541f95daa8cbc9510cbf96c6ab1ffc153", size = 15004926, upload-time = "2025-06-23T00:23:43.965Z" }, + { url = "https://files.pythonhosted.org/packages/bb/bc/2d2f9b446bb3e51e4dd4db290590afee03ae29163f417168569f0361204c/llguidance-0.7.30-cp39-abi3-win32.whl", hash = "sha256:a0d52b8d1b2d3b0e661e3f953ecccfa16644f302026b3067a4815c1baa2ae643", size = 2585627, upload-time = "2025-06-23T00:23:52.39Z" }, { url = "https://files.pythonhosted.org/packages/99/47/58e49a118b514855b245f8a962c6aaf9a5cc95a0f61eac7e230e691c7b7e/llguidance-0.7.30-cp39-abi3-win_amd64.whl", hash = "sha256:05234ecceea7c9c6ff13b9739112043173a3bcb88cae860249b20335a07b3075", size 
= 2796878, upload-time = "2025-06-23T00:23:51Z" }, ] @@ -2495,6 +2623,9 @@ wheels = [ linkify = [ { name = "linkify-it-py" }, ] +plugins = [ + { name = "mdit-py-plugins" }, +] [[package]] name = "markupsafe" @@ -2576,8 +2707,8 @@ name = "matplotlib" version = "3.10.9" source = { registry = "https://pypi.org/simple" } dependencies = [ - { name = "contourpy", version = "1.3.2", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version < '3.11'" }, - { name = "contourpy", version = "1.3.3", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version >= '3.11'" }, + { name = "contourpy", version = "1.3.2", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version < '3.11' or (extra == 'extra-9-verifiers-openenv' and extra == 'group-9-verifiers-policy')" }, + { name = "contourpy", version = "1.3.3", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version >= '3.11' or (extra == 'extra-9-verifiers-openenv' and extra == 'group-9-verifiers-policy')" }, { name = "cycler" }, { name = "fonttools" }, { name = "kiwisolver" }, @@ -2643,25 +2774,74 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/af/33/ee4519fa02ed11a94aef9559552f3b17bb863f2ecfe1a35dc7f548cde231/matplotlib_inline-0.2.1-py3-none-any.whl", hash = "sha256:d56ce5156ba6085e00a9d54fead6ed29a9c47e215cd1bba2e976ef39f5710a76", size = 9516, upload-time = "2025-10-23T09:00:20.675Z" }, ] +[[package]] +name = "mcp" +version = "1.23.3" +source = { registry = "https://pypi.org/simple" } +resolution-markers = [ + "python_full_version >= '3.13' and sys_platform == 'win32'", + "python_full_version == '3.12.*' and sys_platform == 'win32'", + "python_full_version >= '3.13' and sys_platform == 'emscripten'", + "python_full_version == '3.12.*' and sys_platform == 'emscripten'", + "python_full_version >= '3.13' and sys_platform != 'emscripten' and sys_platform != 'win32'", + "python_full_version == '3.12.*' and sys_platform != 
'emscripten' and sys_platform != 'win32'", + "python_full_version == '3.11.*' and sys_platform == 'win32'", + "python_full_version == '3.11.*' and sys_platform == 'emscripten'", + "python_full_version == '3.11.*' and sys_platform != 'emscripten' and sys_platform != 'win32'", + "python_full_version < '3.11'", +] +dependencies = [ + { name = "anyio", marker = "extra == 'group-9-verifiers-policy'" }, + { name = "httpx", marker = "extra == 'group-9-verifiers-policy'" }, + { name = "httpx-sse", marker = "extra == 'group-9-verifiers-policy'" }, + { name = "jsonschema", marker = "extra == 'group-9-verifiers-policy'" }, + { name = "pydantic", marker = "extra == 'group-9-verifiers-policy'" }, + { name = "pydantic-settings", marker = "extra == 'group-9-verifiers-policy'" }, + { name = "pyjwt", extra = ["crypto"], marker = "extra == 'group-9-verifiers-policy'" }, + { name = "python-multipart", marker = "extra == 'group-9-verifiers-policy'" }, + { name = "pywin32", marker = "(sys_platform == 'win32' and extra == 'group-9-verifiers-policy') or (extra == 'extra-9-verifiers-openenv' and extra == 'group-9-verifiers-policy')" }, + { name = "sse-starlette", marker = "extra == 'group-9-verifiers-policy'" }, + { name = "starlette", marker = "extra == 'group-9-verifiers-policy'" }, + { name = "typing-extensions", marker = "extra == 'group-9-verifiers-policy'" }, + { name = "typing-inspection", marker = "extra == 'group-9-verifiers-policy'" }, + { name = "uvicorn", marker = "(sys_platform != 'emscripten' and extra == 'group-9-verifiers-policy') or (extra == 'extra-9-verifiers-openenv' and extra == 'group-9-verifiers-policy')" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/a7/a4/d06a303f45997e266f2c228081abe299bbcba216cb806128e2e49095d25f/mcp-1.23.3.tar.gz", hash = "sha256:b3b0da2cc949950ce1259c7bfc1b081905a51916fcd7c8182125b85e70825201", size = 600697, upload-time = "2025-12-09T16:04:37.351Z" } +wheels = [ + { url = 
"https://files.pythonhosted.org/packages/32/c6/13c1a26b47b3f3a3b480783001ada4268917c9f42d78a079c336da2e75e5/mcp-1.23.3-py3-none-any.whl", hash = "sha256:32768af4b46a1b4f7df34e2bfdf5c6011e7b63d7f1b0e321d0fdef4cd6082031", size = 231570, upload-time = "2025-12-09T16:04:35.56Z" }, +] + [[package]] name = "mcp" version = "1.27.0" source = { registry = "https://pypi.org/simple" } +resolution-markers = [ + "python_full_version >= '3.13' and sys_platform == 'win32'", + "python_full_version == '3.12.*' and sys_platform == 'win32'", + "python_full_version >= '3.13' and sys_platform == 'emscripten'", + "python_full_version == '3.12.*' and sys_platform == 'emscripten'", + "python_full_version >= '3.13' and sys_platform != 'emscripten' and sys_platform != 'win32'", + "python_full_version == '3.12.*' and sys_platform != 'emscripten' and sys_platform != 'win32'", + "python_full_version == '3.11.*' and sys_platform == 'win32'", + "python_full_version == '3.11.*' and sys_platform == 'emscripten'", + "python_full_version == '3.11.*' and sys_platform != 'emscripten' and sys_platform != 'win32'", + "python_full_version < '3.11'", +] dependencies = [ - { name = "anyio" }, - { name = "httpx" }, - { name = "httpx-sse" }, - { name = "jsonschema" }, - { name = "pydantic" }, - { name = "pydantic-settings" }, - { name = "pyjwt", extra = ["crypto"] }, - { name = "python-multipart" }, - { name = "pywin32", marker = "sys_platform == 'win32'" }, - { name = "sse-starlette" }, - { name = "starlette" }, - { name = "typing-extensions" }, - { name = "typing-inspection" }, - { name = "uvicorn", marker = "sys_platform != 'emscripten'" }, + { name = "anyio", marker = "extra == 'extra-9-verifiers-openenv' or extra != 'group-9-verifiers-policy'" }, + { name = "httpx", marker = "extra == 'extra-9-verifiers-openenv' or extra != 'group-9-verifiers-policy'" }, + { name = "httpx-sse", marker = "extra == 'extra-9-verifiers-openenv' or extra != 'group-9-verifiers-policy'" }, + { name = "jsonschema", marker = 
"extra == 'extra-9-verifiers-openenv' or extra != 'group-9-verifiers-policy'" }, + { name = "pydantic", marker = "extra == 'extra-9-verifiers-openenv' or extra != 'group-9-verifiers-policy'" }, + { name = "pydantic-settings", marker = "extra == 'extra-9-verifiers-openenv' or extra != 'group-9-verifiers-policy'" }, + { name = "pyjwt", extra = ["crypto"], marker = "extra == 'extra-9-verifiers-openenv' or extra != 'group-9-verifiers-policy'" }, + { name = "python-multipart", marker = "extra == 'extra-9-verifiers-openenv' or extra != 'group-9-verifiers-policy'" }, + { name = "pywin32", marker = "(sys_platform == 'win32' and extra == 'extra-9-verifiers-openenv') or (sys_platform == 'win32' and extra != 'group-9-verifiers-policy') or (extra == 'extra-9-verifiers-openenv' and extra == 'group-9-verifiers-policy')" }, + { name = "sse-starlette", marker = "extra == 'extra-9-verifiers-openenv' or extra != 'group-9-verifiers-policy'" }, + { name = "starlette", marker = "extra == 'extra-9-verifiers-openenv' or extra != 'group-9-verifiers-policy'" }, + { name = "typing-extensions", marker = "extra == 'extra-9-verifiers-openenv' or extra != 'group-9-verifiers-policy'" }, + { name = "typing-inspection", marker = "extra == 'extra-9-verifiers-openenv' or extra != 'group-9-verifiers-policy'" }, + { name = "uvicorn", marker = "(sys_platform != 'emscripten' and extra == 'extra-9-verifiers-openenv') or (sys_platform != 'emscripten' and extra != 'group-9-verifiers-policy') or (extra == 'extra-9-verifiers-openenv' and extra == 'group-9-verifiers-policy')" }, ] sdist = { url = "https://files.pythonhosted.org/packages/8b/eb/c0cfc62075dc6e1ec1c64d352ae09ac051d9334311ed226f1f425312848a/mcp-1.27.0.tar.gz", hash = "sha256:d3dc35a7eec0d458c1da4976a48f982097ddaab87e278c5511d5a4a56e852b83", size = 607509, upload-time = "2026-04-02T14:48:08.88Z" } wheels = [ @@ -2722,21 +2902,29 @@ name = "mlx" version = "0.31.2" source = { registry = "https://pypi.org/simple" } dependencies = [ - { name = 
"mlx-metal", marker = "sys_platform == 'darwin'" }, + { name = "mlx-metal", marker = "sys_platform == 'darwin' or (extra == 'extra-9-verifiers-openenv' and extra == 'group-9-verifiers-policy')" }, ] wheels = [ { url = "https://files.pythonhosted.org/packages/29/7c/c16d52494a1ba6d90443f31fa26bc810bf878d532dfa9a7a13f49ef9542d/mlx-0.31.2-cp310-cp310-macosx_14_0_arm64.whl", hash = "sha256:b29cf940f34205f09bb552ac60465ae833c4ae640b52777c6d725ddbad8461ca", size = 586942, upload-time = "2026-04-22T03:14:21.97Z" }, { url = "https://files.pythonhosted.org/packages/74/da/1c7f3dc39b7bda65b0cafbaf1e58a35eea118622c6f4506c9a4294c9806e/mlx-0.31.2-cp310-cp310-macosx_15_0_arm64.whl", hash = "sha256:ebdc47b87b4b0216ceab3b5961716804bba3107c16454b65ae51d0e0c059f298", size = 586942, upload-time = "2026-04-22T03:14:23.527Z" }, { url = "https://files.pythonhosted.org/packages/4c/e9/a8559389706d39f613620a8b6b42ed03cf3155a516b0762d355c5116fdab/mlx-0.31.2-cp310-cp310-macosx_26_0_arm64.whl", hash = "sha256:2a64db61b2840f28bae08354e6f999698e30381af201cc12354290673c96213b", size = 586804, upload-time = "2026-04-22T03:14:24.882Z" }, + { url = "https://files.pythonhosted.org/packages/4d/4a/274ebee3783a37560cddc8e781ec3eefadd17f3f85a7dcd5df6f07d200d6/mlx-0.31.2-cp310-cp310-manylinux_2_35_aarch64.whl", hash = "sha256:e3e2818157371501de097887f371784227f9dd9c91e177f986db7b25319c55d7", size = 653252, upload-time = "2026-04-22T03:14:26.275Z" }, + { url = "https://files.pythonhosted.org/packages/d6/c7/79283370001660102f5c5c772b649f69da02113609d927af35e747508320/mlx-0.31.2-cp310-cp310-manylinux_2_35_x86_64.whl", hash = "sha256:c71dff00cc1b363d542f111d9e8b7b59dadb65b29d027f798b71ea34da75b665", size = 692109, upload-time = "2026-04-22T03:14:28.05Z" }, { url = "https://files.pythonhosted.org/packages/94/89/1e77ec3ff380e8fb9e7258047374d31452a0f9828a0e370f127b07dd8288/mlx-0.31.2-cp311-cp311-macosx_14_0_arm64.whl", hash = "sha256:4a3f181b367d404e44a6bd68ef5eb573930809ac60cacd51d0c851c629b1b651", size = 
586911, upload-time = "2026-04-22T03:14:29.675Z" }, { url = "https://files.pythonhosted.org/packages/6a/41/c1907f05f8a3fc54025fb78ad68d3c4a4b931664d03c0a24f7f431cc4087/mlx-0.31.2-cp311-cp311-macosx_15_0_arm64.whl", hash = "sha256:70297cbef7479429f69c966bfed10da20a6f0c2aa997eec2b4f6ba1a07caf2ef", size = 586915, upload-time = "2026-04-22T03:14:31.403Z" }, { url = "https://files.pythonhosted.org/packages/97/b0/61ac2c14773c786fecbda28067b0207a0c654cb4d10c548808c51284d700/mlx-0.31.2-cp311-cp311-macosx_26_0_arm64.whl", hash = "sha256:c0ff158b7ac93a4b5659adbc70053498b30a5964fc45f78596398e056a96c36a", size = 587030, upload-time = "2026-04-22T03:14:32.961Z" }, + { url = "https://files.pythonhosted.org/packages/de/53/e12feb7078ee472983555fcb1da4749a2bbbc8fc5b29b78c205b96d37d1e/mlx-0.31.2-cp311-cp311-manylinux_2_35_aarch64.whl", hash = "sha256:cd5d42b0b2bee7efe1b0680a7e302943dd33b92c879cffa0358ffdb5a4a8d27b", size = 652994, upload-time = "2026-04-22T03:14:34.691Z" }, + { url = "https://files.pythonhosted.org/packages/c5/40/f92c8cdc9595bf24c7e483a3156bfe0cc99a5cf5545d8dba8e7fe000c10b/mlx-0.31.2-cp311-cp311-manylinux_2_35_x86_64.whl", hash = "sha256:b368f7ede4238cc44076e4843820338c453c21ee50bd3ee26d4b182c179fd8e1", size = 692086, upload-time = "2026-04-22T03:14:36.45Z" }, { url = "https://files.pythonhosted.org/packages/c3/47/5f33906cb03d6a378a697cd2d2641a26b37dea17ee3d9124d7e39e8eca01/mlx-0.31.2-cp312-cp312-macosx_14_0_arm64.whl", hash = "sha256:e5067aaf2be1f3d7bba5be52348775804f111173c1ed04639618fd713b1a530f", size = 584863, upload-time = "2026-04-22T03:14:38.211Z" }, { url = "https://files.pythonhosted.org/packages/08/e7/a851a451b1327af9fb4df3991b9ae87d066b6f6630e854af55c288b0995a/mlx-0.31.2-cp312-cp312-macosx_15_0_arm64.whl", hash = "sha256:edb9797db7d852477ca1c99708058654ee860d4148fe5765f0d55528e2b1aa22", size = 584860, upload-time = "2026-04-22T03:14:39.746Z" }, { url = 
"https://files.pythonhosted.org/packages/3b/15/0d1dc0597644e5e7b011ca954ba0c47e13cd880a3b909b0c3f1b4d8bf8f1/mlx-0.31.2-cp312-cp312-macosx_26_0_arm64.whl", hash = "sha256:51ca102db641b01e7cb083ce8ecb580e281530a141a7ca12544bb370641630ae", size = 584887, upload-time = "2026-04-22T03:14:41.585Z" }, + { url = "https://files.pythonhosted.org/packages/5d/c3/00664239a98e8bd614733c4182cd402d2bacad2d7f79eca66562ac406870/mlx-0.31.2-cp312-cp312-manylinux_2_35_aarch64.whl", hash = "sha256:117c7583cae0ca107cd53c591cc34f8e75f97a505aa47088844b7dc0fc69dc67", size = 627863, upload-time = "2026-04-22T03:14:43.326Z" }, + { url = "https://files.pythonhosted.org/packages/53/7b/af6cd73a79772af6f19eab2cb4c48eda23a9294d1650a4c1269a9996e532/mlx-0.31.2-cp312-cp312-manylinux_2_35_x86_64.whl", hash = "sha256:99572133181481640a8bf8d449daf083816d0af3ee050c8adfc5bf45ceca91c6", size = 685090, upload-time = "2026-04-22T03:14:45.058Z" }, { url = "https://files.pythonhosted.org/packages/a3/3f/888f8664d4f8e23a1363a5f50024be5216e199ab7ad0ba20988c7ed6d729/mlx-0.31.2-cp313-cp313-macosx_14_0_arm64.whl", hash = "sha256:1b3fb0dda955b0d552ce57bdd6f42b3309ab21b067e40587d6848443d307e91f", size = 584796, upload-time = "2026-04-22T03:14:47.215Z" }, { url = "https://files.pythonhosted.org/packages/dd/14/e9cd18b51f9e1dbcb060eec0fafc2d2428c8e1eacd9b0a02d7c5ce75b661/mlx-0.31.2-cp313-cp313-macosx_15_0_arm64.whl", hash = "sha256:34b0171cd9eb5c43fdd82091f6135d6ccc5a065363a4a3e68fac64fb4e53d37c", size = 584790, upload-time = "2026-04-22T03:14:48.519Z" }, { url = "https://files.pythonhosted.org/packages/ca/20/c6c5fb998c7834d094b2bfb9f003b5246cb270f0266da055c55546c34999/mlx-0.31.2-cp313-cp313-macosx_26_0_arm64.whl", hash = "sha256:c05981684279a8935d58b0dde3ea5b02d210c3bad3319aa0e9934ec2df165752", size = 584795, upload-time = "2026-04-22T03:14:49.904Z" }, + { url = "https://files.pythonhosted.org/packages/0b/19/aca251d4c5f3532ce9c2c1e95ad76740d9c6c298f406f62d992f465b9be0/mlx-0.31.2-cp313-cp313-manylinux_2_35_aarch64.whl", 
hash = "sha256:cd1f4189e5f1bc68735f44eb63ce98ae09d66ac75d7ab5b15a41afae7e9f0513", size = 627843, upload-time = "2026-04-22T03:14:51.351Z" }, + { url = "https://files.pythonhosted.org/packages/3e/2b/b89364883b98f21c2fe29e52d4ac8bc2fa2fe0d79293b36ec421efc1854a/mlx-0.31.2-cp313-cp313-manylinux_2_35_x86_64.whl", hash = "sha256:53c8d57ffa9ce77f8355663be05014c0dd37280e57f19126fb0a24389a30684b", size = 685064, upload-time = "2026-04-22T03:14:52.75Z" }, ] [[package]] @@ -2744,13 +2932,13 @@ name = "mlx-lm" version = "0.31.3" source = { registry = "https://pypi.org/simple" } dependencies = [ - { name = "jinja2", marker = "(python_full_version < '3.11' and sys_platform == 'emscripten') or (python_full_version < '3.11' and sys_platform == 'win32') or (sys_platform != 'emscripten' and sys_platform != 'win32')" }, - { name = "mlx", marker = "sys_platform == 'darwin'" }, - { name = "numpy", marker = "(python_full_version < '3.11' and sys_platform == 'emscripten') or (python_full_version < '3.11' and sys_platform == 'win32') or (sys_platform != 'emscripten' and sys_platform != 'win32')" }, - { name = "protobuf", marker = "(python_full_version < '3.11' and sys_platform == 'emscripten') or (python_full_version < '3.11' and sys_platform == 'win32') or (sys_platform != 'emscripten' and sys_platform != 'win32')" }, - { name = "pyyaml", marker = "(python_full_version < '3.11' and sys_platform == 'emscripten') or (python_full_version < '3.11' and sys_platform == 'win32') or (sys_platform != 'emscripten' and sys_platform != 'win32')" }, - { name = "sentencepiece", marker = "(python_full_version < '3.11' and sys_platform == 'emscripten') or (python_full_version < '3.11' and sys_platform == 'win32') or (sys_platform != 'emscripten' and sys_platform != 'win32')" }, - { name = "transformers", marker = "(python_full_version < '3.11' and sys_platform == 'emscripten') or (python_full_version < '3.11' and sys_platform == 'win32') or (sys_platform != 'emscripten' and sys_platform != 'win32')" }, 
+ { name = "jinja2" }, + { name = "mlx", marker = "sys_platform == 'darwin' or (extra == 'extra-9-verifiers-openenv' and extra == 'group-9-verifiers-policy')" }, + { name = "numpy" }, + { name = "protobuf" }, + { name = "pyyaml" }, + { name = "sentencepiece" }, + { name = "transformers" }, ] sdist = { url = "https://files.pythonhosted.org/packages/84/94/9a38d6b0c6fcca995b9136c94eb7da1e9c5165652edf228b96b29960fa7a/mlx_lm-0.31.3.tar.gz", hash = "sha256:61eb0e3ba09444f77f874aff295401d7ccd20b39495cbbce0c782a15474ce733", size = 304318, upload-time = "2026-04-22T07:37:27.922Z" } wheels = [ @@ -2873,7 +3061,7 @@ name = "multidict" version = "6.7.1" source = { registry = "https://pypi.org/simple" } dependencies = [ - { name = "typing-extensions", marker = "python_full_version < '3.11'" }, + { name = "typing-extensions", marker = "python_full_version < '3.11' or (extra == 'extra-9-verifiers-openenv' and extra == 'group-9-verifiers-policy')" }, ] sdist = { url = "https://files.pythonhosted.org/packages/1a/c2/c2d94cbe6ac1753f3fc980da97b3d930efe1da3af3c9f5125354436c073d/multidict-6.7.1.tar.gz", hash = "sha256:ec6652a1bee61c53a3e5776b6049172c53b6aaba34f18c9ad04f82712bac623d", size = 102010, upload-time = "2026-01-26T02:46:45.979Z" } wheels = [ @@ -3183,7 +3371,9 @@ name = "nvidia-cublas-cu12" version = "12.8.4.1" source = { registry = "https://pypi.org/simple" } wheels = [ + { url = "https://files.pythonhosted.org/packages/29/99/db44d685f0e257ff0e213ade1964fc459b4a690a73293220e98feb3307cf/nvidia_cublas_cu12-12.8.4.1-py3-none-manylinux_2_27_aarch64.whl", hash = "sha256:b86f6dd8935884615a0683b663891d43781b819ac4f2ba2b0c9604676af346d0", size = 590537124, upload-time = "2025-03-07T01:43:53.556Z" }, { url = "https://files.pythonhosted.org/packages/dc/61/e24b560ab2e2eaeb3c839129175fb330dfcfc29e5203196e5541a4c44682/nvidia_cublas_cu12-12.8.4.1-py3-none-manylinux_2_27_x86_64.whl", hash = "sha256:8ac4e771d5a348c551b2a426eda6193c19aa630236b418086020df5ba9667142", size = 594346921, 
upload-time = "2025-03-07T01:44:31.254Z" }, + { url = "https://files.pythonhosted.org/packages/70/61/7d7b3c70186fb651d0fbd35b01dbfc8e755f69fd58f817f3d0f642df20c3/nvidia_cublas_cu12-12.8.4.1-py3-none-win_amd64.whl", hash = "sha256:47e9b82132fa8d2b4944e708049229601448aaad7e6f296f630f2d1a32de35af", size = 567544208, upload-time = "2025-03-07T01:53:30.535Z" }, ] [[package]] @@ -3191,7 +3381,9 @@ name = "nvidia-cuda-cupti-cu12" version = "12.8.90" source = { registry = "https://pypi.org/simple" } wheels = [ + { url = "https://files.pythonhosted.org/packages/d5/1f/b3bd73445e5cb342727fd24fe1f7b748f690b460acadc27ea22f904502c8/nvidia_cuda_cupti_cu12-12.8.90-py3-none-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:4412396548808ddfed3f17a467b104ba7751e6b58678a4b840675c56d21cf7ed", size = 9533318, upload-time = "2025-03-07T01:40:10.421Z" }, { url = "https://files.pythonhosted.org/packages/f8/02/2adcaa145158bf1a8295d83591d22e4103dbfd821bcaf6f3f53151ca4ffa/nvidia_cuda_cupti_cu12-12.8.90-py3-none-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:ea0cb07ebda26bb9b29ba82cda34849e73c166c18162d3913575b0c9db9a6182", size = 10248621, upload-time = "2025-03-07T01:40:21.213Z" }, + { url = "https://files.pythonhosted.org/packages/41/bc/83f5426095d93694ae39fe1311431b5d5a9bb82e48bf0dd8e19be2765942/nvidia_cuda_cupti_cu12-12.8.90-py3-none-win_amd64.whl", hash = "sha256:bb479dcdf7e6d4f8b0b01b115260399bf34154a1a2e9fe11c85c517d87efd98e", size = 7015759, upload-time = "2025-03-07T01:51:11.355Z" }, ] [[package]] @@ -3200,6 +3392,8 @@ version = "12.8.93" source = { registry = "https://pypi.org/simple" } wheels = [ { url = "https://files.pythonhosted.org/packages/05/6b/32f747947df2da6994e999492ab306a903659555dddc0fbdeb9d71f75e52/nvidia_cuda_nvrtc_cu12-12.8.93-py3-none-manylinux2010_x86_64.manylinux_2_12_x86_64.whl", hash = "sha256:a7756528852ef889772a84c6cd89d41dfa74667e24cca16bb31f8f061e3e9994", size = 88040029, upload-time = "2025-03-07T01:42:13.562Z" }, + { url = 
"https://files.pythonhosted.org/packages/eb/d1/e50d0acaab360482034b84b6e27ee83c6738f7d32182b987f9c7a4e32962/nvidia_cuda_nvrtc_cu12-12.8.93-py3-none-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:fc1fec1e1637854b4c0a65fb9a8346b51dd9ee69e61ebaccc82058441f15bce8", size = 43106076, upload-time = "2025-03-07T01:41:59.817Z" }, + { url = "https://files.pythonhosted.org/packages/45/51/52a3d84baa2136cc8df15500ad731d74d3a1114d4c123e043cb608d4a32b/nvidia_cuda_nvrtc_cu12-12.8.93-py3-none-win_amd64.whl", hash = "sha256:7a4b6b2904850fe78e0bd179c4b655c404d4bb799ef03ddc60804247099ae909", size = 73586838, upload-time = "2025-03-07T01:52:13.483Z" }, ] [[package]] @@ -3207,7 +3401,9 @@ name = "nvidia-cuda-runtime-cu12" version = "12.8.90" source = { registry = "https://pypi.org/simple" } wheels = [ + { url = "https://files.pythonhosted.org/packages/7c/75/f865a3b236e4647605ea34cc450900854ba123834a5f1598e160b9530c3a/nvidia_cuda_runtime_cu12-12.8.90-py3-none-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:52bf7bbee900262ffefe5e9d5a2a69a30d97e2bc5bb6cc866688caa976966e3d", size = 965265, upload-time = "2025-03-07T01:39:43.533Z" }, { url = "https://files.pythonhosted.org/packages/0d/9b/a997b638fcd068ad6e4d53b8551a7d30fe8b404d6f1804abf1df69838932/nvidia_cuda_runtime_cu12-12.8.90-py3-none-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:adade8dcbd0edf427b7204d480d6066d33902cab2a4707dcfc48a2d0fd44ab90", size = 954765, upload-time = "2025-03-07T01:40:01.615Z" }, + { url = "https://files.pythonhosted.org/packages/30/a5/a515b7600ad361ea14bfa13fb4d6687abf500adc270f19e89849c0590492/nvidia_cuda_runtime_cu12-12.8.90-py3-none-win_amd64.whl", hash = "sha256:c0c6027f01505bfed6c3b21ec546f69c687689aad5f1a377554bc6ca4aa993a8", size = 944318, upload-time = "2025-03-07T01:51:01.794Z" }, ] [[package]] @@ -3215,10 +3411,12 @@ name = "nvidia-cudnn-cu12" version = "9.10.2.21" source = { registry = "https://pypi.org/simple" } dependencies = [ - { name = 
"nvidia-cublas-cu12", marker = "(python_full_version < '3.11' and sys_platform == 'emscripten') or (python_full_version < '3.11' and sys_platform == 'win32') or (sys_platform != 'emscripten' and sys_platform != 'win32')" }, + { name = "nvidia-cublas-cu12" }, ] wheels = [ + { url = "https://files.pythonhosted.org/packages/fa/41/e79269ce215c857c935fd86bcfe91a451a584dfc27f1e068f568b9ad1ab7/nvidia_cudnn_cu12-9.10.2.21-py3-none-manylinux_2_27_aarch64.whl", hash = "sha256:c9132cc3f8958447b4910a1720036d9eff5928cc3179b0a51fb6d167c6cc87d8", size = 705026878, upload-time = "2025-06-06T21:52:51.348Z" }, { url = "https://files.pythonhosted.org/packages/ba/51/e123d997aa098c61d029f76663dedbfb9bc8dcf8c60cbd6adbe42f76d049/nvidia_cudnn_cu12-9.10.2.21-py3-none-manylinux_2_27_x86_64.whl", hash = "sha256:949452be657fa16687d0930933f032835951ef0892b37d2d53824d1a84dc97a8", size = 706758467, upload-time = "2025-06-06T21:54:08.597Z" }, + { url = "https://files.pythonhosted.org/packages/3d/90/0bd6e586701b3a890fd38aa71c387dab4883d619d6e5ad912ccbd05bfd67/nvidia_cudnn_cu12-9.10.2.21-py3-none-win_amd64.whl", hash = "sha256:c6288de7d63e6cf62988f0923f96dc339cea362decb1bf5b3141883392a7d65e", size = 692992268, upload-time = "2025-06-06T21:55:18.114Z" }, ] [[package]] @@ -3226,10 +3424,12 @@ name = "nvidia-cufft-cu12" version = "11.3.3.83" source = { registry = "https://pypi.org/simple" } dependencies = [ - { name = "nvidia-nvjitlink-cu12", marker = "(python_full_version < '3.11' and sys_platform == 'emscripten') or (python_full_version < '3.11' and sys_platform == 'win32') or (sys_platform != 'emscripten' and sys_platform != 'win32')" }, + { name = "nvidia-nvjitlink-cu12" }, ] wheels = [ + { url = "https://files.pythonhosted.org/packages/60/bc/7771846d3a0272026c416fbb7e5f4c1f146d6d80704534d0b187dd6f4800/nvidia_cufft_cu12-11.3.3.83-py3-none-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:848ef7224d6305cdb2a4df928759dca7b1201874787083b6e7550dd6765ce69a", size = 193109211, 
upload-time = "2025-03-07T01:44:56.873Z" }, { url = "https://files.pythonhosted.org/packages/1f/13/ee4e00f30e676b66ae65b4f08cb5bcbb8392c03f54f2d5413ea99a5d1c80/nvidia_cufft_cu12-11.3.3.83-py3-none-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:4d2dd21ec0b88cf61b62e6b43564355e5222e4a3fb394cac0db101f2dd0d4f74", size = 193118695, upload-time = "2025-03-07T01:45:27.821Z" }, + { url = "https://files.pythonhosted.org/packages/7d/ec/ce1629f1e478bb5ccd208986b5f9e0316a78538dd6ab1d0484f012f8e2a1/nvidia_cufft_cu12-11.3.3.83-py3-none-win_amd64.whl", hash = "sha256:7a64a98ef2a7c47f905aaf8931b69a3a43f27c55530c698bb2ed7c75c0b42cb7", size = 192216559, upload-time = "2025-03-07T01:53:57.106Z" }, ] [[package]] @@ -3238,6 +3438,7 @@ version = "1.13.1.3" source = { registry = "https://pypi.org/simple" } wheels = [ { url = "https://files.pythonhosted.org/packages/bb/fe/1bcba1dfbfb8d01be8d93f07bfc502c93fa23afa6fd5ab3fc7c1df71038a/nvidia_cufile_cu12-1.13.1.3-py3-none-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:1d069003be650e131b21c932ec3d8969c1715379251f8d23a1860554b1cb24fc", size = 1197834, upload-time = "2025-03-07T01:45:50.723Z" }, + { url = "https://files.pythonhosted.org/packages/1e/f5/5607710447a6fe9fd9b3283956fceeee8a06cda1d2f56ce31371f595db2a/nvidia_cufile_cu12-1.13.1.3-py3-none-manylinux_2_27_aarch64.whl", hash = "sha256:4beb6d4cce47c1a0f1013d72e02b0994730359e17801d395bdcbf20cfb3bb00a", size = 1120705, upload-time = "2025-03-07T01:45:41.434Z" }, ] [[package]] @@ -3245,7 +3446,9 @@ name = "nvidia-curand-cu12" version = "10.3.9.90" source = { registry = "https://pypi.org/simple" } wheels = [ + { url = "https://files.pythonhosted.org/packages/45/5e/92aa15eca622a388b80fbf8375d4760738df6285b1e92c43d37390a33a9a/nvidia_curand_cu12-10.3.9.90-py3-none-manylinux_2_27_aarch64.whl", hash = "sha256:dfab99248034673b779bc6decafdc3404a8a6f502462201f2f31f11354204acd", size = 63625754, upload-time = "2025-03-07T01:46:10.735Z" }, { url = 
"https://files.pythonhosted.org/packages/fb/aa/6584b56dc84ebe9cf93226a5cde4d99080c8e90ab40f0c27bda7a0f29aa1/nvidia_curand_cu12-10.3.9.90-py3-none-manylinux_2_27_x86_64.whl", hash = "sha256:b32331d4f4df5d6eefa0554c565b626c7216f87a06a4f56fab27c3b68a830ec9", size = 63619976, upload-time = "2025-03-07T01:46:23.323Z" }, + { url = "https://files.pythonhosted.org/packages/b9/75/70c05b2f3ed5be3bb30b7102b6eb78e100da4bbf6944fd6725c012831cab/nvidia_curand_cu12-10.3.9.90-py3-none-win_amd64.whl", hash = "sha256:f149a8ca457277da854f89cf282d6ef43176861926c7ac85b2a0fbd237c587ec", size = 62765309, upload-time = "2025-03-07T01:54:20.478Z" }, ] [[package]] @@ -3253,12 +3456,14 @@ name = "nvidia-cusolver-cu12" version = "11.7.3.90" source = { registry = "https://pypi.org/simple" } dependencies = [ - { name = "nvidia-cublas-cu12", marker = "(python_full_version < '3.11' and sys_platform == 'emscripten') or (python_full_version < '3.11' and sys_platform == 'win32') or (sys_platform != 'emscripten' and sys_platform != 'win32')" }, - { name = "nvidia-cusparse-cu12", marker = "(python_full_version < '3.11' and sys_platform == 'emscripten') or (python_full_version < '3.11' and sys_platform == 'win32') or (sys_platform != 'emscripten' and sys_platform != 'win32')" }, - { name = "nvidia-nvjitlink-cu12", marker = "(python_full_version < '3.11' and sys_platform == 'emscripten') or (python_full_version < '3.11' and sys_platform == 'win32') or (sys_platform != 'emscripten' and sys_platform != 'win32')" }, + { name = "nvidia-cublas-cu12" }, + { name = "nvidia-cusparse-cu12" }, + { name = "nvidia-nvjitlink-cu12" }, ] wheels = [ + { url = "https://files.pythonhosted.org/packages/c8/32/f7cd6ce8a7690544d084ea21c26e910a97e077c9b7f07bf5de623ee19981/nvidia_cusolver_cu12-11.7.3.90-py3-none-manylinux_2_27_aarch64.whl", hash = "sha256:db9ed69dbef9715071232caa9b69c52ac7de3a95773c2db65bdba85916e4e5c0", size = 267229841, upload-time = "2025-03-07T01:46:54.356Z" }, { url = 
"https://files.pythonhosted.org/packages/85/48/9a13d2975803e8cf2777d5ed57b87a0b6ca2cc795f9a4f59796a910bfb80/nvidia_cusolver_cu12-11.7.3.90-py3-none-manylinux_2_27_x86_64.whl", hash = "sha256:4376c11ad263152bd50ea295c05370360776f8c3427b30991df774f9fb26c450", size = 267506905, upload-time = "2025-03-07T01:47:16.273Z" }, + { url = "https://files.pythonhosted.org/packages/13/c0/76ca8551b8a84146ffa189fec81c26d04adba4bc0dbe09cd6e6fd9b7de04/nvidia_cusolver_cu12-11.7.3.90-py3-none-win_amd64.whl", hash = "sha256:4a550db115fcabc4d495eb7d39ac8b58d4ab5d8e63274d3754df1c0ad6a22d34", size = 256720438, upload-time = "2025-03-07T01:54:39.898Z" }, ] [[package]] @@ -3266,10 +3471,12 @@ name = "nvidia-cusparse-cu12" version = "12.5.8.93" source = { registry = "https://pypi.org/simple" } dependencies = [ - { name = "nvidia-nvjitlink-cu12", marker = "(python_full_version < '3.11' and sys_platform == 'emscripten') or (python_full_version < '3.11' and sys_platform == 'win32') or (sys_platform != 'emscripten' and sys_platform != 'win32')" }, + { name = "nvidia-nvjitlink-cu12" }, ] wheels = [ + { url = "https://files.pythonhosted.org/packages/bc/f7/cd777c4109681367721b00a106f491e0d0d15cfa1fd59672ce580ce42a97/nvidia_cusparse_cu12-12.5.8.93-py3-none-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:9b6c161cb130be1a07a27ea6923df8141f3c295852f4b260c65f18f3e0a091dc", size = 288117129, upload-time = "2025-03-07T01:47:40.407Z" }, { url = "https://files.pythonhosted.org/packages/c2/f5/e1854cb2f2bcd4280c44736c93550cc300ff4b8c95ebe370d0aa7d2b473d/nvidia_cusparse_cu12-12.5.8.93-py3-none-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:1ec05d76bbbd8b61b06a80e1eaf8cf4959c3d4ce8e711b65ebd0443bb0ebb13b", size = 288216466, upload-time = "2025-03-07T01:48:13.779Z" }, + { url = "https://files.pythonhosted.org/packages/62/07/f3b2ad63f8e3d257a599f422ae34eb565e70c41031aecefa3d18b62cabd1/nvidia_cusparse_cu12-12.5.8.93-py3-none-win_amd64.whl", hash = 
"sha256:9a33604331cb2cac199f2e7f5104dfbb8a5a898c367a53dfda9ff2acb6b6b4dd", size = 284937404, upload-time = "2025-03-07T01:55:07.742Z" }, ] [[package]] @@ -3277,7 +3484,9 @@ name = "nvidia-cusparselt-cu12" version = "0.7.1" source = { registry = "https://pypi.org/simple" } wheels = [ + { url = "https://files.pythonhosted.org/packages/73/b9/598f6ff36faaece4b3c50d26f50e38661499ff34346f00e057760b35cc9d/nvidia_cusparselt_cu12-0.7.1-py3-none-manylinux2014_aarch64.whl", hash = "sha256:8878dce784d0fac90131b6817b607e803c36e629ba34dc5b433471382196b6a5", size = 283835557, upload-time = "2025-02-26T00:16:54.265Z" }, { url = "https://files.pythonhosted.org/packages/56/79/12978b96bd44274fe38b5dde5cfb660b1d114f70a65ef962bcbbed99b549/nvidia_cusparselt_cu12-0.7.1-py3-none-manylinux2014_x86_64.whl", hash = "sha256:f1bb701d6b930d5a7cea44c19ceb973311500847f81b634d802b7b539dc55623", size = 287193691, upload-time = "2025-02-26T00:15:44.104Z" }, + { url = "https://files.pythonhosted.org/packages/2f/d8/a6b0d0d0c2435e9310f3e2bb0d9c9dd4c33daef86aa5f30b3681defd37ea/nvidia_cusparselt_cu12-0.7.1-py3-none-win_amd64.whl", hash = "sha256:f67fbb5831940ec829c9117b7f33807db9f9678dc2a617fbe781cac17b4e1075", size = 271020911, upload-time = "2025-02-26T00:14:47.204Z" }, ] [[package]] @@ -3285,6 +3494,7 @@ name = "nvidia-nccl-cu12" version = "2.27.3" source = { registry = "https://pypi.org/simple" } wheels = [ + { url = "https://files.pythonhosted.org/packages/4b/7b/8354b784cf73b0ba51e566b4baba3ddd44fe8288a3d39ef1e06cd5417226/nvidia_nccl_cu12-2.27.3-py3-none-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:9ddf1a245abc36c550870f26d537a9b6087fb2e2e3d6e0ef03374c6fd19d984f", size = 322397768, upload-time = "2025-06-03T21:57:30.234Z" }, { url = "https://files.pythonhosted.org/packages/5c/5b/4e4fff7bad39adf89f735f2bc87248c81db71205b62bcc0d5ca5b606b3c3/nvidia_nccl_cu12-2.27.3-py3-none-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = 
"sha256:adf27ccf4238253e0b826bce3ff5fa532d65fc42322c8bfdfaf28024c0fbe039", size = 322364134, upload-time = "2025-06-03T21:58:04.013Z" }, ] @@ -3294,6 +3504,8 @@ version = "12.8.93" source = { registry = "https://pypi.org/simple" } wheels = [ { url = "https://files.pythonhosted.org/packages/f6/74/86a07f1d0f42998ca31312f998bd3b9a7eff7f52378f4f270c8679c77fb9/nvidia_nvjitlink_cu12-12.8.93-py3-none-manylinux2010_x86_64.manylinux_2_12_x86_64.whl", hash = "sha256:81ff63371a7ebd6e6451970684f916be2eab07321b73c9d244dc2b4da7f73b88", size = 39254836, upload-time = "2025-03-07T01:49:55.661Z" }, + { url = "https://files.pythonhosted.org/packages/2a/a2/8cee5da30d13430e87bf99bb33455d2724d0a4a9cb5d7926d80ccb96d008/nvidia_nvjitlink_cu12-12.8.93-py3-none-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:adccd7161ace7261e01bb91e44e88da350895c270d23f744f0820c818b7229e7", size = 38386204, upload-time = "2025-03-07T01:49:43.612Z" }, + { url = "https://files.pythonhosted.org/packages/ed/d7/34f02dad2e30c31b10a51f6b04e025e5dd60e5f936af9045a9b858a05383/nvidia_nvjitlink_cu12-12.8.93-py3-none-win_amd64.whl", hash = "sha256:bd93fbeeee850917903583587f4fc3a4eafa022e34572251368238ab5e6bd67f", size = 268553710, upload-time = "2025-03-07T01:56:24.13Z" }, ] [[package]] @@ -3301,7 +3513,9 @@ name = "nvidia-nvtx-cu12" version = "12.8.90" source = { registry = "https://pypi.org/simple" } wheels = [ + { url = "https://files.pythonhosted.org/packages/10/c0/1b303feea90d296f6176f32a2a70b5ef230f9bdeb3a72bddb0dc922dc137/nvidia_nvtx_cu12-12.8.90-py3-none-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:d7ad891da111ebafbf7e015d34879f7112832fc239ff0d7d776b6cb685274615", size = 91161, upload-time = "2025-03-07T01:42:23.922Z" }, { url = "https://files.pythonhosted.org/packages/a2/eb/86626c1bbc2edb86323022371c39aa48df6fd8b0a1647bc274577f72e90b/nvidia_nvtx_cu12-12.8.90-py3-none-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = 
"sha256:5b17e2001cc0d751a5bc2c6ec6d26ad95913324a4adb86788c944f8ce9ba441f", size = 89954, upload-time = "2025-03-07T01:42:44.131Z" }, + { url = "https://files.pythonhosted.org/packages/9f/99/4c9c0c329bf9fc125008c3b54c7c94c0023518d06fc025ae36431375e1fe/nvidia_nvtx_cu12-12.8.90-py3-none-win_amd64.whl", hash = "sha256:619c8304aedc69f02ea82dd244541a83c3d9d40993381b3b590f1adaed3db41e", size = 56492, upload-time = "2025-03-07T01:52:24.69Z" }, ] [[package]] @@ -3329,7 +3543,8 @@ version = "0.14.5" source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "griffelib" }, - { name = "mcp" }, + { name = "mcp", version = "1.23.3", source = { registry = "https://pypi.org/simple" }, marker = "extra == 'group-9-verifiers-policy'" }, + { name = "mcp", version = "1.27.0", source = { registry = "https://pypi.org/simple" }, marker = "extra == 'extra-9-verifiers-openenv' or extra != 'group-9-verifiers-policy'" }, { name = "openai" }, { name = "pydantic" }, { name = "requests" }, @@ -3397,48 +3612,229 @@ wheels = [ [[package]] name = "openenv-core" -version = "0.2.1" +version = "0.3.0" source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "fastapi" }, { name = "fastmcp" }, + { name = "gradio" }, + { name = "httpx" }, { name = "huggingface-hub" }, { name = "openai" }, { name = "pydantic" }, { name = "pyyaml" }, { name = "requests" }, { name = "rich" }, - { name = "tomli" }, + { name = "tomli", version = "2.4.1", source = { registry = "https://pypi.org/simple" } }, { name = "tomli-w" }, { name = "typer" }, { name = "uvicorn" }, { name = "websockets" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/d1/d3/d2cef0e459158c9410f073ffd2ad6eca7c09232e7c53d4987acc0b942d28/openenv_core-0.2.1.tar.gz", hash = "sha256:0caa44411af7d866e451e50744d1adab57cdf9a2cf7a1b3f81042675110aebc7", size = 102728, upload-time = "2026-02-04T10:25:24.263Z" } +sdist = { url = 
"https://files.pythonhosted.org/packages/ce/d6/3bebe8afb55fcc3ea9251c4c2dfbab2879e31089bc91a8fe9696e5ce019b/openenv_core-0.3.0.tar.gz", hash = "sha256:c7fee2035badab5be497eb6f4afb2cb417de000f82cc19afd72fb5ec332c431d", size = 164720, upload-time = "2026-05-11T11:37:57.274Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/a1/5a/a7f8b0e53eac45faedcf6fbfacdd28a104f815d3471f2deceefb4234d8be/openenv_core-0.2.1-py3-none-any.whl", hash = "sha256:5868722833df3220b7a3288f581e6c0825c2d8fae42d932ff90d2bb60765813a", size = 121855, upload-time = "2026-02-04T10:25:22.82Z" }, + { url = "https://files.pythonhosted.org/packages/f8/f5/aafa43138589bfd5d369a8d02ea365aae9d6fe55ac0b3894368d6d69bd03/openenv_core-0.3.0-py3-none-any.whl", hash = "sha256:859e875c9d5211b157c30fb9abc681606fcf0bf1b6ffcdf404678992823a1df0", size = 194313, upload-time = "2026-05-11T11:37:55.537Z" }, ] -[package.optional-dependencies] -core = [ - { name = "fastapi" }, - { name = "pydantic" }, +[[package]] +name = "opentelemetry-api" +version = "1.37.0" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "importlib-metadata" }, + { name = "typing-extensions" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/63/04/05040d7ce33a907a2a02257e601992f0cdf11c73b33f13c4492bf6c3d6d5/opentelemetry_api-1.37.0.tar.gz", hash = "sha256:540735b120355bd5112738ea53621f8d5edb35ebcd6fe21ada3ab1c61d1cd9a7", size = 64923, upload-time = "2025-09-11T10:29:01.662Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/91/48/28ed9e55dcf2f453128df738210a980e09f4e468a456fa3c763dbc8be70a/opentelemetry_api-1.37.0-py3-none-any.whl", hash = "sha256:accf2024d3e89faec14302213bc39550ec0f4095d1cf5ca688e1bfb1c8612f47", size = 65732, upload-time = "2025-09-11T10:28:41.826Z" }, +] + +[[package]] +name = "opentelemetry-exporter-otlp-proto-common" +version = "1.37.0" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "opentelemetry-proto" }, +] +sdist = { url 
= "https://files.pythonhosted.org/packages/dc/6c/10018cbcc1e6fff23aac67d7fd977c3d692dbe5f9ef9bb4db5c1268726cc/opentelemetry_exporter_otlp_proto_common-1.37.0.tar.gz", hash = "sha256:c87a1bdd9f41fdc408d9cc9367bb53f8d2602829659f2b90be9f9d79d0bfe62c", size = 20430, upload-time = "2025-09-11T10:29:03.605Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/08/13/b4ef09837409a777f3c0af2a5b4ba9b7af34872bc43609dda0c209e4060d/opentelemetry_exporter_otlp_proto_common-1.37.0-py3-none-any.whl", hash = "sha256:53038428449c559b0c564b8d718df3314da387109c4d36bd1b94c9a641b0292e", size = 18359, upload-time = "2025-09-11T10:28:44.939Z" }, +] + +[[package]] +name = "opentelemetry-exporter-otlp-proto-http" +version = "1.37.0" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "googleapis-common-protos" }, + { name = "opentelemetry-api" }, + { name = "opentelemetry-exporter-otlp-proto-common" }, + { name = "opentelemetry-proto" }, + { name = "opentelemetry-sdk" }, { name = "requests" }, - { name = "uvicorn" }, - { name = "websockets" }, + { name = "typing-extensions" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/5d/e3/6e320aeb24f951449e73867e53c55542bebbaf24faeee7623ef677d66736/opentelemetry_exporter_otlp_proto_http-1.37.0.tar.gz", hash = "sha256:e52e8600f1720d6de298419a802108a8f5afa63c96809ff83becb03f874e44ac", size = 17281, upload-time = "2025-09-11T10:29:04.844Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/e9/e9/70d74a664d83976556cec395d6bfedd9b85ec1498b778367d5f93e373397/opentelemetry_exporter_otlp_proto_http-1.37.0-py3-none-any.whl", hash = "sha256:54c42b39945a6cc9d9a2a33decb876eabb9547e0dcb49df090122773447f1aef", size = 19576, upload-time = "2025-09-11T10:28:46.726Z" }, ] [[package]] -name = "opentelemetry-api" -version = "1.41.1" +name = "opentelemetry-instrumentation" +version = "0.58b0" source = { registry = "https://pypi.org/simple" } dependencies = [ - { name = "importlib-metadata" }, + { name 
= "opentelemetry-api" }, + { name = "opentelemetry-semantic-conventions" }, + { name = "packaging" }, + { name = "wrapt" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/f6/36/7c307d9be8ce4ee7beb86d7f1d31027f2a6a89228240405a858d6e4d64f9/opentelemetry_instrumentation-0.58b0.tar.gz", hash = "sha256:df640f3ac715a3e05af145c18f527f4422c6ab6c467e40bd24d2ad75a00cb705", size = 31549, upload-time = "2025-09-11T11:42:14.084Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/d4/db/5ff1cd6c5ca1d12ecf1b73be16fbb2a8af2114ee46d4b0e6d4b23f4f4db7/opentelemetry_instrumentation-0.58b0-py3-none-any.whl", hash = "sha256:50f97ac03100676c9f7fc28197f8240c7290ca1baa12da8bfbb9a1de4f34cc45", size = 33019, upload-time = "2025-09-11T11:41:00.624Z" }, +] + +[[package]] +name = "opentelemetry-instrumentation-requests" +version = "0.58b0" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "opentelemetry-api" }, + { name = "opentelemetry-instrumentation" }, + { name = "opentelemetry-semantic-conventions" }, + { name = "opentelemetry-util-http" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/36/42/83ee32de763b919779aaa595b60c5a7b9c0a4b33952bbe432c5f6a783085/opentelemetry_instrumentation_requests-0.58b0.tar.gz", hash = "sha256:ae9495e6ff64e27bdb839fce91dbb4be56e325139828e8005f875baf41951a2e", size = 15188, upload-time = "2025-09-11T11:42:51.268Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/90/4d/f3476b28ea167d1762134352d01ae9693940a42c78994d9f1b32a4477816/opentelemetry_instrumentation_requests-0.58b0-py3-none-any.whl", hash = "sha256:672a0be0bb5b52bea0c11820b35e27edcf4cd22d34abe4afc59a92a80519f8a8", size = 12966, upload-time = "2025-09-11T11:41:52.67Z" }, +] + +[[package]] +name = "opentelemetry-instrumentation-threading" +version = "0.58b0" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "opentelemetry-api" }, + { name = "opentelemetry-instrumentation" }, + { name = 
"wrapt" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/70/a9/3888cb0470e6eb48ea17b6802275ae71df411edd6382b9a8e8f391936fda/opentelemetry_instrumentation_threading-0.58b0.tar.gz", hash = "sha256:f68c61f77841f9ff6270176f4d496c10addbceacd782af434d705f83e4504862", size = 8770, upload-time = "2025-09-11T11:42:56.308Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/a5/54/add1076cb37980e617723a96e29c84006983e8ad6fc589dde7f69ddc57d4/opentelemetry_instrumentation_threading-0.58b0-py3-none-any.whl", hash = "sha256:eacc072881006aceb5b9b6831bcdce718c67ef6f31ac0b32bd6a23a94d979b4a", size = 9312, upload-time = "2025-09-11T11:41:58.603Z" }, +] + +[[package]] +name = "opentelemetry-proto" +version = "1.37.0" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "protobuf" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/dd/ea/a75f36b463a36f3c5a10c0b5292c58b31dbdde74f6f905d3d0ab2313987b/opentelemetry_proto-1.37.0.tar.gz", hash = "sha256:30f5c494faf66f77faeaefa35ed4443c5edb3b0aa46dad073ed7210e1a789538", size = 46151, upload-time = "2025-09-11T10:29:11.04Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/c4/25/f89ea66c59bd7687e218361826c969443c4fa15dfe89733f3bf1e2a9e971/opentelemetry_proto-1.37.0-py3-none-any.whl", hash = "sha256:8ed8c066ae8828bbf0c39229979bdf583a126981142378a9cbe9d6fd5701c6e2", size = 72534, upload-time = "2025-09-11T10:28:56.831Z" }, +] + +[[package]] +name = "opentelemetry-sdk" +version = "1.37.0" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "opentelemetry-api" }, + { name = "opentelemetry-semantic-conventions" }, { name = "typing-extensions" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/fa/fc/b7564cbef36601aef0d6c9bc01f7badb64be8e862c2e1c3c5c3b43b53e4f/opentelemetry_api-1.41.1.tar.gz", hash = "sha256:0ad1814d73b875f84494387dae86ce0b12c68556331ce6ce8fe789197c949621", size = 71416, upload-time = "2026-04-24T13:15:38.262Z" } 
+sdist = { url = "https://files.pythonhosted.org/packages/f4/62/2e0ca80d7fe94f0b193135375da92c640d15fe81f636658d2acf373086bc/opentelemetry_sdk-1.37.0.tar.gz", hash = "sha256:cc8e089c10953ded765b5ab5669b198bbe0af1b3f89f1007d19acd32dc46dda5", size = 170404, upload-time = "2025-09-11T10:29:11.779Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/29/59/3e7118ed140f76b0982ba4321bdaed1997a0473f9720de2d10788a577033/opentelemetry_api-1.41.1-py3-none-any.whl", hash = "sha256:a22df900e75c76dc08440710e51f52f1aa6b451b429298896023e60db5b3139f", size = 69007, upload-time = "2026-04-24T13:15:15.662Z" }, + { url = "https://files.pythonhosted.org/packages/9f/62/9f4ad6a54126fb00f7ed4bb5034964c6e4f00fcd5a905e115bd22707e20d/opentelemetry_sdk-1.37.0-py3-none-any.whl", hash = "sha256:8f3c3c22063e52475c5dbced7209495c2c16723d016d39287dfc215d1771257c", size = 131941, upload-time = "2025-09-11T10:28:57.83Z" }, +] + +[[package]] +name = "opentelemetry-semantic-conventions" +version = "0.58b0" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "opentelemetry-api" }, + { name = "typing-extensions" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/aa/1b/90701d91e6300d9f2fb352153fb1721ed99ed1f6ea14fa992c756016e63a/opentelemetry_semantic_conventions-0.58b0.tar.gz", hash = "sha256:6bd46f51264279c433755767bb44ad00f1c9e2367e1b42af563372c5a6fa0c25", size = 129867, upload-time = "2025-09-11T10:29:12.597Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/07/90/68152b7465f50285d3ce2481b3aec2f82822e3f52e5152eeeaf516bab841/opentelemetry_semantic_conventions-0.58b0-py3-none-any.whl", hash = "sha256:5564905ab1458b96684db1340232729fce3b5375a06e140e8904c78e4f815b28", size = 207954, upload-time = "2025-09-11T10:28:59.218Z" }, +] + +[[package]] +name = "opentelemetry-util-http" +version = "0.58b0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = 
"https://files.pythonhosted.org/packages/c6/5f/02f31530faf50ef8a41ab34901c05cbbf8e9d76963ba2fb852b0b4065f4e/opentelemetry_util_http-0.58b0.tar.gz", hash = "sha256:de0154896c3472c6599311c83e0ecee856c4da1b17808d39fdc5cce5312e4d89", size = 9411, upload-time = "2025-09-11T11:43:05.602Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/a5/a3/0a1430c42c6d34d8372a16c104e7408028f0c30270d8f3eb6cccf2e82934/opentelemetry_util_http-0.58b0-py3-none-any.whl", hash = "sha256:6c6b86762ed43025fbd593dc5f700ba0aa3e09711aedc36fd48a13b23d8cb1e7", size = 7652, upload-time = "2025-09-11T11:42:09.682Z" }, +] + +[[package]] +name = "orjson" +version = "3.11.9" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/7e/0c/964746fcafbd16f8ff53219ad9f6b412b34f345c75f384ad434ceaadb538/orjson-3.11.9.tar.gz", hash = "sha256:4fef17e1f8722c11587a6ef18e35902450221da0028e65dbaaa543619e68e48f", size = 5599163, upload-time = "2026-05-06T15:11:08.309Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/10/5d/b95ca542a001135cc250a49370f282f578c8f4e46cc8617d73775297eea8/orjson-3.11.9-cp310-cp310-macosx_10_15_x86_64.macosx_11_0_arm64.macosx_10_15_universal2.whl", hash = "sha256:135869ef917b8704ea0a94e01620e0c05021c15c52036e4663baffe75e72f8ce", size = 228986, upload-time = "2026-05-06T15:09:14.765Z" }, + { url = "https://files.pythonhosted.org/packages/80/01/be33fbff646e22f93398429ea645f20d2097aea1a6cdc1e6628e70125f83/orjson-3.11.9-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:115ab5f5f4a0f203cc2a5f0fb09aee503a3f771aa08392949ab5ca230c4fbdbd", size = 132558, upload-time = "2026-05-06T15:09:17.431Z" }, + { url = "https://files.pythonhosted.org/packages/4e/61/73d49333bba660a075daccca10970dc6409ce1cf42ae4046646a19468aad/orjson-3.11.9-cp310-cp310-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:4da3c38a2083ca4aaf9c2a36776cce3e9328e6647b10d118948f3cfb4913ffe4", size = 128213, 
upload-time = "2026-05-06T15:09:18.719Z" }, + { url = "https://files.pythonhosted.org/packages/1f/7d/30e844b3dac3f74aed66b1f984daf9db3c98c0328c03d965a9e8dc06449e/orjson-3.11.9-cp310-cp310-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:53b50b0e14084b8f7e29c5ce84c5af0f1160169b30d8a6914231d97d2fe297d4", size = 135430, upload-time = "2026-05-06T15:09:20.257Z" }, + { url = "https://files.pythonhosted.org/packages/16/64/bd815f5c610b3facc204f26ba94e87a9eb49b0d83de3d5fc1eee2402d91b/orjson-3.11.9-cp310-cp310-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:231742b4a11dad8d5380a435962c57e91b7c37b79be858f4ef1c0df1a259897e", size = 146178, upload-time = "2026-05-06T15:09:21.616Z" }, + { url = "https://files.pythonhosted.org/packages/c7/35/e744fd36c79b339d27beb06068b5a08a8882ef5418804d0ce545a31f718d/orjson-3.11.9-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:34fd2317602587321faab75ab76c623a0117e80841a6413654f04e47f339a8fb", size = 133068, upload-time = "2026-05-06T15:09:23.228Z" }, + { url = "https://files.pythonhosted.org/packages/2a/56/d54152b67b63a0b3e556cfc549d6ce84f74d7f425ddeadc6c8a74d913da7/orjson-3.11.9-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:71f3db16e69b667b132e0f305a833d5497da302d801508cbb051ed9a9819da47", size = 134217, upload-time = "2026-05-06T15:09:24.847Z" }, + { url = "https://files.pythonhosted.org/packages/0b/ee/66154baf69f71c7164a268a5e888908aec5a0819d13c81d5e2755a257758/orjson-3.11.9-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:0b34789fa0da61cf7bef0546b09c738fb195331e017e477096d129e9105ab03d", size = 141917, upload-time = "2026-05-06T15:09:26.647Z" }, + { url = "https://files.pythonhosted.org/packages/09/d3/c5824260ca8b9d7ba82648d042a3f8f4815d18c15bb98a1f30edd1bb2d83/orjson-3.11.9-cp310-cp310-musllinux_1_2_armv7l.whl", hash = "sha256:87e4d4ab280b0c87424d47695bec2182caf8cfc17879ea78dab76680194abc13", size = 415356, upload-time = "2026-05-06T15:09:28.252Z" 
}, + { url = "https://files.pythonhosted.org/packages/64/cb/509c2e816fe4df641d93dc92f6a89adc8df3ada8ebdee2bd44aba3264c3c/orjson-3.11.9-cp310-cp310-musllinux_1_2_i686.whl", hash = "sha256:ace6c58523302d3b97b6ac5c38a5298a54b473762b6be82726b4265c41029f92", size = 148112, upload-time = "2026-05-06T15:09:29.783Z" }, + { url = "https://files.pythonhosted.org/packages/db/b5/3ceae56d2e4962979eedb023ba6a46a4bb65f333960379be0ca470686220/orjson-3.11.9-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:97d0d932803c1b164fde11cb542a9efcb1e0f63b184537cca65887147906ff48", size = 137112, upload-time = "2026-05-06T15:09:31.432Z" }, + { url = "https://files.pythonhosted.org/packages/d7/7a/81fa3f2c7bef79b04cf2ab7838e5ac74b1f12511ceab979759b0275d6bb4/orjson-3.11.9-cp310-cp310-win32.whl", hash = "sha256:b3afcf569c15577a9fe64627292daa3e6b3a70f4fb77a5df246a87ec21681b94", size = 131706, upload-time = "2026-05-06T15:09:32.707Z" }, + { url = "https://files.pythonhosted.org/packages/ae/d8/b64600f9083c7f151ad39717a5877fccbeb0ef6d7efcb55f971ce00b6bee/orjson-3.11.9-cp310-cp310-win_amd64.whl", hash = "sha256:8697ab6a080a5c46edaad50e2bc5bd8c7ca5c66442d24104fa44ec74910a8244", size = 127282, upload-time = "2026-05-06T15:09:33.955Z" }, + { url = "https://files.pythonhosted.org/packages/1e/51/3fb9e65ae76ee97bd611869a503fa3fc0a6e81dd8b737cf3003f682df7ff/orjson-3.11.9-cp311-cp311-macosx_10_15_x86_64.macosx_11_0_arm64.macosx_10_15_universal2.whl", hash = "sha256:f01c4818b3fc9b0da8e096722a84318071eaa118df35f6ed2344da0e73a5444f", size = 228522, upload-time = "2026-05-06T15:09:35.362Z" }, + { url = "https://files.pythonhosted.org/packages/16/fa/9d54b07cb3f3b0bfd57841478e42d7a0ece4a9f49f9907eecf5a45461687/orjson-3.11.9-cp311-cp311-macosx_15_0_arm64.whl", hash = "sha256:3ebca4179031ee716ed076ffadc29428e900512f6fccee8614c9983157fcf19c", size = 128463, upload-time = "2026-05-06T15:09:37.063Z" }, + { url = 
"https://files.pythonhosted.org/packages/88/b1/6ceafc2eefd0a553e3be77ce6c49d107e772485d9568629376171c50e634/orjson-3.11.9-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:48ee05097750de0ff69ed5b7bbcf0732182fd57a24043dcc2a1da780a5ead3a5", size = 132306, upload-time = "2026-05-06T15:09:38.299Z" }, + { url = "https://files.pythonhosted.org/packages/ea/76/f11311285324a40aab1e3031385c50b635a7cd0734fdaf60c7e89a696f60/orjson-3.11.9-cp311-cp311-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:a6082706765a95a6680d812e1daf1c0cfe8adec7831b3ff3b625693f3b461b1c", size = 127988, upload-time = "2026-05-06T15:09:39.597Z" }, + { url = "https://files.pythonhosted.org/packages/9e/85/0ef63bcf1337f44031ce9b91b1919563f62a37527b3ea4368bb15a22e5d7/orjson-3.11.9-cp311-cp311-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:277fefe9d76ee17eb14debf399e3533d4d63b5f677a4d3719eb763536af1f4bd", size = 135188, upload-time = "2026-05-06T15:09:40.957Z" }, + { url = "https://files.pythonhosted.org/packages/05/94/b0d27090ea8a2095db3c2bd1b1c96f96f19bbb494d7fef33130e846e613d/orjson-3.11.9-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:03db380e3780fa0015ed776a90f20e8e20bb11dde13b216ce19e5718e3dfba62", size = 145937, upload-time = "2026-05-06T15:09:42.249Z" }, + { url = "https://files.pythonhosted.org/packages/09/eb/75d50c29c05b8054013e221e598820a365c8e64065312e75e202ed880709/orjson-3.11.9-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:33d7d766701847dc6729846362dc27895d2f2d2251264f9d10e7cb9878194877", size = 132758, upload-time = "2026-05-06T15:09:43.945Z" }, + { url = "https://files.pythonhosted.org/packages/49/bd/360686f39348aa88827cb6fbf7dc606fd41c831a35235e1abf1db8e3a9e6/orjson-3.11.9-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:147302878da387104b66bb4a8b0227d1d487e976ce41a8501916161072ed87b1", size = 133971, upload-time = "2026-05-06T15:09:45.239Z" }, + { url 
= "https://files.pythonhosted.org/packages/0e/30/3178eb16f3221aeef068b6f1f1ebe05f656ea5c6dffe9f6c917329fe17a3/orjson-3.11.9-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:3513550321f8c8c811a7c3297b8a630e82dc08e4c10216d07703c997776236cd", size = 141685, upload-time = "2026-05-06T15:09:46.858Z" }, + { url = "https://files.pythonhosted.org/packages/5f/f1/ff2f19ed0225f9680fafa42febca3570dd59444ebf190980738d376214c2/orjson-3.11.9-cp311-cp311-musllinux_1_2_armv7l.whl", hash = "sha256:c5d001196b89fa9cf0a4ab79766cd835b991a166e4b621ba95089edc50c429ff", size = 415167, upload-time = "2026-05-06T15:09:48.312Z" }, + { url = "https://files.pythonhosted.org/packages/9b/61/863bddf0da6e9e586765414debd54b4e58db05f560902b6d00658cb88636/orjson-3.11.9-cp311-cp311-musllinux_1_2_i686.whl", hash = "sha256:16969c9d369c98eb084889c6e4d2d39b77c7eb38ceccf8da2a9fff62ae908980", size = 147913, upload-time = "2026-05-06T15:09:49.733Z" }, + { url = "https://files.pythonhosted.org/packages/b6/8a/4081492586d75b073d60c5271a8d0f05a0955cabf1e34c8473f6fcd84235/orjson-3.11.9-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:63e0efbc991250c0b3143488fa57d95affcabbfc63c99c48d625dd37779aafe2", size = 136959, upload-time = "2026-05-06T15:09:51.311Z" }, + { url = "https://files.pythonhosted.org/packages/0d/bd/70b6ab193594d7abb875320c0a7c8335e846f28968c432c31042409c3c8d/orjson-3.11.9-cp311-cp311-win32.whl", hash = "sha256:14ed654580c1ed2bc217352ec82f91b047aef82951aa71c7f64e0dcb03c0e180", size = 131533, upload-time = "2026-05-06T15:09:52.637Z" }, + { url = "https://files.pythonhosted.org/packages/3f/17/1a1a228183d62d1b77e2c30d210f47dd4768b310ebe1607c63e3c0e3a71e/orjson-3.11.9-cp311-cp311-win_amd64.whl", hash = "sha256:57ea77fb70a448ce87d18fca050193202a3da5e54598f6501ca5476fb66cfe02", size = 127106, upload-time = "2026-05-06T15:09:54.204Z" }, + { url = "https://files.pythonhosted.org/packages/b8/95/285de5fa296d09681ee9c546cd4a8aeb773b701cf343dc125994f4d52953/orjson-3.11.9-cp311-cp311-win_arm64.whl", 
hash = "sha256:19b72ed11572a2ee51a67a903afbe5af504f84ed6f529c0fe44b0ab3fb5cc697", size = 126848, upload-time = "2026-05-06T15:09:55.551Z" }, + { url = "https://files.pythonhosted.org/packages/16/6d/11867a3ffa3a3608d84a4de51ef4dd0896d6b5cc9132fbe1daf593e677bc/orjson-3.11.9-cp312-cp312-macosx_10_15_x86_64.macosx_11_0_arm64.macosx_10_15_universal2.whl", hash = "sha256:9ef6fe90aadef185c7b128859f40beb24720b4ecea95379fc9000931179c3a49", size = 228515, upload-time = "2026-05-06T15:09:57.265Z" }, + { url = "https://files.pythonhosted.org/packages/24/75/05912954c8b288f34fcf5cd4b9b071cb4f6e77b9961e175e56ebb258089f/orjson-3.11.9-cp312-cp312-macosx_15_0_arm64.whl", hash = "sha256:e5c9b8f28e726e97d97696c826bc7bea5d71cecd63576dba92924a32c1961291", size = 128409, upload-time = "2026-05-06T15:09:59.063Z" }, + { url = "https://files.pythonhosted.org/packages/ab/86/1c3a47df3bc8191ea9ac51603bbb872a95167a364320c269f2557911f406/orjson-3.11.9-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:26a473dbb4162108b27901492546f83c76fdcea3d0eadff00ae7a07e18dcce09", size = 132106, upload-time = "2026-05-06T15:10:00.798Z" }, + { url = "https://files.pythonhosted.org/packages/d7/cf/b33b5f3e695ae7d63feef9d915c37cc3b8f465493dcd4f8e0b4c697a2366/orjson-3.11.9-cp312-cp312-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:011382e2a60fda9d46f1cdee31068cfc52ffe952b587d683ec0463002802a0f4", size = 127864, upload-time = "2026-05-06T15:10:02.15Z" }, + { url = "https://files.pythonhosted.org/packages/31/6a/6cf69385a58208024fcb8c014e2141b8ce838aba6492b589f8acfff97fab/orjson-3.11.9-cp312-cp312-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:c2d3dc759490128c5c1711a53eeaa8ee1d437fd0038ffd2b6008abf46db3f882", size = 135213, upload-time = "2026-05-06T15:10:03.515Z" }, + { url = "https://files.pythonhosted.org/packages/e8/f8/0b1bd3e8f2efcdd376af5c8cfd79eaf13f018080c0089c80ebd724e3c7fb/orjson-3.11.9-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", 
hash = "sha256:d8ea516b3726d190e1b4297e6f4e7a8650347ae053868a18163b4dd3641d1fff", size = 145994, upload-time = "2026-05-06T15:10:05.083Z" }, + { url = "https://files.pythonhosted.org/packages/f3/59/dab79f61044c529d2c81aecdc589b1f833a1c8dec11ba3b1c2498a02ca7e/orjson-3.11.9-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:380cdce7ba24989af81d0a7013d0aaec5d0e2a21734c0e2681b1bc4f141957fe", size = 132744, upload-time = "2026-05-06T15:10:06.853Z" }, + { url = "https://files.pythonhosted.org/packages/0e/a4/82b7a2fe5d8a67a59ed831b24d59a3d46ea7d207b66e1602d376541d94a6/orjson-3.11.9-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:be4fa4f0af7fa18951f7ab3fc2148e223af211bf03f59e1c6034ec3f97f21d61", size = 134014, upload-time = "2026-05-06T15:10:08.213Z" }, + { url = "https://files.pythonhosted.org/packages/50/c7/375e83a76851b73b2e39f3bcf0e5a19e2b89bad13e5bca97d0b293d27f24/orjson-3.11.9-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:a8f5f8bc7ce7d59f08d9f99fa510c06496164a24cb5f3d34537dbd9ca30132e2", size = 141509, upload-time = "2026-05-06T15:10:09.595Z" }, + { url = "https://files.pythonhosted.org/packages/7f/7c/49d5d82a3d3097f641f094f552131f1e2723b0b8cb0fa2874ab65ecfffa6/orjson-3.11.9-cp312-cp312-musllinux_1_2_armv7l.whl", hash = "sha256:4d7fde5501b944f83b3e665e1b31343ff6e154b15560a16b7130ea1e594a4206", size = 415127, upload-time = "2026-05-06T15:10:11.049Z" }, + { url = "https://files.pythonhosted.org/packages/3a/dc/7446c538590d55f455647e5f3c61fc33f7108714e7afcffa6a2a033f8350/orjson-3.11.9-cp312-cp312-musllinux_1_2_i686.whl", hash = "sha256:cde1a448023ba7d5bb4c01c5afb48894380b5e4956e0627266526587ef4e535f", size = 148025, upload-time = "2026-05-06T15:10:12.842Z" }, + { url = "https://files.pythonhosted.org/packages/df/e5/4d2d8af06f788329b4f78f8cc3679bb395392fcaa1e4d8d3c33e85308fa4/orjson-3.11.9-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:71e63adb0e1f1ed5d9e168f50a91ceb93ae6420731d222dc7da5c69409aa47aa", size 
= 136943, upload-time = "2026-05-06T15:10:14.405Z" }, + { url = "https://files.pythonhosted.org/packages/06/69/850264ccf6d80f6b174620d30a87f65c9b1490aba33fe6b62798e618cad3/orjson-3.11.9-cp312-cp312-win32.whl", hash = "sha256:2d057a602cdd19a0ad680417527c45b6961a095081c0f46fe0e03e304aac6470", size = 131606, upload-time = "2026-05-06T15:10:15.791Z" }, + { url = "https://files.pythonhosted.org/packages/b9/d5/973a43fc9c55e20f2051e9830997649f669be0cb3ca52192087c0143f118/orjson-3.11.9-cp312-cp312-win_amd64.whl", hash = "sha256:59e403b1cc5a676da8eaf31f6254801b7341b3e29efa85f92b48d272637e77be", size = 127101, upload-time = "2026-05-06T15:10:17.129Z" }, + { url = "https://files.pythonhosted.org/packages/fe/ae/495470f0e4a18f73fa10b7f6b84b464ec4cc5291c4e0c7c2a6c400bef006/orjson-3.11.9-cp312-cp312-win_arm64.whl", hash = "sha256:9af678d6488357948f1f84c6cd1c1d397c014e1ae2f98ae082a44eb48f602624", size = 126736, upload-time = "2026-05-06T15:10:18.645Z" }, + { url = "https://files.pythonhosted.org/packages/32/33/93fcc25907235c344ae73122f8a4e01d2d393ef062b4af7d2e2487a32c37/orjson-3.11.9-cp313-cp313-macosx_10_15_x86_64.macosx_11_0_arm64.macosx_10_15_universal2.whl", hash = "sha256:4bab1b2d6141fe7b32ae71dac905666ece4f94936efbfb13d55bb7739a3a6021", size = 228458, upload-time = "2026-05-06T15:10:20.079Z" }, + { url = "https://files.pythonhosted.org/packages/8f/27/b1e6dadb3c080313c03fdd8067b85e6a0460c7d8d6a1c3984ef77b904e4d/orjson-3.11.9-cp313-cp313-macosx_15_0_arm64.whl", hash = "sha256:844417969855fc7a41be124aafe83dc424592a7f77cd4501900c67307122b92c", size = 128368, upload-time = "2026-05-06T15:10:21.549Z" }, + { url = "https://files.pythonhosted.org/packages/21/0f/c9ede0bf052f6b4051e64a7d4fa91b725cccf8321a6a786e86eb03519f00/orjson-3.11.9-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:ffe02797b5e9f3a9d8292ddcd289b474ad13e81ad83cd1891a240811f1d2cb81", size = 132070, upload-time = "2026-05-06T15:10:23.371Z" }, + { url = 
"https://files.pythonhosted.org/packages/fd/26/d398e28048dc18205bbe812f2c88cb9b40313db2470778e25964796458fe/orjson-3.11.9-cp313-cp313-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:0e4eed3b200023042814d2fc8a5d2e880f13b52e1ed2485e83da4f3962f7dc1a", size = 127892, upload-time = "2026-05-06T15:10:24.714Z" }, + { url = "https://files.pythonhosted.org/packages/66/60/52b0054c4c700d5aa7fc5b7ca96917400d8f061307778578e67a10e25852/orjson-3.11.9-cp313-cp313-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:8aff7da9952a5ad1cef8e68017724d96c7b9a66e99e91d6252e1b133d67a7b10", size = 135217, upload-time = "2026-05-06T15:10:26.084Z" }, + { url = "https://files.pythonhosted.org/packages/d5/97/1e3dc2b2a28b7b2528f403d2fc1d79ec5f39af3bc143ab65d3ec26426385/orjson-3.11.9-cp313-cp313-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:4d4e98d6f3b8afed8bc8cd9718ec0cdf46661826beefb53fe8eafb37f2bf0362", size = 145980, upload-time = "2026-05-06T15:10:28.062Z" }, + { url = "https://files.pythonhosted.org/packages/fc/39/31fbfe7850f2de32dee7e7e5c09f26d403ab01e440ac96001c6b01ad3c99/orjson-3.11.9-cp313-cp313-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:3a81d52442a7c99b3662333235b3adf96a1715864658b35bb797212be7bddb97", size = 132738, upload-time = "2026-05-06T15:10:29.727Z" }, + { url = "https://files.pythonhosted.org/packages/a1/08/dca0082dd2a194acb93e5457e73455388e2e2ca464a2672449a9ddbb679d/orjson-3.11.9-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:4e39364e726a8fff737309aff059ff67d8a8c8d5b677be7bb49a8b3e84b7e218", size = 134033, upload-time = "2026-05-06T15:10:31.152Z" }, + { url = "https://files.pythonhosted.org/packages/11/d4/5bdb0626801230139987385554c5d4c42255218ac906525bf4347f22cd95/orjson-3.11.9-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:4fd66214623f1b17501df9f0543bef0b833979ab5b6ded1e1d123222866aa8c9", size = 141492, upload-time = "2026-05-06T15:10:32.641Z" }, + { url = 
"https://files.pythonhosted.org/packages/fa/88/a21fb53b3ede6703aede6dce4710ed4111e5b201cfa6bbff5e544f9d47d7/orjson-3.11.9-cp313-cp313-musllinux_1_2_armv7l.whl", hash = "sha256:8ecc30f10465fa1e0ce13fd01d9e22c316e5053a719a8d915d4545a09a5ff677", size = 415087, upload-time = "2026-05-06T15:10:34.438Z" }, + { url = "https://files.pythonhosted.org/packages/3d/57/1b30daf70f0d8180e9a73cefbfbdd99e4bf19eb020466502b01fba7e0e50/orjson-3.11.9-cp313-cp313-musllinux_1_2_i686.whl", hash = "sha256:97db4c94a7db398a5bd636273324f0b3fd58b350bbbac8bb380ceb825a9b40f4", size = 148031, upload-time = "2026-05-06T15:10:36.358Z" }, + { url = "https://files.pythonhosted.org/packages/04/83/45fbb6d962e260807f99441db9613cee868ceda4baceda59b3720a563f97/orjson-3.11.9-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:9f78cf8fec5bd627f4082b8dfeac7871b43d7f3274904492a43dab39f18a19a0", size = 136915, upload-time = "2026-05-06T15:10:38.013Z" }, + { url = "https://files.pythonhosted.org/packages/5f/cc/2d10025f9056d376e4127ec05a5808b218d46f035fdc08178a5411b34250/orjson-3.11.9-cp313-cp313-win32.whl", hash = "sha256:d4087e5c0209a0a8efe4de3303c234b9c44d1174161dcd851e8eea07c7560b32", size = 131613, upload-time = "2026-05-06T15:10:39.569Z" }, + { url = "https://files.pythonhosted.org/packages/67/bd/2775ff28bfe883b9aa1ff348300542eb2ef1ee18d8ae0e3a49846817a865/orjson-3.11.9-cp313-cp313-win_amd64.whl", hash = "sha256:051b102c93b4f634e89f3866b07b9a9a98915ada541f4ec30f177067b2694979", size = 127086, upload-time = "2026-05-06T15:10:41.262Z" }, + { url = "https://files.pythonhosted.org/packages/91/2b/d26799e580939e32a7da9a39531bc9e58e15ca32ffaa6a8cb3e9bb0d22cd/orjson-3.11.9-cp313-cp313-win_arm64.whl", hash = "sha256:cce9127885941bd28f080cecf1f1d288336b7e0d812c345b08be88b572796254", size = 126696, upload-time = "2026-05-06T15:10:42.651Z" }, ] [[package]] @@ -3498,10 +3894,10 @@ resolution-markers = [ "python_full_version < '3.11'", ] dependencies = [ - { name = "numpy", marker = "python_full_version < '3.11'" }, - 
{ name = "python-dateutil", marker = "python_full_version < '3.11'" }, - { name = "pytz", marker = "python_full_version < '3.11'" }, - { name = "tzdata", marker = "python_full_version < '3.11'" }, + { name = "numpy", marker = "python_full_version < '3.11' or (extra == 'extra-9-verifiers-openenv' and extra == 'group-9-verifiers-policy')" }, + { name = "python-dateutil", marker = "python_full_version < '3.11' or (extra == 'extra-9-verifiers-openenv' and extra == 'group-9-verifiers-policy')" }, + { name = "pytz", marker = "python_full_version < '3.11' or (extra == 'extra-9-verifiers-openenv' and extra == 'group-9-verifiers-policy')" }, + { name = "tzdata", marker = "python_full_version < '3.11' or (extra == 'extra-9-verifiers-openenv' and extra == 'group-9-verifiers-policy')" }, ] sdist = { url = "https://files.pythonhosted.org/packages/33/01/d40b85317f86cf08d853a4f495195c73815fdf205eef3993821720274518/pandas-2.3.3.tar.gz", hash = "sha256:e05e1af93b977f7eafa636d043f9f94c7ee3ac81af99c13508215942e64c993b", size = 4495223, upload-time = "2025-09-29T23:34:51.853Z" } wheels = [ @@ -3557,9 +3953,9 @@ resolution-markers = [ "python_full_version == '3.11.*' and sys_platform != 'emscripten' and sys_platform != 'win32'", ] dependencies = [ - { name = "numpy", marker = "python_full_version >= '3.11'" }, - { name = "python-dateutil", marker = "python_full_version >= '3.11'" }, - { name = "tzdata", marker = "(python_full_version >= '3.11' and sys_platform == 'emscripten') or (python_full_version >= '3.11' and sys_platform == 'win32')" }, + { name = "numpy", marker = "python_full_version >= '3.11' or (extra == 'extra-9-verifiers-openenv' and extra == 'group-9-verifiers-policy')" }, + { name = "python-dateutil", marker = "python_full_version >= '3.11' or (extra == 'extra-9-verifiers-openenv' and extra == 'group-9-verifiers-policy')" }, + { name = "tzdata", marker = "(python_full_version >= '3.11' and sys_platform == 'emscripten') or (python_full_version >= '3.11' and sys_platform == 
'win32') or (sys_platform != 'emscripten' and sys_platform != 'win32' and extra == 'extra-9-verifiers-openenv' and extra == 'group-9-verifiers-policy') or (sys_platform == 'emscripten' and extra == 'extra-9-verifiers-openenv' and extra == 'group-9-verifiers-policy') or (sys_platform == 'win32' and extra == 'extra-9-verifiers-openenv' and extra == 'group-9-verifiers-policy')" }, ] sdist = { url = "https://files.pythonhosted.org/packages/da/99/b342345300f13440fe9fe385c3c481e2d9a595ee3bab4d3219247ac94e9a/pandas-3.0.2.tar.gz", hash = "sha256:f4753e73e34c8d83221ba58f232433fca2748be8b18dbca02d242ed153945043", size = 4645855, upload-time = "2026-03-31T06:48:30.816Z" } wheels = [ @@ -3623,6 +4019,15 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/52/96/5a770e5c461462575474468e5af931cff9de036e7c2b4fea23c1c58d2cbe/pathable-0.5.0-py3-none-any.whl", hash = "sha256:646e3d09491a6351a0c82632a09c02cdf70a252e73196b36d8a15ba0a114f0a6", size = 16867, upload-time = "2026-02-20T08:46:59.536Z" }, ] +[[package]] +name = "peewee" +version = "3.19.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/88/b0/79462b42e89764998756e0557f2b58a15610a5b4512fbbcccae58fba7237/peewee-3.19.0.tar.gz", hash = "sha256:f88292a6f0d7b906cb26bca9c8599b8f4d8920ebd36124400d0cbaaaf915511f", size = 974035, upload-time = "2026-01-07T17:24:59.597Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/1a/41/19c65578ef9a54b3083253c68a607f099642747168fe00f3a2bceb7c3a34/peewee-3.19.0-py3-none-any.whl", hash = "sha256:de220b94766e6008c466e00ce4ba5299b9a832117d9eb36d45d0062f3cfd7417", size = 411885, upload-time = "2026-01-07T17:24:58.33Z" }, +] + [[package]] name = "peft" version = "0.19.1" @@ -3649,7 +4054,7 @@ name = "pexpect" version = "4.9.0" source = { registry = "https://pypi.org/simple" } dependencies = [ - { name = "ptyprocess", marker = "(python_full_version < '3.11' and sys_platform == 'emscripten') or (python_full_version < 
'3.11' and sys_platform == 'win32') or (sys_platform != 'emscripten' and sys_platform != 'win32')" }, + { name = "ptyprocess", marker = "(python_full_version < '3.11' and sys_platform == 'emscripten') or (python_full_version < '3.11' and sys_platform == 'win32') or (sys_platform != 'emscripten' and sys_platform != 'win32') or (sys_platform == 'emscripten' and extra == 'extra-9-verifiers-openenv' and extra == 'group-9-verifiers-policy') or (sys_platform == 'win32' and extra == 'extra-9-verifiers-openenv' and extra == 'group-9-verifiers-policy')" }, ] sdist = { url = "https://files.pythonhosted.org/packages/42/92/cc564bf6381ff43ce1f4d06852fc19a2f11d180f23dc32d9588bee2f149d/pexpect-4.9.0.tar.gz", hash = "sha256:ee7d41123f3c9911050ea2c2dac107568dc43b2d3b0c7557a33212c398ead30f", size = 166450, upload-time = "2023-11-25T09:07:26.339Z" } wheels = [ @@ -3914,17 +4319,17 @@ wheels = [ [[package]] name = "protobuf" -version = "7.34.1" +version = "6.33.6" source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/6b/6b/a0e95cad1ad7cc3f2c6821fcab91671bd5b78bd42afb357bb4765f29bc41/protobuf-7.34.1.tar.gz", hash = "sha256:9ce42245e704cc5027be797c1db1eb93184d44d1cdd71811fb2d9b25ad541280", size = 454708, upload-time = "2026-03-20T17:34:47.036Z" } +sdist = { url = "https://files.pythonhosted.org/packages/66/70/e908e9c5e52ef7c3a6c7902c9dfbb34c7e29c25d2f81ade3856445fd5c94/protobuf-6.33.6.tar.gz", hash = "sha256:a6768d25248312c297558af96a9f9c929e8c4cee0659cb07e780731095f38135", size = 444531, upload-time = "2026-03-18T19:05:00.988Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/ec/11/3325d41e6ee15bf1125654301211247b042563bcc898784351252549a8ad/protobuf-7.34.1-cp310-abi3-macosx_10_9_universal2.whl", hash = "sha256:d8b2cc79c4d8f62b293ad9b11ec3aebce9af481fa73e64556969f7345ebf9fc7", size = 429247, upload-time = "2026-03-20T17:34:37.024Z" }, - { url = 
"https://files.pythonhosted.org/packages/eb/9d/aa69df2724ff63efa6f72307b483ce0827f4347cc6d6df24b59e26659fef/protobuf-7.34.1-cp310-abi3-manylinux2014_aarch64.whl", hash = "sha256:5185e0e948d07abe94bb76ec9b8416b604cfe5da6f871d67aad30cbf24c3110b", size = 325753, upload-time = "2026-03-20T17:34:38.751Z" }, - { url = "https://files.pythonhosted.org/packages/92/e8/d174c91fd48e50101943f042b09af9029064810b734e4160bbe282fa1caa/protobuf-7.34.1-cp310-abi3-manylinux2014_s390x.whl", hash = "sha256:403b093a6e28a960372b44e5eb081775c9b056e816a8029c61231743d63f881a", size = 340198, upload-time = "2026-03-20T17:34:39.871Z" }, - { url = "https://files.pythonhosted.org/packages/53/1b/3b431694a4dc6d37b9f653f0c64b0a0d9ec074ee810710c0c3da21d67ba7/protobuf-7.34.1-cp310-abi3-manylinux2014_x86_64.whl", hash = "sha256:8ff40ce8cd688f7265326b38d5a1bed9bfdf5e6723d49961432f83e21d5713e4", size = 324267, upload-time = "2026-03-20T17:34:41.1Z" }, - { url = "https://files.pythonhosted.org/packages/85/29/64de04a0ac142fb685fd09999bc3d337943fb386f3a0ec57f92fd8203f97/protobuf-7.34.1-cp310-abi3-win32.whl", hash = "sha256:34b84ce27680df7cca9f231043ada0daa55d0c44a2ddfaa58ec1d0d89d8bf60a", size = 426628, upload-time = "2026-03-20T17:34:42.536Z" }, - { url = "https://files.pythonhosted.org/packages/4d/87/cb5e585192a22b8bd457df5a2c16a75ea0db9674c3a0a39fc9347d84e075/protobuf-7.34.1-cp310-abi3-win_amd64.whl", hash = "sha256:e97b55646e6ce5cbb0954a8c28cd39a5869b59090dfaa7df4598a7fba869468c", size = 437901, upload-time = "2026-03-20T17:34:44.112Z" }, - { url = "https://files.pythonhosted.org/packages/88/95/608f665226bca68b736b79e457fded9a2a38c4f4379a4a7614303d9db3bc/protobuf-7.34.1-py3-none-any.whl", hash = "sha256:bb3812cd53aefea2b028ef42bd780f5b96407247f20c6ef7c679807e9d188f11", size = 170715, upload-time = "2026-03-20T17:34:45.384Z" }, + { url = "https://files.pythonhosted.org/packages/fc/9f/2f509339e89cfa6f6a4c4ff50438db9ca488dec341f7e454adad60150b00/protobuf-6.33.6-cp310-abi3-win32.whl", hash = 
"sha256:7d29d9b65f8afef196f8334e80d6bc1d5d4adedb449971fefd3723824e6e77d3", size = 425739, upload-time = "2026-03-18T19:04:48.373Z" }, + { url = "https://files.pythonhosted.org/packages/76/5d/683efcd4798e0030c1bab27374fd13a89f7c2515fb1f3123efdfaa5eab57/protobuf-6.33.6-cp310-abi3-win_amd64.whl", hash = "sha256:0cd27b587afca21b7cfa59a74dcbd48a50f0a6400cfb59391340ad729d91d326", size = 437089, upload-time = "2026-03-18T19:04:50.381Z" }, + { url = "https://files.pythonhosted.org/packages/5c/01/a3c3ed5cd186f39e7880f8303cc51385a198a81469d53d0fdecf1f64d929/protobuf-6.33.6-cp39-abi3-macosx_10_9_universal2.whl", hash = "sha256:9720e6961b251bde64edfdab7d500725a2af5280f3f4c87e57c0208376aa8c3a", size = 427737, upload-time = "2026-03-18T19:04:51.866Z" }, + { url = "https://files.pythonhosted.org/packages/ee/90/b3c01fdec7d2f627b3a6884243ba328c1217ed2d978def5c12dc50d328a3/protobuf-6.33.6-cp39-abi3-manylinux2014_aarch64.whl", hash = "sha256:e2afbae9b8e1825e3529f88d514754e094278bb95eadc0e199751cdd9a2e82a2", size = 324610, upload-time = "2026-03-18T19:04:53.096Z" }, + { url = "https://files.pythonhosted.org/packages/9b/ca/25afc144934014700c52e05103c2421997482d561f3101ff352e1292fb81/protobuf-6.33.6-cp39-abi3-manylinux2014_s390x.whl", hash = "sha256:c96c37eec15086b79762ed265d59ab204dabc53056e3443e702d2681f4b39ce3", size = 339381, upload-time = "2026-03-18T19:04:54.616Z" }, + { url = "https://files.pythonhosted.org/packages/16/92/d1e32e3e0d894fe00b15ce28ad4944ab692713f2e7f0a99787405e43533a/protobuf-6.33.6-cp39-abi3-manylinux2014_x86_64.whl", hash = "sha256:e9db7e292e0ab79dd108d7f1a94fe31601ce1ee3f7b79e0692043423020b0593", size = 323436, upload-time = "2026-03-18T19:04:55.768Z" }, + { url = "https://files.pythonhosted.org/packages/c4/72/02445137af02769918a93807b2b7890047c32bfb9f90371cbc12688819eb/protobuf-6.33.6-py3-none-any.whl", hash = "sha256:77179e006c476e69bf8e8ce866640091ec42e1beb80b213c3900006ecfba6901", size = 170656, upload-time = "2026-03-18T19:04:59.826Z" }, ] [[package]] @@ 
-4336,6 +4741,15 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/01/dd/bebff3040138f00ae8a102d426b27349b9a49acc310fcae7f92112d867e3/pydantic_settings-2.14.0-py3-none-any.whl", hash = "sha256:fc8d5d692eb7092e43c8647c1c35a3ecd00e040fcf02ed86f4cb5458ca62182e", size = 60940, upload-time = "2026-04-20T13:37:38.586Z" }, ] +[[package]] +name = "pydub" +version = "0.25.1" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/fe/9a/e6bca0eed82db26562c73b5076539a4a08d3cffd19c3cc5913a3e61145fd/pydub-0.25.1.tar.gz", hash = "sha256:980a33ce9949cab2a569606b65674d748ecbca4f0796887fd6f46173a7b0d30f", size = 38326, upload-time = "2021-03-10T02:09:54.659Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/a6/53/d78dc063216e62fc55f6b2eebb447f6a4b0a59f55c8406376f76bf959b08/pydub-0.25.1-py2.py3-none-any.whl", hash = "sha256:65617e33033874b59d87db603aa1ed450633288aefead953b30bded59cb599a6", size = 32327, upload-time = "2021-03-10T02:09:53.503Z" }, +] + [[package]] name = "pyfiglet" version = "1.0.2" @@ -4359,7 +4773,7 @@ name = "pyjwt" version = "2.12.1" source = { registry = "https://pypi.org/simple" } dependencies = [ - { name = "typing-extensions", marker = "python_full_version < '3.11'" }, + { name = "typing-extensions", marker = "python_full_version < '3.11' or (extra == 'extra-9-verifiers-openenv' and extra == 'group-9-verifiers-policy')" }, ] sdist = { url = "https://files.pythonhosted.org/packages/c2/27/a3b6e5bf6ff856d2509292e95c8f57f0df7017cf5394921fc4e4ef40308a/pyjwt-2.12.1.tar.gz", hash = "sha256:c74a7a2adf861c04d002db713dd85f84beb242228e671280bf709d765b03672b", size = 102564, upload-time = "2026-03-13T19:27:37.25Z" } wheels = [ @@ -4391,37 +4805,40 @@ wheels = [ [[package]] name = "pyqwest" -version = "0.5.1" +version = "0.3.3" source = { registry = "https://pypi.org/simple" } -dependencies = [ - { name = "opentelemetry-api" }, -] -sdist = { url = 
"https://files.pythonhosted.org/packages/fc/42/9328a1a057bc191e02514fbd8c9aa899e38ec29420491017f057ac6b5313/pyqwest-0.5.1.tar.gz", hash = "sha256:49535565a55a23830d376c6c0b8ca1276f19bb871e39330e1fbb3df69d9f02df", size = 448653, upload-time = "2026-04-17T03:56:00.279Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/d3/48/a869484add1102bbedaf76b1e3840a3f34bb611bf9228ec89a03d51328b1/pyqwest-0.5.1-cp310-abi3-macosx_11_0_arm64.whl", hash = "sha256:047078c4c3f7c93d8a1df07138c471cfdb234577f60563afe090b160ada1a132", size = 5058870, upload-time = "2026-04-17T03:54:36.807Z" }, - { url = "https://files.pythonhosted.org/packages/4f/b7/f06e56398ea41a183a0494b6557d1e28ed122ac8a20315a7f24ec316b7c1/pyqwest-0.5.1-cp310-abi3-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:3f732aed3498abf5742f0c952f24fc21625d62fe0a701ec8e472257709540d61", size = 5438347, upload-time = "2026-04-17T03:54:39.108Z" }, - { url = "https://files.pythonhosted.org/packages/39/df/6160cab1d48ad2d1a2038ac8a1ff94d72f267cb6a3e3a7181c1ae41d98c9/pyqwest-0.5.1-cp310-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:77565ad8e551a101eddbf08328b0e3d54a179efec97a3da195ce831f0474ef92", size = 5457281, upload-time = "2026-04-17T03:54:41.347Z" }, - { url = "https://files.pythonhosted.org/packages/8d/56/343516d26e153a60c19f2dbbe83460238feddd18a27c42ae11f843e79bf1/pyqwest-0.5.1-cp310-abi3-musllinux_1_2_aarch64.whl", hash = "sha256:57ea14094f841d735fcf401a6428e94a39aabf22d15762a979cf8d6096fea90b", size = 5600773, upload-time = "2026-04-17T03:54:43.615Z" }, - { url = "https://files.pythonhosted.org/packages/32/f3/56c2cca5b7fcf708d41628cd3bd8a8be48d364a33a469fc056a5e92d5de3/pyqwest-0.5.1-cp310-abi3-musllinux_1_2_x86_64.whl", hash = "sha256:e6716309fc179b0714978d1f8c32a70724b3b9f43acc9aea3cf9507872e631d6", size = 5763641, upload-time = "2026-04-17T03:54:45.804Z" }, - { url = 
"https://files.pythonhosted.org/packages/92/c1/ac5dceb438ec71f5a3aaf1d9481ddc3535cf875fa8c8ff3622758d49fe50/pyqwest-0.5.1-cp310-abi3-win_amd64.whl", hash = "sha256:736b560fd2256a41264f554243edf3ff872ead4da347392531576203cf97a4ce", size = 4638049, upload-time = "2026-04-17T03:54:48.108Z" }, - { url = "https://files.pythonhosted.org/packages/c3/e6/42b24a7daba2b900287ee0b89ba24e864c1976b4b371ec5aba3ddcd52f0a/pyqwest-0.5.1-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:c30bb16da6580d085d86a46e28c3b59813c634c5dd7363faa980c6e53d42ea21", size = 5055607, upload-time = "2026-04-17T03:54:50.139Z" }, - { url = "https://files.pythonhosted.org/packages/7e/51/05ad3a8eba6a14cede32f0446199505782e43734ac1d0ebeb43286e94a64/pyqwest-0.5.1-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:4540be138c7e4ee498c084fa9f19f34dd135f263653619ec91ccebecefd32a3a", size = 5433373, upload-time = "2026-04-17T03:54:52.275Z" }, - { url = "https://files.pythonhosted.org/packages/ba/02/eeb8bb3cb80dc4449303294e94d7e7b11e2bfd0ca319a93bfa4ce604d50c/pyqwest-0.5.1-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:ae56109419b37be3e90a134f827fda26c6338c432bae5ecef561e93c3df275ab", size = 5457842, upload-time = "2026-04-17T03:54:54.865Z" }, - { url = "https://files.pythonhosted.org/packages/22/32/929fe6f208e18d1ab2b4b188be9830a8d6f22641aa77434d08674f6b0a4f/pyqwest-0.5.1-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:82fbf17cf45b5a67c95734934f584045f114a1aa59d179fb79d80e56e653e052", size = 5600494, upload-time = "2026-04-17T03:54:57.485Z" }, - { url = "https://files.pythonhosted.org/packages/9a/b6/3da1250e26e435262a7113e8d8bc5628eba0518d06b5b07e74bbad3efd7f/pyqwest-0.5.1-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:717b65ee898bc3e4f29a74e068bb753bbb8a338456f1fb65edc2edabc3f3eaa5", size = 5762097, upload-time = "2026-04-17T03:54:59.536Z" }, - { url = 
"https://files.pythonhosted.org/packages/eb/b1/159538a856bee250431d4976a3b7731c1402932af94c00d832bf71a7ceda/pyqwest-0.5.1-cp312-cp312-win_amd64.whl", hash = "sha256:26498ead2aaf491430980c06cbee3bd72a26a8f33a025e9a19ddcd7e781a069a", size = 4632471, upload-time = "2026-04-17T03:55:02.369Z" }, - { url = "https://files.pythonhosted.org/packages/28/f6/5f5eaf7357ade38a5e8697935a156ac47ff3bef0bb9cc32ef2f5b4cf0190/pyqwest-0.5.1-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:81bf0feb6d07fc25ffbe96be09ef64ff732d444e26c80db1b1de999d74a7a7ed", size = 5054876, upload-time = "2026-04-17T03:55:04.992Z" }, - { url = "https://files.pythonhosted.org/packages/fb/e8/3b17156c13745a17ce39c7bfe9ed9fdec8ba19a5a47ddd762c89f00fb642/pyqwest-0.5.1-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:3a7d2c4a162e72b57dfd175f53882558702c3ddd5704f536599f463e46081bd9", size = 5433361, upload-time = "2026-04-17T03:55:07.182Z" }, - { url = "https://files.pythonhosted.org/packages/35/fc/a50cde1003e1c39c2087b1ae50cd074a41ab89f5a1ad2998cc3f481cf10c/pyqwest-0.5.1-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:b33bcba509e07ee79e61f9311880a22e8c11a562382648166f624f0b480266c1", size = 5457142, upload-time = "2026-04-17T03:55:09.358Z" }, - { url = "https://files.pythonhosted.org/packages/a9/65/f8c50d2ebe3e6d41f04a9924baef9f106feec2eb87acdd60fcb1c3ce2e84/pyqwest-0.5.1-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:3a7a7b56e0f9099b50dae2e5907f05fc5e61252cd48cc1561f156765b233dd7b", size = 5601185, upload-time = "2026-04-17T03:55:11.626Z" }, - { url = "https://files.pythonhosted.org/packages/7e/1f/13f250e31d5503d326da8d9874891bedbfd7eeaa86711ab15acce157e59f/pyqwest-0.5.1-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:a3cd54af62f01fd0daf694218074325ccbbea6527f27738a85fc1e38880119f0", size = 5761677, upload-time = "2026-04-17T03:55:13.915Z" }, - { url = 
"https://files.pythonhosted.org/packages/b3/ab/11a1bf107af0f21fa8f1d9516e59c8b742c19c32f82cae49f6340ffa7336/pyqwest-0.5.1-cp313-cp313-win_amd64.whl", hash = "sha256:938ac244f96730a8c1addb260cbec93e44ef63d73358285bafb00f18526c106b", size = 4631790, upload-time = "2026-04-17T03:55:16.002Z" }, - { url = "https://files.pythonhosted.org/packages/68/05/6340fcb599734691832b60829236c53f813220f0a642af90e05c5df5d6a9/pyqwest-0.5.1-pp311-pypy311_pp73-macosx_11_0_arm64.whl", hash = "sha256:45f9d448fedd6d85c4c9a5cb8e743b83fdd798c335936588f5d90628c6163f73", size = 5059333, upload-time = "2026-04-17T03:55:46.264Z" }, - { url = "https://files.pythonhosted.org/packages/52/65/9d98c421a5ec3283bb77337f51920504c4d770bbd09b36a6164eaa21c4e8/pyqwest-0.5.1-pp311-pypy311_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:144c22aa18de37f5b6f877939c50fb2aafb94a6433ac47ddab3850793402e46b", size = 5438850, upload-time = "2026-04-17T03:55:48.542Z" }, - { url = "https://files.pythonhosted.org/packages/1f/6e/b5862ef8e6a61c1baf48740e9f9ea256ef63d26c10df8c340a3fbefc35e2/pyqwest-0.5.1-pp311-pypy311_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:850c58da7df5c0a4c6351ffe66f99768cb5d92e882b55daf9984a96a5c46e11e", size = 5455438, upload-time = "2026-04-17T03:55:51.106Z" }, - { url = "https://files.pythonhosted.org/packages/8b/b5/765fadb80caea4d62e6895a5404264a78746662044a2c64e73a8dceee588/pyqwest-0.5.1-pp311-pypy311_pp73-musllinux_1_2_aarch64.whl", hash = "sha256:03e57c1d58d7f236a08dfe5b27493ee56ae2d5e4b40e5c4bd9e6309000b4b86f", size = 5605021, upload-time = "2026-04-17T03:55:53.508Z" }, - { url = "https://files.pythonhosted.org/packages/98/eb/9d52f55f724edd40a2c2317074e283d8bb4bab564f7354a36e4f4f89e80e/pyqwest-0.5.1-pp311-pypy311_pp73-musllinux_1_2_x86_64.whl", hash = "sha256:958540d4861b9e7a228a775ea8b937c019bdc769d52b6e2e872b5bdc1ebf6870", size = 5761909, upload-time = "2026-04-17T03:55:56.063Z" }, - { url = 
"https://files.pythonhosted.org/packages/92/8c/7287ef3e34c5045fe1583385cf03a5a5681b7fae62676391444cfa44a4d1/pyqwest-0.5.1-pp311-pypy311_pp73-win_amd64.whl", hash = "sha256:469dfd7baaac6fd6565d396c8d05f19cb5ba279856af38b5f69ff8278e991c49", size = 4630018, upload-time = "2026-04-17T03:55:58.249Z" }, +sdist = { url = "https://files.pythonhosted.org/packages/37/d5/37c968f2ad6a1e6d443215063a9fcc109999f796aaf697806ea1ccf562af/pyqwest-0.3.3.tar.gz", hash = "sha256:473a9ede8a96d1d993a1628eb62edd056a3c57b74e65cfd30d72eb451be1a23d", size = 423176, upload-time = "2026-01-21T12:54:44.453Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/5b/7b/7c52652189652ea1ada963746b330eb3301d758a49f2adf27ccfc8c6c707/pyqwest-0.3.3-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:e089afa5f0bda88b661216f7b97c0bdf00dd9eb6591a8233127365282b31c2d8", size = 4913603, upload-time = "2026-01-21T12:53:22.556Z" }, + { url = "https://files.pythonhosted.org/packages/83/80/630b061176ae9d70826215f51416098a7cacac823199c15edbb95206fd23/pyqwest-0.3.3-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:14d0b67e737c47207d30ec14531bdffb15c73292494db910ccc7c20f110b8e6d", size = 5279003, upload-time = "2026-01-21T12:53:24.333Z" }, + { url = "https://files.pythonhosted.org/packages/d6/88/916503f0cfb1e8d3828ed2275139bf5e252be81ed17aa6825b90f4bf696f/pyqwest-0.3.3-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:7f6f201b2319704ad51f9485533060a7d96b10ecf5601540db7f986c6c561181", size = 5312934, upload-time = "2026-01-21T12:53:27.189Z" }, + { url = "https://files.pythonhosted.org/packages/ee/ad/ba9cab9a206923454416b11aa0e7160cbec1329fc5cdc80cb5d3bc677010/pyqwest-0.3.3-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:cf9a0c0f15ffefe65b42a11b4caebe9e50a96bb971861b87469813406576262e", size = 5469592, upload-time = "2026-01-21T12:53:29.289Z" }, + { url = 
"https://files.pythonhosted.org/packages/51/33/c2ecaab4f4f03f1a6f3f778ee16ac0fdafb7fe13af61e6ae7d42a81ac2ac/pyqwest-0.3.3-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:a8675df30f5bd32745b17b568cb5efb1e5f58acee530181ed5697fff11d3b1b9", size = 5617339, upload-time = "2026-01-21T12:53:31.053Z" }, + { url = "https://files.pythonhosted.org/packages/41/b7/416cba68cd718d02dbc747ebef194417c287c318d2ecf0dffd58d07ff7d9/pyqwest-0.3.3-cp310-cp310-win_amd64.whl", hash = "sha256:c1230c010ae130ff700ebce2d7017147aafc181af342d5e6af625fb7b42b6c3c", size = 4525965, upload-time = "2026-01-21T12:53:32.741Z" }, + { url = "https://files.pythonhosted.org/packages/87/82/4d7cd62af778d93e5f14d00b7dc258b2028e002de1fd874b7a538d604867/pyqwest-0.3.3-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:a676d634bb6ce34c34398a8ff1a3646b882a08890f7e54ccdaaa8eb97b6cf587", size = 4916893, upload-time = "2026-01-21T12:53:34.28Z" }, + { url = "https://files.pythonhosted.org/packages/ea/3f/421be99b0da2922841923de02f98a2bd1ffcfce24ff509540b0d66f95d22/pyqwest-0.3.3-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:d8dd6096c8e7fd2399f231d442acc79bfe41ba272de65b7867219e3297a5627f", size = 5276138, upload-time = "2026-01-21T12:53:35.901Z" }, + { url = "https://files.pythonhosted.org/packages/5a/0a/d510daa73eebf57808f7a497bd0e25161b7ee60ecea5de18d818ef705aef/pyqwest-0.3.3-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:1277a75e20c15764cfb3a2fc7899b360b60d7de00057b86dadb5d0ffa08fa17c", size = 5311182, upload-time = "2026-01-21T12:53:38.075Z" }, + { url = "https://files.pythonhosted.org/packages/50/6a/5e02319dcd3c0c75c9681f6f8feff994b42506d497073b6f13e3cf572c87/pyqwest-0.3.3-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:cd233c47044192acb3746b1bb9025c339b09d211d051827a36420bbdbc596246", size = 5467897, upload-time = "2026-01-21T12:53:39.819Z" }, + { url = 
"https://files.pythonhosted.org/packages/5e/bb/5667d154bc81fb3d53e00b7f7784bb68e6a48385bbd0f7a86949148b7c8b/pyqwest-0.3.3-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:33a23affdf6c836433ba22ee7d76f836df8bc25cbd52e3307dbc7e9554d67e07", size = 5617148, upload-time = "2026-01-21T12:53:41.594Z" }, + { url = "https://files.pythonhosted.org/packages/f9/76/6ac342701177df7585fa42151bdb58cbb20655a8b46c5cec4a4055640707/pyqwest-0.3.3-cp311-cp311-win_amd64.whl", hash = "sha256:3db982b3eb0ce2f1ecd70185da3afc2506f682920e3de8b17ffe5eccca560b8d", size = 4524858, upload-time = "2026-01-21T12:53:43.578Z" }, + { url = "https://files.pythonhosted.org/packages/7c/22/1a124da1eb328bcd38b207afce037b8322c03848d53ad372a44f90bcb7a9/pyqwest-0.3.3-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:4666516358c2349a3c1f1f00816b27df6cc127b0d75cdd5d07bf4498c407ba4f", size = 4910119, upload-time = "2026-01-21T12:53:45.153Z" }, + { url = "https://files.pythonhosted.org/packages/90/05/35c546933d6765c9f38841ee703a299cd08d115e8f9df5a882669775b07d/pyqwest-0.3.3-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:b802af427c9e4ed33c5a6ed47612887dce1128ba3cf685c7a26a383d9412c811", size = 5267540, upload-time = "2026-01-21T12:53:46.707Z" }, + { url = "https://files.pythonhosted.org/packages/f9/a6/97990418194cd24640486a14f6cb390d9f7005bc1aa06fe2324ef0aa8050/pyqwest-0.3.3-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:81118f530429b10eb295ae5b9b1353593e17ad15e0166203d2dfca51b763db85", size = 5309338, upload-time = "2026-01-21T12:53:48.903Z" }, + { url = "https://files.pythonhosted.org/packages/70/12/1e503ba23b8cfe008178610ee649e4ec6536a4d98bce9786287ae82ebb33/pyqwest-0.3.3-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:420e26aed443f6beb8a10fe5eb06913084be48e549b537b0a8887088358ed354", size = 5455416, upload-time = "2026-01-21T12:53:50.466Z" }, + { url = 
"https://files.pythonhosted.org/packages/70/9f/d39e0ba8fa4396f9a68ad60e2544f76d79688e93f5bb6be777f6070435e8/pyqwest-0.3.3-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:f596adcd385d2125c8c715e6c107068139d36b5ebcb14709e1ed2f98423e624e", size = 5613290, upload-time = "2026-01-21T12:53:52.428Z" }, + { url = "https://files.pythonhosted.org/packages/c4/ea/9fe5998f570978925102f0302107562c285e658a03587f7b7c04e51456b1/pyqwest-0.3.3-cp312-cp312-win_amd64.whl", hash = "sha256:66f1d9b0338592949bb53e76361f15b869a9aeca9503e00d6c46d648d609c0be", size = 4524367, upload-time = "2026-01-21T12:53:54.112Z" }, + { url = "https://files.pythonhosted.org/packages/b1/b4/5946422d7f17c7079c405e61f940cb81f86c95c6bcb7d3d446f17c471ddd/pyqwest-0.3.3-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:88558ca7a276cf9b1cf2ed325e004a24838432df74e0a614a24f25b592b2e4d5", size = 4909511, upload-time = "2026-01-21T12:53:55.581Z" }, + { url = "https://files.pythonhosted.org/packages/48/79/c3f6e69db0c73dabe57915292a08d9d8ecf6ea30c29783badfd9d5cea95e/pyqwest-0.3.3-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:040c02caf99740b5e6c2f2533e42639cb098b283e9ad5589d4f7e84358356f65", size = 5266021, upload-time = "2026-01-21T12:53:57.574Z" }, + { url = "https://files.pythonhosted.org/packages/fa/ee/59c281578ef27664143c50061093984186fa8967947b26de7c4915644731/pyqwest-0.3.3-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:e3062c3c4be935c3eac084019a57e72a1b1fa0eab22e77698a18cf7bd8ffbc2a", size = 5308932, upload-time = "2026-01-21T12:53:59.413Z" }, + { url = "https://files.pythonhosted.org/packages/46/e9/b1b151d3ec53c6cfbace6dfe6c1b4ae2d04fd11a7747d091c5b56c1b3646/pyqwest-0.3.3-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:4b9ebabcc349ba64d8b1e0f235c6c448fbf608a473ba1c07009a610fa6fee769", size = 5455010, upload-time = "2026-01-21T12:54:01.446Z" }, + { url = 
"https://files.pythonhosted.org/packages/a1/43/dd43b37835c32acebe4456662e14e1a32ae034f496d26e2f98f664f8b89c/pyqwest-0.3.3-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:597e7aa1327105bf20f1513b9176dab2e86ac1ef82310195bb57bdd023f60d1c", size = 5612503, upload-time = "2026-01-21T12:54:03.243Z" }, + { url = "https://files.pythonhosted.org/packages/2a/d3/005e90088918e8c5a78549b8092c0c51689f230f12c24824f23a62bda89e/pyqwest-0.3.3-cp313-cp313-win_amd64.whl", hash = "sha256:0a7996c2a28af6c44e50b85125af4843177190aee1f4c5687dc94fa66240e4e8", size = 4524286, upload-time = "2026-01-21T12:54:05.181Z" }, + { url = "https://files.pythonhosted.org/packages/91/06/2985f8318f24024bfa5852a1081c3cee5fb79a57533894763253ce49d504/pyqwest-0.3.3-pp311-pypy311_pp73-macosx_11_0_arm64.whl", hash = "sha256:6fb02f04bfd8c6f1faa5bb6530162a7f4b2c99a504efb22e60b5af597698889d", size = 4916617, upload-time = "2026-01-21T12:54:28.541Z" }, + { url = "https://files.pythonhosted.org/packages/e1/21/f810f4126e17d4b737b5117d4d8d12f0cee4c4060c5d74a7b83f1101e833/pyqwest-0.3.3-pp311-pypy311_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:bd9547ab8f22fe914cb56ff236fa8c85bbe7d1790d1fd5749cf65a3d00215f87", size = 5278030, upload-time = "2026-01-21T12:54:30.186Z" }, + { url = "https://files.pythonhosted.org/packages/91/74/23929a47f4caed9a38717e50c5f2827bc812c0fa7898dc160898bb485c14/pyqwest-0.3.3-pp311-pypy311_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:410f1d1d33a8095ad835750d1c9bd0e9ec56ea5822309f5bed2bdde2d01d39a0", size = 5311924, upload-time = "2026-01-21T12:54:31.921Z" }, + { url = "https://files.pythonhosted.org/packages/33/af/a52b96ebeac32b3d98b849e73bffa922224abb0a6f8532ec99dea0cb8a9b/pyqwest-0.3.3-pp311-pypy311_pp73-musllinux_1_2_aarch64.whl", hash = "sha256:427cbe9a326f303f97030aeff9812ee5cd2817b90a7e23e8b483a40fec34d815", size = 5470042, upload-time = "2026-01-21T12:54:34.078Z" }, + { url = 
"https://files.pythonhosted.org/packages/ab/95/abeb0cc3e7f1654b639fa3bae13ed23783e3b8fde1ad0e3c1851bb04d572/pyqwest-0.3.3-pp311-pypy311_pp73-musllinux_1_2_x86_64.whl", hash = "sha256:8e9f580c009ae724bd114313df40ed6f53d344d5f47aaa175023b84489d200a7", size = 5616312, upload-time = "2026-01-21T12:54:37.524Z" }, + { url = "https://files.pythonhosted.org/packages/56/d8/66f9cf5dad4f2b1cd75f58e59f2f06892703f693bcb95777284e0873f186/pyqwest-0.3.3-pp311-pypy311_pp73-win_amd64.whl", hash = "sha256:044785a3fa1de8efe5acf6f1adeb7343dcc52c14213fcd88774d0601d90d62cb", size = 4522824, upload-time = "2026-01-21T12:54:41.309Z" }, ] [[package]] @@ -4429,13 +4846,14 @@ name = "pytest" version = "9.0.3" source = { registry = "https://pypi.org/simple" } dependencies = [ - { name = "colorama", marker = "sys_platform == 'win32'" }, - { name = "exceptiongroup", marker = "python_full_version < '3.11'" }, + { name = "colorama", marker = "sys_platform == 'win32' or (extra == 'extra-9-verifiers-openenv' and extra == 'group-9-verifiers-policy')" }, + { name = "exceptiongroup", marker = "python_full_version < '3.11' or (extra == 'extra-9-verifiers-openenv' and extra == 'group-9-verifiers-policy')" }, { name = "iniconfig" }, { name = "packaging" }, { name = "pluggy" }, { name = "pygments" }, - { name = "tomli", marker = "python_full_version < '3.11'" }, + { name = "tomli", version = "2.0.2", source = { registry = "https://pypi.org/simple" }, marker = "(python_full_version < '3.11' and extra == 'group-9-verifiers-policy') or (extra == 'extra-9-verifiers-openenv' and extra == 'group-9-verifiers-policy')" }, + { name = "tomli", version = "2.4.1", source = { registry = "https://pypi.org/simple" }, marker = "(python_full_version < '3.11' and extra == 'extra-9-verifiers-openenv') or (python_full_version < '3.11' and extra != 'group-9-verifiers-policy') or (extra == 'extra-9-verifiers-openenv' and extra == 'group-9-verifiers-policy')" }, ] sdist = { url = 
"https://files.pythonhosted.org/packages/7d/0d/549bd94f1a0a402dc8cf64563a117c0f3765662e2e668477624baeec44d5/pytest-9.0.3.tar.gz", hash = "sha256:b86ada508af81d19edeb213c681b1d48246c1a91d304c6c81a427674c17eb91c", size = 1572165, upload-time = "2026-04-07T17:16:18.027Z" } wheels = [ @@ -4447,9 +4865,9 @@ name = "pytest-asyncio" version = "1.3.0" source = { registry = "https://pypi.org/simple" } dependencies = [ - { name = "backports-asyncio-runner", marker = "python_full_version < '3.11'" }, + { name = "backports-asyncio-runner", marker = "python_full_version < '3.11' or (extra == 'extra-9-verifiers-openenv' and extra == 'group-9-verifiers-policy')" }, { name = "pytest" }, - { name = "typing-extensions", marker = "python_full_version < '3.13'" }, + { name = "typing-extensions", marker = "python_full_version < '3.13' or (extra == 'extra-9-verifiers-openenv' and extra == 'group-9-verifiers-policy')" }, ] sdist = { url = "https://files.pythonhosted.org/packages/90/2c/8af215c0f776415f3590cac4f9086ccefd6fd463befeae41cd4d3f193e5a/pytest_asyncio-1.3.0.tar.gz", hash = "sha256:d7f52f36d231b80ee124cd216ffb19369aa168fc10095013c6b014a34d3ee9e5", size = 50087, upload-time = "2025-11-10T16:07:47.256Z" } wheels = [ @@ -4623,7 +5041,7 @@ name = "pyzmq" version = "27.1.0" source = { registry = "https://pypi.org/simple" } dependencies = [ - { name = "cffi", marker = "implementation_name == 'pypy'" }, + { name = "cffi", marker = "implementation_name == 'pypy' or (extra == 'extra-9-verifiers-openenv' and extra == 'group-9-verifiers-policy')" }, ] sdist = { url = "https://files.pythonhosted.org/packages/04/0b/3c9baedbdf613ecaa7aa07027780b8867f57b6293b6ee50de316c9f3222b/pyzmq-27.1.0.tar.gz", hash = "sha256:ac0765e3d44455adb6ddbf4417dcce460fc40a05978c08efdf2948072f6db540", size = 281750, upload-time = "2025-09-08T23:10:18.157Z" } wheels = [ @@ -4715,7 +5133,7 @@ wheels = [ [package.optional-dependencies] cgraph = [ - { name = "cupy-cuda12x", marker = "sys_platform != 'darwin'" }, + { name 
= "cupy-cuda12x", marker = "sys_platform != 'darwin' or (extra == 'extra-9-verifiers-openenv' and extra == 'group-9-verifiers-policy')" }, ] [[package]] @@ -4747,7 +5165,7 @@ source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "attrs" }, { name = "rpds-py" }, - { name = "typing-extensions", marker = "python_full_version < '3.13'" }, + { name = "typing-extensions", marker = "python_full_version < '3.13' or (extra == 'extra-9-verifiers-openenv' and extra == 'group-9-verifiers-policy')" }, ] sdist = { url = "https://files.pythonhosted.org/packages/22/f5/df4e9027acead3ecc63e50fe1e36aca1523e1719559c499951bb4b53188f/referencing-0.37.0.tar.gz", hash = "sha256:44aefc3142c5b842538163acb373e24cce6632bd54bdb01b21ad5863489f50d8", size = 78036, upload-time = "2025-10-13T15:30:48.871Z" } wheels = [ @@ -4901,111 +5319,6 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/13/2f/b4530fbf948867702d0a3f27de4a6aab1d156f406d72852ab902c4d04de9/rich_rst-1.3.2-py3-none-any.whl", hash = "sha256:a99b4907cbe118cf9d18b0b44de272efa61f15117c61e39ebdc431baf5df722a", size = 12567, upload-time = "2025-10-14T16:49:42.953Z" }, ] -[[package]] -name = "rich-toolkit" -version = "0.19.7" -source = { registry = "https://pypi.org/simple" } -dependencies = [ - { name = "click" }, - { name = "rich" }, - { name = "typing-extensions" }, -] -sdist = { url = "https://files.pythonhosted.org/packages/42/ba/dae9e3096651042754da419a4042bc1c75e07d615f9b15066d738838e4df/rich_toolkit-0.19.7.tar.gz", hash = "sha256:133c0915872da91d4c25d85342d5ec1dfacc69b63448af1a08a0d4b4f23ef46e", size = 195877, upload-time = "2026-02-24T16:06:20.555Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/fb/3c/c923619f6d2f5fafcc96fec0aaf9550a46cd5b6481f06e0c6b66a2a4fed0/rich_toolkit-0.19.7-py3-none-any.whl", hash = "sha256:0288e9203728c47c5a4eb60fd2f0692d9df7455a65901ab6f898437a2ba5989d", size = 32963, upload-time = "2026-02-24T16:06:22.066Z" }, -] - -[[package]] -name = "rignore" -version 
= "0.7.6" -source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/e5/f5/8bed2310abe4ae04b67a38374a4d311dd85220f5d8da56f47ae9361be0b0/rignore-0.7.6.tar.gz", hash = "sha256:00d3546cd793c30cb17921ce674d2c8f3a4b00501cb0e3dd0e82217dbeba2671", size = 57140, upload-time = "2025-11-05T21:41:21.968Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/86/7a/b970cd0138b0ece72eb28f086e933f9ed75b795716ad3de5ab22994b3b54/rignore-0.7.6-cp310-cp310-macosx_10_12_x86_64.whl", hash = "sha256:f3c74a7e5ee77aea669c95fdb3933f2a6c7549893700082e759128a29cf67e45", size = 884999, upload-time = "2025-11-05T20:42:38.373Z" }, - { url = "https://files.pythonhosted.org/packages/ca/05/23faca29616d8966ada63fb0e13c214107811fa9a0aba2275e4c7ca63bd5/rignore-0.7.6-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:b7202404958f5fe3474bac91f65350f0b1dde1a5e05089f2946549b7e91e79ec", size = 824824, upload-time = "2025-11-05T20:42:22.1Z" }, - { url = "https://files.pythonhosted.org/packages/fa/2e/05a1e61f04cf2548524224f0b5f21ca19ea58f7273a863bac10846b8ff69/rignore-0.7.6-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:6bde7c5835fa3905bfb7e329a4f1d7eccb676de63da7a3f934ddd5c06df20597", size = 899121, upload-time = "2025-11-05T20:40:48.94Z" }, - { url = "https://files.pythonhosted.org/packages/ff/35/71518847e10bdbf359badad8800e4681757a01f4777b3c5e03dbde8a42d8/rignore-0.7.6-cp310-cp310-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:626c3d4ba03af266694d25101bc1d8d16eda49c5feb86cedfec31c614fceca7d", size = 873813, upload-time = "2025-11-05T20:41:04.71Z" }, - { url = "https://files.pythonhosted.org/packages/f6/c8/32ae405d3e7fd4d9f9b7838f2fcca0a5005bb87fa514b83f83fd81c0df22/rignore-0.7.6-cp310-cp310-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:0a43841e651e7a05a4274b9026cc408d1912e64016ede8cd4c145dae5d0635be", size = 1168019, upload-time = "2025-11-05T20:41:20.723Z" }, - { url = 
"https://files.pythonhosted.org/packages/25/98/013c955982bc5b4719bf9a5bea58be317eea28aa12bfd004025e3cd7c000/rignore-0.7.6-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:7978c498dbf7f74d30cdb8859fe612167d8247f0acd377ae85180e34490725da", size = 942822, upload-time = "2025-11-05T20:41:36.99Z" }, - { url = "https://files.pythonhosted.org/packages/90/fb/9a3f3156c6ed30bcd597e63690353edac1fcffe9d382ad517722b56ac195/rignore-0.7.6-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:2d22f72ab695c07d2d96d2a645208daff17084441b5d58c07378c9dd6f9c4c87", size = 959820, upload-time = "2025-11-05T20:42:06.364Z" }, - { url = "https://files.pythonhosted.org/packages/5e/b2/93bf609633021e9658acaff24cfb055d8cdaf7f5855d10ebb35307900dda/rignore-0.7.6-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:d5bd8e1a91ed1a789b2cbe39eeea9204a6719d4f2cf443a9544b521a285a295f", size = 985050, upload-time = "2025-11-05T20:41:51.124Z" }, - { url = "https://files.pythonhosted.org/packages/69/bc/ec2d040469bdfd7b743df10f2201c5d285009a4263d506edbf7a06a090bb/rignore-0.7.6-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:bc1fc03efad5789365018e94ac4079f851a999bc154d1551c45179f7fcf45322", size = 1079164, upload-time = "2025-11-05T21:40:10.368Z" }, - { url = "https://files.pythonhosted.org/packages/df/26/4b635f4ea5baf4baa8ba8eee06163f6af6e76dfbe72deb57da34bb24b19d/rignore-0.7.6-cp310-cp310-musllinux_1_2_armv7l.whl", hash = "sha256:ce2617fe28c51367fd8abfd4eeea9e61664af63c17d4ea00353d8ef56dfb95fa", size = 1139028, upload-time = "2025-11-05T21:40:27.977Z" }, - { url = "https://files.pythonhosted.org/packages/6a/54/a3147ebd1e477b06eb24e2c2c56d951ae5faa9045b7b36d7892fec5080d9/rignore-0.7.6-cp310-cp310-musllinux_1_2_i686.whl", hash = "sha256:7c4ad2cee85068408e7819a38243043214e2c3047e9bd4c506f8de01c302709e", size = 1119024, upload-time = "2025-11-05T21:40:45.148Z" }, - { url = 
"https://files.pythonhosted.org/packages/fb/f4/27475db769a57cff18fe7e7267b36e6cdb5b1281caa185ba544171106cba/rignore-0.7.6-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:02cd240bfd59ecc3907766f4839cbba20530a2e470abca09eaa82225e4d946fb", size = 1128531, upload-time = "2025-11-05T21:41:02.734Z" }, - { url = "https://files.pythonhosted.org/packages/97/32/6e782d3b352e4349fa0e90bf75b13cb7f11d8908b36d9e2b262224b65d9a/rignore-0.7.6-cp310-cp310-win32.whl", hash = "sha256:fe2bd8fa1ff555259df54c376abc73855cb02628a474a40d51b358c3a1ddc55b", size = 646817, upload-time = "2025-11-05T21:41:47.51Z" }, - { url = "https://files.pythonhosted.org/packages/c0/8a/53185c69abb3bb362e8a46b8089999f820bf15655629ff8395107633c8ab/rignore-0.7.6-cp310-cp310-win_amd64.whl", hash = "sha256:d80afd6071c78baf3765ec698841071b19e41c326f994cfa69b5a1df676f5d39", size = 727001, upload-time = "2025-11-05T21:41:32.778Z" }, - { url = "https://files.pythonhosted.org/packages/25/41/b6e2be3069ef3b7f24e35d2911bd6deb83d20ed5642ad81d5a6d1c015473/rignore-0.7.6-cp311-cp311-macosx_10_12_x86_64.whl", hash = "sha256:40be8226e12d6653abbebaffaea2885f80374c1c8f76fe5ca9e0cadd120a272c", size = 885285, upload-time = "2025-11-05T20:42:39.763Z" }, - { url = "https://files.pythonhosted.org/packages/52/66/ba7f561b6062402022887706a7f2b2c2e2e2a28f1e3839202b0a2f77e36d/rignore-0.7.6-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:182f4e5e4064d947c756819446a7d4cdede8e756b8c81cf9e509683fe38778d7", size = 823882, upload-time = "2025-11-05T20:42:23.488Z" }, - { url = "https://files.pythonhosted.org/packages/f5/81/4087453df35a90b07370647b19017029324950c1b9137d54bf1f33843f17/rignore-0.7.6-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:16b63047648a916a87be1e51bb5c009063f1b8b6f5afe4f04f875525507e63dc", size = 899362, upload-time = "2025-11-05T20:40:51.111Z" }, - { url = 
"https://files.pythonhosted.org/packages/fb/c9/390a8fdfabb76d71416be773bd9f162977bd483084f68daf19da1dec88a6/rignore-0.7.6-cp311-cp311-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:ba5524f5178deca4d7695e936604ebc742acb8958f9395776e1fcb8133f8257a", size = 873633, upload-time = "2025-11-05T20:41:06.193Z" }, - { url = "https://files.pythonhosted.org/packages/df/c9/79404fcb0faa76edfbc9df0901f8ef18568d1104919ebbbad6d608c888d1/rignore-0.7.6-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:62020dbb89a1dd4b84ab3d60547b3b2eb2723641d5fb198463643f71eaaed57d", size = 1167633, upload-time = "2025-11-05T20:41:22.491Z" }, - { url = "https://files.pythonhosted.org/packages/6e/8d/b3466d32d445d158a0aceb80919085baaae495b1f540fb942f91d93b5e5b/rignore-0.7.6-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:b34acd532769d5a6f153a52a98dcb81615c949ab11697ce26b2eb776af2e174d", size = 941434, upload-time = "2025-11-05T20:41:38.151Z" }, - { url = "https://files.pythonhosted.org/packages/e8/40/9cd949761a7af5bc27022a939c91ff622d29c7a0b66d0c13a863097dde2d/rignore-0.7.6-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:1c5e53b752f9de44dff7b3be3c98455ce3bf88e69d6dc0cf4f213346c5e3416c", size = 959461, upload-time = "2025-11-05T20:42:08.476Z" }, - { url = "https://files.pythonhosted.org/packages/b5/87/1e1a145731f73bdb7835e11f80da06f79a00d68b370d9a847de979575e6d/rignore-0.7.6-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:25b3536d13a5d6409ce85f23936f044576eeebf7b6db1d078051b288410fc049", size = 985323, upload-time = "2025-11-05T20:41:52.735Z" }, - { url = "https://files.pythonhosted.org/packages/6c/31/1ecff992fc3f59c4fcdcb6c07d5f6c1e6dfb55ccda19c083aca9d86fa1c6/rignore-0.7.6-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:6e01cad2b0b92f6b1993f29fc01f23f2d78caf4bf93b11096d28e9d578eb08ce", size = 1079173, upload-time = "2025-11-05T21:40:12.007Z" }, - { url = 
"https://files.pythonhosted.org/packages/17/18/162eedadb4c2282fa4c521700dbf93c9b14b8842e8354f7d72b445b8d593/rignore-0.7.6-cp311-cp311-musllinux_1_2_armv7l.whl", hash = "sha256:5991e46ab9b4868334c9e372ab0892b0150f3f586ff2b1e314272caeb38aaedb", size = 1139012, upload-time = "2025-11-05T21:40:29.399Z" }, - { url = "https://files.pythonhosted.org/packages/78/96/a9ca398a8af74bb143ad66c2a31303c894111977e28b0d0eab03867f1b43/rignore-0.7.6-cp311-cp311-musllinux_1_2_i686.whl", hash = "sha256:6c8ae562e5d1246cba5eaeb92a47b2a279e7637102828dde41dcbe291f529a3e", size = 1118827, upload-time = "2025-11-05T21:40:46.6Z" }, - { url = "https://files.pythonhosted.org/packages/9f/22/1c1a65047df864def9a047dbb40bc0b580b8289a4280e62779cd61ae21f2/rignore-0.7.6-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:aaf938530dcc0b47c4cfa52807aa2e5bfd5ca6d57a621125fe293098692f6345", size = 1128182, upload-time = "2025-11-05T21:41:04.239Z" }, - { url = "https://files.pythonhosted.org/packages/bd/f4/1526eb01fdc2235aca1fd9d0189bee4021d009a8dcb0161540238c24166e/rignore-0.7.6-cp311-cp311-win32.whl", hash = "sha256:166ebce373105dd485ec213a6a2695986346e60c94ff3d84eb532a237b24a4d5", size = 646547, upload-time = "2025-11-05T21:41:49.439Z" }, - { url = "https://files.pythonhosted.org/packages/7c/c8/dda0983e1845706beb5826459781549a840fe5a7eb934abc523e8cd17814/rignore-0.7.6-cp311-cp311-win_amd64.whl", hash = "sha256:44f35ee844b1a8cea50d056e6a595190ce9d42d3cccf9f19d280ae5f3058973a", size = 727139, upload-time = "2025-11-05T21:41:34.367Z" }, - { url = "https://files.pythonhosted.org/packages/e3/47/eb1206b7bf65970d41190b879e1723fc6bbdb2d45e53565f28991a8d9d96/rignore-0.7.6-cp311-cp311-win_arm64.whl", hash = "sha256:14b58f3da4fa3d5c3fa865cab49821675371f5e979281c683e131ae29159a581", size = 657598, upload-time = "2025-11-05T21:41:23.758Z" }, - { url = "https://files.pythonhosted.org/packages/0b/0e/012556ef3047a2628842b44e753bb15f4dc46806780ff090f1e8fe4bf1eb/rignore-0.7.6-cp312-cp312-macosx_10_12_x86_64.whl", hash 
= "sha256:03e82348cb7234f8d9b2834f854400ddbbd04c0f8f35495119e66adbd37827a8", size = 883488, upload-time = "2025-11-05T20:42:41.359Z" }, - { url = "https://files.pythonhosted.org/packages/93/b0/d4f1f3fe9eb3f8e382d45ce5b0547ea01c4b7e0b4b4eb87bcd66a1d2b888/rignore-0.7.6-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:b9e624f6be6116ea682e76c5feb71ea91255c67c86cb75befe774365b2931961", size = 820411, upload-time = "2025-11-05T20:42:24.782Z" }, - { url = "https://files.pythonhosted.org/packages/4a/c8/dea564b36dedac8de21c18e1851789545bc52a0c22ece9843444d5608a6a/rignore-0.7.6-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:bda49950d405aa8d0ebe26af807c4e662dd281d926530f03f29690a2e07d649a", size = 897821, upload-time = "2025-11-05T20:40:52.613Z" }, - { url = "https://files.pythonhosted.org/packages/b3/2b/ee96db17ac1835e024c5d0742eefb7e46de60020385ac883dd3d1cde2c1f/rignore-0.7.6-cp312-cp312-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:b5fd5ab3840b8c16851d327ed06e9b8be6459702a53e5ab1fc4073b684b3789e", size = 873963, upload-time = "2025-11-05T20:41:07.49Z" }, - { url = "https://files.pythonhosted.org/packages/a5/8c/ad5a57bbb9d14d5c7e5960f712a8a0b902472ea3f4a2138cbf70d1777b75/rignore-0.7.6-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:ced2a248352636a5c77504cb755dc02c2eef9a820a44d3f33061ce1bb8a7f2d2", size = 1169216, upload-time = "2025-11-05T20:41:23.73Z" }, - { url = "https://files.pythonhosted.org/packages/80/e6/5b00bc2a6bc1701e6878fca798cf5d9125eb3113193e33078b6fc0d99123/rignore-0.7.6-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:a04a3b73b75ddc12c9c9b21efcdaab33ca3832941d6f1d67bffd860941cd448a", size = 942942, upload-time = "2025-11-05T20:41:39.393Z" }, - { url = "https://files.pythonhosted.org/packages/85/e5/7f99bd0cc9818a91d0e8b9acc65b792e35750e3bdccd15a7ee75e64efca4/rignore-0.7.6-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = 
"sha256:d24321efac92140b7ec910ac7c53ab0f0c86a41133d2bb4b0e6a7c94967f44dd", size = 959787, upload-time = "2025-11-05T20:42:09.765Z" }, - { url = "https://files.pythonhosted.org/packages/55/54/2ffea79a7c1eabcede1926347ebc2a81bc6b81f447d05b52af9af14948b9/rignore-0.7.6-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:73c7aa109d41e593785c55fdaa89ad80b10330affa9f9d3e3a51fa695f739b20", size = 984245, upload-time = "2025-11-05T20:41:54.062Z" }, - { url = "https://files.pythonhosted.org/packages/41/f7/e80f55dfe0f35787fa482aa18689b9c8251e045076c35477deb0007b3277/rignore-0.7.6-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:1734dc49d1e9501b07852ef44421f84d9f378da9fbeda729e77db71f49cac28b", size = 1078647, upload-time = "2025-11-05T21:40:13.463Z" }, - { url = "https://files.pythonhosted.org/packages/d4/cf/2c64f0b6725149f7c6e7e5a909d14354889b4beaadddaa5fff023ec71084/rignore-0.7.6-cp312-cp312-musllinux_1_2_armv7l.whl", hash = "sha256:5719ea14ea2b652c0c0894be5dfde954e1853a80dea27dd2fbaa749618d837f5", size = 1139186, upload-time = "2025-11-05T21:40:31.27Z" }, - { url = "https://files.pythonhosted.org/packages/75/95/a86c84909ccc24af0d094b50d54697951e576c252a4d9f21b47b52af9598/rignore-0.7.6-cp312-cp312-musllinux_1_2_i686.whl", hash = "sha256:8e23424fc7ce35726854f639cb7968151a792c0c3d9d082f7f67e0c362cfecca", size = 1117604, upload-time = "2025-11-05T21:40:48.07Z" }, - { url = "https://files.pythonhosted.org/packages/7f/5e/13b249613fd5d18d58662490ab910a9f0be758981d1797789913adb4e918/rignore-0.7.6-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:3efdcf1dd84d45f3e2bd2f93303d9be103888f56dfa7c3349b5bf4f0657ec696", size = 1127725, upload-time = "2025-11-05T21:41:05.804Z" }, - { url = "https://files.pythonhosted.org/packages/c7/28/fa5dcd1e2e16982c359128664e3785f202d3eca9b22dd0b2f91c4b3d242f/rignore-0.7.6-cp312-cp312-win32.whl", hash = "sha256:ccca9d1a8b5234c76b71546fc3c134533b013f40495f394a65614a81f7387046", size = 646145, upload-time = 
"2025-11-05T21:41:51.096Z" }, - { url = "https://files.pythonhosted.org/packages/26/87/69387fb5dd81a0f771936381431780b8cf66fcd2cfe9495e1aaf41548931/rignore-0.7.6-cp312-cp312-win_amd64.whl", hash = "sha256:c96a285e4a8bfec0652e0bfcf42b1aabcdda1e7625f5006d188e3b1c87fdb543", size = 726090, upload-time = "2025-11-05T21:41:36.485Z" }, - { url = "https://files.pythonhosted.org/packages/24/5f/e8418108dcda8087fb198a6f81caadbcda9fd115d61154bf0df4d6d3619b/rignore-0.7.6-cp312-cp312-win_arm64.whl", hash = "sha256:a64a750e7a8277a323f01ca50b7784a764845f6cce2fe38831cb93f0508d0051", size = 656317, upload-time = "2025-11-05T21:41:25.305Z" }, - { url = "https://files.pythonhosted.org/packages/b7/8a/a4078f6e14932ac7edb171149c481de29969d96ddee3ece5dc4c26f9e0c3/rignore-0.7.6-cp313-cp313-macosx_10_12_x86_64.whl", hash = "sha256:2bdab1d31ec9b4fb1331980ee49ea051c0d7f7bb6baa28b3125ef03cdc48fdaf", size = 883057, upload-time = "2025-11-05T20:42:42.741Z" }, - { url = "https://files.pythonhosted.org/packages/f9/8f/f8daacd177db4bf7c2223bab41e630c52711f8af9ed279be2058d2fe4982/rignore-0.7.6-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:90f0a00ce0c866c275bf888271f1dc0d2140f29b82fcf33cdbda1e1a6af01010", size = 820150, upload-time = "2025-11-05T20:42:26.545Z" }, - { url = "https://files.pythonhosted.org/packages/36/31/b65b837e39c3f7064c426754714ac633b66b8c2290978af9d7f513e14aa9/rignore-0.7.6-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:c1ad295537041dc2ed4b540fb1a3906bd9ede6ccdad3fe79770cd89e04e3c73c", size = 897406, upload-time = "2025-11-05T20:40:53.854Z" }, - { url = "https://files.pythonhosted.org/packages/ca/58/1970ce006c427e202ac7c081435719a076c478f07b3a23f469227788dc23/rignore-0.7.6-cp313-cp313-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:f782dbd3a65a5ac85adfff69e5c6b101285ef3f845c3a3cae56a54bebf9fe116", size = 874050, upload-time = "2025-11-05T20:41:08.922Z" }, - { url = 
"https://files.pythonhosted.org/packages/d4/00/eb45db9f90137329072a732273be0d383cb7d7f50ddc8e0bceea34c1dfdf/rignore-0.7.6-cp313-cp313-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:65cece3b36e5b0826d946494734c0e6aaf5a0337e18ff55b071438efe13d559e", size = 1167835, upload-time = "2025-11-05T20:41:24.997Z" }, - { url = "https://files.pythonhosted.org/packages/f3/f1/6f1d72ddca41a64eed569680587a1236633587cc9f78136477ae69e2c88a/rignore-0.7.6-cp313-cp313-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:d7e4bb66c13cd7602dc8931822c02dfbbd5252015c750ac5d6152b186f0a8be0", size = 941945, upload-time = "2025-11-05T20:41:40.628Z" }, - { url = "https://files.pythonhosted.org/packages/48/6f/2f178af1c1a276a065f563ec1e11e7a9e23d4996fd0465516afce4b5c636/rignore-0.7.6-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:297e500c15766e196f68aaaa70e8b6db85fa23fdc075b880d8231fdfba738cd7", size = 959067, upload-time = "2025-11-05T20:42:11.09Z" }, - { url = "https://files.pythonhosted.org/packages/5b/db/423a81c4c1e173877c7f9b5767dcaf1ab50484a94f60a0b2ed78be3fa765/rignore-0.7.6-cp313-cp313-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:a07084211a8d35e1a5b1d32b9661a5ed20669970b369df0cf77da3adea3405de", size = 984438, upload-time = "2025-11-05T20:41:55.443Z" }, - { url = "https://files.pythonhosted.org/packages/31/eb/c4f92cc3f2825d501d3c46a244a671eb737fc1bcf7b05a3ecd34abb3e0d7/rignore-0.7.6-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:181eb2a975a22256a1441a9d2f15eb1292839ea3f05606620bd9e1938302cf79", size = 1078365, upload-time = "2025-11-05T21:40:15.148Z" }, - { url = "https://files.pythonhosted.org/packages/26/09/99442f02794bd7441bfc8ed1c7319e890449b816a7493b2db0e30af39095/rignore-0.7.6-cp313-cp313-musllinux_1_2_armv7l.whl", hash = "sha256:7bbcdc52b5bf9f054b34ce4af5269df5d863d9c2456243338bc193c28022bd7b", size = 1139066, upload-time = "2025-11-05T21:40:32.771Z" }, - { url = 
"https://files.pythonhosted.org/packages/2c/88/bcfc21e520bba975410e9419450f4b90a2ac8236b9a80fd8130e87d098af/rignore-0.7.6-cp313-cp313-musllinux_1_2_i686.whl", hash = "sha256:f2e027a6da21a7c8c0d87553c24ca5cc4364def18d146057862c23a96546238e", size = 1118036, upload-time = "2025-11-05T21:40:49.646Z" }, - { url = "https://files.pythonhosted.org/packages/e2/25/d37215e4562cda5c13312636393aea0bafe38d54d4e0517520a4cc0753ec/rignore-0.7.6-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:ee4a18b82cbbc648e4aac1510066682fe62beb5dc88e2c67c53a83954e541360", size = 1127550, upload-time = "2025-11-05T21:41:07.648Z" }, - { url = "https://files.pythonhosted.org/packages/dc/76/a264ab38bfa1620ec12a8ff1c07778da89e16d8c0f3450b0333020d3d6dc/rignore-0.7.6-cp313-cp313-win32.whl", hash = "sha256:a7d7148b6e5e95035d4390396895adc384d37ff4e06781a36fe573bba7c283e5", size = 646097, upload-time = "2025-11-05T21:41:53.201Z" }, - { url = "https://files.pythonhosted.org/packages/62/44/3c31b8983c29ea8832b6082ddb1d07b90379c2d993bd20fce4487b71b4f4/rignore-0.7.6-cp313-cp313-win_amd64.whl", hash = "sha256:b037c4b15a64dced08fc12310ee844ec2284c4c5c1ca77bc37d0a04f7bff386e", size = 726170, upload-time = "2025-11-05T21:41:38.131Z" }, - { url = "https://files.pythonhosted.org/packages/aa/41/e26a075cab83debe41a42661262f606166157df84e0e02e2d904d134c0d8/rignore-0.7.6-cp313-cp313-win_arm64.whl", hash = "sha256:e47443de9b12fe569889bdbe020abe0e0b667516ee2ab435443f6d0869bd2804", size = 656184, upload-time = "2025-11-05T21:41:27.396Z" }, - { url = "https://files.pythonhosted.org/packages/85/12/62d690b4644c330d7ac0f739b7f078190ab4308faa909a60842d0e4af5b2/rignore-0.7.6-pp310-pypy310_pp73-macosx_10_12_x86_64.whl", hash = "sha256:c3d3a523af1cd4ed2c0cba8d277a32d329b0c96ef9901fb7ca45c8cfaccf31a5", size = 887462, upload-time = "2025-11-05T20:42:50.804Z" }, - { url = 
"https://files.pythonhosted.org/packages/05/bc/6528a0e97ed2bd7a7c329183367d1ffbc5b9762ae8348d88dae72cc9d1f5/rignore-0.7.6-pp310-pypy310_pp73-macosx_11_0_arm64.whl", hash = "sha256:990853566e65184a506e1e2af2d15045afad3ebaebb8859cb85b882081915110", size = 826918, upload-time = "2025-11-05T20:42:33.689Z" }, - { url = "https://files.pythonhosted.org/packages/3e/2c/7d7bad116e09a04e9e1688c6f891fa2d4fd33f11b69ac0bd92419ddebeae/rignore-0.7.6-pp310-pypy310_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:1cab9ff2e436ce7240d7ee301c8ef806ed77c1fd6b8a8239ff65f9bbbcb5b8a3", size = 900922, upload-time = "2025-11-05T20:41:00.361Z" }, - { url = "https://files.pythonhosted.org/packages/09/ba/e5ea89fbde8e37a90ce456e31c5e9d85512cef5ae38e0f4d2426eb776a19/rignore-0.7.6-pp310-pypy310_pp73-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:d1a6671b2082c13bfd9a5cf4ce64670f832a6d41470556112c4ab0b6519b2fc4", size = 876987, upload-time = "2025-11-05T20:41:16.219Z" }, - { url = "https://files.pythonhosted.org/packages/d0/fb/93d14193f0ec0c3d35b763f0a000e9780f63b2031f3d3756442c2152622d/rignore-0.7.6-pp310-pypy310_pp73-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:2468729b4c5295c199d084ab88a40afcb7c8b974276805105239c07855bbacee", size = 1171110, upload-time = "2025-11-05T20:41:32.631Z" }, - { url = "https://files.pythonhosted.org/packages/9e/46/08436312ff96ffa29cfa4e1a987efc37e094531db46ba5e9fda9bb792afd/rignore-0.7.6-pp310-pypy310_pp73-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:775710777fd71e5fdf54df69cdc249996a1d6f447a2b5bfb86dbf033fddd9cf9", size = 943339, upload-time = "2025-11-05T20:41:47.128Z" }, - { url = "https://files.pythonhosted.org/packages/34/28/3b3c51328f505cfaf7e53f408f78a1e955d561135d02f9cb0341ea99f69a/rignore-0.7.6-pp310-pypy310_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:4565407f4a77f72cf9d91469e75d15d375f755f0a01236bb8aaa176278cc7085", size = 961680, upload-time = 
"2025-11-05T20:42:18.061Z" }, - { url = "https://files.pythonhosted.org/packages/5c/9e/cbff75c8676d4f4a90bd58a1581249d255c7305141b0868f0abc0324836b/rignore-0.7.6-pp310-pypy310_pp73-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:dc44c33f8fb2d5c9da748de7a6e6653a78aa740655e7409895e94a247ffa97c8", size = 987045, upload-time = "2025-11-05T20:42:02.315Z" }, - { url = "https://files.pythonhosted.org/packages/8c/25/d802d1d369502a7ddb8816059e7c79d2d913e17df975b863418e0aca4d8a/rignore-0.7.6-pp310-pypy310_pp73-musllinux_1_2_aarch64.whl", hash = "sha256:8f32478f05540513c11923e8838afab9efef0131d66dca7f67f0e1bbd118af6a", size = 1080310, upload-time = "2025-11-05T21:40:23.184Z" }, - { url = "https://files.pythonhosted.org/packages/43/f0/250b785c2e473b1ab763eaf2be820934c2a5409a722e94b279dddac21c7d/rignore-0.7.6-pp310-pypy310_pp73-musllinux_1_2_armv7l.whl", hash = "sha256:1b63a3dd76225ea35b01dd6596aa90b275b5d0f71d6dc28fce6dd295d98614aa", size = 1140998, upload-time = "2025-11-05T21:40:40.603Z" }, - { url = "https://files.pythonhosted.org/packages/f5/d6/bb42fd2a8bba6aea327962656e20621fd495523259db40cfb4c5f760f05c/rignore-0.7.6-pp310-pypy310_pp73-musllinux_1_2_i686.whl", hash = "sha256:fe6c41175c36554a4ef0994cd1b4dbd6d73156fca779066456b781707402048e", size = 1121178, upload-time = "2025-11-05T21:40:57.585Z" }, - { url = "https://files.pythonhosted.org/packages/97/f4/aeb548374129dce3dc191a4bb598c944d9ed663f467b9af830315d86059c/rignore-0.7.6-pp310-pypy310_pp73-musllinux_1_2_x86_64.whl", hash = "sha256:9a0c6792406ae36f4e7664dc772da909451d46432ff8485774526232d4885063", size = 1130190, upload-time = "2025-11-05T21:41:16.403Z" }, - { url = "https://files.pythonhosted.org/packages/82/78/a6250ff0c49a3cdb943910ada4116e708118e9b901c878cfae616c80a904/rignore-0.7.6-pp311-pypy311_pp73-macosx_10_12_x86_64.whl", hash = "sha256:a20b6fb61bcced9a83dfcca6599ad45182b06ba720cff7c8d891e5b78db5b65f", size = 886470, upload-time = "2025-11-05T20:42:52.314Z" }, - { url = 
"https://files.pythonhosted.org/packages/35/af/c69c0c51b8f9f7914d95c4ea91c29a2ac067572048cae95dd6d2efdbe05d/rignore-0.7.6-pp311-pypy311_pp73-macosx_11_0_arm64.whl", hash = "sha256:392dcabfecbe176c9ebbcb40d85a5e86a5989559c4f988c2741da7daf1b5be25", size = 825976, upload-time = "2025-11-05T20:42:35.118Z" }, - { url = "https://files.pythonhosted.org/packages/f1/d2/1b264f56132264ea609d3213ab603d6a27016b19559a1a1ede1a66a03dcd/rignore-0.7.6-pp311-pypy311_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:22baa462abdc36fdd5a5e2dae423107723351b85ff093762f9261148b9d0a04a", size = 899739, upload-time = "2025-11-05T20:41:01.518Z" }, - { url = "https://files.pythonhosted.org/packages/55/e4/b3c5dfdd8d8a10741dfe7199ef45d19a0e42d0c13aa377c83bd6caf65d90/rignore-0.7.6-pp311-pypy311_pp73-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:53fb28882d2538cb2d231972146c4927a9d9455e62b209f85d634408c4103538", size = 874843, upload-time = "2025-11-05T20:41:17.687Z" }, - { url = "https://files.pythonhosted.org/packages/cc/10/d6f3750233881a2a154cefc9a6a0a9b19da526b19f7f08221b552c6f827d/rignore-0.7.6-pp311-pypy311_pp73-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:87409f7eeb1103d6b77f3472a3a0d9a5953e3ae804a55080bdcb0120ee43995b", size = 1170348, upload-time = "2025-11-05T20:41:34.21Z" }, - { url = "https://files.pythonhosted.org/packages/6e/10/ad98ca05c9771c15af734cee18114a3c280914b6e34fde9ffea2e61e88aa/rignore-0.7.6-pp311-pypy311_pp73-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:684014e42e4341ab3ea23a203551857fcc03a7f8ae96ca3aefb824663f55db32", size = 942315, upload-time = "2025-11-05T20:41:48.508Z" }, - { url = "https://files.pythonhosted.org/packages/de/00/ab5c0f872acb60d534e687e629c17e0896c62da9b389c66d3aa16b817aa8/rignore-0.7.6-pp311-pypy311_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:77356ebb01ba13f8a425c3d30fcad40e57719c0e37670d022d560884a30e4767", size = 961047, upload-time = 
"2025-11-05T20:42:19.403Z" }, - { url = "https://files.pythonhosted.org/packages/b8/86/3030fdc363a8f0d1cd155b4c453d6db9bab47a24fcc64d03f61d9d78fe6a/rignore-0.7.6-pp311-pypy311_pp73-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:6cbd8a48abbd3747a6c830393cd578782fab5d43f4deea48c5f5e344b8fed2b0", size = 986090, upload-time = "2025-11-05T20:42:03.581Z" }, - { url = "https://files.pythonhosted.org/packages/33/b8/133aa4002cee0ebbb39362f94e4898eec7fbd09cec9fcbce1cd65b355b7f/rignore-0.7.6-pp311-pypy311_pp73-musllinux_1_2_aarch64.whl", hash = "sha256:2673225dcec7f90497e79438c35e34638d0d0391ccea3cbb79bfb9adc0dc5bd7", size = 1079656, upload-time = "2025-11-05T21:40:24.89Z" }, - { url = "https://files.pythonhosted.org/packages/67/56/36d5d34210e5e7dfcd134eed8335b19e80ae940ee758f493e4f2b344dd70/rignore-0.7.6-pp311-pypy311_pp73-musllinux_1_2_armv7l.whl", hash = "sha256:c081f17290d8a2b96052b79207622aa635686ea39d502b976836384ede3d303c", size = 1139789, upload-time = "2025-11-05T21:40:42.119Z" }, - { url = "https://files.pythonhosted.org/packages/6b/5b/bb4f9420802bf73678033a4a55ab1bede36ce2e9b41fec5f966d83d932b3/rignore-0.7.6-pp311-pypy311_pp73-musllinux_1_2_i686.whl", hash = "sha256:57e8327aacc27f921968cb2a174f9e47b084ce9a7dd0122c8132d22358f6bd79", size = 1120308, upload-time = "2025-11-05T21:40:59.402Z" }, - { url = "https://files.pythonhosted.org/packages/ce/8b/a1299085b28a2f6135e30370b126e3c5055b61908622f2488ade67641479/rignore-0.7.6-pp311-pypy311_pp73-musllinux_1_2_x86_64.whl", hash = "sha256:d8955b57e42f2a5434670d5aa7b75eaf6e74602ccd8955dddf7045379cd762fb", size = 1129444, upload-time = "2025-11-05T21:41:17.906Z" }, -] - [[package]] name = "rpds-py" version = "0.30.0" @@ -5099,6 +5412,63 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/d1/b7/b95708304cd49b7b6f82fdd039f1748b66ec2b21d6a45180910802f1abf1/rpds_py-0.30.0-pp311-pypy311_pp73-musllinux_1_2_x86_64.whl", hash = "sha256:ac37f9f516c51e5753f27dfdef11a88330f04de2d564be3991384b2f3535d02e", size = 
562191, upload-time = "2025-11-30T20:24:36.853Z" }, ] +[[package]] +name = "ruamel-yaml" +version = "0.19.1" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/c7/3b/ebda527b56beb90cb7652cb1c7e4f91f48649fbcd8d2eb2fb6e77cd3329b/ruamel_yaml-0.19.1.tar.gz", hash = "sha256:53eb66cd27849eff968ebf8f0bf61f46cdac2da1d1f3576dd4ccee9b25c31993", size = 142709, upload-time = "2026-01-02T16:50:31.84Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/b8/0c/51f6841f1d84f404f92463fc2b1ba0da357ca1e3db6b7fbda26956c3b82a/ruamel_yaml-0.19.1-py3-none-any.whl", hash = "sha256:27592957fedf6e0b62f281e96effd28043345e0e66001f97683aa9a40c667c93", size = 118102, upload-time = "2026-01-02T16:50:29.201Z" }, +] + +[[package]] +name = "ruamel-yaml-clib" +version = "0.2.14" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/d8/e9/39ec4d4b3f91188fad1842748f67d4e749c77c37e353c4e545052ee8e893/ruamel.yaml.clib-0.2.14.tar.gz", hash = "sha256:803f5044b13602d58ea378576dd75aa759f52116a0232608e8fdada4da33752e", size = 225394, upload-time = "2025-09-22T19:51:23.753Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/b4/56/35a0a752415ae01992c68f5a6513bdef0e1b6fbdb60d7619342ce12346a0/ruamel.yaml.clib-0.2.14-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:f8b2acb0ffdd2ce8208accbec2dca4a06937d556fdcaefd6473ba1b5daa7e3c4", size = 269216, upload-time = "2025-09-23T14:24:09.742Z" }, + { url = "https://files.pythonhosted.org/packages/98/6a/9a68184ab93619f4607ff1675e4ef01e8accfcbff0d482f4ca44c10d8eab/ruamel.yaml.clib-0.2.14-cp310-cp310-macosx_13_0_arm64.whl", hash = "sha256:aef953f3b8bd0b50bd52a2e52fb54a6a2171a1889d8dea4a5959d46c6624c451", size = 137092, upload-time = "2025-09-22T19:50:26.906Z" }, + { url = 
"https://files.pythonhosted.org/packages/2b/3f/cfed5f088628128a9ec66f46794fd4d165642155c7b78c26d83b16c6bf7b/ruamel.yaml.clib-0.2.14-cp310-cp310-manylinux2014_aarch64.whl", hash = "sha256:a0ac90efbc7a77b0d796c03c8cc4e62fd710b3f1e4c32947713ef2ef52e09543", size = 633768, upload-time = "2025-09-22T19:50:31.228Z" }, + { url = "https://files.pythonhosted.org/packages/3a/d5/5ce2cc156c1da48160171968d91f066d305840fbf930ee955a509d025a44/ruamel.yaml.clib-0.2.14-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:9bf6b699223afe6c7fe9f2ef76e0bfa6dd892c21e94ce8c957478987ade76cd8", size = 721253, upload-time = "2025-09-22T19:50:28.776Z" }, + { url = "https://files.pythonhosted.org/packages/2b/71/d0b56bc902b38ebe4be8e270f730f929eec4edaf8a0fa7028f4ef64fa950/ruamel.yaml.clib-0.2.14-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:d73a0187718f6eec5b2f729b0f98e4603f7bd9c48aa65d01227d1a5dcdfbe9e8", size = 683823, upload-time = "2025-09-22T19:50:29.993Z" }, + { url = "https://files.pythonhosted.org/packages/4b/db/1f37449dd89c540218598316ccafc1a0aed60215e72efa315c5367cfd015/ruamel.yaml.clib-0.2.14-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:81f6d3b19bc703679a5705c6a16dabdc79823c71d791d73c65949be7f3012c02", size = 690370, upload-time = "2025-09-23T18:42:46.797Z" }, + { url = "https://files.pythonhosted.org/packages/5d/53/c498b30f35efcd9f47cb084d7ad9374f2b907470f73913dec6396b81397d/ruamel.yaml.clib-0.2.14-cp310-cp310-musllinux_1_2_i686.whl", hash = "sha256:b28caeaf3e670c08cb7e8de221266df8494c169bd6ed8875493fab45be9607a4", size = 703578, upload-time = "2025-09-22T19:50:32.531Z" }, + { url = "https://files.pythonhosted.org/packages/34/79/492cfad9baed68914840c39e5f3c1cc251f51a897ddb3f532601215cbb12/ruamel.yaml.clib-0.2.14-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:94f3efb718f8f49b031f2071ec7a27dd20cbfe511b4dfd54ecee54c956da2b31", size = 722544, upload-time = "2025-09-22T19:50:34.157Z" }, + 
{ url = "https://files.pythonhosted.org/packages/ca/f5/479ebfd5ba396e209ade90f7282d84b90c57b3e07be8dc6fcd02a6df7ffc/ruamel.yaml.clib-0.2.14-cp310-cp310-win32.whl", hash = "sha256:27c070cf3888e90d992be75dd47292ff9aa17dafd36492812a6a304a1aedc182", size = 100375, upload-time = "2025-09-22T19:50:36.832Z" }, + { url = "https://files.pythonhosted.org/packages/57/31/a044520fdb3bd409889f67f1efebda0658033c7ab3f390cee37531cc9a9e/ruamel.yaml.clib-0.2.14-cp310-cp310-win_amd64.whl", hash = "sha256:4f4a150a737fccae13fb51234d41304ff2222e3b7d4c8e9428ed1a6ab48389b8", size = 118129, upload-time = "2025-09-22T19:50:35.545Z" }, + { url = "https://files.pythonhosted.org/packages/b3/9f/3c51e9578b8c36fcc4bdd271a1a5bb65963a74a4b6ad1a989768a22f6c2a/ruamel.yaml.clib-0.2.14-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:5bae1a073ca4244620425cd3d3aa9746bde590992b98ee8c7c8be8c597ca0d4e", size = 270207, upload-time = "2025-09-23T14:24:11.445Z" }, + { url = "https://files.pythonhosted.org/packages/4a/16/cb02815bc2ae9c66760c0c061d23c7358f9ba51dae95ac85247662b7fbe2/ruamel.yaml.clib-0.2.14-cp311-cp311-macosx_13_0_arm64.whl", hash = "sha256:0a54e5e40a7a691a426c2703b09b0d61a14294d25cfacc00631aa6f9c964df0d", size = 137780, upload-time = "2025-09-22T19:50:37.734Z" }, + { url = "https://files.pythonhosted.org/packages/31/c6/fc687cd1b93bff8e40861eea46d6dc1a6a778d9a085684e4045ff26a8e40/ruamel.yaml.clib-0.2.14-cp311-cp311-manylinux2014_aarch64.whl", hash = "sha256:10d9595b6a19778f3269399eff6bab642608e5966183abc2adbe558a42d4efc9", size = 641590, upload-time = "2025-09-22T19:50:41.978Z" }, + { url = "https://files.pythonhosted.org/packages/45/5d/65a2bc08b709b08576b3f307bf63951ee68a8e047cbbda6f1c9864ecf9a7/ruamel.yaml.clib-0.2.14-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:dba72975485f2b87b786075e18a6e5d07dc2b4d8973beb2732b9b2816f1bad70", size = 738090, upload-time = "2025-09-22T19:50:39.152Z" }, + { url = 
"https://files.pythonhosted.org/packages/fb/d0/a70a03614d9a6788a3661ab1538879ed2aae4e84d861f101243116308a37/ruamel.yaml.clib-0.2.14-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:29757bdb7c142f9595cc1b62ec49a3d1c83fab9cef92db52b0ccebaad4eafb98", size = 700744, upload-time = "2025-09-22T19:50:40.811Z" }, + { url = "https://files.pythonhosted.org/packages/77/30/c93fa457611f79946d5cb6cc97493ca5425f3f21891d7b1f9b44eaa1b38e/ruamel.yaml.clib-0.2.14-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:557df28dbccf79b152fe2d1b935f6063d9cc431199ea2b0e84892f35c03bb0ee", size = 742321, upload-time = "2025-09-23T18:42:48.916Z" }, + { url = "https://files.pythonhosted.org/packages/40/85/e2c54ad637117cd13244a4649946eaa00f32edcb882d1f92df90e079ab00/ruamel.yaml.clib-0.2.14-cp311-cp311-musllinux_1_2_i686.whl", hash = "sha256:26a8de280ab0d22b6e3ec745b4a5a07151a0f74aad92dd76ab9c8d8d7087720d", size = 743805, upload-time = "2025-09-22T19:50:43.58Z" }, + { url = "https://files.pythonhosted.org/packages/81/50/f899072c38877d8ef5382e0b3d47f8c4346226c1f52d6945d6f64fec6a2f/ruamel.yaml.clib-0.2.14-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:e501c096aa3889133d674605ebd018471bc404a59cbc17da3c5924421c54d97c", size = 769529, upload-time = "2025-09-22T19:50:45.707Z" }, + { url = "https://files.pythonhosted.org/packages/99/7c/96d4b5075e30c65ea2064e40c2d657c7c235d7b6ef18751cf89a935b9041/ruamel.yaml.clib-0.2.14-cp311-cp311-win32.whl", hash = "sha256:915748cfc25b8cfd81b14d00f4bfdb2ab227a30d6d43459034533f4d1c207a2a", size = 100256, upload-time = "2025-09-22T19:50:48.26Z" }, + { url = "https://files.pythonhosted.org/packages/7d/8c/73ee2babd04e8bfcf1fd5c20aa553d18bf0ebc24b592b4f831d12ae46cc0/ruamel.yaml.clib-0.2.14-cp311-cp311-win_amd64.whl", hash = "sha256:4ccba93c1e5a40af45b2f08e4591969fa4697eae951c708f3f83dcbf9f6c6bb1", size = 118234, upload-time = "2025-09-22T19:50:47.019Z" }, + { url = 
"https://files.pythonhosted.org/packages/b4/42/ccfb34a25289afbbc42017e4d3d4288e61d35b2e00cfc6b92974a6a1f94b/ruamel.yaml.clib-0.2.14-cp312-cp312-macosx_10_13_universal2.whl", hash = "sha256:6aeadc170090ff1889f0d2c3057557f9cd71f975f17535c26a5d37af98f19c27", size = 271775, upload-time = "2025-09-23T14:24:12.771Z" }, + { url = "https://files.pythonhosted.org/packages/82/73/e628a92e80197ff6a79ab81ec3fa00d4cc082d58ab78d3337b7ba7043301/ruamel.yaml.clib-0.2.14-cp312-cp312-macosx_14_0_arm64.whl", hash = "sha256:5e56ac47260c0eed992789fa0b8efe43404a9adb608608631a948cee4fc2b052", size = 138842, upload-time = "2025-09-22T19:50:49.156Z" }, + { url = "https://files.pythonhosted.org/packages/2b/c5/346c7094344a60419764b4b1334d9e0285031c961176ff88ffb652405b0c/ruamel.yaml.clib-0.2.14-cp312-cp312-manylinux2014_aarch64.whl", hash = "sha256:a911aa73588d9a8b08d662b9484bc0567949529824a55d3885b77e8dd62a127a", size = 647404, upload-time = "2025-09-22T19:50:52.921Z" }, + { url = "https://files.pythonhosted.org/packages/df/99/65080c863eb06d4498de3d6c86f3e90595e02e159fd8529f1565f56cfe2c/ruamel.yaml.clib-0.2.14-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:a05ba88adf3d7189a974b2de7a9d56731548d35dc0a822ec3dc669caa7019b29", size = 753141, upload-time = "2025-09-22T19:50:50.294Z" }, + { url = "https://files.pythonhosted.org/packages/3d/e3/0de85f3e3333f8e29e4b10244374a202a87665d1131798946ee22cf05c7c/ruamel.yaml.clib-0.2.14-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:fb04c5650de6668b853623eceadcdb1a9f2fee381f5d7b6bc842ee7c239eeec4", size = 703477, upload-time = "2025-09-22T19:50:51.508Z" }, + { url = "https://files.pythonhosted.org/packages/d9/25/0d2f09d8833c7fd77ab8efeff213093c16856479a9d293180a0d89f6bed9/ruamel.yaml.clib-0.2.14-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:df3ec9959241d07bc261f4983d25a1205ff37703faf42b474f15d54d88b4f8c9", size = 741157, upload-time = "2025-09-23T18:42:50.408Z" }, 
+ { url = "https://files.pythonhosted.org/packages/d3/8c/959f10c2e2153cbdab834c46e6954b6dd9e3b109c8f8c0a3cf1618310985/ruamel.yaml.clib-0.2.14-cp312-cp312-musllinux_1_2_i686.whl", hash = "sha256:fbc08c02e9b147a11dfcaa1ac8a83168b699863493e183f7c0c8b12850b7d259", size = 745859, upload-time = "2025-09-22T19:50:54.497Z" }, + { url = "https://files.pythonhosted.org/packages/ed/6b/e580a7c18b485e1a5f30a32cda96b20364b0ba649d9d2baaf72f8bd21f83/ruamel.yaml.clib-0.2.14-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:c099cafc1834d3c5dac305865d04235f7c21c167c8dd31ebc3d6bbc357e2f023", size = 770200, upload-time = "2025-09-22T19:50:55.718Z" }, + { url = "https://files.pythonhosted.org/packages/ef/44/3455eebc761dc8e8fdced90f2b0a3fa61e32ba38b50de4130e2d57db0f21/ruamel.yaml.clib-0.2.14-cp312-cp312-win32.whl", hash = "sha256:b5b0f7e294700b615a3bcf6d28b26e6da94e8eba63b079f4ec92e9ba6c0d6b54", size = 98829, upload-time = "2025-09-22T19:50:58.895Z" }, + { url = "https://files.pythonhosted.org/packages/76/ab/5121f7f3b651db93de546f8c982c241397aad0a4765d793aca1dac5eadee/ruamel.yaml.clib-0.2.14-cp312-cp312-win_amd64.whl", hash = "sha256:a37f40a859b503304dd740686359fcf541d6fb3ff7fc10f539af7f7150917c68", size = 115570, upload-time = "2025-09-22T19:50:57.981Z" }, + { url = "https://files.pythonhosted.org/packages/d7/ae/e3811f05415594025e96000349d3400978adaed88d8f98d494352d9761ee/ruamel.yaml.clib-0.2.14-cp313-cp313-macosx_10_13_universal2.whl", hash = "sha256:7e4f9da7e7549946e02a6122dcad00b7c1168513acb1f8a726b1aaf504a99d32", size = 269205, upload-time = "2025-09-23T14:24:15.06Z" }, + { url = "https://files.pythonhosted.org/packages/72/06/7d51f4688d6d72bb72fa74254e1593c4f5ebd0036be5b41fe39315b275e9/ruamel.yaml.clib-0.2.14-cp313-cp313-macosx_15_0_arm64.whl", hash = "sha256:dd7546c851e59c06197a7c651335755e74aa383a835878ca86d2c650c07a2f85", size = 137417, upload-time = "2025-09-22T19:50:59.82Z" }, + { url = 
"https://files.pythonhosted.org/packages/5a/08/b4499234a420ef42960eeb05585df5cc7eb25ccb8c980490b079e6367050/ruamel.yaml.clib-0.2.14-cp313-cp313-manylinux2014_aarch64.whl", hash = "sha256:1c1acc3a0209ea9042cc3cfc0790edd2eddd431a2ec3f8283d081e4d5018571e", size = 642558, upload-time = "2025-09-22T19:51:03.388Z" }, + { url = "https://files.pythonhosted.org/packages/b6/ba/1975a27dedf1c4c33306ee67c948121be8710b19387aada29e2f139c43ee/ruamel.yaml.clib-0.2.14-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:2070bf0ad1540d5c77a664de07ebcc45eebd1ddcab71a7a06f26936920692beb", size = 744087, upload-time = "2025-09-22T19:51:00.897Z" }, + { url = "https://files.pythonhosted.org/packages/20/15/8a19a13d27f3bd09fa18813add8380a29115a47b553845f08802959acbce/ruamel.yaml.clib-0.2.14-cp313-cp313-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:9bd8fe07f49c170e09d76773fb86ad9135e0beee44f36e1576a201b0676d3d1d", size = 699709, upload-time = "2025-09-22T19:51:02.075Z" }, + { url = "https://files.pythonhosted.org/packages/19/ee/8d6146a079ad21e534b5083c9ee4a4c8bec42f79cf87594b60978286b39a/ruamel.yaml.clib-0.2.14-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:ff86876889ea478b1381089e55cf9e345707b312beda4986f823e1d95e8c0f59", size = 708926, upload-time = "2025-09-23T18:42:51.707Z" }, + { url = "https://files.pythonhosted.org/packages/a9/f5/426b714abdc222392e68f3b8ad323930d05a214a27c7e7a0f06c69126401/ruamel.yaml.clib-0.2.14-cp313-cp313-musllinux_1_2_i686.whl", hash = "sha256:1f118b707eece8cf84ecbc3e3ec94d9db879d85ed608f95870d39b2d2efa5dca", size = 740202, upload-time = "2025-09-22T19:51:04.673Z" }, + { url = "https://files.pythonhosted.org/packages/3d/ac/3c5c2b27a183f4fda8a57c82211721c016bcb689a4a175865f7646db9f94/ruamel.yaml.clib-0.2.14-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:b30110b29484adc597df6bd92a37b90e63a8c152ca8136aad100a02f8ba6d1b6", size = 765196, upload-time = "2025-09-22T19:51:05.916Z" }, + 
{ url = "https://files.pythonhosted.org/packages/92/2e/06f56a71fd55021c993ed6e848c9b2e5e9cfce180a42179f0ddd28253f7c/ruamel.yaml.clib-0.2.14-cp313-cp313-win32.whl", hash = "sha256:f4e97a1cf0b7a30af9e1d9dad10a5671157b9acee790d9e26996391f49b965a2", size = 98635, upload-time = "2025-09-22T19:51:08.183Z" }, + { url = "https://files.pythonhosted.org/packages/51/79/76aba16a1689b50528224b182f71097ece338e7a4ab55e84c2e73443b78a/ruamel.yaml.clib-0.2.14-cp313-cp313-win_amd64.whl", hash = "sha256:090782b5fb9d98df96509eecdbcaffd037d47389a89492320280d52f91330d78", size = 115238, upload-time = "2025-09-22T19:51:07.081Z" }, +] + [[package]] name = "ruff" version = "0.15.12" @@ -5124,6 +5494,18 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/c0/98/6beb4b351e472e5f4c4613f7c35a5290b8be2497e183825310c4c3a3984b/ruff-0.15.12-py3-none-win_arm64.whl", hash = "sha256:a538f7a82d061cee7be55542aca1d86d1393d55d81d4fcc314370f4340930d4f", size = 11120821, upload-time = "2026-04-24T18:16:57.979Z" }, ] +[[package]] +name = "safehttpx" +version = "0.1.7" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "httpx" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/89/d1/4282284d9cf1ee873607a46442da977fc3c985059315ab23610be31d5885/safehttpx-0.1.7.tar.gz", hash = "sha256:db201c0978c41eddb8bb480f3eee59dd67304fdd91646035e9d9a720049a9d23", size = 10385, upload-time = "2025-10-24T18:30:09.783Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/2e/a3/0f0b7d78e2f1eb9e8e1afbff1d2bff8d60144aee17aca51c065b516743dd/safehttpx-0.1.7-py3-none-any.whl", hash = "sha256:c4f4a162db6993464d7ca3d7cc4af0ffc6515a606dfd220b9f82c6945d869cde", size = 8959, upload-time = "2025-10-24T18:30:08.733Z" }, +] + [[package]] name = "safetensors" version = "0.7.0" @@ -5158,7 +5540,7 @@ resolution-markers = [ "python_full_version < '3.11'", ] dependencies = [ - { name = "numpy", marker = "python_full_version < '3.11'" }, + { name = "numpy", marker = 
"python_full_version < '3.11' or (extra == 'extra-9-verifiers-openenv' and extra == 'group-9-verifiers-policy')" }, ] sdist = { url = "https://files.pythonhosted.org/packages/0f/37/6964b830433e654ec7485e45a00fc9a27cf868d622838f6b6d9c5ec0d532/scipy-1.15.3.tar.gz", hash = "sha256:eae3cf522bc7df64b42cad3925c876e1b0b6c35c1337c93e12c0f366f55b0eaf", size = 59419214, upload-time = "2025-05-08T16:13:05.955Z" } wheels = [ @@ -5225,7 +5607,7 @@ resolution-markers = [ "python_full_version == '3.11.*' and sys_platform != 'emscripten' and sys_platform != 'win32'", ] dependencies = [ - { name = "numpy", marker = "python_full_version >= '3.11'" }, + { name = "numpy", marker = "python_full_version >= '3.11' or (extra == 'extra-9-verifiers-openenv' and extra == 'group-9-verifiers-policy')" }, ] sdist = { url = "https://files.pythonhosted.org/packages/7a/97/5a3609c4f8d58b039179648e62dd220f89864f56f7357f5d4f45c29eb2cc/scipy-1.17.1.tar.gz", hash = "sha256:95d8e012d8cb8816c226aef832200b1d45109ed4464303e997c5b13122b297c0", size = 30573822, upload-time = "2026-02-23T00:26:24.851Z" } wheels = [ @@ -5284,6 +5666,59 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/b7/46/f5af3402b579fd5e11573ce652019a67074317e18c1935cc0b4ba9b35552/secretstorage-3.5.0-py3-none-any.whl", hash = "sha256:0ce65888c0725fcb2c5bc0fdb8e5438eece02c523557ea40ce0703c266248137", size = 15554, upload-time = "2025-11-23T19:02:51.545Z" }, ] +[[package]] +name = "semantic-version" +version = "2.10.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/7d/31/f2289ce78b9b473d582568c234e104d2a342fd658cc288a7553d83bb8595/semantic_version-2.10.0.tar.gz", hash = "sha256:bdabb6d336998cbb378d4b9db3a4b56a1e3235701dc05ea2690d9a997ed5041c", size = 52289, upload-time = "2022-05-26T13:35:23.454Z" } +wheels = [ + { url = 
"https://files.pythonhosted.org/packages/6a/23/8146aad7d88f4fcb3a6218f41a60f6c2d4e3a72de72da1825dc7c8f7877c/semantic_version-2.10.0-py2.py3-none-any.whl", hash = "sha256:de78a3b8e0feda74cabc54aab2da702113e33ac9d9eb9d2389bcf1f58b7d9177", size = 15552, upload-time = "2022-05-26T13:35:21.206Z" }, +] + +[[package]] +name = "semgrep" +version = "1.161.0" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "attrs" }, + { name = "boltons" }, + { name = "click" }, + { name = "click-option-group" }, + { name = "colorama" }, + { name = "exceptiongroup" }, + { name = "glom" }, + { name = "jsonschema" }, + { name = "mcp", version = "1.23.3", source = { registry = "https://pypi.org/simple" } }, + { name = "opentelemetry-api" }, + { name = "opentelemetry-exporter-otlp-proto-http" }, + { name = "opentelemetry-instrumentation-requests" }, + { name = "opentelemetry-instrumentation-threading" }, + { name = "opentelemetry-sdk" }, + { name = "packaging" }, + { name = "peewee" }, + { name = "pyjwt", extra = ["crypto"], marker = "extra == 'group-9-verifiers-policy'" }, + { name = "pywin32", marker = "sys_platform == 'win32'" }, + { name = "requests" }, + { name = "rich" }, + { name = "ruamel-yaml" }, + { name = "ruamel-yaml-clib" }, + { name = "semantic-version" }, + { name = "tomli", version = "2.0.2", source = { registry = "https://pypi.org/simple" } }, + { name = "typing-extensions" }, + { name = "urllib3" }, + { name = "wcmatch" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/ef/de/37c8b9d716d6d1e8a494ca960396a37b49fc79b593437d603489ffec58c8/semgrep-1.161.0.tar.gz", hash = "sha256:4f078397b7f2d4a88dbe803cb4b1128266339e3a5797208695cf2aee444d7b83", size = 55346055, upload-time = "2026-04-22T21:08:10.213Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/87/47/2c21a67bdb44786a0ff5cd41a91968c827edef28e988a54e33b45fa601ad/semgrep-1.161.0-cp310.cp311.cp312.cp313.cp314.py310.py311.py312.py313.py314-none-macosx_10_14_x86_64.whl", 
hash = "sha256:345068df665f3b48742ae9ce69679f5919f2baed5791892ee514f13a909f5964", size = 44680267, upload-time = "2026-04-22T21:07:39.991Z" }, + { url = "https://files.pythonhosted.org/packages/2f/ff/b7a91b66b31bcc681b75068fe7d57c859d918dcba681b1537ac011666c50/semgrep-1.161.0-cp310.cp311.cp312.cp313.cp314.py310.py311.py312.py313.py314-none-macosx_11_0_arm64.whl", hash = "sha256:1f9d43a97f78c8ca9bc2f0eb9486b70fae2106dbd802f7a52bb1cb9d58f9f3c8", size = 48602046, upload-time = "2026-04-22T21:07:43.542Z" }, + { url = "https://files.pythonhosted.org/packages/7d/08/3382aab9139ad344dd85b90f54b3fe92c1de8c0b41795356bdfda4e75e62/semgrep-1.161.0-cp310.cp311.cp312.cp313.cp314.py310.py311.py312.py313.py314-none-manylinux_2_35_aarch64.whl", hash = "sha256:c8316729d54882dede14d7166f4cdaf6174927b05895d12888ae3a0f4791c785", size = 77963546, upload-time = "2026-04-22T21:07:47.762Z" }, + { url = "https://files.pythonhosted.org/packages/cc/bf/573042ea49590bff369dc96ca687068dc90947b33a261cf19b889f4795b5/semgrep-1.161.0-cp310.cp311.cp312.cp313.cp314.py310.py311.py312.py313.py314-none-manylinux_2_35_x86_64.whl", hash = "sha256:8a82257d654348f94250f410701f1124dc70a0c4338461acc5ee713e101b0d9d", size = 76204940, upload-time = "2026-04-22T21:07:52.203Z" }, + { url = "https://files.pythonhosted.org/packages/28/95/0c86c0341b3b104aeea1e814ed9d874605486c11a92d8c613266969d9a89/semgrep-1.161.0-cp310.cp311.cp312.cp313.cp314.py310.py311.py312.py313.py314-none-musllinux_1_2_aarch64.whl", hash = "sha256:ae2d56f0c14910c9ac15d7de2ff084994d249b4b20c972931bb4dfa11e78f4d2", size = 78411785, upload-time = "2026-04-22T21:07:56.764Z" }, + { url = "https://files.pythonhosted.org/packages/34/a0/ed74b68e7720298e58b1458483698091123f9b6884ac5d2754755ed68137/semgrep-1.161.0-cp310.cp311.cp312.cp313.cp314.py310.py311.py312.py313.py314-none-musllinux_1_2_x86_64.whl", hash = "sha256:6583d68017890561fc93087056a565564051f498cb0637bde1af3945b755b164", size = 75949179, upload-time = "2026-04-22T21:08:01.832Z" }, + { url = 
"https://files.pythonhosted.org/packages/26/54/51305a35e4b0cef706259d28ceab2e3aa5de548d290a124f1860b433aa64/semgrep-1.161.0-cp310.cp311.cp312.cp313.cp314.py310.py311.py312.py313.py314-none-win_amd64.whl", hash = "sha256:bf4bc7caf27fa817f4a5a26b0875add679bc011aff70ae44aa46913e21f5a401", size = 56273247, upload-time = "2026-04-22T21:08:05.823Z" }, +] + [[package]] name = "sentencepiece" version = "0.2.1" @@ -5501,15 +5936,14 @@ wheels = [ [[package]] name = "sse-starlette" -version = "3.3.4" +version = "3.0.3" source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "anyio" }, - { name = "starlette" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/26/8c/f9290339ef6d79badbc010f067cd769d6601ec11a57d78569c683fb4dd87/sse_starlette-3.3.4.tar.gz", hash = "sha256:aaf92fc067af8a5427192895ac028e947b484ac01edbc3caf00e7e7137c7bef1", size = 32427, upload-time = "2026-03-29T09:00:23.307Z" } +sdist = { url = "https://files.pythonhosted.org/packages/db/3c/fa6517610dc641262b77cc7bf994ecd17465812c1b0585fe33e11be758ab/sse_starlette-3.0.3.tar.gz", hash = "sha256:88cfb08747e16200ea990c8ca876b03910a23b547ab3bd764c0d8eb81019b971", size = 21943, upload-time = "2025-10-30T18:44:20.117Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/f8/7f/3de5402f39890ac5660b86bcf5c03f9d855dad5c4ed764866d7b592b46fd/sse_starlette-3.3.4-py3-none-any.whl", hash = "sha256:84bb06e58939a8b38d8341f1bc9792f06c2b53f48c608dd207582b664fc8f3c1", size = 14330, upload-time = "2026-03-29T09:00:21.846Z" }, + { url = "https://files.pythonhosted.org/packages/23/a0/984525d19ca5c8a6c33911a0c164b11490dd0f90ff7fd689f704f84e9a11/sse_starlette-3.0.3-py3-none-any.whl", hash = "sha256:af5bf5a6f3933df1d9c7f8539633dc8444ca6a97ab2e2a7cd3b6e431ac03a431", size = 11765, upload-time = "2025-10-30T18:44:18.834Z" }, ] [[package]] @@ -5548,15 +5982,14 @@ wheels = [ [[package]] name = "starlette" -version = "0.52.1" +version = "0.46.2" source = { registry = "https://pypi.org/simple" } 
dependencies = [ { name = "anyio" }, - { name = "typing-extensions", marker = "python_full_version < '3.13'" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/c4/68/79977123bb7be889ad680d79a40f339082c1978b5cfcf62c2d8d196873ac/starlette-0.52.1.tar.gz", hash = "sha256:834edd1b0a23167694292e94f597773bc3f89f362be6effee198165a35d62933", size = 2653702, upload-time = "2026-01-18T13:34:11.062Z" } +sdist = { url = "https://files.pythonhosted.org/packages/ce/20/08dfcd9c983f6a6f4a1000d934b9e6d626cff8d2eeb77a89a68eef20a2b7/starlette-0.46.2.tar.gz", hash = "sha256:7f7361f34eed179294600af672f565727419830b54b7b084efe44bb82d2fccd5", size = 2580846, upload-time = "2025-04-13T13:56:17.942Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/81/0d/13d1d239a25cbfb19e740db83143e95c772a1fe10202dda4b76792b114dd/starlette-0.52.1-py3-none-any.whl", hash = "sha256:0029d43eb3d273bc4f83a08720b4912ea4b071087a3b48db01b7c839f7954d74", size = 74272, upload-time = "2026-01-18T13:34:09.188Z" }, + { url = "https://files.pythonhosted.org/packages/8b/0c/9d30a4ebeb6db2b25a841afbb80f6ef9a854fc3b41be131d249a977b4959/starlette-0.46.2-py3-none-any.whl", hash = "sha256:595633ce89f8ffa71a015caed34a5b2dc1c0cdb3f0f1fbd1e69339cf2abeec35", size = 72037, upload-time = "2025-04-13T13:56:16.21Z" }, ] [[package]] @@ -5609,19 +6042,18 @@ wheels = [ [[package]] name = "textual" -version = "8.2.4" +version = "6.2.1" source = { registry = "https://pypi.org/simple" } dependencies = [ - { name = "markdown-it-py", extra = ["linkify"] }, - { name = "mdit-py-plugins" }, + { name = "markdown-it-py", extra = ["linkify", "plugins"] }, { name = "platformdirs" }, { name = "pygments" }, { name = "rich" }, { name = "typing-extensions" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/19/89/bec5709fb759f9c784bbcb30b2e3497df3f901691d13c2b864dbf6694a17/textual-8.2.4.tar.gz", hash = "sha256:d4e2b2ddd7157191d00b228592b7c739ea080b7d792fd410f23ca75f05ea76c4", size = 1848933, upload-time = 
"2026-04-19T04:20:45.845Z" } +sdist = { url = "https://files.pythonhosted.org/packages/a2/30/38b615f7d4b16f6fdd73e4dcd8913e2d880bbb655e68a076e3d91181a7ee/textual-6.2.1.tar.gz", hash = "sha256:4699d8dfae43503b9c417bd2a6fb0da1c89e323fe91c4baa012f9298acaa83e1", size = 1570645, upload-time = "2025-10-01T16:11:24.467Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/5c/32/02932f0d597cdbb34e34bf24266ff0f2cf292ccb3aafc37dd9efcb0cc416/textual-8.2.4-py3-none-any.whl", hash = "sha256:a83bd3f0cc7125ca203845af753f9d6b6be030025ecd1b05cc75ebe645b9c4ba", size = 724390, upload-time = "2026-04-19T04:20:49.968Z" }, + { url = "https://files.pythonhosted.org/packages/c5/93/02c7adec57a594af28388d85da9972703a4af94ae1399542555cd9581952/textual-6.2.1-py3-none-any.whl", hash = "sha256:3c7190633cd4d8bfe6049ae66808b98da91ded2edb85cef54e82bf77b03d2a54", size = 710702, upload-time = "2025-10-01T16:11:22.161Z" }, ] [[package]] @@ -5701,10 +6133,43 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/59/8c/b1c87148aa15e099243ec9f0cf9d0e970cc2234c3257d558c25a2c5304e6/tokenizers-0.22.2-pp310-pypy310_pp73-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:f01a9c019878532f98927d2bacb79bbb404b43d3437455522a00a30718cdedb5", size = 3373542, upload-time = "2026-01-05T10:40:52.803Z" }, ] +[[package]] +name = "tomli" +version = "2.0.2" +source = { registry = "https://pypi.org/simple" } +resolution-markers = [ + "python_full_version >= '3.13' and sys_platform == 'win32'", + "python_full_version == '3.12.*' and sys_platform == 'win32'", + "python_full_version >= '3.13' and sys_platform == 'emscripten'", + "python_full_version == '3.12.*' and sys_platform == 'emscripten'", + "python_full_version >= '3.13' and sys_platform != 'emscripten' and sys_platform != 'win32'", + "python_full_version == '3.12.*' and sys_platform != 'emscripten' and sys_platform != 'win32'", + "python_full_version == '3.11.*' and sys_platform == 'win32'", + "python_full_version == '3.11.*' and 
sys_platform == 'emscripten'", + "python_full_version == '3.11.*' and sys_platform != 'emscripten' and sys_platform != 'win32'", + "python_full_version < '3.11'", +] +sdist = { url = "https://files.pythonhosted.org/packages/35/b9/de2a5c0144d7d75a57ff355c0c24054f965b2dc3036456ae03a51ea6264b/tomli-2.0.2.tar.gz", hash = "sha256:d46d457a85337051c36524bc5349dd91b1877838e2979ac5ced3e710ed8a60ed", size = 16096, upload-time = "2024-10-02T10:46:13.208Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/cf/db/ce8eda256fa131af12e0a76d481711abe4681b6923c27efb9a255c9e4594/tomli-2.0.2-py3-none-any.whl", hash = "sha256:2ebe24485c53d303f690b0ec092806a085f07af5a5aa1464f3931eec36caaa38", size = 13237, upload-time = "2024-10-02T10:46:11.806Z" }, +] + [[package]] name = "tomli" version = "2.4.1" source = { registry = "https://pypi.org/simple" } +resolution-markers = [ + "python_full_version >= '3.13' and sys_platform == 'win32'", + "python_full_version == '3.12.*' and sys_platform == 'win32'", + "python_full_version >= '3.13' and sys_platform == 'emscripten'", + "python_full_version == '3.12.*' and sys_platform == 'emscripten'", + "python_full_version >= '3.13' and sys_platform != 'emscripten' and sys_platform != 'win32'", + "python_full_version == '3.12.*' and sys_platform != 'emscripten' and sys_platform != 'win32'", + "python_full_version == '3.11.*' and sys_platform == 'win32'", + "python_full_version == '3.11.*' and sys_platform == 'emscripten'", + "python_full_version == '3.11.*' and sys_platform != 'emscripten' and sys_platform != 'win32'", + "python_full_version < '3.11'", +] sdist = { url = "https://files.pythonhosted.org/packages/22/de/48c59722572767841493b26183a0d1cc411d54fd759c5607c4590b6563a6/tomli-2.4.1.tar.gz", hash = "sha256:7c7e1a961a0b2f2472c1ac5b69affa0ae1132c39adcb67aba98568702b9cc23f", size = 17543, upload-time = "2026-03-25T20:22:03.828Z" } wheels = [ { url = 
"https://files.pythonhosted.org/packages/f4/11/db3d5885d8528263d8adc260bb2d28ebf1270b96e98f0e0268d32b8d9900/tomli-2.4.1-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:f8f0fc26ec2cc2b965b7a3b87cd19c5c6b8c5e5f436b984e85f486d652285c30", size = 154704, upload-time = "2026-03-25T20:21:10.473Z" }, @@ -5746,6 +6211,15 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/c7/18/c86eb8e0202e32dd3df50d43d7ff9854f8e0603945ff398974c1d91ac1ef/tomli_w-1.2.0-py3-none-any.whl", hash = "sha256:188306098d013b691fcadc011abd66727d3c414c571bb01b1a174ba8c983cf90", size = 6675, upload-time = "2025-01-15T12:07:22.074Z" }, ] +[[package]] +name = "tomlkit" +version = "0.14.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/c3/af/14b24e41977adb296d6bd1fb59402cf7d60ce364f90c890bd2ec65c43b5a/tomlkit-0.14.0.tar.gz", hash = "sha256:cf00efca415dbd57575befb1f6634c4f42d2d87dbba376128adb42c121b87064", size = 187167, upload-time = "2026-01-13T01:14:53.304Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/b5/11/87d6d29fb5d237229d67973a6c9e06e048f01cf4994dee194ab0ea841814/tomlkit-0.14.0-py3-none-any.whl", hash = "sha256:592064ed85b40fa213469f81ac584f67a4f2992509a7c3ea2d632208623a3680", size = 39310, upload-time = "2026-01-13T01:14:51.965Z" }, +] + [[package]] name = "torch" version = "2.8.0" @@ -5754,25 +6228,25 @@ dependencies = [ { name = "filelock" }, { name = "fsspec" }, { name = "jinja2" }, - { name = "networkx", version = "3.4.2", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version < '3.11'" }, - { name = "networkx", version = "3.6.1", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version >= '3.11'" }, - { name = "nvidia-cublas-cu12", marker = "platform_machine == 'x86_64' and sys_platform == 'linux'" }, - { name = "nvidia-cuda-cupti-cu12", marker = "platform_machine == 'x86_64' and sys_platform == 'linux'" }, - { name = "nvidia-cuda-nvrtc-cu12", 
marker = "platform_machine == 'x86_64' and sys_platform == 'linux'" }, - { name = "nvidia-cuda-runtime-cu12", marker = "platform_machine == 'x86_64' and sys_platform == 'linux'" }, - { name = "nvidia-cudnn-cu12", marker = "platform_machine == 'x86_64' and sys_platform == 'linux'" }, - { name = "nvidia-cufft-cu12", marker = "platform_machine == 'x86_64' and sys_platform == 'linux'" }, - { name = "nvidia-cufile-cu12", marker = "platform_machine == 'x86_64' and sys_platform == 'linux'" }, - { name = "nvidia-curand-cu12", marker = "platform_machine == 'x86_64' and sys_platform == 'linux'" }, - { name = "nvidia-cusolver-cu12", marker = "platform_machine == 'x86_64' and sys_platform == 'linux'" }, - { name = "nvidia-cusparse-cu12", marker = "platform_machine == 'x86_64' and sys_platform == 'linux'" }, - { name = "nvidia-cusparselt-cu12", marker = "platform_machine == 'x86_64' and sys_platform == 'linux'" }, - { name = "nvidia-nccl-cu12", marker = "platform_machine == 'x86_64' and sys_platform == 'linux'" }, - { name = "nvidia-nvjitlink-cu12", marker = "platform_machine == 'x86_64' and sys_platform == 'linux'" }, - { name = "nvidia-nvtx-cu12", marker = "platform_machine == 'x86_64' and sys_platform == 'linux'" }, - { name = "setuptools", marker = "python_full_version >= '3.12'" }, + { name = "networkx", version = "3.4.2", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version < '3.11' or (extra == 'extra-9-verifiers-openenv' and extra == 'group-9-verifiers-policy')" }, + { name = "networkx", version = "3.6.1", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version >= '3.11' or (extra == 'extra-9-verifiers-openenv' and extra == 'group-9-verifiers-policy')" }, + { name = "nvidia-cublas-cu12", marker = "(platform_machine == 'x86_64' and sys_platform == 'linux') or (platform_machine != 'x86_64' and extra == 'extra-9-verifiers-openenv' and extra == 'group-9-verifiers-policy') or (sys_platform != 'linux' and extra == 
'extra-9-verifiers-openenv' and extra == 'group-9-verifiers-policy')" }, + { name = "nvidia-cuda-cupti-cu12", marker = "(platform_machine == 'x86_64' and sys_platform == 'linux') or (platform_machine != 'x86_64' and extra == 'extra-9-verifiers-openenv' and extra == 'group-9-verifiers-policy') or (sys_platform != 'linux' and extra == 'extra-9-verifiers-openenv' and extra == 'group-9-verifiers-policy')" }, + { name = "nvidia-cuda-nvrtc-cu12", marker = "(platform_machine == 'x86_64' and sys_platform == 'linux') or (platform_machine != 'x86_64' and extra == 'extra-9-verifiers-openenv' and extra == 'group-9-verifiers-policy') or (sys_platform != 'linux' and extra == 'extra-9-verifiers-openenv' and extra == 'group-9-verifiers-policy')" }, + { name = "nvidia-cuda-runtime-cu12", marker = "(platform_machine == 'x86_64' and sys_platform == 'linux') or (platform_machine != 'x86_64' and extra == 'extra-9-verifiers-openenv' and extra == 'group-9-verifiers-policy') or (sys_platform != 'linux' and extra == 'extra-9-verifiers-openenv' and extra == 'group-9-verifiers-policy')" }, + { name = "nvidia-cudnn-cu12", marker = "(platform_machine == 'x86_64' and sys_platform == 'linux') or (platform_machine != 'x86_64' and extra == 'extra-9-verifiers-openenv' and extra == 'group-9-verifiers-policy') or (sys_platform != 'linux' and extra == 'extra-9-verifiers-openenv' and extra == 'group-9-verifiers-policy')" }, + { name = "nvidia-cufft-cu12", marker = "(platform_machine == 'x86_64' and sys_platform == 'linux') or (platform_machine != 'x86_64' and extra == 'extra-9-verifiers-openenv' and extra == 'group-9-verifiers-policy') or (sys_platform != 'linux' and extra == 'extra-9-verifiers-openenv' and extra == 'group-9-verifiers-policy')" }, + { name = "nvidia-cufile-cu12", marker = "(platform_machine == 'x86_64' and sys_platform == 'linux') or (platform_machine != 'x86_64' and extra == 'extra-9-verifiers-openenv' and extra == 'group-9-verifiers-policy') or (sys_platform != 'linux' and extra == 
'extra-9-verifiers-openenv' and extra == 'group-9-verifiers-policy')" }, + { name = "nvidia-curand-cu12", marker = "(platform_machine == 'x86_64' and sys_platform == 'linux') or (platform_machine != 'x86_64' and extra == 'extra-9-verifiers-openenv' and extra == 'group-9-verifiers-policy') or (sys_platform != 'linux' and extra == 'extra-9-verifiers-openenv' and extra == 'group-9-verifiers-policy')" }, + { name = "nvidia-cusolver-cu12", marker = "(platform_machine == 'x86_64' and sys_platform == 'linux') or (platform_machine != 'x86_64' and extra == 'extra-9-verifiers-openenv' and extra == 'group-9-verifiers-policy') or (sys_platform != 'linux' and extra == 'extra-9-verifiers-openenv' and extra == 'group-9-verifiers-policy')" }, + { name = "nvidia-cusparse-cu12", marker = "(platform_machine == 'x86_64' and sys_platform == 'linux') or (platform_machine != 'x86_64' and extra == 'extra-9-verifiers-openenv' and extra == 'group-9-verifiers-policy') or (sys_platform != 'linux' and extra == 'extra-9-verifiers-openenv' and extra == 'group-9-verifiers-policy')" }, + { name = "nvidia-cusparselt-cu12", marker = "(platform_machine == 'x86_64' and sys_platform == 'linux') or (platform_machine != 'x86_64' and extra == 'extra-9-verifiers-openenv' and extra == 'group-9-verifiers-policy') or (sys_platform != 'linux' and extra == 'extra-9-verifiers-openenv' and extra == 'group-9-verifiers-policy')" }, + { name = "nvidia-nccl-cu12", marker = "(platform_machine == 'x86_64' and sys_platform == 'linux') or (platform_machine != 'x86_64' and extra == 'extra-9-verifiers-openenv' and extra == 'group-9-verifiers-policy') or (sys_platform != 'linux' and extra == 'extra-9-verifiers-openenv' and extra == 'group-9-verifiers-policy')" }, + { name = "nvidia-nvjitlink-cu12", marker = "(platform_machine == 'x86_64' and sys_platform == 'linux') or (platform_machine != 'x86_64' and extra == 'extra-9-verifiers-openenv' and extra == 'group-9-verifiers-policy') or (sys_platform != 'linux' and extra == 
'extra-9-verifiers-openenv' and extra == 'group-9-verifiers-policy')" }, + { name = "nvidia-nvtx-cu12", marker = "(platform_machine == 'x86_64' and sys_platform == 'linux') or (platform_machine != 'x86_64' and extra == 'extra-9-verifiers-openenv' and extra == 'group-9-verifiers-policy') or (sys_platform != 'linux' and extra == 'extra-9-verifiers-openenv' and extra == 'group-9-verifiers-policy')" }, + { name = "setuptools", marker = "python_full_version >= '3.12' or (extra == 'extra-9-verifiers-openenv' and extra == 'group-9-verifiers-policy')" }, { name = "sympy" }, - { name = "triton", marker = "platform_machine == 'x86_64' and sys_platform == 'linux'" }, + { name = "triton", marker = "(platform_machine == 'x86_64' and sys_platform == 'linux') or (platform_machine != 'x86_64' and extra == 'extra-9-verifiers-openenv' and extra == 'group-9-verifiers-policy') or (sys_platform != 'linux' and extra == 'extra-9-verifiers-openenv' and extra == 'group-9-verifiers-policy')" }, { name = "typing-extensions" }, ] wheels = [ @@ -5882,7 +6356,7 @@ name = "tqdm" version = "4.67.3" source = { registry = "https://pypi.org/simple" } dependencies = [ - { name = "colorama", marker = "sys_platform == 'win32'" }, + { name = "colorama", marker = "sys_platform == 'win32' or (extra == 'extra-9-verifiers-openenv' and extra == 'group-9-verifiers-policy')" }, ] sdist = { url = "https://files.pythonhosted.org/packages/09/a9/6ba95a270c6f1fbcd8dac228323f2777d886cb206987444e4bce66338dd4/tqdm-4.67.3.tar.gz", hash = "sha256:7d825f03f89244ef73f1d4ce193cb1774a8179fd96f31d7e1dcde62092b960bb", size = 169598, upload-time = "2026-02-03T17:35:53.048Z" } wheels = [ @@ -5959,7 +6433,7 @@ wheels = [ [[package]] name = "typer" -version = "0.24.2" +version = "0.23.1" source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "annotated-doc" }, @@ -5967,9 +6441,9 @@ dependencies = [ { name = "rich" }, { name = "shellingham" }, ] -sdist = { url = 
"https://files.pythonhosted.org/packages/83/b8/9ebb531b6c2d377af08ac6746a5df3425b21853a5d2260876919b58a2a4a/typer-0.24.2.tar.gz", hash = "sha256:ec070dcfca1408e85ee203c6365001e818c3b7fffe686fd07ff2d68095ca0480", size = 119849, upload-time = "2026-04-22T17:45:34.413Z" } +sdist = { url = "https://files.pythonhosted.org/packages/fd/07/b822e1b307d40e263e8253d2384cf98c51aa2368cc7ba9a07e523a1d964b/typer-0.23.1.tar.gz", hash = "sha256:2070374e4d31c83e7b61362fd859aa683576432fd5b026b060ad6b4cd3b86134", size = 120047, upload-time = "2026-02-13T10:04:30.984Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/39/d1/9484b497e0a0410b901c12b8251c3e746e1e863f7d28419ffe06f7892fda/typer-0.24.2-py3-none-any.whl", hash = "sha256:b618bc3d721f9a8d30f3e05565be26416d06e9bcc29d49bc491dc26aba674fa8", size = 55977, upload-time = "2026-04-22T17:45:33.055Z" }, + { url = "https://files.pythonhosted.org/packages/d5/91/9b286ab899c008c2cb05e8be99814807e7fbbd33f0c0c960470826e5ac82/typer-0.23.1-py3-none-any.whl", hash = "sha256:3291ad0d3c701cbf522012faccfbb29352ff16ad262db2139e6b01f15781f14e", size = 56813, upload-time = "2026-02-13T10:04:32.008Z" }, ] [[package]] @@ -6025,11 +6499,11 @@ wheels = [ [[package]] name = "uncalled-for" -version = "0.3.1" +version = "0.3.2" source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/e1/68/35c1d87e608940badbcfeb630347aa0509897284684f61fab6423d02b253/uncalled_for-0.3.1.tar.gz", hash = "sha256:5e412ac6708f04b56bef5867b5dcf6690ebce4eb7316058d9c50787492bb4bca", size = 49693, upload-time = "2026-04-07T13:05:06.462Z" } +sdist = { url = "https://files.pythonhosted.org/packages/b5/82/345cc927f7fbdae6065e7768759932fcc827fc20b29b45dfbafa2f1f7da4/uncalled_for-0.3.2.tar.gz", hash = "sha256:89f5dbcd71e2b8f47c030b1fa302e6cce2ec795d1ac565eeb6525c5fe55cb8a2", size = 50032, upload-time = "2026-05-06T13:38:25.204Z" } wheels = [ - { url = 
"https://files.pythonhosted.org/packages/11/e1/7ec67882ad8fc9f86384bef6421fa252c9cbe5744f8df6ce77afc9eca1f5/uncalled_for-0.3.1-py3-none-any.whl", hash = "sha256:074cdc92da8356278f93d0ded6f2a66dd883dbecaf9bc89437646ee2289cc200", size = 11361, upload-time = "2026-04-07T13:05:05.341Z" }, + { url = "https://files.pythonhosted.org/packages/3b/25/2c87754f3a9e692315f7b811244090e68f362979fc8886b3fbd2985a1d8c/uncalled_for-0.3.2-py3-none-any.whl", hash = "sha256:0ff60b142c7d1f8070bde9d42afaa70aedc77dcc10998c227687e9c15713418e", size = 11444, upload-time = "2026-05-06T13:38:24.025Z" }, ] [[package]] @@ -6048,7 +6522,7 @@ source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "click" }, { name = "h11" }, - { name = "typing-extensions", marker = "python_full_version < '3.11'" }, + { name = "typing-extensions", marker = "python_full_version < '3.11' or (extra == 'extra-9-verifiers-openenv' and extra == 'group-9-verifiers-policy')" }, ] sdist = { url = "https://files.pythonhosted.org/packages/1f/93/041fca8274050e40e6791f267d82e0e2e27dd165627bd640d3e0e378d877/uvicorn-0.46.0.tar.gz", hash = "sha256:fb9da0926999cc6cb22dc7cd71a94a632f078e6ae47ff683c5c420750fb7413d", size = 88758, upload-time = "2026-04-23T07:16:00.151Z" } wheels = [ @@ -6057,11 +6531,11 @@ wheels = [ [package.optional-dependencies] standard = [ - { name = "colorama", marker = "sys_platform == 'win32'" }, + { name = "colorama", marker = "sys_platform == 'win32' or (extra == 'extra-9-verifiers-openenv' and extra == 'group-9-verifiers-policy')" }, { name = "httptools" }, { name = "python-dotenv" }, { name = "pyyaml" }, - { name = "uvloop", marker = "platform_python_implementation != 'PyPy' and sys_platform != 'cygwin' and sys_platform != 'win32'" }, + { name = "uvloop", marker = "(platform_python_implementation != 'PyPy' and sys_platform != 'cygwin' and sys_platform != 'win32') or (platform_python_implementation == 'PyPy' and extra == 'extra-9-verifiers-openenv' and extra == 
'group-9-verifiers-policy') or (sys_platform == 'cygwin' and extra == 'extra-9-verifiers-openenv' and extra == 'group-9-verifiers-policy') or (sys_platform == 'win32' and extra == 'extra-9-verifiers-openenv' and extra == 'group-9-verifiers-policy')" }, { name = "watchfiles" }, { name = "websockets" }, ] @@ -6109,7 +6583,8 @@ dependencies = [ { name = "httpx" }, { name = "jinja2" }, { name = "math-verify" }, - { name = "mcp" }, + { name = "mcp", version = "1.23.3", source = { registry = "https://pypi.org/simple" }, marker = "extra == 'group-9-verifiers-policy'" }, + { name = "mcp", version = "1.27.0", source = { registry = "https://pypi.org/simple" }, marker = "extra == 'extra-9-verifiers-openenv' or extra != 'group-9-verifiers-policy'" }, { name = "msgpack" }, { name = "nest-asyncio" }, { name = "numpy" }, @@ -6125,8 +6600,9 @@ dependencies = [ { name = "setproctitle" }, { name = "tenacity" }, { name = "textual" }, - { name = "tomli", marker = "python_full_version < '3.11'" }, - { name = "typing-extensions", marker = "python_full_version < '3.12'" }, + { name = "tomli", version = "2.0.2", source = { registry = "https://pypi.org/simple" }, marker = "(python_full_version < '3.11' and extra == 'group-9-verifiers-policy') or (extra == 'extra-9-verifiers-openenv' and extra == 'group-9-verifiers-policy')" }, + { name = "tomli", version = "2.4.1", source = { registry = "https://pypi.org/simple" }, marker = "(python_full_version < '3.11' and extra == 'extra-9-verifiers-openenv') or (python_full_version < '3.11' and extra != 'group-9-verifiers-policy') or (extra == 'extra-9-verifiers-openenv' and extra == 'group-9-verifiers-policy')" }, + { name = "typing-extensions", marker = "python_full_version < '3.12' or (extra == 'extra-9-verifiers-openenv' and extra == 'group-9-verifiers-policy')" }, ] [package.optional-dependencies] @@ -6136,7 +6612,7 @@ browser = [ { name = "stagehand" }, ] openenv = [ - { name = "openenv-core", extra = ["core"] }, + { name = "openenv-core" }, ] 
renderers = [ { name = "renderers" }, @@ -6167,7 +6643,6 @@ dev = [ { name = "ipykernel" }, { name = "ipywidgets" }, { name = "nltk" }, - { name = "openenv-core", extra = ["core"] }, { name = "pre-commit" }, { name = "pytest" }, { name = "pytest-asyncio" }, @@ -6181,6 +6656,9 @@ dev = [ { name = "textarena" }, { name = "ty" }, ] +policy = [ + { name = "semgrep" }, +] [package.metadata] requires-dist = [ @@ -6203,7 +6681,7 @@ requires-dist = [ { name = "numpy" }, { name = "openai", specifier = ">=1.108.1" }, { name = "openai-agents", specifier = ">=0.0.7" }, - { name = "openenv-core", extras = ["core"], marker = "extra == 'openenv'", specifier = "==0.2.1" }, + { name = "openenv-core", marker = "extra == 'openenv'", specifier = ">=0.3.0" }, { name = "peft", marker = "extra == 'rl'" }, { name = "prime-sandboxes", specifier = ">=0.2.25" }, { name = "prime-tunnel", specifier = ">=0.1.6" }, @@ -6236,7 +6714,6 @@ dev = [ { name = "ipykernel" }, { name = "ipywidgets" }, { name = "nltk" }, - { name = "openenv-core", extras = ["core"], specifier = "==0.2.1" }, { name = "pre-commit" }, { name = "pytest", specifier = ">=7.0.0" }, { name = "pytest-asyncio", specifier = ">=0.21.0" }, @@ -6250,6 +6727,7 @@ dev = [ { name = "textarena" }, { name = "ty", specifier = ">=0.0.1a29,<0.0.22" }, ] +policy = [{ name = "semgrep", specifier = ">=1.150.0" }] [[package]] name = "virtualenv" @@ -6260,7 +6738,7 @@ dependencies = [ { name = "filelock" }, { name = "platformdirs" }, { name = "python-discovery" }, - { name = "typing-extensions", marker = "python_full_version < '3.11'" }, + { name = "typing-extensions", marker = "python_full_version < '3.11' or (extra == 'extra-9-verifiers-openenv' and extra == 'group-9-verifiers-policy')" }, ] sdist = { url = "https://files.pythonhosted.org/packages/0c/98/3a7e644e19cb26133488caff231be390579860bbbb3da35913c49a1d0a46/virtualenv-21.2.4.tar.gz", hash = "sha256:b294ef68192638004d72524ce7ef303e9d0cf5a44c95ce2e54a7500a6381cada", size = 5850742, 
upload-time = "2026-04-14T22:15:31.438Z" } wheels = [ @@ -6285,7 +6763,7 @@ dependencies = [ { name = "filelock" }, { name = "gguf" }, { name = "lark" }, - { name = "llguidance", marker = "platform_machine == 'aarch64' or platform_machine == 'arm64' or platform_machine == 'x86_64'" }, + { name = "llguidance", marker = "platform_machine == 'aarch64' or platform_machine == 'arm64' or platform_machine == 'x86_64' or (extra == 'extra-9-verifiers-openenv' and extra == 'group-9-verifiers-policy')" }, { name = "lm-format-enforcer" }, { name = "mistral-common", extra = ["audio", "image"] }, { name = "msgspec" }, @@ -6311,12 +6789,12 @@ dependencies = [ { name = "ray", extra = ["cgraph"] }, { name = "regex" }, { name = "requests" }, - { name = "scipy", version = "1.15.3", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version < '3.11'" }, - { name = "scipy", version = "1.17.1", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version >= '3.11'" }, + { name = "scipy", version = "1.15.3", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version < '3.11' or (extra == 'extra-9-verifiers-openenv' and extra == 'group-9-verifiers-policy')" }, + { name = "scipy", version = "1.17.1", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version >= '3.11' or (extra == 'extra-9-verifiers-openenv' and extra == 'group-9-verifiers-policy')" }, { name = "sentencepiece" }, { name = "setproctitle" }, - { name = "setuptools", marker = "python_full_version >= '3.12'" }, - { name = "six", marker = "python_full_version >= '3.12'" }, + { name = "setuptools", marker = "python_full_version >= '3.12' or (extra == 'extra-9-verifiers-openenv' and extra == 'group-9-verifiers-policy')" }, + { name = "six", marker = "python_full_version >= '3.12' or (extra == 'extra-9-verifiers-openenv' and extra == 'group-9-verifiers-policy')" }, { name = "tiktoken" }, { name = "tokenizers" }, { name = "torch" }, @@ -6326,8 
+6804,8 @@ dependencies = [ { name = "transformers" }, { name = "typing-extensions" }, { name = "watchfiles" }, - { name = "xformers", marker = "platform_machine == 'x86_64' and sys_platform == 'linux'" }, - { name = "xgrammar", marker = "platform_machine == 'aarch64' or platform_machine == 'arm64' or platform_machine == 'x86_64'" }, + { name = "xformers", marker = "(platform_machine == 'x86_64' and sys_platform == 'linux') or (platform_machine != 'x86_64' and extra == 'extra-9-verifiers-openenv' and extra == 'group-9-verifiers-policy') or (sys_platform != 'linux' and extra == 'extra-9-verifiers-openenv' and extra == 'group-9-verifiers-policy')" }, + { name = "xgrammar", marker = "platform_machine == 'aarch64' or platform_machine == 'arm64' or platform_machine == 'x86_64' or (extra == 'extra-9-verifiers-openenv' and extra == 'group-9-verifiers-policy')" }, ] sdist = { url = "https://files.pythonhosted.org/packages/7d/0a/278d7bbf454f7de5322a5007427eed3e8b34ed6c2802491b56bbdfd7bbb4/vllm-0.10.2.tar.gz", hash = "sha256:57608f44cf61f5d80fb182c98e06e524cb2925bb528258a7b247c8e43a52d13e", size = 10908356, upload-time = "2025-09-13T23:00:34.918Z" } wheels = [ @@ -6444,6 +6922,18 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/6e/d4/ed38dd3b1767193de971e694aa544356e63353c33a85d948166b5ff58b9e/watchfiles-1.1.1-pp311-pypy311_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:3e6f39af2eab0118338902798b5aa6664f46ff66bc0280de76fca67a7f262a49", size = 457546, upload-time = "2025-10-14T15:06:13.372Z" }, ] +[[package]] +name = "wcmatch" +version = "8.5.2" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "bracex" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/ea/c4/55e0d36da61d7b8b2a49fd273e6b296fd5e8471c72ebbe438635d1af3968/wcmatch-8.5.2.tar.gz", hash = "sha256:a70222b86dea82fb382dd87b73278c10756c138bd6f8f714e2183128887b9eb2", size = 114983, upload-time = "2024-05-15T12:51:08.054Z" } +wheels = [ + { url 
= "https://files.pythonhosted.org/packages/09/78/533ef890536e5ba0fd4f7df37482b5800ecaaceae9afc30978a1a7f88ff1/wcmatch-8.5.2-py3-none-any.whl", hash = "sha256:17d3ad3758f9d0b5b4dedc770b65420d4dac62e680229c287bf24c9db856a478", size = 39397, upload-time = "2024-05-15T12:51:06.2Z" }, +] + [[package]] name = "wcwidth" version = "0.6.0" @@ -6521,17 +7011,67 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/3f/0e/fa3b193432cfc60c93b42f3be03365f5f909d2b3ea410295cf36df739e31/widgetsnbextension-4.0.15-py3-none-any.whl", hash = "sha256:8156704e4346a571d9ce73b84bee86a29906c9abfd7223b7228a28899ccf3366", size = 2196503, upload-time = "2025-11-01T21:15:53.565Z" }, ] +[[package]] +name = "wrapt" +version = "1.17.3" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/95/8f/aeb76c5b46e273670962298c23e7ddde79916cb74db802131d49a85e4b7d/wrapt-1.17.3.tar.gz", hash = "sha256:f66eb08feaa410fe4eebd17f2a2c8e2e46d3476e9f8c783daa8e09e0faa666d0", size = 55547, upload-time = "2025-08-12T05:53:21.714Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/3f/23/bb82321b86411eb51e5a5db3fb8f8032fd30bd7c2d74bfe936136b2fa1d6/wrapt-1.17.3-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:88bbae4d40d5a46142e70d58bf664a89b6b4befaea7b2ecc14e03cedb8e06c04", size = 53482, upload-time = "2025-08-12T05:51:44.467Z" }, + { url = "https://files.pythonhosted.org/packages/45/69/f3c47642b79485a30a59c63f6d739ed779fb4cc8323205d047d741d55220/wrapt-1.17.3-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:e6b13af258d6a9ad602d57d889f83b9d5543acd471eee12eb51f5b01f8eb1bc2", size = 38676, upload-time = "2025-08-12T05:51:32.636Z" }, + { url = "https://files.pythonhosted.org/packages/d1/71/e7e7f5670c1eafd9e990438e69d8fb46fa91a50785332e06b560c869454f/wrapt-1.17.3-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:fd341868a4b6714a5962c1af0bd44f7c404ef78720c7de4892901e540417111c", size = 38957, upload-time = "2025-08-12T05:51:54.655Z" }, + 
{ url = "https://files.pythonhosted.org/packages/de/17/9f8f86755c191d6779d7ddead1a53c7a8aa18bccb7cea8e7e72dfa6a8a09/wrapt-1.17.3-cp310-cp310-manylinux1_x86_64.manylinux_2_28_x86_64.manylinux_2_5_x86_64.whl", hash = "sha256:f9b2601381be482f70e5d1051a5965c25fb3625455a2bf520b5a077b22afb775", size = 81975, upload-time = "2025-08-12T05:52:30.109Z" }, + { url = "https://files.pythonhosted.org/packages/f2/15/dd576273491f9f43dd09fce517f6c2ce6eb4fe21681726068db0d0467096/wrapt-1.17.3-cp310-cp310-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:343e44b2a8e60e06a7e0d29c1671a0d9951f59174f3709962b5143f60a2a98bd", size = 83149, upload-time = "2025-08-12T05:52:09.316Z" }, + { url = "https://files.pythonhosted.org/packages/0c/c4/5eb4ce0d4814521fee7aa806264bf7a114e748ad05110441cd5b8a5c744b/wrapt-1.17.3-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:33486899acd2d7d3066156b03465b949da3fd41a5da6e394ec49d271baefcf05", size = 82209, upload-time = "2025-08-12T05:52:10.331Z" }, + { url = "https://files.pythonhosted.org/packages/31/4b/819e9e0eb5c8dc86f60dfc42aa4e2c0d6c3db8732bce93cc752e604bb5f5/wrapt-1.17.3-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:e6f40a8aa5a92f150bdb3e1c44b7e98fb7113955b2e5394122fa5532fec4b418", size = 81551, upload-time = "2025-08-12T05:52:31.137Z" }, + { url = "https://files.pythonhosted.org/packages/f8/83/ed6baf89ba3a56694700139698cf703aac9f0f9eb03dab92f57551bd5385/wrapt-1.17.3-cp310-cp310-win32.whl", hash = "sha256:a36692b8491d30a8c75f1dfee65bef119d6f39ea84ee04d9f9311f83c5ad9390", size = 36464, upload-time = "2025-08-12T05:53:01.204Z" }, + { url = "https://files.pythonhosted.org/packages/2f/90/ee61d36862340ad7e9d15a02529df6b948676b9a5829fd5e16640156627d/wrapt-1.17.3-cp310-cp310-win_amd64.whl", hash = "sha256:afd964fd43b10c12213574db492cb8f73b2f0826c8df07a68288f8f19af2ebe6", size = 38748, upload-time = "2025-08-12T05:53:00.209Z" }, + { url = 
"https://files.pythonhosted.org/packages/bd/c3/cefe0bd330d389c9983ced15d326f45373f4073c9f4a8c2f99b50bfea329/wrapt-1.17.3-cp310-cp310-win_arm64.whl", hash = "sha256:af338aa93554be859173c39c85243970dc6a289fa907402289eeae7543e1ae18", size = 36810, upload-time = "2025-08-12T05:52:51.906Z" }, + { url = "https://files.pythonhosted.org/packages/52/db/00e2a219213856074a213503fdac0511203dceefff26e1daa15250cc01a0/wrapt-1.17.3-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:273a736c4645e63ac582c60a56b0acb529ef07f78e08dc6bfadf6a46b19c0da7", size = 53482, upload-time = "2025-08-12T05:51:45.79Z" }, + { url = "https://files.pythonhosted.org/packages/5e/30/ca3c4a5eba478408572096fe9ce36e6e915994dd26a4e9e98b4f729c06d9/wrapt-1.17.3-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:5531d911795e3f935a9c23eb1c8c03c211661a5060aab167065896bbf62a5f85", size = 38674, upload-time = "2025-08-12T05:51:34.629Z" }, + { url = "https://files.pythonhosted.org/packages/31/25/3e8cc2c46b5329c5957cec959cb76a10718e1a513309c31399a4dad07eb3/wrapt-1.17.3-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:0610b46293c59a3adbae3dee552b648b984176f8562ee0dba099a56cfbe4df1f", size = 38959, upload-time = "2025-08-12T05:51:56.074Z" }, + { url = "https://files.pythonhosted.org/packages/5d/8f/a32a99fc03e4b37e31b57cb9cefc65050ea08147a8ce12f288616b05ef54/wrapt-1.17.3-cp311-cp311-manylinux1_x86_64.manylinux_2_28_x86_64.manylinux_2_5_x86_64.whl", hash = "sha256:b32888aad8b6e68f83a8fdccbf3165f5469702a7544472bdf41f582970ed3311", size = 82376, upload-time = "2025-08-12T05:52:32.134Z" }, + { url = "https://files.pythonhosted.org/packages/31/57/4930cb8d9d70d59c27ee1332a318c20291749b4fba31f113c2f8ac49a72e/wrapt-1.17.3-cp311-cp311-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:8cccf4f81371f257440c88faed6b74f1053eef90807b77e31ca057b2db74edb1", size = 83604, upload-time = "2025-08-12T05:52:11.663Z" }, + { url = 
"https://files.pythonhosted.org/packages/a8/f3/1afd48de81d63dd66e01b263a6fbb86e1b5053b419b9b33d13e1f6d0f7d0/wrapt-1.17.3-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:d8a210b158a34164de8bb68b0e7780041a903d7b00c87e906fb69928bf7890d5", size = 82782, upload-time = "2025-08-12T05:52:12.626Z" }, + { url = "https://files.pythonhosted.org/packages/1e/d7/4ad5327612173b144998232f98a85bb24b60c352afb73bc48e3e0d2bdc4e/wrapt-1.17.3-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:79573c24a46ce11aab457b472efd8d125e5a51da2d1d24387666cd85f54c05b2", size = 82076, upload-time = "2025-08-12T05:52:33.168Z" }, + { url = "https://files.pythonhosted.org/packages/bb/59/e0adfc831674a65694f18ea6dc821f9fcb9ec82c2ce7e3d73a88ba2e8718/wrapt-1.17.3-cp311-cp311-win32.whl", hash = "sha256:c31eebe420a9a5d2887b13000b043ff6ca27c452a9a22fa71f35f118e8d4bf89", size = 36457, upload-time = "2025-08-12T05:53:03.936Z" }, + { url = "https://files.pythonhosted.org/packages/83/88/16b7231ba49861b6f75fc309b11012ede4d6b0a9c90969d9e0db8d991aeb/wrapt-1.17.3-cp311-cp311-win_amd64.whl", hash = "sha256:0b1831115c97f0663cb77aa27d381237e73ad4f721391a9bfb2fe8bc25fa6e77", size = 38745, upload-time = "2025-08-12T05:53:02.885Z" }, + { url = "https://files.pythonhosted.org/packages/9a/1e/c4d4f3398ec073012c51d1c8d87f715f56765444e1a4b11e5180577b7e6e/wrapt-1.17.3-cp311-cp311-win_arm64.whl", hash = "sha256:5a7b3c1ee8265eb4c8f1b7d29943f195c00673f5ab60c192eba2d4a7eae5f46a", size = 36806, upload-time = "2025-08-12T05:52:53.368Z" }, + { url = "https://files.pythonhosted.org/packages/9f/41/cad1aba93e752f1f9268c77270da3c469883d56e2798e7df6240dcb2287b/wrapt-1.17.3-cp312-cp312-macosx_10_13_universal2.whl", hash = "sha256:ab232e7fdb44cdfbf55fc3afa31bcdb0d8980b9b95c38b6405df2acb672af0e0", size = 53998, upload-time = "2025-08-12T05:51:47.138Z" }, + { url = "https://files.pythonhosted.org/packages/60/f8/096a7cc13097a1869fe44efe68dace40d2a16ecb853141394047f0780b96/wrapt-1.17.3-cp312-cp312-macosx_10_13_x86_64.whl", hash = 
"sha256:9baa544e6acc91130e926e8c802a17f3b16fbea0fd441b5a60f5cf2cc5c3deba", size = 39020, upload-time = "2025-08-12T05:51:35.906Z" }, + { url = "https://files.pythonhosted.org/packages/33/df/bdf864b8997aab4febb96a9ae5c124f700a5abd9b5e13d2a3214ec4be705/wrapt-1.17.3-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:6b538e31eca1a7ea4605e44f81a48aa24c4632a277431a6ed3f328835901f4fd", size = 39098, upload-time = "2025-08-12T05:51:57.474Z" }, + { url = "https://files.pythonhosted.org/packages/9f/81/5d931d78d0eb732b95dc3ddaeeb71c8bb572fb01356e9133916cd729ecdd/wrapt-1.17.3-cp312-cp312-manylinux1_x86_64.manylinux_2_28_x86_64.manylinux_2_5_x86_64.whl", hash = "sha256:042ec3bb8f319c147b1301f2393bc19dba6e176b7da446853406d041c36c7828", size = 88036, upload-time = "2025-08-12T05:52:34.784Z" }, + { url = "https://files.pythonhosted.org/packages/ca/38/2e1785df03b3d72d34fc6252d91d9d12dc27a5c89caef3335a1bbb8908ca/wrapt-1.17.3-cp312-cp312-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:3af60380ba0b7b5aeb329bc4e402acd25bd877e98b3727b0135cb5c2efdaefe9", size = 88156, upload-time = "2025-08-12T05:52:13.599Z" }, + { url = "https://files.pythonhosted.org/packages/b3/8b/48cdb60fe0603e34e05cffda0b2a4adab81fd43718e11111a4b0100fd7c1/wrapt-1.17.3-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:0b02e424deef65c9f7326d8c19220a2c9040c51dc165cddb732f16198c168396", size = 87102, upload-time = "2025-08-12T05:52:14.56Z" }, + { url = "https://files.pythonhosted.org/packages/3c/51/d81abca783b58f40a154f1b2c56db1d2d9e0d04fa2d4224e357529f57a57/wrapt-1.17.3-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:74afa28374a3c3a11b3b5e5fca0ae03bef8450d6aa3ab3a1e2c30e3a75d023dc", size = 87732, upload-time = "2025-08-12T05:52:36.165Z" }, + { url = "https://files.pythonhosted.org/packages/9e/b1/43b286ca1392a006d5336412d41663eeef1ad57485f3e52c767376ba7e5a/wrapt-1.17.3-cp312-cp312-win32.whl", hash = 
"sha256:4da9f45279fff3543c371d5ababc57a0384f70be244de7759c85a7f989cb4ebe", size = 36705, upload-time = "2025-08-12T05:53:07.123Z" }, + { url = "https://files.pythonhosted.org/packages/28/de/49493f962bd3c586ab4b88066e967aa2e0703d6ef2c43aa28cb83bf7b507/wrapt-1.17.3-cp312-cp312-win_amd64.whl", hash = "sha256:e71d5c6ebac14875668a1e90baf2ea0ef5b7ac7918355850c0908ae82bcb297c", size = 38877, upload-time = "2025-08-12T05:53:05.436Z" }, + { url = "https://files.pythonhosted.org/packages/f1/48/0f7102fe9cb1e8a5a77f80d4f0956d62d97034bbe88d33e94699f99d181d/wrapt-1.17.3-cp312-cp312-win_arm64.whl", hash = "sha256:604d076c55e2fdd4c1c03d06dc1a31b95130010517b5019db15365ec4a405fc6", size = 36885, upload-time = "2025-08-12T05:52:54.367Z" }, + { url = "https://files.pythonhosted.org/packages/fc/f6/759ece88472157acb55fc195e5b116e06730f1b651b5b314c66291729193/wrapt-1.17.3-cp313-cp313-macosx_10_13_universal2.whl", hash = "sha256:a47681378a0439215912ef542c45a783484d4dd82bac412b71e59cf9c0e1cea0", size = 54003, upload-time = "2025-08-12T05:51:48.627Z" }, + { url = "https://files.pythonhosted.org/packages/4f/a9/49940b9dc6d47027dc850c116d79b4155f15c08547d04db0f07121499347/wrapt-1.17.3-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:54a30837587c6ee3cd1a4d1c2ec5d24e77984d44e2f34547e2323ddb4e22eb77", size = 39025, upload-time = "2025-08-12T05:51:37.156Z" }, + { url = "https://files.pythonhosted.org/packages/45/35/6a08de0f2c96dcdd7fe464d7420ddb9a7655a6561150e5fc4da9356aeaab/wrapt-1.17.3-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:16ecf15d6af39246fe33e507105d67e4b81d8f8d2c6598ff7e3ca1b8a37213f7", size = 39108, upload-time = "2025-08-12T05:51:58.425Z" }, + { url = "https://files.pythonhosted.org/packages/0c/37/6faf15cfa41bf1f3dba80cd3f5ccc6622dfccb660ab26ed79f0178c7497f/wrapt-1.17.3-cp313-cp313-manylinux1_x86_64.manylinux_2_28_x86_64.manylinux_2_5_x86_64.whl", hash = "sha256:6fd1ad24dc235e4ab88cda009e19bf347aabb975e44fd5c2fb22a3f6e4141277", size = 88072, upload-time = 
"2025-08-12T05:52:37.53Z" }, + { url = "https://files.pythonhosted.org/packages/78/f2/efe19ada4a38e4e15b6dff39c3e3f3f73f5decf901f66e6f72fe79623a06/wrapt-1.17.3-cp313-cp313-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:0ed61b7c2d49cee3c027372df5809a59d60cf1b6c2f81ee980a091f3afed6a2d", size = 88214, upload-time = "2025-08-12T05:52:15.886Z" }, + { url = "https://files.pythonhosted.org/packages/40/90/ca86701e9de1622b16e09689fc24b76f69b06bb0150990f6f4e8b0eeb576/wrapt-1.17.3-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:423ed5420ad5f5529db9ce89eac09c8a2f97da18eb1c870237e84c5a5c2d60aa", size = 87105, upload-time = "2025-08-12T05:52:17.914Z" }, + { url = "https://files.pythonhosted.org/packages/fd/e0/d10bd257c9a3e15cbf5523025252cc14d77468e8ed644aafb2d6f54cb95d/wrapt-1.17.3-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:e01375f275f010fcbf7f643b4279896d04e571889b8a5b3f848423d91bf07050", size = 87766, upload-time = "2025-08-12T05:52:39.243Z" }, + { url = "https://files.pythonhosted.org/packages/e8/cf/7d848740203c7b4b27eb55dbfede11aca974a51c3d894f6cc4b865f42f58/wrapt-1.17.3-cp313-cp313-win32.whl", hash = "sha256:53e5e39ff71b3fc484df8a522c933ea2b7cdd0d5d15ae82e5b23fde87d44cbd8", size = 36711, upload-time = "2025-08-12T05:53:10.074Z" }, + { url = "https://files.pythonhosted.org/packages/57/54/35a84d0a4d23ea675994104e667ceff49227ce473ba6a59ba2c84f250b74/wrapt-1.17.3-cp313-cp313-win_amd64.whl", hash = "sha256:1f0b2f40cf341ee8cc1a97d51ff50dddb9fcc73241b9143ec74b30fc4f44f6cb", size = 38885, upload-time = "2025-08-12T05:53:08.695Z" }, + { url = "https://files.pythonhosted.org/packages/01/77/66e54407c59d7b02a3c4e0af3783168fff8e5d61def52cda8728439d86bc/wrapt-1.17.3-cp313-cp313-win_arm64.whl", hash = "sha256:7425ac3c54430f5fc5e7b6f41d41e704db073309acfc09305816bc6a0b26bb16", size = 36896, upload-time = "2025-08-12T05:52:55.34Z" }, + { url = 
"https://files.pythonhosted.org/packages/1f/f6/a933bd70f98e9cf3e08167fc5cd7aaaca49147e48411c0bd5ae701bb2194/wrapt-1.17.3-py3-none-any.whl", hash = "sha256:7171ae35d2c33d326ac19dd8facb1e82e5fd04ef8c6c0e394d7af55a55051c22", size = 23591, upload-time = "2025-08-12T05:53:20.674Z" }, +] + [[package]] name = "xformers" version = "0.0.32.post1" source = { registry = "https://pypi.org/simple" } dependencies = [ - { name = "numpy", marker = "(python_full_version < '3.11' and sys_platform == 'emscripten') or (python_full_version < '3.11' and sys_platform == 'win32') or (sys_platform != 'emscripten' and sys_platform != 'win32')" }, - { name = "torch", marker = "(python_full_version < '3.11' and sys_platform == 'emscripten') or (python_full_version < '3.11' and sys_platform == 'win32') or (sys_platform != 'emscripten' and sys_platform != 'win32')" }, + { name = "numpy" }, + { name = "torch" }, ] sdist = { url = "https://files.pythonhosted.org/packages/6f/33/3b9c4d3d5b2da453d27de891df4ad653ac5795324961aa3a5c15b0353fe6/xformers-0.0.32.post1.tar.gz", hash = "sha256:1de84a45c497c8d92326986508d81f4b0a8c6be4d3d62a29b8ad6048a6ab51e1", size = 12106196, upload-time = "2025-08-14T18:07:45.486Z" } wheels = [ { url = "https://files.pythonhosted.org/packages/6b/df/6817346f1a77278315d5fe1fc9f239ba3282ba36e8ab3256babd448dde62/xformers-0.0.32.post1-cp39-abi3-manylinux_2_28_x86_64.whl", hash = "sha256:5f245b5555188da112070d8fefb6b7ae1ae47422856521d66c837e9d2352fbe4", size = 117199943, upload-time = "2025-08-14T18:07:34.78Z" }, + { url = "https://files.pythonhosted.org/packages/91/70/7dee5a786d77a63cf20dac60c38086bd2202d59ae89c8acef0ef6331c374/xformers-0.0.32.post1-cp39-abi3-win_amd64.whl", hash = "sha256:feb452bc2c8731da1c5d0e2e4536ba95bb214f77b41e91f24443c74d6f98a126", size = 100221531, upload-time = "2025-08-14T18:07:41.112Z" }, ] [[package]] @@ -6539,13 +7079,13 @@ name = "xgrammar" version = "0.1.23" source = { registry = "https://pypi.org/simple" } dependencies = [ - { name = "mlx-lm", 
marker = "platform_machine == 'arm64' and sys_platform == 'darwin'" }, + { name = "mlx-lm", marker = "(platform_machine == 'arm64' and sys_platform == 'darwin') or (platform_machine != 'arm64' and extra == 'extra-9-verifiers-openenv' and extra == 'group-9-verifiers-policy') or (sys_platform != 'darwin' and extra == 'extra-9-verifiers-openenv' and extra == 'group-9-verifiers-policy')" }, { name = "ninja" }, { name = "numpy" }, { name = "pydantic" }, { name = "torch" }, { name = "transformers" }, - { name = "triton", marker = "platform_machine == 'x86_64' and sys_platform == 'linux'" }, + { name = "triton", marker = "(platform_machine == 'x86_64' and sys_platform == 'linux') or (platform_machine != 'x86_64' and extra == 'extra-9-verifiers-openenv' and extra == 'group-9-verifiers-policy') or (sys_platform != 'linux' and extra == 'extra-9-verifiers-openenv' and extra == 'group-9-verifiers-policy')" }, { name = "typing-extensions" }, ] sdist = { url = "https://files.pythonhosted.org/packages/e9/12/958457553a87c31bdb18c8395b88bb3255f7c7373ab3a0b046d3b7f37f86/xgrammar-0.1.23.tar.gz", hash = "sha256:5ef280455c1ac008f052d7ea92286f0ca3a3d7ab360224894ac69277c8827113", size = 2263693, upload-time = "2025-08-15T07:31:42.792Z" } diff --git a/verifiers/__init__.py b/verifiers/__init__.py index 84875f1f8..0e76b7e73 100644 --- a/verifiers/__init__.py +++ b/verifiers/__init__.py @@ -17,7 +17,7 @@ teardown, update, ) -from .types import DatasetBuilder # noqa # isort: skip +from .types import DatasetBuilder, State # noqa # isort: skip from .parsers.parser import Parser # noqa # isort: skip from .rubrics.rubric import Rubric # noqa # isort: skip @@ -44,6 +44,14 @@ __all__ = [ "DatasetBuilder", + "State", + "Config", + "ConfigData", + "ConfigMap", + "GroupHandler", + "Handler", + "MutableConfigMap", + "Objects", "Parser", "ThinkParser", "MaybeThinkParser", @@ -63,10 +71,13 @@ "Env", "EnvConfig", "Task", + "TaskRow", + "TaskRows", "Taskset", "TasksetConfig", "Harness", "HarnessConfig", + 
"ProgramConfig", "MCPTool", "MCPToolConfig", "SandboxConfig", @@ -76,11 +87,12 @@ "UserConfig", "HarborTaskset", "HarborTasksetConfig", - "CLIHarness", "MiniSWEAgent", "OpenCode", + "OpenCodeConfig", "Pi", "RLM", + "RLMConfig", "Environment", "MultiTurnEnv", "SingleTurnEnv", @@ -103,6 +115,7 @@ "quiet_verifiers", "load_environment", "print_prompt_completions_sample", + "get_messages", "cleanup", "metric", "reward", @@ -111,6 +124,13 @@ "stop", "teardown", "update", + "add_metric", + "add_reward", + "add_advantage", + "build_signals", + "collect_signals", + "score_group", + "score_rollout", "ensure_keys", "MissingKeyError", "get_model", @@ -165,13 +185,23 @@ "TextArenaEnv": "verifiers.envs.integrations.textarena_env:TextArenaEnv", "BrowserEnv": "verifiers.envs.integrations.browser_env:BrowserEnv", "OpenEnvEnv": "verifiers.envs.integrations.openenv_env:OpenEnvEnv", + "Config": "verifiers.v1:Config", "Env": "verifiers.v1:Env", "EnvConfig": "verifiers.v1:EnvConfig", + "ConfigData": "verifiers.v1:ConfigData", + "ConfigMap": "verifiers.v1:ConfigMap", + "GroupHandler": "verifiers.v1:GroupHandler", + "Handler": "verifiers.v1:Handler", + "MutableConfigMap": "verifiers.v1:MutableConfigMap", + "Objects": "verifiers.v1:Objects", "Task": "verifiers.v1:Task", + "TaskRow": "verifiers.v1:TaskRow", + "TaskRows": "verifiers.v1:TaskRows", "Taskset": "verifiers.v1:Taskset", "TasksetConfig": "verifiers.v1:TasksetConfig", "Harness": "verifiers.v1:Harness", "HarnessConfig": "verifiers.v1:HarnessConfig", + "ProgramConfig": "verifiers.v1:ProgramConfig", "MCPTool": "verifiers.v1:MCPTool", "MCPToolConfig": "verifiers.v1:MCPToolConfig", "SandboxConfig": "verifiers.v1:SandboxConfig", @@ -181,11 +211,20 @@ "UserConfig": "verifiers.v1:UserConfig", "HarborTaskset": "verifiers.v1:HarborTaskset", "HarborTasksetConfig": "verifiers.v1:HarborTasksetConfig", - "CLIHarness": "verifiers.v1:CLIHarness", "MiniSWEAgent": "verifiers.v1:MiniSWEAgent", "OpenCode": "verifiers.v1:OpenCode", + "OpenCodeConfig": 
"verifiers.v1:OpenCodeConfig", "Pi": "verifiers.v1:Pi", "RLM": "verifiers.v1:RLM", + "RLMConfig": "verifiers.v1:RLMConfig", + "get_messages": "verifiers.v1:get_messages", + "add_metric": "verifiers.v1:add_metric", + "add_reward": "verifiers.v1:add_reward", + "add_advantage": "verifiers.v1:add_advantage", + "build_signals": "verifiers.v1:build_signals", + "collect_signals": "verifiers.v1:collect_signals", + "score_group": "verifiers.v1:score_group", + "score_rollout": "verifiers.v1:score_rollout", } @@ -250,27 +289,46 @@ def __getattr__(name: str): from .rubrics.math_rubric import MathRubric # noqa: F401 from .utils.env_utils import load_environment # noqa: F401 from .v1 import ( # noqa: F401 + Config, + ConfigData, + ConfigMap, Env, EnvConfig, + GroupHandler, + Handler, Harness, HarnessConfig, - MCPTool, - MCPToolConfig, - SandboxConfig, HarborTaskset, HarborTasksetConfig, - CLIHarness, + MCPTool, + MCPToolConfig, MiniSWEAgent, + MutableConfigMap, + Objects, OpenCode, + OpenCodeConfig, Pi, + ProgramConfig, RLM, + RLMConfig, + SandboxConfig, Task, + TaskRow, + TaskRows, Taskset, TasksetConfig, Toolset, ToolsetConfig, User, UserConfig, + add_advantage, + add_metric, + add_reward, + build_signals, + collect_signals, + get_messages, + score_group, + score_rollout, ) # Optional verifiers-rl exports. Keep type-checking clean when extra is absent. 
diff --git a/verifiers/cli/plugins/prime.py b/verifiers/cli/plugins/prime.py index da538cb85..455d0154b 100644 --- a/verifiers/cli/plugins/prime.py +++ b/verifiers/cli/plugins/prime.py @@ -1,7 +1,5 @@ """Prime-hosted command plugin contract.""" -from __future__ import annotations - from dataclasses import dataclass import os from pathlib import Path diff --git a/verifiers/clients/__init__.py b/verifiers/clients/__init__.py index 9b70c114d..eb938d6dd 100644 --- a/verifiers/clients/__init__.py +++ b/verifiers/clients/__init__.py @@ -1,5 +1,3 @@ -from __future__ import annotations - from verifiers.clients.anthropic_messages_client import AnthropicMessagesClient from verifiers.clients.client import Client from verifiers.clients.nemorl_chat_completions_client import ( diff --git a/verifiers/clients/nemorl_chat_completions_client.py b/verifiers/clients/nemorl_chat_completions_client.py index a9e615417..99f02b79b 100644 --- a/verifiers/clients/nemorl_chat_completions_client.py +++ b/verifiers/clients/nemorl_chat_completions_client.py @@ -1,5 +1,3 @@ -from __future__ import annotations - from typing import Any, cast from verifiers.clients.openai_chat_completions_client import ( diff --git a/verifiers/clients/openai_responses_client.py b/verifiers/clients/openai_responses_client.py index b33f6b615..eb599d5fc 100644 --- a/verifiers/clients/openai_responses_client.py +++ b/verifiers/clients/openai_responses_client.py @@ -1,5 +1,3 @@ -from __future__ import annotations - import time from collections.abc import Iterable, Mapping from typing import Any, TypeAlias diff --git a/verifiers/clients/renderer_client.py b/verifiers/clients/renderer_client.py index ad7644357..6ffdcf6cd 100644 --- a/verifiers/clients/renderer_client.py +++ b/verifiers/clients/renderer_client.py @@ -8,8 +8,6 @@ concurrent rollouts tokenize in parallel instead of blocking the event loop. 
""" -from __future__ import annotations - import asyncio import json import threading diff --git a/verifiers/envs/env_group.py b/verifiers/envs/env_group.py index 5db93111f..f2104db98 100644 --- a/verifiers/envs/env_group.py +++ b/verifiers/envs/env_group.py @@ -1,5 +1,3 @@ -from __future__ import annotations - import json from collections.abc import Mapping from typing import TYPE_CHECKING, Any, cast, final @@ -266,13 +264,13 @@ def add_env_route(example): def _format_dataset( self, - dataset: Dataset, + dataset: "Dataset", system_prompt: str | None = None, few_shot: Messages | None = None, question_key: str = "question", answer_key: str = "answer", map_kwargs: dict = {}, - ) -> Dataset: + ) -> "Dataset": """Ensure unique example_ids across concatenated datasets.""" # use parent's prompt handling dataset = self._ensure_prompt( @@ -294,8 +292,8 @@ def add_example_id(example, i): return dataset def _format_completion_dataset( - self, dataset: Dataset, map_kwargs: dict = {} - ) -> Dataset: + self, dataset: "Dataset", map_kwargs: dict = {} + ) -> "Dataset": """Ensure unique example_ids across concatenated datasets.""" # ensure unique example_ids across concatenated datasets if "example_id" in dataset.column_names: diff --git a/verifiers/envs/environment.py b/verifiers/envs/environment.py index 36d5c9743..26ee1ca55 100644 --- a/verifiers/envs/environment.py +++ b/verifiers/envs/environment.py @@ -1,5 +1,3 @@ -from __future__ import annotations - import asyncio import atexit import json @@ -98,8 +96,8 @@ class Environment(ABC): def __init__( self, - dataset: Dataset | DatasetBuilder | None = None, - eval_dataset: Dataset | DatasetBuilder | None = None, + dataset: "Dataset | DatasetBuilder | None" = None, + eval_dataset: "Dataset | DatasetBuilder | None" = None, system_prompt: str | None = None, few_shot: Messages | None = None, parser: Parser | None = None, @@ -157,8 +155,8 @@ def __init__( # Dataset sources (builders) and built datasets # Use 
get_dataset()/get_eval_dataset() for access; build_dataset() to trigger build - self.dataset: Dataset | None = None - self.eval_dataset: Dataset | None = None + self.dataset: "Dataset | None" = None + self.eval_dataset: "Dataset | None" = None if dataset is not None: if callable(dataset): @@ -275,7 +273,7 @@ def _sync_teardown(): ) signal.signal(signal.SIGTERM, lambda _, __: (_sync_teardown(), exit(143))) - def _ensure_example_id(self, dataset: Dataset) -> Dataset: + def _ensure_example_id(self, dataset: "Dataset") -> "Dataset": """Ensure example_id column exists and is integer type.""" if "example_id" in dataset.column_names and not isinstance( dataset["example_id"][0], int @@ -287,13 +285,13 @@ def _ensure_example_id(self, dataset: Dataset) -> Dataset: def _ensure_prompt( self, - dataset: Dataset, + dataset: "Dataset", system_prompt: str | None = None, few_shot: Messages | None = None, question_key: str = "question", answer_key: str = "answer", map_kwargs: dict = {}, - ) -> Dataset: + ) -> "Dataset": """Ensure prompt column exists.""" if "prompt" not in dataset.column_names: @@ -354,13 +352,13 @@ def prepend_system_prompt(prompt: list[Any]) -> list[Any]: def _format_dataset( self, - dataset: Dataset, + dataset: "Dataset", system_prompt: str | None = None, few_shot: Messages | None = None, question_key: str = "question", answer_key: str = "answer", map_kwargs: dict = {}, - ) -> Dataset: + ) -> "Dataset": """ Format dataset by creating example_id and prompt columns. """ @@ -373,8 +371,8 @@ def _format_dataset( return dataset def _format_completion_dataset( - self, dataset: Dataset, map_kwargs: dict = {} - ) -> Dataset: + self, dataset: "Dataset", map_kwargs: dict = {} + ) -> "Dataset": """ Format dataset by creating example_id. 
""" @@ -383,7 +381,7 @@ def _format_completion_dataset( dataset = self._ensure_example_id(dataset) return dataset - def _format_dataset_source(self, dataset: Dataset) -> Dataset: + def _format_dataset_source(self, dataset: "Dataset") -> "Dataset": """Format a dataset as chat (messages); client maps to its format at request time.""" return self._format_dataset( dataset, @@ -392,7 +390,7 @@ def _format_dataset_source(self, dataset: Dataset) -> Dataset: map_kwargs=self.map_kwargs, ) - def build_dataset(self) -> Dataset | None: + def build_dataset(self) -> "Dataset | None": """Build and cache the training dataset from source if needed.""" if self.dataset is not None: return self.dataset @@ -402,7 +400,7 @@ def build_dataset(self) -> Dataset | None: self.dataset = self._format_dataset_source(built) return self.dataset - def build_eval_dataset(self) -> Dataset | None: + def build_eval_dataset(self) -> "Dataset | None": """Build and cache the evaluation dataset from source if needed.""" if self.eval_dataset is not None: return self.eval_dataset @@ -413,7 +411,7 @@ def build_eval_dataset(self) -> Dataset | None: return self.eval_dataset @final - def get_dataset(self, n: int = -1, seed: int | None = None) -> Dataset: + def get_dataset(self, n: int = -1, seed: int | None = None) -> "Dataset": self.build_dataset() if self.dataset is None: raise ValueError("dataset is not set") @@ -425,7 +423,7 @@ def get_dataset(self, n: int = -1, seed: int | None = None) -> Dataset: return self.dataset @final - def get_eval_dataset(self, n: int = -1, seed: int | None = None) -> Dataset: + def get_eval_dataset(self, n: int = -1, seed: int | None = None) -> "Dataset": self.build_eval_dataset() if self.eval_dataset is None: self.logger.warning( @@ -467,7 +465,7 @@ def increment_state_usage( @final def increment_state_usage_from_response( - self, state: State, response: object + self, state: State, response: Response ) -> None: tracker = self._get_usage_tracker(state, create_if_missing=True) 
assert tracker is not None @@ -833,7 +831,7 @@ async def run_group_attempt() -> list[State]: async def generate( self, - inputs: Dataset | List[RolloutInput], + inputs: "Dataset | List[RolloutInput]", client: Client | ClientConfig, model: str, sampling_args: SamplingArgs | None = None, @@ -1129,7 +1127,7 @@ def get_client_for_group() -> Client | ClientConfig: def generate_sync( self, - inputs: Dataset | List[RolloutInput], + inputs: "Dataset | List[RolloutInput]", client: Client | ClientConfig, **kwargs, ) -> GenerateOutputs: diff --git a/verifiers/envs/experimental/composable/_filter.py b/verifiers/envs/experimental/composable/_filter.py index d85e0439b..2ad1a9bb1 100644 --- a/verifiers/envs/experimental/composable/_filter.py +++ b/verifiers/envs/experimental/composable/_filter.py @@ -12,8 +12,6 @@ inputs. """ -from __future__ import annotations - import re from typing import Callable diff --git a/verifiers/envs/experimental/composable/composable_env.py b/verifiers/envs/experimental/composable/composable_env.py index 1d1a28afe..1a519ec21 100644 --- a/verifiers/envs/experimental/composable/composable_env.py +++ b/verifiers/envs/experimental/composable/composable_env.py @@ -34,8 +34,6 @@ ``run_command``. 
""" -from __future__ import annotations - import asyncio import importlib.resources as resources import json diff --git a/verifiers/envs/experimental/composable/harness.py b/verifiers/envs/experimental/composable/harness.py index b55d7171b..254fbfafe 100644 --- a/verifiers/envs/experimental/composable/harness.py +++ b/verifiers/envs/experimental/composable/harness.py @@ -14,8 +14,6 @@ env = ComposableEnv(taskset=taskset, harness=harness) """ -from __future__ import annotations - from dataclasses import dataclass from importlib.abc import Traversable from pathlib import Path @@ -140,7 +138,7 @@ class Harness: system_prompt_path: str = "/task/system_prompt.txt" instruction_path: str = "/task/instruction.md" log_path: str | None = None - sandbox_spec: SandboxSpec | None = None + sandbox_spec: "SandboxSpec | None" = None skills_path: str | None = None upload_dir_mapping: dict[str, str] | None = None get_upload_dirs: Callable[[], dict[str, Traversable | Path] | None] | None = None @@ -149,13 +147,11 @@ class Harness: metrics_key: str | None = None metrics_keys: list[str] | None = None tool_names: list[str] | None = None - environment_vars: Callable[[State], dict[str, str]] | None = None + environment_vars: "Callable[[State], dict[str, str]] | None" = None post_install_uploads: dict[str, str] | None = None post_install_script: str | None = None - keep_trajectory_step: ( - Callable[[TrajectoryStep, State, dict[str, str]], bool] | None - ) = None - render_completion: Callable[[State], None] | None = None + keep_trajectory_step: "Callable[[TrajectoryStep, State, dict[str, str]], bool] | None" = None + render_completion: "Callable[[State], None] | None" = None def get_effective_upload_dir_mapping(self) -> dict[str, str] | None: """Return the merged upload mapping (skills_path + upload_dir_mapping).""" diff --git a/verifiers/envs/experimental/composable/harnesses/mini_swe_agent.py b/verifiers/envs/experimental/composable/harnesses/mini_swe_agent.py index c6ab44348..19f840e9c 
100644 --- a/verifiers/envs/experimental/composable/harnesses/mini_swe_agent.py +++ b/verifiers/envs/experimental/composable/harnesses/mini_swe_agent.py @@ -1,7 +1,5 @@ """mini-SWE-agent harness configuration.""" -from __future__ import annotations - from pathlib import PurePosixPath import shlex diff --git a/verifiers/envs/experimental/composable/harnesses/opencode.py b/verifiers/envs/experimental/composable/harnesses/opencode.py index c1dbf5d2d..bca54c6b9 100644 --- a/verifiers/envs/experimental/composable/harnesses/opencode.py +++ b/verifiers/envs/experimental/composable/harnesses/opencode.py @@ -9,8 +9,6 @@ harness = opencode_harness(system_prompt="You are a coding agent...") """ -from __future__ import annotations - import json import shlex from pathlib import Path, PurePosixPath diff --git a/verifiers/envs/experimental/composable/harnesses/rlm.py b/verifiers/envs/experimental/composable/harnesses/rlm.py index 932aec35a..13a15a815 100644 --- a/verifiers/envs/experimental/composable/harnesses/rlm.py +++ b/verifiers/envs/experimental/composable/harnesses/rlm.py @@ -1,7 +1,5 @@ """RLM agent harness: install script, run command, and harness factory.""" -from __future__ import annotations - import hashlib import random import shlex diff --git a/verifiers/envs/experimental/composable/swe_debug_env.py b/verifiers/envs/experimental/composable/swe_debug_env.py index 766e64b10..505133f2e 100644 --- a/verifiers/envs/experimental/composable/swe_debug_env.py +++ b/verifiers/envs/experimental/composable/swe_debug_env.py @@ -1,7 +1,5 @@ """No-agent debugger for SWE-style SandboxTaskSet instances.""" -from __future__ import annotations - import shlex import time from typing import Any, Literal diff --git a/verifiers/envs/experimental/composable/task.py b/verifiers/envs/experimental/composable/task.py index 3cf25a43f..4d3962962 100644 --- a/verifiers/envs/experimental/composable/task.py +++ b/verifiers/envs/experimental/composable/task.py @@ -24,8 +24,6 @@ def 
get_sandbox_spec(self, info) -> SandboxSpec: ... async def evaluate(self, sandbox_client, sandbox_id, state) -> float: ... """ -from __future__ import annotations - import importlib import importlib.resources as resources from dataclasses import dataclass @@ -100,7 +98,7 @@ class Task: """ def __init__( - self, taskset: TaskSet, prompt: Messages, info: dict, answer: str = "" + self, taskset: "TaskSet", prompt: Messages, info: dict, answer: str = "" ): self._taskset = taskset self.prompt = prompt @@ -143,7 +141,7 @@ class TaskSet: def __init__( self, - dataset: Any | DatasetBuilder, + dataset: "Any | DatasetBuilder", name: str = "", filter_fn: str | None = None, ): @@ -294,13 +292,13 @@ def __getitem__(self, i: int) -> Task: # -- Combinators --------------------------------------------------------- - def filter(self, predicate: Callable[[dict], bool]) -> TaskSet: + def filter(self, predicate: Callable[[dict], bool]) -> "TaskSet": clone = object.__new__(type(self)) clone.__dict__.update(self.__dict__) clone.dataset = self.dataset.filter(predicate) return clone - def take(self, n: int) -> TaskSet: + def take(self, n: int) -> "TaskSet": clone = object.__new__(type(self)) clone.__dict__.update(self.__dict__) clone.dataset = self.dataset.select(range(min(n, len(self.dataset)))) diff --git a/verifiers/envs/experimental/composable/tasksets/cp/cp_task.py b/verifiers/envs/experimental/composable/tasksets/cp/cp_task.py index 883c6a49e..fe1e6886b 100644 --- a/verifiers/envs/experimental/composable/tasksets/cp/cp_task.py +++ b/verifiers/envs/experimental/composable/tasksets/cp/cp_task.py @@ -6,8 +6,6 @@ taskset = CPTaskSet() """ -from __future__ import annotations - import json import logging import random diff --git a/verifiers/envs/experimental/composable/tasksets/harbor/harbor.py b/verifiers/envs/experimental/composable/tasksets/harbor/harbor.py index b99f98a48..6683aa8a9 100644 --- a/verifiers/envs/experimental/composable/tasksets/harbor/harbor.py +++ 
b/verifiers/envs/experimental/composable/tasksets/harbor/harbor.py @@ -1,5 +1,3 @@ -from __future__ import annotations - import json import logging import tarfile diff --git a/verifiers/envs/experimental/composable/tasksets/lean/lean_task.py b/verifiers/envs/experimental/composable/tasksets/lean/lean_task.py index f24f36d1e..a6ba2231c 100644 --- a/verifiers/envs/experimental/composable/tasksets/lean/lean_task.py +++ b/verifiers/envs/experimental/composable/tasksets/lean/lean_task.py @@ -33,8 +33,6 @@ "protected" means. """ -from __future__ import annotations - import re from dataclasses import dataclass @@ -86,7 +84,7 @@ @dataclass(frozen=True) -class _Preset: +class DatasetPreset: dataset_name: str dataset_split: str = "train" dataset_subset: str | None = None @@ -97,23 +95,23 @@ class _Preset: normalize_mathlib_imports: bool = False -PRESETS: dict[str, _Preset] = { - "goedel-pset": _Preset("Goedel-LM/Goedel-Pset-v1"), - "numina-lean": _Preset("AI-MO/NuminaMath-LEAN", name_column="uuid"), - "deepseek-prover-v1": _Preset( +PRESETS: dict[str, DatasetPreset] = { + "goedel-pset": DatasetPreset("Goedel-LM/Goedel-Pset-v1"), + "numina-lean": DatasetPreset("AI-MO/NuminaMath-LEAN", name_column="uuid"), + "deepseek-prover-v1": DatasetPreset( "deepseek-ai/DeepSeek-Prover-V1", header_column="header", name_column="name", ), - "kimina": _Preset("AI-MO/Kimina-Prover-Promptset", name_column="name"), - "minif2f": _Preset( + "kimina": DatasetPreset("AI-MO/Kimina-Prover-Promptset", name_column="name"), + "minif2f": DatasetPreset( "cat-searcher/minif2f-lean4", dataset_split="test", header_column="header", name_column="id", normalize_mathlib_imports=True, ), - "deepseek-proverbench": _Preset( + "deepseek-proverbench": DatasetPreset( "deepseek-ai/DeepSeek-ProverBench", header_column="header", name_column="name", diff --git a/verifiers/envs/experimental/composable/tasksets/math/math_task.py b/verifiers/envs/experimental/composable/tasksets/math/math_task.py index 073a5ee31..92c4169ad 
100644 --- a/verifiers/envs/experimental/composable/tasksets/math/math_task.py +++ b/verifiers/envs/experimental/composable/tasksets/math/math_task.py @@ -8,8 +8,6 @@ task = MathTaskSet("hendrycks/math") """ -from __future__ import annotations - import logging from verifiers.envs.experimental.composable import SandboxSpec, SandboxTaskSet diff --git a/verifiers/envs/experimental/composable/tasksets/swe/_test_patch.py b/verifiers/envs/experimental/composable/tasksets/swe/_test_patch.py index 2819c2021..f8c619d91 100644 --- a/verifiers/envs/experimental/composable/tasksets/swe/_test_patch.py +++ b/verifiers/envs/experimental/composable/tasksets/swe/_test_patch.py @@ -37,8 +37,6 @@ fields in practice do not contain such paths. """ -from __future__ import annotations - import logging import tempfile from pathlib import Path diff --git a/verifiers/envs/experimental/composable/tasksets/swe/multi_swe.py b/verifiers/envs/experimental/composable/tasksets/swe/multi_swe.py index c7802a3cd..78224ae98 100644 --- a/verifiers/envs/experimental/composable/tasksets/swe/multi_swe.py +++ b/verifiers/envs/experimental/composable/tasksets/swe/multi_swe.py @@ -1,5 +1,3 @@ -from __future__ import annotations - import logging import shlex import tempfile diff --git a/verifiers/envs/experimental/composable/tasksets/swe/openswe.py b/verifiers/envs/experimental/composable/tasksets/swe/openswe.py index 0acf54393..c55978bbc 100644 --- a/verifiers/envs/experimental/composable/tasksets/swe/openswe.py +++ b/verifiers/envs/experimental/composable/tasksets/swe/openswe.py @@ -1,5 +1,3 @@ -from __future__ import annotations - import logging import re import tempfile diff --git a/verifiers/envs/experimental/composable/tasksets/swe/r2e_gym.py b/verifiers/envs/experimental/composable/tasksets/swe/r2e_gym.py index bfe3fec41..58bb895ec 100644 --- a/verifiers/envs/experimental/composable/tasksets/swe/r2e_gym.py +++ b/verifiers/envs/experimental/composable/tasksets/swe/r2e_gym.py @@ -1,5 +1,3 @@ -from 
__future__ import annotations - import json import logging import tempfile diff --git a/verifiers/envs/experimental/composable/tasksets/swe/swe_bench.py b/verifiers/envs/experimental/composable/tasksets/swe/swe_bench.py index df4101018..fd23147bc 100644 --- a/verifiers/envs/experimental/composable/tasksets/swe/swe_bench.py +++ b/verifiers/envs/experimental/composable/tasksets/swe/swe_bench.py @@ -1,5 +1,3 @@ -from __future__ import annotations - import json import logging import re diff --git a/verifiers/envs/experimental/composable/tasksets/swe/swe_lego.py b/verifiers/envs/experimental/composable/tasksets/swe/swe_lego.py index ce1ebdef6..a3d4d76d7 100644 --- a/verifiers/envs/experimental/composable/tasksets/swe/swe_lego.py +++ b/verifiers/envs/experimental/composable/tasksets/swe/swe_lego.py @@ -1,5 +1,3 @@ -from __future__ import annotations - import json import logging import re diff --git a/verifiers/envs/experimental/composable/tasksets/swe/swe_rebench_v2.py b/verifiers/envs/experimental/composable/tasksets/swe/swe_rebench_v2.py index a88df4a6a..b94ce47e6 100644 --- a/verifiers/envs/experimental/composable/tasksets/swe/swe_rebench_v2.py +++ b/verifiers/envs/experimental/composable/tasksets/swe/swe_rebench_v2.py @@ -24,8 +24,6 @@ ``/testbed`` — because upstream eval scripts cd to that path. 
""" -from __future__ import annotations - import json import logging import re diff --git a/verifiers/envs/experimental/composable/tasksets/swe/swe_rebench_v2_log_parsers.py b/verifiers/envs/experimental/composable/tasksets/swe/swe_rebench_v2_log_parsers.py index 447b05f91..d9a45a692 100644 --- a/verifiers/envs/experimental/composable/tasksets/swe/swe_rebench_v2_log_parsers.py +++ b/verifiers/envs/experimental/composable/tasksets/swe/swe_rebench_v2_log_parsers.py @@ -3208,7 +3208,7 @@ def parse_log_ocaml(log: str) -> dict[str, str]: return results -class _OcamlDuneLogParser: +class OcamlDuneLogParser: running_re = re.compile(r"^Running\[(?P\d+)\]:\s+\((?P.+)\)$") output_re = re.compile(r"^Output\[(?P\d+)]:") fail_tokens = ("FAIL", "ERROR", "EXCEPTION", "CRASH", "FATAL") @@ -3266,7 +3266,7 @@ def finalize(self) -> dict[str, str]: def parse_log_ocaml_v2(log: str) -> dict[str, str]: """Parse dune test logs and return {test_binary: status}.""" - parser = _OcamlDuneLogParser() + parser = OcamlDuneLogParser() for raw_line in log.splitlines(): parser.handle_line(raw_line) return parser.finalize() diff --git a/verifiers/envs/experimental/composable/tasksets/swe/swe_smith.py b/verifiers/envs/experimental/composable/tasksets/swe/swe_smith.py index ea2cdb822..944464756 100644 --- a/verifiers/envs/experimental/composable/tasksets/swe/swe_smith.py +++ b/verifiers/envs/experimental/composable/tasksets/swe/swe_smith.py @@ -20,8 +20,6 @@ priority languages (py, go, java, js, ts, rs) have 100% coverage. 
""" -from __future__ import annotations - import logging import shlex import tempfile diff --git a/verifiers/envs/experimental/composable/tasksets/swe/swe_tasksets.py b/verifiers/envs/experimental/composable/tasksets/swe/swe_tasksets.py index b18b8fe56..58622b9da 100644 --- a/verifiers/envs/experimental/composable/tasksets/swe/swe_tasksets.py +++ b/verifiers/envs/experimental/composable/tasksets/swe/swe_tasksets.py @@ -6,8 +6,6 @@ bench = make_swebench_taskset() """ -from __future__ import annotations - from typing import Any from verifiers.envs.experimental.composable import TaskSet diff --git a/verifiers/envs/experimental/gym_env.py b/verifiers/envs/experimental/gym_env.py index 0d0096319..315cc4e02 100644 --- a/verifiers/envs/experimental/gym_env.py +++ b/verifiers/envs/experimental/gym_env.py @@ -1,5 +1,3 @@ -from __future__ import annotations - from collections.abc import Callable from typing import Any, Protocol, TypeAlias, cast diff --git a/verifiers/envs/experimental/harbor_env/env.py b/verifiers/envs/experimental/harbor_env/env.py index 3a1649fd9..9cd256dd0 100644 --- a/verifiers/envs/experimental/harbor_env/env.py +++ b/verifiers/envs/experimental/harbor_env/env.py @@ -1,5 +1,3 @@ -from __future__ import annotations - import json import logging import tarfile diff --git a/verifiers/envs/experimental/harbor_env/mcp.py b/verifiers/envs/experimental/harbor_env/mcp.py index 7ecb0ff99..11ba26e28 100644 --- a/verifiers/envs/experimental/harbor_env/mcp.py +++ b/verifiers/envs/experimental/harbor_env/mcp.py @@ -1,7 +1,5 @@ """MCP server lifecycle for Harbor-format tasks.""" -from __future__ import annotations - import asyncio import logging import shlex diff --git a/verifiers/envs/experimental/utils/file_locks.py b/verifiers/envs/experimental/utils/file_locks.py index 30af28592..5aa18cb25 100644 --- a/verifiers/envs/experimental/utils/file_locks.py +++ b/verifiers/envs/experimental/utils/file_locks.py @@ -1,5 +1,3 @@ -from __future__ import annotations - from 
collections.abc import Iterator from contextlib import contextmanager import fcntl diff --git a/verifiers/envs/experimental/utils/git_checkout_cache.py b/verifiers/envs/experimental/utils/git_checkout_cache.py index 3baf17970..369b8cdfc 100644 --- a/verifiers/envs/experimental/utils/git_checkout_cache.py +++ b/verifiers/envs/experimental/utils/git_checkout_cache.py @@ -1,5 +1,3 @@ -from __future__ import annotations - import fcntl import hashlib import logging diff --git a/verifiers/envs/integrations/README.md b/verifiers/envs/integrations/README.md index a610a249c..170bec820 100644 --- a/verifiers/envs/integrations/README.md +++ b/verifiers/envs/integrations/README.md @@ -7,7 +7,7 @@ Integrations with third-party environment libraries, which may require additiona | `TextArenaEnv` | `ta` | `uv add 'verifiers[ta]'` | | `ReasoningGymEnv` | `rg` | `uv add 'verifiers[rg]'` | | `BrowserEnv` | `browser` | `uv add 'verifiers[browser]'` | -| `OpenEnvEnv` | `openenv` | `uv add 'verifiers[openenv]'` | +| `OpenEnvEnv` | none | `uv add verifiers` | ## TextArenaEnv @@ -137,7 +137,9 @@ Locally, export these in your shell. On the [Environments Hub](https://app.prime Drop-in adapter for [OpenEnv](https://github.com/meta-pytorch/OpenEnv) environments. Always runs in Prime Sandboxes and uses OpenEnv's schema to choose between simulation (step/reset) and MCP tool-calling. -Current verifiers integration targets the released `openenv-core==0.2.1` contract. +The Verifiers adapter uses OpenEnv's public async clients. The bundled OpenEnv +project under `proj/` declares its own server dependencies for the sandbox +image. 
### Quick Start @@ -162,33 +164,33 @@ uv run vf-build my-openenv ``` ```python -# environments/my_openenv/my_openenv.py -from typing import Any import verifiers as vf -from verifiers.envs.integrations.openenv_env import OpenEnvEnv +from verifiers.types import Messages, UserMessage + + +class OpenEnvPromptRenderer: + def __call__(self, observation: object) -> Messages: + if isinstance(observation, dict): + prompt = observation.get("prompt") + if isinstance(prompt, str) and prompt.strip(): + return [UserMessage(content=prompt)] + raise RuntimeError("Observation did not include a renderable prompt") -def render_prompt(observation: Any) -> list[dict[str, str]]: - if not isinstance(observation, dict): - raise RuntimeError("Expected dict observation") - prompt = observation.get("prompt") - if isinstance(prompt, str) and prompt.strip(): - return [{"role": "user", "content": prompt}] - raise RuntimeError("Observation did not include a renderable prompt") def load_environment( num_train_examples: int = 100, num_eval_examples: int = 50, seed: int = 0, ) -> vf.Environment: - return OpenEnvEnv( - prompt_renderer=render_prompt, + return vf.OpenEnvEnv( + prompt_renderer=OpenEnvPromptRenderer(), num_train_examples=num_train_examples, num_eval_examples=num_eval_examples, seed=seed, ) ``` -Define a `prompt_renderer` function that converts each OpenEnv observation into a non-empty chat message list for the model prompt. +Define a prompt renderer that converts each OpenEnv observation into a non-empty chat message list for the model prompt. 
### Upstream-Matching Examples diff --git a/verifiers/envs/integrations/openenv_env.py b/verifiers/envs/integrations/openenv_env.py index a3388849e..8995aa285 100644 --- a/verifiers/envs/integrations/openenv_env.py +++ b/verifiers/envs/integrations/openenv_env.py @@ -1,11 +1,9 @@ -from __future__ import annotations - import asyncio import inspect import json import logging -import time from dataclasses import dataclass +from importlib import import_module from pathlib import Path from typing import Any, Callable, Iterable, cast @@ -25,16 +23,24 @@ from verifiers.utils.message_utils import from_raw_message from verifiers.utils.tool_utils import is_valid_tool_content_parts -try: - from openenv.core.generic_client import GenericEnvClient -except ImportError as e: - raise ImportError( - "OpenEnvEnv requires openenv-core. Install with: uv add 'verifiers[openenv]'" - ) from e + +def _optional_openenv_type(module_name: str, attr: str) -> type[Any] | None: + try: + return cast(type[Any], getattr(import_module(module_name), attr)) + except ImportError: + return None + + +CallToolAction = _optional_openenv_type( + "openenv.core.env_server.mcp_types", "CallToolAction" +) +GenericEnvClient = _optional_openenv_type( + "openenv.core.generic_client", "GenericEnvClient" +) +MCPToolClient = _optional_openenv_type("openenv.core.mcp_client", "MCPToolClient") try: - from prime_sandboxes import AsyncSandboxClient, CreateSandboxRequest, SandboxClient - from prime_sandboxes.core import APIClient + from prime_sandboxes import AsyncSandboxClient, CreateSandboxRequest except ImportError as e: raise ImportError( "OpenEnvEnv requires prime-sandboxes. Install with: uv add prime-sandboxes" @@ -43,8 +49,33 @@ logger = logging.getLogger(__name__) +def _missing_openenv(component: str) -> ImportError: + return ImportError( + f"OpenEnvEnv requires openenv-core for {component}. " + "Install the `openenv` extra, e.g. `uv add 'verifiers[openenv]'`." 
+ ) + + +def _generic_client_class() -> type[Any]: + if GenericEnvClient is None: + raise _missing_openenv("gym rollouts") + return GenericEnvClient + + +def _mcp_client_class() -> type[Any]: + if MCPToolClient is None: + raise _missing_openenv("MCP rollouts") + return MCPToolClient + + +def _call_tool_action_class() -> type[Any]: + if CallToolAction is None: + raise _missing_openenv("MCP tool calls") + return CallToolAction + + @dataclass -class _OpenEnvServer: +class OpenEnvServer: sandbox_id: str exposure_id: str base_url: str @@ -76,10 +107,6 @@ class OpenEnvEnv(vf.MultiTurnEnv): - Expects a prompt renderer that maps observations to chat messages. """ - _DATASET_RESET_MAX_RETRIES = 5 - _DATASET_RESET_BASE_BACKOFF_SECONDS = 0.25 - _DATASET_RESET_MAX_BACKOFF_SECONDS = 3.0 - def __init__( self, openenv_project: str | Path | None = None, @@ -118,11 +145,10 @@ def __init__( self.schema_request_timeout_seconds = schema_request_timeout_seconds self.wait_for_creation_max_attempts = wait_for_creation_max_attempts - self._active_servers: dict[str, _OpenEnvServer] = {} + self._active_servers: dict[str, OpenEnvServer] = {} self._contract: str | None = None # "gym" or "mcp" self._action_schema: dict[str, Any] | None = None self._mcp_tools: list[Any] | None = None - self._mcp_request_id = 0 self._with_retry = tc.AsyncRetrying( stop=tc.stop_after_attempt(max_retries), @@ -201,115 +227,21 @@ def _build_seed_datasets(self) -> tuple[Dataset, Dataset | None]: def _build_seed_rows(self, total: int) -> list[dict[str, Any]]: if total <= 0: return [] - project_path = self._resolve_project_path() - image, port, start_command, contract = self._resolve_runtime_config( - project_path - ) - server = self._launch_image_server_sync(image, port, start_command, contract) rows: list[dict[str, Any]] = [] - mcp_action_schema: dict[str, Any] | None = None - try: - if contract == "mcp": - mcp_action_schema = self._fetch_action_schema_sync(server.base_url) - seeds = [self.seed + i for i in 
range(total)] - observations = self._fetch_reset_observations_sync(server.base_url, seeds) - for seed, obs in zip(seeds, observations, strict=False): - prompt = self._render_observation_messages( - obs, - context="reset", - action_schema=mcp_action_schema if contract == "mcp" else None, - contract=contract, - seed=seed, - ) - rows.append( - { - "prompt": prompt, - "info": {"seed": seed}, - } - ) - return rows - finally: - self._cleanup_server_sync(server) - - def _fetch_reset_observations_sync( - self, base_url: str, seeds: list[int] - ) -> list[Any]: - if not seeds: - return [] - client = GenericEnvClient(base_url=base_url) - observations: list[Any] = [] - try: - self._connect_generic_client_sync(client) - for seed in seeds: - observation = self._reset_with_retry_sync(client, seed) - observations.append(observation) - finally: - try: - client.close() - except Exception: - pass - return observations - - def _connect_generic_client_sync(self, client: GenericEnvClient) -> None: - try: - client.connect() - except Exception as e: - raise RuntimeError( - "OpenEnv dataset bootstrap failed to establish a reset session." - ) from e - - def _reset_with_retry_sync(self, client: GenericEnvClient, seed: int) -> Any: - last_error: Exception | None = None - for attempt in range(1, self._DATASET_RESET_MAX_RETRIES + 1): - try: - result = client.reset(seed=int(seed)) - observation = getattr(result, "observation", None) - if observation is None: - raise RuntimeError( - "OpenEnv reset result is missing required `observation`." - ) - return observation - except Exception as e: - last_error = e - if attempt >= self._DATASET_RESET_MAX_RETRIES: - break - backoff = min( - self._DATASET_RESET_BASE_BACKOFF_SECONDS * (2 ** (attempt - 1)), - self._DATASET_RESET_MAX_BACKOFF_SECONDS, - ) - time.sleep(backoff) - # Reconnect the session before retrying this seed. 
- try: - client.close() - except Exception: - pass - self._connect_generic_client_sync(client) - assert last_error is not None - raise RuntimeError( - f"OpenEnv reset failed during dataset build for seed={seed} " - f"after {self._DATASET_RESET_MAX_RETRIES} attempts." - ) from last_error - - def _fetch_action_schema_sync(self, base_url: str) -> dict[str, Any]: - try: - response = requests.get( - f"{base_url}/schema", - timeout=self.schema_request_timeout_seconds, + for i in range(total): + seed = self.seed + i + rows.append( + { + "prompt": [ + { + "role": "user", + "content": "OpenEnv rollout is initializing.", + } + ], + "info": {"seed": seed}, + } ) - response.raise_for_status() - payload = response.json() - except Exception as e: - raise RuntimeError( - "OpenEnv schema fetch failed while building dataset." - ) from e - if not isinstance(payload, dict): - raise RuntimeError("OpenEnv /schema response must be a JSON object.") - action_schema = payload.get("action", {}) - if not isinstance(action_schema, dict): - raise RuntimeError( - "OpenEnv /schema response missing object `action` schema." 
- ) - return action_schema + return rows def _validate_dataset_prompts(self, dataset: Dataset, split_name: str) -> None: if "prompt" not in dataset.column_names: @@ -356,23 +288,35 @@ async def setup_state(self, state: vf.State) -> vf.State: seed = int(info.get("seed", 0)) if server.contract == "mcp": - mcp_client = GenericEnvClient(base_url=server.base_url) - await self._invoke(cast(Any, mcp_client).connect) + mcp_client = _mcp_client_class()(base_url=server.base_url) + await mcp_client.connect() state["openenv_mcp_client"] = mcp_client if self._mcp_tools is None: self._mcp_tools = await self._mcp_list_tools(mcp_client) state["tool_defs"] = self._convert_mcp_tools(self._mcp_tools) - result = await self._invoke(cast(Any, mcp_client).reset, seed=seed) + result = await mcp_client.reset(seed=seed) state["openenv_done"] = bool(result.done) - state["prompt"] = self._require_prompt_messages(state) + state["prompt"] = self._render_observation_messages( + result.observation, + context="reset", + action_schema=action_schema, + contract=server.contract, + seed=seed, + ) return state - client = GenericEnvClient(base_url=server.base_url) - await self._invoke(cast(Any, client).connect) + client = _generic_client_class()(base_url=server.base_url) + await client.connect() state["openenv_client"] = client - result = await self._invoke(cast(Any, client).reset, seed=seed) + result = await client.reset(seed=seed) state["openenv_done"] = bool(result.done) - state["prompt"] = self._require_prompt_messages(state) + state["prompt"] = self._render_observation_messages( + result.observation, + context="reset", + action_schema=action_schema, + contract=server.contract, + seed=seed, + ) return state except Exception: await self._cleanup_openenv_state(state) @@ -404,8 +348,8 @@ async def _gym_env_response( action_schema = state.get("openenv_action_schema") or self._action_schema or {} action = self._parse_action(raw_text, action_schema) - client: Any = state["openenv_client"] - result = await 
self._invoke(client.step, action) + client = cast(Any, state["openenv_client"]) + result = await client.step(action) if state["trajectory"]: state["trajectory"][-1]["reward"] = result.reward @@ -430,7 +374,7 @@ async def _mcp_env_response( if not tool_calls: return [] - mcp_client: Any = state["openenv_mcp_client"] + mcp_client = cast(Any, state["openenv_mcp_client"]) tool_messages: Messages = [] total_reward = 0.0 done = False @@ -487,16 +431,20 @@ async def mcp_no_tool_calls(self, state: vf.State) -> bool: async def _cleanup_openenv_state(self, state: vf.State) -> None: client = state.pop("openenv_client", None) - if client is not None: + generic_client_class = GenericEnvClient + if generic_client_class is not None and isinstance( + client, generic_client_class + ): try: - await self._invoke(cast(Any, client).close) + await client.close() except Exception: pass mcp_client = state.pop("openenv_mcp_client", None) - if mcp_client is not None: + mcp_client_class = MCPToolClient + if mcp_client_class is not None and isinstance(mcp_client, mcp_client_class): try: - await self._invoke(cast(Any, mcp_client).close) + await mcp_client.close() except Exception: pass @@ -511,7 +459,7 @@ async def _cleanup_openenv_state(self, state: vf.State) -> None: async def cleanup_openenv(self, state: vf.State) -> None: await self._cleanup_openenv_state(state) - async def _cleanup_server(self, server: _OpenEnvServer) -> None: + async def _cleanup_server(self, server: OpenEnvServer) -> None: async with AsyncSandboxClient() as sandboxes: try: await self._with_retry(sandboxes.unexpose)( @@ -574,7 +522,7 @@ async def teardown_server(self) -> None: except Exception: pass - async def _create_server(self) -> _OpenEnvServer: + async def _create_server(self) -> OpenEnvServer: project_path = self._resolve_project_path() image, port, start_command, contract = self._resolve_runtime_config( project_path @@ -641,7 +589,7 @@ def _read_build_manifest(self, project_path: Path) -> dict[str, Any]: async 
def _launch_image_server( self, image: str, port: int, start_command: str, contract: str - ) -> _OpenEnvServer: + ) -> OpenEnvServer: async with AsyncSandboxClient() as sandboxes: req = self._build_sandbox_request(image, start_command=start_command) try: @@ -663,7 +611,7 @@ async def _launch_image_server( protocol="TCP", ) base_url = self._exposure_to_base_url(exposure) - server = _OpenEnvServer( + server = OpenEnvServer( sandbox_id=sandbox.id, exposure_id=exposure.exposure_id, base_url=base_url, @@ -692,58 +640,6 @@ async def _launch_image_server( sandbox.id, "startup", e, image=image, logs=logs ) from e - def _launch_image_server_sync( - self, image: str, port: int, start_command: str, contract: str - ) -> _OpenEnvServer: - sandboxes = SandboxClient(APIClient()) - req = self._build_sandbox_request(image, start_command=start_command) - try: - sandbox = sandboxes.create(req) - except Exception as e: - raise vf.SandboxError( - f"Failed to create OpenEnv sandbox for image {image}." - ) from e - - exposure: Any | None = None - try: - sandboxes.wait_for_creation( - sandbox.id, - max_attempts=self.wait_for_creation_max_attempts, - ) - exposure = sandboxes.expose( - sandbox.id, - port=port, - name="openenv-env", - protocol="TCP", - ) - base_url = self._exposure_to_base_url(exposure) - server = _OpenEnvServer( - sandbox_id=sandbox.id, - exposure_id=exposure.exposure_id, - base_url=base_url, - port=port, - contract=contract, - ) - self._wait_for_ready_sync(server.base_url) - return server - except Exception as e: - logs = self._try_get_logs_sync(sandboxes, sandbox.id) - local_health = self._probe_local_health_sync(sandboxes, sandbox.id, port) - if local_health: - logs = (logs + "\n" if logs else "") + local_health - if exposure is not None: - try: - sandboxes.unexpose(sandbox.id, exposure.exposure_id) - except Exception: - pass - try: - sandboxes.delete(sandbox.id) - except Exception: - pass - raise self._format_sandbox_error( - sandbox.id, "startup", e, image=image, 
logs=logs - ) from e - async def _probe_local_health( self, sandboxes: AsyncSandboxClient, sandbox_id: str, port: int ) -> str | None: @@ -765,26 +661,6 @@ async def _probe_local_health( return f"Local /health probe stderr: {stderr}" return "Local /health probe returned no output." - def _probe_local_health_sync( - self, sandboxes: SandboxClient, sandbox_id: str, port: int - ) -> str | None: - cmd = f'sh -lc "curl -sS -m 2 http://localhost:{int(port)}/health 2>&1 || true"' - try: - result = sandboxes.execute_command( - sandbox_id, - cmd, - timeout=5, - ) - except Exception as e: - return f"Local /health probe failed to execute: {type(e).__name__}: {e}" - stdout = (getattr(result, "stdout", "") or "").strip() - stderr = (getattr(result, "stderr", "") or "").strip() - if stdout: - return f"Local /health probe stdout: {stdout}" - if stderr: - return f"Local /health probe stderr: {stderr}" - return "Local /health probe returned no output." - def _exposure_to_base_url(self, exposure: Any) -> str: endpoint = getattr(exposure, "external_endpoint", None) if isinstance(endpoint, str) and endpoint.strip(): @@ -802,68 +678,17 @@ def _exposure_to_base_url(self, exposure: Any) -> str: "OpenEnv sandbox exposure did not provide a usable endpoint URL." 
) - async def _invoke(self, fn: Any, *args: Any, **kwargs: Any) -> Any: - if inspect.iscoroutinefunction(fn): - return await fn(*args, **kwargs) - return await asyncio.to_thread(lambda: fn(*args, **kwargs)) - - def _next_mcp_request_id(self) -> int: - self._mcp_request_id += 1 - return self._mcp_request_id - - async def _mcp_rpc( - self, client: GenericEnvClient, method: str, params: dict[str, Any] - ) -> Any: - request = { - "type": "mcp", - "data": { - "jsonrpc": "2.0", - "id": self._next_mcp_request_id(), - "method": method, - "params": params, - }, - } - response = await self._invoke(cast(Any, client)._send_and_receive, request) - if not isinstance(response, dict): - raise RuntimeError(f"Invalid MCP response type: {type(response).__name__}") - data = response.get("data") - if not isinstance(data, dict): - raise RuntimeError("Invalid MCP response: missing JSON-RPC data object.") - if data.get("jsonrpc") != "2.0": - raise RuntimeError("Invalid MCP response: jsonrpc must be '2.0'.") - if "error" in data: - err = data["error"] - if isinstance(err, dict): - raise RuntimeError( - f"MCP RPC error ({err.get('code', 'unknown')}): {err.get('message', err)}" - ) - raise RuntimeError(f"MCP RPC error: {err}") - if "result" not in data: - raise RuntimeError("Invalid MCP response: missing result.") - return data["result"] - - async def _mcp_list_tools(self, client: GenericEnvClient) -> list[dict[str, Any]]: - result = await self._mcp_rpc(client, method="tools/list", params={}) - if not isinstance(result, dict): - raise RuntimeError("Invalid MCP tools/list result: expected object.") - tools = result.get("tools") - if not isinstance(tools, list): - raise RuntimeError("Invalid MCP tools/list result: missing tools list.") - parsed_tools: list[dict[str, Any]] = [] - for tool in tools: - if isinstance(tool, dict): - parsed_tools.append(tool) - if not parsed_tools: + async def _mcp_list_tools(self, client: Any) -> list[Any]: + tools = await client.list_tools() + if not 
isinstance(tools, list) or not tools: raise RuntimeError("MCP tools/list returned no usable tools.") - return parsed_tools + return tools async def _mcp_step_tool( - self, client: GenericEnvClient, tool_name: str, arguments: dict[str, Any] + self, client: Any, tool_name: str, arguments: dict[str, Any] ) -> Any: - return await self._invoke( - client.step, - {"type": "call_tool", "tool_name": tool_name, "arguments": arguments}, - ) + action = _call_tool_action_class()(tool_name=tool_name, arguments=arguments) + return await client.step(action) def _extract_mcp_tool_content(self, observation: Any) -> Any: if hasattr(observation, "model_dump"): @@ -918,31 +743,6 @@ async def _wait_for_ready( f"last error: {last_health_error}" ) - def _wait_for_ready_sync(self, base_url: str, timeout_s: int | None = None) -> None: - timeout = timeout_s if timeout_s is not None else self.startup_timeout_seconds - start = time.monotonic() - last_health_error = "no attempts" - while (time.monotonic() - start) < timeout: - ok, detail = self._check_health(base_url) - if ok: - return - last_health_error = detail - time.sleep(self.startup_poll_interval_seconds) - raise RuntimeError( - "OpenEnv server not ready. 
" - f"Health check timeout={timeout}s, url={base_url}, " - f"last error: {last_health_error}" - ) - - def _try_get_logs_sync( - self, sandboxes: SandboxClient, sandbox_id: str - ) -> str | None: - try: - logs = sandboxes.get_logs(sandbox_id) - except Exception: - return None - return self._trim_logs(logs) - def _check_health(self, base_url: str) -> tuple[bool, str]: try: resp = requests.get( @@ -955,18 +755,6 @@ def _check_health(self, base_url: str) -> tuple[bool, str]: except Exception as e: return False, f"{type(e).__name__}: {e}" - def _cleanup_server_sync(self, server: _OpenEnvServer) -> None: - sandboxes = SandboxClient(APIClient()) - try: - sandboxes.unexpose(server.sandbox_id, server.exposure_id) - except Exception: - pass - try: - sandboxes.delete(server.sandbox_id) - except Exception: - pass - self._active_servers.pop(server.sandbox_id, None) - async def _fetch_action_schema(self, base_url: str) -> dict[str, Any]: if self._action_schema is not None: return self._action_schema @@ -1151,23 +939,6 @@ def _looks_like_messages(self, value: Any) -> bool: return False return True - def _require_prompt_messages(self, state: vf.State) -> Messages: - current_prompt = state.get("prompt") - if self._looks_like_messages(current_prompt) and cast( - list[Any], current_prompt - ): - messages: Messages = [] - for raw_message in cast(list[Any], current_prompt): - if isinstance(raw_message, dict): - messages.append(from_raw_message(raw_message)) - elif hasattr(raw_message, "role") and hasattr(raw_message, "content"): - messages.append(cast(Message, raw_message)) - return messages - raise RuntimeError( - "OpenEnv dataset must include a non-empty `prompt`. " - "No prompt fallback is supported." 
- ) - def _convert_mcp_tools(self, tools: Iterable[Any]) -> list[Tool]: tool_defs: list[Tool] = [] for tool in tools: diff --git a/verifiers/rubrics/experimental/hybrid_math_rubric.py b/verifiers/rubrics/experimental/hybrid_math_rubric.py index ff548f6f9..0d4fdd7af 100644 --- a/verifiers/rubrics/experimental/hybrid_math_rubric.py +++ b/verifiers/rubrics/experimental/hybrid_math_rubric.py @@ -1,5 +1,3 @@ -from __future__ import annotations - import asyncio from openai import AsyncOpenAI diff --git a/verifiers/scripts/build.py b/verifiers/scripts/build.py index c3c756b8f..6d2f6c1c4 100644 --- a/verifiers/scripts/build.py +++ b/verifiers/scripts/build.py @@ -1,5 +1,3 @@ -from __future__ import annotations - import argparse import ast import json diff --git a/verifiers/scripts/init.py b/verifiers/scripts/init.py index 48ca9d791..41de3b572 100644 --- a/verifiers/scripts/init.py +++ b/verifiers/scripts/init.py @@ -118,7 +118,7 @@ version = "0.1.0" requires-python = ">=3.10" dependencies = [ - "verifiers[openenv]>={vf.__version__}", + "verifiers>={vf.__version__}", ] [build-system] @@ -151,23 +151,20 @@ def load_environment(**kwargs) -> vf.Environment: ''' OPENENV_ENVIRONMENT_TEMPLATE = """\ -from typing import Any - import verifiers as vf - - -def render_prompt(observation: Any) -> list[dict[str, Any]]: - if isinstance(observation, dict): - messages = observation.get("messages") - if isinstance(messages, list) and messages: - return messages - prompt = observation.get("prompt") - if isinstance(prompt, str) and prompt.strip(): - return [{"role": "user", "content": prompt}] - raise RuntimeError( - "OpenEnv observation did not include a renderable prompt. " - "Update render_prompt() for your project's observation schema." 
- ) +from verifiers.types import Messages, UserMessage + + +class OpenEnvPromptRenderer: + def __call__(self, observation: object) -> Messages: + if isinstance(observation, dict): + prompt = observation.get("prompt") + if isinstance(prompt, str) and prompt.strip(): + return [UserMessage(content=prompt)] + raise RuntimeError( + "OpenEnv observation did not include a renderable prompt. " + "Update OpenEnvPromptRenderer for your project's observation schema." + ) def load_environment( @@ -179,7 +176,7 @@ def load_environment( num_train_examples=num_train_examples, num_eval_examples=num_eval_examples, seed=seed, - prompt_renderer=render_prompt, + prompt_renderer=OpenEnvPromptRenderer(), ) """ @@ -221,7 +218,7 @@ def load_environment( description = "OpenEnv project bundled with a verifiers environment" requires-python = ">=3.10" dependencies = [ - "openenv-core[core]==0.2.1", + "openenv-core>=0.3.0", "fastapi>=0.115.0", "uvicorn>=0.24.0", ] diff --git a/verifiers/scripts/setup.py b/verifiers/scripts/setup.py index 29a1adc63..6408f1d4d 100644 --- a/verifiers/scripts/setup.py +++ b/verifiers/scripts/setup.py @@ -1,5 +1,3 @@ -from __future__ import annotations - import sys from collections.abc import Sequence diff --git a/verifiers/serve/server/env_router.py b/verifiers/serve/server/env_router.py index 9ea0649cd..a1dd5a4a7 100644 --- a/verifiers/serve/server/env_router.py +++ b/verifiers/serve/server/env_router.py @@ -8,8 +8,6 @@ client-facing socket knowledge. 
""" -from __future__ import annotations - import asyncio import logging import multiprocessing as mp diff --git a/verifiers/serve/types.py b/verifiers/serve/types.py index 834ce1f25..25ddbbc94 100644 --- a/verifiers/serve/types.py +++ b/verifiers/serve/types.py @@ -1,5 +1,3 @@ -from __future__ import annotations - from asyncio import Future from enum import Enum from typing import Annotated, Literal, TypeVar diff --git a/verifiers/types.py b/verifiers/types.py index 4eb8b957c..7bb8faebb 100644 --- a/verifiers/types.py +++ b/verifiers/types.py @@ -1,10 +1,8 @@ -from __future__ import annotations - import json import sys import time import uuid -from collections.abc import Mapping +from collections.abc import Iterable, Mapping from copy import deepcopy from pathlib import Path from typing import ( @@ -14,6 +12,8 @@ Callable, Literal, TypeAlias, + TypeVar, + overload, cast, ) @@ -45,6 +45,17 @@ "anthropic_messages", "nemorl_chat_completions", ] +EndpointApi = Literal[ + "chat", + "chat_completions", + "completions", + "responses", + "messages", + "openai_chat_completions", + "openai_completions", + "openai_responses", + "anthropic_messages", +] MessageType = Literal["chat", "completion"] # deprecated @@ -383,12 +394,54 @@ class RolloutOutput(dict): token_usage: TokenUsage +_MISSING = object() +_DefaultValue = TypeVar("_DefaultValue") +_BorrowTarget = Literal["model", "sandbox"] +_ToolTarget = str | Iterable[str] +_TranscriptMode = Literal["private", "append"] + + +class StateForTaskDescriptor: + def __get__( + self, instance: "State | None", owner: type["State"] + ) -> Callable[..., "State"]: + def create( + task: Mapping[str, Any], + *, + borrow: _BorrowTarget | Iterable[_BorrowTarget] = (), + tools: _ToolTarget = (), + transcript: _TranscriptMode = "private", + ) -> "State": + state = _state_for_task(owner, task, source_state=instance) + if instance is not None: + _borrow_from_state(state, instance, borrow, tools, transcript) + elif borrow or tools: + raise 
ValueError("State.for_task borrow/tools requires a source state.") + elif transcript != "private": + raise ValueError( + "State.for_task transcript='append' requires a source state." + ) + return state + + return create + + class State(dict): + for_task = StateForTaskDescriptor() + INPUT_FIELDS = ["prompt", "answer", "info", "example_id"] + INTERNAL_KEYS = {"is_completed", "stop_condition", "is_truncated", "error"} + RUNTIME_HANDLE_KEYS = {"runtime_id", "client_key"} + ENDPOINT_HANDLE_KEYS = { + "endpoint_rollout_key", + "endpoint_root_url", + "endpoint_base_url", + } + # rollout inputs input: RolloutInput task: dict[str, Any] - client: Client + client: "Client" model: str sampling_args: SamplingArgs | None # created during rollout @@ -402,9 +455,22 @@ class State(dict): advantage: float | None metrics: dict[str, float] | None timing: RolloutTiming | None - error: Error | ErrorInfo | None + error: "Error | ErrorInfo | None" usage: TokenUsage | None usage_tracker: object + _vf_state_contract: Literal["legacy", "v1"] + + def __init__(self, *args: Any, **kwargs: Any): + super().__init__(*args, **kwargs) + self._vf_state_contract = "legacy" + + @property + def uses_v1_contract(self) -> bool: + return self._vf_state_contract == "v1" + + def _enable_v1_contract(self) -> "State": + self._vf_state_contract = "v1" + return self def __getitem__(self, key: str) -> Any: # forward to input if exists @@ -421,6 +487,8 @@ def get(self, key: str, default: Any = None) -> Any: return default def __setitem__(self, key: str, value: Any) -> None: + if self.uses_v1_contract and key in self.INTERNAL_KEYS: + raise RuntimeError(_internal_key_error(key)) # forward to input if exists if key in self.INPUT_FIELDS and "input" in self: input_obj = super().__getitem__("input") @@ -429,8 +497,231 @@ def __setitem__(self, key: str, value: Any) -> None: return super().__setitem__(key, value) + def __delitem__(self, key: str) -> None: + if self.uses_v1_contract and key in self.INTERNAL_KEYS: + raise 
RuntimeError(_internal_key_error(key)) + super().__delitem__(key) + + def update(self, *args: Any, **kwargs: Any) -> None: + values = dict(*args, **kwargs) + if self.uses_v1_contract: + for key, value in values.items(): + self[str(key)] = value + return + super().update(values) + + @overload + def pop(self, key: str) -> Any: ... + + @overload + def pop(self, key: str, default: _DefaultValue) -> Any | _DefaultValue: ... + + def pop(self, key: str, default: Any = _MISSING) -> Any: + if self.uses_v1_contract and key in self.INTERNAL_KEYS: + raise RuntimeError(_internal_key_error(key)) + if default is _MISSING: + return super().pop(key) + return super().pop(key, default) + + def popitem(self) -> tuple[str, Any]: + if not self.uses_v1_contract: + return super().popitem() + for key in reversed(self.keys()): + if key not in self.INTERNAL_KEYS: + return key, super().pop(key) + raise RuntimeError("State.popitem() cannot remove framework-managed fields.") + + def clear(self) -> None: + if self.uses_v1_contract: + raise RuntimeError( + "State.clear() cannot preserve framework-managed fields." 
+ ) + super().clear() + + def setdefault(self, key: object, default: Any = None, /) -> Any: + if self.uses_v1_contract and isinstance(key, str) and key in self.INTERNAL_KEYS: + raise RuntimeError(_internal_key_error(key)) + return super().setdefault(key, default) + + def __ior__(self, other: object) -> "State": + self.update(other) + return self + + def _set_internal(self, key: str, value: Any) -> None: + if key not in self.INTERNAL_KEYS: + raise KeyError(f"{key!r} is not a framework-managed state key.") + super().__setitem__(key, value) + + def _set_completed(self, value: bool = True) -> None: + self._set_internal("is_completed", value) + + def _set_error(self, value: Any) -> None: + self._set_internal("error", value) + + def _set_stop_condition( + self, value: str | None, *, overwrite: bool = False + ) -> None: + if overwrite or self.get("stop_condition") is None: + self._set_internal("stop_condition", value) + + def _set_truncated(self, value: bool = True, *, overwrite: bool = False) -> None: + current = bool(self.get("is_truncated", False)) + self._set_internal( + "is_truncated", bool(value) if overwrite else current or bool(value) + ) + + def stop(self, condition: str = "state_done") -> None: + if not isinstance(condition, str) or not condition: + raise TypeError("State.stop condition must be a non-empty string.") + super().__setitem__("done", True) + self._set_completed(True) + self._set_stop_condition(condition, overwrite=True) + + def runtime_state(self) -> dict[str, Any]: + raw_runtime = self.setdefault("runtime", {}) + if not isinstance(raw_runtime, dict): + raise TypeError("state.runtime must be a mapping.") + return cast(dict[str, Any], raw_runtime) + + def _runtime(self) -> Any: + from verifiers.v1.utils.runtime_registry import load_runtime_from_state + + return load_runtime_from_state(self) + + def get_model(self) -> str: + runtime = self.get("runtime", {}) + if isinstance(runtime, Mapping): + model = runtime.get("model") + if isinstance(model, str) 
and model: + return model + resolved = runtime.get("resolved") + if isinstance(resolved, Mapping): + handle = resolved.get("model") + if isinstance(handle, Mapping): + model = handle.get("model") + if isinstance(model, str) and model: + return model + try: + return self._runtime().model(self) + except RuntimeError as exc: + raise RuntimeError("State has no resolved model.") from exc + + def get_max_turns(self, default: int) -> int: + runtime = self.get("runtime", {}) + if isinstance(runtime, Mapping) and "max_turns" in runtime: + value = runtime["max_turns"] + if value is None: + return default + if not isinstance(value, int) or isinstance(value, bool): + raise TypeError("state.runtime.max_turns must be an integer.") + return value + return default + + def get_client( + self, + api: EndpointApi | ClientType = "chat_completions", + *, + sync: bool = False, + ) -> object: + from verifiers.v1.utils.endpoint_utils import client_from_state + + return client_from_state(self, api, sync=sync) + + def get_endpoint_config( + self, + api: EndpointApi | ClientType = "chat_completions", + ) -> dict[str, str]: + from verifiers.v1.utils.endpoint_utils import endpoint_config_from_state + + return endpoint_config_from_state(self, api) + + def get_tools(self) -> dict[str, Callable[..., Any]]: + from verifiers.v1.utils.tool_utils import load_tools_from_state + + return load_tools_from_state(self) + + def _runtime_handles(self) -> dict[str, Any]: + runtime = self.runtime_state() + handles = runtime.setdefault("resolved", {}) + if not isinstance(handles, dict): + raise TypeError("state.runtime.resolved must be a mapping.") + return handles + + def _runtime_handle(self, name: str) -> dict[str, Any]: + runtime = self.runtime_state() + handles = runtime.get("resolved") + if handles is not None: + if not isinstance(handles, Mapping): + raise TypeError("state.runtime.resolved must be a mapping.") + existing = handles.get(name) + if existing is not None: + if not isinstance(existing, 
Mapping): + raise TypeError(f"state.runtime.resolved.{name} must be a mapping.") + return dict(existing) + + runtime_id = runtime.get("runtime_id") + if not isinstance(runtime_id, str) or not runtime_id: + raise RuntimeError("State has no live runtime id.") + if name == "model": + client_key = runtime.get("client_key") + if not isinstance(client_key, str) or not client_key: + raise RuntimeError("State has no resolved model client.") + handle: dict[str, Any] = { + "runtime_id": runtime_id, + "client_key": client_key, + } + for key in ("model", "client_type", "sampling_args"): + if key in runtime: + handle[key] = runtime[key] + return handle + if name == "endpoint": + return {"runtime_id": runtime_id} + if name == "trajectory": + runtime_obj = self._runtime() + runtime_obj.register_trajectory(self) + trajectory = self.get("trajectory") or [] + if not isinstance(trajectory, list): + raise TypeError("state.trajectory must be a list.") + return { + "runtime_id": runtime_id, + "trajectory_id": str(self["trajectory_id"]), + "start": len(trajectory), + } + if name == "sandbox": + sandbox = runtime.get("sandbox") + if not isinstance(sandbox, Mapping): + raise RuntimeError("State has no resolved primary sandbox.") + handle = dict(sandbox) + handle["runtime_id"] = runtime_id + return handle + raise KeyError(f"Unknown runtime handle {name!r}.") + + def _tools_handle(self, names: _ToolTarget) -> dict[str, Any] | None: + tool_names = tuple(_tool_names(names)) + if not tool_names: + return None + runtime = self._runtime() + handle_id = runtime.register_tool_handle(self, tool_names) + return { + "runtime_id": runtime.runtime_id, + "handle_id": handle_id, + "names": list(tool_names), + } + + def _use_runtime_handle(self, name: str, handle: Mapping[str, Any]) -> "State": + self._runtime_handles()[name] = dict(handle) + return self + + def strip_runtime_handles(self) -> None: + _strip_runtime_handles(self) + + def finalize(self) -> "State": + self.strip_runtime_handles() + 
self.assert_serializable() + return self + @classmethod - def for_task(cls, task: Mapping[str, Any]) -> State: + def _legacy_for_task(cls, task: Mapping[str, Any]) -> "State": state = cls( { "task": dict(task), @@ -462,6 +753,125 @@ def assert_serializable(self) -> None: assert_json_serializable(self) +def _internal_key_error(key: str) -> str: + if key == "is_completed": + return ( + "state['is_completed'] is framework-managed; use state.stop(...), " + "state['done'], or @vf.stop." + ) + if key == "stop_condition": + return ( + "state['stop_condition'] is framework-managed; use state.stop(...), " + "state['done'], or @vf.stop." + ) + if key == "is_truncated": + return ( + "state['is_truncated'] is framework-managed; raise an overlong-prompt " + "error or let trajectory sync set it." + ) + if key == "error": + return "state['error'] is framework-managed; raise vf.Error instead." + return f"state[{key!r}] is framework-managed." + + +def _state_for_task( + cls: type[State], task: Mapping[str, Any], source_state: State | None = None +) -> State: + if _uses_v1_contract(task, source_state): + return _v1_state_for_task(cls, task) + return cls._legacy_for_task(task) + + +def _uses_v1_contract(task: Mapping[str, Any], source_state: State | None) -> bool: + if source_state is not None and source_state.uses_v1_contract: + return True + return getattr(task, "_vf_state_contract", "legacy") == "v1" + + +def _v1_state_for_task(cls: type[State], task: Mapping[str, Any]) -> State: + from verifiers.v1.utils.timing_utils import timing_record + + state = cls( + { + "task": dict(task), + "runtime": {}, + "trajectory": [], + "trajectory_id": uuid.uuid4().hex, + "artifacts": {}, + "metrics": {}, + "reward": 0.0, + "completion": None, + "timing": timing_record(), + } + )._enable_v1_contract() + state._set_completed(False) + state._set_truncated(False, overwrite=True) + state._set_stop_condition(None, overwrite=True) + state._set_error(None) + for key in ("prompt", "info", "example_id"): + 
if key in task: + state[key] = deepcopy(task[key]) + return state + + +def _borrow_from_state( + state: State, + source: State, + borrow: _BorrowTarget | Iterable[_BorrowTarget], + tools: _ToolTarget, + transcript: _TranscriptMode, +) -> None: + if transcript not in {"private", "append"}: + raise ValueError("transcript must be 'private' or 'append'.") + for name in _borrow_targets(borrow): + if name not in {"model", "sandbox"}: + raise KeyError(f"Unknown borrow target {name!r}.") + state._use_runtime_handle(name, source._runtime_handle(name)) + tools_handle = source._tools_handle(tools) + if tools_handle is not None: + state._use_runtime_handle("tools", tools_handle) + if transcript == "append": + state._use_runtime_handle("trajectory", source._runtime_handle("trajectory")) + + +def _borrow_targets( + borrow: _BorrowTarget | Iterable[_BorrowTarget], +) -> Iterable[_BorrowTarget]: + if isinstance(borrow, str): + return (cast(_BorrowTarget, borrow),) + return borrow + + +def _tool_names(tools: _ToolTarget) -> Iterable[str]: + if isinstance(tools, str): + return (tools,) + return tools + + +def _strip_runtime_handles(value: object) -> None: + if isinstance(value, State) or type(value) is dict: + mapping = cast(dict[str, Any], value) + for key in State.RUNTIME_HANDLE_KEYS: + mapping.pop(key, None) + runtime = mapping.get("runtime") + if type(runtime) is dict: + runtime_mapping = cast(dict[str, Any], runtime) + runtime_mapping.pop("resolved", None) + for key in State.RUNTIME_HANDLE_KEYS: + runtime_mapping.pop(key, None) + sandbox = runtime_mapping.get("sandbox") + if type(sandbox) is dict: + cast(dict[str, Any], sandbox).pop("lease_key", None) + for key in State.ENDPOINT_HANDLE_KEYS: + mapping.pop(key, None) + for item in list(mapping.values()): + _strip_runtime_handles(item) + return + if isinstance(value, list): + for item in value: + _strip_runtime_handles(item) + + def assert_json_serializable(value: object) -> None: try: json.dumps(value) diff --git 
a/verifiers/utils/data_utils.py b/verifiers/utils/data_utils.py index 7eafff409..168d895a3 100644 --- a/verifiers/utils/data_utils.py +++ b/verifiers/utils/data_utils.py @@ -1,7 +1,5 @@ # NOTE: Helper functions for example datasets. Not intended for core functionality. -from __future__ import annotations - import random from typing import TYPE_CHECKING, Any, Callable, cast @@ -23,13 +21,13 @@ def format_dataset( - dataset: Dataset, + dataset: "Dataset", system_prompt: str | None = None, few_shot: Messages | None = None, question_key: str = "question", answer_key: str = "answer", map_kwargs: dict = {}, -) -> Dataset: +) -> "Dataset": """ Create `example_id` and `prompt` columns if not present. """ @@ -271,7 +269,7 @@ def preprocess_prime_code(x: dict[str, Any]) -> dict[str, Any]: def load_example_dataset( name: str = "gsm8k", split: str | None = None, n: int | None = None, seed: int = 0 -) -> Dataset: +) -> "Dataset": from datasets import Dataset, concatenate_datasets, load_dataset if name == "aime2024": diff --git a/verifiers/utils/display_utils.py b/verifiers/utils/display_utils.py index b53c91420..2bbdb6e95 100644 --- a/verifiers/utils/display_utils.py +++ b/verifiers/utils/display_utils.py @@ -108,7 +108,7 @@ def emit(self, record: logging.LogRecord) -> None: pass -class _FDToLogger(threading.Thread): +class FDToLogger(threading.Thread): """Background reader that forwards a file descriptor's output to a logger.""" def __init__( @@ -175,8 +175,8 @@ def __init__(self, screen: bool = False, refresh_per_second: int = 4) -> None: self._old_stdout_fd: int | None = None self._old_stderr_fd: int | None = None self._console_file: io.TextIOWrapper | None = None - self._stdout_thread: _FDToLogger | None = None - self._stderr_thread: _FDToLogger | None = None + self._stdout_thread: FDToLogger | None = None + self._stderr_thread: FDToLogger | None = None self._key_listener_thread: threading.Thread | None = None self._key_listener_stop: threading.Event | None = None @@ 
-265,13 +265,13 @@ def start(self) -> None: os.close(stdout_w) os.dup2(stderr_w, 2) os.close(stderr_w) - self._stdout_thread = _FDToLogger( + self._stdout_thread = FDToLogger( stdout_r, logger.getChild("stdout"), logging.INFO, getattr(self._old_stdout, "encoding", None), ) - self._stderr_thread = _FDToLogger( + self._stderr_thread = FDToLogger( stderr_r, logger.getChild("stderr"), logging.ERROR, diff --git a/verifiers/utils/env_config_utils.py b/verifiers/utils/env_config_utils.py index 8de418f29..51e1114fd 100644 --- a/verifiers/utils/env_config_utils.py +++ b/verifiers/utils/env_config_utils.py @@ -1,5 +1,3 @@ -from __future__ import annotations - from collections.abc import Mapping from typing import cast diff --git a/verifiers/utils/env_utils.py b/verifiers/utils/env_utils.py index f22c8499d..07e95674d 100644 --- a/verifiers/utils/env_utils.py +++ b/verifiers/utils/env_utils.py @@ -69,7 +69,7 @@ def load_environment(env_id: str, **env_args) -> Environment: if default_values: logger.info(f"Using default args: {', '.join(default_values)}") - call_env_args = coerce_typed_env_config(env_load_func, sig, env_args) + call_env_args = prepare_typed_env_config(env_load_func, sig, env_args) env_instance: Environment = env_load_func(**call_env_args) env_instance.env_id = env_instance.env_id or env_id env_instance.env_args = env_instance.env_args or env_args @@ -94,19 +94,22 @@ def load_environment(env_id: str, **env_args) -> Environment: raise RuntimeError(f"Failed to load environment '{env_id}': {str(e)}") from e -def coerce_typed_env_config( +def prepare_typed_env_config( env_load_func: Callable[..., Environment], sig: inspect.Signature, env_args: dict, ) -> dict: - if "config" not in env_args: - return env_args config_type = env_config_annotation(env_load_func, sig) if config_type is None: return env_args + if "config" not in env_args: + call_env_args = dict(env_args) + call_env_args["config"] = config_type() + return call_env_args + config = env_args["config"] - if 
config is None or isinstance(config, config_type): + if isinstance(config, config_type): return env_args call_env_args = dict(env_args) diff --git a/verifiers/utils/eval_utils.py b/verifiers/utils/eval_utils.py index 0c70d1fb9..665b2b457 100644 --- a/verifiers/utils/eval_utils.py +++ b/verifiers/utils/eval_utils.py @@ -1,5 +1,3 @@ -from __future__ import annotations - import asyncio import itertools import json diff --git a/verifiers/utils/import_utils.py b/verifiers/utils/import_utils.py index a6f4b3e58..30fdba16e 100644 --- a/verifiers/utils/import_utils.py +++ b/verifiers/utils/import_utils.py @@ -1,5 +1,3 @@ -from __future__ import annotations - import importlib from typing import Any, cast diff --git a/verifiers/utils/message_utils.py b/verifiers/utils/message_utils.py index cdb01a05b..17e0e58bb 100644 --- a/verifiers/utils/message_utils.py +++ b/verifiers/utils/message_utils.py @@ -1,8 +1,8 @@ import json import logging import re -from collections.abc import Mapping -from typing import Any, cast +from collections.abc import Mapping, Sequence +from typing import Any, Literal, TypeAlias, cast, overload from rich.text import Text @@ -22,6 +22,10 @@ logger = logging.getLogger(__name__) +MessageLike: TypeAlias = Message | Mapping[str, object] +MessageInput: TypeAlias = str | Sequence[MessageLike] +MessageRole: TypeAlias = Literal["text", "system", "user", "assistant", "tool"] + def from_raw_content_part(part: dict[str, Any]) -> ContentPart: """Convert a raw content-part dict to a typed content part when possible.""" @@ -122,7 +126,7 @@ def from_raw_message(message: dict) -> Message: def normalize_messages( - value: Messages | str, *, field_name: str = "messages" + value: MessageInput, *, field_name: str = "messages" ) -> Messages: """Normalize raw/string message inputs into provider-agnostic Message objects.""" if isinstance(value, str): @@ -142,6 +146,64 @@ def normalize_messages( return normalized +@overload +def get_messages( + messages: Sequence[MessageLike], 
role: Literal["assistant"] +) -> list[AssistantMessage]: ... + + +@overload +def get_messages( + messages: Sequence[MessageLike], role: Literal["system"] +) -> list[SystemMessage]: ... + + +@overload +def get_messages( + messages: Sequence[MessageLike], role: Literal["user"] +) -> list[UserMessage]: ... + + +@overload +def get_messages( + messages: Sequence[MessageLike], role: Literal["tool"] +) -> list[ToolMessage]: ... + + +@overload +def get_messages( + messages: Sequence[MessageLike], role: Literal["text"] +) -> list[TextMessage]: ... + + +@overload +def get_messages(messages: Sequence[MessageLike], role: None = None) -> Messages: ... + + +def get_messages( + messages: Sequence[MessageLike], role: MessageRole | None = None +) -> Messages: + """Return typed transcript messages, optionally filtered by role.""" + normalized = normalize_messages(messages) + if role is None: + return normalized + return [message for message in normalized if message.role == role] + + +def message_role(message: MessageLike) -> str | None: + if isinstance(message, Mapping): + value = message.get("role") + else: + value = getattr(message, "role", None) + return value if isinstance(value, str) else None + + +def message_to_dict(message: MessageLike) -> dict[str, object]: + if isinstance(message, Mapping): + return dict(message) + return cast(dict[str, object], message.model_dump(exclude_none=True)) + + def maybe_normalize_messages( value: Messages | str, *, diff --git a/verifiers/utils/metric_utils.py b/verifiers/utils/metric_utils.py index 66f204028..6c8b543f6 100644 --- a/verifiers/utils/metric_utils.py +++ b/verifiers/utils/metric_utils.py @@ -68,7 +68,7 @@ def extract(self, output: RolloutOutput) -> float: return 1.0 if output.get("error") is not None else 0.0 -class _TokenUsageKeyMetric(MeanMetric): +class TokenUsageKeyMetric(MeanMetric): """Mean of a specific key in token_usage (skips outputs without it).""" _key: str = "" @@ -80,25 +80,25 @@ def extract(self, output: 
RolloutOutput) -> float | None: return None -class InputTokensMetric(_TokenUsageKeyMetric): +class InputTokensMetric(TokenUsageKeyMetric): """Mean input_tokens per output.""" _key = "input_tokens" -class OutputTokensMetric(_TokenUsageKeyMetric): +class OutputTokensMetric(TokenUsageKeyMetric): """Mean output_tokens per output.""" _key = "output_tokens" -class FinalInputTokensMetric(_TokenUsageKeyMetric): +class FinalInputTokensMetric(TokenUsageKeyMetric): """Mean final_input_tokens (non-completion context tokens) per output.""" _key = "final_input_tokens" -class FinalOutputTokensMetric(_TokenUsageKeyMetric): +class FinalOutputTokensMetric(TokenUsageKeyMetric): """Mean final_output_tokens (completion context tokens) per output.""" _key = "final_output_tokens" diff --git a/verifiers/utils/pricing_utils.py b/verifiers/utils/pricing_utils.py index 8d7fd2942..cdc7128df 100644 --- a/verifiers/utils/pricing_utils.py +++ b/verifiers/utils/pricing_utils.py @@ -1,5 +1,3 @@ -from __future__ import annotations - import asyncio import json import logging diff --git a/verifiers/utils/save_utils.py b/verifiers/utils/save_utils.py index 78e4490f9..73fd11a50 100644 --- a/verifiers/utils/save_utils.py +++ b/verifiers/utils/save_utils.py @@ -15,6 +15,7 @@ ErrorInfo, GenerateMetadata, GenerateOutputs, + Response, RolloutOutput, SamplingArgs, State, @@ -39,9 +40,7 @@ from verifiers.utils.path_utils import get_results_path from verifiers.utils.usage_utils import ( StateUsageTracker, -) -from verifiers.utils.usage_utils import ( - extract_usage_tokens as extract_usage_tokens_from_response, + response_usage_tokens, ) from verifiers.utils.version_utils import get_version_info @@ -99,52 +98,83 @@ def make_serializable(value: object) -> str | int | float | bool | list | dict | return str(value) -def extract_usage_tokens(response: object) -> tuple[int, int]: - return extract_usage_tokens_from_response(response) +def _token_count(value: object, context: str) -> float: + if isinstance(value, 
bool) or not isinstance(value, int | float): + raise TypeError(f"{context} must be a number.") + if value < 0: + raise ValueError(f"{context} must be non-negative.") + return float(value) -def _coerce_token_usage(value: object) -> TokenUsage | None: +def _token_usage_from_mapping(value: object, context: str) -> TokenUsage | None: + if value is None: + return None if not isinstance(value, Mapping): + raise TypeError(f"{context} must be a mapping.") + mapping_value = cast(Mapping[str, object], value) + if "input_tokens" not in mapping_value and "output_tokens" not in mapping_value: return None - mapping_value = cast(Mapping[str, Any], value) - try: - # Accept current and legacy key names - input_raw = mapping_value.get("input_tokens") - if input_raw is None: - input_raw = mapping_value.get("prefill_tokens") - output_raw = mapping_value.get("output_tokens") - if output_raw is None: - output_raw = mapping_value.get("decode_tokens") - input_tokens = float(0.0 if input_raw is None else input_raw) - output_tokens = float(0.0 if output_raw is None else output_raw) - except (TypeError, ValueError): + if "input_tokens" not in mapping_value or "output_tokens" not in mapping_value: + raise KeyError(f"{context} requires input_tokens and output_tokens.") + usage = TokenUsage( + input_tokens=_token_count( + mapping_value["input_tokens"], f"{context}.input_tokens" + ), + output_tokens=_token_count( + mapping_value["output_tokens"], f"{context}.output_tokens" + ), + ) + for key in ("final_input_tokens", "final_output_tokens"): + if key in mapping_value and mapping_value[key] is not None: + usage[key] = _token_count(mapping_value[key], f"{context}.{key}") + return usage + + +def _token_usage_from_trajectory(trajectory: object) -> TokenUsage | None: + if not isinstance(trajectory, list): return None - return { - "input_tokens": input_tokens, - "output_tokens": output_tokens, - } + input_tokens = 0 + output_tokens = 0 + usage_seen = False + for index, step in enumerate(trajectory): + 
if not isinstance(step, Mapping): + raise TypeError(f"state.trajectory[{index}] must be a mapping.") + step_mapping = cast(Mapping[str, object], step) + response = step_mapping.get("response") + if response is None or not isinstance(response, Response): + continue + if response.usage is None: + continue + usage_seen = True + step_input_tokens, step_output_tokens = response_usage_tokens(response) + input_tokens += step_input_tokens + output_tokens += step_output_tokens + if not usage_seen: + return None + return TokenUsage( + input_tokens=float(input_tokens), + output_tokens=float(output_tokens), + ) def _extract_state_token_usage(state: State) -> TokenUsage | None: tracker = state.get("usage_tracker") if isinstance(tracker, StateUsageTracker): usage = tracker.snapshot() - coerced = _coerce_token_usage(usage) - if coerced is not None: - return coerced - # Tracker exists but has not seen usage yet. Avoid falling through to - # state["usage"], which is a zeroed live tracker view. - token_usage = _coerce_token_usage(state.get("token_usage")) - if token_usage is not None: - return token_usage - return None - - for key in ("token_usage", "usage"): - usage = _coerce_token_usage(state.get(key)) if usage is not None: return usage + token_usage = _token_usage_from_mapping( + state.get("token_usage"), "state.token_usage" + ) + if token_usage is not None: + return token_usage + else: + for key in ("token_usage", "usage"): + usage = _token_usage_from_mapping(state.get(key), f"state.{key}") + if usage is not None: + return usage - return None + return _token_usage_from_trajectory(state.get("trajectory")) def get_hf_hub_dataset_name(outputs: GenerateOutputs) -> str: @@ -194,26 +224,6 @@ def state_to_output( tool_defs=state.get("tool_defs"), ) usage = _extract_state_token_usage(state) - if usage is None: - # Legacy fallback for states that do not use state-level usage tracking. 
- trajectory = state.get("trajectory", []) - input_tokens = 0 - output_tokens = 0 - usage_seen = False - for step in trajectory: - response = step.get("response") - if response is None: - continue - if getattr(response, "usage", None) is not None: - usage_seen = True - step_input_tokens, step_output_tokens = extract_usage_tokens(response) - input_tokens += step_input_tokens - output_tokens += step_output_tokens - if usage_seen: - usage = { - "input_tokens": float(input_tokens), - "output_tokens": float(output_tokens), - } if usage is not None: token_usage: dict[str, float] = { "input_tokens": usage.get("input_tokens", 0.0), @@ -221,7 +231,7 @@ def state_to_output( } # Add context token metrics from trajectory trajectory = state.get("trajectory", []) - if trajectory: + if isinstance(trajectory, list): from verifiers.utils.usage_utils import compute_context_token_metrics token_usage.update(compute_context_token_metrics(trajectory)) diff --git a/verifiers/utils/usage_utils.py b/verifiers/utils/usage_utils.py index ea3f57db6..c8e3921b0 100644 --- a/verifiers/utils/usage_utils.py +++ b/verifiers/utils/usage_utils.py @@ -1,56 +1,20 @@ -from collections.abc import Mapping -import math +from collections.abc import Mapping, Sequence from types import MappingProxyType -from verifiers.types import TokenUsage - - -def _get_usage_value(usage_obj: object, key: str) -> int | float: - if isinstance(usage_obj, Mapping): - return usage_obj.get(key, 0) # type: ignore[return-value] - return getattr(usage_obj, key, 0) - - -def _coerce_usage_int(value: object) -> int: - """Best-effort usage coercion. 
Invalid values degrade to zero.""" - if value is None: - return 0 - if isinstance(value, bool): - return int(value) - if isinstance(value, int): - return max(0, value) - if isinstance(value, float): - if math.isnan(value) or math.isinf(value): - return 0 - return max(0, int(value)) - if isinstance(value, str): - stripped = value.strip() - if not stripped: - return 0 - try: - return max(0, int(stripped)) - except (TypeError, ValueError): - try: - parsed = float(stripped) - if math.isnan(parsed) or math.isinf(parsed): - return 0 - return max(0, int(parsed)) - except (TypeError, ValueError): - return 0 - return 0 - - -def extract_usage_tokens(response: object) -> tuple[int, int]: - usage = getattr(response, "usage", None) +from verifiers.types import Response, TokenUsage, Usage + + +def response_usage_tokens(response: Response) -> tuple[int, int]: + usage = response.usage if usage is None: return 0, 0 + return usage_tokens(usage) + - prompt_tokens = _get_usage_value(usage, "prompt_tokens") - completion_tokens = _get_usage_value(usage, "completion_tokens") - if not prompt_tokens and not completion_tokens: - prompt_tokens = _get_usage_value(usage, "input_tokens") - completion_tokens = _get_usage_value(usage, "output_tokens") - return _coerce_usage_int(prompt_tokens), _coerce_usage_int(completion_tokens) +def usage_tokens(usage: Usage) -> tuple[int, int]: + if usage.prompt_tokens < 0 or usage.completion_tokens < 0: + raise ValueError("Response usage tokens must be non-negative.") + return usage.prompt_tokens, usage.completion_tokens class StateUsageTracker: @@ -86,10 +50,10 @@ def increment( self._usage_totals["input_tokens"] += input_delta self._usage_totals["output_tokens"] += output_delta - def increment_from_response(self, response: object) -> None: - if getattr(response, "usage", None) is None: + def increment_from_response(self, response: Response) -> None: + if response.usage is None: return - input_tokens, output_tokens = extract_usage_tokens(response) + 
input_tokens, output_tokens = response_usage_tokens(response) self.increment(input_tokens, output_tokens, mark_seen=True) def snapshot(self) -> TokenUsage | None: @@ -102,7 +66,7 @@ def snapshot(self) -> TokenUsage | None: def compute_context_token_metrics( - trajectory: list, + trajectory: Sequence[Mapping[str, object]], ) -> dict[str, float]: """Compute context token metrics from the trajectory. @@ -128,9 +92,9 @@ def compute_context_token_metrics( found = False for step in reversed(trajectory): response = step.get("response") - if response is None or getattr(response, "usage", None) is None: + if not isinstance(response, Response) or response.usage is None: continue - prompt_tokens, completion_tokens = extract_usage_tokens(response) + prompt_tokens, completion_tokens = response_usage_tokens(response) last_step_total = prompt_tokens + completion_tokens found = True break @@ -142,9 +106,9 @@ def compute_context_token_metrics( total_completion = 0 for step in trajectory: response = step.get("response") - if response is None or getattr(response, "usage", None) is None: + if not isinstance(response, Response) or response.usage is None: continue - _, completion_tokens = extract_usage_tokens(response) + _, completion_tokens = response_usage_tokens(response) total_completion += completion_tokens return { diff --git a/verifiers/utils/version_utils.py b/verifiers/utils/version_utils.py index fce6d84c0..76e29f2c2 100644 --- a/verifiers/utils/version_utils.py +++ b/verifiers/utils/version_utils.py @@ -1,7 +1,5 @@ """Utilities for detecting verifiers and environment version/commit info.""" -from __future__ import annotations - import importlib.metadata import logging import subprocess diff --git a/verifiers/v1/ENVIRONMENT_BEST_PRACTICES.md b/verifiers/v1/ENVIRONMENT_BEST_PRACTICES.md new file mode 100644 index 000000000..92a04caf7 --- /dev/null +++ b/verifiers/v1/ENVIRONMENT_BEST_PRACTICES.md @@ -0,0 +1,73 @@ +# v1 Environment Best Practices + +This is the working checklist 
for building environments with the v1 +Taskset/Harness pattern. + +## Do + +- Expose `load_environment(config: vf.EnvConfig) -> vf.Env` for v1 + environments. The loader receives a typed config object from the caller. +- Import the public API with `import verifiers as vf`. `verifiers.v1` remains + available for framework-internal tests and narrow module-level checks, but + user environment code should use the top-level namespace. +- Use typed Pydantic config objects in Python code. Raw mappings are for TOML, + CLI, and other external boundaries. +- Keep `config` parameters to one concrete Pydantic config type or `None`. + Do not advertise unions of mappings, base configs, and specific configs. +- Treat `Mapping[str, object]` as an explicit boundary type. Accept it only for + intentionally dynamic payloads such as task rows, protocol messages, + sandbox/program specs, or Pydantic config fields that store arbitrary user + objects. Prefer a named alias such as `ConfigMap`, `TaskRow`, or + `Objects` over spelling the broad type in user-facing + signatures. +- Do not use raw `Any` in v1 environment code. If a value is intentionally + arbitrary, give that boundary a named type in `verifiers.v1.types`. +- Put task/data/scoring settings on a `TasksetConfig` owned by the taskset. +- Put rollout program/runtime settings on a `HarnessConfig` owned by the + harness. For example, `vf.RLM` takes `vf.RLMConfig`. +- Keep environment files as wiring: taskset construction, harness construction, + and small policy choices that compose the two. +- Prefer v1-native framework objects over stdlib-shaped user code. User-facing + environment code should read as Verifiers code first. +- Use `Taskset(objects=..., bindings=...)` for shared extractors, judges, + clients, and other dependencies that signal functions need. +- Compose related categories inside one taskset only when they share the same + harness lifecycle and scoring contract. 
+- Expose explicit typed loaders for separate v1 envs when categories need + different tasksets, harnesses, or lifecycle behavior. +- Keep dual v0/v1 loaders explicit only while migration is intentionally + dual-stack. + +## Don't + +- Do not mirror every taskset or harness config field as `load_environment` + kwargs. +- Do not put harness settings on taskset configs or taskset settings on harness + configs. +- Do not add environment-level config subclasses to carry fields already owned + by a taskset or harness config. +- Do not wrap v1 `Env` objects in the v0 `EnvGroup`. +- Do not add thin intermediate harness types that only restate `Harness`. + Reusable command agents should be direct `Harness` subclasses with their own + typed config only when they have real behavior to own. +- Do not add heterogeneous `TasksetGroup` routing as a substitute for a real + v1 suite abstraction. +- Do not overfit v1 APIs to names or layering inherited from + `research-environments`; keep the logical v1 ownership boundaries correct. +- Do not use vague config names that only repeat the component name. Name the + actual thing being configured: `rlm_repo_ref`, not `rlm_ref`; `env_vars`, not + `rlm_env`. +- Do not write global helper functions in environment files. Rare exceptions + are process-level handles, such as a lock, where a module-level object is the + cleanest way to assert process-wide control. +- Do not make users manipulate paths, package resources, or other stdlib details + when the framework can express the intent directly. + +## Enforcement + +- Put deterministic, repo-specific style rules in Semgrep rather than custom + AST scripts. Ruff owns generic lint and format, ty owns type correctness, and + Semgrep owns Verifiers-specific policy checks. +- Keep Semgrep rules principle-based. They should encode stable contracts such + as narrow config parameters and no broad user-facing `Any`, not one-off bans + for removed historical names. 
diff --git a/verifiers/v1/README.md b/verifiers/v1/README.md index 71b91ff6c..e0023b91a 100644 --- a/verifiers/v1/README.md +++ b/verifiers/v1/README.md @@ -10,6 +10,9 @@ and training environments from two primary objects: `vf.Environment` worker API used by evals and trainers. For local experiments, `Harness` is runnable on its own with `await harness.run(task)`. +For current environment authoring guidance, start with +[`ENVIRONMENT_BEST_PRACTICES.md`](ENVIRONMENT_BEST_PRACTICES.md). + The programming model is intentionally small: tasks and state are serializable data; everything else is a function, config value, or runtime-managed handle. @@ -129,7 +132,7 @@ taskset and harness, then compose them. ## Minimal Environment ```python -import verifiers.v1 as vf +import verifiers as vf def source(): @@ -149,8 +152,7 @@ def load_taskset(config: vf.TasksetConfig | None = None): return vf.Taskset(source=source, rewards=[contains_answer], config=config) -def load_environment(config: vf.EnvConfig | None = None): - config = config or vf.EnvConfig() +def load_environment(config: vf.EnvConfig): return vf.Env(taskset=load_taskset(config=config.taskset)) ``` @@ -195,7 +197,7 @@ loader. ```python from datasets import load_dataset -import verifiers.v1 as vf +import verifiers as vf class GSM8KTasksetConfig(vf.TasksetConfig): @@ -262,8 +264,8 @@ controls; runtime metadata belongs on `state`. `task.program` is the taskset-to-harness merge point for command/program data that is task-owned but consumed by a harness. It can define only `files`, `dirs`, `setup`, `env`, `artifacts`, and command `args`. The harness still owns -the program kind (`command`, `fn`, or base loop), sandbox placement, and tool -interface. Duplicate file/env/artifact keys across harness and task fail fast. +the program kind (`command`, `fn`, or base loop), sandbox placement, and program +channel. Duplicate file/env/artifact keys across harness and task fail fast. 
Advanced callers may create a state for a new task while borrowing selected live resources from an existing state. The stored state remains serializable: borrowed @@ -475,7 +477,7 @@ Sandboxed programs support: callable value; - `dirs`: remote path -> local path or importlib resource directory; - `setup`: commands contributed to the rollout setup queue; -- `tools`: program-facing tool interfaces, optionally with late setup commands; +- `channels`: program-facing tool channels, optionally with late setup commands; - `artifacts`: text/JSON files read back into `state["artifacts"]`. Program does not own lifecycle scoring. `updates`, `metrics`, `rewards`, @@ -534,23 +536,22 @@ signature. Reusable CLI programs should be packaged as `Harness` subclasses. Package implementations live under `verifiers.v1.packages` while the v1 API stabilizes, -and are re-exported from `verifiers.v1` for normal use. `CLIHarness` is the thin -generic wrapper for command programs; `OpenCode`, `Pi`, `MiniSWEAgent`, and -`RLM` are bundled leaf harnesses for common coding-agent CLIs. +and are re-exported from `verifiers.v1` for normal use. `OpenCode`, `Pi`, +`MiniSWEAgent`, and `RLM` are bundled `Harness` leaf wrappers for common +coding-agent CLIs. ```python -import verifiers.v1 as vf +import verifiers as vf env = vf.Env( - taskset=vf.HarborTaskset(tasks="/path/to/harbor/tasks"), + taskset=vf.HarborTaskset(), harness=vf.OpenCode(), ) ``` -`HarborTaskset` accepts either a local Harbor task directory/dataset directory -or a Harbor dataset registry id such as `terminal-bench@2.0`. Registry ids use -the Harbor CLI download path when available; local paths do not require Harbor -to be installed. Harbor task rows contribute sandbox settings and +`HarborTaskset()` loads Harbor-format task directories from the environment +package's reserved `tasks/` directory. `HarborTaskset(dataset="owner/name")` +fetches a Harbor Hub dataset. 
Harbor task rows contribute sandbox settings and `task.program` uploads for `/task/instruction.md` and `/task/task.toml`. `OpenCode` contributes the OpenCode install/setup, config generation, MCP tool proxy wiring, and log artifact collection. `Pi` follows the same pattern for @@ -561,7 +562,13 @@ project `.mcp.json`. Neither side needs to know the other's private fields. and log/trajectory artifacts. `RLM` follows the same boundary for recursive LLM runs: `HarborTaskset` owns the task directory and tests, while `RLM` owns RLM installation, optional skill -upload to `/task/rlm-skills`, endpoint wiring, and trajectory filtering. +upload to `/rlm/skills`, endpoint wiring, and trajectory filtering. +Use `RLMConfig` in `env.harness` for RLM-specific settings such as +`rlm_repo_ref`, `rlm_tools`, `rlm_max_turns`, and `summarize_at_tokens`. +Tasksets can expose package-owned upload directories with `get_upload_dirs()`. +The base `Taskset` discovers a sibling `skills/` directory by default, and +`RLM` uploads that directory to `/rlm/skills` unless `skills=` is passed +explicitly to the harness. ## State Helpers @@ -590,6 +597,12 @@ These helpers may use process-local handles while the rollout is active. Handles are not persistence mechanisms and are stripped before returned state crosses the rollout/group boundary. +Message helpers are intentionally limited to transcript selection: +`vf.get_messages(messages, role=None)` returns matching typed message objects. It +does not parse answers or define a generic completion-text policy; index or +slice the returned list with ordinary Python, read `message.content` explicitly, +or bind a task-specific extractor on the taskset. + ## Singletons And Collections v1 keeps a sharp distinction between singleton fields and collection fields. @@ -705,11 +718,14 @@ through state helpers. `sandbox` is reserved for tools owned by a sandboxed toolset. `objects.*` is intentionally owner-private. 
Object factories are named zero-arg -loaders for private dependencies owned by the same `Toolset` or `User`. If a -hidden argument needs task or state data, bind it with a callable source instead -of an object factory. Updates, cleanup, metrics, and rewards should read -serializable task/state data or call resolved tools through `state.get_tools()` -instead of reaching into toolset dependencies directly. +loaders for private dependencies owned by the same `Taskset`, `Toolset`, or +`User`. If a hidden argument needs task or state data, bind it with a callable +source instead of an object factory. Framework args such as `task`, `state`, +`completion`, and `prompt` win over bindings when names collide. + +String binding sources are always framework paths such as `task.answer` or +`objects.index`. Bind literal strings with a callable source so typos in binding +paths fail early instead of silently becoming constants. Tasks can select toolsets and tools for one rollout: @@ -871,11 +887,11 @@ index = "simplewiki" The runtime normalizes MCP tools into callable handles for Python programs and can also present the resolved toolsets as an MCP server for sandbox command -programs. `program.tools` selects the program tool interface, not a concrete +programs. `program.channels` selects the program tool channel, not a concrete tool name: it accepts `"callable"` or `"mcp"`, or a mapping whose keys are those -interfaces. +channels. Concrete tools such as `bash` belong to a `Toolset` and are then exposed through -one of those interfaces. +one of those channels. Programs can discover and call resolved tools through the interception endpoint: @@ -895,9 +911,12 @@ async def program(task, state): return state ``` -Sandboxed base and Python entrypoint programs use the callable interface by -default. Set `program={"sandbox": True, "tools": "callable"}` when the config -should make that interface explicit. 
+Sandboxed base and Python entrypoint programs use the callable channel by +default. Set `program={"sandbox": True, "channels": "callable"}` when the config +should make that channel explicit. +`program.channels` supports only the generic `callable` and `mcp` channels. +Harness-specific tool carriers, such as RLM skill uploads, should live on the +taskset upload directory contract or the harness config. When a sandboxed `program.fn` ref points at local source, v1 resolves the package from the module root: single-file modules use `pyproject.toml` in the @@ -907,12 +926,12 @@ the program sandbox before running the entrypoint. Dependencies are normal package dependencies. Command programs do not have a universal Python call surface. If -`program.tools` requests `mcp`, v1 materializes an MCP proxy for the resolved +`program.channels` requests `mcp`, v1 materializes an MCP proxy for the resolved toolsets. Generic CLI programs usually need a setup command to add that MCP server to their own config, so the golden path is to put that setup under the `mcp` key. -The interface setup entry is a late rollout setup contribution that runs inside +The channel setup entry is a late rollout setup contribution that runs inside the program sandbox before the command. It may be a shell string or callable; callables can request non-`task` / `state` args only through `program.bindings`. @@ -933,21 +952,24 @@ EOF """ -harness = vf.CLIHarness( - command=["my-cli", "run", "/task/instruction.md"], - sandbox=True, - bindings={"write_cli_config.endpoint_config": endpoint_config}, - tools={"mcp": write_cli_config}, +harness = vf.Harness( + program={ + "command": ["my-cli", "run", "/task/instruction.md"], + "sandbox": True, + "bindings": {"write_cli_config.endpoint_config": endpoint_config}, + "channels": {"mcp": write_cli_config}, + }, + sandbox={"image": "python:3.11-slim"}, ) ``` -`program.setup` is for installing or preparing the process. 
`program.tools.mcp` +`program.setup` is for installing or preparing the process. `program.channels.mcp` is for registering resolved runtime surfaces such as MCP tools or intercepted model endpoints. Both participate in the same priority-ordered setup stage as `@vf.setup` handlers; built-in program uploads run early, `program.setup` runs -before ordinary priority-0 setup handlers, and tool-interface setup runs late. +before ordinary priority-0 setup handlers, and channel setup runs late. `program.setup` callables should only request `task` and `state`; use -`program.tools.` when setup needs bound runtime values such as an +`program.channels.*` when setup needs bound runtime values such as an endpoint config. ## Package Dependencies @@ -1012,9 +1034,10 @@ async def format_reward(task, state) -> float: return float("" in str(state.get("completion"))) ``` -Rollout signals must accept `task, state`. Extra required arguments are only -valid when a Toolset binding supplies them. Group signals are stricter because -they run after rollout-local runtime handles are gone. +Rollout signals can request framework args such as `task`, `state`, +`completion`, and `prompt`. Extra required arguments are valid when a taskset or +toolset binding supplies them. Group signals can request `tasks`, `states`, and +bound hidden args. ### Group Signals @@ -1032,9 +1055,8 @@ async def centered(tasks, states) -> list[float]: ... ``` -Group metrics/rewards/advantages must accept exactly `tasks, states` and return -one float per state. v1 writes advantages only when an explicit advantage signal -is configured. +Group metrics/rewards/advantages must return one float per state. v1 writes +advantages only when an explicit advantage signal is configured. `Env.requires_group_rollouts` is true when group-stage updates, signals, cleanup, or custom group setup are present. `Env.provides_advantages` is true @@ -1079,15 +1101,14 @@ async def summarize_attempts(tasks, states): ...
``` -Rollout update receives `task, state`, plus any Toolset-bound hidden args. -Group update receives exactly `tasks, states`. +Rollout updates can request framework args and taskset/toolset-bound hidden +args. Group updates can request `tasks`, `states`, and bound hidden args. ### Cleanup And Teardown -`@vf.cleanup` runs after scoring for its stage. Rollout cleanup receives -`task, state`, plus any Toolset-bound hidden args. Group cleanup receives -exactly `tasks, states`. Cleanup is the user extension point for final state -mutation and resource-related cleanup. +`@vf.cleanup` runs after scoring for its stage. Cleanup functions can request +their stage's framework args and taskset/toolset-bound hidden args. Cleanup is +the user extension point for final state mutation and resource-related cleanup. ```python @vf.cleanup(stage="group") @@ -1111,7 +1132,7 @@ Environment packages choose how to route runner config into v1 objects. The recommended loader shape is: ```python -import verifiers.v1 as vf +import verifiers as vf def load_taskset(config: vf.TasksetConfig | None = None): @@ -1122,8 +1143,7 @@ def load_harness(config: vf.HarnessConfig | None = None): return vf.Harness(config=config) -def load_environment(config: vf.EnvConfig | None = None): - config = config or vf.EnvConfig() +def load_environment(config: vf.EnvConfig): return vf.Env( taskset=load_taskset(config=config.taskset), harness=load_harness(config=config.harness), @@ -1133,8 +1153,7 @@ def load_environment(config: vf.EnvConfig | None = None): If the base harness is enough, omit `load_harness`: ```python -def load_environment(config: vf.EnvConfig | None = None): - config = config or vf.EnvConfig() +def load_environment(config: vf.EnvConfig): return vf.Env(taskset=load_taskset(config=config.taskset)) ``` @@ -1161,43 +1180,47 @@ max_turns = 4 weight = 0.5 ``` -For concise named args, pass typed child config objects as defaults. Explicit -nested sections win over those defaults. 
+For concise named args, define one typed args object and pass it as `args`. +`EnvConfig.args` is intentionally user-defined; environment packages decide how +those args flow into taskset and harness construction. ```python +class MyEnvArgsConfig(vf.Config): + split: str = "train" + max_turns: int = 10 + + class MyTasksetConfig(vf.TasksetConfig): split: str = "train" -def load_taskset( - split: str | None = None, - config: vf.TasksetConfig | None = None, -): - config = MyTasksetConfig(config, split=split) +def load_taskset(config: vf.TasksetConfig | None = None): + config = MyTasksetConfig(config) ... -def load_harness( - max_turns: int | None = None, - config: vf.HarnessConfig | None = None, -): - config = vf.HarnessConfig(config, max_turns=max_turns) +def load_harness(config: vf.HarnessConfig | None = None): + config = vf.HarnessConfig(config) ... def load_environment( - config: vf.EnvConfig | None = None, + config: vf.EnvConfig, split: str = "train", max_turns: int = 10, ): config = vf.EnvConfig( config, - taskset=MyTasksetConfig(split=split), - harness=vf.HarnessConfig(max_turns=max_turns), + args=MyEnvArgsConfig(split=split, max_turns=max_turns), ) + args = MyEnvArgsConfig(config.args) return vf.Env( - taskset=load_taskset(config=config.taskset), - harness=load_harness(config=config.harness), + taskset=load_taskset( + config=MyTasksetConfig(config.taskset, split=args.split) + ), + harness=load_harness( + config=vf.HarnessConfig(config.harness, max_turns=args.max_turns) + ), ) ``` @@ -1226,6 +1249,18 @@ max_turns = 8 weight = 1.0 ``` +Taskset and harness sections can import a base config with `config` and then +overlay local fields. Collection fields extend the imported config. + +```toml +[env.harness] +config = "my_env.configs:load_another_harness_config" + +[[env.harness.rewards]] +fn = "my_env.rewards:new_reward_func" +weight = 0 +``` + The outer runner owns model, endpoint, client, sampling, rollout count, and training/eval controls. 
v1 config owns taskset/harness behavior. Only put `harness.model` or `harness.client` in v1 config for standalone harnesses, @@ -1366,7 +1401,7 @@ Python programs can be configured directly: [env.harness.program] fn = "my_env.programs:run_agent" sandbox = true -tools = "callable" +channels = "callable" ``` Command programs use `command` and can receive the resolved tools as an MCP @@ -1377,14 +1412,19 @@ server when they run in a sandbox: command = ["bash", "-lc", "my-cli run /task/instruction.md"] sandbox = true -[env.harness.program.tools] +[env.harness.program.channels] mcp = true [env.harness.program.files] "/task/instruction.md" = "task.instruction" ``` -`program.setup` prepares the process. `program.tools.mcp` registers resolved +`program.channels` is deliberately limited to `callable` and `mcp`. +Harness-specific tool carriers belong on the harness or taskset contract; for +example, RLM reads `Taskset.get_upload_dirs()["skills"]` and uploads it to +`/rlm/skills`. + +`program.setup` prepares the process. `program.channels.mcp` registers resolved tool or endpoint config after the interception endpoint is live and before the command runs: @@ -1393,7 +1433,7 @@ command runs: command = ["my-cli", "run", "--config", "/tmp/my-cli.json"] sandbox = true -[env.harness.program.tools] +[env.harness.program.channels] mcp = { fn = "my_env.cli:write_cli_config" } [env.harness.program.bindings] @@ -1430,9 +1470,13 @@ def openai_key(state): return state.get_endpoint_config(api="chat")["api_key"] -vf.CLIHarness( - command=["my-cli", "run"], - env={"OPENAI_API_KEY": openai_key}, +vf.Harness( + program={ + "command": ["my-cli", "run"], + "sandbox": True, + "env": {"OPENAI_API_KEY": openai_key}, + }, + sandbox={"image": "python:3.11-slim"}, ) ``` diff --git a/verifiers/v1/RE_MIGRATION.md b/verifiers/v1/RE_MIGRATION.md index 481695846..05dd80f1f 100644 --- a/verifiers/v1/RE_MIGRATION.md +++ b/verifiers/v1/RE_MIGRATION.md @@ -27,7 +27,7 @@ and scoring logic. 
| research-environments package | v1 reference to copy | pattern | | --- | --- | --- | | `aime2024`, `aime2025`, `aime2026`, `gpqa`, `math500`, `mmlu_pro`, `simpleqa`, `simpleqa_verified` | `environments/reverse_text/reverse_text_v1.py` | serializable rows, base `Harness`, taskset reward | -| `clbench`, `color_codeword`, `graphwalks`, `ifbench`, `ifeval`, `if_summarize_judge`, `patterned_needle_in_haystack`, `science_env`, `unscramble`, `verbatim_copy` | `environments/reverse_text/reverse_text_v1.py` | single-turn prompt taskset with parser or judge closed over by reward | +| `clbench`, `color_codeword`, `graphwalks`, `ifbench`, `ifeval`, `if_summarize_judge`, `patterned_needle_in_haystack`, `science_env`, `unscramble`, `verbatim_copy` | `environments/reverse_text/reverse_text_v1.py` | single-turn prompt taskset with shared extractor or judge dependencies | | `math_env` with Python execution | `environments/math_python/math_python_v1.py` | sandbox-backed callable Python tool | | `browsecomp`, `ddbc`, `deepdive`, `hle` with tools, `wikispeedia` | `environments/wiki_search/wiki_search_v1.py` | callable `Toolset` with private dependencies and hidden bindings | | `bfcl_v3` | `environments/bfcl_v3/bfcl_v3.py` | task-local dynamic tool schemas | @@ -41,7 +41,7 @@ and scoring logic. 
| Pi Coding Agent task directories | `Sandbox CLI Harnesses` below | `HarborTaskset` or custom taskset plus `Pi` harness | | `terminal_bench_2`, `general_agent`, `nl2repobench`, RLM task-directory packages | `Task-Directory Command Harnesses` below | sandbox command program with task-owned uploads and artifacts | | `scicode`, `livecodebench`, `code_env` | `Code Verification And Post-Rollout Checks` below | update runs verification, reward reads serializable result | -| mixed benchmark suites | `Mixed Environment Suites` below | one v1 `Env` per taskset/harness pair, grouped with `EnvGroup` | +| mixed benchmark suites | `Mixed Environment Suites` below | one v1 `Env` per taskset/harness pair, exposed through explicit loaders | | third-party agent libraries such as DSPy | `environments/dspy_flights/dspy_flights.py` | Python program using `state.get_endpoint_config(...)` or `state.get_client(...)` | ## General Migration Shape @@ -49,7 +49,7 @@ and scoring logic. Every migrated package should expose: ```python -import verifiers.v1 as vf +import verifiers as vf def load_taskset(config: vf.TasksetConfig | None = None) -> vf.Taskset: @@ -67,8 +67,7 @@ def load_harness(config: vf.HarnessConfig | None = None) -> vf.Harness: return vf.Harness(config=config) -def load_v1_environment(config: vf.EnvConfig | None = None) -> vf.Env: - config = config or vf.EnvConfig() +def load_environment(config: vf.EnvConfig) -> vf.Env: return vf.Env( taskset=load_taskset(config=config.taskset), harness=load_harness(config=config.harness), @@ -78,8 +77,7 @@ def load_v1_environment(config: vf.EnvConfig | None = None) -> vf.Env: If the base harness is enough, omit `load_harness`: ```python -def load_v1_environment(config: vf.EnvConfig | None = None) -> vf.Env: - config = config or vf.EnvConfig() +def load_environment(config: vf.EnvConfig) -> vf.Env: return vf.Env(taskset=load_taskset(config=config.taskset)) ``` @@ -142,7 +140,7 @@ Migration: Example: ```python -import verifiers.v1 as vf +import 
verifiers as vf def source(): @@ -157,15 +155,16 @@ def source(): @vf.reward(weight=1.0) async def exact(task, state) -> float: - return float(str(task["answer"]).strip() in completion_text(state)) + messages = vf.get_messages(state.get("completion") or [], role="assistant") + response = str(messages[-1].content or "") if messages else "" + return float(str(task["answer"]).strip() in response) def load_taskset(config: vf.TasksetConfig | None = None): return vf.Taskset(source=source, rewards=[exact], config=config) -def load_v1_environment(config: vf.EnvConfig | None = None): - config = config or vf.EnvConfig() +def load_environment(config: vf.EnvConfig): return vf.Env(taskset=load_taskset(config=config.taskset)) ``` @@ -173,8 +172,28 @@ Gotchas: - Reference answers stay on `task`; do not expect `state["answer"]` to be the gold answer. -- If a parser is needed, keep it as a normal Python object closed over by the - reward function. +- Shared extraction or judging dependencies belong on `Taskset(objects=...)` and + enter reward signatures through `bindings`: + +```python +class AnswerExtractor: + def __call__(self, completion: list[dict[str, object]]) -> str: + ... + + +@vf.reward +async def exact(task, state, extract_answer) -> float: + return float(extract_answer(state.get("completion") or []) == task["answer"]) + + +taskset = vf.Taskset( + source=source, + rewards=[exact], + objects={"extract_answer": AnswerExtractor}, + bindings={"exact.extract_answer": "objects.extract_answer"}, +) +``` + - Judge metrics are regular reward/metric functions. Instantiate judge clients inside a lazy factory or pass a client config through taskset config. @@ -199,7 +218,7 @@ Migration: Example: ```python -import verifiers.v1 as vf +import verifiers as vf async def search(query: str, exa) -> str: @@ -280,7 +299,7 @@ Example: ```python from verifiers.types import Tool -import verifiers.v1 as vf +import verifiers as vf class SchemaTool: @@ -439,7 +458,7 @@ Migration: 1. 
Wrap each server as `vf.MCPTool(command=..., args=[...])`. 2. Put MCP tools in a taskset or harness toolset. -3. Use `program={"command": [...], "sandbox": True, "tools": "mcp"}` for +3. Use `program={"command": [...], "sandbox": True, "channels": "mcp"}` for sandbox command harnesses that should consume resolved toolsets through MCP. Reference: `environments/mcp_search_env/mcp_search_env.py`. @@ -465,8 +484,8 @@ Gotchas: - Use task fields and bindings when the server needs task-specific arguments. - Callable tools and MCP tools can coexist in toolsets. Python programs receive callable handles; sandbox command programs can request an MCP server through - `program.tools`. -- `program.tools` names the program-facing interface, not a concrete tool. Use + `program.channels`. +- `program.channels` names the program-facing channel, not a concrete tool. Use `"callable"` or `"mcp"`; tools such as `bash` are regular Toolset entries. ## Nested Harness Calls @@ -493,7 +512,8 @@ async def ask_child(question: str, harness, state) -> str: ).freeze() child_state = await harness.run(child_task) state.setdefault("child_answers", []).append(child_state["answer"]) - return completion_text(child_state) + messages = vf.get_messages(child_state.get("completion") or [], role="assistant") + return str(messages[-1].content or "") if messages else "" def load_child_harness(): @@ -542,15 +562,18 @@ Example: ```python env = vf.Env( - taskset=vf.HarborTaskset(tasks="/path/to/harbor/tasks"), + taskset=vf.HarborTaskset(), harness=vf.OpenCode(), ) ``` Gotchas: -- `HarborTaskset` owns task loading, per-task sandbox overrides, `/task` - uploads, and test scoring. +- `HarborTaskset()` loads Harbor-format task directories from the environment + package's reserved `tasks/` directory. `HarborTaskset(dataset="owner/name")` + fetches a Harbor Hub dataset. +- `HarborTaskset` owns task loading, per-task sandbox overrides, `/task` uploads, + and test scoring. 
- `OpenCode` owns OpenCode installation, config generation, MCP tool proxy wiring, and log artifacts. - `Pi` owns Pi installation, intercepted model config generation, optional MCP @@ -560,7 +583,7 @@ Gotchas: - `RLM` owns RLM installation, optional `/task/rlm-skills` upload, endpoint wiring, and trajectory filtering. - `task.program` is the merge point for task-owned program files/env/setup. -- Harness-owned CLI tool registration belongs in `program.tools.mcp`; it runs +- Harness-owned CLI tool registration belongs in `program.channels.mcp`; it runs after ordinary setup and before the command. - Use group-scoped sandbox lifetime when scoring needs to inspect the sandbox. @@ -604,7 +627,7 @@ harness = vf.Harness( program={ "sandbox": True, "command": ["bash", "-lc", "solver run /task/instruction.md"], - "tools": "mcp", + "channels": "mcp", "files": {"/task/instruction.md": instruction}, "dirs": {"/workspace/task": task_package}, "setup": ["pip install -e /workspace/task"], @@ -639,34 +662,30 @@ Use this for: Migration: 1. Build one v1 `Env` per independently configurable taskset/harness pair. -2. Combine those envs with `vf.EnvGroup`. +2. Expose separate typed loaders for the v1 envs until a v1-native suite wrapper + exists. 3. Keep category-specific rewards, tools, and harness settings inside each child env. Example: ```python -class SuiteConfig(vf.Config): - math: object | None = None - graph: object | None = None - - -def load_environment(config=None): - config = SuiteConfig(config) - return vf.EnvGroup( - [ - load_math_environment(config.math), - load_graph_environment(config.graph), - ] - ) +def load_math_environment(config: vf.EnvConfig) -> vf.Env: + return vf.Env(...) + + +def load_graph_environment(config: vf.EnvConfig) -> vf.Env: + return vf.Env(...) ``` Gotchas: -- Use `EnvGroup` when categories need different harnesses or scoring contracts. 
+- Do not wrap v1 `Env` objects in the v0 `EnvGroup`; that creates a mixed + contract where rollout execution and scoring live on different stacks. - Use one `Taskset` with a `category` task field when categories share the same harness and lifecycle. -- v1 env capability flags still flow through the child `Env` objects. +- Keep explicit v0 and v1 loaders only when the migration is intentionally dual + stack. ## Code Verification And Post-Rollout Checks diff --git a/verifiers/v1/__init__.py b/verifiers/v1/__init__.py index 8418d593a..bc6b1b00d 100644 --- a/verifiers/v1/__init__.py +++ b/verifiers/v1/__init__.py @@ -10,12 +10,23 @@ teardown, update, ) +from verifiers.types import ( + AssistantMessage, + Message, + Messages, + SystemMessage, + TextMessage, + ToolMessage, + UserMessage, +) +from verifiers.utils.message_utils import get_messages from .config import ( Config, EnvConfig, HarnessConfig, MCPToolConfig, + ProgramConfig, SandboxConfig, TasksetConfig, ToolsetConfig, @@ -24,12 +35,12 @@ from .env import Env from .harness import Harness from .packages.harnesses import ( - CLIHarness, MiniSWEAgent, OpenCode, OpenCodeConfig, Pi, RLM, + RLMConfig, ) from .utils.scoring_utils import ( add_metric, @@ -42,38 +53,64 @@ ) from .state import State from .task import Task -from .taskset import Taskset +from .taskset import Taskset, discover_sibling_dir from .packages.tasksets import ( HarborTaskset, HarborTasksetConfig, ) from .toolset import MCPTool, Toolset +from .types import ( + ConfigData, + ConfigMap, + GroupHandler, + Handler, + MutableConfigMap, + Objects, + TaskRow, + TaskRows, +) from .user import User __all__ = [ + "ConfigData", "Config", + "ConfigMap", "Env", "EnvConfig", + "AssistantMessage", + "GroupHandler", "Harness", "HarnessConfig", "HarborTaskset", "HarborTasksetConfig", - "CLIHarness", + "Handler", + "MutableConfigMap", "MCPTool", "MCPToolConfig", + "Message", + "Messages", "MiniSWEAgent", "OpenCode", "OpenCodeConfig", + "Objects", "Pi", + 
"ProgramConfig", "RLM", + "RLMConfig", "SandboxConfig", "State", "Task", + "TaskRow", + "TaskRows", "Taskset", "TasksetConfig", + "SystemMessage", + "TextMessage", "Toolset", "ToolsetConfig", + "ToolMessage", "User", + "UserMessage", "UserConfig", "add_metric", "add_reward", @@ -82,7 +119,9 @@ "build_signals", "cleanup", "collect_signals", + "discover_sibling_dir", "metric", + "get_messages", "reward", "score_group", "score_rollout", diff --git a/verifiers/v1/config.py b/verifiers/v1/config.py index 12b27cf68..0c2b7276b 100644 --- a/verifiers/v1/config.py +++ b/verifiers/v1/config.py @@ -1,43 +1,101 @@ -from __future__ import annotations - -import functools -import importlib -import inspect from collections.abc import Iterable, Mapping from pathlib import Path -from typing import Any, Callable, Literal, cast - -from pydantic import BaseModel, ConfigDict, Field, field_validator, model_validator -from pydantic_core import PydanticUndefined +import sys +from typing import ClassVar, Literal, cast + +from pydantic import ( + BaseModel, + ConfigDict, + Field, + SkipValidation, + field_validator, + model_validator, +) from typing_extensions import Self -try: +from .types import ( + CallableConfigEntry, + ConfigData, + ConfigInputMap, + ConfigMap, + ConfigSource, + Handler, + ModelClient, + Objects, + ProgramCommand, + ProgramOptionMap, + ProgramSetup, + ProgramChannels, + ProgramValue, + PromptInput, + TaskSource, + ToolsetSpecs, + ToolSpecs, +) +from .utils.binding_utils import ( + Bindings, + normalize_binding_map, + normalize_object_map, +) +from .utils.config_callable_utils import ( + CallableKind as CallableKind, + config_callables as config_callables, + merge_config_handler_map as merge_config_handler_map, +) +from .utils.config_utils import ( + annotation_text, + config_data, + default_text, + expand_config_ref, + expand_config_ref_data, + import_config_ref as import_config_ref, + merge_child_config, + merge_config_value as merge_config_value, + omit_none, + 
resolve_config_object as resolve_config_object, + string_mapping as string_mapping, +) +from .utils.mcp_proxy_utils import validate_program_channels + +if sys.version_info >= (3, 11): import tomllib -except ModuleNotFoundError: +else: import tomli as tomllib class Config(BaseModel): model_config = ConfigDict(arbitrary_types_allowed=True, extra="forbid") + supports_config_ref: ClassVar[bool] = False - def __init__(self, config: object | None = None, /, **data: object): + def __init__(self, config: ConfigSource | None = None, /, **data: object): super().__init__(**type(self)._merge_config_data(config, data)) @classmethod - def from_config(cls, config: object | None = None, /, **data: object) -> Self: + def from_config(cls, config: ConfigSource | None = None, /, **data: object) -> Self: return cls(**cls._merge_config_data(config, data)) @classmethod def _merge_config_data( - cls, config: object | None, data: dict[str, object] - ) -> dict[str, object]: + cls, config: ConfigSource | None, data: ConfigData + ) -> ConfigData: data = omit_none(data) + if cls.supports_config_ref: + cls._validate_config_ref_contract() + config = cast(ConfigSource | None, expand_config_ref(config, cls)) + data = expand_config_ref_data(data, cls) if config is not None: base = config_data(config, cls) base.update(data) data = base return data + @classmethod + def _validate_config_ref_contract(cls) -> None: + if "config" in cls.model_fields: + raise TypeError( + f"{cls.__name__} reserves the 'config' field for config refs." 
+ ) + @classmethod def from_toml( cls, path: str | Path, section: str | Iterable[str] | None = None @@ -62,26 +120,6 @@ def schema_text(cls) -> str: return "\n".join(lines) -def config_data(value: object, target: type[Config] | None = None) -> dict[str, object]: - if value is None: - data: dict[str, object] = {} - elif isinstance(value, Config): - data = value.model_dump(exclude_none=True, exclude_unset=True) - if target is not None: - data = { - key: item for key, item in data.items() if key in target.model_fields - } - elif isinstance(value, Mapping): - data = string_mapping(cast(Mapping[object, object], value)) - else: - raise TypeError("Config must be a mapping or Config object.") - return data - - -def omit_none(data: Mapping[str, object]) -> dict[str, object]: - return {key: value for key, value in data.items() if value is not None} - - class SandboxConfig(Config): image: str = "python:3.11-slim" start_command: str = "tail -f /dev/null" @@ -122,28 +160,64 @@ def validate_args(cls, value: object) -> object: return value +class ProgramConfig(Config): + base: bool = False + fn: str | None = None + command: ProgramCommand | None = None + sandbox: bool | SandboxConfig | ConfigMap | None = None + files: ProgramOptionMap = Field(default_factory=dict) + dirs: ProgramOptionMap = Field(default_factory=dict) + setup: ProgramSetup = Field(default_factory=list) + bindings: Bindings = Field(default_factory=dict) + env: ProgramOptionMap = Field(default_factory=dict) + artifacts: ProgramOptionMap = Field(default_factory=dict) + channels: ProgramChannels | None = None + args: list[ProgramValue] = Field(default_factory=list) + + @field_validator("channels") + @classmethod + def validate_channels(cls, value: object) -> object: + validate_program_channels(value) + return value + + @field_validator("bindings", mode="before") + @classmethod + def validate_bindings(cls, value: object) -> Bindings: + return normalize_binding_map(value, "program.bindings", allow_objects=False) + + 
class UserConfig(Config): - fn: object + fn: Handler | str scope: Literal["rollout", "group", "global"] = "rollout" - bindings: dict[str, object] = Field(default_factory=dict) - objects: dict[str, object] = Field(default_factory=dict) + bindings: Bindings = Field(default_factory=dict) + objects: Objects = Field(default_factory=dict) sandbox: SandboxConfig | None = None + @field_validator("bindings", mode="before") + @classmethod + def validate_bindings(cls, value: object) -> Bindings: + return normalize_binding_map(value, "user.bindings", key_style="arg") + + @field_validator("objects", mode="before") + @classmethod + def validate_objects(cls, value: object) -> Objects: + return normalize_object_map(value, "user.objects") + class ToolsetConfig(Config): - tools: object = Field(default_factory=list) + tools: ToolSpecs | None = Field(default_factory=list) show: list[str] | None = None hide: list[str] | None = None - bindings: dict[str, object] = Field(default_factory=dict) - objects: dict[str, object] = Field(default_factory=dict) + bindings: Bindings = Field(default_factory=dict) + objects: Objects = Field(default_factory=dict) write: bool = False scope: Literal["rollout", "group", "global"] | None = None sandbox: SandboxConfig | Literal["program"] | None = None - stops: list[object] = Field(default_factory=list) - setups: list[object] = Field(default_factory=list) - updates: list[object] = Field(default_factory=list) - cleanups: list[object] = Field(default_factory=list) - teardowns: list[object] = Field(default_factory=list) + stops: list[CallableConfigEntry] = Field(default_factory=list) + setups: list[CallableConfigEntry] = Field(default_factory=list) + updates: list[CallableConfigEntry] = Field(default_factory=list) + cleanups: list[CallableConfigEntry] = Field(default_factory=list) + teardowns: list[CallableConfigEntry] = Field(default_factory=list) @field_validator("show", "hide", mode="before") @classmethod @@ -152,61 +226,115 @@ def 
validate_visibility_list(cls, value: object) -> object: return [value] return value + @field_validator("bindings", mode="before") + @classmethod + def validate_bindings(cls, value: object) -> Bindings: + return normalize_binding_map(value, "toolset.bindings") + + @field_validator("objects", mode="before") + @classmethod + def validate_objects(cls, value: object) -> Objects: + return normalize_object_map(value, "toolset.objects") + @model_validator(mode="after") - def validate_visibility(self) -> ToolsetConfig: + def validate_visibility(self) -> "ToolsetConfig": if self.show is not None and self.hide is not None: raise ValueError("Toolset accepts show or hide, not both.") return self class TasksetConfig(Config): + supports_config_ref: ClassVar[bool] = True + # Singleton fields describe one logical value owned by the taskset. - source: object | None = None - eval_source: object | None = None + source: TaskSource | None = None + eval_source: TaskSource | None = None taskset_id: str | None = None - system_prompt: object | None = None - user: object | None = None + system_prompt: PromptInput | None = None + user: Handler | str | ConfigMap | None = None + bindings: Bindings = Field(default_factory=dict) + objects: Objects = Field(default_factory=dict) # Collection fields are merged/extended from code and config. 
- toolsets: object = Field(default_factory=list) - stops: list[object] = Field(default_factory=list) - setups: list[object] = Field(default_factory=list) - updates: list[object] = Field(default_factory=list) - metrics: list[object] = Field(default_factory=list) - rewards: list[object] = Field(default_factory=list) - advantages: list[object] = Field(default_factory=list) - cleanups: list[object] = Field(default_factory=list) - scoring: dict[str, dict[str, object]] = Field(default_factory=dict) + toolsets: ToolsetSpecs | None = Field(default_factory=list) + stops: list[CallableConfigEntry] = Field(default_factory=list) + setups: list[CallableConfigEntry] = Field(default_factory=list) + updates: list[CallableConfigEntry] = Field(default_factory=list) + metrics: list[CallableConfigEntry] = Field(default_factory=list) + rewards: list[CallableConfigEntry] = Field(default_factory=list) + advantages: list[CallableConfigEntry] = Field(default_factory=list) + cleanups: list[CallableConfigEntry] = Field(default_factory=list) + scoring: dict[str, ConfigData] = Field(default_factory=dict) + + @field_validator("bindings", mode="before") + @classmethod + def validate_bindings(cls, value: object) -> Bindings: + return normalize_binding_map(value, "taskset.bindings") + + @field_validator("objects", mode="before") + @classmethod + def validate_objects(cls, value: object) -> Objects: + return normalize_object_map(value, "taskset.objects") class HarnessConfig(Config): + supports_config_ref: ClassVar[bool] = True + # Singleton fields describe one logical value owned by the harness. 
- program: object | None = None - system_prompt: object | None = None + program: Handler | str | ConfigMap | None = None + system_prompt: PromptInput | None = None system_prompt_merge: str = "reject" sandbox: SandboxConfig | None = None - client: object | None = None + client: ModelClient | ConfigMap | str | None = None model: str | None = None - sampling_args: dict[str, object] = Field(default_factory=dict) - keep_trajectory_step: object | None = None - user: object | None = None + sampling_args: ConfigData = Field(default_factory=dict) + keep_trajectory_step: Handler | str | None = None + user: Handler | str | ConfigMap | None = None + bindings: Bindings = Field(default_factory=dict) # Collection fields are merged/extended from code and config. - toolsets: object = Field(default_factory=list) - stops: list[object] = Field(default_factory=list) - setups: list[object] = Field(default_factory=list) - updates: list[object] = Field(default_factory=list) - metrics: list[object] = Field(default_factory=list) - rewards: list[object] = Field(default_factory=list) - advantages: list[object] = Field(default_factory=list) - cleanups: list[object] = Field(default_factory=list) - scoring: dict[str, dict[str, object]] = Field(default_factory=dict) + toolsets: ToolsetSpecs | None = Field(default_factory=list) + stops: list[CallableConfigEntry] = Field(default_factory=list) + setups: list[CallableConfigEntry] = Field(default_factory=list) + updates: list[CallableConfigEntry] = Field(default_factory=list) + metrics: list[CallableConfigEntry] = Field(default_factory=list) + rewards: list[CallableConfigEntry] = Field(default_factory=list) + advantages: list[CallableConfigEntry] = Field(default_factory=list) + cleanups: list[CallableConfigEntry] = Field(default_factory=list) + scoring: dict[str, ConfigData] = Field(default_factory=dict) max_turns: int = 10 + @field_validator("program", mode="before") + @classmethod + def validate_program(cls, value: object) -> object: + if value is 
None or callable(value) or isinstance(value, str): + return value + if isinstance(value, Mapping): + return ProgramConfig.from_config( + string_mapping(cast(ConfigInputMap, value)) + ).model_dump(exclude_none=True, exclude_defaults=True) + raise TypeError("program must be a callable, import ref, or mapping.") + + @field_validator("bindings", mode="before") + @classmethod + def validate_bindings(cls, value: object) -> Bindings: + return normalize_binding_map(value, "harness.bindings", allow_objects=False) + class EnvConfig(Config): - taskset: object | None = None - harness: object | None = None + args: SkipValidation[BaseModel | ConfigMap] | None = None + taskset: SkipValidation[BaseModel | ConfigMap] | None = None + harness: SkipValidation[BaseModel | ConfigMap] | None = None + + @field_validator("args") + @classmethod + def validate_args(cls, value: object) -> object: + if value is not None: + try: + config_data(value) + except TypeError as exc: + raise ValueError(str(exc)) from exc + return value @field_validator("taskset", "harness") @classmethod @@ -220,13 +348,13 @@ def validate_child_config(cls, value: object) -> object: @classmethod def _merge_config_data( - cls, config: object | None, data: dict[str, object] - ) -> dict[str, object]: + cls, config: ConfigSource | None, data: ConfigData + ) -> ConfigData: data = omit_none(data) if config is None: return data base = config_data(config, cls) - for section in ("taskset", "harness"): + for section in ("args", "taskset", "harness"): default = data.get(section) override = base.pop(section, None) if default is None: @@ -239,217 +367,15 @@ def _merge_config_data( return base -def merge_child_config(default: object, override: object) -> object: - merged = deep_merge(config_data(default), config_data(override)) - if isinstance(default, Config): - return type(default)(merged) - return merged - - -def merge_config_value(value: object, config: object) -> object: - if config is None: - return value - if value is None: - 
return config - value_mapping = config_mapping(value) - config_mapping_value = config_mapping(config) - if value_mapping is not None and config_mapping_value is not None: - return deep_merge( - config_mapping_value, - value_mapping, - ) - return value - - -def config_mapping(value: object) -> dict[str, object] | None: - if isinstance(value, Config): - return value.model_dump(exclude_none=True) - if isinstance(value, Mapping): - return string_mapping(cast(Mapping[object, object], value)) - return None - - -def sandbox_config_mapping(value: object | None) -> dict[str, object] | None: +def sandbox_config_mapping(value: object | None) -> ConfigData | None: if value is None: return None if isinstance(value, SandboxConfig): return value.model_dump(exclude_none=True) if isinstance(value, Mapping): - mapping = cast(Mapping[str, object], value) + mapping = cast(ConfigMap, value) prefer = mapping.get("prefer") if prefer is not None and prefer != "program": raise ValueError("sandbox.prefer must be 'program'.") return SandboxConfig.from_config(mapping).model_dump(exclude_none=True) raise TypeError("Sandbox config must be a mapping.") - - -def merge_config_items(values: Iterable[object], config: object) -> list[object]: - return [*values, *config_items(config)] - - -CallableKind = Literal[ - "stop", "setup", "update", "metric", "reward", "advantage", "cleanup", "teardown" -] - - -def merge_config_callables( - values: Iterable[Callable[..., object]], - config: object, - kind: CallableKind, -) -> list[Callable[..., object]]: - return [*config_callables(values, kind), *config_callables(config, kind)] - - -def config_callables(value: object, kind: CallableKind) -> list[Callable[..., object]]: - if value is None: - return [] - if isinstance(value, str): - return [callable_config_item(value, kind)] - if isinstance(value, Mapping): - return [callable_config_item(value, kind)] - if isinstance(value, Iterable): - return [callable_config_item(item, kind) for item in value] - return 
[callable_config_item(value, kind)] - - -def callable_config_item(value: object, kind: CallableKind) -> Callable[..., object]: - value = resolve_config_object(value) - if isinstance(value, Mapping): - return callable_from_mapping(cast(Mapping[str, object], value), kind) - if not callable(value): - raise TypeError(f"{kind} config entries must resolve to callables.") - return cast(Callable[..., object], value) - - -def callable_from_mapping( - spec: Mapping[str, object], kind: CallableKind -) -> Callable[..., object]: - allowed = callable_config_keys(kind) - unknown = set(spec) - allowed - if unknown: - raise ValueError(f"{kind} callable config has unknown keys: {sorted(unknown)}.") - if bool(spec.get("skip", False)): - raise ValueError( - f"{kind} callable config should be removed instead of skipped." - ) - fn = resolve_config_object(spec.get("fn")) - if not callable(fn): - raise TypeError(f"{kind} callable config requires callable fn.") - metadata = {key: spec[key] for key in spec if key not in {"fn", "skip"}} - return configured_callable(cast(Callable[..., object], fn), kind, metadata) - - -def callable_config_keys(kind: CallableKind) -> set[str]: - keys = {"fn", "priority", "skip"} - if kind in {"update", "metric", "reward", "cleanup"}: - keys.add("stage") - if kind == "reward": - keys.add("weight") - return keys - - -def configured_callable( - fn: Callable[..., object], - kind: CallableKind, - metadata: Mapping[str, object], -) -> Callable[..., object]: - if not metadata: - return fn - - @functools.wraps(fn) - async def wrapper(**kwargs: object) -> object: - result = fn(**kwargs) - if inspect.isawaitable(result): - return await result - return result - - setattr(wrapper, "__signature__", inspect.signature(fn)) - setattr(wrapper, kind, True) - if "priority" in metadata: - priority = metadata["priority"] - if not isinstance(priority, int) or isinstance(priority, bool): - raise TypeError(f"{kind} priority must be an integer.") - setattr(wrapper, f"{kind}_priority", 
priority) - if "stage" in metadata: - stage = metadata["stage"] - if stage not in {"rollout", "group"}: - raise ValueError(f"{kind} stage must be 'rollout' or 'group'.") - setattr(wrapper, f"{kind}_stage", stage) - if "weight" in metadata: - weight = metadata["weight"] - if not isinstance(weight, int | float) or isinstance(weight, bool): - raise TypeError("reward weight must be numeric.") - setattr(wrapper, "reward_weight", float(weight)) - return cast(Callable[..., object], wrapper) - - -def config_items(value: object) -> list[object]: - if value is None: - return [] - if isinstance(value, str): - return [import_config_ref(value)] - if isinstance(value, Mapping): - return [value] - if isinstance(value, Iterable): - return [resolve_config_object(item) for item in value] - return [value] - - -def resolve_config_object(value: object) -> object: - if isinstance(value, str): - return import_config_ref(value) - return value - - -def import_config_ref(ref: str) -> object: - module_name, separator, attr_path = ref.partition(":") - if not separator or not module_name or not attr_path: - raise ValueError(f"Config ref {ref!r} must use 'module:object'.") - obj: object = importlib.import_module(module_name) - for part in attr_path.split("."): - obj = getattr(obj, part) - return obj - - -def deep_merge( - base: dict[str, object], overlay: Mapping[str, object] -) -> dict[str, object]: - merged: dict[str, object] = dict(base) - for key, value in overlay.items(): - existing = merged.get(key) - if isinstance(existing, Mapping) and isinstance(value, Mapping): - merged[key] = deep_merge( - string_mapping(cast(Mapping[object, object], existing)), - string_mapping(cast(Mapping[object, object], value)), - ) - else: - merged[key] = value - return merged - - -def string_mapping(value: Mapping[object, object]) -> dict[str, object]: - result: dict[str, object] = {} - for key, item in value.items(): - if not isinstance(key, str): - raise TypeError("Config mappings require string keys.") - 
result[key] = item - return result - - -def annotation_text(annotation: Any) -> str: - if getattr(annotation, "__args__", None): - return str(annotation).replace("typing.", "") - name = getattr(annotation, "__name__", None) - if isinstance(name, str): - return name - return str(annotation).replace("typing.", "") - - -def default_text(field: object) -> str: - default_factory = getattr(field, "default_factory", None) - if default_factory is not None: - return "" - default = getattr(field, "default", PydanticUndefined) - if default is PydanticUndefined: - return "required" - return repr(default) diff --git a/verifiers/v1/env.py b/verifiers/v1/env.py index f3753221d..190203ab4 100644 --- a/verifiers/v1/env.py +++ b/verifiers/v1/env.py @@ -1,8 +1,5 @@ -from __future__ import annotations - import asyncio import uuid -from collections.abc import Mapping from typing import cast import verifiers as vf @@ -13,6 +10,7 @@ from .harness import Harness from .state import State from .taskset import Taskset +from .types import ConfigMap class Env(vf.Environment): @@ -116,7 +114,7 @@ async def _run_group_states( return cast(list[vf.State], states) def apply_controls( - self, states: list[State], controls: Mapping[str, object] | None = None + self, states: list[State], controls: ConfigMap | None = None ) -> list[State]: if controls is None: return states diff --git a/verifiers/v1/harness.py b/verifiers/v1/harness.py index 7f62ed8b6..4bb83f5d0 100644 --- a/verifiers/v1/harness.py +++ b/verifiers/v1/harness.py @@ -1,13 +1,9 @@ -from __future__ import annotations +from collections.abc import Mapping +from typing import TYPE_CHECKING, ClassVar, cast -from collections.abc import Callable, Mapping -from typing import Any, ClassVar, cast - -from verifiers.clients import Client -from verifiers.decorators import update +from verifiers.decorators import metric, update from verifiers.errors import Error, OverlongPromptError from verifiers.types import ( - ClientConfig, MessageContent, 
Messages, SamplingArgs, @@ -23,11 +19,12 @@ HarnessConfig, SandboxConfig, import_config_ref, - merge_config_callables, + merge_config_handler_map, merge_config_value, resolve_config_object, sandbox_config_mapping, ) +from .utils.binding_utils import BindingMap, normalize_binding_map from .utils.endpoint_utils import ( Endpoint, assistant_completion_from_messages, @@ -38,7 +35,7 @@ proxy_program, proxy_sandbox, ) -from .utils.program_utils import endpoint_api_key, program_tool_types, run_local_command +from .utils.program_utils import endpoint_api_key, program_channels, run_local_command from .utils.program_utils import ( merge_task_program, merge_task_sandbox, @@ -62,8 +59,12 @@ from .utils.trajectory_utils import has_borrowed_trajectory, sync_trajectory from .state import State from .task import Task -from .toolset import merge_toolsets, normalize_toolset_collection +from .toolset import ToolsetCollection, merge_toolsets, normalize_toolset_collection from .user import normalize_user +from .types import ConfigData, ConfigMap, Handler, ModelClient, ProgramMap, PromptInput + +if TYPE_CHECKING: + from .taskset import Taskset class Harness: @@ -72,25 +73,26 @@ class Harness: def __init__( self, # Singleton fields. - program: Callable[..., object] | Mapping[str, object] | None = None, - system_prompt: object | None = None, - user: object | None = None, - sandbox: Mapping[str, object] | SandboxConfig | None = None, - client: Client | ClientConfig | None = None, + program: Handler | ProgramMap | None = None, + system_prompt: PromptInput | None = None, + user: Handler | str | ConfigMap | None = None, + bindings: BindingMap | None = None, + sandbox: ConfigMap | SandboxConfig | None = None, + client: ModelClient | None = None, model: str | None = None, sampling_args: SamplingArgs | None = None, max_turns: int | None = None, # Collection fields. 
- toolsets: object | None = None, - stops: list[Callable[..., object]] | None = None, - setups: list[Callable[..., object]] | None = None, - updates: list[Callable[..., object]] | None = None, - metrics: list[Callable[..., object]] | None = None, - rewards: list[Callable[..., object]] | None = None, - advantages: list[Callable[..., object]] | None = None, - cleanups: list[Callable[..., object]] | None = None, + toolsets: ToolsetCollection | None = None, + stops: list[Handler] | None = None, + setups: list[Handler] | None = None, + updates: list[Handler] | None = None, + metrics: list[Handler] | None = None, + rewards: list[Handler] | None = None, + advantages: list[Handler] | None = None, + cleanups: list[Handler] | None = None, # Config. - config: HarnessConfig | Mapping[str, object] | None = None, + config: HarnessConfig | None = None, ): self.config = type(self).config_type.from_config(config) if max_turns is not None: @@ -98,20 +100,25 @@ def __init__( program_value = resolve_config_object( merge_config_value(program, self.config.program) ) - self.program = cast( - Callable[..., object] | Mapping[str, object] | None, program_value + self.program = cast(Handler | ProgramMap | None, program_value) + system_prompt_value = cast( + PromptInput | None, + merge_config_value(system_prompt, self.config.system_prompt), ) self.system_prompt = normalize_system_prompt( - merge_config_value(system_prompt, self.config.system_prompt), - field_name="harness.system_prompt", + system_prompt_value, field_name="harness.system_prompt" ) self.system_prompt_merge = self.config.system_prompt_merge self.user = normalize_user(merge_config_value(user, self.config.user)) + self.bindings = { + **self.config.bindings, + **normalize_binding_map(bindings, "Harness bindings", allow_objects=False), + } self.sandbox = sandbox_config_mapping( merge_config_value(sandbox, self.config.sandbox) ) self.client = cast( - Client | ClientConfig | None, + ModelClient | None, 
resolve_config_object(merge_config_value(client, self.config.client)), ) self.model = cast(str | None, merge_config_value(model, self.config.model)) @@ -122,41 +129,30 @@ def __init__( self.toolsets, self.named_toolsets = merge_toolsets( toolsets or (), self.config.toolsets ) - self.stops = cast( - list[Callable[..., object]], - merge_config_callables(stops or (), self.config.stops, "stop"), - ) - self.setups = cast( - list[Callable[..., object]], - merge_config_callables(setups or (), self.config.setups, "setup"), - ) - self.updates = cast( - list[Callable[..., object]], - merge_config_callables(updates or (), self.config.updates, "update"), - ) - self.metrics = cast( - list[Callable[..., object]], - merge_config_callables(metrics or (), self.config.metrics, "metric"), - ) - self.rewards = cast( - list[Callable[..., object]], - merge_config_callables(rewards or (), self.config.rewards, "reward"), - ) - self.advantages = cast( - list[Callable[..., object]], - merge_config_callables( - advantages or (), self.config.advantages, "advantage" - ), - ) - self.cleanups = cast( - list[Callable[..., object]], - merge_config_callables(cleanups or (), self.config.cleanups, "cleanup"), + handlers = merge_config_handler_map( + { + "stop": stops or (), + "setup": setups or (), + "update": updates or (), + "metric": [num_turns, *(metrics or [])], + "reward": rewards or (), + "advantage": advantages or (), + "cleanup": cleanups or (), + }, + self.config, ) + self.stops = handlers["stop"] + self.setups = handlers["setup"] + self.updates = handlers["update"] + self.metrics = handlers["metric"] + self.rewards = handlers["reward"] + self.advantages = handlers["advantage"] + self.cleanups = handlers["cleanup"] keep_step_value = resolve_config_object(self.config.keep_trajectory_step) if keep_step_value is not None and not callable(keep_step_value): raise TypeError("keep_trajectory_step must be callable.") - self.keep_trajectory_step = cast(Callable[..., object] | None, keep_step_value) 
- self.taskset: object | None = None + self.keep_trajectory_step = cast(Handler | None, keep_step_value) + self.taskset: "Taskset | None" = None self.runtime = self.resolve_runtime() self.endpoint = Endpoint(use_tunnel=self.program_uses_sandbox()) self._program = self.compile_program(self.program) @@ -165,19 +161,17 @@ def __init__( def config_schema(cls) -> str: return cls.config_type.schema_text() - def _add_handler( - self, handlers: list[Callable[..., object]], fn: Callable[..., object] - ) -> None: + def _add_handler(self, handlers: list[Handler], fn: Handler) -> None: handlers.append(fn) self.runtime = self.resolve_runtime() - def add_metric(self, fn: Callable[..., object]) -> None: + def add_metric(self, fn: Handler) -> None: self._add_handler(self.metrics, fn) - def add_reward(self, fn: Callable[..., object]) -> None: + def add_reward(self, fn: Handler) -> None: self._add_handler(self.rewards, fn) - def add_advantage(self, fn: Callable[..., object]) -> None: + def add_advantage(self, fn: Handler) -> None: self._add_handler(self.advantages, fn) def add_toolset(self, toolset: object) -> None: @@ -189,31 +183,27 @@ def add_toolset(self, toolset: object) -> None: self.named_toolsets.update(named_toolsets) self.runtime = self.resolve_runtime() - def add_stop(self, fn: Callable[..., object]) -> None: + def add_stop(self, fn: Handler) -> None: self._add_handler(self.stops, fn) - def add_setup(self, fn: Callable[..., object]) -> None: + def add_setup(self, fn: Handler) -> None: self._add_handler(self.setups, fn) - def add_update(self, fn: Callable[..., object]) -> None: + def add_update(self, fn: Handler) -> None: self._add_handler(self.updates, fn) - def add_cleanup(self, fn: Callable[..., object]) -> None: + def add_cleanup(self, fn: Handler) -> None: self._add_handler(self.cleanups, fn) - def attach_taskset(self, taskset: object) -> None: + def attach_taskset(self, taskset: "Taskset") -> None: self.taskset = taskset - attach_harness = getattr(taskset, 
"attach_harness", None) - if callable(attach_harness): - attach_harness(self) + taskset.attach_harness(self) self.runtime = self.resolve_runtime() def resolve_runtime(self) -> Runtime: return Runtime(taskset=self.taskset, harness=self) - async def run( - self, task: Task | Mapping[str, Any], state: State | None = None - ) -> State: + async def run(self, task: Task | ConfigMap, state: State | None = None) -> State: task = task if isinstance(task, Task) else Task(task).freeze() state = await self.init_state(task) if state is None else state timing_recorded = False @@ -282,7 +272,7 @@ async def render_completion(self, task: Task, state: State) -> None: sync_trajectory(state) async def setup_state(self, task: Task, state: State) -> State: - explicit_runtime = dict(cast(Mapping[str, object], state.get("runtime") or {})) + explicit_runtime = dict(cast(ConfigMap, state.get("runtime") or {})) task_controls = { key: task[key] for key in ("max_turns", "tools") if key in task } @@ -305,7 +295,7 @@ async def setup_state(self, task: Task, state: State) -> State: if self.sampling_args: sampling_args = dict(self.sampling_args) sampling_args.update( - cast(Mapping[str, object], state["runtime"].get("sampling_args") or {}) + cast(ConfigMap, state["runtime"].get("sampling_args") or {}) ) state["runtime"]["sampling_args"] = sampling_args self.resolve_system_prompt(task, state) @@ -356,9 +346,7 @@ def resolved_endpoint(self, state: State) -> Endpoint: raise RuntimeError("Resolved endpoint handle has no live endpoint.") return endpoint - def compile_program( - self, program: Callable[..., object] | Mapping[str, object] | None - ) -> Callable[..., object]: + def compile_program(self, program: Handler | ProgramMap | None) -> Handler: if program is None: return self.base_program if callable(program): @@ -383,16 +371,14 @@ def compile_program( fn = import_config_ref(fn_ref) if not callable(fn): raise TypeError("program.fn did not resolve to a callable.") - return 
self.local_callable_program(cast(Callable[..., object], fn)) + return self.local_callable_program(cast(Handler, fn)) if kind == "command": sandbox_config = self.program_sandbox_config(program) validate_program_options(program, kind, sandbox_config) - return self.command_program(cast(Mapping[str, object], program)) + return self.command_program(cast(ConfigMap, program)) raise AssertionError(f"Unhandled program kind: {kind}") - def local_callable_program( - self, fn: Callable[..., object] - ) -> Callable[..., object]: + def local_callable_program(self, fn: Handler) -> Handler: async def run(task: Task, state: State) -> object: await self.runtime.setup_rollout(task, state) return await maybe_call_with_named_args(fn, task=task, state=state) @@ -416,7 +402,7 @@ async def base_program(self, task: Task, state: State) -> State: message.model_dump(exclude_none=True) for message in messages ] - def sync_completion() -> list[dict[str, object]]: + def sync_completion() -> list[ConfigData]: rendered_messages = [ message.model_dump(exclude_none=True) for message in messages ] @@ -460,8 +446,8 @@ def sync_completion() -> list[dict[str, object]]: content: MessageContent try: name = tool_call.name - result = await callable_tools[name]( - **json_args(tool_call.arguments) + result = await maybe_call_with_named_args( + callable_tools[name], **json_args(tool_call.arguments) ) content = ( cast(MessageContent, result) @@ -479,7 +465,7 @@ def sync_completion() -> list[dict[str, object]]: return state return state - def command_program(self, program: Mapping[str, object]) -> Callable[..., object]: + def command_program(self, program: ConfigMap) -> Handler: async def run(task: Task, state: State) -> State: runtime = self.runtime merged_program = merge_task_program(program, task, kind="command") @@ -500,8 +486,8 @@ async def run(task: Task, state: State) -> State: return run def sandbox_base_program( - self, program: Mapping[str, object], sandbox_config: Mapping[str, object] - ) -> 
Callable[..., object]: + self, program: ConfigMap, sandbox_config: ConfigMap + ) -> Handler: async def run(task: Task, state: State) -> State: merged_program = merge_task_program(program, task, kind="base") return await run_sandbox_python_program( @@ -521,10 +507,10 @@ async def run(task: Task, state: State) -> State: def sandbox_fn_program( self, - program: Mapping[str, object], - sandbox_config: Mapping[str, object], + program: ConfigMap, + sandbox_config: ConfigMap, fn_ref: str, - ) -> Callable[..., object]: + ) -> Handler: async def run(task: Task, state: State) -> State: merged_program = merge_task_program(program, task, kind="fn") return await run_sandbox_python_program( @@ -545,14 +531,9 @@ async def run(task: Task, state: State) -> State: def program_uses_sandbox(self) -> bool: if not isinstance(self.program, Mapping): return False - return ( - self.program_sandbox_config(cast(Mapping[str, object], self.program)) - is not None - ) + return self.program_sandbox_config(cast(ConfigMap, self.program)) is not None - def program_sandbox_config( - self, program: Mapping[str, object] - ) -> Mapping[str, object] | None: + def program_sandbox_config(self, program: ConfigMap) -> ConfigMap | None: sandbox = program.get("sandbox") if sandbox is None or sandbox is False: return None @@ -561,22 +542,20 @@ def program_sandbox_config( raise ValueError("program.sandbox=true requires Harness.sandbox.") if not isinstance(self.sandbox, Mapping): raise TypeError("Harness.sandbox must be a mapping.") - sandbox_config = cast(Mapping[str, object], self.sandbox) + sandbox_config = cast(ConfigMap, self.sandbox) validate_program_sandbox_scope(sandbox_config) return sandbox_config if not isinstance(sandbox, Mapping | SandboxConfig): raise TypeError("program.sandbox must be true, false, or a mapping.") sandbox_config = {} if self.sandbox is not None: - sandbox_config.update(cast(Mapping[str, object], self.sandbox)) + sandbox_config.update(cast(ConfigMap, self.sandbox)) 
sandbox_config.update(sandbox_config_mapping(sandbox) or {}) validate_program_sandbox_scope(sandbox_config) return sandbox_config - def prepare_sandbox_program( - self, program: Mapping[str, object], state: State - ) -> Mapping[str, object]: - if "mcp" in program_tool_types(program): + def prepare_sandbox_program(self, program: ConfigMap, state: State) -> ConfigMap: + if "mcp" in program_channels(program): endpoint_root_url = state.get("endpoint_root_url") if not isinstance(endpoint_root_url, str): raise RuntimeError("MCP program tools require an active endpoint.") @@ -588,11 +567,20 @@ def prepare_sandbox_program( return program def prepare_sandbox_config( - self, sandbox_config: Mapping[str, object], program: Mapping[str, object] - ) -> Mapping[str, object]: + self, sandbox_config: ConfigMap, program: ConfigMap + ) -> ConfigMap: config = dict(sandbox_config) - if "mcp" in program_tool_types(program): + if "mcp" in program_channels(program): config = proxy_sandbox(config) if program_kind(program) in {"base", "fn"}: config = python_program_sandbox(config) return config + + +@metric +async def num_turns(task: Task, state: State) -> float: + _ = task + trajectory = state.get("trajectory") or [] + if not isinstance(trajectory, list): + raise TypeError("state.trajectory must be a list.") + return float(len(trajectory)) diff --git a/verifiers/v1/packages/harnesses/__init__.py b/verifiers/v1/packages/harnesses/__init__.py index bf71e077f..125c9d274 100644 --- a/verifiers/v1/packages/harnesses/__init__.py +++ b/verifiers/v1/packages/harnesses/__init__.py @@ -1,8 +1,14 @@ -from .cli import CLIHarness -from .configs import OpenCodeConfig +from .configs import OpenCodeConfig, RLMConfig from .mini_swe_agent import MiniSWEAgent from .opencode import OpenCode from .pi import Pi from .rlm import RLM -__all__ = ["CLIHarness", "MiniSWEAgent", "OpenCode", "OpenCodeConfig", "Pi", "RLM"] +__all__ = [ + "MiniSWEAgent", + "OpenCode", + "OpenCodeConfig", + "Pi", + "RLM", + "RLMConfig", 
+] diff --git a/verifiers/v1/packages/harnesses/cli.py b/verifiers/v1/packages/harnesses/cli.py deleted file mode 100644 index e9eec5303..000000000 --- a/verifiers/v1/packages/harnesses/cli.py +++ /dev/null @@ -1,121 +0,0 @@ -from __future__ import annotations - -from collections.abc import Callable, Mapping -from typing import cast - -from verifiers.clients import Client -from verifiers.types import ClientConfig, SamplingArgs - -from ...config import HarnessConfig, SandboxConfig, sandbox_config_mapping -from ...harness import Harness -from ...utils.program_utils import program_list_items - - -DEFAULT_CLI_SANDBOX = { - "image": "python:3.11-slim", - "workdir": "/app", - "scope": "rollout", - "timeout_minutes": 120, - "command_timeout": 900, - "network_access": True, -} - - -class CLIHarness(Harness): - def __init__( - self, - command: str | list[object], - *, - sandbox: bool | Mapping[str, object] | SandboxConfig = True, - files: Mapping[str, object] | None = None, - dirs: Mapping[str, object] | None = None, - setup: object | list[object] | None = None, - bindings: Mapping[str, object] | None = None, - env: Mapping[str, object] | None = None, - artifacts: Mapping[str, object] | None = None, - tools: object | None = None, - program: Mapping[str, object] | None = None, - system_prompt: object | None = None, - user: object | None = None, - client: Client | ClientConfig | None = None, - model: str | None = None, - sampling_args: SamplingArgs | None = None, - max_turns: int | None = None, - toolsets: object | None = None, - stops: list[Callable[..., object]] | None = None, - setups: list[Callable[..., object]] | None = None, - updates: list[Callable[..., object]] | None = None, - metrics: list[Callable[..., object]] | None = None, - rewards: list[Callable[..., object]] | None = None, - advantages: list[Callable[..., object]] | None = None, - cleanups: list[Callable[..., object]] | None = None, - config: HarnessConfig | Mapping[str, object] | None = None, - ): - 
program_config: dict[str, object] = { - "command": command, - "sandbox": sandbox, - } - if files is not None: - program_config["files"] = dict(files) - if dirs is not None: - program_config["dirs"] = dict(dirs) - if setup is not None: - program_config["setup"] = setup - if bindings is not None: - program_config["bindings"] = dict(bindings) - if env is not None: - program_config["env"] = dict(env) - if artifacts is not None: - program_config["artifacts"] = dict(artifacts) - if tools is not None: - program_config["tools"] = tools - if program is not None: - program_config = merge_program_defaults(program_config, program) - sandbox_config = DEFAULT_CLI_SANDBOX if sandbox is True else None - if sandbox is not True and sandbox is not False: - sandbox_config = { - **DEFAULT_CLI_SANDBOX, - **(sandbox_config_mapping(sandbox) or {}), - } - super().__init__( - program=program_config, - system_prompt=system_prompt, - user=user, - sandbox=sandbox_config, - client=client, - model=model, - sampling_args=sampling_args, - max_turns=max_turns, - toolsets=toolsets, - stops=stops, - setups=setups, - updates=updates, - metrics=metrics, - rewards=rewards, - advantages=advantages, - cleanups=cleanups, - config=config, - ) - - -def merge_program_defaults( - defaults: Mapping[str, object], overrides: Mapping[str, object] -) -> dict[str, object]: - merged = dict(defaults) - for key, value in overrides.items(): - if ( - key in {"files", "dirs", "bindings", "env", "artifacts"} - and isinstance(merged.get(key), Mapping) - and isinstance(value, Mapping) - ): - base = cast(Mapping[str, object], merged[key]) - patch = cast(Mapping[str, object], value) - merged[key] = {**dict(base), **dict(patch)} - elif key in {"setup", "args"}: - merged[key] = [ - *program_list_items(merged.get(key), f"program.{key}"), - *program_list_items(value, f"program.{key}"), - ] - else: - merged[key] = value - return merged diff --git a/verifiers/v1/packages/harnesses/command.py 
b/verifiers/v1/packages/harnesses/command.py new file mode 100644 index 000000000..ea63ba870 --- /dev/null +++ b/verifiers/v1/packages/harnesses/command.py @@ -0,0 +1,116 @@ +from collections.abc import Mapping +from typing import cast + +from typing_extensions import NotRequired, TypedDict + +from verifiers.clients import Client +from verifiers.types import ClientConfig, SamplingArgs + +from ...config import SandboxConfig, sandbox_config_mapping +from ...types import ( + ConfigData, + ConfigMap, + Handler, + ProgramCommand, + ProgramMap, + ProgramOptionMap, + ProgramSetup, + ProgramChannels, +) +from ...utils.binding_utils import Bindings +from ...utils.program_utils import program_list_items +from ...toolset import ToolsetCollection + + +DEFAULT_COMMAND_SANDBOX: ConfigData = { + "image": "python:3.11-slim", + "workdir": "/app", + "scope": "rollout", + "timeout_minutes": 120, + "command_timeout": 900, + "network_access": True, +} + + +class HarnessKwargs(TypedDict): + user: NotRequired[Handler | str | ConfigMap | None] + client: NotRequired[Client | ClientConfig | None] + model: NotRequired[str | None] + sampling_args: NotRequired[SamplingArgs | None] + toolsets: NotRequired[ToolsetCollection | None] + stops: NotRequired[list[Handler] | None] + setups: NotRequired[list[Handler] | None] + updates: NotRequired[list[Handler] | None] + metrics: NotRequired[list[Handler] | None] + rewards: NotRequired[list[Handler] | None] + advantages: NotRequired[list[Handler] | None] + cleanups: NotRequired[list[Handler] | None] + + +def command_program( + *, + command: ProgramCommand, + sandbox: bool | ConfigMap | SandboxConfig, + files: ProgramOptionMap | None = None, + dirs: ProgramOptionMap | None = None, + setup: ProgramSetup | None = None, + bindings: Bindings | None = None, + env: ProgramOptionMap | None = None, + artifacts: ProgramOptionMap | None = None, + channels: ProgramChannels | None = None, + program: ProgramMap | None = None, +) -> ConfigData: + config: ConfigData = 
{ + "command": command, + "sandbox": sandbox is not False, + } + if files is not None: + config["files"] = dict(files) + if dirs is not None: + config["dirs"] = dict(dirs) + if setup is not None: + config["setup"] = setup + if bindings is not None: + config["bindings"] = dict(bindings) + if env is not None: + config["env"] = dict(env) + if artifacts is not None: + config["artifacts"] = dict(artifacts) + if channels is not None: + config["channels"] = channels + if program is not None: + config = merge_program_defaults(config, program) + return config + + +def command_sandbox( + sandbox: bool | ConfigMap | SandboxConfig, + defaults: ConfigMap | None = None, +) -> ConfigData | None: + if sandbox is False: + return None + base = {**DEFAULT_COMMAND_SANDBOX, **dict(defaults or {})} + if sandbox is True: + return base + return {**base, **(sandbox_config_mapping(sandbox) or {})} + + +def merge_program_defaults(defaults: ConfigMap, overrides: ProgramMap) -> ConfigData: + merged = dict(defaults) + for key, value in overrides.items(): + if ( + key in {"files", "dirs", "bindings", "env", "artifacts"} + and isinstance(merged.get(key), Mapping) + and isinstance(value, Mapping) + ): + base = cast(ConfigMap, merged[key]) + patch = cast(ConfigMap, value) + merged[key] = {**dict(base), **dict(patch)} + elif key in {"setup", "args"}: + merged[key] = [ + *program_list_items(merged.get(key), f"program.{key}"), + *program_list_items(value, f"program.{key}"), + ] + else: + merged[key] = value + return merged diff --git a/verifiers/v1/packages/harnesses/configs.py b/verifiers/v1/packages/harnesses/configs.py index 5a133aafc..a40c2c343 100644 --- a/verifiers/v1/packages/harnesses/configs.py +++ b/verifiers/v1/packages/harnesses/configs.py @@ -1,8 +1,9 @@ -from __future__ import annotations +from pathlib import Path from pydantic import Field from ...config import HarnessConfig +from ...types import ConfigData, ConfigSource, ProgramValue, PromptInput OPENCODE_DEFAULT_RELEASE_REPO = 
"PrimeIntellect-ai/opencode" OPENCODE_DEFAULT_RELEASE_VERSION = "1.1.63-rl2" @@ -41,13 +42,22 @@ "codesearch", "skill", ) +RLM_DEFAULT_REPO_URL = "github.com/PrimeIntellect-ai/rlm-harness.git" +RLM_DEFAULT_REPO_REF = "main" +RLM_DEFAULT_MAX_TURNS = 100 +RLM_DEFAULT_EXEC_TIMEOUT = 300 +RLM_DEFAULT_MAX_DEPTH = 0 +RLM_DEFAULT_INSTRUCTION_PATH = "/rlm/instruction.txt" +RLM_DEFAULT_APPEND_TO_SYSTEM_PROMPT_PATH = "/rlm/append_to_system_prompt.txt" +RLM_DEFAULT_WORKDIR = "/workspace" +RLM_DEFAULT_TOOLS = ("ipython",) class OpenCodeConfig(HarnessConfig): @classmethod def _merge_config_data( - cls, config: object | None, data: dict[str, object] - ) -> dict[str, object]: + cls, config: ConfigSource | None, data: ConfigData + ) -> ConfigData: system_prompt_disabled = ( data.get("system_prompt") is None and "system_prompt" in data ) or (isinstance(config, OpenCodeConfig) and config.system_prompt is None) @@ -60,7 +70,7 @@ def _merge_config_data( instruction_path: str = OPENCODE_DEFAULT_INSTRUCTION_PATH system_prompt_path: str = OPENCODE_DEFAULT_SYSTEM_PROMPT_PATH log_path: str = OPENCODE_DEFAULT_LOG_PATH - system_prompt: object | None = OPENCODE_DEFAULT_SYSTEM_PROMPT + system_prompt: PromptInput | None = OPENCODE_DEFAULT_SYSTEM_PROMPT disabled_tools: list[str] = Field( default_factory=lambda: list(OPENCODE_DEFAULT_DISABLED_TOOLS) ) @@ -72,3 +82,21 @@ def _merge_config_data( install_ripgrep: bool = True provider_timeout_ms: int = 3_600_000 max_turns: int = 4 + + +class RLMConfig(HarnessConfig): + workdir: str = RLM_DEFAULT_WORKDIR + instruction_path: str = RLM_DEFAULT_INSTRUCTION_PATH + rlm_repo_url: str = RLM_DEFAULT_REPO_URL + rlm_repo_ref: str = RLM_DEFAULT_REPO_REF + rlm_max_turns: int = RLM_DEFAULT_MAX_TURNS + rlm_exec_timeout: int = RLM_DEFAULT_EXEC_TIMEOUT + rlm_max_depth: int = RLM_DEFAULT_MAX_DEPTH + summarize_at_tokens: int | tuple[int, int] | list[int] | None = None + include_sub_rlm_trajectories: bool = False + append_to_system_prompt: str = "" + local_checkout: str 
| Path | None = None + gh_token: str | None = None + rlm_tools: list[str] = Field(default_factory=lambda: list(RLM_DEFAULT_TOOLS)) + env_vars: dict[str, ProgramValue] = Field(default_factory=dict) + skills: str | Path | None = None diff --git a/verifiers/v1/packages/harnesses/mini_swe_agent.py b/verifiers/v1/packages/harnesses/mini_swe_agent.py index 66d942c76..afc90b422 100644 --- a/verifiers/v1/packages/harnesses/mini_swe_agent.py +++ b/verifiers/v1/packages/harnesses/mini_swe_agent.py @@ -1,16 +1,16 @@ -from __future__ import annotations - import shlex -from collections.abc import Mapping from pathlib import PurePosixPath -from typing import Any -from .cli import CLIHarness +from typing_extensions import Unpack + +from .command import HarnessKwargs, command_program, command_sandbox from ...config import SandboxConfig +from ...harness import Harness from ...utils.prompt_utils import ( state_system_prompt_text, task_text as task_instruction_text, ) +from ...types import ConfigMap, ProgramMap, ProgramOptionMap, ProgramValue, PromptInput DEFAULT_INSTALL_DIR = "/opt/mini-swe-agent" DEFAULT_PREFIX_DIR = f"{DEFAULT_INSTALL_DIR}/prefix" @@ -37,7 +37,7 @@ DEFAULT_ENVIRONMENT_TIMEOUT = 120 -class MiniSWEAgent(CLIHarness): +class MiniSWEAgent(Harness): def __init__( self, *, @@ -53,18 +53,18 @@ def __init__( environment_timeout: int = DEFAULT_ENVIRONMENT_TIMEOUT, extra_config_specs: list[str] | None = None, install_python: bool = True, - system_prompt: object | None = None, - sandbox: bool | Mapping[str, object] | SandboxConfig = True, - program: Mapping[str, object] | None = None, + system_prompt: PromptInput | None = None, + sandbox: bool | ConfigMap | SandboxConfig = True, + program: ProgramMap | None = None, max_turns: int | None = 4, - **kwargs: Any, + **kwargs: Unpack[HarnessKwargs], ): - files: dict[str, object] = { + files: dict[str, ProgramValue] = { instruction_path: task_instruction_text, } if system_prompt is not None: files[system_prompt_path] = 
state_system_prompt_text - artifacts = { + artifacts: ProgramOptionMap = { "mini_swe_agent_log": { "path": log_path, "format": "text", @@ -76,36 +76,40 @@ def __init__( "optional": True, }, } + command = [ + "bash", + "-lc", + build_mini_swe_agent_run_script( + agent_workdir=agent_workdir, + instruction_path=instruction_path, + system_prompt_path=system_prompt_path + if system_prompt is not None + else None, + log_path=log_path, + trajectory_path=trajectory_path, + config_spec=config_spec, + model_class=model_class, + environment_timeout=environment_timeout, + extra_config_specs=extra_config_specs, + ), + ] super().__init__( - command=[ - "bash", - "-lc", - build_mini_swe_agent_run_script( - agent_workdir=agent_workdir, - instruction_path=instruction_path, - system_prompt_path=system_prompt_path - if system_prompt is not None - else None, - log_path=log_path, - trajectory_path=trajectory_path, - config_spec=config_spec, - model_class=model_class, - environment_timeout=environment_timeout, - extra_config_specs=extra_config_specs, + program=command_program( + command=command, + sandbox=sandbox, + files=files, + setup=build_mini_swe_agent_install_script( + package_version=package_version, + package_sha256=package_sha256, + install_python=install_python, ), - ], - sandbox=sandbox, - files=files, - setup=build_mini_swe_agent_install_script( - package_version=package_version, - package_sha256=package_sha256, - install_python=install_python, + env={ + "OPENAI_MODEL": "runtime.model", + }, + artifacts=artifacts, + program=program, ), - env={ - "OPENAI_MODEL": "runtime.model", - }, - artifacts=artifacts, - program=program, + sandbox=command_sandbox(sandbox), system_prompt=system_prompt, max_turns=max_turns, **kwargs, diff --git a/verifiers/v1/packages/harnesses/opencode.py b/verifiers/v1/packages/harnesses/opencode.py index 6fa7cdc84..e45817be9 100644 --- a/verifiers/v1/packages/harnesses/opencode.py +++ b/verifiers/v1/packages/harnesses/opencode.py @@ -1,12 +1,11 @@ -from 
__future__ import annotations - import json import shlex -from collections.abc import Mapping from pathlib import PurePosixPath -from typing import Any +from typing import cast + +from typing_extensions import Unpack -from .cli import CLIHarness +from .command import HarnessKwargs, command_program, command_sandbox from .configs import ( OPENCODE_DEFAULT_AGENT_WORKDIR, OPENCODE_DEFAULT_DISABLED_TOOLS, @@ -20,11 +19,20 @@ OpenCodeConfig, ) from ...config import SandboxConfig +from ...harness import Harness from ...utils.mcp_proxy_utils import proxy_command from ...utils.prompt_utils import ( state_system_prompt_text, task_text as task_instruction_text, ) +from ...types import ( + ConfigData, + ConfigMap, + ProgramCommand, + ProgramMap, + ProgramValue, + PromptInput, +) DEFAULT_RELEASE_REPO = OPENCODE_DEFAULT_RELEASE_REPO DEFAULT_RELEASE_VERSION = OPENCODE_DEFAULT_RELEASE_VERSION @@ -36,10 +44,15 @@ DEFAULT_SYSTEM_PROMPT = OPENCODE_DEFAULT_SYSTEM_PROMPT DEFAULT_DISABLED_TOOLS = list(OPENCODE_DEFAULT_DISABLED_TOOLS) -_UNSET: object = object() + +class Unset: + pass -class OpenCode(CLIHarness): +UNSET = Unset() + + +class OpenCode(Harness): config_type = OpenCodeConfig def __init__( @@ -49,7 +62,7 @@ def __init__( instruction_path: str | None = None, system_prompt_path: str | None = None, log_path: str | None = None, - system_prompt: object | None = _UNSET, + system_prompt: PromptInput | None | Unset = UNSET, disabled_tools: list[str] | None = None, allow_git: bool | None = None, disable_compaction: bool | None = None, @@ -58,13 +71,13 @@ def __init__( release_sha256: str | None = None, install_ripgrep: bool | None = None, provider_timeout_ms: int | None = None, - sandbox: bool | Mapping[str, object] | SandboxConfig | None = None, - program: Mapping[str, object] | None = None, + sandbox: bool | ConfigMap | SandboxConfig | None = None, + program: ProgramMap | None = None, max_turns: int | None = None, - config: OpenCodeConfig | Mapping[str, object] | None = None, - 
**kwargs: Any, + config: OpenCodeConfig | None = None, + **kwargs: Unpack[HarnessKwargs], ): - config_data: dict[str, object] = { + config_data: ConfigData = { "agent_workdir": agent_workdir, "instruction_path": instruction_path, "system_prompt_path": system_prompt_path, @@ -79,22 +92,24 @@ def __init__( "provider_timeout_ms": provider_timeout_ms, "max_turns": max_turns, } - if system_prompt is not _UNSET: + if system_prompt is not UNSET: config_data["system_prompt"] = system_prompt config = OpenCodeConfig.from_config(config, **config_data) if system_prompt is None: config.system_prompt = None - sandbox_config: bool | Mapping[str, object] | SandboxConfig + sandbox_config: bool | ConfigMap | SandboxConfig sandbox_config = ( config.sandbox if sandbox is None and config.sandbox is not None else True ) if sandbox is not None: sandbox_config = sandbox - files: dict[str, object] = { - config.instruction_path: task_instruction_text, + files: dict[str, ProgramValue] = { + config.instruction_path: cast(ProgramValue, task_instruction_text), } if config.system_prompt is not None: - files[config.system_prompt_path] = state_system_prompt_text + files[config.system_prompt_path] = cast( + ProgramValue, state_system_prompt_text + ) artifacts = { "opencode_log": { "path": config.log_path, @@ -103,42 +118,46 @@ def __init__( } } system_prompt_disabled = config.system_prompt is None + command: ProgramCommand = [ + "bash", + "-lc", + build_opencode_run_script( + agent_workdir=config.agent_workdir, + instruction_path=config.instruction_path, + log_path=config.log_path, + allow_git=config.allow_git, + ), + ] super().__init__( - command=[ - "bash", - "-lc", - build_opencode_run_script( - agent_workdir=config.agent_workdir, - instruction_path=config.instruction_path, - log_path=config.log_path, - allow_git=config.allow_git, + program=command_program( + command=command, + sandbox=sandbox_config, + files=files, + setup=build_install_script( + release_repo=config.release_repo, + 
release_version=config.release_version, + release_sha256=config.release_sha256, + install_ripgrep=config.install_ripgrep, ), - ], - sandbox=sandbox_config, - files=files, - setup=build_install_script( - release_repo=config.release_repo, - release_version=config.release_version, - release_sha256=config.release_sha256, - install_ripgrep=config.install_ripgrep, + channels={ + "mcp": build_opencode_mcp_setup_script( + agent_workdir=config.agent_workdir, + system_prompt_path=config.system_prompt_path + if config.system_prompt is not None + else None, + log_path=config.log_path, + disabled_tools=config.disabled_tools, + disable_compaction=config.disable_compaction, + provider_timeout_ms=config.provider_timeout_ms, + ) + }, + artifacts=artifacts, + program=program, ), - tools={ - "mcp": build_opencode_mcp_setup_script( - agent_workdir=config.agent_workdir, - system_prompt_path=config.system_prompt_path - if config.system_prompt is not None - else None, - log_path=config.log_path, - disabled_tools=config.disabled_tools, - disable_compaction=config.disable_compaction, - provider_timeout_ms=config.provider_timeout_ms, - ) - }, - artifacts=artifacts, - program=program, + sandbox=command_sandbox(sandbox_config), system_prompt=config.system_prompt, max_turns=config.max_turns, - config=config.model_dump(exclude_none=False), + config=config, **kwargs, ) if system_prompt_disabled: @@ -198,10 +217,10 @@ def build_opencode_config( disable_compaction: bool, provider_timeout_ms: int, ) -> str: - agent_config: dict[str, object] = { + agent_config: ConfigData = { "title": {"disable": True}, } - config: dict[str, object] = { + config: ConfigData = { "${SCHEMA_DOLLAR}schema": "https://opencode.ai/config.json", "provider": { "intercepted": { @@ -235,7 +254,7 @@ def build_opencode_config( } if disable_compaction: config["compaction"] = {"auto": False, "prune": False} - build_config: dict[str, object] = {} + build_config: ConfigData = {} if system_prompt_path is not None: 
build_config["prompt"] = "{file:" + system_prompt_path + "}" if disabled_tools: diff --git a/verifiers/v1/packages/harnesses/pi.py b/verifiers/v1/packages/harnesses/pi.py index 2887d4143..f08bb52ff 100644 --- a/verifiers/v1/packages/harnesses/pi.py +++ b/verifiers/v1/packages/harnesses/pi.py @@ -1,19 +1,19 @@ -from __future__ import annotations - import json import shlex -from collections.abc import Mapping from pathlib import PurePosixPath -from typing import Any -from .cli import CLIHarness +from typing_extensions import Unpack + +from .command import HarnessKwargs, command_program, command_sandbox from ...config import SandboxConfig +from ...harness import Harness from ...state import State from ...utils.mcp_proxy_utils import proxy_command from ...utils.prompt_utils import ( state_system_prompt_text, task_text as task_instruction_text, ) +from ...types import ConfigMap, ProgramMap, ProgramOptionMap, ProgramValue, PromptInput DEFAULT_PI_PACKAGE = "@mariozechner/pi-coding-agent" DEFAULT_PI_WORKDIR = "/app" @@ -23,7 +23,7 @@ DEFAULT_SYSTEM_PROMPT = "Complete the user's task using the available tools." 
-class Pi(CLIHarness): +class Pi(Harness): def __init__( self, *, @@ -31,53 +31,57 @@ def __init__( instruction_path: str = DEFAULT_INSTRUCTION_PATH, system_prompt_path: str = DEFAULT_SYSTEM_PROMPT_PATH, log_path: str = DEFAULT_LOG_PATH, - system_prompt: object | None = DEFAULT_SYSTEM_PROMPT, + system_prompt: PromptInput | None = DEFAULT_SYSTEM_PROMPT, package: str = DEFAULT_PI_PACKAGE, install_mcp_adapter: bool = True, - sandbox: bool | Mapping[str, object] | SandboxConfig = True, - program: Mapping[str, object] | None = None, + sandbox: bool | ConfigMap | SandboxConfig = True, + program: ProgramMap | None = None, max_turns: int | None = 4, - **kwargs: Any, + **kwargs: Unpack[HarnessKwargs], ): - files: dict[str, object] = { + files: dict[str, ProgramValue] = { instruction_path: task_instruction_text, } if system_prompt is not None: files[system_prompt_path] = state_system_prompt_text - artifacts = { + artifacts: ProgramOptionMap = { "pi_log": { "path": log_path, "format": "text", "optional": True, } } + command = [ + "bash", + "-lc", + build_pi_run_script( + agent_workdir=agent_workdir, + instruction_path=instruction_path, + system_prompt_path=system_prompt_path + if system_prompt is not None + else None, + log_path=log_path, + ), + ] super().__init__( - command=[ - "bash", - "-lc", - build_pi_run_script( - agent_workdir=agent_workdir, - instruction_path=instruction_path, - system_prompt_path=system_prompt_path - if system_prompt is not None - else None, - log_path=log_path, - ), - ], - sandbox=sandbox, - files=files, - setup=build_pi_install_script(package=package), - tools={ - "mcp": build_pi_mcp_setup( - agent_workdir=agent_workdir, - install_mcp_adapter=install_mcp_adapter, - ) - } - if install_mcp_adapter - else None, - bindings={"setup_pi.endpoint_config": pi_endpoint_config}, - artifacts=artifacts, - program=program, + program=command_program( + command=command, + sandbox=sandbox, + files=files, + setup=build_pi_install_script(package=package), + 
channels={ + "mcp": build_pi_mcp_setup( + agent_workdir=agent_workdir, + install_mcp_adapter=install_mcp_adapter, + ) + } + if install_mcp_adapter + else None, + bindings={"setup_pi.endpoint_config": pi_endpoint_config}, + artifacts=artifacts, + program=program, + ), + sandbox=command_sandbox(sandbox), system_prompt=system_prompt, max_turns=max_turns, **kwargs, @@ -110,7 +114,7 @@ def setup_pi(endpoint_config) -> str: def build_pi_mcp_setup_script( *, agent_workdir: str, - endpoint_config: Mapping[str, object], + endpoint_config: ConfigMap, install_mcp_adapter: bool, ) -> str: models_json = pi_models_json(endpoint_config) @@ -173,7 +177,7 @@ def pi_endpoint_config(state: State) -> dict[str, str]: return state.get_endpoint_config(api="chat") -def pi_models_json(endpoint_config: Mapping[str, object]) -> str: +def pi_models_json(endpoint_config: ConfigMap) -> str: api = str(endpoint_config.get("api_client_type") or "openai_chat_completions") api_name = { "openai_chat_completions": "openai-completions", diff --git a/verifiers/v1/packages/harnesses/rlm.py b/verifiers/v1/packages/harnesses/rlm.py index f1c321fb8..d204aeee9 100644 --- a/verifiers/v1/packages/harnesses/rlm.py +++ b/verifiers/v1/packages/harnesses/rlm.py @@ -1,136 +1,188 @@ -from __future__ import annotations - import hashlib import json import random import shlex from collections.abc import Callable, Mapping +from importlib.abc import Traversable from pathlib import Path -from typing import Any +from typing import cast + +from typing_extensions import Unpack from verifiers.envs.experimental.utils.git_checkout_cache import ( resolve_git_checkout, validate_git_checkout, ) -from ...config import HarnessConfig, SandboxConfig +from ...config import SandboxConfig, sandbox_config_mapping +from ...harness import Harness from ...state import State from ...task import Task +from ...taskset import Taskset from ...utils.prompt_utils import task_text -from .cli import CLIHarness - -DEFAULT_RLM_REPO_URL = 
"github.com/PrimeIntellect-ai/rlm-harness.git" -DEFAULT_RLM_REF = "main" -DEFAULT_RLM_MAX_TURNS = 100 -DEFAULT_RLM_EXEC_TIMEOUT = 300 -DEFAULT_RLM_MAX_DEPTH = 0 -DEFAULT_RLM_INSTRUCTION_PATH = "/rlm/instruction.txt" -DEFAULT_APPEND_TO_SYSTEM_PROMPT_PATH = "/rlm/append_to_system_prompt.txt" +from .command import HarnessKwargs, command_program +from .configs import ( + RLM_DEFAULT_APPEND_TO_SYSTEM_PROMPT_PATH, + RLM_DEFAULT_EXEC_TIMEOUT, + RLM_DEFAULT_MAX_DEPTH, + RLM_DEFAULT_MAX_TURNS, + RLM_DEFAULT_REPO_REF, + RLM_DEFAULT_REPO_URL, + RLM_DEFAULT_INSTRUCTION_PATH, + RLMConfig, +) +from ...types import ConfigMap, ProgramMap, ProgramOptionMap, ProgramValue + +DEFAULT_RLM_REPO_URL = RLM_DEFAULT_REPO_URL +DEFAULT_RLM_REPO_REF = RLM_DEFAULT_REPO_REF +DEFAULT_RLM_MAX_TURNS = RLM_DEFAULT_MAX_TURNS +DEFAULT_RLM_EXEC_TIMEOUT = RLM_DEFAULT_EXEC_TIMEOUT +DEFAULT_RLM_MAX_DEPTH = RLM_DEFAULT_MAX_DEPTH +DEFAULT_RLM_INSTRUCTION_PATH = RLM_DEFAULT_INSTRUCTION_PATH +DEFAULT_APPEND_TO_SYSTEM_PROMPT_PATH = RLM_DEFAULT_APPEND_TO_SYSTEM_PROMPT_PATH DEFAULT_RLM_CHECKOUT_PATH = "/tmp/rlm-checkout" DEFAULT_RLM_SKILLS_PATH = "/rlm/skills" DEFAULT_RLM_LOCAL_CHECKOUT_CACHE_ROOT = ( Path.home() / ".cache" / "verifiers" / "rlm-checkouts" ) REQUIRED_RLM_CHECKOUT_FILES = ("install.sh", "pyproject.toml") +ProgramDir = str | Path | Traversable + +class RLM(Harness): + config_type = RLMConfig -class RLM(CLIHarness): def __init__( self, *, - workdir: str = "/workspace", - instruction_path: str = DEFAULT_RLM_INSTRUCTION_PATH, - rlm_repo_url: str = DEFAULT_RLM_REPO_URL, - rlm_ref: str = DEFAULT_RLM_REF, - rlm_max_turns: int = DEFAULT_RLM_MAX_TURNS, - rlm_exec_timeout: int = DEFAULT_RLM_EXEC_TIMEOUT, - rlm_max_depth: int = DEFAULT_RLM_MAX_DEPTH, + workdir: str | None = None, + instruction_path: str | None = None, + rlm_repo_url: str | None = None, + rlm_repo_ref: str | None = None, + rlm_max_turns: int | None = None, + rlm_exec_timeout: int | None = None, + rlm_max_depth: int | None = None, 
summarize_at_tokens: int | tuple[int, int] | list[int] | None = None, - include_sub_rlm_trajectories: bool = False, - append_to_system_prompt: str = "", + include_sub_rlm_trajectories: bool | None = None, + append_to_system_prompt: str | None = None, local_checkout: str | Path | None = None, gh_token: str | None = None, rlm_tools: list[str] | None = None, - rlm_env: Mapping[str, object] | None = None, + env_vars: ProgramOptionMap | None = None, skills: str | Path | None = None, - sandbox: bool | Mapping[str, object] | SandboxConfig = True, - program: Mapping[str, object] | None = None, - config: HarnessConfig | Mapping[str, object] | None = None, - **kwargs: Any, + sandbox: bool | ConfigMap | SandboxConfig | None = None, + program: ProgramMap | None = None, + config: RLMConfig | None = None, + **kwargs: Unpack[HarnessKwargs], ): - harness_config = HarnessConfig.from_config(config) + harness_config = RLMConfig.from_config( + config, + workdir=workdir, + instruction_path=instruction_path, + rlm_repo_url=rlm_repo_url, + rlm_repo_ref=rlm_repo_ref, + rlm_max_turns=rlm_max_turns, + rlm_exec_timeout=rlm_exec_timeout, + rlm_max_depth=rlm_max_depth, + summarize_at_tokens=summarize_at_tokens, + include_sub_rlm_trajectories=include_sub_rlm_trajectories, + append_to_system_prompt=append_to_system_prompt, + local_checkout=local_checkout, + gh_token=gh_token, + rlm_tools=rlm_tools, + env_vars=dict(env_vars) if env_vars is not None else None, + skills=skills, + ) if ( - not include_sub_rlm_trajectories + not harness_config.include_sub_rlm_trajectories and harness_config.keep_trajectory_step is None ): harness_config.keep_trajectory_step = keep_only_parent_rlm_steps - tool_names = list(rlm_tools) if rlm_tools is not None else ["ipython"] - summarize_resolver = build_summarize_resolver(summarize_at_tokens) - env: dict[str, object] = { + summarize_resolver = build_summarize_resolver( + harness_config.summarize_at_tokens + ) + env: ProgramOptionMap = { "PATH": 
"/root/.local/bin:/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin", "OPENAI_MODEL": "runtime.model", "RLM_MODEL": "runtime.model", - "RLM_TOOLS": ",".join(tool_names), - "RLM_MAX_TURNS": str(rlm_max_turns), - "RLM_EXEC_TIMEOUT": str(rlm_exec_timeout), - "RLM_MAX_DEPTH": str(rlm_max_depth), - **dict(rlm_env or {}), + "RLM_TOOLS": ",".join(harness_config.rlm_tools), + "RLM_MAX_TURNS": str(harness_config.rlm_max_turns), + "RLM_EXEC_TIMEOUT": str(harness_config.rlm_exec_timeout), + "RLM_MAX_DEPTH": str(harness_config.rlm_max_depth), + **harness_config.env_vars, } if summarize_resolver is not None: env["RLM_SUMMARIZE_AT_TOKENS"] = summarize_resolver - sandbox_config: Mapping[str, object] | SandboxConfig | bool - sandbox_config = sandbox - if sandbox is True: + sandbox_config: ConfigMap | SandboxConfig | bool + sandbox_config = ( + harness_config.sandbox + if sandbox is None and harness_config.sandbox is not None + else True + ) + if sandbox is not None: + sandbox_config = sandbox + if sandbox_config is True: sandbox_config = { "image": "python:3.11-slim", - "workdir": workdir, + "workdir": harness_config.workdir, "cpu_cores": 1, "memory_gb": 2, "disk_size_gb": 5, "network_access": True, "timeout_minutes": 60, - "command_timeout": max(rlm_exec_timeout + 120, 600), + "command_timeout": max(harness_config.rlm_exec_timeout + 120, 600), } - elif isinstance(sandbox, Mapping): + elif sandbox_config is not False: sandbox_config = { - "workdir": workdir, - "command_timeout": max(rlm_exec_timeout + 120, 600), - **dict(sandbox), + "workdir": harness_config.workdir, + "command_timeout": max(harness_config.rlm_exec_timeout + 120, 600), + **(sandbox_config_mapping(sandbox_config) or {}), } - dirs: dict[str, object] = { + dirs: dict[str, ProgramValue] = { DEFAULT_RLM_CHECKOUT_PATH: rlm_checkout_loader( - local_checkout=local_checkout, - rlm_repo_url=rlm_repo_url, - rlm_ref=rlm_ref, - gh_token=gh_token, + local_checkout=harness_config.local_checkout, + 
rlm_repo_url=harness_config.rlm_repo_url, + rlm_repo_ref=harness_config.rlm_repo_ref, + gh_token=harness_config.gh_token, ) } - if skills is not None: - dirs[DEFAULT_RLM_SKILLS_PATH] = Path(skills) + if harness_config.skills is not None: + dirs[DEFAULT_RLM_SKILLS_PATH] = Path(harness_config.skills) + self._explicit_skills = harness_config.skills is not None + command = [ + "bash", + "-lc", + build_run_script(harness_config.instruction_path, harness_config.workdir), + ] super().__init__( - command=["bash", "-lc", build_run_script(instruction_path, workdir)], - sandbox=sandbox_config, - files={ - instruction_path: task_instruction_text, - DEFAULT_APPEND_TO_SYSTEM_PROMPT_PATH: append_to_system_prompt, - }, - dirs=dirs, - setup=[ - "apt-get -o Acquire::Retries=3 update && " - "apt-get -o Acquire::Retries=3 install -y --no-install-recommends " - "ca-certificates curl git && rm -rf /var/lib/apt/lists/*", - build_install_command(), - ], - env=env, - artifacts={ - "rlm_metrics": { - "path": f"{workdir}/.rlm/sessions/*/meta.json", - "format": "json", - "key": "metrics", - "optional": True, - } - }, - program=program, + program=command_program( + command=command, + sandbox=sandbox_config, + files={ + harness_config.instruction_path: task_instruction_text, + DEFAULT_APPEND_TO_SYSTEM_PROMPT_PATH: ( + harness_config.append_to_system_prompt + ), + }, + dirs=dirs, + setup=[ + "apt-get -o Acquire::Retries=3 update && " + "apt-get -o Acquire::Retries=3 install -y --no-install-recommends " + "ca-certificates curl git && rm -rf /var/lib/apt/lists/*", + build_install_command(), + ], + env=env, + artifacts={ + "rlm_metrics": { + "path": f"{harness_config.workdir}/.rlm/sessions/*/meta.json", + "format": "json", + "key": "metrics", + "optional": True, + } + }, + program=program, + ), + sandbox=None if sandbox_config is False else sandbox_config, metrics=[ rlm_sub_llm_call_count, rlm_sub_llm_total_turns, @@ -140,6 +192,33 @@ def __init__( **kwargs, ) + def attach_taskset(self, taskset: 
Taskset) -> None: + if not self._explicit_skills: + upload_dirs = taskset.get_upload_dirs() + if not isinstance(upload_dirs, Mapping): + raise TypeError("Taskset.get_upload_dirs() must return a mapping.") + skills = upload_dirs.get("skills") + self.set_program_dir( + DEFAULT_RLM_SKILLS_PATH, + cast(ProgramDir | None, skills), + ) + super().attach_taskset(taskset) + self._program = self.compile_program(self.program) + + def set_program_dir( + self, remote_path: str, local_source: ProgramDir | None + ) -> None: + if not isinstance(self.program, Mapping): + raise TypeError("RLM program must be a mapping.") + program = dict(cast(ConfigMap, self.program)) + dirs = dict(cast(ConfigMap, program.get("dirs") or {})) + if local_source is None: + dirs.pop(remote_path, None) + else: + dirs[remote_path] = local_source + program["dirs"] = dirs + self.program = program + def build_install_command() -> str: script = f""" @@ -166,7 +245,7 @@ def build_run_script(instruction_path: str, workdir: str) -> str: def rlm_checkout_loader( local_checkout: str | Path | None, rlm_repo_url: str, - rlm_ref: str, + rlm_repo_ref: str, gh_token: str | None, ) -> Callable[[], Path]: checkout: Path | None = None @@ -183,7 +262,7 @@ def load() -> Path: else: checkout = resolve_git_checkout( repo_url=rlm_repo_url, - ref=rlm_ref, + ref=rlm_repo_ref, cache_root=DEFAULT_RLM_LOCAL_CHECKOUT_CACHE_ROOT, gh_token=gh_token, required_files=REQUIRED_RLM_CHECKOUT_FILES, @@ -197,21 +276,24 @@ def task_instruction_text(task: Task, state: State) -> str: return task_text(task, state, keys=("instruction", "question")) -def keep_only_parent_rlm_steps( - step: object, state: State, headers: Mapping[str, object] -) -> bool: +def keep_only_parent_rlm_steps(step: object, state: State, headers: ConfigMap) -> bool: _ = step, state return str(headers.get("x-rlm-depth", "0")) == "0" -def rlm_metric(state: Mapping[str, Any], key: str) -> float: +def rlm_metric(state: ConfigMap, key: str) -> float: artifacts = 
state.get("artifacts") if not isinstance(artifacts, Mapping): return 0.0 + artifacts = cast(ConfigMap, artifacts) metrics = artifacts.get("rlm_metrics") if not isinstance(metrics, Mapping): return 0.0 - return float(metrics.get(key, 0.0) or 0.0) + metrics = cast(ConfigMap, metrics) + value = metrics.get(key, 0.0) + if isinstance(value, bool) or not isinstance(value, int | float | str): + return 0.0 + return float(value or 0.0) async def rlm_sub_llm_call_count(task: Task, state: State) -> float: @@ -259,7 +341,7 @@ def sampled_threshold(state: State) -> str: raise ValueError("summarize_at_tokens must be int, (lo, hi), or None") -def draw_threshold(state: Mapping[str, Any], lo: int, hi: int) -> int: +def draw_threshold(state: ConfigMap, lo: int, hi: int) -> int: prompt = json.dumps(state.get("prompt"), sort_keys=True, default=str) digest = hashlib.sha256(prompt.encode("utf-8")).hexdigest() return random.Random(int(digest[:16], 16)).randint(lo, hi) diff --git a/verifiers/v1/packages/tasksets/harbor.py b/verifiers/v1/packages/tasksets/harbor.py index 7d200e8de..1533b921b 100644 --- a/verifiers/v1/packages/tasksets/harbor.py +++ b/verifiers/v1/packages/tasksets/harbor.py @@ -1,29 +1,58 @@ -from __future__ import annotations - +import inspect import json -import logging +import os import re import shutil import subprocess +import sys import tarfile import tempfile -from collections.abc import Callable, Iterable, Mapping +from collections.abc import Iterable, Mapping +from importlib.resources import files from pathlib import Path -from typing import Any, cast +from typing import cast from pydantic import Field +from typing_extensions import Unpack from verifiers.utils.import_utils import load_toml from ...config import TasksetConfig, merge_config_value -from ...taskset import Taskset +from ...taskset import Taskset, TasksetKwargs +from ...utils.sandbox_utils import SandboxClient from verifiers.decorators import reward +from ...types import ConfigData, Handler, 
ProgramOptionMap + +TASKS_SUBDIR = "tasks" + + +def _resolve_caller_package() -> str | None: + for frame_info in inspect.stack()[1:]: + package = frame_info.frame.f_globals.get("__package__") + if not isinstance(package, str) or not package: + package = frame_info.frame.f_globals.get("__name__") + if not isinstance(package, str) or not package or package == "__main__": + continue + if package.startswith("verifiers"): + continue + return package + return None -logger = logging.getLogger(__name__) + +def _bundle_tasks_root(module_name: str) -> Path: + try: + tasks = cast(os.PathLike[str], files(module_name) / TASKS_SUBDIR) + return Path(os.fspath(tasks)) + except TypeError as exc: + module = sys.modules.get(module_name) + module_file = getattr(module, "__file__", None) + if not isinstance(module_file, str): + raise exc + return Path(module_file).resolve().parent / TASKS_SUBDIR class HarborTasksetConfig(TasksetConfig): - tasks: object | None = None + dataset: str | None = None task_names: list[str] | None = None cache_dir: str | None = None refresh: bool = False @@ -37,7 +66,7 @@ class HarborTasksetConfig(TasksetConfig): workdir: str = "/app" task_dir: str = "/task" scope: str = "rollout" - env: dict[str, object] = Field(default_factory=dict) + env: ConfigData = Field(default_factory=dict) class HarborTaskset(Taskset): @@ -45,7 +74,7 @@ class HarborTaskset(Taskset): def __init__( self, - tasks: str | Path | None = None, + dataset: str | None = None, task_names: Iterable[str] | None = None, cache_dir: str | Path | None = None, refresh: bool | None = None, @@ -59,13 +88,19 @@ def __init__( workdir: str | None = None, task_dir: str | None = None, scope: str | None = None, - env: Mapping[str, object] | None = None, - rewards: Iterable[Callable[..., object]] = (), - config: HarborTasksetConfig | Mapping[str, object] | None = None, - **kwargs: Any, + env: ProgramOptionMap | None = None, + rewards: Iterable[Handler] = (), + config: HarborTasksetConfig | None = None, + 
**kwargs: Unpack[TasksetKwargs], ): self.config = type(self).config_type.from_config(config) - self.tasks = merge_config_value(tasks, self.config.tasks) + dataset_value = merge_config_value(dataset, self.config.dataset) + if dataset_value is not None and not isinstance(dataset_value, str): + raise TypeError("HarborTaskset dataset must be a string.") + self.dataset = dataset_value + self._bundle_package = ( + _resolve_caller_package() if self.dataset is None else None + ) self.task_names = list( task_names if task_names is not None @@ -73,10 +108,9 @@ def __init__( if self.config.task_names is not None else [] ) + cache_dir_value = merge_config_value(cache_dir, self.config.cache_dir) self.cache_dir = ( - Path(str(merge_config_value(cache_dir, self.config.cache_dir))).expanduser() - if merge_config_value(cache_dir, self.config.cache_dir) - else None + Path(str(cache_dir_value)).expanduser() if cache_dir_value else None ) self.refresh = bool(self.config.refresh if refresh is None else refresh) self.docker_image = str( @@ -121,7 +155,7 @@ def __init__( **kwargs, ) - def load_rows(self) -> list[dict[str, Any]]: + def load_rows(self) -> list[ConfigData]: root = self.resolve_tasks_root() task_dirs = harbor_task_dirs(root, self.task_names) rows = [ @@ -132,23 +166,28 @@ def load_rows(self) -> list[dict[str, Any]]: return rows def resolve_tasks_root(self) -> Path: - if self.tasks is None: - raise ValueError("HarborTaskset requires tasks=path_or_registry_id.") - if isinstance(self.tasks, Path): - candidate = self.tasks.expanduser() - else: - candidate = Path(str(self.tasks)).expanduser() - if candidate.exists(): - return candidate - if isinstance(self.tasks, str): + if self.dataset is not None: return download_harbor_dataset( - self.tasks, + self.dataset, cache_dir=self.cache_dir, refresh=self.refresh, ) - raise FileNotFoundError(f"Harbor tasks path not found: {candidate}") + if self._bundle_package is None: + raise RuntimeError( + "HarborTaskset() without a dataset must be 
constructed from inside " + "an installed Python package. Pass dataset='...' to fetch from " + "Harbor Hub, or construct it from a packaged environment." + ) + root = _bundle_tasks_root(self._bundle_package) + if not root.exists(): + raise FileNotFoundError( + "HarborTaskset() without a dataset requires " + f"{self._bundle_package}/{TASKS_SUBDIR}/ to contain Harbor task " + f"directories. Not found: {root}" + ) + return root - def task_row(self, task_dir: Path, index: int) -> dict[str, Any]: + def task_row(self, task_dir: Path, index: int) -> ConfigData: task_toml_path = task_dir / "task.toml" instruction_path = task_dir / "instruction.md" with task_toml_path.open("rb") as f: @@ -216,22 +255,21 @@ def task_row(self, task_dir: Path, index: int) -> dict[str, Any]: def harbor_task_dirs(root: Path, task_names: Iterable[str] | None = None) -> list[Path]: selected = set(task_names or []) - if is_harbor_task_dir(root): - if selected and root.name not in selected: - return [] - return [root] if not root.exists(): raise FileNotFoundError(f"Harbor tasks path not found: {root}") tasks: list[Path] = [] for task_dir in sorted(root.iterdir()): if not task_dir.is_dir(): - continue - if selected and task_dir.name not in selected: - continue - if is_harbor_task_dir(task_dir): + raise ValueError( + f"Harbor tasks root {root} contains non-directory entry {task_dir}." + ) + if not is_harbor_task_dir(task_dir): + raise ValueError( + f"Malformed Harbor task {task_dir}: missing task.toml or " + "instruction.md." 
+ ) + if not selected or task_dir.name in selected: tasks.append(task_dir) - else: - logger.warning("Skipping %s: missing task.toml or instruction.md", task_dir) if selected: found = {path.name for path in tasks} missing = sorted(selected - found) @@ -251,7 +289,9 @@ def is_harbor_task_dir(path: Path) -> bool: def parse_number(value: object, default: float) -> float: if value is None: return default - return float(cast(Any, value)) + if isinstance(value, bool) or not isinstance(value, int | float | str): + raise TypeError("Expected a numeric value.") + return float(value) def parse_gb(value: object, default: float) -> float: @@ -278,8 +318,8 @@ def download_harbor_dataset( uvx_bin = shutil.which("uvx") if harbor_bin is None and uvx_bin is None: raise FileNotFoundError( - f"{dataset_id!r} is not a local path and the Harbor CLI is not installed. " - "Install Harbor, install uvx, or pass a local Harbor task directory." + f"Harbor dataset {dataset_id!r} requires the Harbor CLI or uvx. " + "Install Harbor or uvx before using Harbor Hub datasets." 
) root = cache_dir or Path.home() / ".cache" / "verifiers" / "harbor" dataset_dir = root / safe_dataset_dir_name(dataset_id) @@ -330,7 +370,7 @@ async def harbor_reward(task, state) -> float: task_dir = Path(str(harbor["task_dir"])) from prime_sandboxes import AsyncSandboxClient - client = AsyncSandboxClient() + client = cast(SandboxClient, AsyncSandboxClient()) try: await upload_harbor_tests(client, sandbox_id, task_dir) test_timeout = int(parse_number(harbor.get("test_timeout"), 900)) @@ -362,15 +402,16 @@ async def harbor_reward(task, state) -> float: return parse_reward_text(str(reward_result.stdout or "").strip()) -async def upload_harbor_tests(client: object, sandbox_id: str, task_dir: Path) -> None: +async def upload_harbor_tests( + client: SandboxClient, sandbox_id: str, task_dir: Path +) -> None: with tempfile.NamedTemporaryFile(suffix=".tar.gz", delete=False) as tmp_file: tar_path = Path(tmp_file.name) try: await build_harbor_tests_archive(task_dir, tar_path) remote_tar = "/tmp/harbor_tests.tar.gz" - sandbox_client = cast(Any, client) - await sandbox_client.upload_file(sandbox_id, remote_tar, str(tar_path)) - await sandbox_client.execute_command( + await client.upload_file(sandbox_id, remote_tar, str(tar_path)) + await client.execute_command( sandbox_id=sandbox_id, command=( f"mkdir -p /oracle /tests /logs/verifier && " diff --git a/verifiers/v1/runtime.py b/verifiers/v1/runtime.py index c9918c625..704fa4fdb 100644 --- a/verifiers/v1/runtime.py +++ b/verifiers/v1/runtime.py @@ -1,41 +1,64 @@ -from __future__ import annotations - import asyncio import glob import inspect import json +import time import uuid -import weakref -from contextlib import AsyncExitStack from collections.abc import Awaitable, Callable, Iterable, Mapping, Sequence -from typing import Any, cast, get_args +from contextlib import AsyncExitStack +from typing import TYPE_CHECKING, Literal, cast, get_args from verifiers.clients import Client, resolve_client from verifiers.types import 
Messages, Response, Tool -from verifiers.types import ClientConfig, ClientType -from verifiers.utils.client_utils import resolve_client_config +from verifiers.types import ClientConfig, ClientType, SamplingArgs from verifiers.utils.async_utils import maybe_call_with_named_args +from verifiers.utils.client_utils import resolve_client_config from verifiers.utils.message_utils import normalize_messages from verifiers.utils.response_utils import parse_response_message, parse_response_tokens from verifiers.utils.tool_utils import convert_func_to_tool_def -from .config import resolve_config_object +from .config import ToolsetConfig, resolve_config_object +from .utils.binding_utils import ( + BindingSource, + GROUP_FRAMEWORK_ARGS, + ROLLOUT_FRAMEWORK_ARGS, + binding_key_parts, + binding_object_name, + binding_source_root, + function_name, + read_path, + same_callable, + validate_binding_source, + validate_bound_arg, + validate_callable_source, +) from .utils.lifecycle_utils import ( collect_handlers, + handler_collection_attr, run_handlers, sort_handlers, + state_done, unique_handlers, validate_handler_args, ) +from .utils.object_utils import close_object, resolve_object_factory +from .utils.runtime_registry import load_runtime, register_runtime, unregister_runtime from .utils.scoring_utils import SignalRecord, build_signals, collect_signals +from .utils.scoring_utils import group_framework_kwargs, rollout_framework_kwargs from .utils.scoring_utils import score_group as score_group_signals from .utils.scoring_utils import score_rollout as score_rollout_signals +from .utils.serialization_utils import serializable +from .utils.timing_utils import record_model_timing from .utils.artifact_utils import artifact_format, artifact_key, artifact_optional from .utils.artifact_utils import artifact_path +from .utils.tool_utils import schema_callable, string_list, tool_visible +from .utils.tool_utils import toolset_object_scope +from .utils.usage_utils import record_response_usage 
from .state import State from .task import Task from .toolset import ( MCPTool, + ToolEntry, Toolset, flatten_toolsets, iter_toolsets, @@ -43,14 +66,20 @@ tool_name, ) from .user import User +from .types import ConfigData, ConfigMap, Handler, PromptMessage -_RUNTIME_REGISTRY: weakref.WeakValueDictionary[str, Runtime] = ( - weakref.WeakValueDictionary() -) +if TYPE_CHECKING: + from .harness import Harness + from .taskset import Taskset + from .utils.mcp_utils import MCPToolHandle + from .utils.sandbox_utils import SandboxLease + +BindingOwner = Toolset | Literal["taskset"] | None +BindingEntry = tuple[str, BindingSource, BindingOwner] class BorrowedTool: - def __init__(self, runtime: Runtime, handle_id: str, name: str): + def __init__(self, runtime: "Runtime", handle_id: str, name: str): self.runtime = runtime self.handle_id = handle_id self.name = name @@ -67,9 +96,11 @@ async def __call__(self, **kwargs: object) -> object: class Runtime: - def __init__(self, taskset: object | None = None, harness: object | None = None): + def __init__( + self, taskset: "Taskset | None" = None, harness: "Harness | None" = None + ): self.runtime_id = uuid.uuid4().hex - _RUNTIME_REGISTRY[self.runtime_id] = self + register_runtime(self.runtime_id, self) self.taskset = taskset self.harness = harness self.toolsets = self._collect_toolsets() @@ -77,14 +108,15 @@ def __init__(self, taskset: object | None = None, harness: object | None = None) self.rollout_toolsets: dict[str, list[Toolset]] = {} self.objects: dict[tuple[int, str, str], object] = {} self.user_objects: dict[tuple[int, str, str], object] = {} + self.taskset_objects: dict[tuple[int, str], object] = {} self.model_clients: dict[str, Client] = {} self.owned_model_clients: set[str] = set() - self.sandbox_leases: dict[tuple[str, str], object] = {} + self.sandbox_leases: dict[tuple[str, str], SandboxLease] = {} self.sandbox_lock = asyncio.Lock() self.mcp_exit_stacks: dict[str, AsyncExitStack] = {} - self.mcp_tools: dict[str, 
dict[str, object]] = {} - self.exposed_mcp_tools: dict[str, dict[str, object]] = {} - self.trajectories: dict[str, list[Mapping[str, object]]] = {} + self.mcp_tools: dict[str, ConfigData] = {} + self.exposed_mcp_tools: dict[str, ConfigData] = {} + self.trajectories: dict[str, list[ConfigMap]] = {} self.tool_handles: dict[str, tuple[Task, State, tuple[str, ...]]] = {} self.stop_conditions = collect_handlers( self._handler_owners(), @@ -173,7 +205,7 @@ def prepare_state(self, task: Task, state: State) -> None: self.register_trajectory(state) def register_tool_handle(self, state: State, names: Sequence[str]) -> str: - task = Task(cast(Mapping[str, Any], state["task"])).freeze() + task = Task(cast(ConfigMap, state["task"])).freeze() available = self.all_exposed_tools(state) unknown = sorted(set(names) - set(available)) if unknown: @@ -219,27 +251,27 @@ def register_trajectory(self, state: State) -> None: if not isinstance(trajectory, list): raise TypeError("state.trajectory must be a list.") self.trajectories[str(state["trajectory_id"])] = cast( - list[Mapping[str, object]], trajectory + list[ConfigMap], trajectory ) - def resolved_handles(self, state: State) -> Mapping[str, object]: + def resolved_handles(self, state: State) -> ConfigMap: runtime = state.get("runtime", {}) if not isinstance(runtime, Mapping): return {} resolved = runtime.get("resolved") or {} if not isinstance(resolved, Mapping): raise TypeError("state.runtime.resolved must be a mapping.") - return cast(Mapping[str, object], resolved) + return cast(ConfigMap, resolved) - def resolved_handle(self, state: State, name: str) -> Mapping[str, object] | None: + def resolved_handle(self, state: State, name: str) -> ConfigMap | None: handle = self.resolved_handles(state).get(name) if handle is None: return None if not isinstance(handle, Mapping): raise TypeError(f"state.runtime.resolved.{name} must be a mapping.") - return cast(Mapping[str, object], handle) + return cast(ConfigMap, handle) - def 
handle_runtime(self, handle: Mapping[str, object], name: str) -> Runtime: + def handle_runtime(self, handle: ConfigMap, name: str) -> "Runtime": runtime_id = handle.get("runtime_id") if not isinstance(runtime_id, str) or not runtime_id: raise TypeError( @@ -333,7 +365,7 @@ def model(self, state: State) -> str: raise RuntimeError("Harness has no model for intercepted requests.") return model - def sampling_args(self, state: State) -> dict[str, Any]: + def sampling_args(self, state: State) -> SamplingArgs: sampling = state.get("runtime", {}).get("sampling_args") or {} if not sampling: handle = self.resolved_handle(state, "model") @@ -341,7 +373,7 @@ def sampling_args(self, state: State) -> dict[str, Any]: sampling = handle.get("sampling_args") or {} if not isinstance(sampling, Mapping): raise TypeError("state.runtime.sampling_args must be a mapping.") - return dict(cast(Mapping[str, Any], sampling)) + return cast(SamplingArgs, dict(cast(ConfigMap, sampling))) def tool_defs(self, state: State) -> list[Tool] | None: defs: list[Tool] = [] @@ -351,20 +383,21 @@ def tool_defs(self, state: State) -> list[Tool] | None: return defs or None async def user_messages( - self, task: Task, state: State, transcript: Sequence[object] | None = None - ) -> list[dict[str, object]]: + self, + task: Task, + state: State, + transcript: Sequence[PromptMessage] | None = None, + ) -> list[ConfigData]: user = self._resolve_user() if user is None: return [] - kwargs: dict[str, object] = {} + kwargs: ConfigData = {} fn = user.fn if user.sandbox is not None: kwargs["sandbox"] = await self.resolve_user_sandbox(user, task, state) for name, source in user.bindings.items(): validate_bound_arg(user.fn, name, f"User binding {name!r}") - validate_binding_source_root( - binding_source_root(source), f"User binding {name!r}" - ) + validate_binding_source(source, f"User binding {name!r}") kwargs[name] = await self.resolve_user_binding( user, source, task, state, transcript ) @@ -400,9 +433,7 @@ def 
_tool_def(self, name: str, tool: object, state: State) -> Tool: schema_tool = schema_callable(tool, filtered_signature) tool_def = convert_func_to_tool_def(schema_tool) parameters = dict(tool_def.parameters) - properties = dict( - cast(Mapping[str, object], parameters.get("properties") or {}) - ) + properties = dict(cast(ConfigMap, parameters.get("properties") or {})) for arg_name in hidden_args: properties.pop(arg_name, None) parameters["properties"] = properties @@ -443,9 +474,12 @@ async def is_completed(self, task: Task, state: State) -> bool: [*self.stop_conditions, *self._rollout_handlers("stop", state)] ) for condition in conditions: - extra_kwargs = await self.binding_kwargs(condition, task, state) + framework_kwargs = rollout_framework_kwargs(task, state) + extra_kwargs = await self.binding_kwargs( + condition, task, state, set(framework_kwargs) + ) completed = await maybe_call_with_named_args( - condition, task=task, state=state, **extra_kwargs + condition, **extra_kwargs, **framework_kwargs ) if completed: state._set_completed(True) @@ -462,17 +496,15 @@ async def is_completed(self, task: Task, state: State) -> bool: return True return False - def tool_calls( - self, task: Task, state: State - ) -> dict[str, Callable[..., Awaitable[object]]]: - calls: dict[str, Callable[..., Awaitable[object]]] = {} + def tool_calls(self, task: Task, state: State) -> dict[str, Handler]: + calls: dict[str, Handler] = {} for name in self.all_exposed_tools(state): calls[name] = self._tool_call(name, task, state, exposed=True) return calls def _tool_call( self, tool_name: str, task: Task, state: State, exposed: bool - ) -> Callable[..., Awaitable[object]]: + ) -> Handler: async def call(**kwargs: object) -> object: return await self._call_tool(tool_name, task, state, exposed, **kwargs) @@ -505,12 +537,12 @@ def _tool_signature( async def _call_tool_callable( self, - tool: Callable[..., object], + tool: Handler, tool_name: str, task: Task, state: State, - visible_kwargs: 
Mapping[str, object], - hidden_kwargs: Mapping[str, object], + visible_kwargs: ConfigMap, + hidden_kwargs: ConfigMap, ) -> object: call_kwargs = dict(visible_kwargs) try: @@ -526,7 +558,7 @@ async def _call_tool_callable( return await result return result parameters = signature.parameters - hidden_values: dict[str, object] = { + hidden_values: ConfigData = { "task": task, "state": state, **hidden_kwargs, @@ -557,7 +589,7 @@ async def _call_tool( kind = "exposed tool" if exposed else "tool" raise KeyError(f"Unknown {kind} {tool_name!r}.") visible_kwargs = dict(kwargs) - hidden_kwargs: dict[str, object] = {} + hidden_kwargs: ConfigData = {} owner = self.tool_owner(tool_name, state) for hidden_arg in ("runtime", "task", "state"): if hidden_arg in visible_kwargs: @@ -580,7 +612,7 @@ async def _call_tool( owner, source, task, state ) return await self._call_tool_callable( - cast(Callable[..., object], tools[tool_name]), + cast(Handler, tools[tool_name]), tool_name, task=task, state=state, @@ -594,9 +626,10 @@ async def submit_model_request( task: Task, state: State, tool_defs: list[Tool] | None = None, - extras: dict[str, object] | None = None, + extras: ConfigData | None = None, ) -> Response: client = self.model_client(state) + request_start = time.time() response = await client.get_response( prompt=prompt, model=self.model(state), @@ -604,6 +637,9 @@ async def submit_model_request( sampling_args=self.sampling_args(state), state=state, ) + request_end = time.time() + record_model_timing(state, request_start, request_end) + record_response_usage(state, response) completion = await parse_response_message(response) tokens = await parse_response_tokens(response) is_truncated = response.message.is_truncated or ( @@ -624,7 +660,7 @@ async def submit_model_request( if keep_step is not None: headers = {} if extras is not None and isinstance(extras.get("headers"), Mapping): - headers = dict(cast(Mapping[str, object], extras["headers"])) + headers = dict(cast(ConfigMap, 
extras["headers"])) keep = await maybe_call_with_named_args( keep_step, step=step, state=state, headers=headers ) @@ -637,7 +673,7 @@ async def setup_rollout( self, task: Task, state: State, - setup_handlers: Iterable[Callable[..., object]] = (), + setup_handlers: Iterable[Handler] = (), **kwargs: object, ) -> State: handlers = sort_handlers( @@ -679,7 +715,7 @@ async def update_group(self, tasks: list[Task], states: list[State]) -> list[Sta "update", ) validate_handler_args(handlers, {"tasks", "states"}, "update", "group") - await run_handlers(handlers, tasks=tasks, states=states) + await self.run_group_handlers(handlers, tasks=tasks, states=states) return states async def score_rollout(self, task: Task, state: State) -> State: @@ -695,8 +731,9 @@ async def score_group(self, tasks: list[Task], states: list[State]) -> list[Stat await self.update_group(tasks, states) await score_group_signals( self.group_signals, - cast(list[Mapping[str, Any]], tasks), - cast(list[dict[str, Any]], states), + cast(list[ConfigMap], tasks), + cast(list[ConfigData], states), + resolve_kwargs=self.group_binding_kwargs, ) return states @@ -722,7 +759,7 @@ async def cleanup_group(self, tasks: list[Task], states: list[State]) -> None: *self._group_handlers("cleanup", states, stage="group"), ] ) - await run_handlers(handlers, tasks=tasks, states=states) + await self.run_group_handlers(handlers, tasks=tasks, states=states) for state in states: await self.release_objects("group", state) await self.release_user_objects("group", state) @@ -751,78 +788,206 @@ async def teardown(self) -> None: await run_handlers(self.teardown_handlers) await self.release_objects("global") await self.release_user_objects("global") + await self.release_taskset_objects() for handle in list(self.sandbox_leases.values()): await maybe_call_with_named_args(getattr(handle, "delete")) self.sandbox_leases.clear() self.tool_handles.clear() await self.close_all_mcp_tools() await self.release_all_model_clients() - 
_RUNTIME_REGISTRY.pop(self.runtime_id, None) + unregister_runtime(self.runtime_id) async def run_rollout_handlers( self, - handlers: Iterable[Callable[..., object]], + handlers: Iterable[Handler], task: Task, state: State, **kwargs: object, ) -> None: for handler in handlers: - extra_kwargs = await self.binding_kwargs(handler, task, state) - duplicate = set(kwargs) & set(extra_kwargs) - if duplicate: - raise ValueError( - f"Handler {function_name(handler)!r} received duplicate " - f"bound args: {sorted(duplicate)}." - ) + framework_kwargs = rollout_framework_kwargs(task, state) + protected_args = set(framework_kwargs) | set(kwargs) + extra_kwargs = await self.binding_kwargs( + handler, task, state, protected_args + ) await maybe_call_with_named_args( - handler, task=task, state=state, **kwargs, **extra_kwargs + handler, **extra_kwargs, **kwargs, **framework_kwargs + ) + + async def run_group_handlers( + self, + handlers: Iterable[Handler], + tasks: list[Task], + states: list[State], + **kwargs: object, + ) -> None: + for handler in handlers: + framework_kwargs = group_framework_kwargs( + cast(list[ConfigMap], tasks), + cast(list[ConfigData], states), + ) + protected_args = set(framework_kwargs) | set(kwargs) + extra_kwargs = await self.group_binding_kwargs( + handler, + cast(list[ConfigMap], tasks), + cast(list[ConfigData], states), + protected_args, + ) + await maybe_call_with_named_args( + handler, **extra_kwargs, **kwargs, **framework_kwargs ) async def binding_kwargs( self, - fn: Callable[..., object], - task: Mapping[str, Any], - state: dict[str, Any], - ) -> dict[str, object]: + fn: Handler, + task: ConfigMap, + state: ConfigData, + protected_args: set[str] | None = None, + ) -> ConfigData: name = function_name(fn) - kwargs: dict[str, object] = {} - for binding_key, source in self.bindings_for_callable( + kwargs: ConfigData = {} + protected = protected_args or set() + for binding_key, source, owner in self._binding_entries_for_callable( fn, cast(State, state) 
- ).items(): + ): prefix, arg_name = binding_key_parts(binding_key) if prefix != name: continue - validate_bound_arg(fn, arg_name, f"Binding {binding_key!r}") - kwargs[arg_name] = await self.resolve_binding( - source, cast(Task, task), cast(State, state) + if arg_name in protected: + continue + validate_bound_arg(fn, arg_name, f"Binding {binding_key!r}", protected) + if arg_name in kwargs: + raise ValueError(f"Binding arg {arg_name!r} is defined twice.") + kwargs[arg_name] = await self.resolve_owner_binding( + owner, source, cast(Task, task), cast(State, state) + ) + return kwargs + + async def group_binding_kwargs( + self, + fn: Handler, + tasks: list[ConfigMap], + states: list[ConfigData], + protected_args: set[str] | None = None, + ) -> ConfigData: + if not states: + return {} + state = cast(State, states[0]) + name = function_name(fn) + kwargs: ConfigData = {} + protected = protected_args or set() + for binding_key, source, owner in self._binding_entries_for_callable(fn, state): + prefix, arg_name = binding_key_parts(binding_key) + if prefix != name: + continue + if arg_name in protected: + continue + validate_bound_arg(fn, arg_name, f"Binding {binding_key!r}", protected) + if arg_name in kwargs: + raise ValueError(f"Binding arg {arg_name!r} is defined twice.") + kwargs[arg_name] = await self.resolve_group_binding( + owner, + source, + cast(list[Task], tasks), + cast(list[State], states), + state, ) return kwargs - async def resolve_binding(self, source: object, task: Task, state: State) -> object: + async def resolve_owner_binding( + self, owner: BindingOwner, source: BindingSource, task: Task, state: State + ) -> object: + if isinstance(owner, Toolset): + return await self.resolve_tool_binding(owner, source, task, state) + if owner == "taskset": + return await self.resolve_taskset_binding(source, task, state) + return await self.resolve_binding(source, task, state) + + async def resolve_taskset_binding( + self, source: BindingSource, task: Task, state: State 
+ ) -> object: + if isinstance(source, str): + root, separator, tail = source.partition(".") + if root == "objects": + if not separator: + raise ValueError("objects binding sources must name an object.") + name, _, rest = tail.partition(".") + value = await self.resolve_taskset_object(name, task, state) + return read_path(value, rest) if rest else value + return await self.resolve_binding(source, task, state) + + async def resolve_binding( + self, source: BindingSource, task: Task, state: State + ) -> object: if isinstance(source, str): if binding_source_root(source) == "objects": raise ValueError( - "objects.* bindings are private to the owning Toolset/User " - "callable." + "objects.* bindings are private to the owning Taskset, " + "Toolset, or User callable." ) return await self._resolve_path(source, task, state) if isinstance(source, Mapping) and "fn" in source: - spec = cast(Mapping[str, object], source) - unknown = set(spec) - {"fn"} - if unknown: - raise ValueError( - f"Callable binding source has unknown keys: {sorted(unknown)}." 
- ) + spec = cast(ConfigMap, source) + validate_callable_source(spec, "Callable binding source") fn = resolve_config_object(spec["fn"]) if not callable(fn): raise TypeError("Callable binding source requires callable fn.") return await maybe_call_with_named_args(fn, task=task, state=state) if callable(source): return await maybe_call_with_named_args(source, task=task, state=state) - return source + raise TypeError("Binding sources must be framework paths or callables.") + + async def resolve_group_binding( + self, + owner: BindingOwner, + source: BindingSource, + tasks: list[Task], + states: list[State], + state: State, + ) -> object: + if isinstance(source, str): + root, separator, tail = source.partition(".") + if root == "tasks": + return read_path(tasks, tail) if separator else tasks + if root == "states": + return read_path(states, tail) if separator else states + if root in {"task", "state", "tools"}: + raise ValueError("Group handler bindings must use tasks or states.") + if root == "runtime": + runtime = state.get("runtime", {}) + return read_path(runtime, tail) if separator else runtime + if root == "objects": + if not separator: + raise ValueError("objects binding sources must name an object.") + name, _, rest = tail.partition(".") + if owner == "taskset": + value = await self.resolve_taskset_object(name, tasks[0], state) + elif isinstance(owner, Toolset): + if toolset_object_scope(owner) == "rollout": + raise ValueError( + "objects.* group bindings require a group or global Toolset scope." + ) + value = await self._resolve_toolset_object( + owner, name, tasks[0], state + ) + else: + raise ValueError( + "objects.* group bindings require an object owner." 
+ ) + return read_path(value, rest) if rest else value + if isinstance(source, Mapping) and "fn" in source: + spec = cast(ConfigMap, source) + validate_callable_source(spec, "Callable binding source") + fn = resolve_config_object(spec["fn"]) + if not callable(fn): + raise TypeError("Callable binding source requires callable fn.") + return await maybe_call_with_named_args(fn, tasks=tasks, states=states) + if callable(source): + return await maybe_call_with_named_args(source, tasks=tasks, states=states) + raise TypeError("Binding sources must be framework paths or callables.") async def resolve_tool_binding( - self, toolset: Toolset | None, source: object, task: Task, state: State + self, toolset: Toolset | None, source: BindingSource, task: Task, state: State ) -> object: if isinstance(source, str): root, separator, tail = source.partition(".") @@ -834,17 +999,17 @@ async def resolve_tool_binding( name, _, rest = tail.partition(".") value = await self._resolve_toolset_object(toolset, name, task, state) if rest: - return _read_path(value, rest) + return read_path(value, rest) return value return await self.resolve_binding(source, task, state) async def resolve_user_binding( self, user: User, - source: object, + source: BindingSource, task: Task, state: State, - transcript: Sequence[object] | None = None, + transcript: Sequence[PromptMessage] | None = None, ) -> object: if isinstance(source, str): root, separator, tail = source.partition(".") @@ -855,7 +1020,7 @@ async def resolve_user_binding( else: raise KeyError(f"Unknown user object {name!r}.") if rest: - return _read_path(value, rest) + return read_path(value, rest) return value if callable(source): return await maybe_call_with_named_args( @@ -877,6 +1042,31 @@ async def resolve_user_object( self.user_objects[key] = obj return obj + async def resolve_taskset_object( + self, name: str, task: Task, state: State + ) -> object: + _ = task, state + taskset = self.taskset + if taskset is None: + raise 
RuntimeError("Taskset objects require a Taskset.") + objects = getattr(taskset, "objects", {}) + if not isinstance(objects, Mapping): + raise TypeError("Taskset objects must be a mapping.") + specs = cast(ConfigMap, objects) + if name not in specs: + raise KeyError(f"Unknown Taskset object {name!r}.") + key = (id(taskset), name) + if key in self.taskset_objects: + return self.taskset_objects[key] + obj = await resolve_object_factory(specs[name], f"Taskset object {name!r}") + self.taskset_objects[key] = obj + return obj + + async def release_taskset_objects(self) -> None: + for key, obj in list(self.taskset_objects.items()): + await close_object(obj) + del self.taskset_objects[key] + async def release_user_objects( self, scope: str, state: State | None = None ) -> None: @@ -895,6 +1085,8 @@ async def ensure_rollout_toolsets(self, task: Task, state: State) -> None: self.rollout_toolsets[key] = await self._task_toolset_additions(task, state) def validate_bindings(self, state: State) -> None: + for owner in (self.taskset, self.harness): + self._validate_owner_bindings(owner) for toolset in iter_toolsets(self.active_toolsets(state)): self._validate_toolset_bindings(toolset) user = self._resolve_user() @@ -902,7 +1094,7 @@ def validate_bindings(self, state: State) -> None: for name, source in user.bindings.items(): validate_bound_arg(user.fn, name, f"User binding {name!r}") source_root = binding_source_root(source) - validate_binding_source_root(source_root, f"User binding {name!r}") + validate_binding_source(source, f"User binding {name!r}") if source_root == "objects": object_name = binding_object_name(source) if object_name not in user.objects: @@ -911,6 +1103,48 @@ def validate_bindings(self, state: State) -> None: f"{object_name!r}." 
) + def _validate_owner_bindings(self, owner: object | None) -> None: + if owner is None: + return + targets = self._owner_binding_targets(owner) + allow_objects = owner is self.taskset + for binding_key, source in self._owner_bindings(owner).items(): + target_name, arg_name = binding_key_parts(binding_key) + target = targets.get(target_name) + if target is None: + raise ValueError( + f"Binding {binding_key!r} does not match a Taskset/Harness " + "callable." + ) + target_kind, fn = target + protected_args = self._binding_target_framework_args(target_kind, fn) + if arg_name in protected_args: + continue + validate_bound_arg( + fn, + arg_name, + f"Binding {binding_key!r}", + protected_args, + ) + validate_binding_source( + source, f"Binding {binding_key!r}", allow_objects=allow_objects + ) + source_root = binding_source_root(source) + if source_root == "objects": + object_name = binding_object_name(source) + objects = getattr(owner, "objects", {}) + if not isinstance(objects, Mapping): + raise TypeError("Taskset objects must be a mapping.") + if object_name not in objects: + raise KeyError( + f"Binding {binding_key!r} references unknown Taskset object " + f"{object_name!r}." 
+ ) + + def _binding_target_framework_args(self, kind: str, fn: Handler) -> frozenset[str]: + stage = str(getattr(fn, f"{kind}_stage", "rollout")) + return GROUP_FRAMEWORK_ARGS if stage == "group" else ROLLOUT_FRAMEWORK_ARGS + def _validate_toolset_bindings(self, toolset: Toolset) -> None: targets = self._toolset_binding_targets(toolset) for binding_key, source in toolset.bindings.items(): @@ -924,7 +1158,7 @@ def _validate_toolset_bindings(self, toolset: Toolset) -> None: target_kind, fn = target validate_bound_arg(fn, arg_name, f"Binding {binding_key!r}") source_root = binding_source_root(source) - validate_binding_source_root(source_root, f"Binding {binding_key!r}") + validate_binding_source(source, f"Binding {binding_key!r}") if source_root == "objects" and target_kind != "tool": raise ValueError( f"Binding {binding_key!r} uses objects.*, which is only valid " @@ -938,12 +1172,100 @@ def _validate_toolset_bindings(self, toolset: Toolset) -> None: f"{object_name!r}." ) + def _owner_binding_targets(self, owner: object) -> dict[str, tuple[str, Handler]]: + targets: dict[str, tuple[str, Handler]] = {} + + def add_target(kind: str, fn: Handler) -> None: + name = function_name(fn) + existing = targets.get(name) + if existing is not None and not same_callable(existing[1], fn): + raise ValueError( + f"Taskset/Harness binding target {name!r} is defined twice." 
+ ) + targets[name] = (kind, fn) + + collection_kinds = { + "stops": "stop", + "setups": "setup", + "updates": "update", + "metrics": "metric", + "rewards": "reward", + "advantages": "advantage", + "cleanups": "cleanup", + } + for attr, kind in collection_kinds.items(): + for fn in getattr(owner, attr, ()): + if callable(fn): + add_target(kind, cast(Handler, fn)) + for _, method in inspect.getmembers(owner, predicate=callable): + for kind in ( + "stop", + "setup", + "update", + "metric", + "reward", + "advantage", + "cleanup", + ): + if getattr(method, kind, False): + add_target(kind, cast(Handler, method)) + return targets + + def _owner_bindings(self, owner: object | None) -> dict[str, BindingSource]: + if owner is None: + return {} + bindings = getattr(owner, "bindings", {}) + if not isinstance(bindings, Mapping): + raise TypeError("Taskset/Harness bindings must be a mapping.") + return dict(cast(dict[str, BindingSource], bindings)) + + def _binding_entries_for_callable( + self, fn: Handler, state: State + ) -> list[BindingEntry]: + target_name = function_name(fn) + entries: list[BindingEntry] = [] + for owner in (self.taskset, self.harness): + if owner is None: + continue + target = self._owner_binding_targets(owner).get(target_name) + if target is None or not same_callable(target[1], fn): + continue + entry_owner: BindingOwner = "taskset" if owner is self.taskset else None + self._extend_binding_entries( + entries, self._owner_bindings(owner), target_name, entry_owner + ) + for toolset in iter_toolsets(self.active_toolsets(state)): + target = self._toolset_binding_targets(toolset).get(target_name) + if target is None or not same_callable(target[1], fn): + continue + self._extend_binding_entries( + entries, toolset.bindings, target_name, toolset + ) + return entries + + def _extend_binding_entries( + self, + entries: list[BindingEntry], + bindings: dict[str, BindingSource], + target_name: str, + owner: BindingOwner = None, + ) -> None: + existing = {key for 
key, _, _ in entries} + for binding_key, source in bindings.items(): + prefix, _ = binding_key_parts(binding_key) + if prefix != target_name: + continue + if binding_key in existing: + raise ValueError(f"Binding {binding_key!r} is defined twice.") + existing.add(binding_key) + entries.append((binding_key, source, owner)) + def _toolset_binding_targets( self, toolset: Toolset - ) -> dict[str, tuple[str, Callable[..., object]]]: - targets: dict[str, tuple[str, Callable[..., object]]] = {} + ) -> dict[str, tuple[str, Handler]]: + targets: dict[str, tuple[str, Handler]] = {} - def add_target(name: str, kind: str, fn: Callable[..., object]) -> None: + def add_target(name: str, kind: str, fn: Handler) -> None: if name in targets: raise ValueError(f"Toolset binding target {name!r} is defined twice.") targets[name] = (kind, fn) @@ -952,14 +1274,14 @@ def add_target(name: str, kind: str, fn: Callable[..., object]) -> None: if isinstance(item, Toolset | MCPTool): continue if callable(item): - add_target(tool_name(item), "tool", cast(Callable[..., object], item)) + add_target(tool_name(item), "tool", cast(Handler, item)) for attr in ("stops", "setups", "updates", "cleanups"): for fn in getattr(toolset, attr): if callable(fn): add_target( function_name(fn), attr[:-1], - cast(Callable[..., object], fn), + cast(Handler, fn), ) for _, method in inspect.getmembers(toolset, predicate=callable): if any( @@ -969,17 +1291,17 @@ def add_target(name: str, kind: str, fn: Callable[..., object]) -> None: add_target( function_name(method), "handler", - cast(Callable[..., object], method), + cast(Handler, method), ) return targets - def _task_toolsets_config(self, task: Mapping[str, Any]) -> Mapping[str, object]: + def _task_toolsets_config(self, task: ConfigMap) -> ConfigMap: raw_toolsets = task.get("toolsets") if raw_toolsets is None: return {} if not isinstance(raw_toolsets, Mapping): raise TypeError("task.toolsets must be a mapping.") - return cast(Mapping[str, object], raw_toolsets) + 
return cast(ConfigMap, raw_toolsets) async def _task_toolset_additions(self, task: Task, state: State) -> list[Toolset]: toolsets: list[Toolset] = [] @@ -999,14 +1321,14 @@ async def _runtime_named_toolset( if isinstance(spec, Toolset): return spec if isinstance(spec, Mapping): - mapping = cast(Mapping[str, object], spec) + mapping = cast(ConfigMap, spec) if "fn" in mapping: fn = resolve_config_object(mapping.get("fn")) if not callable(fn): raise TypeError(f"Task toolset {name!r} requires callable fn.") kwargs = {key: value for key, value in mapping.items() if key != "fn"} result = await maybe_call_with_named_args( - cast(Callable[..., object], fn), + cast(Handler, fn), task=task, state=state, **kwargs, @@ -1017,10 +1339,10 @@ async def _runtime_named_toolset( f"Task toolset {name!r} fn must return exactly one Toolset." ) return toolsets[0] - return Toolset(config=mapping) + return Toolset(config=ToolsetConfig.from_config(mapping)) if callable(spec): result = await maybe_call_with_named_args( - cast(Callable[..., object], spec), task=task, state=state + cast(Handler, spec), task=task, state=state ) toolsets = normalize_toolset_result(result) if len(toolsets) != 1: @@ -1037,7 +1359,7 @@ def active_toolsets(self, state: State) -> list[Toolset]: return [*self._static_toolsets_for_state(state), *self._rollout_toolsets(state)] def _static_toolsets_for_state(self, state: State) -> list[Toolset]: - task = cast(Mapping[str, Any], state.get("task") or {}) + task = cast(ConfigMap, state.get("task") or {}) selected = self._selected_toolset_names(task) ids_to_names = { id(toolset): name for name, toolset in self.named_toolsets.items() @@ -1050,7 +1372,7 @@ def _static_toolsets_for_state(self, state: State) -> list[Toolset]: active.append(toolset) return active - def _selected_toolset_names(self, task: Mapping[str, Any]) -> set[str]: + def _selected_toolset_names(self, task: ConfigMap) -> set[str]: names = set(self.named_toolsets) config = self._task_toolsets_config(task) show 
= config.get("show") @@ -1104,9 +1426,9 @@ def _collect_named_toolsets(self) -> dict[str, Toolset]: return named def _tools_for_toolsets( - self, toolsets: Iterable[object], apply_visibility: bool - ) -> dict[str, object]: - tools: dict[str, object] = {} + self, toolsets: Iterable[ToolEntry], apply_visibility: bool + ) -> ConfigData: + tools: ConfigData = {} for tool in flatten_toolsets(toolsets, apply_visibility=apply_visibility): if isinstance(tool, MCPTool): continue @@ -1138,33 +1460,6 @@ def visit(toolset: Toolset) -> None: def tool_owner(self, name: str, state: State) -> Toolset | None: return self._tool_owners_for(self.active_toolsets(state)).get(name) - def bindings_for_state(self, state: State) -> dict[str, object]: - bindings: dict[str, object] = {} - for toolset in iter_toolsets(self.active_toolsets(state)): - for key, value in toolset.bindings.items(): - if key in bindings: - raise ValueError(f"Tool binding {key!r} is defined twice.") - bindings[key] = value - return bindings - - def bindings_for_callable( - self, fn: Callable[..., object], state: State - ) -> dict[str, object]: - bindings: dict[str, object] = {} - for toolset in iter_toolsets(self.active_toolsets(state)): - targets = self._toolset_binding_targets(toolset) - target = targets.get(function_name(fn)) - if target is None or not same_callable(target[1], fn): - continue - for key, value in toolset.bindings.items(): - target_name, _ = binding_key_parts(key) - if target_name != function_name(fn): - continue - if key in bindings: - raise ValueError(f"Binding {key!r} is defined twice.") - bindings[key] = value - return bindings - def _owner_signals(self, owner: object | None) -> list[SignalRecord]: if owner is None: return [] @@ -1177,16 +1472,16 @@ def _owner_signals(self, owner: object | None) -> list[SignalRecord]: advantages=getattr(owner, "advantages", ()), ) - def _handler_owners(self) -> tuple[object | None, ...]: + def _handler_owners(self) -> tuple["Taskset | Harness | None", ...]: 
return (self.taskset, self.harness) def _extra_handlers( self, attr: str, - builtins: Sequence[Callable[..., object]] = (), - owners: Sequence[object | None] | None = None, - ) -> list[Callable[..., object]]: - handlers: list[Callable[..., object]] = list(builtins) + builtins: Sequence[Handler] = (), + owners: Sequence["Taskset | Harness | Toolset | None"] | None = None, + ) -> list[Handler]: + handlers: list[Handler] = list(builtins) collection_attr = handler_collection_attr(attr) for owner in owners or self._handler_owners(): if owner is None: @@ -1194,7 +1489,7 @@ def _extra_handlers( for handler in getattr(owner, "__dict__", {}).get(collection_attr, ()): if not callable(handler): raise TypeError(f"{collection_attr} entries must be callable.") - handlers.append(cast(Callable[..., object], handler)) + handlers.append(cast(Handler, handler)) return handlers def _rollout_handlers( @@ -1202,8 +1497,8 @@ def _rollout_handlers( attr: str, state: State, stage: str | None = None, - ) -> list[Callable[..., object]]: - handlers: list[Callable[..., object]] = [] + ) -> list[Handler]: + handlers: list[Handler] = [] collection_attr = handler_collection_attr(attr) for toolset in iter_toolsets(self.active_toolsets(state)): for handler in getattr(toolset, collection_attr, ()): @@ -1214,7 +1509,7 @@ def _rollout_handlers( and getattr(handler, f"{attr}_stage", "rollout") != stage ): continue - handlers.append(cast(Callable[..., object], handler)) + handlers.append(cast(Handler, handler)) for _, method in inspect.getmembers(toolset, predicate=callable): if getattr(method, attr, False) is not True: continue @@ -1231,8 +1526,8 @@ def _group_handlers( attr: str, states: Sequence[State], stage: str | None = None, - ) -> list[Callable[..., object]]: - handlers: list[Callable[..., object]] = [] + ) -> list[Handler]: + handlers: list[Handler] = [] for state in states: handlers.extend(self._rollout_handlers(attr, state, stage=stage)) return sort_handlers(unique_handlers(handlers), attr) 
@@ -1242,7 +1537,7 @@ async def _collect_artifact(self, spec: object, task: Task, state: State) -> obj return await maybe_call_with_named_args(spec, task=task, state=state) if not isinstance(spec, Mapping): raise TypeError("Artifact specs must be callables or mappings.") - spec_map = cast(Mapping[str, object], spec) + spec_map = cast(ConfigMap, spec) path = artifact_path(spec_map) optional = artifact_optional(spec_map) matches = sorted(glob.glob(path.format(**state))) @@ -1260,7 +1555,7 @@ async def _collect_artifact(self, spec: object, task: Task, state: State) -> obj raise ValueError(f"Unsupported artifact format: {format_name!r}") key = artifact_key(spec_map) if key is not None: - data = cast(Mapping[str, Any], data)[key] + data = cast(ConfigMap, data)[key] return data async def _resolve_path(self, path: str, task: Task, state: State) -> object: @@ -1285,9 +1580,9 @@ async def _resolve_path(self, path: str, task: Task, state: State) -> object: else: raise ValueError(f"Unknown binding root {root!r}.") if separator and root not in {"objects", "tools"}: - return _read_path(value, tail) + return read_path(value, tail) if tail: - return _read_path(value, tail) + return read_path(value, tail) return value async def _resolve_toolset_object( @@ -1366,17 +1661,17 @@ async def resolve_tool_sandbox( raise RuntimeError( "Toolset sandbox='program' requires an active program sandbox." 
) - return SandboxHandle(cast(Any, lease), state) + return SandboxHandle(lease, state) if not isinstance(sandbox, Mapping): raise TypeError("Toolset sandbox must be a mapping.") - sandbox_config = cast(Mapping[str, object], sandbox) + sandbox_config = cast(ConfigMap, sandbox) prefer = sandbox_config.get("prefer") if prefer is not None: if prefer != "program": raise ValueError("Toolset sandbox.prefer must be 'program'.") lease = self._active_program_sandbox_lease(state) if lease is not None: - return SandboxHandle(cast(Any, lease), state) + return SandboxHandle(lease, state) scope = sandbox_scope(sandbox_config) key = (self.scope_key(scope, state), tool_sandbox_key(toolset)) async with self.sandbox_lock: @@ -1384,9 +1679,9 @@ async def resolve_tool_sandbox( if lease is None: lease = await create_tool_sandbox_lease(toolset) self.sandbox_leases[key] = lease - return SandboxHandle(cast(Any, lease), state) + return SandboxHandle(lease, state) - def _active_program_sandbox_lease(self, state: State) -> object | None: + def _active_program_sandbox_lease(self, state: State) -> "SandboxLease | None": sandbox_handle = self.resolved_handle(state, "sandbox") if sandbox_handle is not None: return self._sandbox_lease_from_handle(sandbox_handle, "sandbox") @@ -1408,8 +1703,8 @@ def _active_program_sandbox_lease(self, state: State) -> object | None: return lease async def resolve_program_sandbox( - self, sandbox_config: Mapping[str, object], task: Task, state: State - ) -> Any: + self, sandbox_config: ConfigMap, task: Task, state: State + ) -> "SandboxLease": from .utils.sandbox_utils import ( create_sandbox_lease, program_sandbox_key, @@ -1431,8 +1726,8 @@ async def resolve_program_sandbox( return lease def _sandbox_lease_from_handle( - self, handle: Mapping[str, object], name: str - ) -> object: + self, handle: ConfigMap, name: str + ) -> "SandboxLease": runtime = self.handle_runtime(handle, name) lease_key = handle.get("lease_key") if ( @@ -1471,7 +1766,7 @@ async def 
resolve_user_sandbox( if lease is None: lease = await create_scoped_sandbox_lease(user, key[1]) self.sandbox_leases[key] = lease - return SandboxHandle(cast(Any, lease), state) + return SandboxHandle(lease, state) async def release_sandboxes(self, scope: str, state: State) -> None: scope_key = self.scope_key(scope, state) @@ -1522,10 +1817,10 @@ def bind_global_sandboxes(self, state: State) -> None: scope_key, _ = key if scope_key != "global": continue - attach_sandbox_ref(state, cast(Any, lease)) + attach_sandbox_ref(state, lease) - def sandbox_owners(self, state: State | None = None) -> list[object]: - owners: list[object] = [*self.toolsets] + def sandbox_owners(self, state: State | None = None) -> list[Toolset | User]: + owners: list[Toolset | User] = [*self.toolsets] if state is not None: owners.extend(self._rollout_toolsets(state)) user = self._resolve_user() @@ -1540,8 +1835,8 @@ async def ensure_mcp_tools(self, state: State) -> None: if key in self.mcp_exit_stacks: continue exit_stack = AsyncExitStack() - tools: dict[str, object] = {} - exposed_tools: dict[str, object] = {} + tools: ConfigData = {} + exposed_tools: ConfigData = {} try: for toolset in self.active_toolsets(state): await self._register_mcp_tools( @@ -1566,11 +1861,12 @@ async def _register_mcp_tools( toolset: Toolset, parents: list[Toolset], connect_mcp_tool: Callable[ - [MCPTool, AsyncExitStack[bool | None]], Awaitable[Sequence[object]] + [MCPTool, AsyncExitStack[bool | None]], + Awaitable[Sequence["MCPToolHandle"]], ], exit_stack: AsyncExitStack, - tools: dict[str, object], - exposed_tools: dict[str, object], + tools: ConfigData, + exposed_tools: ConfigData, state: State, target_key: str, ) -> None: @@ -1621,7 +1917,7 @@ async def close_all_mcp_tools(self) -> None: del self.mcp_exit_stacks[key] await exit_stack.aclose() - def all_tools(self, state: State) -> dict[str, object]: + def all_tools(self, state: State) -> ConfigData: tools = self._tools_for_toolsets( self.active_toolsets(state), 
apply_visibility=False ) @@ -1635,7 +1931,7 @@ def all_tools(self, state: State) -> dict[str, object]: tools[name] = tool return tools - def unfiltered_exposed_tools(self, state: State) -> dict[str, object]: + def unfiltered_exposed_tools(self, state: State) -> ConfigData: tools = self._tools_for_toolsets( self.active_toolsets(state), apply_visibility=True ) @@ -1649,7 +1945,7 @@ def unfiltered_exposed_tools(self, state: State) -> dict[str, object]: tools[name] = tool return tools - def borrowed_tools_for_state(self, state: State) -> dict[str, object]: + def borrowed_tools_for_state(self, state: State) -> ConfigData: handle = self.resolved_handle(state, "tools") if handle is None: return {} @@ -1660,7 +1956,7 @@ def borrowed_tools_for_state(self, state: State) -> dict[str, object]: source_runtime = self.handle_runtime(handle, "tools") return {name: BorrowedTool(source_runtime, handle_id, name) for name in names} - def all_exposed_tools(self, state: State) -> dict[str, object]: + def all_exposed_tools(self, state: State) -> ConfigData: tools = self.unfiltered_exposed_tools(state) selected = state.get("runtime", {}).get("tools") if selected is None: @@ -1697,9 +1993,9 @@ def all_exposed_tools(self, state: State) -> dict[str, object]: return tools raise TypeError("state.runtime.tools must be a mapping with show or hide.") - def mcp_tools_for_state(self, state: State, exposed: bool) -> dict[str, object]: + def mcp_tools_for_state(self, state: State, exposed: bool) -> ConfigData: source = self.exposed_mcp_tools if exposed else self.mcp_tools - tools: dict[str, object] = {} + tools: ConfigData = {} for key in self.mcp_scope_keys(state): for name, tool in source.get(key, {}).items(): if name in tools: @@ -1731,201 +2027,3 @@ def visit(toolset: Toolset) -> None: def mcp_scope_key(self, toolset: Toolset, state: State) -> str: scope = toolset_object_scope(toolset) return f"{scope}:{self.scope_key(scope, state)}:{id(toolset)}" - - -def tool_visible(toolset: Toolset, name: 
str) -> bool: - if toolset.show is not None and name not in toolset.show: - return False - if toolset.hide is not None and name in toolset.hide: - return False - return True - - -def toolset_object_scope(toolset: Toolset) -> str: - if toolset.scope is not None: - return toolset.scope - return "rollout" if toolset.write else "global" - - -async def state_done(task: Task, state: State) -> bool: - _ = task - return bool(state.get("done")) - - -def handler_collection_attr(attr: str) -> str: - return { - "stop": "stops", - "setup": "setups", - "update": "updates", - "cleanup": "cleanups", - "teardown": "teardowns", - }.get(attr, attr) - - -def _read_path(value: object, path: str) -> object: - if not path: - return value - current = value - for part in path.split("."): - if isinstance(current, Mapping): - current = cast(Mapping[str, object], current)[part] - elif isinstance(current, list): - current = current[int(part)] - else: - current = getattr(current, part) - return current - - -def string_list(value: object, field: str) -> list[str]: - if isinstance(value, str): - return [value] - if not isinstance(value, Sequence) or isinstance(value, bytes): - raise TypeError(f"{field} must be a string or list of strings.") - result = [str(item) for item in value] - if len(result) != len(set(result)): - raise ValueError(f"{field} contains duplicate names.") - return result - - -def schema_callable( - tool: object, signature: inspect.Signature -) -> Callable[..., object]: - def call_for_schema(**kwargs: object) -> None: - _ = kwargs - return None - - call_for_schema.__name__ = tool_name(tool) - call_for_schema.__doc__ = getattr(tool, "__doc__", None) - setattr(call_for_schema, "__signature__", signature) - return call_for_schema - - -def function_name(fn: Callable[..., object]) -> str: - name = getattr(fn, "__name__", None) - if not isinstance(name, str) or not name: - raise ValueError("Callable bindings require a stable __name__.") - return name - - -def binding_key_parts(key: 
object) -> tuple[str, str]: - if not isinstance(key, str): - raise TypeError("Binding keys must be strings.") - target, separator, arg_name = key.partition(".") - if separator != "." or not target or not arg_name or "." in arg_name: - raise ValueError(f"Binding key {key!r} must be 'callable.arg'.") - return target, arg_name - - -def binding_source_root(source: object) -> str | None: - if not isinstance(source, str): - return None - root, _, _ = source.partition(".") - return root - - -def validate_binding_source_root(root: str | None, context: str) -> None: - if root is None: - return - if root not in {"task", "state", "runtime", "objects", "tools"}: - raise ValueError( - f"{context} source root must be task, state, runtime, objects, or tools." - ) - - -def binding_object_name(source: object) -> str: - if not isinstance(source, str): - raise TypeError("Object binding source must be a string.") - root, separator, tail = source.partition(".") - if root != "objects" or not separator: - raise ValueError("Object binding source must be 'objects.name'.") - name, _, _ = tail.partition(".") - if not name: - raise ValueError("Object binding source must be 'objects.name'.") - return name - - -def validate_bound_arg( - fn: Callable[..., object] | object, arg_name: str, context: str -) -> None: - if arg_name in {"task", "state", "runtime"}: - raise ValueError(f"{context} cannot bind reserved arg {arg_name!r}.") - if not callable(fn): - raise TypeError(f"{context} target is not callable.") - try: - signature = inspect.signature(fn) - except (TypeError, ValueError) as exc: - raise TypeError(f"{context} target signature cannot be inspected.") from exc - if arg_name not in signature.parameters: - name = ( - getattr(fn, "__name__", None) - or getattr(fn, "name", None) - or type(fn).__name__ - ) - raise TypeError( - f"{context} targets {name!r}, but {name!r} does not declare " - f"arg {arg_name!r}." 
- ) - - -def same_callable(left: Callable[..., object], right: Callable[..., object]) -> bool: - if left is right: - return True - left_self = getattr(left, "__self__", None) - right_self = getattr(right, "__self__", None) - left_func = getattr(left, "__func__", None) - right_func = getattr(right, "__func__", None) - return left_self is right_self and left_func is not None and left_func is right_func - - -def serializable(value: object) -> object: - model_dump = getattr(value, "model_dump", None) - if callable(model_dump): - return model_dump(exclude_none=True) - if isinstance(value, list): - return [serializable(item) for item in value] - if isinstance(value, tuple): - return [serializable(item) for item in value] - if isinstance(value, Mapping): - return {str(key): serializable(item) for key, item in value.items()} - return value - - -async def close_object(obj: object) -> None: - for name in ("aclose", "close", "delete", "teardown"): - fn = getattr(obj, name, None) - if callable(fn): - await maybe_call_with_named_args(fn) - return - - -async def resolve_object_factory(spec: object, context: str) -> object: - if not callable(spec): - return spec - try: - signature = inspect.signature(spec) - except (TypeError, ValueError) as exc: - raise TypeError(f"{context} factory signature cannot be inspected.") from exc - if signature.parameters: - raise TypeError(f"{context} factory must accept no arguments.") - value = cast(Callable[[], object], spec)() - if inspect.isawaitable(value): - return await cast(Awaitable[object], value) - return value - - -def load_runtime(runtime_id: str) -> Runtime: - runtime = _RUNTIME_REGISTRY.get(runtime_id) - if runtime is None: - raise RuntimeError(f"No live v1 runtime registered for id {runtime_id!r}.") - return runtime - - -def load_runtime_from_state(state: Mapping[str, object]) -> Runtime: - runtime_state = state.get("runtime") - if not isinstance(runtime_state, Mapping): - raise RuntimeError("State has no runtime metadata.") - 
runtime_state = cast(Mapping[str, object], runtime_state) - runtime_id = runtime_state.get("runtime_id") - if not isinstance(runtime_id, str) or not runtime_id: - raise RuntimeError("State has no live runtime id.") - return load_runtime(runtime_id) diff --git a/verifiers/v1/state.py b/verifiers/v1/state.py index 1cf1a663d..d8502caae 100644 --- a/verifiers/v1/state.py +++ b/verifiers/v1/state.py @@ -1,401 +1,10 @@ -from __future__ import annotations +"""V1 state contract exports. -from collections.abc import Awaitable, Callable, Iterable, Mapping -from copy import deepcopy -from typing import TYPE_CHECKING, Any, Literal, Protocol, overload, cast -import uuid - -from verifiers.types import State as VFState - -from .utils.timing_utils import timing_record - -if TYPE_CHECKING: - from .runtime import Runtime - from .utils.endpoint_utils import EndpointApi - from verifiers.types import ClientType - - -_MISSING = object() - -BorrowTarget = Literal["model", "sandbox"] -ToolTarget = str | Iterable[str] -TranscriptMode = Literal["private", "append"] - - -class ForTask(Protocol): - def __call__( - self, - task: Mapping[str, Any], - *, - borrow: BorrowTarget | Iterable[BorrowTarget] = (), - tools: ToolTarget = (), - transcript: TranscriptMode = "private", - ) -> State: ... - - -class _StateForTask: - @overload - def __get__(self, instance: None, owner: type[State]) -> ForTask: ... - - @overload - def __get__(self, instance: State, owner: type[State]) -> ForTask: ... 
- - def __get__( - self, instance: State | None, owner: type[State] - ) -> Callable[..., State]: - def create( - task: Mapping[str, Any], - *, - borrow: BorrowTarget | Iterable[BorrowTarget] = (), - tools: ToolTarget = (), - transcript: TranscriptMode = "private", - ) -> State: - state = _state_for_task(owner, task) - if instance is not None: - _borrow_from_state(state, instance, borrow, tools, transcript) - elif borrow or tools: - raise ValueError("State.for_task borrow/tools requires a source state.") - elif transcript != "private": - raise ValueError( - "State.for_task transcript='append' requires a source state." - ) - return state - - return create - - -class State(VFState): - for_task = _StateForTask() - - INTERNAL_KEYS = {"is_completed", "stop_condition", "is_truncated", "error"} - RUNTIME_HANDLE_KEYS = {"runtime_id", "client_key"} - ENDPOINT_HANDLE_KEYS = { - "endpoint_rollout_key", - "endpoint_root_url", - "endpoint_base_url", - } - - def __init__(self, *args: object, **kwargs: Any): - values = dict(*args, **kwargs) - protected = sorted(set(values) & self.INTERNAL_KEYS) - if protected: - raise RuntimeError( - f"State constructor cannot set framework-managed keys: {protected}." 
- ) - super().__init__(values) - - def __setitem__(self, key: str, value: Any) -> None: - if key in self.INTERNAL_KEYS: - raise RuntimeError(internal_key_error(key)) - super().__setitem__(key, value) - - def __delitem__(self, key: str) -> None: - if key in self.INTERNAL_KEYS: - raise RuntimeError(internal_key_error(key)) - super().__delitem__(key) - - def update(self, *args: object, **kwargs: Any) -> None: - values = dict(*args, **kwargs) - for key, value in values.items(): - self[str(key)] = value - - def pop(self, key: str, default: Any = _MISSING) -> Any: - if key in self.INTERNAL_KEYS: - raise RuntimeError(internal_key_error(key)) - if default is _MISSING: - return super().pop(key) - return super().pop(key, default) - - def popitem(self) -> tuple[str, Any]: - raise RuntimeError("State.popitem() cannot preserve framework-managed fields.") - - def clear(self) -> None: - raise RuntimeError("State.clear() cannot preserve framework-managed fields.") - - def setdefault(self, key: str, default: Any = None) -> Any: - if key in self.INTERNAL_KEYS: - raise RuntimeError(internal_key_error(key)) - return super().setdefault(key, default) - - def __ior__(self, other: object) -> State: - self.update(other) - return self - - def _set_internal(self, key: str, value: Any) -> None: - if key not in self.INTERNAL_KEYS: - raise KeyError(f"{key!r} is not a framework-managed state key.") - super().__setitem__(key, value) - - def _set_completed(self, value: bool = True) -> None: - self._set_internal("is_completed", value) - - def _set_error(self, value: Any) -> None: - self._set_internal("error", value) - - def _set_stop_condition( - self, value: str | None, *, overwrite: bool = False - ) -> None: - if overwrite or self.get("stop_condition") is None: - self._set_internal("stop_condition", value) - - def _set_truncated(self, value: bool = True, *, overwrite: bool = False) -> None: - current = bool(self.get("is_truncated", False)) - self._set_internal( - "is_truncated", bool(value) if 
overwrite else current or bool(value) - ) - - def stop(self, condition: str = "state_done") -> None: - if not isinstance(condition, str) or not condition: - raise TypeError("State.stop condition must be a non-empty string.") - super().__setitem__("done", True) - self._set_completed(True) - self._set_stop_condition(condition, overwrite=True) - - def runtime_state(self) -> dict[str, Any]: - raw_runtime = self.setdefault("runtime", {}) - if not isinstance(raw_runtime, dict): - raise TypeError("state.runtime must be a mapping.") - return raw_runtime - - def _runtime(self) -> Runtime: - from .runtime import load_runtime_from_state - - return load_runtime_from_state(self) - - def get_model(self) -> str: - runtime = self.get("runtime", {}) - if isinstance(runtime, Mapping): - model = runtime.get("model") - if isinstance(model, str) and model: - return model - resolved = runtime.get("resolved") - if isinstance(resolved, Mapping): - handle = resolved.get("model") - if isinstance(handle, Mapping): - model = handle.get("model") - if isinstance(model, str) and model: - return model - try: - return self._runtime().model(self) - except RuntimeError as exc: - raise RuntimeError("State has no resolved model.") from exc - - def get_max_turns(self, default: int) -> int: - runtime = self.get("runtime", {}) - if isinstance(runtime, Mapping) and "max_turns" in runtime: - value = runtime["max_turns"] - if value is None: - return default - if not isinstance(value, int) or isinstance(value, bool): - raise TypeError("state.runtime.max_turns must be an integer.") - return value - return default - - def get_client( - self, - api: EndpointApi | ClientType = "chat_completions", - *, - sync: bool = False, - ) -> object: - from .utils.endpoint_utils import client_from_state - - return client_from_state(self, api, sync=sync) - - def get_endpoint_config( - self, - api: EndpointApi | ClientType = "chat_completions", - ) -> dict[str, str]: - from .utils.endpoint_utils import 
endpoint_config_from_state - - return endpoint_config_from_state(self, api) - - def get_tools(self) -> dict[str, Callable[..., Awaitable[object]]]: - from .utils.tool_utils import load_tools_from_state - - return load_tools_from_state(self) - - def _runtime_handles(self) -> dict[str, Any]: - runtime = self.runtime_state() - handles = runtime.setdefault("resolved", {}) - if not isinstance(handles, dict): - raise TypeError("state.runtime.resolved must be a mapping.") - return handles - - def _runtime_handle(self, name: str) -> dict[str, object]: - runtime = self.runtime_state() - handles = runtime.get("resolved") - if handles is not None: - if not isinstance(handles, Mapping): - raise TypeError("state.runtime.resolved must be a mapping.") - existing = handles.get(name) - if existing is not None: - if not isinstance(existing, Mapping): - raise TypeError(f"state.runtime.resolved.{name} must be a mapping.") - return dict(cast(Mapping[str, object], existing)) - - runtime_id = runtime.get("runtime_id") - if not isinstance(runtime_id, str) or not runtime_id: - raise RuntimeError("State has no live runtime id.") - if name == "model": - client_key = runtime.get("client_key") - if not isinstance(client_key, str) or not client_key: - raise RuntimeError("State has no resolved model client.") - handle: dict[str, object] = { - "runtime_id": runtime_id, - "client_key": client_key, - } - for key in ("model", "client_type", "sampling_args"): - if key in runtime: - handle[key] = runtime[key] - return handle - if name == "endpoint": - return {"runtime_id": runtime_id} - if name == "trajectory": - runtime_obj = self._runtime() - runtime_obj.register_trajectory(self) - trajectory = self.get("trajectory") or [] - if not isinstance(trajectory, list): - raise TypeError("state.trajectory must be a list.") - return { - "runtime_id": runtime_id, - "trajectory_id": str(self["trajectory_id"]), - "start": len(trajectory), - } - if name == "sandbox": - sandbox = runtime.get("sandbox") - if not 
isinstance(sandbox, Mapping): - raise RuntimeError("State has no resolved primary sandbox.") - handle = dict(cast(Mapping[str, object], sandbox)) - handle["runtime_id"] = runtime_id - return handle - raise KeyError(f"Unknown runtime handle {name!r}.") - - def _tools_handle(self, names: ToolTarget) -> dict[str, object] | None: - tool_names = tuple(_tool_names(names)) - if not tool_names: - return None - runtime = self._runtime() - handle_id = runtime.register_tool_handle(self, tool_names) - return { - "runtime_id": runtime.runtime_id, - "handle_id": handle_id, - "names": list(tool_names), - } - - def _use_runtime_handle(self, name: str, handle: Mapping[str, object]) -> State: - self._runtime_handles()[name] = dict(handle) - return self - - def strip_runtime_handles(self) -> None: - strip_runtime_handles(self) - - def finalize(self) -> State: - self.strip_runtime_handles() - self.assert_serializable() - return self +V1 uses the shared top-level ``verifiers.State`` type. V1 tasks opt state +instances into strict runtime/lifecycle-field handling when they are passed to +``State.for_task(...)``. +""" +from verifiers.types import State __all__ = ["State"] - - -def internal_key_error(key: str) -> str: - if key == "is_completed": - return ( - "state['is_completed'] is framework-managed; use state.stop(...), " - "state['done'], or @vf.stop." - ) - if key == "stop_condition": - return ( - "state['stop_condition'] is framework-managed; use state.stop(...), " - "state['done'], or @vf.stop." - ) - if key == "is_truncated": - return ( - "state['is_truncated'] is framework-managed; raise an overlong-prompt " - "error or let trajectory sync set it." - ) - if key == "error": - return "state['error'] is framework-managed; raise vf.Error instead." - return f"state[{key!r}] is framework-managed." 
- - -def _state_for_task(cls: type[State], task: Mapping[str, Any]) -> State: - state = cls( - { - "task": dict(task), - "runtime": {}, - "trajectory": [], - "trajectory_id": uuid.uuid4().hex, - "artifacts": {}, - "metrics": {}, - "reward": 0.0, - "completion": None, - "timing": timing_record(), - } - ) - state._set_completed(False) - state._set_truncated(False, overwrite=True) - state._set_stop_condition(None, overwrite=True) - state._set_error(None) - for key in ("prompt", "info", "example_id"): - if key in task: - state[key] = deepcopy(task[key]) - return state - - -def _borrow_from_state( - state: State, - source: State, - borrow: BorrowTarget | Iterable[BorrowTarget], - tools: ToolTarget, - transcript: TranscriptMode, -) -> None: - if transcript not in {"private", "append"}: - raise ValueError("transcript must be 'private' or 'append'.") - for name in _borrow_targets(borrow): - if name not in {"model", "sandbox"}: - raise KeyError(f"Unknown borrow target {name!r}.") - state._use_runtime_handle(name, source._runtime_handle(name)) - tools_handle = source._tools_handle(tools) - if tools_handle is not None: - state._use_runtime_handle("tools", tools_handle) - if transcript == "append": - state._use_runtime_handle("trajectory", source._runtime_handle("trajectory")) - - -def _borrow_targets( - borrow: BorrowTarget | Iterable[BorrowTarget], -) -> Iterable[BorrowTarget]: - if isinstance(borrow, str): - return (cast(BorrowTarget, borrow),) - return borrow - - -def _tool_names(tools: ToolTarget) -> Iterable[str]: - if isinstance(tools, str): - return (tools,) - return tools - - -def strip_runtime_handles(value: object) -> None: - if isinstance(value, State) or type(value) is dict: - mapping = cast(dict[str, object], value) - for key in State.RUNTIME_HANDLE_KEYS: - mapping.pop(key, None) - runtime = mapping.get("runtime") - if type(runtime) is dict: - runtime_mapping = cast(dict[str, object], runtime) - runtime_mapping.pop("resolved", None) - for key in 
State.RUNTIME_HANDLE_KEYS: - runtime_mapping.pop(key, None) - sandbox = runtime_mapping.get("sandbox") - if type(sandbox) is dict: - cast(dict[str, object], sandbox).pop("lease_key", None) - for key in State.ENDPOINT_HANDLE_KEYS: - mapping.pop(key, None) - for item in list(mapping.values()): - strip_runtime_handles(item) - return - if isinstance(value, list): - for item in value: - strip_runtime_handles(item) diff --git a/verifiers/v1/task.py b/verifiers/v1/task.py index 8d9f02780..9d74eb832 100644 --- a/verifiers/v1/task.py +++ b/verifiers/v1/task.py @@ -1,21 +1,20 @@ -from __future__ import annotations - -from collections.abc import Iterable, Mapping +from collections.abc import Mapping from copy import deepcopy -from typing import Any, SupportsIndex, cast - -from verifiers.types import assert_json_serializable from .config import sandbox_config_mapping +from .utils.task_freeze_utils import assert_serializable, freeze_value from .utils.prompt_utils import normalize_prompt, normalize_system_prompt +from .types import ConfigMap, JsonValue class Task(dict): - def __init__(self, row: Mapping[str, Any] | None = None): + _vf_state_contract = "v1" + + def __init__(self, row: ConfigMap | None = None): super().__init__(deepcopy(dict(row or {}))) self._frozen = False - def freeze(self) -> Task: + def freeze(self) -> "Task": if "runtime" in self: raise TypeError( "task.runtime is not supported; use top-level task fields or state.runtime." 
@@ -56,7 +55,7 @@ def freeze(self) -> Task: def frozen(self) -> bool: return self._frozen - def __setitem__(self, key: str, value: Any) -> None: + def __setitem__(self, key: str, value: object) -> None: self._raise_if_frozen() super().__setitem__(key, value) @@ -76,102 +75,18 @@ def pop(self, key: str, default: object = None) -> object: self._raise_if_frozen() return super().pop(key, default) - def popitem(self) -> tuple[str, Any]: - self._raise_if_frozen() - return super().popitem() + def popitem(self) -> tuple[str, JsonValue]: + raise TypeError("Task.popitem() is not supported.") def clear(self) -> None: self._raise_if_frozen() super().clear() - def __ior__(self, value: Any, /) -> Task: + def __ior__(self, value: object, /) -> "Task": self._raise_if_frozen() - return cast(Task, dict.__ior__(self, value)) + self.update(value) + return self def _raise_if_frozen(self) -> None: if self._frozen: raise TypeError("Task is immutable after freeze.") - - -def assert_serializable(value: object) -> None: - assert_json_serializable(value) - - -class FrozenDict(dict): - def __deepcopy__(self, memo: dict[int, object]) -> dict[object, object]: - return { - deepcopy(key, memo): deepcopy(value, memo) for key, value in self.items() - } - - def __setitem__(self, key: str, value: Any) -> None: - raise TypeError("Frozen task mappings are immutable.") - - def __delitem__(self, key: str) -> None: - raise TypeError("Frozen task mappings are immutable.") - - def update(self, *args: object, **kwargs: object) -> None: - raise TypeError("Frozen task mappings are immutable.") - - def setdefault(self, key: str, default: object = None) -> object: - raise TypeError("Frozen task mappings are immutable.") - - def pop(self, key: str, default: object = None) -> object: - raise TypeError("Frozen task mappings are immutable.") - - def popitem(self) -> tuple[object, object]: - raise TypeError("Frozen task mappings are immutable.") - - def clear(self) -> None: - raise TypeError("Frozen task mappings are 
immutable.") - - def __ior__(self, value: object) -> FrozenDict: - raise TypeError("Frozen task mappings are immutable.") - - -class FrozenList(list): - def __deepcopy__(self, memo: dict[int, object]) -> list[object]: - return [deepcopy(value, memo) for value in self] - - def __setitem__(self, key: object, value: Any) -> None: - raise TypeError("Frozen task lists are immutable.") - - def __delitem__(self, key: object) -> None: - raise TypeError("Frozen task lists are immutable.") - - def append(self, value: Any) -> None: - raise TypeError("Frozen task lists are immutable.") - - def extend(self, values: object) -> None: - raise TypeError("Frozen task lists are immutable.") - - def insert(self, index: SupportsIndex, object: Any, /) -> None: - raise TypeError("Frozen task lists are immutable.") - - def pop(self, index: SupportsIndex = -1, /) -> object: - raise TypeError("Frozen task lists are immutable.") - - def remove(self, value: Any) -> None: - raise TypeError("Frozen task lists are immutable.") - - def clear(self) -> None: - raise TypeError("Frozen task lists are immutable.") - - def __iadd__(self, values: Iterable[Any]) -> FrozenList: - raise TypeError("Frozen task lists are immutable.") - - def __imul__(self, value: SupportsIndex) -> FrozenList: - raise TypeError("Frozen task lists are immutable.") - - def sort(self, *args: object, **kwargs: object) -> None: - raise TypeError("Frozen task lists are immutable.") - - def reverse(self) -> None: - raise TypeError("Frozen task lists are immutable.") - - -def freeze_value(value: Any) -> Any: - if isinstance(value, Mapping): - return FrozenDict({key: freeze_value(item) for key, item in value.items()}) - if isinstance(value, list): - return FrozenList(freeze_value(item) for item in value) - return value diff --git a/verifiers/v1/taskset.py b/verifiers/v1/taskset.py index 7325b822a..cc8df2cb4 100644 --- a/verifiers/v1/taskset.py +++ b/verifiers/v1/taskset.py @@ -1,26 +1,66 @@ -from __future__ import annotations - import 
json import uuid import weakref -from collections.abc import Callable, Iterable, Mapping +from importlib.abc import Traversable +from collections.abc import Iterable, Mapping from copy import deepcopy -from typing import Any, ClassVar, cast +from pathlib import Path +from typing import TYPE_CHECKING, ClassVar, cast from datasets import Dataset from verifiers.types import task_payload_from_info +from typing_extensions import NotRequired, TypedDict from .config import ( TasksetConfig, - merge_config_callables, + merge_config_handler_map, merge_config_value, resolve_config_object, ) +from .utils.binding_utils import ( + BindingMap, + normalize_binding_map, + normalize_object_map, +) from .state import State from .task import Task -from .toolset import merge_toolsets, normalize_toolset_collection +from .toolset import ToolsetCollection, merge_toolsets, normalize_toolset_collection from .user import normalize_user from .utils.prompt_utils import normalize_system_prompt +from .utils.taskset_utils import dataset_info_with_task, discover_sibling_dir +from .utils.taskset_utils import rows_from_source +from .types import ( + ConfigData, + ConfigMap, + Handler, + Objects, + PromptInput, + TaskRow, + TaskRowsSource, +) + +if TYPE_CHECKING: + from .harness import Harness + + +TaskSourceValue = TaskRowsSource | None + + +class TasksetKwargs(TypedDict): + eval_source: NotRequired[TaskSourceValue] + taskset_id: NotRequired[str | None] + system_prompt: NotRequired[PromptInput | None] + user: NotRequired[Handler | str | ConfigMap | None] + bindings: NotRequired[BindingMap | None] + objects: NotRequired[Objects | None] + toolsets: NotRequired[ToolsetCollection] + stops: NotRequired[Iterable[Handler]] + setups: NotRequired[Iterable[Handler]] + updates: NotRequired[Iterable[Handler]] + metrics: NotRequired[Iterable[Handler]] + rewards: NotRequired[Iterable[Handler]] + advantages: NotRequired[Iterable[Handler]] + cleanups: NotRequired[Iterable[Handler]] class Taskset: @@ -29,109 +69,106 
@@ class Taskset: def __init__( self, # Singleton fields. - source: Iterable[Mapping[str, Any]] - | Callable[[], Iterable[Mapping[str, Any]]] - | None = None, - eval_source: Iterable[Mapping[str, Any]] - | Callable[[], Iterable[Mapping[str, Any]]] - | None = None, + source: TaskSourceValue = None, + eval_source: TaskSourceValue = None, taskset_id: str | None = None, - system_prompt: object | None = None, - user: object | None = None, + system_prompt: PromptInput | None = None, + user: Handler | str | ConfigMap | None = None, + bindings: BindingMap | None = None, + objects: Objects | None = None, # Collection fields. - toolsets: Iterable[object] = (), - stops: Iterable[Callable[..., object]] = (), - setups: Iterable[Callable[..., object]] = (), - updates: Iterable[Callable[..., object]] = (), - metrics: Iterable[Callable[..., object]] = (), - rewards: Iterable[Callable[..., object]] = (), - advantages: Iterable[Callable[..., object]] = (), - cleanups: Iterable[Callable[..., object]] = (), + toolsets: ToolsetCollection | None = None, + stops: Iterable[Handler] = (), + setups: Iterable[Handler] = (), + updates: Iterable[Handler] = (), + metrics: Iterable[Handler] = (), + rewards: Iterable[Handler] = (), + advantages: Iterable[Handler] = (), + cleanups: Iterable[Handler] = (), # Config. 
- config: TasksetConfig | Mapping[str, object] | None = None, + config: TasksetConfig | None = None, ): self.config = type(self).config_type.from_config(config) source_value = resolve_config_object( merge_config_value(source, self.config.source) ) self.source = cast( - Iterable[Mapping[str, Any]] - | Callable[[], Iterable[Mapping[str, Any]]] - | None, + TaskSourceValue, source_value, ) eval_source_value = resolve_config_object( merge_config_value(eval_source, self.config.eval_source) ) self.eval_source = cast( - Iterable[Mapping[str, Any]] - | Callable[[], Iterable[Mapping[str, Any]]] - | None, + TaskSourceValue, eval_source_value, ) resolved_taskset_id = merge_config_value(taskset_id, self.config.taskset_id) if resolved_taskset_id is not None and not isinstance(resolved_taskset_id, str): raise TypeError("taskset_id must be a string.") self.taskset_id = resolved_taskset_id or type(self).__name__ - self.system_prompt = normalize_system_prompt( + system_prompt_value = cast( + PromptInput | None, merge_config_value(system_prompt, self.config.system_prompt), - field_name="taskset.system_prompt", + ) + self.system_prompt = normalize_system_prompt( + system_prompt_value, field_name="taskset.system_prompt" ) self.user = normalize_user(merge_config_value(user, self.config.user)) + self.bindings = { + **self.config.bindings, + **normalize_binding_map(bindings, "Taskset bindings"), + } + self.objects = { + **{ + str(key): resolve_config_object(item) + for key, item in self.config.objects.items() + }, + **normalize_object_map(objects, "Taskset objects"), + } self.toolsets, self.named_toolsets = merge_toolsets( - toolsets, self.config.toolsets + toolsets or (), self.config.toolsets ) - self.stops = cast( - list[Callable[..., object]], - merge_config_callables(stops, self.config.stops, "stop"), + handlers = merge_config_handler_map( + { + "stop": stops, + "setup": setups, + "update": updates, + "metric": metrics, + "reward": rewards, + "advantage": advantages, + "cleanup": 
cleanups, + }, + self.config, ) - self.setups = cast( - list[Callable[..., object]], - merge_config_callables(setups, self.config.setups, "setup"), - ) - self.updates = cast( - list[Callable[..., object]], - merge_config_callables(updates, self.config.updates, "update"), - ) - self.metrics = cast( - list[Callable[..., object]], - merge_config_callables(metrics, self.config.metrics, "metric"), - ) - self.rewards = cast( - list[Callable[..., object]], - merge_config_callables(rewards, self.config.rewards, "reward"), - ) - self.advantages = cast( - list[Callable[..., object]], - merge_config_callables(advantages, self.config.advantages, "advantage"), - ) - self.cleanups = cast( - list[Callable[..., object]], - merge_config_callables(cleanups, self.config.cleanups, "cleanup"), - ) - self._rows: list[dict[str, Any]] | None = None - self._eval_rows: list[dict[str, Any]] | None = None + self.stops = handlers["stop"] + self.setups = handlers["setup"] + self.updates = handlers["update"] + self.metrics = handlers["metric"] + self.rewards = handlers["reward"] + self.advantages = handlers["advantage"] + self.cleanups = handlers["cleanup"] + self._rows: list[ConfigData] | None = None + self._eval_rows: list[ConfigData] | None = None self._dataset: Dataset | None = None self._eval_dataset: Dataset | None = None - self._attached_harnesses: weakref.WeakSet[object] = weakref.WeakSet() + self._attached_harnesses: weakref.WeakSet["Harness"] = weakref.WeakSet() @classmethod def config_schema(cls) -> str: return cls.config_type.schema_text() - def _add_handler( - self, handlers: list[Callable[..., object]], fn: Callable[..., object] - ) -> None: + def _add_handler(self, handlers: list[Handler], fn: Handler) -> None: handlers.append(fn) self._refresh_attached_harnesses() - def add_metric(self, fn: Callable[..., object]) -> None: + def add_metric(self, fn: Handler) -> None: self._add_handler(self.metrics, fn) - def add_reward(self, fn: Callable[..., object]) -> None: + def 
add_reward(self, fn: Handler) -> None: self._add_handler(self.rewards, fn) - def add_advantage(self, fn: Callable[..., object]) -> None: + def add_advantage(self, fn: Handler) -> None: self._add_handler(self.advantages, fn) def add_toolset(self, toolset: object) -> None: @@ -143,40 +180,45 @@ def add_toolset(self, toolset: object) -> None: self.named_toolsets.update(named_toolsets) self._refresh_attached_harnesses() - def add_stop(self, fn: Callable[..., object]) -> None: + def add_stop(self, fn: Handler) -> None: self._add_handler(self.stops, fn) - def add_setup(self, fn: Callable[..., object]) -> None: + def add_setup(self, fn: Handler) -> None: self._add_handler(self.setups, fn) - def add_update(self, fn: Callable[..., object]) -> None: + def add_update(self, fn: Handler) -> None: self._add_handler(self.updates, fn) - def add_cleanup(self, fn: Callable[..., object]) -> None: + def add_cleanup(self, fn: Handler) -> None: self._add_handler(self.cleanups, fn) - def attach_harness(self, harness: object) -> None: + def attach_harness(self, harness: "Harness") -> None: self._attached_harnesses.add(harness) + def get_skills_dir(self) -> Traversable | Path | None: + return discover_sibling_dir(type(self), "skills") + + def get_upload_dirs(self) -> dict[str, Traversable | Path]: + skills = self.get_skills_dir() + return {} if skills is None else {"skills": skills} + def _refresh_attached_harnesses(self) -> None: for harness in list(self._attached_harnesses): - resolve_runtime = getattr(harness, "resolve_runtime", None) - if callable(resolve_runtime): - setattr(harness, "runtime", resolve_runtime()) + harness.runtime = harness.resolve_runtime() - def rows(self) -> list[dict[str, Any]]: + def rows(self) -> list[ConfigData]: if self._rows is None: self._rows = rows_from_source(self.source) return self._rows - def eval_rows(self) -> list[dict[str, Any]]: + def eval_rows(self) -> list[ConfigData]: if self.eval_source is None: return self.rows() if self._eval_rows is None: 
self._eval_rows = rows_from_source(self.eval_source) return self._eval_rows - def task(self, row: Mapping[str, Any]) -> Task: + def task(self, row: ConfigMap) -> Task: task = Task(row) task["taskset_id"] = self.taskset_id task_id = task.get("task_id") @@ -187,7 +229,7 @@ def task(self, row: Mapping[str, Any]) -> Task: task["task_id"] = str(task_id if task_id is not None else uuid.uuid4().hex) return task.freeze() - def to_task(self, value: Mapping[str, Any] | Task | str) -> Task: + def to_task(self, value: ConfigMap | Task | str) -> Task: if isinstance(value, Task): return value if isinstance(value, str): @@ -231,7 +273,7 @@ def __iter__(self): def __len__(self) -> int: return len(self.rows()) - def _dataset_row(self, row: Mapping[str, Any], index: int) -> dict[str, Any]: + def _dataset_row(self, row: TaskRow, index: int) -> ConfigData: normalized = deepcopy(dict(row)) normalized.setdefault("example_id", index) if "prompt" not in normalized: @@ -242,7 +284,7 @@ def _dataset_row(self, row: Mapping[str, Any], index: int) -> dict[str, Any]: else [] ) task_payload = dict(self.task(normalized)) - dataset_row: dict[str, Any] = { + dataset_row: ConfigData = { "prompt": task_payload["prompt"], "example_id": normalized["example_id"], "info": dataset_info_with_task(task_payload), @@ -250,20 +292,3 @@ def _dataset_row(self, row: Mapping[str, Any], index: int) -> dict[str, Any]: if "answer" in normalized: dataset_row["answer"] = normalized["answer"] return dataset_row - - -def dataset_info_with_task(task: Mapping[str, Any]) -> dict[str, Any]: - return {"task": json.dumps(task)} - - -def rows_from_source( - source: Iterable[Mapping[str, Any]] - | Callable[[], Iterable[Mapping[str, Any]]] - | None, -) -> list[dict[str, Any]]: - if source is None: - return [] - if callable(source): - source_loader = cast(Callable[[], Iterable[Mapping[str, Any]]], source) - return [dict(row) for row in source_loader()] - return [dict(row) for row in source] diff --git a/verifiers/v1/toolset.py 
b/verifiers/v1/toolset.py index 3d6254843..25e5de315 100644 --- a/verifiers/v1/toolset.py +++ b/verifiers/v1/toolset.py @@ -1,84 +1,80 @@ -from __future__ import annotations - import inspect -from collections.abc import Callable, Iterable, Mapping +from collections.abc import Iterable, Mapping from dataclasses import dataclass, field -from typing import cast +from typing import TypeAlias, cast from .config import ( + CallableConfigEntry, MCPToolConfig, SandboxConfig, ToolsetConfig, config_callables, resolve_config_object, sandbox_config_mapping, - string_mapping, ) +from .utils.binding_utils import BindingMap, normalize_binding_map +from .utils.binding_utils import normalize_object_map +from .types import ConfigMap, Handler, Objects, ToolSpec @dataclass(frozen=True) class Toolset: # Tool surface. - tools: tuple[object, ...] = () + tools: "tuple[ToolEntry, ...]" = () show: tuple[str, ...] | None = None hide: tuple[str, ...] | None = None # Local dependencies and runtime policy. - bindings: Mapping[str, object] = field(default_factory=dict) - objects: Mapping[str, object] = field(default_factory=dict) + bindings: BindingMap = field(default_factory=dict) + objects: Objects = field(default_factory=dict) write: bool = False scope: str | None = None - sandbox: Mapping[str, object] | SandboxConfig | str | None = None + sandbox: ConfigMap | SandboxConfig | str | None = None # Lifecycle collections. - stops: tuple[object, ...] = () - setups: tuple[object, ...] = () - updates: tuple[object, ...] = () - cleanups: tuple[object, ...] = () - teardowns: tuple[object, ...] = () + stops: tuple[Handler, ...] = () + setups: tuple[Handler, ...] = () + updates: tuple[Handler, ...] = () + cleanups: tuple[Handler, ...] = () + teardowns: tuple[Handler, ...] = () # Config. - config: object | None = None + config: ToolsetConfig | None = None def __init__( self, # Tool surface. 
- tools: Iterable[object] = (), + tools: "ToolEntries | None" = (), show: Iterable[str] | None = None, hide: Iterable[str] | None = None, # Local dependencies and runtime policy. - bindings: Mapping[str, object] | None = None, - objects: Mapping[str, object] | None = None, + bindings: BindingMap | None = None, + objects: Objects | None = None, write: bool | None = None, scope: str | None = None, - sandbox: Mapping[str, object] | SandboxConfig | str | None = None, + sandbox: ConfigMap | SandboxConfig | str | None = None, # Lifecycle collections. - stops: Iterable[object] = (), - setups: Iterable[object] = (), - updates: Iterable[object] = (), - cleanups: Iterable[object] = (), - teardowns: Iterable[object] = (), + stops: Iterable[CallableConfigEntry] = (), + setups: Iterable[CallableConfigEntry] = (), + updates: Iterable[CallableConfigEntry] = (), + cleanups: Iterable[CallableConfigEntry] = (), + teardowns: Iterable[CallableConfigEntry] = (), # Config. - config: object | None = None, + config: ToolsetConfig | None = None, ): config_map = toolset_config_mapping(config) + tool_values = tool_items(tools) + config_bindings: BindingMap = {} + config_objects: Objects = {} if config_map: - tools = [*tools, *tool_items(config_map.get("tools"))] + tool_values.extend(tool_items(config_map.get("tools"))) show = show if show is not None else string_items(config_map.get("show")) hide = hide if hide is not None else string_items(config_map.get("hide")) - config_bindings = config_map.get("bindings") or {} - if not isinstance(config_bindings, Mapping): - raise TypeError("Toolset bindings must be a mapping.") - bindings = { - **string_mapping(cast(Mapping[object, object], config_bindings)), - **dict(bindings or {}), - } - config_objects = config_map.get("objects") or {} - if not isinstance(config_objects, Mapping): - raise TypeError("Toolset objects must be a mapping.") - objects = { - **{ - str(key): resolve_config_object(item) - for key, item in config_objects.items() - }, - 
**dict(objects or {}), + config_bindings = normalize_binding_map( + config_map.get("bindings"), "Toolset bindings" + ) + config_objects = { + str(key): resolve_config_object(item) + for key, item in normalize_object_map( + config_map.get("objects"), "Toolset objects" + ).items() } if "write" in config_map and write is None: write_value = config_map["write"] @@ -91,7 +87,7 @@ def __init__( sandbox = ( sandbox if sandbox is not None - else cast(Mapping[str, object] | str | None, config_map.get("sandbox")) + else cast(ConfigMap | str | None, config_map.get("sandbox")) ) stops = [*stops, *config_callables(config_map.get("stops"), "stop")] setups = [ @@ -112,24 +108,35 @@ def __init__( ] if show is not None and hide is not None: raise ValueError("Toolset accepts show or hide, not both.") - object.__setattr__(self, "tools", tuple(tool_item(tool) for tool in tools)) + object.__setattr__(self, "tools", tuple(tool_values)) object.__setattr__(self, "show", tuple(show) if show is not None else None) object.__setattr__(self, "hide", tuple(hide) if hide is not None else None) - object.__setattr__(self, "bindings", dict(bindings or {})) - object.__setattr__(self, "objects", dict(objects or {})) + object.__setattr__( + self, + "bindings", + { + **config_bindings, + **normalize_binding_map(bindings, "Toolset bindings"), + }, + ) + object.__setattr__( + self, + "objects", + {**config_objects, **normalize_object_map(objects, "Toolset objects")}, + ) object.__setattr__(self, "write", bool(write)) if scope is not None and scope not in {"rollout", "group", "global"}: raise ValueError("Toolset scope must be 'rollout', 'group', or 'global'.") object.__setattr__(self, "scope", scope) if isinstance(sandbox, str) and sandbox != "program": raise ValueError("Toolset sandbox string must be 'program'.") - sandbox_value: Mapping[str, object] | str | None + sandbox_value: ConfigMap | str | None if isinstance(sandbox, str): sandbox_value = sandbox else: sandbox_value = 
sandbox_config_mapping(sandbox) if isinstance(sandbox_value, Mapping): - prefer = cast(Mapping[str, object], sandbox_value).get("prefer") + prefer = cast(ConfigMap, sandbox_value).get("prefer") if prefer is not None and prefer != "program": raise ValueError("Toolset sandbox.prefer must be 'program'.") object.__setattr__(self, "sandbox", sandbox_value) @@ -145,10 +152,16 @@ def __init__( object.__setattr__(self, "config", config) +ToolsetItem: TypeAlias = Toolset | ToolSpec +ToolsetCollection: TypeAlias = ( + ToolsetItem | Iterable[ToolsetItem] | dict[str, ToolsetItem | ConfigMap] +) + + def flatten_toolsets( - toolsets: Iterable[object], apply_visibility: bool = False -) -> list[object]: - flat: list[object] = [] + toolsets: "Iterable[ToolEntry]", apply_visibility: bool = False +) -> "list[ToolEntry]": + flat: list[ToolEntry] = [] for item in toolsets: if isinstance(item, Toolset): tools = flatten_toolsets(item.tools, apply_visibility) @@ -164,7 +177,7 @@ def flatten_toolsets( return flat -def iter_toolsets(toolsets: Iterable[object]) -> list[Toolset]: +def iter_toolsets(toolsets: "Iterable[ToolEntry]") -> list[Toolset]: groups: list[Toolset] = [] for item in toolsets: if isinstance(item, Toolset): @@ -173,7 +186,7 @@ def iter_toolsets(toolsets: Iterable[object]) -> list[Toolset]: return groups -def normalize_toolsets(toolsets: Iterable[object]) -> list[Toolset]: +def normalize_toolsets(toolsets: "Iterable[ToolEntry]") -> list[Toolset]: return [normalize_toolset(toolset) for toolset in toolsets] @@ -207,7 +220,7 @@ def normalize_toolset_collection( return [normalize_toolset(value)], {} if not isinstance(value, Iterable): return [normalize_toolset(value)], {} - return normalize_toolsets(value), {} + return normalize_toolsets(cast(Iterable[ToolEntry], value)), {} def named_toolset_from_config(name: str, value: object) -> Toolset: @@ -215,28 +228,24 @@ def named_toolset_from_config(name: str, value: object) -> Toolset: if isinstance(value, Toolset): return value if 
isinstance(value, Mapping): - spec = cast(Mapping[str, object], value) + spec = cast(ConfigMap, value) if "fn" in spec: return toolset_from_factory(name, spec) - return Toolset(config=spec) + return Toolset(config=ToolsetConfig.from_config(spec)) if callable(value): - return call_toolset_factory(name, cast(Callable[..., object], value), {}) + return call_toolset_factory(name, cast(Handler, value), {}) return normalize_toolset(value) -def toolset_from_factory(name: str, spec: Mapping[str, object]) -> Toolset: +def toolset_from_factory(name: str, spec: ConfigMap) -> Toolset: fn = resolve_config_object(spec.get("fn")) if not callable(fn): raise TypeError(f"Toolset {name!r} requires callable fn.") kwargs = {key: value for key, value in spec.items() if key != "fn"} - return call_toolset_factory(name, cast(Callable[..., object], fn), kwargs) + return call_toolset_factory(name, cast(Handler, fn), kwargs) -def call_toolset_factory( - name: str, - fn: Callable[..., object], - kwargs: Mapping[str, object], -) -> Toolset: +def call_toolset_factory(name: str, fn: Handler, kwargs: ConfigMap) -> Toolset: result = fn(**kwargs) if inspect.isawaitable(result): raise TypeError(f"Toolset {name!r} fn must be synchronous.") @@ -254,7 +263,7 @@ def normalize_toolset_result(value: object) -> list[Toolset]: return [normalize_toolset(value)] if not isinstance(value, Iterable): return [normalize_toolset(value)] - return normalize_toolsets(value) + return normalize_toolsets(cast(Iterable[ToolEntry], value)) def normalize_toolset(value: object) -> Toolset: @@ -262,15 +271,15 @@ def normalize_toolset(value: object) -> Toolset: if isinstance(value, Toolset): return value if isinstance(value, Mapping): - return toolset_from_mapping(cast(Mapping[str, object], value)) - return Toolset(tools=[value]) + return toolset_from_mapping(cast(ConfigMap, value)) + return Toolset(tools=[cast(ToolEntry, value)]) -def toolset_from_mapping(spec: Mapping[str, object]) -> Toolset: - return Toolset(config=spec) 
+def toolset_from_mapping(spec: ConfigMap) -> Toolset: + return Toolset(config=ToolsetConfig.from_config(spec)) -def tool_items(value: object) -> list[object]: +def tool_items(value: object) -> "list[ToolEntry]": if value is None: return [] if isinstance(value, str) or isinstance(value, Mapping): @@ -280,18 +289,22 @@ def tool_items(value: object) -> list[object]: return [tool_item(item) for item in value] -def tool_item(value: object) -> object: +def tool_item(value: object) -> "ToolEntry": value = resolve_config_object(value) + if isinstance(value, Toolset | MCPTool): + return value if isinstance(value, MCPToolConfig): return MCPTool.from_mapping(value.model_dump(exclude_none=True)) if isinstance(value, Mapping): if "command" in value: - return MCPTool.from_mapping(cast(Mapping[str, object], value)) + return MCPTool.from_mapping(cast(ConfigMap, value)) raise TypeError("Tool mapping specs require command.") - return value + if not callable(value): + raise TypeError("Tool entries must be callables, Toolsets, or MCP tool specs.") + return cast(Handler, value) -def toolset_config_mapping(config: object | None) -> Mapping[str, object]: +def toolset_config_mapping(config: ToolsetConfig | None) -> ConfigMap: if config is None: return {} return ToolsetConfig.from_config(config).model_dump(exclude_none=True) @@ -326,14 +339,14 @@ def tool_name(tool: object) -> str: class MCPTool: command: str args: tuple[str, ...] 
= () - env: Mapping[str, str] | None = None + env: dict[str, str] | None = None cwd: str | None = None def __init__( self, command: str, args: Iterable[str] = (), - env: Mapping[str, str] | None = None, + env: dict[str, str] | None = None, cwd: str | None = None, ): object.__setattr__(self, "command", command) @@ -342,7 +355,7 @@ def __init__( object.__setattr__(self, "cwd", cwd) @classmethod - def from_mapping(cls, spec: Mapping[str, object]) -> MCPTool: + def from_mapping(cls, spec: ConfigMap) -> "MCPTool": config = MCPToolConfig.from_config(spec) return cls( command=config.command, @@ -350,3 +363,7 @@ def from_mapping(cls, spec: Mapping[str, object]) -> MCPTool: env=config.env, cwd=config.cwd, ) + + +ToolEntry: TypeAlias = Handler | str | ConfigMap | Toolset | MCPTool | MCPToolConfig +ToolEntries: TypeAlias = ToolEntry | Iterable[ToolEntry] diff --git a/verifiers/v1/types.py b/verifiers/v1/types.py new file mode 100644 index 000000000..76c45fc4a --- /dev/null +++ b/verifiers/v1/types.py @@ -0,0 +1,59 @@ +from collections.abc import ( + Awaitable, + Callable, + Iterable, + Mapping, + MutableMapping, + Sequence, +) +from importlib.abc import Traversable +from os import PathLike +from typing import Literal, TypeAlias + +from pydantic import BaseModel +from verifiers.clients import Client +from verifiers.types import ClientConfig, Message + +Handler: TypeAlias = Callable[..., object] +GroupHandler: TypeAlias = Callable[..., Sequence[float] | Awaitable[Sequence[float]]] +ConfigMap: TypeAlias = Mapping[str, object] +ConfigData: TypeAlias = dict[str, object] +ConfigInputMap: TypeAlias = Mapping[object, object] +MutableConfigMap: TypeAlias = MutableMapping[str, object] +JsonValue: TypeAlias = ( + str | int | float | bool | None | list["JsonValue"] | dict[str, "JsonValue"] +) +JsonData: TypeAlias = dict[str, JsonValue] +ConfigFactory: TypeAlias = Callable[[], BaseModel | ConfigMap] +ConfigSource: TypeAlias = BaseModel | ConfigMap | str | ConfigFactory 
+CallableConfigEntry: TypeAlias = Handler | str | ConfigMap +HandlerList: TypeAlias = Iterable[Handler] + +TaskRow: TypeAlias = Mapping[str, object] +TaskRows: TypeAlias = Iterable[TaskRow] +TaskRowsSource: TypeAlias = Callable[[], TaskRows] | TaskRows +TaskSource: TypeAlias = str | TaskRowsSource + +PromptMessage: TypeAlias = Message | ConfigMap +PromptInput: TypeAlias = str | Sequence[PromptMessage] +Transcript: TypeAlias = Sequence[PromptMessage] +TranscriptData: TypeAlias = list[PromptMessage] +ToolSpec: TypeAlias = Handler | str | ConfigMap +ToolSpecs: TypeAlias = ToolSpec | Sequence[ToolSpec] +ToolsetSpecs: TypeAlias = ToolSpec | Sequence[ToolSpec] | ConfigMap + +ModelClient: TypeAlias = Client | ClientConfig + +ProgramScalar: TypeAlias = str | int | float | bool | None +ProgramSource: TypeAlias = PathLike[str] | Traversable +ProgramValue: TypeAlias = ProgramScalar | Handler | ConfigMap | ProgramSource +ProgramCommand: TypeAlias = str | Sequence[ProgramValue] +ProgramMap: TypeAlias = Mapping[str, object] +ProgramData: TypeAlias = dict[str, object] +ProgramOptionMap: TypeAlias = Mapping[str, ProgramValue] +ProgramSetup: TypeAlias = ProgramValue | Sequence[ProgramValue] +ProgramChannel: TypeAlias = Literal["callable", "mcp"] +ProgramChannelSpec: TypeAlias = ProgramChannel | ConfigMap +ProgramChannels: TypeAlias = ProgramChannelSpec | list[ProgramChannelSpec] + +Objects: TypeAlias = Mapping[str, object | Callable[[], object | Awaitable[object]]] diff --git a/verifiers/v1/user.py b/verifiers/v1/user.py index 4f527853e..2d56783fc 100644 --- a/verifiers/v1/user.py +++ b/verifiers/v1/user.py @@ -1,47 +1,51 @@ -from __future__ import annotations - import inspect -from collections.abc import Callable, Mapping, Sequence +from collections.abc import Mapping, Sequence from dataclasses import dataclass, field from typing import Literal, cast from .config import UserConfig, import_config_ref, resolve_config_object +from .utils.binding_utils import BindingMap, 
normalize_binding_map +from .utils.binding_utils import normalize_object_map from .utils.trajectory_utils import completion_from_trajectory +from .types import ConfigMap, Handler, Objects, PromptMessage UserScope = Literal["rollout", "group", "global"] def state_transcript( - state: Mapping[str, object], transcript: Sequence[object] | None = None -) -> list[object]: + state: ConfigMap, transcript: Sequence[PromptMessage] | None = None +) -> list[PromptMessage]: if transcript is not None: return list(transcript) prompt = state.get("prompt") completion = state.get("completion") if isinstance(prompt, list) and isinstance(completion, list): - return [*prompt, *completion] + return [ + *cast(list[PromptMessage], prompt), + *cast(list[PromptMessage], completion), + ] if isinstance(completion, list): - return list(completion) + return list(cast(list[PromptMessage], completion)) trajectory = state.get("trajectory") if isinstance(trajectory, Sequence) and not isinstance(trajectory, str): - return completion_from_trajectory( - cast(Sequence[Mapping[str, object]], trajectory) - ) + return completion_from_trajectory(cast(Sequence[ConfigMap], trajectory)) return [] @dataclass(frozen=True) class User: - fn: Callable[..., object] + fn: Handler scope: UserScope = "rollout" - bindings: Mapping[str, object] = field(default_factory=dict) - objects: Mapping[str, object] = field(default_factory=dict) - sandbox: Mapping[str, object] | None = None + bindings: BindingMap = field(default_factory=dict) + objects: Objects = field(default_factory=dict) + sandbox: ConfigMap | None = None def __post_init__(self) -> None: if self.scope not in {"rollout", "group", "global"}: raise ValueError("User scope must be 'rollout', 'group', or 'global'.") - bindings = dict(self.bindings) + bindings = normalize_binding_map( + self.bindings, "User bindings", key_style="arg" + ) try: parameters = inspect.signature(self.fn).parameters except (TypeError, ValueError): @@ -49,6 +53,9 @@ def __post_init__(self) -> 
None: if "transcript" in parameters: bindings.setdefault("transcript", state_transcript) object.__setattr__(self, "bindings", bindings) + object.__setattr__( + self, "objects", normalize_object_map(self.objects, "User objects") + ) def normalize_user(value: object | None) -> User | None: @@ -58,13 +65,13 @@ def normalize_user(value: object | None) -> User | None: if isinstance(value, UserConfig): return user_from_mapping(value.model_dump(exclude_none=True)) if isinstance(value, Mapping): - return user_from_mapping(cast(Mapping[str, object], value)) + return user_from_mapping(cast(ConfigMap, value)) if callable(value): return User(value) raise TypeError("User must be a callable, User, import ref, or mapping.") -def user_from_mapping(spec: Mapping[str, object]) -> User: +def user_from_mapping(spec: ConfigMap) -> User: config = UserConfig.from_config(spec) fn = config.fn if isinstance(fn, str): diff --git a/verifiers/v1/utils/artifact_utils.py b/verifiers/v1/utils/artifact_utils.py index f6ccfcfe3..8f5758abb 100644 --- a/verifiers/v1/utils/artifact_utils.py +++ b/verifiers/v1/utils/artifact_utils.py @@ -1,30 +1,28 @@ -from __future__ import annotations +from ..types import ConfigMap -from collections.abc import Mapping - -def artifact_path(spec: Mapping[str, object]) -> str: +def artifact_path(spec: ConfigMap) -> str: path = spec.get("path") if not isinstance(path, str): raise TypeError("program artifact path must be a string.") return path -def artifact_format(spec: Mapping[str, object]) -> str: +def artifact_format(spec: ConfigMap) -> str: value = spec.get("format", "text") if not isinstance(value, str): raise TypeError("program artifact format must be a string.") return value -def artifact_key(spec: Mapping[str, object]) -> str | None: +def artifact_key(spec: ConfigMap) -> str | None: value = spec.get("key") if value is not None and not isinstance(value, str): raise TypeError("program artifact key must be a string.") return value -def artifact_optional(spec: 
Mapping[str, object]) -> bool: +def artifact_optional(spec: ConfigMap) -> bool: value = spec.get("optional", False) if not isinstance(value, bool): raise TypeError("program artifact optional must be a boolean.") diff --git a/verifiers/v1/utils/binding_utils.py b/verifiers/v1/utils/binding_utils.py new file mode 100644 index 000000000..2143eaa26 --- /dev/null +++ b/verifiers/v1/utils/binding_utils.py @@ -0,0 +1,216 @@ +import inspect +from collections.abc import Mapping, Set +from typing import Literal, TypeAlias, cast +from ..types import ConfigMap, Handler, Objects + + +BindingRoot: TypeAlias = Literal[ + "task", "state", "tasks", "states", "runtime", "objects", "tools" +] +CallableBindingSource: TypeAlias = Handler | ConfigMap +BindingSource: TypeAlias = str | CallableBindingSource +BindingMap: TypeAlias = dict[str, BindingSource] +Bindings: TypeAlias = dict[str, BindingSource] + +VALID_BINDING_ROOTS: frozenset[str] = frozenset( + {"task", "state", "tasks", "states", "runtime", "objects", "tools"} +) +ROLLOUT_FRAMEWORK_ARGS: frozenset[str] = frozenset( + { + "answer", + "completion", + "error", + "example_id", + "info", + "metrics", + "prompt", + "question", + "reward", + "runtime", + "state", + "task", + "task_id", + "timing", + "trajectory", + } +) +GROUP_FRAMEWORK_ARGS: frozenset[str] = frozenset({"states", "tasks"}) + + +def normalize_binding_map( + value: object, + field: str, + *, + allow_objects: bool = True, + validate_sources: bool = True, + key_style: Literal["callable", "arg"] = "callable", +) -> Bindings: + if value is None: + return {} + if not isinstance(value, Mapping): + raise TypeError(f"{field} must be a mapping.") + result: Bindings = {} + for raw_key, source in value.items(): + if not isinstance(raw_key, str): + raise TypeError(f"{field} keys must be strings.") + if key_style == "callable": + binding_key_parts(raw_key) + elif not raw_key or "." 
in raw_key: + raise ValueError(f"{field} keys must be argument names.") + if validate_sources: + validate_binding_source( + source, + f"{field} source for {raw_key!r}", + allow_objects=allow_objects, + ) + if isinstance(source, Mapping): + normalized_source = cast(ConfigMap, source) + elif isinstance(source, str) or callable(source): + normalized_source = source + else: + raise TypeError( + f"{field} source for {raw_key!r} must be a framework path or callable." + ) + result[raw_key] = normalized_source + return result + + +def normalize_object_map(value: object, field: str) -> Objects: + if value is None: + return {} + if not isinstance(value, Mapping): + raise TypeError(f"{field} must be a mapping.") + result: Objects = {} + for raw_key, source in value.items(): + if not isinstance(raw_key, str): + raise TypeError(f"{field} keys must be strings.") + if not raw_key: + raise ValueError(f"{field} keys must be non-empty strings.") + result[raw_key] = source + return result + + +def validate_binding_source( + source: object, context: str, *, allow_objects: bool = True +) -> None: + if ( + not isinstance(source, str) + and not callable(source) + and not isinstance(source, Mapping) + ): + raise TypeError(f"{context} must be a framework path or callable.") + root = binding_source_root(source) + validate_binding_source_root(root, context, allow_objects=allow_objects) + if root == "objects": + binding_object_name(source) + if isinstance(source, Mapping): + validate_callable_source(cast(ConfigMap, source), context) + + +def validate_callable_source(source: ConfigMap, context: str) -> None: + if "fn" not in source: + raise TypeError(f"{context} mapping sources must use an 'fn' key.") + unknown = set(source) - {"fn"} + if unknown: + raise ValueError(f"{context} has unknown keys: {sorted(unknown)}.") + + +def function_name(fn: Handler) -> str: + name = getattr(fn, "__name__", None) + if not isinstance(name, str) or not name: + raise ValueError("Callable bindings require a 
stable __name__.") + return name + + +def binding_key_parts(key: object) -> tuple[str, str]: + if not isinstance(key, str): + raise TypeError("Binding keys must be strings.") + target, separator, arg_name = key.partition(".") + if separator != "." or not target or not arg_name or "." in arg_name: + raise ValueError(f"Binding key {key!r} must be 'callable.arg'.") + return target, arg_name + + +def binding_source_root(source: object) -> BindingRoot | None: + if not isinstance(source, str): + return None + root, _, _ = source.partition(".") + if root in VALID_BINDING_ROOTS: + return cast(BindingRoot, root) + raise ValueError( + "Binding string sources must start with task, state, tasks, states, " + f"runtime, objects, or tools; got {source!r}." + ) + + +def validate_binding_source_root( + root: BindingRoot | None, context: str, *, allow_objects: bool = True +) -> None: + if root is None: + return + if root == "objects" and not allow_objects: + raise ValueError(f"{context} cannot use objects.* sources.") + + +def binding_object_name(source: object) -> str: + if not isinstance(source, str): + raise TypeError("Object binding source must be a string.") + root, separator, tail = source.partition(".") + if root != "objects" or not separator: + raise ValueError("Object binding source must be 'objects.name'.") + name, _, _ = tail.partition(".") + if not name: + raise ValueError("Object binding source must be 'objects.name'.") + return name + + +def validate_bound_arg( + fn: Handler | object, + arg_name: str, + context: str, + protected_args: Set[str] = frozenset(), +) -> None: + if arg_name in protected_args: + return + if arg_name in {"task", "state", "runtime"}: + raise ValueError(f"{context} cannot bind reserved arg {arg_name!r}.") + if not callable(fn): + raise TypeError(f"{context} target is not callable.") + try: + signature = inspect.signature(fn) + except (TypeError, ValueError) as exc: + raise TypeError(f"{context} target signature cannot be inspected.") from exc + 
if arg_name not in signature.parameters: + name = ( + getattr(fn, "__name__", None) + or getattr(fn, "name", None) + or type(fn).__name__ + ) + raise TypeError( + f"{context} targets {name!r}, but {name!r} does not declare " + f"arg {arg_name!r}." + ) + + +def same_callable(left: Handler, right: Handler) -> bool: + if left is right: + return True + left_self = getattr(left, "__self__", None) + right_self = getattr(right, "__self__", None) + left_func = getattr(left, "__func__", None) + right_func = getattr(right, "__func__", None) + return left_self is right_self and left_func is not None and left_func is right_func + + +def read_path(value: object, path: str) -> object: + current = value + for part in path.split("."): + if not part: + raise ValueError(f"Invalid empty path segment in {path!r}.") + if isinstance(current, Mapping): + current = cast(ConfigMap, current)[part] + elif isinstance(current, list): + current = current[int(part)] + else: + current = getattr(current, part) + return current diff --git a/verifiers/v1/utils/config_callable_utils.py b/verifiers/v1/utils/config_callable_utils.py new file mode 100644 index 000000000..62ffb839f --- /dev/null +++ b/verifiers/v1/utils/config_callable_utils.py @@ -0,0 +1,123 @@ +import functools +import inspect +from collections.abc import Iterable, Mapping +from typing import Literal, TypeAlias, cast + +from .config_utils import resolve_config_object +from ..types import ConfigMap, Handler + +CallableKind: TypeAlias = Literal[ + "stop", "setup", "update", "metric", "reward", "advantage", "cleanup", "teardown" +] + +CALLABLE_KIND_FIELDS: dict[CallableKind, str] = { + "stop": "stops", + "setup": "setups", + "update": "updates", + "metric": "metrics", + "reward": "rewards", + "advantage": "advantages", + "cleanup": "cleanups", + "teardown": "teardowns", +} + + +def merge_config_callables( + values: Iterable[Handler], + config: object, + kind: CallableKind, +) -> list[Handler]: + return [*config_callables(values, kind), 
*config_callables(config, kind)] + + +def merge_config_handler_map( + values: dict[CallableKind, Iterable[Handler]], + config: object, +) -> dict[CallableKind, list[Handler]]: + return { + kind: merge_config_callables( + constructor_values, getattr(config, CALLABLE_KIND_FIELDS[kind]), kind + ) + for kind, constructor_values in values.items() + } + + +def config_callables(value: object, kind: CallableKind) -> list[Handler]: + if value is None: + return [] + if isinstance(value, str): + return [callable_config_item(value, kind)] + if isinstance(value, Mapping): + return [callable_config_item(value, kind)] + if isinstance(value, Iterable): + return [callable_config_item(item, kind) for item in value] + return [callable_config_item(value, kind)] + + +def callable_config_item(value: object, kind: CallableKind) -> Handler: + value = resolve_config_object(value) + if isinstance(value, Mapping): + return callable_from_mapping(cast(ConfigMap, value), kind) + if not callable(value): + raise TypeError(f"{kind} config entries must resolve to callables.") + return cast(Handler, value) + + +def callable_from_mapping(spec: ConfigMap, kind: CallableKind) -> Handler: + allowed = callable_config_keys(kind) + unknown = set(spec) - allowed + if unknown: + raise ValueError(f"{kind} callable config has unknown keys: {sorted(unknown)}.") + if bool(spec.get("skip", False)): + raise ValueError( + f"{kind} callable config should be removed instead of skipped." 
+ ) + fn = resolve_config_object(spec.get("fn")) + if not callable(fn): + raise TypeError(f"{kind} callable config requires callable fn.") + metadata = {key: spec[key] for key in spec if key not in {"fn", "skip"}} + return configured_callable(cast(Handler, fn), kind, metadata) + + +def callable_config_keys(kind: CallableKind) -> set[str]: + keys = {"fn", "priority", "skip"} + if kind in {"update", "metric", "reward", "cleanup"}: + keys.add("stage") + if kind == "reward": + keys.add("weight") + return keys + + +def configured_callable( + fn: Handler, + kind: CallableKind, + metadata: ConfigMap, +) -> Handler: + if not metadata: + return fn + + @functools.wraps(fn) + async def wrapper(**kwargs: object) -> object: + result = fn(**kwargs) + if inspect.isawaitable(result): + return await result + return result + + setattr(wrapper, "__signature__", inspect.signature(fn)) + setattr(wrapper, kind, True) + if "priority" in metadata: + priority = metadata["priority"] + if not isinstance(priority, int) or isinstance(priority, bool): + raise TypeError(f"{kind} priority must be an integer.") + setattr(wrapper, f"{kind}_priority", priority) + if "stage" in metadata: + stage = metadata["stage"] + if stage not in {"rollout", "group"}: + raise ValueError(f"{kind} stage must be 'rollout' or 'group'.") + setattr(wrapper, f"{kind}_stage", stage) + if "weight" in metadata: + weight = metadata["weight"] + if not isinstance(weight, int | float) or isinstance(weight, bool): + raise TypeError("reward weight must be numeric.") + setattr(wrapper, "reward_weight", float(weight)) + return cast(Handler, wrapper) diff --git a/verifiers/v1/utils/config_utils.py b/verifiers/v1/utils/config_utils.py new file mode 100644 index 000000000..0cf3b2a25 --- /dev/null +++ b/verifiers/v1/utils/config_utils.py @@ -0,0 +1,177 @@ +import importlib +import inspect +from collections.abc import Mapping +from typing import cast + +from pydantic import BaseModel +from pydantic_core import PydanticUndefined +from 
..types import ConfigData, ConfigFactory, ConfigInputMap, ConfigMap + + +CONFIG_REF_COLLECTION_FIELDS = { + "stops", + "setups", + "updates", + "metrics", + "rewards", + "advantages", + "cleanups", + "teardowns", +} + + +def config_data(value: object, target: type[BaseModel] | None = None) -> ConfigData: + if value is None: + data: ConfigData = {} + elif isinstance(value, BaseModel): + data = value.model_dump(exclude_none=True, exclude_unset=True) + if target is not None: + data = { + key: item for key, item in data.items() if key in target.model_fields + } + elif isinstance(value, Mapping): + data = string_mapping(cast(ConfigInputMap, value)) + else: + raise TypeError("Config must be a mapping or config object.") + return data + + +def omit_none(data: ConfigMap) -> ConfigData: + return {key: value for key, value in data.items() if value is not None} + + +def merge_child_config(default: object, override: object) -> object: + merged = deep_merge(config_data(default), config_data(override)) + if isinstance(default, BaseModel): + return cast(type[BaseModel], type(default)).model_validate(merged) + return merged + + +def expand_config_ref(value: object | None, target: type[BaseModel]) -> object | None: + if not isinstance(value, Mapping): + return value + data = string_mapping(cast(ConfigInputMap, value)) + config_ref = data.pop("config", None) + if config_ref is None: + return data + base = load_config_ref(config_ref) + base_data = config_data(base, target) + return merge_config_ref_overlay(base_data, data) + + +def expand_config_ref_data(data: ConfigData, target: type[BaseModel]) -> ConfigData: + expanded = expand_config_ref(data, target) + if not isinstance(expanded, dict): + raise TypeError("config data must resolve to a mapping.") + return cast(ConfigData, expanded) + + +def load_config_ref(config_ref: object) -> object: + value = resolve_config_object(config_ref) + if callable(value): + value = cast(ConfigFactory, value)() + if inspect.isawaitable(value): + raise 
TypeError("config refs must resolve synchronously.") + config_data(value) + return value + + +def merge_config_ref_overlay(base: ConfigData, overlay: ConfigMap) -> ConfigData: + merged = dict(base) + for key, value in overlay.items(): + existing = merged.get(key) + if ( + key in CONFIG_REF_COLLECTION_FIELDS + and isinstance(existing, list) + and isinstance(value, list) + ): + merged[key] = [*existing, *value] + elif isinstance(existing, Mapping) and isinstance(value, Mapping): + merged[key] = deep_merge( + string_mapping(cast(ConfigInputMap, existing)), + string_mapping(cast(ConfigInputMap, value)), + ) + else: + merged[key] = value + return merged + + +def merge_config_value(value: object, config: object) -> object: + if config is None: + return value + if value is None: + return config + value_mapping = config_mapping(value) + config_mapping_value = config_mapping(config) + if value_mapping is not None and config_mapping_value is not None: + return deep_merge( + config_mapping_value, + value_mapping, + ) + return value + + +def config_mapping(value: object) -> ConfigData | None: + if isinstance(value, BaseModel): + return value.model_dump(exclude_none=True) + if isinstance(value, Mapping): + return string_mapping(cast(ConfigInputMap, value)) + return None + + +def resolve_config_object(value: object) -> object: + if isinstance(value, str): + return import_config_ref(value) + return value + + +def import_config_ref(ref: str) -> object: + module_name, separator, attr_path = ref.partition(":") + if not separator or not module_name or not attr_path: + raise ValueError(f"Config ref {ref!r} must use 'module:object'.") + obj: object = importlib.import_module(module_name) + for part in attr_path.split("."): + obj = getattr(obj, part) + return obj + + +def deep_merge(base: ConfigData, overlay: ConfigMap) -> ConfigData: + merged: ConfigData = dict(base) + for key, value in overlay.items(): + existing = merged.get(key) + if isinstance(existing, Mapping) and 
isinstance(value, Mapping): + merged[key] = deep_merge( + string_mapping(cast(ConfigInputMap, existing)), + string_mapping(cast(ConfigInputMap, value)), + ) + else: + merged[key] = value + return merged + + +def string_mapping(value: ConfigInputMap) -> ConfigData: + result: ConfigData = {} + for key, item in value.items(): + if not isinstance(key, str): + raise TypeError("Config mappings require string keys.") + result[key] = item + return result + + +def annotation_text(annotation: object) -> str: + if getattr(annotation, "__args__", None): + return str(annotation).replace("typing.", "") + name = getattr(annotation, "__name__", None) + if isinstance(name, str): + return name + return str(annotation).replace("typing.", "") + + +def default_text(field: object) -> str: + default_factory = getattr(field, "default_factory", None) + if default_factory is not None: + return "" + default = getattr(field, "default", PydanticUndefined) + if default is PydanticUndefined: + return "required" + return repr(default) diff --git a/verifiers/v1/utils/endpoint_utils.py b/verifiers/v1/utils/endpoint_utils.py index 496250563..ef47c6c8f 100644 --- a/verifiers/v1/utils/endpoint_utils.py +++ b/verifiers/v1/utils/endpoint_utils.py @@ -1,13 +1,11 @@ -from __future__ import annotations - import asyncio import json import logging import os import time import uuid -from collections.abc import Callable, Mapping -from typing import Any, Literal, cast +from collections.abc import Mapping +from typing import Literal, Protocol, cast from anthropic import Anthropic, AsyncAnthropic from openai import AsyncOpenAI, OpenAI @@ -16,6 +14,7 @@ from verifiers.types import ( AssistantMessage, ClientType, + EndpointApi, Messages, SystemMessage, Tool, @@ -36,18 +35,18 @@ from ..runtime import Runtime from ..state import State from ..task import Task +from ..types import ConfigData, ConfigMap, Handler, PromptMessage -EndpointApi = Literal[ - "chat", - "chat_completions", - "completions", - "responses", - 
"messages", - "openai_chat_completions", - "openai_completions", - "openai_responses", - "anthropic_messages", -] + +class TunnelHandle(Protocol): + is_running: bool + url: object + + async def start(self) -> object: ... + + async def check_registered(self) -> bool: ... + + def sync_stop(self) -> object: ... def client_from_state( @@ -68,7 +67,7 @@ def endpoint_config_from_state( return endpoint.config(state, api=api) -def endpoint_from_state(state: State) -> Endpoint: +def endpoint_from_state(state: State) -> "Endpoint": runtime = state._runtime() harness = getattr(runtime, "harness", None) endpoint = getattr(harness, "endpoint", None) @@ -144,7 +143,7 @@ def __init__( self.port, secret=secret or os.environ.get("ENDPOINT_SECRET") ) self.secret = self.server.secret - self._tunnel: object | None = None + self._tunnel: TunnelHandle | None = None self._tunnel_lock = asyncio.Lock() self._tunnel_last_checked = 0.0 self._rollout_queues: dict[str, asyncio.Queue[str]] = {} @@ -224,8 +223,8 @@ def unregister_rollout(self, rollout_key: str) -> None: def rollout_queue(self, rollout_key: str) -> asyncio.Queue[str]: return self._rollout_queues[rollout_key] - def get_request(self, request_id: str) -> dict[str, object]: - return cast(dict[str, object], self.server.intercepts[request_id]) + def get_request(self, request_id: str) -> ConfigData: + return cast(ConfigData, self.server.intercepts[request_id]) async def url_base(self) -> str: if self.use_tunnel: @@ -236,12 +235,12 @@ async def get_tunnel_url(self) -> str: from prime_tunnel import Tunnel async with self._tunnel_lock: - tunnel = cast(Any, self._tunnel) + tunnel = self._tunnel if tunnel is not None and not tunnel.is_running: tunnel.sync_stop() self._tunnel = None - tunnel = cast(Any, self._tunnel) + tunnel = self._tunnel if tunnel is not None: now = time.time() if now - self._tunnel_last_checked > self.TUNNEL_CHECK_INTERVAL: @@ -251,25 +250,25 @@ async def get_tunnel_url(self) -> str: self._tunnel = None if self._tunnel is 
None: - tunnel = Tunnel(local_port=self.port) + tunnel = cast(TunnelHandle, Tunnel(local_port=self.port)) url = await tunnel.start() self._tunnel = tunnel self._tunnel_last_checked = time.time() return str(url) - tunnel = cast(Any, self._tunnel) + tunnel = self._tunnel if tunnel.url is None: raise TunnelError("Tunnel started but URL is unavailable.") return str(tunnel.url) async def check_tunnel(self) -> None: - tunnel = cast(Any, self._tunnel) + tunnel = self._tunnel if tunnel is not None and not tunnel.is_running: raise TunnelError("Tunnel process died during rollout.") async def teardown(self) -> None: async with self._tunnel_lock: - tunnel = cast(Any, self._tunnel) + tunnel = self._tunnel if tunnel is not None: tunnel.sync_stop() self._tunnel = None @@ -277,16 +276,16 @@ async def teardown(self) -> None: async def run_intercepted_program( - program: Callable[..., object], + program: Handler, endpoint: Endpoint, runtime: Runtime, task: Task, state: State, ) -> object: - async def call_tool(name: str, arguments: Mapping[str, object]) -> object: + async def call_tool(name: str, arguments: ConfigMap) -> object: return await runtime.call_tool(name, task, state, **dict(arguments)) - async def call_user(transcript: list[object]) -> object: + async def call_user(transcript: list[PromptMessage]) -> object: return await runtime.user_messages(task, state, transcript=transcript) async def check_stop() -> object: @@ -432,7 +431,7 @@ async def forward_request( deliver_response(request, response, error) -def normalize_endpoint_prompt(request: dict[str, object]) -> Messages: +def normalize_endpoint_prompt(request: ConfigData) -> Messages: protocol = request.get("protocol") if protocol == "anthropic_messages": return normalize_anthropic_messages(request) @@ -453,7 +452,7 @@ def normalize_endpoint_messages(messages: object) -> Messages: raise TypeError("Endpoint messages must be vf.Messages or str.") -def normalize_anthropic_messages(request: dict[str, object]) -> Messages: +def 
normalize_anthropic_messages(request: ConfigData) -> Messages: messages: Messages = [] system = request.get("system") if isinstance(system, str) and system: @@ -464,7 +463,7 @@ def normalize_anthropic_messages(request: dict[str, object]) -> Messages: for raw_message in raw_messages: if not isinstance(raw_message, Mapping): raise TypeError("Anthropic endpoint message entries must be dicts.") - raw_message = cast(Mapping[str, object], raw_message) + raw_message = cast(ConfigMap, raw_message) role = raw_message.get("role") content = raw_message.get("content") if role == "user": @@ -486,7 +485,7 @@ def normalize_anthropic_user_message(content: object) -> Messages: for block in content: if not isinstance(block, Mapping): continue - block = cast(Mapping[str, object], block) + block = cast(ConfigMap, block) block_type = block.get("type") if block_type == "text" and isinstance(block.get("text"), str): text_parts.append(str(block["text"])) @@ -515,7 +514,7 @@ def normalize_anthropic_assistant_message(content: object) -> AssistantMessage: for block in content: if not isinstance(block, Mapping): continue - block = cast(Mapping[str, object], block) + block = cast(ConfigMap, block) block_type = block.get("type") if block_type == "text" and isinstance(block.get("text"), str): text_parts.append(str(block["text"])) @@ -544,7 +543,7 @@ def anthropic_block_content_text(content: object) -> str: for block in content: if not isinstance(block, Mapping): continue - block = cast(Mapping[str, object], block) + block = cast(ConfigMap, block) text = block.get("text") if isinstance(text, str): text_parts.append(text) @@ -561,7 +560,7 @@ def normalize_openai_responses_input(raw_input: object) -> Messages: for item in raw_input: if not isinstance(item, Mapping): raise TypeError("OpenAI Responses input entries must be dicts.") - item = cast(Mapping[str, object], item) + item = cast(ConfigMap, item) item_type = item.get("type") if item_type == "function_call": call_id = item.get("call_id") or 
item.get("id") @@ -608,7 +607,7 @@ def responses_content_text(content: object) -> str: text_parts: list[str] = [] for part in content: if isinstance(part, Mapping): - part = cast(Mapping[str, object], part) + part = cast(ConfigMap, part) text = part.get("text") if isinstance(text, str): text_parts.append(text) @@ -628,13 +627,13 @@ def normalize_endpoint_tools(tools: object, protocol: str) -> list[Tool] | None: continue if not isinstance(raw_tool, dict): raise TypeError("Endpoint tool definitions must be dicts.") - raw_tool = cast(dict[str, Any], raw_tool) + raw_tool = cast(ConfigData, raw_tool) if protocol == "anthropic_messages": normalized.append( Tool( name=str(raw_tool.get("name", "")), description=str(raw_tool.get("description", "")), - parameters=cast(dict[str, Any], raw_tool.get("input_schema") or {}), + parameters=cast(ConfigData, raw_tool.get("input_schema") or {}), ) ) continue @@ -643,19 +642,20 @@ def normalize_endpoint_tools(tools: object, protocol: str) -> list[Tool] | None: Tool( name=str(raw_tool.get("name", "")), description=str(raw_tool.get("description", "")), - parameters=cast(dict[str, Any], raw_tool.get("parameters") or {}), + parameters=cast(ConfigData, raw_tool.get("parameters") or {}), strict=cast(bool | None, raw_tool.get("strict")), ) ) continue function_payload = raw_tool.get("function") if raw_tool.get("type") == "function" and isinstance(function_payload, dict): + function_payload = cast(ConfigData, function_payload) normalized.append( Tool( name=str(function_payload.get("name", "")), description=str(function_payload.get("description", "")), parameters=cast( - dict[str, Any], function_payload.get("parameters") or {} + ConfigData, function_payload.get("parameters") or {} ), strict=cast(bool | None, function_payload.get("strict")), ) @@ -666,6 +666,6 @@ def normalize_endpoint_tools(tools: object, protocol: str) -> list[Tool] | None: def assistant_completion_from_messages( - prompt: list[dict[str, object]], messages: list[dict[str, 
object]] -) -> list[dict[str, object]]: + prompt: list[ConfigData], messages: list[ConfigData] +) -> list[ConfigData]: return messages[len(prompt) :] diff --git a/verifiers/v1/utils/json_utils.py b/verifiers/v1/utils/json_utils.py index a7bb28ad4..cb01cbb4d 100644 --- a/verifiers/v1/utils/json_utils.py +++ b/verifiers/v1/utils/json_utils.py @@ -1,11 +1,10 @@ -from __future__ import annotations - import json from typing import cast +from ..types import ConfigData -def json_args(value: str) -> dict[str, object]: +def json_args(value: str) -> ConfigData: parsed = json.loads(value or "{}") if not isinstance(parsed, dict): raise ValueError("Tool call arguments must decode to a JSON object.") - return cast(dict[str, object], parsed) + return cast(ConfigData, parsed) diff --git a/verifiers/v1/utils/judge_utils.py b/verifiers/v1/utils/judge_utils.py index 32397c98f..d4d67cb1b 100644 --- a/verifiers/v1/utils/judge_utils.py +++ b/verifiers/v1/utils/judge_utils.py @@ -1,15 +1,14 @@ -from __future__ import annotations - import json from collections.abc import Mapping from typing import cast +from ..types import ConfigData, ConfigMap -def parse_judge_json(text: str) -> dict[str, object]: +def parse_judge_json(text: str) -> ConfigData: try: value = json.loads(text) if isinstance(value, dict): - return cast(dict[str, object], value) + return cast(ConfigData, value) except json.JSONDecodeError: pass start = text.find("{") @@ -18,7 +17,7 @@ def parse_judge_json(text: str) -> dict[str, object]: try: value = json.loads(text[start : end + 1]) if isinstance(value, dict): - return cast(dict[str, object], value) + return cast(ConfigData, value) except json.JSONDecodeError: pass return {"score": 0.0, "reason": "judge did not return JSON", "raw": text} @@ -37,7 +36,7 @@ def clamp_float(value: object) -> float: def truncate_command_record(record: object) -> object: if not isinstance(record, Mapping): return record - record = cast(Mapping[str, object], record) + record = cast(ConfigMap, 
record) return { **dict(record), "command": truncate_text(str(record.get("command") or ""), limit=2_000), @@ -50,14 +49,3 @@ def truncate_text(text: str, limit: int = 6_000) -> str: if len(text) <= limit: return text return text[:limit] + "\n..." - - -def completion_text(completion: object) -> str: - if isinstance(completion, list): - for message in reversed(completion): - if not isinstance(message, Mapping): - continue - message = cast(Mapping[str, object], message) - if message.get("role") == "assistant": - return str(message.get("content") or "") - return str(completion or "") diff --git a/verifiers/v1/utils/lifecycle_utils.py b/verifiers/v1/utils/lifecycle_utils.py index 1c14a14c0..4aae957f0 100644 --- a/verifiers/v1/utils/lifecycle_utils.py +++ b/verifiers/v1/utils/lifecycle_utils.py @@ -1,27 +1,35 @@ -from __future__ import annotations - import inspect -from collections.abc import Callable, Iterable -from typing import Literal, cast +from collections.abc import Iterable +from typing import TYPE_CHECKING, Literal, cast from verifiers.utils.async_utils import maybe_call_with_named_args +from ..state import State +from ..task import Task +from ..types import Handler + +if TYPE_CHECKING: + from ..harness import Harness + from ..taskset import Taskset + from ..toolset import Toolset + from ..user import User + LifecycleStage = Literal["rollout", "group"] def collect_handlers( - owners: Iterable[object | None], + owners: Iterable["Taskset | Harness | Toolset | User | None"], attr: str, - extra: Iterable[Callable[..., object]] = (), + extra: Iterable[Handler] = (), stage: LifecycleStage | None = None, -) -> list[Callable[..., object]]: - handlers: list[Callable[..., object]] = [] +) -> list[Handler]: + handlers: list[Handler] = [] for owner in owners: if owner is None: continue for _, method in inspect.getmembers(owner, predicate=callable): if getattr(method, attr, False) is True: - handlers.append(cast(Callable[..., object], method)) + handlers.append(cast(Handler, 
method)) handlers.extend(extra) if stage is not None: handlers = [ @@ -33,36 +41,25 @@ def collect_handlers( def validate_handler_args( - handlers: Iterable[Callable[..., object]], + handlers: Iterable[Handler], expected: set[str], attr: str, stage: LifecycleStage, ) -> None: - expected_text = expected_args_text(expected) + _ = expected, attr, stage for handler in handlers: - names = set(inspect.signature(handler).parameters) - name = str(getattr(handler, "__name__", type(handler).__name__)) - if stage == "group" and names != expected: - raise ValueError( - f"group {attr} handler {name!r} must accept exactly {expected_text}." - ) - if not expected.issubset(names): - raise ValueError( - f"{stage} {attr} handler {name!r} must accept {expected_text}." - ) - - -async def run_handlers( - handlers: Iterable[Callable[..., object]], **kwargs: object -) -> None: + inspect.signature(handler) + + +async def run_handlers(handlers: Iterable[Handler], **kwargs: object) -> None: for handler in handlers: await maybe_call_with_named_args(handler, **kwargs) def unique_handlers( - handlers: Iterable[Callable[..., object]], -) -> list[Callable[..., object]]: - unique: list[Callable[..., object]] = [] + handlers: Iterable[Handler], +) -> list[Handler]: + unique: list[Handler] = [] seen: set[tuple[int, int]] = set() for handler in handlers: key = ( @@ -76,9 +73,7 @@ def unique_handlers( return unique -def sort_handlers( - handlers: Iterable[Callable[..., object]], attr: str -) -> list[Callable[..., object]]: +def sort_handlers(handlers: Iterable[Handler], attr: str) -> list[Handler]: return sorted( handlers, key=lambda handler: ( @@ -88,9 +83,16 @@ def sort_handlers( ) -def expected_args_text(expected: set[str]) -> str: - if expected == {"task", "state"}: - return "task and state" - if expected == {"tasks", "states"}: - return "tasks and states" - return " and ".join(sorted(expected)) +async def state_done(task: Task, state: State) -> bool: + _ = task + return bool(state.get("done")) + 
+ +def handler_collection_attr(attr: str) -> str: + return { + "stop": "stops", + "setup": "setups", + "update": "updates", + "cleanup": "cleanups", + "teardown": "teardowns", + }.get(attr, attr) diff --git a/verifiers/v1/utils/mcp_proxy_utils.py b/verifiers/v1/utils/mcp_proxy_utils.py index 165a54727..885774bca 100644 --- a/verifiers/v1/utils/mcp_proxy_utils.py +++ b/verifiers/v1/utils/mcp_proxy_utils.py @@ -1,9 +1,9 @@ -from __future__ import annotations - import json import shlex from collections.abc import Mapping -from typing import Literal, cast +from typing import cast +from ..types import ConfigData, ConfigMap, ProgramChannel + MCP_PROXY_PATH = "/tmp/vf_mcp_tools.py" MCP_PROXY_CONFIG_PATH = "/tmp/vf_mcp_tools.json" @@ -11,52 +11,49 @@ REQUESTS_PACKAGE = "requests" -ProgramToolType = Literal["callable", "mcp"] -PROGRAM_TOOL_TYPES = {"callable", "mcp"} -PROGRAM_TOOL_METADATA = {"priority"} +PROGRAM_CHANNELS = {"callable", "mcp"} +PROGRAM_CHANNEL_METADATA = {"priority"} -def validate_program_tool_types(value: object) -> tuple[ProgramToolType, ...]: +def validate_program_channels(value: object) -> tuple[ProgramChannel, ...]: if value is None: return () if isinstance(value, str): - if value not in PROGRAM_TOOL_TYPES: - raise ValueError("program.tools must be 'callable' or 'mcp'.") - return (cast(ProgramToolType, value),) + if value not in PROGRAM_CHANNELS: + raise ValueError("program.channels must be 'callable' or 'mcp'.") + return (cast(ProgramChannel, value),) if isinstance(value, list): - result: list[ProgramToolType] = [] + result: list[ProgramChannel] = [] for item in value: - for tool_type in validate_program_tool_types(item): - if tool_type in result: + for channel in validate_program_channels(item): + if channel in result: raise ValueError( - f"program.tools defines {tool_type!r} more than once." + f"program.channels defines {channel!r} more than once." 
) - result.append(tool_type) + result.append(channel) return tuple(result) if isinstance(value, Mapping): if not all(isinstance(key, str) for key in value): - raise TypeError("program.tools mapping keys must be strings.") - spec = cast(Mapping[str, object], value) - unknown = sorted(set(spec) - PROGRAM_TOOL_TYPES - PROGRAM_TOOL_METADATA) + raise TypeError("program.channels mapping keys must be strings.") + spec = cast(ConfigMap, value) + unknown = sorted(set(spec) - PROGRAM_CHANNELS - PROGRAM_CHANNEL_METADATA) if unknown: - raise ValueError(f"program.tools has unknown tool interface: {unknown}.") + raise ValueError(f"program.channels has unknown channel: {unknown}.") if "priority" in spec: priority = spec["priority"] if not isinstance(priority, int) or isinstance(priority, bool): - raise TypeError("program.tools priority must be an integer.") - result = [ - cast(ProgramToolType, key) for key in spec if key in PROGRAM_TOOL_TYPES - ] + raise TypeError("program.channels priority must be an integer.") + result = [cast(ProgramChannel, key) for key in spec if key in PROGRAM_CHANNELS] if not result: - raise ValueError("program.tools mapping must define a tool interface.") + raise ValueError("program.channels mapping must define a channel.") return tuple(result) - raise TypeError("program.tools must be a string, mapping, or list.") + raise TypeError("program.channels must be a string, mapping, or list.") def proxy_program( - program: Mapping[str, object], tool_base_url: str, tool_api_key: str -) -> dict[str, object]: - files = dict(cast(Mapping[str, object], program.get("files") or {})) + program: ConfigMap, tool_base_url: str, tool_api_key: str +) -> ConfigData: + files = dict(cast(ConfigMap, program.get("files") or {})) if MCP_PROXY_PATH in files and files[MCP_PROXY_PATH] != proxy_source(): raise ValueError(f"program.files cannot override {MCP_PROXY_PATH}.") config = { @@ -75,7 +72,7 @@ def proxy_command() -> list[str]: return ["python3", MCP_PROXY_PATH, 
MCP_PROXY_CONFIG_PATH] -def proxy_sandbox(sandbox_config: Mapping[str, object]) -> dict[str, object]: +def proxy_sandbox(sandbox_config: ConfigMap) -> ConfigData: config = dict(sandbox_config) packages = package_list(config.get("packages")) if not any(str(package).startswith("mcp") for package in packages): @@ -98,8 +95,6 @@ def package_list(value: object) -> list[str]: def proxy_source() -> str: return r""" -from __future__ import annotations - import asyncio import json import sys diff --git a/verifiers/v1/utils/mcp_utils.py b/verifiers/v1/utils/mcp_utils.py index cbd059f9e..e8efa3c8f 100644 --- a/verifiers/v1/utils/mcp_utils.py +++ b/verifiers/v1/utils/mcp_utils.py @@ -1,23 +1,22 @@ -from __future__ import annotations - import asyncio from contextlib import AsyncExitStack -from typing import Any, cast +from typing import cast from verifiers.errors import ToolError from verifiers.types import Tool from ..toolset import MCPTool +from ..types import ConfigData class MCPToolHandle: - def __init__(self, session: object, tool_def: Tool): + def __init__(self, session: "MCPToolSession", tool_def: Tool): self.session = session self.name = tool_def.name self.tool_def = tool_def async def __call__(self, **kwargs: object) -> object: - result = await cast(Any, self.session).call_tool(self.name, dict(kwargs)) + result = await self.session.call_tool(self.name, dict(kwargs)) return mcp_result_value(result) @@ -25,13 +24,13 @@ class MCPToolSession: def __init__(self, spec: MCPTool): self.spec = spec self.handles: list[MCPToolHandle] = [] - self._queue: asyncio.Queue[tuple[str, str, dict[str, object], object]] = ( + self._queue: asyncio.Queue[tuple[str, str, ConfigData, asyncio.Future]] = ( asyncio.Queue() ) self._ready: asyncio.Future[list[MCPToolHandle]] | None = None self._task: asyncio.Task[None] | None = None - async def __aenter__(self) -> MCPToolSession: + async def __aenter__(self) -> "MCPToolSession": loop = asyncio.get_running_loop() self._ready = loop.create_future() 
self._task = loop.create_task(self._run()) @@ -41,7 +40,7 @@ async def __aenter__(self) -> MCPToolSession: async def __aexit__(self, exc_type, exc, tb) -> None: await self.close() - async def call_tool(self, name: str, arguments: dict[str, object]) -> object: + async def call_tool(self, name: str, arguments: ConfigData) -> object: loop = asyncio.get_running_loop() future = loop.create_future() await self._queue.put(("call", name, arguments, future)) @@ -89,9 +88,9 @@ async def _run(self) -> None: if action != "call": raise RuntimeError(f"Unknown MCP action: {action}") result = await session.call_tool(name, arguments) - cast(asyncio.Future[object], future).set_result(result) + cast(asyncio.Future, future).set_result(result) except BaseException as exc: - cast(asyncio.Future[object], future).set_exception(exc) + cast(asyncio.Future, future).set_exception(exc) except BaseException as exc: if not ready.done(): ready.set_exception(exc) @@ -106,26 +105,29 @@ async def connect_mcp_tool( def mcp_tool_def(tool: object) -> Tool: - raw = cast(Any, tool) - schema = getattr(raw, "inputSchema", None) or getattr(raw, "input_schema", None) - if schema is None and hasattr(raw, "model_dump"): - dumped = raw.model_dump() + schema = getattr(tool, "inputSchema", None) or getattr(tool, "input_schema", None) + model_dump = getattr(tool, "model_dump", None) + if schema is None and callable(model_dump): + dumped = model_dump() schema = dumped.get("inputSchema") or dumped.get("input_schema") if not isinstance(schema, dict): schema = {"type": "object", "properties": {}} + name = getattr(tool, "name", None) + if not isinstance(name, str) or not name: + raise TypeError("MCP tools require a name.") return Tool( - name=str(raw.name), - description=str(getattr(raw, "description", "") or ""), - parameters=cast(dict[str, object], schema), + name=name, + description=str(getattr(tool, "description", "") or ""), + parameters=cast(ConfigData, schema), strict=None, ) def mcp_result_value(result: object) 
-> object: - raw = cast(Any, result) - if bool(getattr(raw, "isError", False)): - raise ToolError(str(mcp_content_value(getattr(raw, "content", [])))) - return mcp_content_value(getattr(raw, "content", [])) + content = getattr(result, "content", []) + if bool(getattr(result, "isError", False)): + raise ToolError(str(mcp_content_value(content))) + return mcp_content_value(content) def mcp_content_value(content: object) -> object: diff --git a/verifiers/v1/utils/object_utils.py b/verifiers/v1/utils/object_utils.py new file mode 100644 index 000000000..098f19921 --- /dev/null +++ b/verifiers/v1/utils/object_utils.py @@ -0,0 +1,32 @@ +import inspect +from collections.abc import Awaitable, Callable +from typing import cast + +from verifiers.utils.async_utils import maybe_call_with_named_args + + +async def close_object(obj: object) -> None: + for name in ("aclose", "close", "delete", "teardown"): + fn = getattr(obj, name, None) + if callable(fn): + await maybe_call_with_named_args(fn) + return + + +async def resolve_object_factory(spec: object, context: str) -> object: + if not callable(spec): + return spec + if not ( + inspect.isfunction(spec) or inspect.ismethod(spec) or inspect.isclass(spec) + ): + return spec + try: + signature = inspect.signature(spec) + except (TypeError, ValueError) as exc: + raise TypeError(f"{context} factory signature cannot be inspected.") from exc + if signature.parameters: + raise TypeError(f"{context} factory must accept no arguments.") + value = cast(Callable[[], object | Awaitable[object]], spec)() + if inspect.isawaitable(value): + return await cast(Awaitable[object], value) + return value diff --git a/verifiers/v1/utils/program_utils.py b/verifiers/v1/utils/program_utils.py index 5501127b1..5f5563240 100644 --- a/verifiers/v1/utils/program_utils.py +++ b/verifiers/v1/utils/program_utils.py @@ -1,27 +1,28 @@ -from __future__ import annotations - import asyncio import os import shlex -from collections.abc import Mapping -from typing 
import Any, Callable, cast +from collections.abc import Mapping, Sequence +from typing import cast from verifiers.errors import InfraError from verifiers.utils.async_utils import maybe_call_with_named_args from ..config import resolve_config_object, string_mapping -from ..runtime import ( - Runtime, - _read_path, +from .binding_utils import ( binding_key_parts, - binding_source_root, function_name, - validate_binding_source_root, + normalize_binding_map, + read_path, + validate_binding_source, validate_bound_arg, + validate_callable_source, ) +from ..runtime import Runtime from ..state import State from ..task import Task -from .mcp_proxy_utils import ProgramToolType, validate_program_tool_types +from .mcp_proxy_utils import validate_program_channels +from ..types import ConfigData, ConfigInputMap, ConfigMap, Handler, ProgramChannel +from ..types import ProgramValue PROGRAM_KIND_KEYS = {"base", "fn", "command"} PROGRAM_OPTION_KEYS = { @@ -32,7 +33,7 @@ "bindings", "env", "artifacts", - "tools", + "channels", } PROGRAM_KEYS = PROGRAM_KIND_KEYS | PROGRAM_OPTION_KEYS | {"args"} SANDBOX_ONLY_PROGRAM_KEYS = {"files", "dirs", "setup", "artifacts"} @@ -48,10 +49,10 @@ async def run_local_command( - program: Mapping[str, object], task: Task, state: State, runtime: Runtime + program: ConfigMap, task: Task, state: State, runtime: Runtime ) -> State: - if "mcp" in program_tool_types(program): - raise ValueError("program.tools='mcp' requires sandbox command placement.") + if "mcp" in program_channels(program): + raise ValueError("program.channels='mcp' requires sandbox command placement.") validate_program_bindings(program) argv = await command_argv(program, task, state, runtime) env = await command_env(program, task, state, runtime, include_base=True) @@ -80,12 +81,12 @@ async def run_local_command( async def command_argv( - program: Mapping[str, object], task: Task, state: State, runtime: Runtime + program: ConfigMap, task: Task, state: State, runtime: Runtime ) -> 
list[str]: command = program.get("command") if isinstance(command, str): argv = shlex.split(command) - elif isinstance(command, list): + elif isinstance(command, Sequence): argv = [ str(await resolve_program_value(part, task, state, runtime, program)) for part in command @@ -105,7 +106,7 @@ async def command_argv( async def command_env( - program: Mapping[str, object], + program: ConfigMap, task: Task, state: State, runtime: Runtime, @@ -138,7 +139,7 @@ async def resolve_program_value( task: Task, state: State, runtime: Runtime, - program: Mapping[str, object] | None = None, + program: ConfigMap | None = None, ) -> object: fn = program_value_callable(value) if fn is not None: @@ -147,77 +148,72 @@ async def resolve_program_value( if isinstance(value, str): root, separator, tail = value.partition(".") if separator and root == "task": - return _read_path(task, tail) + return read_path(task, tail) if separator and root == "state": - return _read_path(state, tail) + return read_path(state, tail) if separator and root == "runtime": - return _read_path(state.get("runtime", {}), tail) + return read_path(state.get("runtime", {}), tail) if isinstance(value, Mapping): if len(value) != 1: raise ValueError("Program value mappings must have exactly one root.") root, path = next(iter(value.items())) if root == "task": - return _read_path(task, str(path)) + return read_path(task, str(path)) if root == "state": - return _read_path(state, str(path)) + return read_path(state, str(path)) if root == "runtime": - return _read_path(state.get("runtime", {}), str(path)) + return read_path(state.get("runtime", {}), str(path)) raise ValueError(f"Unknown program value root {root!r}.") return value -def program_value_callable(value: object) -> Callable[..., object] | None: +def program_value_callable(value: object) -> Handler | None: if callable(value): - return cast(Callable[..., object], value) + return cast(Handler, value) if isinstance(value, Mapping) and "fn" in value: - spec = 
cast(Mapping[str, object], value) - unknown = set(spec) - {"fn"} - if unknown: - raise ValueError( - f"Program callable value has unknown keys: {sorted(unknown)}." - ) + spec = cast(ConfigMap, value) + validate_callable_source(spec, "Program callable value") fn = resolve_config_object(spec["fn"]) if not callable(fn): raise TypeError("Program callable value requires callable fn.") - return cast(Callable[..., object], fn) + return cast(Handler, fn) return None async def program_binding_kwargs( - fn: Callable[..., object], - program: Mapping[str, object] | None, + fn: Handler, + program: ConfigMap | None, task: Task, state: State, runtime: Runtime, -) -> dict[str, object]: +) -> ConfigData: if program is None: return {} - raw_bindings = program.get("bindings") or {} - if not isinstance(raw_bindings, Mapping): - raise TypeError("program.bindings must be a mapping.") + raw_bindings = normalize_binding_map( + program.get("bindings"), "program.bindings", allow_objects=False + ) if not raw_bindings: return {} name = function_name(fn) - kwargs: dict[str, object] = {} + kwargs: ConfigData = {} for binding_key, source in raw_bindings.items(): target_name, arg_name = binding_key_parts(binding_key) if target_name != name: continue validate_bound_arg(fn, arg_name, f"Program binding {binding_key!r}") - source_root = binding_source_root(source) - validate_binding_source_root(source_root, f"Program binding {binding_key!r}") - if source_root == "objects": - raise ValueError("program.bindings cannot use objects.* sources.") + validate_binding_source( + source, f"Program binding {binding_key!r}", allow_objects=False + ) if arg_name in kwargs: raise ValueError(f"Program binding arg {arg_name!r} is defined twice.") kwargs[arg_name] = await runtime.resolve_binding(source, task, state) return kwargs -def validate_program_bindings(program: Mapping[str, object]) -> None: - raw_bindings = program.get("bindings") or {} - if not isinstance(raw_bindings, Mapping): - raise 
TypeError("program.bindings must be a mapping.") +def validate_program_bindings(program: ConfigMap) -> None: + raw_bindings = normalize_binding_map( + program.get("bindings"), "program.bindings", allow_objects=False + ) if not raw_bindings: return targets = program_binding_targets(program) @@ -228,23 +224,22 @@ def validate_program_bindings(program: Mapping[str, object]) -> None: if target_name in program_setup_callable_names(program): raise ValueError( "program.setup callables cannot use program.bindings; move " - "bound runtime setup under program.tools.." + "bound runtime setup under program.channels.." ) raise ValueError( f"Program binding {binding_key!r} does not match a callable " "owned by the same program." ) validate_bound_arg(fn, arg_name, f"Program binding {binding_key!r}") - source_root = binding_source_root(source) - validate_binding_source_root(source_root, f"Program binding {binding_key!r}") - if source_root == "objects": - raise ValueError("program.bindings cannot use objects.* sources.") + validate_binding_source( + source, f"Program binding {binding_key!r}", allow_objects=False + ) def program_binding_targets( - program: Mapping[str, object], -) -> dict[str, Callable[..., object]]: - targets: dict[str, Callable[..., object]] = {} + program: ConfigMap, +) -> dict[str, Handler]: + targets: dict[str, Handler] = {} def add(value: object) -> None: fn = program_value_callable(value) @@ -264,11 +259,11 @@ def add_items(value: object) -> None: add(value) command = program.get("command") - if isinstance(command, list): + if isinstance(command, Sequence) and not isinstance(command, str): for item in command: add(item) add_items(program.get("args")) - for _, item, _ in program_tools_setup(program): + for _, item, _ in program_channel_setup(program): add(item) for key in ("files", "dirs", "env"): value = program.get(key) @@ -278,7 +273,7 @@ def add_items(value: object) -> None: return targets -def program_setup_callable_names(program: Mapping[str, object]) -> 
set[str]: +def program_setup_callable_names(program: ConfigMap) -> set[str]: names: set[str] = set() setup = program.get("setup") items = setup if isinstance(setup, list) else [setup] @@ -289,14 +284,22 @@ def program_setup_callable_names(program: Mapping[str, object]) -> set[str]: return names -def float_config(config: Mapping[str, object], key: str, default: float) -> float: +def float_config(config: ConfigMap, key: str, default: float) -> float: value = config.get(key) - return default if value is None else float(cast(Any, value)) + if value is None: + return default + if isinstance(value, bool) or not isinstance(value, int | float | str): + raise TypeError(f"{key} must be numeric.") + return float(value) -def int_config(config: Mapping[str, object], key: str, default: int) -> int: +def int_config(config: ConfigMap, key: str, default: int) -> int: value = config.get(key) - return default if value is None else int(cast(Any, value)) + if value is None: + return default + if isinstance(value, bool) or not isinstance(value, int | float | str): + raise TypeError(f"{key} must be numeric.") + return int(value) def endpoint_api_key(runtime: Runtime) -> str: @@ -306,11 +309,11 @@ def endpoint_api_key(runtime: Runtime) -> str: return str(secret or "intercepted") -def program_tool_types(program: Mapping[str, object]) -> tuple[ProgramToolType, ...]: - return validate_program_tool_types(program.get("tools")) +def program_channels(program: ConfigMap) -> tuple[ProgramChannel, ...]: + return validate_program_channels(program.get("channels")) -def program_kind(program: Mapping[str, object]) -> str: +def program_kind(program: ConfigMap) -> str: base = program.get("base", False) if not isinstance(base, bool): raise TypeError("program.base must be a boolean.") @@ -333,9 +336,9 @@ def program_kind(program: Mapping[str, object]) -> str: def validate_program_options( - program: Mapping[str, object], + program: ConfigMap, kind: str, - sandbox_config: Mapping[str, object] | None, + 
sandbox_config: ConfigMap | None, ) -> None: unknown = sorted(set(program) - PROGRAM_KEYS) if unknown: @@ -345,17 +348,17 @@ def validate_program_options( sandbox_only = sorted(set(program) & SANDBOX_ONLY_PROGRAM_KEYS) if sandbox_only: raise ValueError(f"Program keys {sandbox_only} require sandbox placement.") - tool_types = set(program_tool_types(program)) - if "mcp" in tool_types: + channels = set(program_channels(program)) + if "mcp" in channels: if kind != "command": raise ValueError( - "program.tools='mcp' is only supported for command programs." + "program.channels='mcp' is only supported for command programs." ) if sandbox_config is None: - raise ValueError("program.tools='mcp' requires program.sandbox.") - if "callable" in tool_types and kind == "command": + raise ValueError("program.channels='mcp' requires program.sandbox.") + if "callable" in channels and kind == "command": raise ValueError( - "program.tools='callable' is only supported for base and fn programs." + "program.channels='callable' is only supported for base and fn programs." 
) if kind == "base" and sandbox_config is None: inert = sorted(set(program) & (PROGRAM_OPTION_KEYS - {"sandbox"})) @@ -363,21 +366,19 @@ def validate_program_options( raise ValueError(f"Base program keys {inert} require sandbox placement.") -def validate_program_sandbox_scope(sandbox_config: Mapping[str, object]) -> None: +def validate_program_sandbox_scope(sandbox_config: ConfigMap) -> None: scope = str(sandbox_config.get("scope") or "rollout") if scope not in {"rollout", "group", "global"}: raise ValueError("program sandbox scope must be rollout, group, or global.") -def merge_task_program( - program: Mapping[str, object], task: Mapping[str, object], *, kind: str -) -> Mapping[str, object]: +def merge_task_program(program: ConfigMap, task: ConfigMap, *, kind: str) -> ConfigMap: task_program = task.get("program") if task_program is None: return program if not isinstance(task_program, Mapping): raise TypeError("task.program must be a mapping.") - task_program = cast(Mapping[str, object], task_program) + task_program = cast(ConfigMap, task_program) unknown = sorted(set(task_program) - TASK_PROGRAM_KEYS) if unknown: raise ValueError( @@ -391,8 +392,8 @@ def merge_task_program( merged[key] = merge_program_mapping_option( program.get(key), task_program.get(key), key ) - merged["bindings"] = merge_program_mapping_option( - program.get("bindings"), task_program.get("bindings"), "bindings" + merged["bindings"] = merge_program_bindings( + program.get("bindings"), task_program.get("bindings") ) merged["setup"] = [ *program_list_items(program.get("setup"), "program.setup"), @@ -406,20 +407,18 @@ def merge_task_program( return merged -def merge_task_sandbox( - sandbox_config: Mapping[str, object], task: Mapping[str, object] -) -> Mapping[str, object]: +def merge_task_sandbox(sandbox_config: ConfigMap, task: ConfigMap) -> ConfigMap: config = dict(sandbox_config) task_sandbox = task.get("sandbox") if isinstance(task_sandbox, Mapping): - 
config.update(string_mapping(cast(Mapping[object, object], task_sandbox))) + config.update(string_mapping(cast(ConfigInputMap, task_sandbox))) validate_program_sandbox_scope(config) return config def merge_program_mapping_option( program_value: object, task_value: object, key: str -) -> dict[str, object]: +) -> ConfigData: program_mapping = program_option_mapping(program_value, f"program.{key}") task_mapping = program_option_mapping(task_value, f"task.program.{key}") duplicate = sorted(set(program_mapping) & set(task_mapping)) @@ -430,12 +429,28 @@ def merge_program_mapping_option( return {**program_mapping, **task_mapping} -def program_option_mapping(value: object, field_name: str) -> dict[str, object]: +def merge_program_bindings(program_value: object, task_value: object) -> ConfigData: + program_bindings = normalize_binding_map( + program_value, "program.bindings", allow_objects=False + ) + task_bindings = normalize_binding_map( + task_value, "task.program.bindings", allow_objects=False + ) + duplicate = sorted(set(program_bindings) & set(task_bindings)) + if duplicate: + raise ValueError( + "program.bindings and task.program.bindings define the same keys: " + f"{duplicate}." 
+ ) + return {**program_bindings, **task_bindings} + + +def program_option_mapping(value: object, field_name: str) -> ConfigData: if value is None: return {} if not isinstance(value, Mapping): raise TypeError(f"{field_name} must be a mapping.") - result: dict[str, object] = {} + result: ConfigData = {} for key, item in value.items(): if not isinstance(key, str): raise TypeError(f"{field_name} keys must be strings.") @@ -443,41 +458,43 @@ def program_option_mapping(value: object, field_name: str) -> dict[str, object]: return result -def program_list_items(value: object, field_name: str) -> list[object]: +def program_list_items(value: object, field_name: str) -> list[ProgramValue]: if value is None: return [] if isinstance(value, str): return [value] - if not isinstance(value, list): - return [value] - return list(value) + if not isinstance(value, Sequence): + return [cast(ProgramValue, value)] + return [cast(ProgramValue, item) for item in value] -def program_tools_setup( - program: Mapping[str, object], -) -> list[tuple[ProgramToolType, object, int]]: - tools = program.get("tools") - if tools is None or isinstance(tools, str): +def program_channel_setup( + program: ConfigMap, +) -> list[tuple[ProgramChannel, ProgramValue, int]]: + channels = program.get("channels") + if channels is None or isinstance(channels, str): return [] - if isinstance(tools, list): - result: list[tuple[ProgramToolType, object, int]] = [] - for item in tools: - result.extend(program_tools_setup({"tools": item})) + if isinstance(channels, list): + result: list[tuple[ProgramChannel, ProgramValue, int]] = [] + for item in channels: + result.extend(program_channel_setup({"channels": item})) return result - if not isinstance(tools, Mapping): - validate_program_tool_types(tools) + if not isinstance(channels, Mapping): + validate_program_channels(channels) return [] - tool_types = validate_program_tool_types(tools) - tools_map = cast(Mapping[str, object], tools) - priority = cast(int, 
tools_map.get("priority", -100)) - result = [] - for tool_type in tool_types: - value = tools_map[tool_type] + channel_names = validate_program_channels(channels) + channels_map = cast(ConfigMap, channels) + priority = cast(int, channels_map.get("priority", -100)) + result: list[tuple[ProgramChannel, ProgramValue, int]] = [] + for channel in channel_names: + value = channels_map[channel] if value is None or value is True: continue if value is False: - raise ValueError("program.tools setup should be removed instead of false.") + raise ValueError( + "program.channels setup should be removed instead of false." + ) items = value if isinstance(value, list) else [value] for item in items: - result.append((tool_type, item, priority)) + result.append((channel, cast(ProgramValue, item), priority)) return result diff --git a/verifiers/v1/utils/prompt_utils.py b/verifiers/v1/utils/prompt_utils.py index 81eabbea0..fc276fc70 100644 --- a/verifiers/v1/utils/prompt_utils.py +++ b/verifiers/v1/utils/prompt_utils.py @@ -1,18 +1,17 @@ -from __future__ import annotations - from collections.abc import Mapping from typing import Literal, cast from verifiers.types import MessageContent, Messages, SystemMessage from verifiers.utils.message_utils import normalize_messages +from ..types import ConfigData, ConfigMap, PromptInput SystemPromptMerge = Literal["reject", "concat", "task", "taskset", "harness"] def normalize_prompt( - value: object, field_name: str = "prompt" -) -> list[dict[str, object]]: + value: PromptInput | None, field_name: str = "prompt" +) -> list[ConfigData]: messages = normalize_messages(cast(Messages, value or []), field_name=field_name) for message in messages: if getattr(message, "role", None) == "system": @@ -24,8 +23,8 @@ def normalize_prompt( def normalize_system_prompt( - value: object, field_name: str = "system_prompt" -) -> list[dict[str, object]]: + value: PromptInput | None, field_name: str = "system_prompt" +) -> list[ConfigData]: if value is None: return [] 
if isinstance(value, str): @@ -39,13 +38,14 @@ def normalize_system_prompt( def resolve_system_prompt( *, - task: Mapping[str, object], - taskset_system_prompt: list[dict[str, object]], - harness_system_prompt: list[dict[str, object]], + task: ConfigMap, + taskset_system_prompt: list[ConfigData], + harness_system_prompt: list[ConfigData], merge: str, -) -> list[dict[str, object]]: +) -> list[ConfigData]: task_system_prompt = normalize_system_prompt( - task.get("system_prompt"), field_name="task.system_prompt" + cast(PromptInput | None, task.get("system_prompt")), + field_name="task.system_prompt", ) sources = [ ("harness", harness_system_prompt), @@ -74,13 +74,13 @@ def resolve_system_prompt( ) -def dump_messages(messages: Messages) -> list[dict[str, object]]: +def dump_messages(messages: Messages) -> list[ConfigData]: return [message.model_dump(exclude_none=True) for message in messages] def task_text( - task: Mapping[str, object], - state: Mapping[str, object], + task: ConfigMap, + state: ConfigMap, *, keys: tuple[str, ...] 
= ("instruction",), ) -> str: @@ -92,9 +92,7 @@ def task_text( return messages_text(task.get("prompt", [])) -def state_system_prompt_text( - task: Mapping[str, object], state: Mapping[str, object] -) -> str: +def state_system_prompt_text(task: ConfigMap, state: ConfigMap) -> str: _ = task return messages_text(state.get("system_prompt", [])) @@ -110,7 +108,7 @@ def messages_text(messages: object) -> str: if content is not None: parts.append(content_text(content)) elif isinstance(message, Mapping): - item = cast(Mapping[str, object], message) + item = cast(ConfigMap, message) parts.append(content_text(item.get("content"))) else: parts.append(str(message)) @@ -126,7 +124,7 @@ def content_text(content: MessageContent | object) -> str: text_parts: list[str] = [] for part in content: if isinstance(part, Mapping): - item = cast(Mapping[str, object], part) + item = cast(ConfigMap, part) text = item.get("text") if isinstance(text, str): text_parts.append(text) diff --git a/verifiers/v1/utils/runtime_registry.py b/verifiers/v1/utils/runtime_registry.py new file mode 100644 index 000000000..9befd0ef6 --- /dev/null +++ b/verifiers/v1/utils/runtime_registry.py @@ -0,0 +1,37 @@ +import weakref +from collections.abc import Mapping +from typing import TYPE_CHECKING, cast +from ..types import ConfigMap + +if TYPE_CHECKING: + from ..runtime import Runtime + +_RUNTIME_REGISTRY: weakref.WeakValueDictionary[str, object] = ( + weakref.WeakValueDictionary() +) + + +def register_runtime(runtime_id: str, runtime: object) -> None: + _RUNTIME_REGISTRY[runtime_id] = runtime + + +def unregister_runtime(runtime_id: str) -> None: + _RUNTIME_REGISTRY.pop(runtime_id, None) + + +def load_runtime(runtime_id: str) -> "Runtime": + runtime = _RUNTIME_REGISTRY.get(runtime_id) + if runtime is None: + raise RuntimeError(f"No live v1 runtime registered for id {runtime_id!r}.") + return cast("Runtime", runtime) + + +def load_runtime_from_state(state: ConfigMap) -> "Runtime": + runtime_state = 
state.get("runtime") + if not isinstance(runtime_state, Mapping): + raise RuntimeError("State has no runtime metadata.") + runtime_state = cast(ConfigMap, runtime_state) + runtime_id = runtime_state.get("runtime_id") + if not isinstance(runtime_id, str) or not runtime_id: + raise RuntimeError("State has no live runtime id.") + return load_runtime(runtime_id) diff --git a/verifiers/v1/utils/sandbox_program_utils.py b/verifiers/v1/utils/sandbox_program_utils.py index 0da5fff17..e98c7f0b4 100644 --- a/verifiers/v1/utils/sandbox_program_utils.py +++ b/verifiers/v1/utils/sandbox_program_utils.py @@ -1,5 +1,3 @@ -from __future__ import annotations - import importlib.machinery import importlib.util import json @@ -9,13 +7,14 @@ from collections.abc import Mapping from dataclasses import dataclass from pathlib import Path -from typing import Any, cast +from typing import cast from verifiers.utils.interception_utils import serialize_tool_defs -from ..runtime import Runtime, serializable +from ..runtime import Runtime from ..state import State from ..task import Task +from .serialization_utils import serializable from .sandbox_utils import ( VF_STATE_INPUT_PATH_KEY, python_package_install_command, @@ -23,6 +22,8 @@ python_runtime_setup_command, run_sandbox_command, ) +from .program_utils import program_list_items, program_option_mapping +from ..types import ConfigData, ConfigMap TASK_PATH = "/tmp/vf_task.json" STATE_INPUT_PATH = "/tmp/vf_state_in.json" @@ -36,7 +37,7 @@ PACKAGE_ROOT = "/tmp/vf_program_package" -def python_program_sandbox(sandbox_config: Mapping[str, object]) -> dict[str, object]: +def python_program_sandbox(sandbox_config: ConfigMap) -> ConfigData: config = dict(sandbox_config) raw_packages = config.get("packages") or [] if isinstance(raw_packages, str): @@ -65,8 +66,8 @@ def is_python_package(requirement: str, package: str) -> bool: async def run_sandbox_python_program( - program: Mapping[str, object], - sandbox_config: Mapping[str, object], + program: 
ConfigMap, + sandbox_config: ConfigMap, task: Task, state: State, runtime: Runtime, @@ -88,7 +89,7 @@ async def run_sandbox_python_program( output = state.get("artifacts", {}).pop(STATE_ARTIFACT, None) if not isinstance(output, Mapping): raise RuntimeError("Sandbox Python program did not return state.") - patch = dict(cast(Mapping[str, Any], output)) + patch = dict(cast(ConfigMap, output)) apply_internal_state_patch(state, patch, mode=mode) patch_artifacts = patch.pop("artifacts", None) if isinstance(patch_artifacts, Mapping): @@ -100,9 +101,7 @@ async def run_sandbox_python_program( return state -def apply_internal_state_patch( - state: State, patch: dict[str, Any], *, mode: str -) -> None: +def apply_internal_state_patch(state: State, patch: ConfigData, *, mode: str) -> None: for key in State.INTERNAL_KEYS: if key not in patch: continue @@ -126,18 +125,18 @@ def apply_internal_state_patch( def sandbox_runner_program( - program: Mapping[str, object], + program: ConfigMap, task: Task, state: State, mode: str, fn_ref: str | None, max_turns: int, tool_defs: object, -) -> dict[str, object]: +) -> ConfigData: package = sandbox_program_package(mode=mode, fn_ref=fn_ref) if package is not None: program = sandbox_program_with_package(program, package) - files = dict(cast(Mapping[str, object], program.get("files") or {})) + files = program_option_mapping(program.get("files"), "program.files") files[TASK_PATH] = json.dumps(task) files[TOOL_DEFS_PATH] = json.dumps( serializable(serialize_tool_defs(tool_defs or [], "openai_chat_completions")) @@ -155,24 +154,23 @@ def sandbox_runner_program( ) files[RUNNER_PATH] = runner_source() files[RUNNER_CONFIG_PATH] = json.dumps({"max_turns": max_turns}) - artifacts = dict(cast(Mapping[str, object], program.get("artifacts") or {})) + artifacts = program_option_mapping(program.get("artifacts"), "program.artifacts") artifacts[STATE_ARTIFACT] = {"path": STATE_OUTPUT_PATH, "format": "json"} command = python_runtime_command( RUNNER_PATH, 
*([mode] if fn_ref is None else [mode, fn_ref]), ) - setup = program.get("setup") or [] - if isinstance(setup, str): - setup = [setup] - if not isinstance(setup, list): - setup = [setup] package_setup = [] if package is None else [package.install_command] return { **dict(program), "files": files, "command": command, - "env": dict(cast(Mapping[str, object], program.get("env") or {})), - "setup": [python_runtime_setup_command(), *package_setup, *setup], + "env": program_option_mapping(program.get("env"), "program.env"), + "setup": [ + python_runtime_setup_command(), + *package_setup, + *program_list_items(program.get("setup"), "program.setup"), + ], "artifacts": artifacts, VF_STATE_INPUT_PATH_KEY: STATE_INPUT_PATH, } @@ -209,10 +207,10 @@ def sandbox_program_package(*, mode: str, fn_ref: str | None) -> SandboxPackage def sandbox_program_with_package( - program: Mapping[str, object], package: SandboxPackage -) -> Mapping[str, object]: + program: ConfigMap, package: SandboxPackage +) -> ConfigMap: merged = dict(program) - dirs = dict(cast(Mapping[str, object], merged.get("dirs") or {})) + dirs = program_option_mapping(merged.get("dirs"), "program.dirs") if package.remote_root in dirs: raise ValueError( f"program.dirs already defines internal package path {package.remote_root!r}." 
@@ -287,8 +285,6 @@ def interpreter_prefixes() -> list[Path]: def runner_source() -> str: return r""" -from __future__ import annotations - import asyncio import importlib import inspect diff --git a/verifiers/v1/utils/sandbox_utils.py b/verifiers/v1/utils/sandbox_utils.py index 48fec216f..8124bea65 100644 --- a/verifiers/v1/utils/sandbox_utils.py +++ b/verifiers/v1/utils/sandbox_utils.py @@ -1,5 +1,3 @@ -from __future__ import annotations - import asyncio import hashlib import importlib.resources as resources @@ -11,7 +9,7 @@ from collections.abc import Awaitable, Callable, Mapping from importlib.abc import Traversable from pathlib import Path -from typing import Any, cast +from typing import TYPE_CHECKING, Protocol, cast from verifiers.errors import SandboxError from verifiers.utils.async_utils import maybe_call_with_named_args @@ -19,19 +17,93 @@ from .artifact_utils import artifact_format, artifact_key, artifact_optional from .artifact_utils import artifact_path from .program_utils import command_argv, command_env, float_config, int_config -from .program_utils import program_tools_setup, resolve_program_value +from .program_utils import program_option_mapping, program_channel_setup +from .program_utils import resolve_program_value from .program_utils import validate_program_bindings from ..runtime import Runtime from ..state import State from ..task import Task +from ..types import ConfigMap, Handler, JsonData, ProgramValue + +if TYPE_CHECKING: + from ..toolset import Toolset VF_STATE_INPUT_PATH_KEY = "_vf_state_input_path" +class SandboxRecord(Protocol): + id: object + + +class SandboxCommandResult(Protocol): + exit_code: int + stdout: str | None + stderr: str | None + + +class SandboxClient(Protocol): + async def create(self, request: object) -> SandboxRecord: ... + + async def wait_for_creation(self, sandbox_id: str) -> object: ... + + async def delete(self, sandbox_id: str) -> object: ... + + async def aclose(self) -> object: ... 
+ + async def execute_command( + self, + sandbox_id: str, + command: str, + *, + timeout: int | None = None, + working_dir: str | None = None, + env: dict[str, str] | None = None, + ) -> SandboxCommandResult: ... + + async def upload_bytes( + self, + sandbox_id: str, + file_path: str, + file_bytes: bytes, + *, + filename: str | None = None, + ) -> object: ... + + async def upload_file( + self, + sandbox_id: str, + file_path: str, + local_file_path: str, + *, + timeout: int | None = None, + ) -> object: ... + + async def download_file( + self, + sandbox_id: str, + file_path: str, + local_file_path: str, + *, + timeout: int | None = None, + ) -> object: ... + + async def read_file(self, sandbox_id: str, path: str) -> object: ... + + async def run_background_job( + self, + sandbox_id: str, + command: str, + *, + timeout: int | None = None, + working_dir: str | None = None, + env: dict[str, str] | None = None, + ) -> SandboxCommandResult: ... + + class SandboxLease: def __init__( self, - client: object, + client: SandboxClient, sandbox_id: str, scope: str, key: str, @@ -48,8 +120,8 @@ async def execute( command: str, timeout: int | None = None, working_dir: str | None = None, - env: Mapping[str, str] | None = None, - ) -> object: + env: dict[str, str] | None = None, + ) -> SandboxCommandResult: result = await maybe_call_with_named_args( getattr(self.client, "execute_command"), sandbox_id=self.id, @@ -58,7 +130,7 @@ async def execute( working_dir=working_dir, env=env, ) - return result + return cast(SandboxCommandResult, result) async def upload_bytes( self, path: str, content: bytes, filename: str | None = None @@ -105,8 +177,8 @@ async def run_background_job( command: str, timeout: int | None = None, working_dir: str | None = None, - env: Mapping[str, str] | None = None, - ) -> object: + env: dict[str, str] | None = None, + ) -> SandboxCommandResult: return await maybe_call_with_named_args( getattr(self.client, "run_background_job"), sandbox_id=self.id, @@ -121,7 
+193,7 @@ async def delete(self) -> None: return self.deleted = True try: - await cast(Any, self.client).delete(self.id) + await self.client.delete(self.id) finally: aclose = getattr(self.client, "aclose", None) if callable(aclose): @@ -142,8 +214,8 @@ async def execute( command: str, timeout: int | None = None, working_dir: str | None = None, - env: Mapping[str, str] | None = None, - ) -> object: + env: dict[str, str] | None = None, + ) -> SandboxCommandResult: result = await self.lease.execute( command=command, timeout=timeout, @@ -176,7 +248,7 @@ async def run_background_job( command: str, timeout: int | None = None, working_dir: str | None = None, - env: Mapping[str, str] | None = None, + env: dict[str, str] | None = None, ) -> object: result = await self.lease.run_background_job( command=command, @@ -191,17 +263,15 @@ async def delete(self) -> None: await self.lease.delete() -async def create_tool_sandbox_lease(toolset: object) -> SandboxLease: +async def create_tool_sandbox_lease(toolset: "Toolset") -> SandboxLease: return await create_scoped_sandbox_lease(toolset, tool_sandbox_key(toolset)) -async def create_sandbox_lease( - sandbox_config: Mapping[str, object], key: str -) -> SandboxLease: +async def create_sandbox_lease(sandbox_config: ConfigMap, key: str) -> SandboxLease: from prime_sandboxes import AsyncSandboxClient scope = sandbox_scope(sandbox_config) - client = AsyncSandboxClient() + client = cast(SandboxClient, AsyncSandboxClient()) try: sandbox_id = await create_sandbox(client, sandbox_config) except BaseException: @@ -228,8 +298,8 @@ async def create_scoped_sandbox_lease( async def run_sandbox_command( - program: Mapping[str, object], - sandbox_config: Mapping[str, object], + program: ConfigMap, + sandbox_config: ConfigMap, task: Task, state: State, runtime: Runtime, @@ -289,9 +359,9 @@ async def run_sandbox_command( def program_setup_handlers( - lease: SandboxLease, program: Mapping[str, object], runtime: Runtime -) -> list[Callable[..., 
object]]: - handlers: list[Callable[..., object]] = [ + lease: SandboxLease, program: ConfigMap, runtime: Runtime +) -> list[Handler]: + handlers: list[Handler] = [ _program_setup_handler( lease, program, @@ -325,13 +395,13 @@ def program_setup_handlers( -50, ), ] - for tool_type, setup_item, priority in program_tools_setup(program): + for channel, setup_item, priority in program_channel_setup(program): handlers.append( - _program_tools_setup_handler( + _program_channel_setup_handler( lease, program, runtime, - str(tool_type), + str(channel), setup_item, priority, ) @@ -341,12 +411,12 @@ def program_setup_handlers( def _program_setup_handler( lease: SandboxLease, - program: Mapping[str, object], + program: ConfigMap, runtime: Runtime, fn: Callable[..., Awaitable[None]], name: str, priority: int, -) -> Callable[..., object]: +) -> Handler: async def handler(task: Task, state: State) -> None: await fn(lease.client, lease.id, program, task, state, runtime) @@ -356,14 +426,14 @@ async def handler(task: Task, state: State) -> None: return handler -def _program_tools_setup_handler( +def _program_channel_setup_handler( lease: SandboxLease, - program: Mapping[str, object], + program: ConfigMap, runtime: Runtime, - tool_type: str, - setup_item: object, + channel: str, + setup_item: ProgramValue, priority: int, -) -> Callable[..., object]: +) -> Handler: async def handler(task: Task, state: State) -> None: await run_program_items( lease.client, @@ -373,16 +443,16 @@ async def handler(task: Task, state: State) -> None: state, runtime, items=[setup_item], - error_prefix=f"Program {tool_type} tools setup failed", + error_prefix=f"Program {channel} channel setup failed", ) - handler.__name__ = f"program_{tool_type}_tools_setup" + handler.__name__ = f"program_{channel}_channel_setup" setattr(handler, "setup", True) setattr(handler, "setup_priority", priority) return handler -async def create_sandbox(client: object, sandbox_config: Mapping[str, object]) -> str: +async def 
create_sandbox(client: SandboxClient, sandbox_config: ConfigMap) -> str: from prime_sandboxes import CreateSandboxRequest request = CreateSandboxRequest( @@ -396,19 +466,17 @@ async def create_sandbox(client: object, sandbox_config: Mapping[str, object]) - network_access=bool(sandbox_config.get("network_access", True)), timeout_minutes=int_config(sandbox_config, "timeout_minutes", 60), ) - sandbox = await cast(Any, client).create(request) + sandbox = await client.create(request) sandbox_id = str(sandbox.id) try: - await cast(Any, client).wait_for_creation(sandbox_id) + await client.wait_for_creation(sandbox_id) except BaseException: - await cast(Any, client).delete(sandbox_id) + await client.delete(sandbox_id) raise return sandbox_id -async def setup_sandbox( - handle: SandboxLease, sandbox_config: Mapping[str, object] -) -> None: +async def setup_sandbox(handle: SandboxLease, sandbox_config: ConfigMap) -> None: packages = sandbox_config.get("packages") or [] if isinstance(packages, str): packages = shlex.split(packages) @@ -416,12 +484,9 @@ async def setup_sandbox( if not isinstance(packages, list): raise TypeError("sandbox.packages must be a list or string.") package_args = " ".join(shlex.quote(str(package)) for package in packages) - result = cast( - Any, - await handle.execute( - python_package_install_command(package_args), - timeout=int_config(sandbox_config, "install_timeout", 300), - ), + result = await handle.execute( + python_package_install_command(package_args), + timeout=int_config(sandbox_config, "install_timeout", 300), ) if result.exit_code: raise SandboxError(f"Sandbox package install failed: {result.stderr}") @@ -431,17 +496,14 @@ async def setup_sandbox( if not isinstance(commands, list): raise TypeError("sandbox.setup_commands must be a list or string.") for command in commands: - result = cast( - Any, - await handle.execute( - str(command), timeout=int_config(sandbox_config, "setup_timeout", 300) - ), + result = await handle.execute( + 
str(command), timeout=int_config(sandbox_config, "setup_timeout", 300) ) if result.exit_code: raise SandboxError(f"Sandbox setup command failed: {result.stderr}") -def sandbox_scope(sandbox_config: Mapping[str, object]) -> str: +def sandbox_scope(sandbox_config: ConfigMap) -> str: scope = str(sandbox_config.get("scope") or "rollout") if scope not in {"rollout", "group", "global"}: raise ValueError("sandbox.scope must be 'rollout', 'group', or 'global'.") @@ -505,16 +567,19 @@ def attach_sandbox_ref(state: State, lease: SandboxLease) -> None: def record_tool_sandbox_command( - state: State, lease: SandboxLease, command: str, result: object + state: State, lease: SandboxLease, command: str, result: SandboxCommandResult ) -> None: - result = cast(Any, result) - command_record = { + command_record: JsonData = { "command": command, "returncode": result.exit_code, "stdout": result.stdout or "", "stderr": result.stderr or "", } - state.setdefault("sandbox_commands", []).append(command_record) + commands = state.setdefault("sandbox_commands", []) + if not isinstance(commands, list): + raise TypeError("state.sandbox_commands must be a list.") + commands = cast(list[JsonData], commands) + commands.append(command_record) state.setdefault("runtime", {}) sandboxes = state["runtime"].setdefault("sandboxes", {}) if isinstance(sandboxes, dict): @@ -522,10 +587,13 @@ def record_tool_sandbox_command( lease.key, {"id": lease.id, "scope": lease.scope} ) if isinstance(tool_state, dict): - tool_state.setdefault("commands", []).append(command_record) + tool_commands = tool_state.setdefault("commands", []) + if isinstance(tool_commands, list): + tool_commands = cast(list[JsonData], tool_commands) + tool_commands.append(command_record) -def tool_sandbox_key(toolset: object) -> str: +def tool_sandbox_key(toolset: "Toolset") -> str: from ..toolset import MCPTool, flatten_toolsets, tool_name names = [ @@ -538,7 +606,7 @@ def tool_sandbox_key(toolset: object) -> str: return 
f"toolset:{id(toolset)}" -def program_sandbox_key(sandbox_config: Mapping[str, object]) -> str: +def program_sandbox_key(sandbox_config: ConfigMap) -> str: try: fingerprint = json.dumps(sandbox_config, sort_keys=True) except TypeError as exc: @@ -556,19 +624,15 @@ def sandbox_owner_key(owner: object) -> str: async def upload_program_files( - client: object, + client: SandboxClient, sandbox_id: str, - program: Mapping[str, object], + program: ConfigMap, task: Task, state: State, runtime: Runtime, ) -> None: - files = program.get("files", {}) - if not isinstance(files, Mapping): - raise TypeError("program.files must be a mapping.") + files = program_option_mapping(program.get("files"), "program.files") for path, source in files.items(): - if not isinstance(path, str): - raise TypeError("program.files keys must be strings.") content = await resolve_program_value(source, task, state, runtime, program) if not isinstance(content, str): content = str(content) @@ -582,19 +646,15 @@ async def upload_program_files( async def upload_program_dirs( - client: object, + client: SandboxClient, sandbox_id: str, - program: Mapping[str, object], + program: ConfigMap, task: Task, state: State, runtime: Runtime, ) -> None: - dirs = program.get("dirs", {}) - if not isinstance(dirs, Mapping): - raise TypeError("program.dirs must be a mapping.") + dirs = program_option_mapping(program.get("dirs"), "program.dirs") for path, source in dirs.items(): - if not isinstance(path, str): - raise TypeError("program.dirs keys must be strings.") local_source = await resolve_program_value( source, task, state, runtime, program ) @@ -602,7 +662,7 @@ async def upload_program_dirs( async def upload_program_dir( - client: object, sandbox_id: str, remote_path: str, local_source: object + client: SandboxClient, sandbox_id: str, remote_path: str, local_source: object ) -> None: if isinstance(local_source, str): local_source = Path(local_source) @@ -611,7 +671,12 @@ async def upload_program_dir( remote_tar = 
f"/tmp/_vf_upload_{remote_path.strip('/').replace('/', '_')}.tar.gz" tmp_path = await asyncio.to_thread(build_dir_archive, local_source, remote_path) try: - await cast(Any, client).upload_file(sandbox_id, remote_tar, str(tmp_path)) + await maybe_call_with_named_args( + getattr(client, "upload_file"), + sandbox_id=sandbox_id, + file_path=remote_tar, + local_file_path=str(tmp_path), + ) result = await maybe_call_with_named_args( getattr(client, "execute_command"), sandbox_id=sandbox_id, @@ -658,9 +723,9 @@ def upload_tar_filter(tarinfo: tarfile.TarInfo) -> tarfile.TarInfo | None: async def run_program_setup( - client: object, + client: SandboxClient, sandbox_id: str, - program: Mapping[str, object], + program: ConfigMap, task: Task, state: State, runtime: Runtime, @@ -678,9 +743,9 @@ async def run_program_setup( async def upload_state_input( - client: object, + client: SandboxClient, sandbox_id: str, - program: Mapping[str, object], + program: ConfigMap, task: Task, state: State, runtime: Runtime, @@ -701,9 +766,9 @@ async def upload_state_input( async def run_program_commands( - client: object, + client: SandboxClient, sandbox_id: str, - program: Mapping[str, object], + program: ConfigMap, task: Task, state: State, runtime: Runtime, @@ -713,11 +778,11 @@ async def run_program_commands( ) -> None: raw_setup = program.get(key) or [] if isinstance(raw_setup, str): - setup: list[object] = [raw_setup] + setup: list[ProgramValue] = [raw_setup] elif isinstance(raw_setup, list): - setup = list(raw_setup) + setup = [cast(ProgramValue, item) for item in raw_setup] else: - setup = [raw_setup] + setup = [cast(ProgramValue, raw_setup)] await run_program_items( client, sandbox_id, @@ -731,14 +796,14 @@ async def run_program_commands( async def run_program_items( - client: object, + client: SandboxClient, sandbox_id: str, - program: Mapping[str, object], + program: ConfigMap, task: Task, state: State, runtime: Runtime, *, - items: list[object], + items: list[ProgramValue], 
error_prefix: str, ) -> None: env = await command_env(program, task, state, runtime, include_base=False) @@ -755,20 +820,16 @@ async def run_program_items( async def collect_sandbox_artifacts( - client: object, sandbox_id: str, program: Mapping[str, object], state: State + client: object, sandbox_id: str, program: ConfigMap, state: State ) -> None: - artifacts = program.get("artifacts", {}) - if not isinstance(artifacts, Mapping): - raise TypeError("program.artifacts must be a mapping.") + artifacts = program_option_mapping(program.get("artifacts"), "program.artifacts") if not artifacts: return state.setdefault("artifacts", {}) for name, spec in artifacts.items(): - if not isinstance(name, str): - raise TypeError("program.artifacts keys must be strings.") if not isinstance(spec, Mapping): raise TypeError("program.artifacts values must be mappings.") - spec = cast(Mapping[str, object], spec) + spec = cast(ConfigMap, spec) path = artifact_path(spec) optional = artifact_optional(spec) try: @@ -789,7 +850,7 @@ async def collect_sandbox_artifacts( raise ValueError(f"Unsupported artifact format: {format_name!r}") key = artifact_key(spec) if key is not None: - value = cast(Mapping[str, object], value)[key] + value = cast(ConfigMap, value)[key] state["artifacts"][name] = value diff --git a/verifiers/v1/utils/scoring_utils.py b/verifiers/v1/utils/scoring_utils.py index 19d43560b..4907c231a 100644 --- a/verifiers/v1/utils/scoring_utils.py +++ b/verifiers/v1/utils/scoring_utils.py @@ -1,5 +1,3 @@ -from __future__ import annotations - import importlib import inspect import time @@ -7,29 +5,40 @@ Awaitable, Callable, Iterable, - Mapping, MutableSequence, Sequence, ) -from typing import Any, Literal, cast +from typing import Literal, cast + +from typing_extensions import TypedDict -from verifiers.utils.async_utils import maybe_await +from verifiers.utils.async_utils import maybe_call_with_named_args +from .binding_utils import ROLLOUT_FRAMEWORK_ARGS, function_name from 
.timing_utils import record_scoring_timing +from ..types import ConfigData, ConfigMap, GroupHandler, Handler SignalKind = Literal["metric", "reward", "advantage"] SignalStage = Literal["rollout", "group"] -SignalRecord = dict[str, object] -SignalConfigMap = Mapping[str, Mapping[str, object]] +SignalConfigMap = dict[str, ConfigMap] SIGNAL_CONFIG_KEYS = {"stage", "priority", "weight", "skip"} +class SignalRecord(TypedDict): + fn: Handler | GroupHandler + name: str + kind: SignalKind + stage: SignalStage + priority: int + weight: float + + def build_signals( owner: object | None = None, scoring: SignalConfigMap | None = None, - metrics: Iterable[Callable[..., object]] | None = None, - rewards: Iterable[Callable[..., object]] | None = None, - advantages: Iterable[Callable[..., object]] | None = None, + metrics: Iterable[Handler] | None = None, + rewards: Iterable[Handler] | None = None, + advantages: Iterable[Handler] | None = None, ) -> list[SignalRecord]: signals: list[SignalRecord] = [] if owner is not None: @@ -58,46 +67,50 @@ def collect_signals(*signal_lists: Iterable[SignalRecord]) -> list[SignalRecord] return sorted(signals, key=signal_sort_key) -def add_metric( - signals: MutableSequence[SignalRecord], fn: Callable[..., object] -) -> None: +def add_metric(signals: MutableSequence[SignalRecord], fn: Handler) -> None: add_signal(signals, signal_from_function(fn, "metric")) -def add_reward( - signals: MutableSequence[SignalRecord], fn: Callable[..., object] -) -> None: +def add_reward(signals: MutableSequence[SignalRecord], fn: Handler) -> None: add_signal(signals, signal_from_function(fn, "reward")) -def add_advantage( - signals: MutableSequence[SignalRecord], fn: Callable[..., object] -) -> None: +def add_advantage(signals: MutableSequence[SignalRecord], fn: Handler) -> None: add_signal(signals, signal_from_function(fn, "advantage")) async def score_rollout( signals: Iterable[SignalRecord], - task: Mapping[str, Any], - state: dict[str, Any], + task: ConfigMap, 
+ state: ConfigData, resolve_kwargs: Callable[ - [Callable[..., object], Mapping[str, Any], dict[str, Any]], - Awaitable[dict[str, object]], + [ + Handler, + ConfigMap, + ConfigData, + set[str], + ], + Awaitable[ConfigData], ] | None = None, -) -> dict[str, Any]: +) -> ConfigData: start_time = time.time() - reward = float(state.get("reward", 0.0) or 0.0) + reward = float_value(state.get("reward"), 0.0) metrics = dict(cast(dict[str, float], state.get("metrics") or {})) + framework_kwargs = rollout_framework_kwargs(task, state) + protected_args = set(framework_kwargs) for signal in sorted(signals, key=signal_sort_key): if signal["stage"] != "rollout": continue - extra_kwargs: dict[str, object] = {} + extra_kwargs: ConfigData = {} if resolve_kwargs is not None: extra_kwargs = await resolve_kwargs( - cast(Callable[..., object], signal["fn"]), task, state + cast(Handler, signal["fn"]), + task, + state, + protected_args, ) - value = await call_rollout_signal(signal, task, state, extra_kwargs) + value = await call_rollout_signal(signal, framework_kwargs, extra_kwargs) metrics[cast(str, signal["name"])] = value if signal["kind"] == "reward": reward += value * cast(float, signal["weight"]) @@ -109,19 +122,39 @@ async def score_rollout( async def score_group( signals: Iterable[SignalRecord], - tasks: list[Mapping[str, Any]], - states: list[dict[str, Any]], -) -> list[dict[str, Any]]: + tasks: list[ConfigMap], + states: list[ConfigData], + resolve_kwargs: Callable[ + [ + Handler, + list[ConfigMap], + list[ConfigData], + set[str], + ], + Awaitable[ConfigData], + ] + | None = None, +) -> list[ConfigData]: start_time = time.time() - rewards = [float(state.get("reward", 0.0) or 0.0) for state in states] + rewards = [float_value(state.get("reward"), 0.0) for state in states] advantage_signals: list[SignalRecord] = [] + framework_kwargs = group_framework_kwargs(tasks, states) + protected_args = set(framework_kwargs) for signal in sorted(signals, key=signal_sort_key): if 
signal["stage"] != "group": continue if signal["kind"] == "advantage": advantage_signals.append(signal) continue - values = await call_group_signal(signal, tasks, states) + extra_kwargs: ConfigData = {} + if resolve_kwargs is not None: + extra_kwargs = await resolve_kwargs( + cast(Handler, signal["fn"]), + tasks, + states, + protected_args, + ) + values = await call_group_signal(signal, framework_kwargs, extra_kwargs) for index, value in enumerate(values): metrics = dict(cast(dict[str, float], states[index].get("metrics") or {})) metrics[cast(str, signal["name"])] = value @@ -130,7 +163,15 @@ async def score_group( rewards[index] += value * cast(float, signal["weight"]) advantages: list[float] | None = None for signal in advantage_signals: - advantages = await call_group_signal(signal, tasks, states) + extra_kwargs = {} + if resolve_kwargs is not None: + extra_kwargs = await resolve_kwargs( + cast(Handler, signal["fn"]), + tasks, + states, + protected_args, + ) + advantages = await call_group_signal(signal, framework_kwargs, extra_kwargs) for index, state in enumerate(states): state["reward"] = rewards[index] if advantages is not None: @@ -181,9 +222,7 @@ def decorated_signals(owner: object) -> list[SignalRecord]: return signals -def signal_from_function( - fn: Callable[..., object], kind: SignalKind | None = None -) -> SignalRecord: +def signal_from_function(fn: Handler, kind: SignalKind | None = None) -> SignalRecord: inferred_kind = decorated_kind(fn) if kind is not None and inferred_kind is not None and kind != inferred_kind: raise ValueError( @@ -209,9 +248,7 @@ def signal_from_function( } -def apply_signal_config( - signal: SignalRecord, config: Mapping[str, object] -) -> SignalRecord: +def apply_signal_config(signal: SignalRecord, config: ConfigMap) -> SignalRecord: kind = cast(SignalKind, signal["kind"]) stage = get_optional_stage(config) or cast(SignalStage, signal["stage"]) priority_value = get_optional_number(config, "priority") @@ -235,7 +272,7 @@ def 
apply_signal_config( } -def decorated_kind(fn: Callable[..., object]) -> SignalKind | None: +def decorated_kind(fn: Handler) -> SignalKind | None: has_metric = bool(getattr(fn, "metric", False)) has_reward = bool(getattr(fn, "reward", False)) has_advantage = bool(getattr(fn, "advantage", False)) @@ -251,51 +288,41 @@ def decorated_kind(fn: Callable[..., object]) -> SignalKind | None: def validate_signal(signal: SignalRecord) -> None: - fn = cast(Callable[..., object], signal["fn"]) - names = set(inspect.signature(fn).parameters) + fn = cast(Handler, signal["fn"]) + inspect.signature(fn) if signal["stage"] == "rollout": if signal["kind"] == "advantage": raise ValueError( f"Advantage signal {signal['name']!r} must use stage='group'." ) - if not {"task", "state"}.issubset(names): - raise ValueError( - f"Rollout signal {signal['name']!r} must accept task and state." - ) - if signal["stage"] == "group": - if names != {"tasks", "states"}: - raise ValueError( - f"Group signal {signal['name']!r} must accept exactly tasks and states." 
- ) async def call_rollout_signal( signal: SignalRecord, - task: Mapping[str, Any], - state: dict[str, Any], - extra_kwargs: Mapping[str, object] | None = None, + framework_kwargs: ConfigMap, + extra_kwargs: ConfigMap | None = None, ) -> float: - value = await maybe_await( - cast(Callable[..., object], signal["fn"]), - task=task, - state=state, - **dict(extra_kwargs or {}), - ) + fn = cast(GroupHandler, signal["fn"]) + kwargs = {**dict(extra_kwargs or {}), **dict(framework_kwargs)} + validate_required_kwargs(fn, kwargs, signal_context(signal)) + value = await maybe_call_with_named_args(fn, **kwargs) return float(value) async def call_group_signal( signal: SignalRecord, - tasks: list[Mapping[str, Any]], - states: list[dict[str, Any]], + framework_kwargs: ConfigMap, + extra_kwargs: ConfigMap | None = None, ) -> list[float]: - value = await maybe_await( - cast(Callable[..., object], signal["fn"]), tasks=tasks, states=states - ) + fn = cast(Handler, signal["fn"]) + kwargs = {**dict(extra_kwargs or {}), **dict(framework_kwargs)} + validate_required_kwargs(fn, kwargs, signal_context(signal)) + value = await maybe_call_with_named_args(fn, **kwargs) name = cast(str, signal["name"]) if not isinstance(value, Sequence) or isinstance(value, str | bytes): raise TypeError(f"Group signal {name!r} must return a list of floats.") values = [float(item) for item in value] + states = cast(list[ConfigData], framework_kwargs["states"]) if len(values) != len(states): raise ValueError( f"Group signal {name!r} returned {len(values)} values for " @@ -304,7 +331,46 @@ async def call_group_signal( return values -def import_ref(ref: str | None) -> Callable[..., object]: +def rollout_framework_kwargs(task: ConfigMap, state: ConfigData) -> ConfigData: + kwargs: ConfigData = {"task": task, "state": state} + for name in sorted(ROLLOUT_FRAMEWORK_ARGS - {"task", "state"}): + if name in state: + kwargs[name] = state[name] + elif name in task: + kwargs[name] = task[name] + return kwargs + + +def 
group_framework_kwargs( + tasks: list[ConfigMap], states: list[ConfigData] +) -> ConfigData: + return {"tasks": tasks, "states": states} + + +def validate_required_kwargs(fn: Handler, kwargs: ConfigMap, context: str) -> None: + signature = inspect.signature(fn) + missing: list[str] = [] + for parameter in signature.parameters.values(): + if parameter.default is not inspect.Parameter.empty: + continue + if parameter.kind in { + inspect.Parameter.VAR_POSITIONAL, + inspect.Parameter.VAR_KEYWORD, + }: + continue + if parameter.name not in kwargs: + missing.append(parameter.name) + if missing: + raise TypeError( + f"{context} has unresolved required args: {', '.join(missing)}." + ) + + +def signal_context(signal: SignalRecord) -> str: + return f"{signal['kind']} signal {signal['name']!r}" + + +def import_ref(ref: str | None) -> Handler: if ref is None: raise ValueError("Import ref is required.") module_name, separator, attr_name = ref.partition(":") @@ -313,17 +379,17 @@ def import_ref(ref: str | None) -> Callable[..., object]: obj = getattr(importlib.import_module(module_name), attr_name) if not callable(obj): raise TypeError(f"Signal ref {ref!r} did not resolve to a callable.") - return cast(Callable[..., object], obj) + return cast(Handler, obj) -def validate_signal_config(name: str, config: Mapping[str, object]) -> None: +def validate_signal_config(name: str, config: ConfigMap) -> None: unknown_keys = set(config) - SIGNAL_CONFIG_KEYS if unknown_keys: unknown = ", ".join(sorted(unknown_keys)) raise ValueError(f"Signal config {name!r} has unknown keys: {unknown}.") -def get_optional_str(config: Mapping[str, object], key: str) -> str | None: +def get_optional_str(config: ConfigMap, key: str) -> str | None: value = config.get(key) if value is None: return None @@ -332,7 +398,7 @@ def get_optional_str(config: Mapping[str, object], key: str) -> str | None: return value -def get_optional_stage(config: Mapping[str, object]) -> SignalStage | None: +def 
get_optional_stage(config: ConfigMap) -> SignalStage | None: value = get_optional_str(config, "stage") if value is None: return None @@ -341,7 +407,7 @@ def get_optional_stage(config: Mapping[str, object]) -> SignalStage | None: return cast(SignalStage, value) -def get_optional_number(config: Mapping[str, object], key: str) -> int | float | None: +def get_optional_number(config: ConfigMap, key: str) -> int | float | None: value = config.get(key) if value is None: return None @@ -350,24 +416,30 @@ def get_optional_number(config: Mapping[str, object], key: str) -> int | float | return value -def bool_config(config: Mapping[str, object], key: str, default: bool) -> bool: +def bool_config(config: ConfigMap, key: str, default: bool) -> bool: value = config.get(key, default) if not isinstance(value, bool): raise TypeError(f"Signal config key {key!r} must be a boolean.") return value -def apply_advantage_to_trajectory(state: dict[str, Any], advantage: float) -> None: - for step in state.get("trajectory", []): - if isinstance(step, dict) and step.get("advantage") is None: - step["advantage"] = advantage - - -def function_name(fn: Callable[..., object]) -> str: - name = getattr(fn, "__name__", None) - if not isinstance(name, str) or not name: - raise ValueError("Signal functions require a stable __name__.") - return name +def float_value(value: object, default: float = 0.0) -> float: + if value is None: + return default + if isinstance(value, bool) or not isinstance(value, int | float | str): + return default + return float(value or 0.0) + + +def apply_advantage_to_trajectory(state: ConfigData, advantage: float) -> None: + trajectory = state.get("trajectory", []) + if not isinstance(trajectory, list): + return + for step in trajectory: + if isinstance(step, dict): + step = cast(ConfigData, step) + if step.get("advantage") is None: + step["advantage"] = advantage def signal_sort_key(signal: SignalRecord) -> tuple[int, str, str, str]: diff --git 
a/verifiers/v1/utils/serialization_utils.py b/verifiers/v1/utils/serialization_utils.py new file mode 100644 index 000000000..f3309401a --- /dev/null +++ b/verifiers/v1/utils/serialization_utils.py @@ -0,0 +1,14 @@ +from collections.abc import Mapping + + +def serializable(value: object) -> object: + model_dump = getattr(value, "model_dump", None) + if callable(model_dump): + return model_dump(exclude_none=True) + if isinstance(value, list): + return [serializable(item) for item in value] + if isinstance(value, tuple): + return [serializable(item) for item in value] + if isinstance(value, Mapping): + return {str(key): serializable(item) for key, item in value.items()} + return value diff --git a/verifiers/v1/utils/task_freeze_utils.py b/verifiers/v1/utils/task_freeze_utils.py new file mode 100644 index 000000000..224f93403 --- /dev/null +++ b/verifiers/v1/utils/task_freeze_utils.py @@ -0,0 +1,89 @@ +from collections.abc import Iterable, Mapping +from copy import deepcopy +from typing import SupportsIndex + +from verifiers.types import assert_json_serializable + + +def assert_serializable(value: object) -> None: + assert_json_serializable(value) + + +class FrozenDict(dict): + def __deepcopy__(self, memo: dict[int, object]) -> dict[object, object]: + return { + deepcopy(key, memo): deepcopy(value, memo) for key, value in self.items() + } + + def __setitem__(self, key: str, value: object) -> None: + raise TypeError("Frozen task mappings are immutable.") + + def __delitem__(self, key: str) -> None: + raise TypeError("Frozen task mappings are immutable.") + + def update(self, *args: object, **kwargs: object) -> None: + raise TypeError("Frozen task mappings are immutable.") + + def setdefault(self, key: str, default: object = None) -> object: + raise TypeError("Frozen task mappings are immutable.") + + def pop(self, key: str, default: object = None) -> object: + raise TypeError("Frozen task mappings are immutable.") + + def popitem(self) -> tuple[object, object]: + raise 
TypeError("Frozen task mappings are immutable.") + + def clear(self) -> None: + raise TypeError("Frozen task mappings are immutable.") + + def __ior__(self, value: object) -> "FrozenDict": + raise TypeError("Frozen task mappings are immutable.") + + +class FrozenList(list): + def __deepcopy__(self, memo: dict[int, object]) -> list[object]: + return [deepcopy(value, memo) for value in self] + + def __setitem__(self, key: object, value: object) -> None: + raise TypeError("Frozen task lists are immutable.") + + def __delitem__(self, key: object) -> None: + raise TypeError("Frozen task lists are immutable.") + + def append(self, value: object) -> None: + raise TypeError("Frozen task lists are immutable.") + + def extend(self, values: object) -> None: + raise TypeError("Frozen task lists are immutable.") + + def insert(self, index: SupportsIndex, object: object, /) -> None: + raise TypeError("Frozen task lists are immutable.") + + def pop(self, index: SupportsIndex = -1, /) -> object: + raise TypeError("Frozen task lists are immutable.") + + def remove(self, value: object) -> None: + raise TypeError("Frozen task lists are immutable.") + + def clear(self) -> None: + raise TypeError("Frozen task lists are immutable.") + + def __iadd__(self, values: Iterable[object]) -> "FrozenList": + raise TypeError("Frozen task lists are immutable.") + + def __imul__(self, value: SupportsIndex) -> "FrozenList": + raise TypeError("Frozen task lists are immutable.") + + def sort(self, *args: object, **kwargs: object) -> None: + raise TypeError("Frozen task lists are immutable.") + + def reverse(self) -> None: + raise TypeError("Frozen task lists are immutable.") + + +def freeze_value(value: object) -> object: + if isinstance(value, Mapping): + return FrozenDict({key: freeze_value(item) for key, item in value.items()}) + if isinstance(value, list): + return FrozenList(freeze_value(item) for item in value) + return value diff --git a/verifiers/v1/utils/taskset_utils.py 
b/verifiers/v1/utils/taskset_utils.py new file mode 100644 index 000000000..007ff2980 --- /dev/null +++ b/verifiers/v1/utils/taskset_utils.py @@ -0,0 +1,56 @@ +import importlib +import importlib.resources as resources +import json +from collections.abc import Callable, Iterable +from importlib.abc import Traversable +from pathlib import Path +from typing import cast +from ..types import ConfigData, ConfigMap + + +def dataset_info_with_task(task: ConfigMap) -> ConfigData: + return {"task": json.dumps(task)} + + +def rows_from_source( + source: Iterable[ConfigMap] | Callable[[], Iterable[ConfigMap]] | None, +) -> list[ConfigData]: + if source is None: + return [] + if callable(source): + source_loader = cast(Callable[[], Iterable[ConfigMap]], source) + return [dict(row) for row in source_loader()] + return [dict(row) for row in source] + + +def discover_sibling_dir( + taskset_cls: type[object], dirname: str +) -> Traversable | Path | None: + module = importlib.import_module(taskset_cls.__module__) + package_name = module_package_name(module) + if package_name is not None: + try: + candidate = resources.files(package_name) / dirname + if candidate.is_dir() and any(candidate.iterdir()): + return candidate + except ( + FileNotFoundError, + ModuleNotFoundError, + NotADirectoryError, + TypeError, + ValueError, + ): + pass + module_file = getattr(module, "__file__", None) + if isinstance(module_file, str): + candidate_path = Path(module_file).resolve().parent / dirname + if candidate_path.is_dir() and any(candidate_path.iterdir()): + return candidate_path + return None + + +def module_package_name(module: object) -> str | None: + if hasattr(module, "__path__"): + return str(getattr(module, "__name__")) + package_name = getattr(module, "__package__", None) + return package_name if isinstance(package_name, str) and package_name else None diff --git a/verifiers/v1/utils/timing_utils.py b/verifiers/v1/utils/timing_utils.py index cd40af02a..788fa3b1d 100644 --- 
a/verifiers/v1/utils/timing_utils.py +++ b/verifiers/v1/utils/timing_utils.py @@ -1,36 +1,119 @@ -from __future__ import annotations - import time -from collections.abc import MutableMapping -from typing import Any, cast +from typing import cast +from ..types import ConfigData, MutableConfigMap -def timing_record(start_time: float | None = None) -> dict[str, float]: +def span_record(start: float = 0.0, end: float = 0.0) -> dict[str, float]: return { - "generation_ms": 0.0, - "scoring_ms": 0.0, - "total_ms": 0.0, - "start_time": time.time() if start_time is None else start_time, + "start": start, + "end": end, + "duration": max(0.0, end - start) if end > 0.0 else 0.0, } -def ensure_timing(state: MutableMapping[str, Any]) -> dict[str, float]: +def spans_record() -> ConfigData: + return {"spans": [], "duration": 0.0} + + +def timing_record(start_time: float | None = None) -> ConfigData: + start = time.time() if start_time is None else start_time + return { + "start_time": start, + "setup": span_record(), + "generation": span_record(start=start), + "scoring": span_record(), + "model": spans_record(), + "env": spans_record(), + "total": 0.0, + "overhead": 0.0, + } + + +def ensure_timing(state: MutableConfigMap) -> ConfigData: timing = state.setdefault("timing", timing_record()) if not isinstance(timing, dict): raise TypeError("state.timing must be a mapping.") - return cast(dict[str, float], timing) + timing = cast(ConfigData, timing) + if "generation_ms" in timing or "total_ms" in timing: + start = _float_value(timing.get("start_time"), time.time()) + elapsed = ( + _float_value(timing.get("total_ms", timing.get("generation_ms", 0.0))) + / 1000 + ) + timing.clear() + timing.update(timing_record(start)) + if elapsed > 0.0: + _set_span(timing, "generation", start, start + elapsed) + _set_total(timing, start + elapsed) + return timing + + +def record_generation_timing(state: MutableConfigMap) -> None: + timing = ensure_timing(state) + start_time = 
_float_value(timing.get("start_time"), time.time()) + end_time = time.time() + _set_span(timing, "generation", start_time, end_time) + _set_total(timing, end_time) -def record_generation_timing(state: MutableMapping[str, Any]) -> None: +def record_scoring_timing(state: MutableConfigMap, start_time: float) -> None: timing = ensure_timing(state) - start_time = float(timing.get("start_time", time.time())) - elapsed_ms = (time.time() - start_time) * 1000 - timing["generation_ms"] = elapsed_ms - timing["total_ms"] = elapsed_ms + end_time = time.time() + _set_span(timing, "scoring", start_time, end_time) + _set_total(timing, end_time) -def record_scoring_timing(state: MutableMapping[str, Any], start_time: float) -> None: +def record_model_timing( + state: MutableConfigMap, start_time: float, end_time: float +) -> None: timing = ensure_timing(state) - scoring_ms = (time.time() - start_time) * 1000 - timing["scoring_ms"] = float(timing.get("scoring_ms", 0.0)) + scoring_ms - timing["total_ms"] = float(timing.get("total_ms", 0.0)) + scoring_ms + spans = timing.setdefault("model", spans_record()) + if not isinstance(spans, dict): + raise TypeError("state.timing.model must be a mapping.") + spans = cast(ConfigData, spans) + span_list = spans.setdefault("spans", []) + if not isinstance(span_list, list): + raise TypeError("state.timing.model.spans must be a list.") + span_list = cast(list[dict[str, float]], span_list) + span_list.append(span_record(start_time, end_time)) + spans["duration"] = _duration(spans) + max(0.0, end_time - start_time) + + +def _set_span( + timing: MutableConfigMap, key: str, start_time: float, end_time: float +) -> None: + timing[key] = span_record(start_time, end_time) + + +def _set_total(timing: MutableConfigMap, end_time: float) -> None: + start_time = _float_value(timing.get("start_time"), end_time) + total = max(0.0, end_time - start_time) + timing["total"] = total + model = timing.get("model", {}) + env = timing.get("env", {}) + setup = 
timing.get("setup", {}) + scoring = timing.get("scoring", {}) + timing["overhead"] = max( + 0.0, + total + - _duration(setup) + - _duration(model) + - _duration(env) + - _duration(scoring), + ) + + +def _duration(value: object) -> float: + if not isinstance(value, dict): + return 0.0 + mapping = cast(ConfigData, value) + duration = mapping.get("duration", 0.0) + return _float_value(duration, 0.0) + + +def _float_value(value: object, default: float = 0.0) -> float: + if value is None: + return default + if isinstance(value, bool) or not isinstance(value, int | float | str): + return default + return float(value or 0.0) diff --git a/verifiers/v1/utils/tool_utils.py b/verifiers/v1/utils/tool_utils.py index 9b49c1b28..ba0444666 100644 --- a/verifiers/v1/utils/tool_utils.py +++ b/verifiers/v1/utils/tool_utils.py @@ -1,19 +1,54 @@ -from __future__ import annotations - -from collections.abc import Awaitable, Callable, Mapping -from typing import Any, cast +import inspect +from collections.abc import Sequence +from typing import cast from verifiers.v1.state import State from verifiers.v1.task import Task +from verifiers.v1.toolset import Toolset, tool_name +from ..types import ConfigMap, Handler -def load_tools_from_state( - state: State, -) -> dict[str, Callable[..., Awaitable[object]]]: +def load_tools_from_state(state: State) -> dict[str, Handler]: runtime = state._runtime() - task = Task(cast(Mapping[str, Any], state["task"])).freeze() + task = Task(cast(ConfigMap, state["task"])).freeze() return runtime.tool_calls(task, state) def tool_error_content(error: Exception) -> str: return str(error) + + +def tool_visible(toolset: Toolset, name: str) -> bool: + if toolset.show is not None and name not in toolset.show: + return False + if toolset.hide is not None and name in toolset.hide: + return False + return True + + +def toolset_object_scope(toolset: Toolset) -> str: + if toolset.scope is not None: + return toolset.scope + return "rollout" if toolset.write else "global" + 
+ +def string_list(value: object, field: str) -> list[str]: + if isinstance(value, str): + return [value] + if not isinstance(value, Sequence) or isinstance(value, bytes): + raise TypeError(f"{field} must be a string or list of strings.") + result = [str(item) for item in value] + if len(result) != len(set(result)): + raise ValueError(f"{field} contains duplicate names.") + return result + + +def schema_callable(tool: object, signature: inspect.Signature) -> Handler: + def call_for_schema(**kwargs: object) -> None: + _ = kwargs + return None + + call_for_schema.__name__ = tool_name(tool) + call_for_schema.__doc__ = getattr(tool, "__doc__", None) + setattr(call_for_schema, "__signature__", signature) + return call_for_schema diff --git a/verifiers/v1/utils/trajectory_utils.py b/verifiers/v1/utils/trajectory_utils.py index 0a09610bb..4e4326b8a 100644 --- a/verifiers/v1/utils/trajectory_utils.py +++ b/verifiers/v1/utils/trajectory_utils.py @@ -1,14 +1,15 @@ -from __future__ import annotations - from collections.abc import Mapping, Sequence -from typing import Any, cast +from typing import cast from ..state import State +from verifiers.types import Message + +from ..types import ConfigMap, PromptMessage def sync_trajectory( state: State, - trajectory: Sequence[Mapping[str, object]] | None = None, + trajectory: Sequence[ConfigMap] | None = None, ) -> State: if trajectory is not None: state["trajectory"] = [dict(step) for step in trajectory] @@ -18,10 +19,7 @@ def sync_trajectory( raise TypeError("state.trajectory must be a list.") state["num_model_requests"] = len(steps) state._set_truncated( - any( - bool(cast(Mapping[str, object], step).get("is_truncated", False)) - for step in steps - ) + any(bool(cast(ConfigMap, step).get("is_truncated", False)) for step in steps) ) if not steps: @@ -34,18 +32,18 @@ def sync_trajectory( return state -def has_borrowed_trajectory(state: Mapping[str, object]) -> bool: +def has_borrowed_trajectory(state: ConfigMap) -> bool: runtime = 
state.get("runtime") if not isinstance(runtime, Mapping): return False - runtime = cast(Mapping[str, object], runtime) + runtime = cast(ConfigMap, runtime) resolved = runtime.get("resolved") if not isinstance(resolved, Mapping): return False - return isinstance(cast(Mapping[str, object], resolved).get("trajectory"), Mapping) + return isinstance(cast(ConfigMap, resolved).get("trajectory"), Mapping) -def completion_from_trajectory(steps: Sequence[Mapping[str, object]]) -> list[Any]: +def completion_from_trajectory(steps: Sequence[ConfigMap]) -> list[PromptMessage]: if not steps: return [] first_prompt = message_list(steps[0], "prompt") @@ -58,21 +56,29 @@ def completion_from_trajectory(steps: Sequence[Mapping[str, object]]) -> list[An def merge_existing_completion( - trajectory_completion: list[Any], existing: object -) -> list[Any]: + trajectory_completion: list[PromptMessage], existing: object +) -> list[PromptMessage]: if not isinstance(existing, list): return trajectory_completion if existing[: len(trajectory_completion)] == trajectory_completion: - return list(existing) + return [cast(PromptMessage, message) for message in existing] return trajectory_completion -def message_list(step: object, field: str) -> list[Any]: +def message_list(step: object, field: str) -> list[PromptMessage]: if not isinstance(step, Mapping): raise TypeError("trajectory steps must be mappings.") - value = cast(Mapping[str, object], step).get(field) + value = cast(ConfigMap, step).get(field) if value is None: return [] if not isinstance(value, list): raise TypeError(f"trajectory step {field} must be a list.") - return list(value) + messages: list[PromptMessage] = [] + for item in value: + if isinstance(item, Mapping): + messages.append(cast(ConfigMap, item)) + elif hasattr(item, "role") and hasattr(item, "content"): + messages.append(cast(Message, item)) + else: + raise TypeError(f"trajectory step {field} items must be messages.") + return messages diff --git 
a/verifiers/v1/utils/usage_utils.py b/verifiers/v1/utils/usage_utils.py new file mode 100644 index 000000000..e91586cb2 --- /dev/null +++ b/verifiers/v1/utils/usage_utils.py @@ -0,0 +1,21 @@ +from typing import cast + +from verifiers.types import Response +from verifiers.utils.usage_utils import usage_tokens + +from ..state import State + + +def record_response_usage(state: State, response: Response) -> None: + if response.usage is None: + return + input_tokens, output_tokens = usage_tokens(response.usage) + usage = state.setdefault("token_usage", {"input_tokens": 0.0, "output_tokens": 0.0}) + if not isinstance(usage, dict): + raise TypeError("state.token_usage must be a mapping.") + usage = cast(dict[str, float], usage) + usage["input_tokens"] = float(usage.get("input_tokens", 0.0)) + float(input_tokens) + usage["output_tokens"] = float(usage.get("output_tokens", 0.0)) + float( + output_tokens + ) + state["usage"] = usage