diff --git a/configs/private b/configs/private index 70c3503e1d..00204b08e3 160000 --- a/configs/private +++ b/configs/private @@ -1 +1 @@ -Subproject commit 70c3503e1dc4ea499b09f0eee206b509169b79bd +Subproject commit 00204b08e322ca36d49f7d1a468e80009a0e0bd5 diff --git a/deps/research-environments b/deps/research-environments index c752781984..b07ace376f 160000 --- a/deps/research-environments +++ b/deps/research-environments @@ -1 +1 @@ -Subproject commit c752781984c1b4fbb0a3d7f4aac1e7ed67cc749e +Subproject commit b07ace376ff32f53b3b4ad2d58007f2de92e0821 diff --git a/deps/verifiers b/deps/verifiers index e1d4f2593a..dacceceda4 160000 --- a/deps/verifiers +++ b/deps/verifiers @@ -1 +1 @@ -Subproject commit e1d4f2593a66a2130584f5972bee4f3279e824e5 +Subproject commit dacceceda4b14b21354e342ee652b1f27dd96fbe diff --git a/packages/prime-rl-configs/src/prime_rl/configs/orchestrator.py b/packages/prime-rl-configs/src/prime_rl/configs/orchestrator.py index be5fe249f3..0fd25ff089 100644 --- a/packages/prime-rl-configs/src/prime_rl/configs/orchestrator.py +++ b/packages/prime-rl-configs/src/prime_rl/configs/orchestrator.py @@ -206,6 +206,72 @@ def resolve_timeout(self): return self +class SystemRoleEchoConfig(BaseConfig): + """Echo supervision for system-message content tokens.""" + + alpha: float = Field(1.0, allow_inf_nan=False) + """Per-token echo weight.""" + + +class UserRoleEchoConfig(BaseConfig): + """Echo supervision for user-message content tokens.""" + + alpha: float = Field(1.0, allow_inf_nan=False) + """Per-token echo weight.""" + + +class AssistantRoleEchoConfig(BaseConfig): + """Echo supervision for assistant-message content and completion tokens.""" + + alpha: float = Field(1.0, allow_inf_nan=False) + """Per-token echo weight. ``alpha=0`` keeps the token supervised but gives it zero gradient.""" + + +class ToolRoleEchoConfig(BaseConfig): + """Echo supervision for tool-message content tokens.""" + + alpha: float = Field(1.0, allow_inf_nan=False) + """Per-token echo weight.""" + + tool_names: set[str] | None = Field(None, min_length=1) + """Restrict echo to these tool function names; None = all tools.""" + + +class EchoFilterConfig(BaseConfig): + """Optional callable that narrows role-selected echo tokens per rollout.""" + + import_path: str + """Dotted import path to the filter callable, e.g. ``"my_module.filter_warnings"``.""" + + kwargs: dict[str, Any] = Field(default_factory=dict) + """Keyword arguments forwarded to the filter as ``**kwargs``.""" + + +class EchoConfig(BaseConfig): + """Enable CE echo on selected message roles for this training env.""" + + system: SystemRoleEchoConfig | None = None + """System-message echo (default: disabled).""" + + user: UserRoleEchoConfig | None = None + """User-message echo (default: disabled).""" + + assistant: AssistantRoleEchoConfig | None = None + """Assistant-message echo (default: disabled).""" + + tool: ToolRoleEchoConfig | None = None + """Tool-message echo (default: disabled).""" + + filter: EchoFilterConfig | None = None + """Optional per-token filter on top of the role baseline.""" + + @model_validator(mode="after") + def validate_roles(self) -> "EchoConfig": + if self.system is self.user is self.assistant is self.tool is None: + raise ValueError("EchoConfig requires at least one of system, user, assistant, or tool.") + return self + + class TrainEnvConfig(EnvConfig): sampling: TrainSamplingConfig = TrainSamplingConfig() """Per-env sampling overrides. Unset fields inherit from the group-level train sampling config.""" @@ -214,6 +280,9 @@ class TrainEnvConfig(EnvConfig): """Rollouts generated per example for GRPO group-relative advantages. Inherits from ``orchestrator.group_size`` when unset.""" + echo: EchoConfig | None = None + """Per-env per-role echo config.""" + class EvalEnvConfig(EnvConfig): sampling: EvalSamplingConfig = EvalSamplingConfig() diff --git a/pyproject.toml b/pyproject.toml index ca3639b3aa..b98dca0bff 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -69,6 +69,7 @@ envs = [ "deepdive", "general-agent", "gpqa", + "harnesses", "hle", "ifeval", "livecodebench", @@ -77,7 +78,6 @@ envs = [ "math-python", "math500", "mini-swe-agent-plus", - "mini-swe-agent-plus-rlm", "mmlu-pro", "opencode-cp", "opencode-deepdive", @@ -88,6 +88,7 @@ envs = [ "rlm-swe", "science-env", "simpleqa-verified", + "tasksets", "tau2-bench", "wiki-search", ] @@ -197,6 +198,8 @@ prime-rl-configs = { path = "packages/prime-rl-configs", editable = true } verifiers = { path = "deps/verifiers", editable = true } renderers = { path = "deps/renderers", editable = true } prime-pydantic-config = { path = "deps/pydantic-config", editable = true } +harnesses = { path = "deps/verifiers/packages/harnesses", editable = true } +tasksets = { path = "deps/verifiers/packages/tasksets", editable = true } aime2024 = { path = "deps/research-environments/environments/aime2024", editable = true } aime2025 = { path = "deps/research-environments/environments/aime2025", editable = true } alphabet-sort = { path = "deps/verifiers/environments/alphabet_sort", editable = true } @@ -213,7 +216,6 @@ math-env = { path = "deps/research-environments/environments/math_env", editable math-python = { path = "deps/verifiers/environments/math_python", editable = true } math500 = { path = "deps/research-environments/environments/math500", editable = true } mini-swe-agent-plus = { path = "deps/research-environments/environments/mini_swe_agent_plus", editable = true } -mini-swe-agent-plus-rlm = { path = "deps/research-environments/environments/mini_swe_agent_plus_rlm", editable = true } mmlu-pro = { path = "deps/research-environments/environments/mmlu_pro", editable = true } opencode-cp = { path = "deps/research-environments/environments/opencode_cp", editable = true } opencode-deepdive = { path = "deps/research-environments/environments/opencode_deepdive", editable = true } diff --git a/src/prime_rl/orchestrator/echo.py b/src/prime_rl/orchestrator/echo.py new file mode 100644 index 0000000000..ca9a6c3600 --- /dev/null +++ b/src/prime_rl/orchestrator/echo.py @@ -0,0 +1,138 @@ +from __future__ import annotations + +from collections.abc import Callable +from dataclasses import dataclass + +import verifiers as vf + +from prime_rl.configs.orchestrator import EchoConfig + + +@dataclass(frozen=True) +class EchoAnnotations: + step_alpha: list[list[float | None]] + + def initial_sample_alpha(self, step_idx: int) -> list[float | None] | None: + alpha = self.step_alpha[step_idx] + return list(alpha) if any(a is not None for a in alpha) else None + + def extension_alpha(self, step_idx: int, prefix_len: int, prompt_len: int) -> list[float | None]: + alpha = self.step_alpha[step_idx] + return alpha[prefix_len:prompt_len] + alpha[prompt_len:] + + +def build_echo_annotations( + rollout: vf.RolloutOutput, + echo_config: EchoConfig | None, + filter_fn: Callable[..., list[list[bool]]] | None = None, +) -> EchoAnnotations | None: + if echo_config is None: + return None + + trajectory = rollout["trajectory"] + step_tokens = [] + for step in trajectory: + tokens = step["tokens"] + if tokens is None: + return None + step_tokens.append(tokens) + + filter_masks = apply_echo_filter(rollout, filter_fn) if filter_fn is not None and trajectory else None + return EchoAnnotations( + step_alpha=[ + _build_step_echo_alpha( + prompt_attribution=tokens.get("prompt_attribution"), + prompt_len=len(tokens["prompt_ids"]), + completion_len=len(tokens["completion_ids"]), + echo_config=echo_config, + filter_mask=filter_masks[step_idx] if filter_masks is not None else None, + ) + for step_idx, tokens in enumerate(step_tokens) + ] + ) + + +def _build_step_echo_alpha( + prompt_attribution: dict | None, + prompt_len: int, + completion_len: int, + echo_config: EchoConfig | None, + filter_mask: list[bool] | None = None, +) -> list[float | None]: + expected_total_len = prompt_len + completion_len + out: list[float | None] = [None] * expected_total_len + if echo_config is not None: + if echo_config.assistant is not None: + out[prompt_len:expected_total_len] = [echo_config.assistant.alpha] * completion_len + + if prompt_attribution is not None: + message_roles = prompt_attribution.get("message_roles") + message_indices = prompt_attribution.get("message_indices") + is_content = prompt_attribution.get("is_content") + if message_roles is not None and is_content and message_indices: + if len(is_content) == prompt_len and len(message_indices) == prompt_len: + role_alphas = { + "system": echo_config.system.alpha if echo_config.system is not None else None, + "user": echo_config.user.alpha if echo_config.user is not None else None, + "assistant": echo_config.assistant.alpha if echo_config.assistant is not None else None, + } + tool_config = echo_config.tool + tool_alpha = tool_config.alpha if tool_config is not None else None + enabled_tools = tool_config.tool_names if tool_config is not None else None + message_tool_names = prompt_attribution.get("message_tool_names") or [] + + for k, mi in enumerate(message_indices): + if mi < 0 or not is_content[k] or mi >= len(message_roles): + continue + role = message_roles[mi] + if role == "tool": + tool_name = message_tool_names[mi] if mi < len(message_tool_names) else None + if tool_alpha is not None and (enabled_tools is None or tool_name in enabled_tools): + out[k] = tool_alpha + continue + + alpha = role_alphas.get(role) + if alpha is not None: + out[k] = alpha + + if filter_mask is not None: + out = [alpha if keep else None for alpha, keep in zip(out, filter_mask, strict=True)] + + return out + + +def apply_echo_filter( + rollout: vf.RolloutOutput, + filter_fn: Callable[..., list[list[bool]]], +) -> list[list[bool]]: + trajectory = rollout["trajectory"] + result = filter_fn(rollout) + + if not isinstance(result, list): + raise TypeError(f"echo filter must return list[list[bool]], got {type(result).__name__}") + if len(result) != len(trajectory): + raise ValueError( + f"echo filter returned {len(result)} per-step masks but the rollout has {len(trajectory)} trajectory steps" + ) + + for step_idx, (step, mask) in enumerate(zip(trajectory, result)): + tokens = step["tokens"] + prompt_len = len(tokens["prompt_ids"]) + completion_len = len(tokens["completion_ids"]) + expected = prompt_len + completion_len + + if not isinstance(mask, list): + raise TypeError(f"echo filter step {step_idx}: mask must be a list, got {type(mask).__name__}") + if len(mask) != expected: + raise ValueError( + f"echo filter step {step_idx}: mask length {len(mask)} " + f"!= expected {expected} " + f"(prompt_len={prompt_len}, completion_len={completion_len})" + ) + for k, v in enumerate(mask): + if type(v) is not bool: + raise TypeError( + f"echo filter step {step_idx}: mask[{k}] must be a plain bool, got {type(v).__name__} ({v!r})" + ) + + return result diff --git a/src/prime_rl/orchestrator/envs.py b/src/prime_rl/orchestrator/envs.py index fe02d2e61a..66129940f6 100644 --- a/src/prime_rl/orchestrator/envs.py +++ b/src/prime_rl/orchestrator/envs.py @@ -2,6 +2,7 @@ import asyncio import atexit +import functools import multiprocessing as mp import time from collections.abc import Awaitable, Callable, Iterator, Sequence @@ -18,7 +19,7 @@ from prime_rl.orchestrator.eval_utils import compute_pass_at_k from prime_rl.utils.logger import ProgressTracker, get_logger from prime_rl.utils.monitor import get_monitor -from prime_rl.utils.utils import capitalize +from prime_rl.utils.utils import capitalize, import_object REQUIRED_STATE_COLUMNS = ["trajectory"] @@ -170,6 +171,10 @@ class TrainEnv(Env): def __init__(self, config: TrainEnvConfig): super().__init__(config) self.sampling_args = config.sampling.to_sampling_args() + self.echo_filter_fn: Callable[..., list[list[bool]]] | None = None + if config.echo is not None and config.echo.filter is not None: + fn = import_object(config.echo.filter.import_path) + self.echo_filter_fn = functools.partial(fn, **config.echo.filter.kwargs) def get_dataset(self, seed: int | None = None): return self.env.get_dataset(seed=seed) diff --git a/src/prime_rl/orchestrator/train_sink.py b/src/prime_rl/orchestrator/train_sink.py index 26e7b915b0..6751a752a9 100644 --- a/src/prime_rl/orchestrator/train_sink.py +++ b/src/prime_rl/orchestrator/train_sink.py @@ -19,6 +19,7 @@ from prime_rl.configs.orchestrator import AdvantageConfig, OrchestratorConfig from prime_rl.orchestrator.advantage import assign_advantages, setup_advantage_fn +from prime_rl.orchestrator.echo import build_echo_annotations from prime_rl.orchestrator.envs import TrainEnvs from prime_rl.orchestrator.filters import RolloutFilter, apply_filters from prime_rl.orchestrator.trajectories import ( @@ -160,11 +161,16 @@ async def process_rollout(self, rollout: TrainRollout) -> None: needs_backfill = any(s["tokens"] is None for s in raw.get("trajectory") or []) if needs_backfill: await asyncio.to_thread(backfill_rollout_tokens, raw, self.tokenizer, renderer=self.renderer) + + env = self.train_envs.get(rollout.env_name) + echo_annotations = await asyncio.to_thread(build_echo_annotations, raw, env.config.echo, env.echo_filter_fn) + samples = await asyncio.to_thread( interleave_rollout, raw, mm_token_type_ids_mapping=self.mm_token_type_ids_mapping, env_name=rollout.env_name, + echo_annotations=echo_annotations, ) rollout.samples = samples or [] # Offload base64 image bytes to disk as soon as the rollout is diff --git a/src/prime_rl/orchestrator/trajectories.py b/src/prime_rl/orchestrator/trajectories.py index 3e8431c12a..6df0d0337d 100644 --- a/src/prime_rl/orchestrator/trajectories.py +++ b/src/prime_rl/orchestrator/trajectories.py @@ -9,6 +9,7 @@ import verifiers as vf from transformers.tokenization_utils import PreTrainedTokenizer +from prime_rl.orchestrator.echo import EchoAnnotations from prime_rl.transport import RoutedExperts, TrainingSample from prime_rl.utils.chat_template import ( common_prefix_len, @@ -206,6 +207,7 @@ def interleave_rollout( mm_token_type_ids_mapping: dict[int, int] | None = None, *, env_name: str = "", + echo_annotations: EchoAnnotations | None = None, ) -> list[TrainingSample] | None: """ Convert vf.RolloutOutput to trainable rollouts by interleaving trajectory steps @@ -225,6 +227,12 @@ def interleave_rollout( For VLM models, each renderer-produced trajectory step carries its per-image processed tensors inline on ``multi_modal_data``; the last merged step's sidecar covers every image in the sample. + + Args: + output: vf.RolloutOutput containing trajectory data + mm_token_type_ids_mapping: Maps prompt-token ids to mm_token_type_ids + (1 = image, 2 = video, 0 otherwise). Renderer-supplied. + echo_annotations: Optional per-step echo alpha annotations. """ logger = get_logger() @@ -238,6 +246,7 @@ def interleave_rollout( return None has_error = output["error"] is not None + # completion_temperatures is left empty; the train sink fills it per-env later. def prepare_step_tokens(step: vf.TrajectoryStep, step_idx: int) -> dict[str, Any] | None: tokens = step["tokens"] @@ -308,6 +317,7 @@ def make_sample(tokens: dict[str, Any], step_idx: int) -> TrainingSample: env_name=env_name, mm_token_type_ids=None, routed_experts=None, # deferred — finalized at end of interleave_rollout + echo_alpha=echo_annotations.initial_sample_alpha(step_idx) if echo_annotations is not None else None, ) # Initialize routed-experts state for this sample. First chunk is the # raw step routed_experts (no pad, no copy). running_len is the @@ -385,6 +395,15 @@ def extend_sample( sample.completion_mask.extend(tokens["completion_mask"]) sample.completion_logprobs.extend(tokens["completion_logprobs"]) + if echo_annotations is not None: + step_prompt_len = len(tokens["prompt_ids"]) + extension = echo_annotations.extension_alpha(step_idx, prefix_len, step_prompt_len) + if any(a is not None for a in extension) or sample.echo_alpha is not None: + if sample.echo_alpha is None: + existing_len = len(sample.prompt_ids) + len(sample.completion_ids) - len(extension) + sample.echo_alpha = [None] * existing_len + sample.echo_alpha.extend(extension) + step_routed = tokens.get("routed_experts") state = sample_routed_state.get(id(sample)) if state is not None: diff --git a/src/prime_rl/trainer/batch.py b/src/prime_rl/trainer/batch.py index ea99859a35..a127aae6fa 100644 --- a/src/prime_rl/trainer/batch.py +++ b/src/prime_rl/trainer/batch.py @@ -41,6 +41,16 @@ def _append_routed_experts(dst: MicroBatch, src: MicroBatch) -> None: dst_routed.shape[0] += src_routed.shape[0] +def _extend_optional_token_field(current, values, existing_len: int, new_len: int, fill_value): + if values is not None: + if current is None: + current = [fill_value] * existing_len + current.extend(values) + elif current is not None: + current.extend([fill_value] * new_len) + return current + + def _pad_routed_experts(micro_batch: MicroBatch, padding_size: int) -> None: routed_experts = micro_batch.routed_experts assert routed_experts is not None @@ -50,12 +60,9 @@ def _pad_routed_experts(micro_batch: MicroBatch, padding_size: int) -> None: def prepare_sample(training_example: TrainingSample, seq_len: int) -> MicroBatch: - """ - Prepare a problem for sequence packing training. - Tokenize and prepare tensors. - """ + """Prepare a sample for sequence-packing training: tokenize and build tensors.""" input_ids = training_example.prompt_ids + training_example.completion_ids - loss_mask = training_example.prompt_mask + training_example.completion_mask + loss_mask = list(training_example.prompt_mask) + list(training_example.completion_mask) inference_logprobs = [0.0] * len(training_example.prompt_ids) + training_example.completion_logprobs advantages = [training_example.advantage] * len(input_ids) reward = training_example.reward if training_example.reward is not None else float("nan") @@ -64,6 +71,23 @@ def prepare_sample(training_example: TrainingSample, seq_len: int) -> MicroBatch mm_token_type_ids = training_example.mm_token_type_ids assert training_example.env_name != "all", "env_name='all' is reserved for aggregate metric keys" env_names = [training_example.env_name] * len(input_ids) + # Echo overlay: token 0 has no valid shifted current-token logprob, so it + # stays masked even if the producer supplied an alpha there. + echo_alpha = training_example.echo_alpha + echo_mask: list[bool] | None = None + if echo_alpha is not None: + if len(echo_alpha) != len(input_ids): + raise ValueError( + f"echo_alpha length must match prompt_ids + completion_ids length " + f"({len(echo_alpha)} != {len(input_ids)}) for env {training_example.env_name!r}" + ) + echo_mask = [False] * len(input_ids) + for k, alpha in enumerate(echo_alpha[1:], start=1): + if alpha is None: + continue + echo_mask[k] = True + advantages[k] = alpha + loss_mask[k] = True # Per-token temperatures: prompt tokens use first completion temp (masked out anyway) # Default to 1.0 if completion is empty (e.g., model generated only tool calls with no text) @@ -92,6 +116,8 @@ def prepare_sample(training_example: TrainingSample, seq_len: int) -> MicroBatch if mm_token_type_ids is not None: mm_token_type_ids = mm_token_type_ids[:seq_len] env_names = env_names[:seq_len] + if echo_mask is not None: + echo_mask = echo_mask[:seq_len] assert ( len(input_ids) @@ -104,6 +130,8 @@ def prepare_sample(training_example: TrainingSample, seq_len: int) -> MicroBatch ), ( f"input_ids: {len(input_ids)}, advantages: {len(advantages)}, loss_mask: {len(loss_mask)}, position_ids: {len(position_ids)}, inference_logprobs: {len(inference_logprobs)}, rewards: {len(rewards)}, temperatures: {len(temperatures)}" ) + if echo_mask is not None: + assert len(echo_mask) == len(input_ids), f"echo_mask: {len(echo_mask)}, input_ids: {len(input_ids)}" if teacher_logprobs is not None: assert len(teacher_logprobs) == len(input_ids), f"teacher_logprobs: {len(teacher_logprobs)}" @@ -133,6 +161,7 @@ def prepare_sample(training_example: TrainingSample, seq_len: int) -> MicroBatch env_names=env_names, mm_kwargs=training_example.mm_kwargs, training_mode=training_example.training_mode, + echo_mask=echo_mask, ) @@ -177,15 +206,16 @@ def packed_samples_into_micro_bs( and bin_content.training_mode == sample.training_mode ): existing_len = len(bin_content.input_ids) + sample_len = len(sample.input_ids) + bin_content.echo_mask = _extend_optional_token_field( + bin_content.echo_mask, sample.echo_mask, existing_len, sample_len, False + ) bin_content.input_ids.extend(sample.input_ids) bin_content.loss_mask.extend(sample.loss_mask) bin_content.advantages.extend(sample.advantages) - if sample.rewards is not None: - if bin_content.rewards is None: - bin_content.rewards = [float("nan")] * existing_len - bin_content.rewards.extend(sample.rewards) - elif bin_content.rewards is not None: - bin_content.rewards.extend([float("nan")] * len(sample.input_ids)) + bin_content.rewards = _extend_optional_token_field( + bin_content.rewards, sample.rewards, existing_len, sample_len, float("nan") + ) bin_content.inference_logprobs.extend(sample.inference_logprobs) bin_content.temperatures.extend(sample.temperatures) if sample.teacher_logprobs is not None: @@ -254,6 +284,8 @@ def pad_micro_batch(micro_batch: MicroBatch, pad_to_multiple_of: int) -> MicroBa micro_batch.mm_token_type_ids.extend([0] * padding_size) if micro_batch.routed_experts is not None: _pad_routed_experts(micro_batch, padding_size) + if micro_batch.echo_mask is not None: + micro_batch.echo_mask.extend([False] * padding_size) micro_batch.env_names.extend([""] * padding_size) return micro_batch @@ -264,6 +296,8 @@ def _make_dummy_batch(source: MicroBatch) -> MicroBatch: dummy = copy.deepcopy(source) dummy.advantages = [0.0] * len(dummy.input_ids) dummy.loss_mask = [False] * len(dummy.input_ids) + if dummy.echo_mask is not None: + dummy.echo_mask = [False] * len(dummy.input_ids) return dummy diff --git a/src/prime_rl/trainer/ckpt.py b/src/prime_rl/trainer/ckpt.py index 296554e0e8..690a65676b 100644 --- a/src/prime_rl/trainer/ckpt.py +++ b/src/prime_rl/trainer/ckpt.py @@ -171,7 +171,12 @@ def save_to_path( # Checkpoint the local dataloader if dataloader is not None: dataloader_dir = path / "dataloader" - dataloader_dir.mkdir(parents=True, exist_ok=True) + # Avoid concurrent mkdir from all ranks — on parallel filesystems + # (e.g. beegfs) a non-master rank can hit EEXIST + is_dir()==False + # right after master creates the dir and have exist_ok=True fail. + if self.world.is_master: + dataloader_dir.mkdir(parents=True, exist_ok=True) + torch.distributed.barrier() torch.save(dataloader.state_dict(), dataloader_dir / f"rank_{self.world.rank}.pt") # Save sharded state @@ -239,7 +244,9 @@ def save( ) -> None: """Save the full checkpoint state for a specified step.""" ckpt_path = self.get_ckpt_path(step) - ckpt_path.parent.mkdir(parents=True, exist_ok=True) + if self.world.is_master: + ckpt_path.parent.mkdir(parents=True, exist_ok=True) + torch.distributed.barrier() self.save_to_path(ckpt_path, model, optimizers, scheduler, progress, dataloader) bisect.insort(self.ckpt_steps, step) @@ -390,7 +397,9 @@ def save( ): """Save a HF-compatible weight-only checkpoint for a given step.""" step_path = self.get_step_path(step) - step_path.mkdir(parents=True, exist_ok=True) + if self.world.is_master: + step_path.mkdir(parents=True, exist_ok=True) + torch.distributed.barrier() # Gather all weights on master rank self.logger.debug("Gathering weights on master rank for weight checkpoint") diff --git a/src/prime_rl/trainer/rl/data.py b/src/prime_rl/trainer/rl/data.py index 45acdcb5c0..e121ff3ffa 100644 --- a/src/prime_rl/trainer/rl/data.py +++ b/src/prime_rl/trainer/rl/data.py @@ -46,6 +46,10 @@ class TensorMicroBatch(TypedDict): # sft → sft loss). All samples in a micro batch share the same mode. training_mode: str + # True where the token participates in echo CE; None when the micro-batch + # has no echo tokens. ``advantages`` carries alpha on echo positions. + echo_mask: Bool[Tensor, "batch seq"] | None + class FakeDataLoader: def __init__(self, config: FakeDataLoaderConfig, seq_len: int, dp_world_size: int): @@ -120,6 +124,7 @@ def _get_sample_micro_batch(self, generator: torch.Generator) -> TensorMicroBatc "mm_kwargs": None, "mm_token_type_ids": None, "training_mode": "rl", + "echo_mask": None, } def _get_micro_batch(self, generator: torch.Generator) -> TensorMicroBatch: @@ -148,6 +153,7 @@ def _get_micro_batch(self, generator: torch.Generator) -> TensorMicroBatch: "mm_kwargs": None, "mm_token_type_ids": None, "training_mode": "rl", + "echo_mask": None, } @@ -243,6 +249,9 @@ def _micro_batch_to_tensor(self, micro_batch: MicroBatch) -> TensorMicroBatch: else None, routed_experts=routed_experts, training_mode=micro_batch.training_mode, + echo_mask=torch.tensor(micro_batch.echo_mask, dtype=torch.bool).unsqueeze(0) + if micro_batch.echo_mask is not None + else None, ) diff --git a/src/prime_rl/trainer/rl/loss.py b/src/prime_rl/trainer/rl/loss.py index 9a9eb25a63..4927c1c2ee 100644 --- a/src/prime_rl/trainer/rl/loss.py +++ b/src/prime_rl/trainer/rl/loss.py @@ -19,6 +19,18 @@ class LossInputs: teacher_logprobs: Float[Tensor, " seq"] | None advantages: Float[Tensor, " seq"] loss_mask: Bool[Tensor, " seq"] + # Echo tokens are excluded from RL loss/metrics and trained through the + # echo CE term. The advantage tensor carries alpha on these positions. + echo_mask: Bool[Tensor, " seq"] | None = None + rl_loss_scale: int = 1 + echo_loss_scale: int = 1 + + +@dataclass +class LossMasks: + loss: Bool[Tensor, " seq"] + rl: Bool[Tensor, " seq"] + echo: Bool[Tensor, " seq"] @dataclass @@ -113,6 +125,16 @@ def compute_importance_ratio_and_mismatch_kl( return log_importance_ratio, importance_ratio, mismatch_kl +def split_loss_masks(loss_mask: Tensor, echo_mask: Tensor | None) -> LossMasks: + if echo_mask is None: + echo_train_mask = torch.zeros_like(loss_mask, dtype=torch.bool) + rl_mask = loss_mask + else: + echo_train_mask = loss_mask & echo_mask + rl_mask = loss_mask & ~echo_mask + return LossMasks(loss=loss_mask, rl=rl_mask, echo=echo_train_mask) + + def default_loss_fn(inputs: LossInputs, loss_config: DefaultLossConfig) -> LossOutputs: """ DPPO+KL loss for RL training, combining: @@ -129,6 +151,7 @@ def default_loss_fn(inputs: LossInputs, loss_config: DefaultLossConfig) -> LossO inference_logprobs = inputs.inference_logprobs advantages = inputs.advantages loss_mask = inputs.loss_mask + masks = split_loss_masks(loss_mask, inputs.echo_mask) log_importance_ratio, importance_ratio, mismatch_kl = compute_importance_ratio_and_mismatch_kl( trainer_logprobs, inference_logprobs @@ -141,27 +164,37 @@ def default_loss_fn(inputs: LossInputs, loss_config: DefaultLossConfig) -> LossO negative_advantages = advantages < 0 dppo_invalid_mask = torch.where(positive_advantages, dppo_invalid_mask_high, dppo_invalid_mask_low) - is_masked = dppo_invalid_mask - is_masked_high = positive_advantages & dppo_invalid_mask_high - is_masked_low = negative_advantages & dppo_invalid_mask_low - drop_mask = loss_mask & is_masked - keep_mask = loss_mask & ~is_masked + dppo_drop_mask = masks.rl & dppo_invalid_mask + dppo_keep_mask = masks.rl & ~dppo_invalid_mask + is_masked_high = masks.rl & positive_advantages & dppo_invalid_mask_high + is_masked_low = masks.rl & negative_advantages & dppo_invalid_mask_low advantages = loss_config.adv_tau * advantages - pg_loss = keep_mask * advantages * importance_ratio - kl_loss = loss_mask * log_importance_ratio**2 - loss = (-pg_loss + loss_config.kl_tau * kl_loss).sum() + rl_pg_loss = dppo_keep_mask * advantages * importance_ratio + rl_kl_loss = masks.rl * log_importance_ratio**2 + rl_loss = (-rl_pg_loss + loss_config.kl_tau * rl_kl_loss).sum() / inputs.rl_loss_scale + + if inputs.echo_mask is not None and masks.echo.any(): + echo_loss = -(advantages * trainer_logprobs)[masks.echo].sum() / inputs.echo_loss_scale + else: + echo_loss = torch.zeros((), device=trainer_logprobs.device, dtype=trainer_logprobs.dtype) + + loss = rl_loss + echo_loss metrics = { - "masked_mismatch_kl": _safe_mean(mismatch_kl, loss_mask & is_masked), # all trainable, masked tokens - "unmasked_mismatch_kl": _safe_mean(mismatch_kl, keep_mask), # all trainable, unmasked tokens - "is_masked": _safe_mean(is_masked, loss_mask), - "is_masked_low": _safe_mean(is_masked_low, loss_mask), - "is_masked_high": _safe_mean(is_masked_high, loss_mask), - "masked_advantage_positive": _safe_mean(positive_advantages, drop_mask), - "masked_advantage_negative": _safe_mean(negative_advantages, drop_mask), + "masked_mismatch_kl": _safe_mean(mismatch_kl, dppo_drop_mask), + "unmasked_mismatch_kl": _safe_mean(mismatch_kl, dppo_keep_mask), + "is_masked": _safe_mean(dppo_drop_mask, masks.rl), + "is_masked_low": _safe_mean(is_masked_low, masks.rl), + "is_masked_high": _safe_mean(is_masked_high, masks.rl), + "masked_advantage_positive": _safe_mean(positive_advantages, dppo_drop_mask), + "masked_advantage_negative": _safe_mean(negative_advantages, dppo_drop_mask), } + if inputs.echo_mask is not None: + metrics["echo_nll"] = _safe_mean(-trainer_logprobs, masks.echo) + metrics["echo_token_count"] = masks.echo.sum().float() + return LossOutputs(loss=loss, metrics=metrics) @@ -202,7 +235,7 @@ def opd_loss_fn(inputs: LossInputs) -> LossOutputs: pg_loss = keep_mask * advantages * importance_ratio kl_loss = loss_mask * log_importance_ratio**2 - loss = (-pg_loss + 1e-3 * kl_loss).sum() + loss = (-pg_loss + 1e-3 * kl_loss).sum() / inputs.rl_loss_scale metrics = { "masked_mismatch_kl": _safe_mean(mismatch_kl, loss_mask & is_masked), @@ -223,7 +256,7 @@ def sft_loss_fn(inputs: LossInputs) -> LossOutputs: trainer_logprobs = inputs.trainer_logprobs loss_mask = inputs.loss_mask - loss = -(trainer_logprobs[loss_mask]).sum() + loss = -(trainer_logprobs[loss_mask]).sum() / inputs.rl_loss_scale metrics = { "nll": _safe_mean(-trainer_logprobs, loss_mask), } @@ -249,7 +282,13 @@ def setup_loss_fns(loss_config: LossConfig) -> dict[str, LossFn]: kwargs = loss_config.kwargs def rl_fn(inputs: LossInputs) -> LossOutputs: - return custom_fn(inputs, **kwargs) + if inputs.echo_mask is not None and inputs.echo_mask.any(): + raise ValueError( + "Echo is only supported with the default RL loss. " + "CustomLossConfig receives the legacy loss_mask/advantages contract and cannot safely interpret echo." + ) + result = custom_fn(inputs, **kwargs) + return LossOutputs(loss=result.loss / inputs.rl_loss_scale, metrics=result.metrics) else: def rl_fn(inputs: LossInputs) -> LossOutputs: @@ -265,8 +304,10 @@ def compute_loss( advantages: list[Float[Tensor, " seq_i"]], loss_mask: list[Bool[Tensor, " seq_i"]], loss_fns: dict[str, LossFn], - loss_scale: int, + rl_loss_scale: int, training_mode: str = "rl", + echo_mask: list[Bool[Tensor, " seq_i"]] | None = None, + echo_loss_scale: int | None = None, ) -> tuple[Float[Tensor, ""], dict[str, Any]]: """ Compute loss for packed sequences (batch size = 1, multiple sequences packed along sequence dimension). @@ -282,11 +323,15 @@ def compute_loss( advantages: Advantages for each sequence loss_mask: Loss mask for each sequence loss_fns: Per-mode loss fn dispatch table from setup_loss_fns() - loss_scale: Scale factor to normalize the loss + rl_loss_scale: Global RL/non-echo token denominator training_mode: Selects which loss fn to apply + echo_mask: Per-sequence echo masks (parallel to loss_mask). Echo tokens + are excluded from RL terms and trained through the echo CE term. + echo_loss_scale: Global echo token denominator. Defaults to rl_loss_scale + for backward-compatible direct calls. Returns: - Tuple of (scaled_loss, aggregated_metrics) + Tuple of (total_loss, aggregated_metrics) """ try: effective_loss_fn = loss_fns[training_mode] @@ -301,13 +346,21 @@ def compute_loss( if teacher_logprobs is None: teacher_logprobs = [None] * len(trainer_logprobs) + if echo_mask is None: + echo_mask_list: list[Bool[Tensor, " seq_i"] | None] = [None] * len(trainer_logprobs) + else: + echo_mask_list = list(echo_mask) + if echo_loss_scale is None: + echo_loss_scale = rl_loss_scale - for t_logp, i_logp, teach_logp, adv, mask in zip( + for t_logp, i_logp, teach_logp, adv, mask, echo_m in zip( trainer_logprobs, inference_logprobs, teacher_logprobs, advantages, loss_mask, + echo_mask_list, + strict=True, ): inputs = LossInputs( trainer_logprobs=t_logp, @@ -315,7 +368,12 @@ def compute_loss( teacher_logprobs=teach_logp, advantages=adv, loss_mask=mask, + echo_mask=echo_m, + rl_loss_scale=rl_loss_scale, + echo_loss_scale=echo_loss_scale, ) + if echo_m is not None and echo_m.any() and training_mode != "rl": + raise ValueError("Echo is only supported for training_mode='rl'.") result = effective_loss_fn(inputs) @@ -326,8 +384,6 @@ def compute_loss( all_metrics[k] = [] all_metrics[k].append(v) - scaled_loss = total_loss / loss_scale - aggregated: dict[str, Any] = {} for k, v in all_metrics.items(): if v[0].dim() == 0: @@ -335,4 +391,4 @@ def compute_loss( else: aggregated[k] = torch.cat(v) - return scaled_loss, aggregated + return total_loss, aggregated diff --git a/src/prime_rl/trainer/rl/packer.py b/src/prime_rl/trainer/rl/packer.py index cf9dcfa02e..75af5185db 100644 --- a/src/prime_rl/trainer/rl/packer.py +++ b/src/prime_rl/trainer/rl/packer.py @@ -181,6 +181,11 @@ def _validate_sample(self, sample: TrainingSample) -> tuple[bool, str | None]: False, f"Run wrote a sample with teacher logprobs length != sample length ({len(sample.teacher_logprobs)} != {sample_length})", ) + if sample.echo_alpha is not None and len(sample.echo_alpha) != sample_length: + return ( + False, + f"Run wrote a sample with echo_alpha length != sample length ({len(sample.echo_alpha)} != {sample_length})", + ) return True, None def _get_batch(self) -> None: @@ -345,6 +350,20 @@ def setup_packer( ) -> BasePacker: multi_run_manager = get_multi_run_manager() if multi_run_manager.max_runs == 1: - return SinglePacker(dp_world_size, seq_len, pad_to_multiple_of, tokenizer, transport_config, start_step) + return SinglePacker( + dp_world_size, + seq_len, + pad_to_multiple_of, + tokenizer, + transport_config, + start_step, + ) else: - return MultiPacker(dp_world_size, seq_len, pad_to_multiple_of, tokenizer, transport_config, start_step) + return MultiPacker( + dp_world_size, + seq_len, + pad_to_multiple_of, + tokenizer, + transport_config, + start_step, + ) diff --git a/src/prime_rl/trainer/rl/token_export.py b/src/prime_rl/trainer/rl/token_export.py index b1f7c96cfa..6d3ffc878a 100644 --- a/src/prime_rl/trainer/rl/token_export.py +++ b/src/prime_rl/trainer/rl/token_export.py @@ -9,7 +9,7 @@ from torch import Tensor from prime_rl.configs.trainer import DefaultLossConfig, TrainerConfig -from prime_rl.trainer.rl.loss import compute_importance_ratio_and_mismatch_kl +from prime_rl.trainer.rl.loss import compute_importance_ratio_and_mismatch_kl, split_loss_masks SCHEMA_VERSION = 1 @@ -124,20 +124,25 @@ def _export_columns( seq_len = len(token_ids) trainer_logprobs = model_output["logprobs"] export_tensors = _compute_export_tensors(micro_batch, trainer_logprobs, loss_config) + rl_loss_mask = export_tensors["rl_loss_mask"] return { "token_ids": token_ids, "position_ids": _tensor_to_ints(micro_batch["position_ids"]), "loss_mask": _tensor_to_bools(micro_batch["loss_mask"]), + "echo_mask": _optional_tensor_to_bools(micro_batch.get("echo_mask"), seq_len), + "rl_loss_mask": _optional_tensor_to_bools(rl_loss_mask, seq_len), "advantages": _tensor_to_floats(micro_batch["advantages"]), "rewards": _optional_tensor_to_floats(micro_batch.get("rewards"), seq_len), "inference_logprobs": _tensor_to_floats(micro_batch["inference_logprobs"]), "trainer_logprobs": _tensor_to_floats(trainer_logprobs), "entropy": _tensor_to_floats(model_output["entropy"]), - "mismatch_kl": _optional_tensor_to_floats(export_tensors["mismatch_kl"], seq_len), - "log_importance_ratio": _optional_tensor_to_floats(export_tensors["log_importance_ratio"], seq_len), - "importance_ratio": _optional_tensor_to_floats(export_tensors["importance_ratio"], seq_len), - "prob_delta": _optional_tensor_to_floats(export_tensors["prob_delta"], seq_len), + "mismatch_kl": _optional_tensor_to_floats(export_tensors["mismatch_kl"], seq_len, mask=rl_loss_mask), + "log_importance_ratio": _optional_tensor_to_floats( + export_tensors["log_importance_ratio"], seq_len, mask=rl_loss_mask + ), + "importance_ratio": _optional_tensor_to_floats(export_tensors["importance_ratio"], seq_len, mask=rl_loss_mask), + "prob_delta": _optional_tensor_to_floats(export_tensors["prob_delta"], seq_len, mask=rl_loss_mask), "is_masked": _optional_tensor_to_bools(export_tensors["is_masked"], seq_len), "is_masked_high": _optional_tensor_to_bools(export_tensors["is_masked_high"], seq_len), "is_masked_low": _optional_tensor_to_bools(export_tensors["is_masked_low"], seq_len), @@ -156,16 +161,21 @@ def _compute_export_tensors( "is_masked": None, "is_masked_high": None, "is_masked_low": None, + "rl_loss_mask": None, } if micro_batch["training_mode"] == "sft": return fields inference_logprobs = micro_batch["inference_logprobs"].to(trainer_logprobs.device) loss_mask = micro_batch["loss_mask"].to(trainer_logprobs.device) + echo_mask_raw = micro_batch.get("echo_mask") + echo_mask = echo_mask_raw.to(trainer_logprobs.device) if echo_mask_raw is not None else None + masks = split_loss_masks(loss_mask, echo_mask) advantages = micro_batch["advantages"].to(trainer_logprobs.device) with torch.no_grad(): log_ratio, ratio, mismatch_kl = compute_importance_ratio_and_mismatch_kl(trainer_logprobs, inference_logprobs) prob_delta = torch.exp(trainer_logprobs) - torch.exp(inference_logprobs) + fields["rl_loss_mask"] = masks.rl fields["log_importance_ratio"] = log_ratio fields["importance_ratio"] = ratio fields["mismatch_kl"] = mismatch_kl @@ -176,9 +186,9 @@ def _compute_export_tensors( positive_advantages = advantages > 0 negative_advantages = advantages < 0 invalid = torch.where(positive_advantages, invalid_high, invalid_low) - fields["is_masked"] = loss_mask & invalid - fields["is_masked_high"] = loss_mask & positive_advantages & invalid_high - fields["is_masked_low"] = loss_mask & negative_advantages & invalid_low + fields["is_masked"] = masks.rl & invalid + fields["is_masked_high"] = masks.rl & positive_advantages & invalid_high + fields["is_masked_low"] = masks.rl & negative_advantages & invalid_low return fields @@ -195,10 +205,15 @@ def _tensor_to_floats(tensor: Tensor) -> list[float | None]: return [_json_float(value) for value in values] -def _optional_tensor_to_floats(tensor: Tensor | None, seq_len: int) -> list[float | None]: +def _optional_tensor_to_floats(tensor: Tensor | None, seq_len: int, mask: Tensor | None = None) -> list[float | None]: if tensor is None: return [None] * seq_len - return _tensor_to_floats(tensor) + if mask is None: + return _tensor_to_floats(tensor) + + values = tensor.detach().to(dtype=torch.float32, device="cpu").reshape(-1).tolist() + keep = mask.detach().to(dtype=torch.bool, device="cpu").reshape(-1).tolist() + return [_json_float(value) if keep_value else None for value, keep_value in zip(values, keep, strict=True)] def _optional_tensor_to_bools(tensor: Tensor | None, seq_len: int) -> list[bool | None]: diff --git a/src/prime_rl/trainer/rl/train.py b/src/prime_rl/trainer/rl/train.py index 83afa666dc..ad628be08f 100644 --- a/src/prime_rl/trainer/rl/train.py +++ b/src/prime_rl/trainer/rl/train.py @@ -32,6 +32,7 @@ compute_importance_ratio_and_mismatch_kl, selective_log_softmax, setup_loss_fns, + split_loss_masks, shift_tensor_left, shift_tensor_right, ) @@ -350,15 +351,26 @@ def load_run_checkpoint(_optimizer, idx: int) -> None: forward_backward_start_time = time.perf_counter() seq_len = micro_batches[0]["input_ids"].shape[1] - # Normalize by the global (dp_cp) number of unmasked tokens in the batch, so every rank - # divides by the same denominator. With a per-rank denominator, ranks with fewer loss - # tokens implicitly upweight their per-token gradient contribution after FSDP averaging. + # Normalize by global (dp_cp) denominators, so every rank divides by + # the same values. RL and echo terms use separate denominators: echo can + # add prompt/tool tokens, but that should not dilute ordinary RL + # completion gradients. # FSDP's per-rank divide is undone after the microbatch loop via fsdp_gradient_divide_factor. - local_loss_scale = sum(micro_batch["loss_mask"].sum().item() for micro_batch in micro_batches) - global_loss_scale = torch.tensor(local_loss_scale, dtype=torch.int64, device="cuda") + local_rl_loss_scale = 0 + local_echo_loss_scale = 0 + for micro_batch in micro_batches: + masks = split_loss_masks(micro_batch["loss_mask"], micro_batch.get("echo_mask")) + local_rl_loss_scale += masks.rl.sum().item() + local_echo_loss_scale += masks.echo.sum().item() + global_loss_scales = torch.tensor( + [local_rl_loss_scale, local_echo_loss_scale], + dtype=torch.int64, + device="cuda", + ) dp_cp_group = parallel_dims.get_mesh("dp_cp").get_group() - dist.all_reduce(global_loss_scale, op=dist.ReduceOp.SUM, group=dp_cp_group) - loss_scale = max(global_loss_scale.item(), 1) + dist.all_reduce(global_loss_scales, op=dist.ReduceOp.SUM, group=dp_cp_group) + rl_loss_scale = max(global_loss_scales[0].item(), 1) + echo_loss_scale = max(global_loss_scales[1].item(), 1) logger.debug(f"Starting forward and backward pass ({batch_size=})") tensors = Tensors() # Used to accumulate tensor statistics across micro-batches and ranks for logging @@ -372,6 +384,7 @@ def load_run_checkpoint(_optimizer, idx: int) -> None: position_ids = micro_batch["position_ids"].to("cuda") advantages = micro_batch["advantages"].to("cuda") loss_mask = micro_batch["loss_mask"].to("cuda") + echo_mask = micro_batch["echo_mask"].to("cuda") if micro_batch.get("echo_mask") is not None else None inference_logprobs = micro_batch["inference_logprobs"].to("cuda") teacher_logprobs = ( micro_batch["teacher_logprobs"].to("cuda") if micro_batch["teacher_logprobs"] is not None else None @@ -473,6 +486,7 @@ def load_run_checkpoint(_optimizer, idx: int) -> None: # Compute loss response_lengths = get_response_lengths(position_ids) + echo_mask_split = echo_mask.squeeze().split(response_lengths) if echo_mask is not None else None loss, loss_tensors = compute_loss( trainer_logprobs=out["logprobs"].squeeze().split(response_lengths), inference_logprobs=inference_logprobs.squeeze().split(response_lengths), @@ -482,8 +496,10 @@ def load_run_checkpoint(_optimizer, idx: int) -> None: advantages=advantages.squeeze().split(response_lengths), loss_mask=loss_mask.squeeze().split(response_lengths), loss_fns=loss_fns, - loss_scale=loss_scale, + rl_loss_scale=rl_loss_scale, training_mode=micro_batch["training_mode"], + echo_mask=echo_mask_split, + echo_loss_scale=echo_loss_scale, ) # Backward pass @@ -491,12 +507,13 @@ def load_run_checkpoint(_optimizer, idx: int) -> None: loss.backward() # Add relevant tensors to tensor dict for logging purposes - entropy = out["entropy"][loss_mask].detach().to("cpu") + loss_masks = split_loss_masks(loss_mask, echo_mask) + entropy = out["entropy"][loss_masks.rl].detach().to("cpu") tensors["entropy/all"].append(entropy) tensors["loss"].append(loss.detach().to("cpu").unsqueeze(0)) env_names = micro_batch["env_names"] - masked_env_names = [env_name for env_name, keep in zip(env_names, loss_mask.flatten().tolist()) if keep] + masked_env_names = [env_name for env_name, keep in zip(env_names, loss_masks.rl.flatten().tolist()) if keep] env_to_indices: dict[str, list[int]] = {} for idx, env_name in enumerate(masked_env_names): env_to_indices.setdefault(env_name, []).append(idx) @@ -507,7 +524,7 @@ def load_run_checkpoint(_optimizer, idx: int) -> None: if micro_batch["training_mode"] != "sft": with torch.no_grad(): _, _, mismatch_kl = compute_importance_ratio_and_mismatch_kl(out["logprobs"], inference_logprobs) - mismatch_kl = mismatch_kl[loss_mask].detach().to("cpu") + mismatch_kl = mismatch_kl[loss_masks.rl].detach().to("cpu") tensors["mismatch_kl/all"].append(mismatch_kl) for env_name, indices in env_to_indices.items(): tensors[f"mismatch_kl/{env_name}"].append(mismatch_kl[indices]) diff --git a/src/prime_rl/transport/types.py b/src/prime_rl/transport/types.py index 1bb31c9325..07bd4914c8 100644 --- a/src/prime_rl/transport/types.py +++ b/src/prime_rl/transport/types.py @@ -56,6 +56,11 @@ class TrainingSample(msgspec.Struct, array_like=True, gc=False, omit_defaults=Tr # taus), sft uses sft_loss_fn. Stamped by the orchestrator from training_mode. training_mode: TrainingMode = "rl" + # Per-token echo alpha parallel to prompt_ids + completion_ids. Field None + # means no echo; per-token None means ordinary RL; a float means echo CE + # with that alpha. ``0.0`` is distinct from None. + echo_alpha: list[float | None] | None = None + class TrainingBatch(msgspec.Struct, array_like=True, gc=False, omit_defaults=True): """A batch of training examples with metadata for transport.""" @@ -89,3 +94,7 @@ class MicroBatch(msgspec.Struct, array_like=True, gc=False, omit_defaults=True): # sft → sft loss). All samples packed into a micro batch share the same mode. training_mode: TrainingMode = "rl" rewards: list[float] | None = None + + # True where the token participates in echo CE. Survives packing/padding + # like ``loss_mask``; None if no sample echoes. + echo_mask: list[bool] | None = None diff --git a/tests/unit/orchestrator/test_batch.py b/tests/unit/orchestrator/test_batch.py index 7531423c72..574c808d71 100644 --- a/tests/unit/orchestrator/test_batch.py +++ b/tests/unit/orchestrator/test_batch.py @@ -117,6 +117,25 @@ def test_prepare_sample_propagates_training_mode(make_training_example): assert micro_batch.training_mode == "sft" +def test_prepare_sample_echo_overlay(make_training_example): + example = make_training_example() + example.echo_alpha = [0.5, 0.0, 0.25, None] + + micro_batch = prepare_sample(example, seq_len=16) + + assert micro_batch.advantages == [1.0, 0.0, 0.25, 1.0] + assert micro_batch.loss_mask == [False, True, True, True] + assert micro_batch.echo_mask == [False, True, True, False] + + +def test_prepare_sample_rejects_misaligned_echo_alpha(make_training_example): + example = make_training_example() + example.echo_alpha = [0.5] + + with pytest.raises(ValueError, match="echo_alpha length"): + prepare_sample(example, seq_len=16) + + def test_prepare_batch_does_not_pack_mixed_training_mode(make_training_example): rl_example = make_training_example(training_mode="rl") sft_example = make_training_example(training_mode="sft") diff --git a/tests/unit/orchestrator/test_trajectories.py b/tests/unit/orchestrator/test_trajectories.py index bda129cd43..00a5836e78 100644 --- a/tests/unit/orchestrator/test_trajectories.py +++ b/tests/unit/orchestrator/test_trajectories.py @@ -1,15 +1,30 @@ +import asyncio +import uuid +from types import SimpleNamespace from unittest.mock import MagicMock import numpy as np import pybase64 import pytest import verifiers as vf - +from pydantic import ValidationError + +from prime_rl.configs.orchestrator import ( + AssistantRoleEchoConfig, + EchoConfig, + EchoFilterConfig, + SystemRoleEchoConfig, + ToolRoleEchoConfig, + UserRoleEchoConfig, +) +from prime_rl.orchestrator.echo import _build_step_echo_alpha, apply_echo_filter, build_echo_annotations +from prime_rl.orchestrator.train_sink import TrainSink from prime_rl.orchestrator.trajectories import ( _deserialize_tool_calls, align_routed_experts, interleave_rollout, ) +from prime_rl.orchestrator.types import TrainRollout _interleave_rollout = interleave_rollout @@ -1378,3 +1393,402 @@ def test_interleave_rollout_packs_pixels_from_renderer_mm_data(): assert _decode_mm_thw(sample) == [[1, 2, 3], [1, 4, 4]] # mm_token_type_ids: image at token 2, video at token 5, rest 0. assert sample.mm_token_type_ids == [0, 1, 0, 0, 2, 0, 0] + + +# --------------------------------------------------------------------------- +# Per-role echo_alpha construction +# --------------------------------------------------------------------------- + + +def _attribution( + message_indices: list[int], + is_content: list[bool], + message_roles: list[str] | None = None, + message_tool_names: list[str | None] | None = None, +) -> dict: + out: dict = {"message_indices": message_indices, "is_content": is_content} + if message_roles is not None: + out["message_roles"] = message_roles + if message_tool_names is not None: + out["message_tool_names"] = message_tool_names + return out + + +@pytest.mark.parametrize( + ("attribution", "prompt_len", "completion_len", "echo_config", "expected"), + [ + pytest.param( + None, + 4, + 2, + EchoConfig(assistant=AssistantRoleEchoConfig(alpha=0.3)), + [None, None, None, None, 0.3, 0.3], + id="no_attribution_marks_assistant_completion", + ), + pytest.param( + _attribution( + message_indices=[0, 0, 1, 1, 2, 2], + is_content=[False, True, False, True, False, True], + message_roles=["user", "tool", "tool"], + message_tool_names=[None, "calc", "lookup"], + ), + 6, + 0, + EchoConfig(tool=ToolRoleEchoConfig(alpha=0.5, tool_names=["lookup"])), + [None, None, None, None, None, 0.5], + id="tool_name_filter", + ), + pytest.param( + _attribution(message_indices=[0], is_content=[True], message_roles=["user"]), + 1, + 2, + EchoConfig(assistant=AssistantRoleEchoConfig(alpha=0.0), tool=None), + [None, 0.0, 0.0], + id="assistant_zero_kills_rl", + ), + pytest.param( + _attribution( + message_indices=[0, 1, 2], + is_content=[True, True, True], + message_roles=["user", "tool", "system"], + message_tool_names=[None, "lookup", None], + ), + 3, + 2, + EchoConfig( + user=UserRoleEchoConfig(alpha=0.1), + tool=ToolRoleEchoConfig(alpha=0.5), + system=SystemRoleEchoConfig(alpha=0.05), + assistant=AssistantRoleEchoConfig(alpha=0.9), + ), + [0.1, 0.5, 0.05, 0.9, 0.9], + id="per_role_alphas_differ", + ), + pytest.param( + _attribution( + message_indices=[0, 0, 0], + is_content=[False, True, True], + message_roles=["system"], + ), + 3, + 0, + EchoConfig(system=SystemRoleEchoConfig(alpha=0.1), tool=None), + [None, 0.1, 0.1], + id="system_role", + ), + pytest.param( + _attribution( + message_indices=[0, 0, 1, 1], + is_content=[False, True, False, True], + message_roles=["user", "tool"], + message_tool_names=[None, "lookup"], + ), + 4, + 0, + EchoConfig(user=UserRoleEchoConfig(alpha=0.2), tool=None), + [None, 0.2, None, None], + id="user_role", + ), + pytest.param( + _attribution( + message_indices=[0, 0, 1, 1], + is_content=[False, True, False, True], + message_roles=["user", "assistant"], + ), + 4, + 3, + EchoConfig(assistant=AssistantRoleEchoConfig(alpha=0.8), tool=None), + [None, None, None, 0.8, 0.8, 0.8, 0.8], + id="assistant_prompt_and_completion", + ), + pytest.param( + _attribution( + message_indices=[0, 0, 1, 1, 1], + is_content=[False, True, False, True, True], + message_roles=["user", "tool"], + message_tool_names=[None, "lookup"], + ), + 5, + 2, + EchoConfig(tool=ToolRoleEchoConfig(alpha=0.7, tool_names=None)), + [None, None, None, 0.7, 0.7, None, None], + id="tool_default_all_tools", + ), + pytest.param( + _attribution( + message_indices=[0, 0, 0, 0], + is_content=[False, False, True, False], + message_roles=["tool"], + message_tool_names=["lookup"], + ), + 4, + 0, + EchoConfig(tool=ToolRoleEchoConfig(alpha=0.4)), + [None, None, 0.4, None], + id="skips_non_content_tokens", + ), + ], +) +def test_build_step_echo_alpha_baseline(attribution, prompt_len, completion_len, echo_config, expected): + assert ( + _build_step_echo_alpha( + prompt_attribution=attribution, + prompt_len=prompt_len, + completion_len=completion_len, + echo_config=echo_config, + ) + == expected + ) + + +def test_echo_config_rejects_filter_without_role(): + with pytest.raises(ValidationError, match="at least one of"): + EchoConfig(filter=EchoFilterConfig(import_path="my_module.my_filter")) + + +def test_tool_role_echo_config_rejects_empty_tool_names(): + with pytest.raises(ValidationError, match=r"too_short|at least 1"): + ToolRoleEchoConfig(tool_names=[]) + + +def test_tool_role_echo_config_coerces_tool_names_to_set(): + config = ToolRoleEchoConfig(tool_names=["lookup", "lookup", "calc"]) + + assert config.tool_names == {"lookup", "calc"} + + +# --------------------------------------------------------------------------- +# _build_step_echo_alpha — filter_mask composition +# --------------------------------------------------------------------------- + + +def _tool_only_attribution(prompt_len: int) -> dict: + return _attribution( + message_indices=[0] * prompt_len, + is_content=[True] * prompt_len, + message_roles=["tool"], + message_tool_names=["lookup"], + ) + + +@pytest.mark.parametrize( + ("attribution", "prompt_len", "completion_len", "echo_config", "filter_mask", "expected"), + [ + pytest.param( + _tool_only_attribution(4), + 4, + 2, + EchoConfig(tool=ToolRoleEchoConfig(alpha=0.5)), + [True, False, True, False, False, False], + [0.5, None, 0.5, None, None, None], + id="narrows_baseline", + ), + pytest.param( + _attribution( + message_indices=[0, 0, 1, 1], + is_content=[True, True, True, True], + message_roles=["user", "tool"], + message_tool_names=[None, "lookup"], + ), + 4, + 0, + EchoConfig(tool=ToolRoleEchoConfig(alpha=0.5)), + [True, True, True, True], + [None, None, 0.5, 0.5], + id="cannot_add_echo_to_disabled_role", + ), + pytest.param( + _attribution( + message_indices=[0, 1, 2], + is_content=[True, True, True], + message_roles=["system", "user", "tool"], + message_tool_names=[None, None, "lookup"], + ), + 3, + 2, + EchoConfig( + system=SystemRoleEchoConfig(alpha=0.05), + user=UserRoleEchoConfig(alpha=0.1), + tool=ToolRoleEchoConfig(alpha=0.5), + assistant=AssistantRoleEchoConfig(alpha=0.9), + ), + [True, False, True, True, False], + [0.05, None, 0.5, 0.9, None], + id="mixed_roles", + ), + ], +) +def test_build_step_echo_alpha_filter_composition( + attribution, prompt_len, completion_len, echo_config, filter_mask, expected +): + assert ( + _build_step_echo_alpha( + prompt_attribution=attribution, + prompt_len=prompt_len, + completion_len=completion_len, + echo_config=echo_config, + filter_mask=filter_mask, + ) + == expected + ) + + +# --------------------------------------------------------------------------- +# apply_echo_filter — shape/type validation + invocation contract +# --------------------------------------------------------------------------- + + +def _step_with_tokens(prompt_len: int, completion_len: int, attribution: dict | None = None) -> vf.TrajectoryStep: + tokens_kwargs: dict = dict( + prompt_ids=list(range(prompt_len)), + prompt_mask=[0] * prompt_len, + completion_ids=list(range(prompt_len, prompt_len + completion_len)), + completion_mask=[1] * completion_len, + completion_logprobs=[-0.1] * completion_len, + overlong_prompt=False, + is_truncated=False, + ) + if attribution is not None: + tokens_kwargs["prompt_attribution"] = attribution + return vf.TrajectoryStep( + prompt=[{"role": "user", "content": "U"}], + completion=[{"role": "assistant", "content": "A"}], + response=MagicMock(), + tokens=vf.TrajectoryStepTokens(**tokens_kwargs), + reward=None, + advantage=None, + is_truncated=False, + trajectory_id="t", + extras={}, + ) + + +def _rollout_with_steps(*step_dims: tuple, env_name: str = "test-env") -> vf.RolloutOutput: + return vf.RolloutOutput( + example_id=0, + env_name=env_name, + trajectory=[_step_with_tokens(*dims) for dims in step_dims], + sampling_args={"temperature": 1.0}, + error=None, + ) + + +def _const_filter(masks): + def filter_fn(rollout): + return masks + + return filter_fn + + +@pytest.mark.parametrize( + ("dims", "filter_return", "exc_type", "match"), + [ + pytest.param( + [(3, 2), (4, 1)], [[True] * 5], ValueError, r"returned 1 per-step masks.*has 2", id="outer_too_short" + ), + pytest.param( + [(3, 2)], [[True] * 5, [True] * 5], ValueError, r"returned 2 per-step masks.*has 1", id="outer_too_long" + ), + pytest.param( + [(3, 2), (4, 1)], + [[True] * 5, [True] * 3], + ValueError, + r"step 1.*mask length 3.*expected 5.*prompt_len=4.*completion_len=1", + id="inner_mismatch", + ), + pytest.param([(2, 1)], "not a list", TypeError, r"must return list.*got str", id="non_list_return"), + pytest.param( + [(1, 1)], [[True, 1]], TypeError, r"step 0.*mask\[1\].*must be a plain bool.*int", id="non_bool_int" + ), + ], +) +def test_apply_echo_filter_invalid_raises(dims, filter_return, exc_type, match): + rollout = _rollout_with_steps(*dims) + with pytest.raises(exc_type, match=match): + apply_echo_filter(rollout, _const_filter(filter_return)) + + +def test_apply_echo_filter_receives_full_rollout(): + rollout = _rollout_with_steps((2, 1)) + seen: dict = {} + + def filter_fn(rollout): + seen.update(example_id=rollout["example_id"], error=rollout["error"], n=len(rollout["trajectory"])) + return [[True] * 3] + + apply_echo_filter(rollout, filter_fn) + assert seen == {"example_id": 0, "error": None, "n": 1} + + +def test_apply_echo_filter_empty_trajectory_returns_empty_masks(): + rollout = vf.RolloutOutput( + example_id=0, env_name="test-env", trajectory=[], sampling_args={"temperature": 1.0}, error=None + ) + assert apply_echo_filter(rollout, _const_filter([])) == [] + + +def test_apply_echo_filter_propagates_user_exception(): + rollout = _rollout_with_steps((2, 1)) + + class FilterCrash(RuntimeError): + pass + + def filter_fn(rollout): + raise FilterCrash("boom") + + with pytest.raises(FilterCrash, match="boom"): + apply_echo_filter(rollout, filter_fn) + + +_TOOL_ATTRIBUTION = { + "message_indices": [0, 0, 0], + "is_content": [True, True, True], + "message_roles": ["tool"], + "message_tool_names": ["lookup"], +} + + +def test_interleave_rollout_filter_masks_narrows_sample_echo_alpha(): + rollout = _rollout_with_steps((3, 2, _TOOL_ATTRIBUTION)) + echo_config = EchoConfig(tool=ToolRoleEchoConfig(alpha=0.5)) + annotations = build_echo_annotations(rollout, echo_config, _const_filter([[True, False, True, True, True]])) + + filtered = _interleave_rollout(rollout, echo_annotations=annotations) + assert filtered[0].echo_alpha == [0.5, None, 0.5, None, None] + + +def test_train_sink_runs_echo_filter_without_prompt_attribution(tmp_path): + rollout_output = _rollout_with_steps((2, 2)) + filter_fn = MagicMock(return_value=[[True, True, True, False]]) + + env = SimpleNamespace( + echo_filter_fn=filter_fn, + config=SimpleNamespace(echo=EchoConfig(assistant=AssistantRoleEchoConfig(alpha=0.3))), + ) + train_envs = SimpleNamespace(get=lambda _env_name: env) + sink = TrainSink( + SimpleNamespace(output_dir=tmp_path), + tokenizer=None, + renderer=None, + train_envs=train_envs, + mm_token_type_ids_mapping=None, + batch_size=1, + token_batch_size=None, + advantage_config=None, + pre_filters=[], + post_filters=[], + ) + rollout = TrainRollout( + raw=rollout_output, + env_name="test-env", + example_id=0, + group_id=uuid.uuid4(), + policy_version=0, + off_policy_steps=0, + ) + + asyncio.run(sink.process_rollout(rollout)) + + filter_fn.assert_called_once_with(rollout_output) + assert len(rollout.samples) == 1 + assert rollout.samples[0].echo_alpha == [None, None, 0.3, None] diff --git a/tests/unit/train/rl/test_loss.py b/tests/unit/train/rl/test_loss.py index 1585dac7bd..7fa524d256 100644 --- a/tests/unit/train/rl/test_loss.py +++ b/tests/unit/train/rl/test_loss.py @@ -22,7 +22,7 @@ def test_grpo_loss(): advantages, loss_mask=loss_mask, loss_fns=loss_fns, - loss_scale=1.0, + rl_loss_scale=1.0, ) assert loss.shape == () @@ -42,7 +42,7 @@ def test_gspo_loss(): advantages, loss_mask=loss_mask, loss_fns=loss_fns, - loss_scale=1.0, + rl_loss_scale=1.0, ) assert loss.shape == () @@ -89,11 +89,11 @@ def test_sft_loss_matches_masked_nll(): advantages=advantages, loss_mask=loss_mask, loss_fns=loss_fns, - loss_scale=2, + rl_loss_scale=2, training_mode="sft", ) - # loss = -sum(masked logprobs) / loss_scale = -(-0.1 - 0.2) / 2 = 0.15 + # loss = -sum(masked logprobs) / rl_loss_scale = -(-0.1 - 0.2) / 2 = 0.15 assert torch.isclose(loss, torch.tensor(0.15, device=loss.device), atol=1e-6) assert "nll" in metrics @@ -112,7 +112,7 @@ def test_sft_loss_override_uses_masked_nll_with_default_loss_config(): advantages=advantages, loss_mask=loss_mask, loss_fns=loss_fns, - loss_scale=2, + rl_loss_scale=2, training_mode="sft", ) @@ -121,6 +121,76 @@ def test_sft_loss_override_uses_masked_nll_with_default_loss_config(): assert "mismatch_kl" not in metrics +def test_default_loss_fn_uses_separate_echo_loss_scale(): + trainer_logprobs = [torch.tensor([-0.2, -0.4], dtype=torch.float32, device="cuda", requires_grad=True)] + inference_logprobs = [trainer_logprobs[0].detach().clone()] + advantages = [torch.tensor([1.0, 2.0], dtype=torch.float32, device="cuda")] + loss_mask = [torch.tensor([True, True], dtype=torch.bool, device="cuda")] + echo_mask = [torch.tensor([False, True], dtype=torch.bool, device="cuda")] + + loss_fns = setup_loss_fns(DefaultLossConfig(dppo_mask_high=10.0, dppo_mask_low=10.0, kl_tau=0.0)) + loss, metrics = compute_loss( + trainer_logprobs=trainer_logprobs, + inference_logprobs=inference_logprobs, + teacher_logprobs=None, + advantages=advantages, + loss_mask=loss_mask, + loss_fns=loss_fns, + rl_loss_scale=1, + echo_loss_scale=4, + echo_mask=echo_mask, + ) + + # RL term: -adv * ratio = -1.0. Echo term: -alpha * logprob / 4 = 0.2. + assert torch.isclose(loss, torch.tensor(-0.8, device=loss.device), atol=1e-6) + assert metrics["echo_token_count"].item() == 1 + loss.backward() + assert torch.isclose(trainer_logprobs[0].grad[1], torch.tensor(-0.5, device="cuda"), atol=1e-6) + + +@pytest.mark.parametrize("training_mode", ["sft", "opd"]) +def test_echo_rejected_for_non_rl_modes(training_mode): + trainer_logprobs = [torch.tensor([-0.1, -0.2], dtype=torch.float32, device="cuda")] + inference_logprobs = [torch.zeros(2, dtype=torch.float32, device="cuda")] + teacher_logprobs = [torch.zeros(2, dtype=torch.float32, device="cuda")] if training_mode == "opd" else None + advantages = [torch.ones(2, dtype=torch.float32, device="cuda")] + loss_mask = [torch.ones(2, dtype=torch.bool, device="cuda")] + echo_mask = [torch.tensor([False, True], dtype=torch.bool, device="cuda")] + + with pytest.raises(ValueError, match="Echo is only supported"): + compute_loss( + trainer_logprobs=trainer_logprobs, + inference_logprobs=inference_logprobs, + teacher_logprobs=teacher_logprobs, + advantages=advantages, + loss_mask=loss_mask, + loss_fns=setup_loss_fns(DefaultLossConfig()), + rl_loss_scale=1, + training_mode=training_mode, + echo_mask=echo_mask, + ) + + +def test_echo_rejected_for_custom_rl_loss(): + loss_fns = setup_loss_fns( + CustomLossConfig( + import_path="tests.unit.train.rl.test_loss._dummy_custom_loss", + kwargs={"multiplier": 2.0}, + ) + ) + inputs = LossInputs( + trainer_logprobs=torch.randn(2, dtype=torch.float32, device="cuda"), + inference_logprobs=torch.randn(2, dtype=torch.float32, device="cuda"), + teacher_logprobs=None, + advantages=torch.ones(2, dtype=torch.float32, device="cuda"), + loss_mask=torch.ones(2, dtype=torch.bool, device="cuda"), + echo_mask=torch.tensor([False, True], dtype=torch.bool, device="cuda"), + ) + + with pytest.raises(ValueError, match="Echo is only supported with the default RL loss"): + loss_fns["rl"](inputs) + + def _dummy_custom_loss(inputs: LossInputs, multiplier: float = 1.0) -> LossOutputs: """A simple custom loss for testing.""" loss = (inputs.trainer_logprobs[inputs.loss_mask].sum() * multiplier).abs() diff --git a/tests/unit/train/rl/test_token_export.py b/tests/unit/train/rl/test_token_export.py new file mode 100644 index 0000000000..bdc16f042e --- /dev/null +++ b/tests/unit/train/rl/test_token_export.py @@ -0,0 +1,32 @@ +import math + +import torch + +from prime_rl.configs.trainer import DefaultLossConfig +from prime_rl.trainer.rl.token_export import _export_columns + + +def test_token_export_masks_rl_diagnostics_without_nan_sentinel(): + micro_batch = { + "input_ids": torch.tensor([101, 102, 103]), + "position_ids": torch.tensor([0, 1, 2]), + "loss_mask": torch.tensor([True, True, True]), + "echo_mask": torch.tensor([False, True, False]), + "advantages": torch.tensor([1.0, 0.5, -1.0]), + "rewards": None, + "inference_logprobs": torch.tensor([-0.2, -0.3, -0.4]), + "env_names": ["env", "env", "env"], + "training_mode": "rl", + } + model_output = { + "logprobs": torch.tensor([-0.1, -0.5, -0.45]), + "entropy": torch.tensor([1.0, 1.1, 1.2]), + } + + columns = _export_columns(micro_batch, model_output, DefaultLossConfig()) + + assert columns["rl_loss_mask"] == [True, False, True] + for key in ("mismatch_kl", "log_importance_ratio", "importance_ratio", "prob_delta"): + assert columns[key][1] is None + assert math.isfinite(columns[key][0]) + assert math.isfinite(columns[key][2]) diff --git a/uv.lock b/uv.lock index a3e998ef50..6ea2e43ebd 100644 --- a/uv.lock +++ b/uv.lock @@ -11,7 +11,7 @@ supported-markers = [ ] [options] -exclude-newer = "2026-05-25T23:50:03.973690224Z" +exclude-newer = "0001-01-01T00:00:00Z" # This has no effect and is included for backwards compatibility when using relative exclude-newer values. exclude-newer-span = "P7D" [options.exclude-newer-package] @@ -76,7 +76,7 @@ dependencies = [ [package.metadata] requires-dist = [ { name = "math-verify", specifier = ">=0.8.0" }, - { name = "verifiers", specifier = ">=0.1.12.dev1" }, + { name = "verifiers", specifier = ">=0.1.15.dev17" }, ] [[package]] @@ -91,7 +91,7 @@ dependencies = [ [package.metadata] requires-dist = [ { name = "math-verify", specifier = ">=0.8.0" }, - { name = "verifiers", specifier = ">=0.1.12.dev1" }, + { name = "verifiers", specifier = ">=0.1.15.dev17" }, ] [[package]] @@ -276,6 +276,19 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/64/b4/17d4b0b2a2dc85a6df63d1157e028ed19f90d4cd97c36717afef2bc2f395/attrs-26.1.0-py3-none-any.whl", hash = "sha256:c647aa4a12dfbad9333ca4e71fe62ddc36f4e63b2d260a37a8b83d2f043ac309", size = 67548, upload-time = "2026-03-19T14:22:23.645Z" }, ] +[[package]] +name = "authlib" +version = "1.7.2" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "cryptography", marker = "(platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux')" }, + { name = "joserfc", marker = "(platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux')" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/36/98/7d93f30d029643c0275dbc0bd6d5a6f670661ee6c9a94d93af7ab4887600/authlib-1.7.2.tar.gz", hash = "sha256:2cea25fefcd4e7173bdf1372c0afc265c8034b23a8cd5dcb6a9164b826c64231", size = 176511, upload-time = "2026-05-06T08:10:23.116Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/fb/95/adcb68e20c34162e9135f370d6e31737719c2b6f94bc953fe7ed1f10fe21/authlib-1.7.2-py2.py3-none-any.whl", hash = "sha256:3e1faedc9d87e7d56a164eca3ccb6ace0d61b94abe83e92242f8dc8bba9b4a9f", size = 259548, upload-time = "2026-05-06T08:10:21.436Z" }, +] + [[package]] name = "backoff" version = "2.2.1" @@ -369,6 +382,18 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/0d/fe/6bea5c9162869c5beba5d9c8abbed835ec85bf1ec1fba05a3822325c45f3/build-1.5.0-py3-none-any.whl", hash = "sha256:13f3eecb844759ab66efec90ca17639bbf14dc06cb2fdf37a9010322d9c50a6f", size = 26018, upload-time = "2026-04-30T03:18:23.644Z" }, ] +[[package]] +name = "burner-redis" +version = "0.1.7" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/52/89/54706febafc135095b2a9d797cfbd4eed2ab1ad7819808b99b587020471b/burner_redis-0.1.7.tar.gz", hash = "sha256:7474ff092669fd11ef765411572cdafcc3d89b8054aef4ca0617be6d6be4c680", size = 638644, upload-time = "2026-05-08T15:01:42.961Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/6f/c0/31c25cc88143eac2dddcc394151a0db627923d44c94376a83768552c9f13/burner_redis-0.1.7-cp310-abi3-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:20eba1917e3bca9eea5957d5700ff8defcb5a209e57a7841d005549aa0151f44", size = 1337341, upload-time = "2026-05-08T15:01:30.397Z" }, + { url = "https://files.pythonhosted.org/packages/e1/32/95cfa1833316ca2b6b2e58150a4900bc1ad256043cdd36198f1887618ccc/burner_redis-0.1.7-cp310-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:39111467059b8a28f15ea061d2414ec25c3e57c65759983f90f4d358e7d6a72d", size = 1366800, upload-time = "2026-05-08T15:01:32.891Z" }, + { url = "https://files.pythonhosted.org/packages/34/ad/93c3916f053f89b7b5760da5bf855cd78b7885d480f9cfcc64f3732c1dc2/burner_redis-0.1.7-cp310-abi3-musllinux_1_2_aarch64.whl", hash = "sha256:9b5adfe99aeb8407f468078f3769b2a63e9168fea12f7709df5d2a3b152706e4", size = 1538160, upload-time = "2026-05-08T15:01:34.667Z" }, + { url = "https://files.pythonhosted.org/packages/5c/b9/19bae42cb124932d71168bc8e5bcb1da33aa62b908e5e632b3d298d7cb15/burner_redis-0.1.7-cp310-abi3-musllinux_1_2_x86_64.whl", hash = "sha256:591a9d20685f9d6d22bf0c863b50b12dfcf328b06111b3f62c33cd3185d48ce0", size = 1591491, upload-time = "2026-05-08T15:01:36.708Z" }, +] + [[package]] name = "cachetools" version = "7.1.1" @@ -447,6 +472,12 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/db/8f/61959034484a4a7c527811f4721e75d02d653a35afb0b6054474d8185d4c/charset_normalizer-3.4.7-py3-none-any.whl", hash = "sha256:3dce51d0f5e7951f8bb4900c257dad282f49190fdbebecd4ba99bcc41fef404d", size = 61958, upload-time = "2026-04-02T09:28:37.794Z" }, ] +[[package]] +name = "chess" +version = "1.11.2" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/93/09/7d04d7581ae3bb8b598017941781bceb7959dd1b13e3ebf7b6a2cd843bc9/chess-1.11.2.tar.gz", hash = "sha256:a8b43e5678fdb3000695bdaa573117ad683761e5ca38e591c4826eba6d25bb39", size = 6131385, upload-time = "2025-02-25T19:10:27.328Z" } + [[package]] name = "chromadb" version = "1.5.9" @@ -506,7 +537,7 @@ wheels = [ [[package]] name = "code-env" -version = "0.3.1" +version = "0.3.2" source = { editable = "deps/research-environments/environments/code_env" } dependencies = [ { name = "prime-sandboxes", marker = "(platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux')" }, @@ -518,7 +549,7 @@ requires-dist = [ { name = "prime-sandboxes", specifier = ">=0.2.19" }, { name = "pytest", marker = "extra == 'dev'" }, { name = "pytest-asyncio", marker = "extra == 'dev'" }, - { name = "verifiers", specifier = ">=0.1.13.dev8" }, + { name = "verifiers", specifier = ">=0.1.15.dev17" }, ] provides-extras = ["dev"] @@ -534,7 +565,7 @@ dependencies = [ [package.metadata] requires-dist = [ { name = "pillow", specifier = ">=10.0.0" }, - { name = "verifiers", specifier = ">=0.1.10" }, + { name = "verifiers", specifier = ">=0.1.15.dev17" }, ] [[package]] @@ -589,6 +620,14 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/33/1d/acad9bd4e97f13f3e2b18a3977fe1b4a37ecf3d38d815333980c6c72e963/contourpy-1.3.3-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:459c1f020cd59fcfe6650180678a9993932d80d44ccde1fa1868977438f0b411", size = 1403386, upload-time = "2025-07-26T12:01:33.947Z" }, ] +[[package]] +name = "cronsim" +version = "2.7" +source = { registry = "https://pypi.org/simple" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/fb/1a/02f105147f7f2e06ed4f734ff5a6439590bb275a53dd91fc73df6312298a/cronsim-2.7-py3-none-any.whl", hash = "sha256:1e1431fa08c51dc7f72e67e571c7c7a09af26420169b607badd4ca9677ffad1e", size = 14213, upload-time = "2025-10-21T16:38:20.431Z" }, +] + [[package]] name = "cryptography" version = "48.0.0" @@ -711,6 +750,21 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/e7/05/c19819d5e3d95294a6f5947fb9b9629efb316b96de511b418c53d245aae6/cycler-0.12.1-py3-none-any.whl", hash = "sha256:85cef7cff222d8644161529808465972e51340599459b8ac3ccbac5a854e0d30", size = 8321, upload-time = "2023-10-07T05:32:16.783Z" }, ] +[[package]] +name = "cyclopts" +version = "4.16.1" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "attrs", marker = "(platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux')" }, + { name = "docstring-parser", marker = "(platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux')" }, + { name = "rich", marker = "(platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux')" }, + { name = "rich-rst", marker = "(platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux')" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/34/07/bf61d13de86d96a4c46aff00c9ca0eced44bcc8c3e16280605c1253e5720/cyclopts-4.16.1.tar.gz", hash = "sha256:8aa47bf92a5fb33abca5af05e576eecdb0d2f79893ad29238046df78370fc4a8", size = 181196, upload-time = "2026-05-25T15:29:08.518Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/00/8d/7f362c2fb8ef4decd2160bc24d4292c6ca658cc6d9a161b89ca5122bbdbf/cyclopts-4.16.1-py3-none-any.whl", hash = "sha256:617795392c4113a2c2cc7af716f20244900e87f23daa05442d1268d81472a592", size = 219020, upload-time = "2026-05-25T15:29:09.646Z" }, +] + [[package]] name = "dataclasses-json" version = "0.6.7" @@ -814,7 +868,7 @@ requires-dist = [ { name = "datasets", specifier = ">=4.0.0" }, { name = "diskcache", specifier = ">=5.6.0" }, { name = "pdfminer-six", specifier = ">=20251107" }, - { name = "verifiers", specifier = ">=0.1.11.dev0" }, + { name = "verifiers", specifier = ">=0.1.15.dev17" }, ] [[package]] @@ -938,6 +992,18 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/de/15/545e2b6cf2e3be84bc1ed85613edd75b8aea69807a71c26f4ca6a9258e82/email_validator-2.3.0-py3-none-any.whl", hash = "sha256:80f13f623413e6b197ae73bb10bf4eb0908faf509ad8362c5edeb0be7fd450b4", size = 35604, upload-time = "2025-08-26T13:09:05.858Z" }, ] +[[package]] +name = "exceptiongroup" +version = "1.3.1" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "typing-extensions", marker = "(platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux')" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/50/79/66800aadf48771f6b62f7eb014e352e5d06856655206165d775e675a02c9/exceptiongroup-1.3.1.tar.gz", hash = "sha256:8b412432c6055b0b7d14c310000ae93352ed6754f70fa8f7c34141f91c4e3219", size = 30371, upload-time = "2025-11-21T23:01:54.787Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/8a/0e/97c33bf5009bdbac74fd2beace167cab3f978feb69cc36f1ef79360d6c4e/exceptiongroup-1.3.1-py3-none-any.whl", hash = "sha256:a7a39a3bd276781e98394987d3a5701d0c4edffb633bb7a5144577f82c773598", size = 16740, upload-time = "2025-11-21T23:01:53.443Z" }, +] + [[package]] name = "executing" version = "2.2.1" @@ -1045,6 +1111,33 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/a2/1f/ace39fe4719367cf68a4a70a2f6cb2207f285dbfc9c6a0c2f11f6cf12aa7/fastcore-1.12.45-py3-none-any.whl", hash = "sha256:4175a16e47e5ebdbd68d19c75667b056d37c34c9a378ba50d42ff692c973322f", size = 104753, upload-time = "2026-05-08T03:36:12.143Z" }, ] +[[package]] +name = "fastmcp" +version = "2.14.0" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "authlib", marker = "(platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux')" }, + { name = "cyclopts", marker = "(platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux')" }, + { name = "exceptiongroup", marker = "(platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux')" }, + { name = "httpx", marker = "(platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux')" }, + { name = "jsonschema-path", marker = "(platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux')" }, + { name = "mcp", marker = "(platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux')" }, + { name = "openapi-pydantic", marker = "(platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux')" }, + { name = "platformdirs", marker = "(platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux')" }, + { name = "py-key-value-aio", extra = ["disk", "keyring", "memory"], marker = "(platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux')" }, + { name = "pydantic", extra = ["email"], marker = "(platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux')" }, + { name = "pydocket", marker = "(platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux')" }, + { name = "pyperclip", marker = "(platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux')" }, + { name = "python-dotenv", marker = "(platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux')" }, + { name = "rich", marker = "(platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux')" }, + { name = "uvicorn", marker = "(platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux')" }, + { name = "websockets", marker = "(platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux')" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/35/50/9bb042a2d290ccadb35db3580ac507f192e1a39c489eb8faa167cd5e3b57/fastmcp-2.14.0.tar.gz", hash = "sha256:c1f487b36a3e4b043dbf3330e588830047df2e06f8ef0920d62dfb34d0905727", size = 8232562, upload-time = "2025-12-11T23:04:27.134Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/54/73/b5656172a6beb2eacec95f04403ddea1928e4b22066700fd14780f8f45d1/fastmcp-2.14.0-py3-none-any.whl", hash = "sha256:7b374c0bcaf1ef1ef46b9255ea84c607f354291eaf647ff56a47c69f5ec0c204", size = 398965, upload-time = "2025-12-11T23:04:25.587Z" }, +] + [[package]] name = "fastokens" version = "0.2.0" @@ -1250,7 +1343,7 @@ source = { editable = "deps/research-environments/environments/general_agent" } dependencies = [ { name = "mcp", marker = "(platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux')" }, { name = "tyro", marker = "(platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux')" }, - { name = "verifiers", marker = "(platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux')" }, + { name = "verifiers", extra = ["packages"], marker = "(platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux')" }, ] [package.metadata] @@ -1261,7 +1354,7 @@ requires-dist = [ { name = "ruff", marker = "extra == 'dev'" }, { name = "ty", marker = "extra == 'dev'" }, { name = "tyro", specifier = ">=0.9" }, - { name = "verifiers", specifier = ">=0.1.15.dev2" }, + { name = "verifiers", extras = ["packages"], specifier = ">=0.1.15.dev17" }, ] provides-extras = ["dev", "test"] @@ -1331,6 +1424,45 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/20/7a/1c6e3562dfd8950adbb11ffbc65d21e7c89d01a6e4f137fa981056de25c5/gitpython-3.1.50-py3-none-any.whl", hash = "sha256:d352abe2908d07355014abdd21ddf798c2a961469239afec4962e9da884858f9", size = 212507, upload-time = "2026-05-06T04:01:23.799Z" }, ] +[[package]] +name = "google-auth" +version = "2.53.0" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "cryptography", marker = "(platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux')" }, + { name = "pyasn1-modules", marker = "(platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux')" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/c6/ad/ff781329bbbdc0974a098d996e89c9e1f7024262f9e3eec442fbb9ad1ac6/google_auth-2.53.0.tar.gz", hash = "sha256:e7e6aa16f6bee7b2b264830fd04f08087a1d5a836df516251a5d15327b246c9c", size = 335844, upload-time = "2026-05-15T20:53:07.928Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/4a/c9/db44165ba7c581268c6d46017ef63339110378305062830104fc7fa144cb/google_auth-2.53.0-py3-none-any.whl", hash = "sha256:6e7449917c599b35126a99ec268ec6880301f2fea41dce198fe8fd83ff642b68", size = 246071, upload-time = "2026-05-15T20:53:05.609Z" }, +] + +[package.optional-dependencies] +requests = [ + { name = "requests", marker = "(platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux')" }, +] + +[[package]] +name = "google-genai" +version = "2.6.0" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "anyio", marker = "(platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux')" }, + { name = "distro", marker = "(platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux')" }, + { name = "google-auth", extra = ["requests"], marker = "(platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux')" }, + { name = "httpx", marker = "(platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux')" }, + { name = "pydantic", marker = "(platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux')" }, + { name = "requests", marker = "(platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux')" }, + { name = "sniffio", marker = "(platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux')" }, + { name = "tenacity", marker = "(platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux')" }, + { name = "typing-extensions", marker = "(platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux')" }, + { name = "websockets", marker = "(platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux')" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/dd/ec/6e49f50f5c70588d97c6ed25e0b8c18828bf4d58895f397b53a7522168a1/google_genai-2.6.0.tar.gz", hash = "sha256:7d4f777234002f2e94be499dbdfb43b506a6aca9dbbec13e61d3dc6ce640ffa7", size = 554809, upload-time = "2026-05-22T01:34:33.581Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/b3/9e/e8ba4e58a9d5daf42343f3ea1cb0efb721eba36a1d6624e9873d039a5c1e/google_genai-2.6.0-py3-none-any.whl", hash = "sha256:272b6f6320f5d355735241ad441f972af095ec80dc10cb075cb430d96721648a", size = 821003, upload-time = "2026-05-22T01:34:31.55Z" }, +] + [[package]] name = "googleapis-common-protos" version = "1.75.0" @@ -1355,7 +1487,7 @@ dependencies = [ [package.metadata] requires-dist = [ { name = "math-verify", specifier = ">=0.8.0" }, - { name = "verifiers", specifier = ">=0.1.12.dev1" }, + { name = "verifiers", specifier = ">=0.1.15.dev17" }, ] [[package]] @@ -1432,6 +1564,21 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/69/b2/119f6e6dcbd96f9069ce9a2665e0146588dc9f88f29549711853645e736a/h2-4.3.0-py3-none-any.whl", hash = "sha256:c438f029a25f7945c69e0ccf0fb951dc3f73a5f6412981daee861431b70e2bdd", size = 61779, upload-time = "2025-08-23T18:12:17.779Z" }, ] +[[package]] +name = "harnesses" +source = { editable = "deps/verifiers/packages/harnesses" } +dependencies = [ + { name = "verifiers", marker = "(platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux')" }, +] + +[package.metadata] +requires-dist = [ + { name = "aiohttp", marker = "extra == 'nemogym'", specifier = ">=3.9.0" }, + { name = "nemo-gym", marker = "python_full_version >= '3.12' and extra == 'nemogym'", specifier = ">=0.2.1,<0.3" }, + { name = "verifiers", specifier = ">=0.1.15.dev11" }, +] +provides-extras = ["nemogym"] + [[package]] name = "hf-xet" version = "1.5.0" @@ -1456,7 +1603,7 @@ dependencies = [ [package.metadata] requires-dist = [ { name = "pillow", specifier = ">=12.0.0" }, - { name = "verifiers", specifier = ">=0.1.15.dev1" }, + { name = "verifiers", specifier = ">=0.1.15.dev17" }, ] [[package]] @@ -1609,7 +1756,7 @@ requires-dist = [ { name = "immutabledict" }, { name = "langdetect" }, { name = "nltk" }, - { name = "verifiers", specifier = ">=0.1.10" }, + { name = "verifiers", specifier = ">=0.1.15.dev17" }, ] [[package]] @@ -1744,6 +1891,39 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/56/6d/0d9848617b9f753b87f214f1c682592f7ca42de085f564352f10f0843026/ipywidgets-8.1.8-py3-none-any.whl", hash = "sha256:ecaca67aed704a338f88f67b1181b58f821ab5dc89c1f0f5ef99db43c1c2921e", size = 139808, upload-time = "2025-11-01T21:18:10.956Z" }, ] +[[package]] +name = "jaraco-classes" +version = "3.4.0" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "more-itertools", marker = "(platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux')" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/06/c0/ed4a27bc5571b99e3cff68f8a9fa5b56ff7df1c2251cc715a652ddd26402/jaraco.classes-3.4.0.tar.gz", hash = "sha256:47a024b51d0239c0dd8c8540c6c7f484be3b8fcf0b2d85c13825780d3b3f3acd", size = 11780, upload-time = "2024-03-31T07:27:36.643Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/7f/66/b15ce62552d84bbfcec9a4873ab79d993a1dd4edb922cbfccae192bd5b5f/jaraco.classes-3.4.0-py3-none-any.whl", hash = "sha256:f662826b6bed8cace05e7ff873ce0f9283b5c924470fe664fff1c2f00f581790", size = 6777, upload-time = "2024-03-31T07:27:34.792Z" }, +] + +[[package]] +name = "jaraco-context" +version = "6.1.2" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/af/50/4763cd07e722bb6285316d390a164bc7e479db9d90daa769f22578f698b4/jaraco_context-6.1.2.tar.gz", hash = "sha256:f1a6c9d391e661cc5b8d39861ff077a7dc24dc23833ccee564b234b81c82dfe3", size = 16801, upload-time = "2026-03-20T22:13:33.922Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/f2/58/bc8954bda5fcda97bd7c19be11b85f91973d67a706ed4a3aec33e7de22db/jaraco_context-6.1.2-py3-none-any.whl", hash = "sha256:bf8150b79a2d5d91ae48629d8b427a8f7ba0e1097dd6202a9059f29a36379535", size = 7871, upload-time = "2026-03-20T22:13:32.808Z" }, +] + +[[package]] +name = "jaraco-functools" +version = "4.5.0" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "more-itertools", marker = "(platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux')" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/36/cf/ea4ef2920830dea3f5ab2ea4da6fb67724e6dca80ee2553788c3607243d0/jaraco_functools-4.5.0.tar.gz", hash = "sha256:3bb5665ea4a020cf78a7040e89154c77edadb3ca74f366479669c5999aa70b03", size = 20272, upload-time = "2026-05-15T21:34:10.025Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/96/9a/982e48afcffcd727a9144506720ffd4224b6b7e355c98641866f38b7c043/jaraco_functools-4.5.0-py3-none-any.whl", hash = "sha256:79ce39246eddbde4b3a03b77ea5f0f7878dc669b166a66cf3fa8e266aa3fa2f4", size = 10594, upload-time = "2026-05-15T21:34:08.595Z" }, +] + [[package]] name = "jaxtyping" version = "0.3.9" @@ -1768,6 +1948,15 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/9a/93/242e2eab5fe682ffcb8b0084bde703a41d51e17ee0f3a31ff0d9d813620a/jedi-0.20.0-py2.py3-none-any.whl", hash = "sha256:7bdd9c2634f56713299976f4cbd59cb3fa92165cc5e05ea811fb253480728b67", size = 4884812, upload-time = "2026-05-01T23:38:43.919Z" }, ] +[[package]] +name = "jeepney" +version = "0.9.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/7b/6f/357efd7602486741aa73ffc0617fb310a29b588ed0fd69c2399acbb85b0c/jeepney-0.9.0.tar.gz", hash = "sha256:cf0e9e845622b81e4a28df94c40345400256ec608d0e55bb8a3feaa9163f5732", size = 106758, upload-time = "2025-02-27T18:51:01.684Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/b2/a3/e137168c9c44d18eff0376253da9f1e9234d0239e0ee230d2fee6cea8e55/jeepney-0.9.0-py3-none-any.whl", hash = "sha256:97e5714520c16fc0a45695e5365a2e11b81ea79bba796e26f9f1d178cb182683", size = 49010, upload-time = "2025-02-27T18:51:00.104Z" }, +] + [[package]] name = "jinja2" version = "3.1.6" @@ -1812,6 +2001,18 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/7b/91/984aca2ec129e2757d1e4e3c81c3fcda9d0f85b74670a094cc443d9ee949/joblib-1.5.3-py3-none-any.whl", hash = "sha256:5fc3c5039fc5ca8c0276333a188bbd59d6b7ab37fe6632daa76bc7f9ec18e713", size = 309071, upload-time = "2025-12-15T08:41:44.973Z" }, ] +[[package]] +name = "joserfc" +version = "1.6.7" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "cryptography", marker = "(platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux')" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/1b/cb/52e479f20804904f5df20ac4539d292dcecd1287aaa33cba1d1def1d9d8e/joserfc-1.6.7.tar.gz", hash = "sha256:6999fe89457069ecacd8cc797c88a805f83054dd883333fa0409f74b46479fd7", size = 232158, upload-time = "2026-05-23T01:46:44.069Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/c5/e4/bcf6718b5662894c6831f46296b73cd4b1a2e90c20b6d437e20c4997388c/joserfc-1.6.7-py3-none-any.whl", hash = "sha256:9e51e4a64840aa1734a058258e80a4480e2ff2d5686e480e7c92c954a92fbe05", size = 70603, upload-time = "2026-05-23T01:46:42.129Z" }, +] + [[package]] name = "jsonschema" version = "4.25.1" @@ -1827,6 +2028,21 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/bf/9c/8c95d856233c1f82500c2450b8c68576b4cf1c871db3afac5c34ff84e6fd/jsonschema-4.25.1-py3-none-any.whl", hash = "sha256:3fba0169e345c7175110351d456342c364814cfcf3b964ba4587f22915230a63", size = 90040, upload-time = "2025-08-18T17:03:48.373Z" }, ] +[[package]] +name = "jsonschema-path" +version = "0.5.0" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "attrs", marker = "(platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux')" }, + { name = "pathable", marker = "(platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux')" }, + { name = "pyyaml", marker = "(platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux')" }, + { name = "referencing", marker = "(platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux')" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/39/79/cd02a4df6d9270efdc7d3feefe6edd730b0820c39eeaa107a2faee8322d5/jsonschema_path-0.5.0.tar.gz", hash = "sha256:493b156ba895c97602655b620a8456caa2ce08c1aa389f5a7addec065e6e855c", size = 19597, upload-time = "2026-05-19T20:45:00.971Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/04/2c/9e69d73c4297508be9e3b64a970ea3971b3eb8db64ffc5802d40bd25981f/jsonschema_path-0.5.0-py3-none-any.whl", hash = "sha256:2790a070bc7abb08ea3dbe4d340ece4efadf639223001f020c7503229ba068e2", size = 24077, upload-time = "2026-05-19T20:44:59.225Z" }, +] + [[package]] name = "jsonschema-specifications" version = "2025.9.1" @@ -1905,6 +2121,22 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/fe/12/a156ea12972ff5bfa53c5669ec8a71a7dda8474bd540acc9211b0cacbb82/kernels_data-0.14.0-cp38-abi3-musllinux_1_2_x86_64.whl", hash = "sha256:4e9f39e3678f525560f95a4af048388504dc261b7386ecc2a1017095aaa44997", size = 1414335, upload-time = "2026-05-06T08:21:27.804Z" }, ] +[[package]] +name = "keyring" +version = "25.7.0" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "jaraco-classes", marker = "(platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux')" }, + { name = "jaraco-context", marker = "(platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux')" }, + { name = "jaraco-functools", marker = "(platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux')" }, + { name = "jeepney", marker = "(platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux')" }, + { name = "secretstorage", marker = "(platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux')" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/43/4b/674af6ef2f97d56f0ab5153bf0bfa28ccb6c3ed4d1babf4305449668807b/keyring-25.7.0.tar.gz", hash = "sha256:fe01bd85eb3f8fb3dd0405defdeac9a5b4f6f0439edbb3149577f244a2e8245b", size = 63516, upload-time = "2025-11-16T16:26:09.482Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/81/db/e655086b7f3a705df045bf0933bdd9c2f79bb3c97bfef1384598bb79a217/keyring-25.7.0-py3-none-any.whl", hash = "sha256:be4a0b195f149690c166e850609a477c532ddbfbaed96a404d4e43f8d5e2689f", size = 39160, upload-time = "2025-11-16T16:26:08.402Z" }, +] + [[package]] name = "kiwisolver" version = "1.5.0" @@ -2038,7 +2270,7 @@ wheels = [ [[package]] name = "livecodebench" -version = "0.2.6" +version = "0.2.7" source = { editable = "deps/research-environments/environments/livecodebench" } dependencies = [ { name = "prime-sandboxes", marker = "(platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux')" }, @@ -2048,7 +2280,7 @@ dependencies = [ [package.metadata] requires-dist = [ { name = "prime-sandboxes", specifier = ">=0.2.19" }, - { name = "verifiers", specifier = ">=0.1.15.dev1" }, + { name = "verifiers", specifier = ">=0.1.15.dev17" }, ] [[package]] @@ -2102,7 +2334,7 @@ requires-dist = [ { name = "markdown", specifier = ">=3.5.1" }, { name = "math-verify", specifier = ">=0.8.0" }, { name = "sympy", specifier = ">=1.12.0" }, - { name = "verifiers", specifier = ">=0.1.15.dev2" }, + { name = "verifiers", specifier = ">=0.1.15.dev17" }, ] [[package]] @@ -2181,7 +2413,7 @@ wheels = [ [[package]] name = "math-env" -version = "0.1.5" +version = "0.1.6" source = { editable = "deps/research-environments/environments/math_env" } dependencies = [ { name = "math-verify", marker = "(platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux')" }, @@ -2193,7 +2425,7 @@ dependencies = [ requires-dist = [ { name = "math-verify", specifier = ">=0.8.0" }, { name = "prime-sandboxes", specifier = ">=0.2.19" }, - { name = "verifiers", specifier = ">=0.1.12.dev1" }, + { name = "verifiers", specifier = ">=0.1.15.dev17" }, ] [[package]] @@ -2237,7 +2469,7 @@ dependencies = [ [package.metadata] requires-dist = [ { name = "math-verify", specifier = ">=0.8.0" }, - { name = "verifiers", specifier = ">=0.1.12.dev1" }, + { name = "verifiers", specifier = ">=0.1.15.dev17" }, ] [[package]] @@ -2321,7 +2553,7 @@ wheels = [ [[package]] name = "mini-swe-agent-plus" -version = "0.2.24" +version = "0.2.25" source = { editable = "deps/research-environments/environments/mini_swe_agent_plus" } dependencies = [ { name = "prime-sandboxes", marker = "(platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux')" }, @@ -2335,26 +2567,7 @@ requires-dist = [ { name = "prime-sandboxes", specifier = ">=0.2.19" }, { name = "swebench", specifier = "==4.1.0" }, { name = "tenacity" }, - { name = "verifiers", specifier = ">=0.1.12.dev1" }, -] - -[[package]] -name = "mini-swe-agent-plus-rlm" -version = "0.1.6" -source = { editable = "deps/research-environments/environments/mini_swe_agent_plus_rlm" } -dependencies = [ - { name = "prime-sandboxes", marker = "(platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux')" }, - { name = "swebench", marker = "(platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux')" }, - { name = "tenacity", marker = "(platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux')" }, - { name = "verifiers", marker = "(platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux')" }, -] - -[package.metadata] -requires-dist = [ - { name = "prime-sandboxes", specifier = ">=0.2.19" }, - { name = "swebench", specifier = "==4.1.0" }, - { name = "tenacity" }, - { name = "verifiers", specifier = ">=0.1.12.dev1" }, + { name = "verifiers", specifier = ">=0.1.15.dev17" }, ] [[package]] @@ -2418,7 +2631,7 @@ dependencies = [ [package.metadata] requires-dist = [ { name = "math-verify", specifier = ">=0.8.0" }, - { name = "verifiers", specifier = ">=0.1.15.dev1" }, + { name = "verifiers", specifier = ">=0.1.15.dev17" }, ] [[package]] @@ -2494,6 +2707,15 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/3a/a4/a187adcd485ff27bdbdb5c2b4d9cf210427bc74bcaacfc8226409db17535/mooncake_transfer_engine-0.3.11.post1-cp312-cp312-manylinux_2_39_aarch64.whl", hash = "sha256:1ccad9f44cf1a67f4e0494bd02f505503139ab606ecbe76cd6050d7a069247d5", size = 18089789, upload-time = "2026-05-24T16:19:01.828Z" }, ] +[[package]] +name = "more-itertools" +version = "11.1.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/de/1d/f4da6f02cdffe04d6362210b807146a26044c88d839208aec273bb0d9184/more_itertools-11.1.0.tar.gz", hash = "sha256:48e8f4d9e7e5878571ecf6f2b4e57634f93cd474cc8cfbd2376f2d11b396e30d", size = 145772, upload-time = "2026-05-22T14:14:29.909Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/e8/3d/1087453384dbde46a8c7f9356eead2c58be8a7bf156bca40243377c85715/more_itertools-11.1.0-py3-none-any.whl", hash = "sha256:4b65538ae22f6fed0ce4874efd317463a7489796a0939fa66824dd542125a192", size = 72226, upload-time = "2026-05-22T14:14:28.824Z" }, +] + [[package]] name = "mpmath" version = "1.3.0" @@ -3015,19 +3237,31 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/60/c3/3d1e01e2dba517a91760e4a03e4f20ffc75039a6fe584d0e6f9b5c78fd15/openai_harmony-0.0.8-cp38-abi3-musllinux_1_2_x86_64.whl", hash = "sha256:007b0476a1f331f8130783f901f1da6f5a7057af1a4891f1b6a31dec364189b5", size = 3205080, upload-time = "2025-11-05T19:07:05.078Z" }, ] +[[package]] +name = "openapi-pydantic" +version = "0.5.1" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "pydantic", marker = "(platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux')" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/02/2e/58d83848dd1a79cb92ed8e63f6ba901ca282c5f09d04af9423ec26c56fd7/openapi_pydantic-0.5.1.tar.gz", hash = "sha256:ff6835af6bde7a459fb93eb93bb92b8749b754fc6e51b2f1590a19dc3005ee0d", size = 60892, upload-time = "2025-01-08T19:29:27.083Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/12/cf/03675d8bd8ecbf4445504d8071adab19f5f993676795708e36402ab38263/openapi_pydantic-0.5.1-py3-none-any.whl", hash = "sha256:a3a09ef4586f5bd760a8df7f43028b60cafb6d9f61de2acba9574766255ab146", size = 96381, upload-time = "2025-01-08T19:29:25.275Z" }, +] + [[package]] name = "opencode-cp" -version = "0.3.10" +version = "0.3.11" source = { editable = "deps/research-environments/environments/opencode_cp" } dependencies = [ { name = "prime-sandboxes", marker = "(platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux')" }, - { name = "verifiers", marker = "(platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux')" }, + { name = "verifiers", extra = ["packages"], marker = "(platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux')" }, ] [package.metadata] requires-dist = [ { name = "prime-sandboxes", specifier = ">=0.2.19" }, - { name = "verifiers", specifier = ">=0.1.15.dev2" }, + { name = "verifiers", extras = ["packages"], specifier = ">=0.1.15.dev17" }, ] [[package]] @@ -3044,54 +3278,54 @@ dependencies = [ requires-dist = [ { name = "datasets", specifier = ">=4.0.0" }, { name = "prime-sandboxes", specifier = ">=0.2.25" }, - { name = "verifiers", specifier = ">=0.1.15.dev2" }, + { name = "verifiers", specifier = ">=0.1.15.dev17" }, ] [[package]] name = "opencode-math" -version = "0.4.11" +version = "0.4.12" source = { editable = "deps/research-environments/environments/opencode_math" } dependencies = [ { name = "math-verify", marker = "(platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux')" }, - { name = "verifiers", marker = "(platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux')" }, + { name = "verifiers", extra = ["packages"], marker = "(platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux')" }, ] [package.metadata] requires-dist = [ { name = "math-verify", specifier = ">=0.8.0" }, - { name = "verifiers", specifier = ">=0.1.15.dev2" }, + { name = "verifiers", extras = ["packages"], specifier = ">=0.1.15.dev17" }, ] [[package]] name = "opencode-science" -version = "0.3.11" +version = "0.3.12" source = { editable = "deps/research-environments/environments/opencode_science" } dependencies = [ { name = "math-verify", marker = "(platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux')" }, - { name = "verifiers", marker = "(platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux')" }, + { name = "verifiers", extra = ["packages"], marker = "(platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux')" }, ] [package.metadata] requires-dist = [ { name = "math-verify", specifier = ">=0.8.0" }, - { name = "verifiers", specifier = ">=0.1.15.dev2" }, + { name = "verifiers", extras = ["packages"], specifier = ">=0.1.15.dev17" }, ] [[package]] name = "opencode-swe" -version = "0.4.7" +version = "0.4.8" source = { editable = "deps/research-environments/environments/opencode_swe" } dependencies = [ { name = "prime-sandboxes", marker = "(platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux')" }, { name = "swebench", marker = "(platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux')" }, - { name = "verifiers", marker = "(platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux')" }, + { name = "verifiers", extra = ["packages"], marker = "(platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux')" }, ] [package.metadata] requires-dist = [ { name = "prime-sandboxes", specifier = ">=0.2.23" }, { name = "swebench", specifier = "==4.1.0" }, - { name = "verifiers", specifier = ">=0.1.15.dev2" }, + { name = "verifiers", extras = ["packages"], specifier = ">=0.1.15.dev17" }, ] [[package]] @@ -3108,6 +3342,54 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/4b/33/b5db29a6c00eb8f50708110d8d453747ca125c8b805bc437b289dbdcc057/opencv_python_headless-4.13.0.92-cp37-abi3-manylinux_2_28_x86_64.whl", hash = "sha256:0bd48544f77c68b2941392fcdf9bcd2b9cdf00e98cb8c29b2455d194763cf99e", size = 60391106, upload-time = "2026-02-05T10:30:14.236Z" }, ] +[[package]] +name = "openenv-core" +version = "0.2.1" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "fastapi", marker = "(platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux')" }, + { name = "fastmcp", marker = "(platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux')" }, + { name = "huggingface-hub", marker = "(platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux')" }, + { name = "openai", marker = "(platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux')" }, + { name = "pydantic", marker = "(platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux')" }, + { name = "pyyaml", marker = "(platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux')" }, + { name = "requests", marker = "(platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux')" }, + { name = "rich", marker = "(platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux')" }, + { name = "tomli", marker = "(platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux')" }, + { name = "tomli-w", marker = "(platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux')" }, + { name = "typer", marker = "(platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux')" }, + { name = "uvicorn", marker = "(platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux')" }, + { name = "websockets", marker = "(platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux')" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/d1/d3/d2cef0e459158c9410f073ffd2ad6eca7c09232e7c53d4987acc0b942d28/openenv_core-0.2.1.tar.gz", hash = "sha256:0caa44411af7d866e451e50744d1adab57cdf9a2cf7a1b3f81042675110aebc7", size = 102728, upload-time = "2026-02-04T10:25:24.263Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/a1/5a/a7f8b0e53eac45faedcf6fbfacdd28a104f815d3471f2deceefb4234d8be/openenv_core-0.2.1-py3-none-any.whl", hash = "sha256:5868722833df3220b7a3288f581e6c0825c2d8fae42d932ff90d2bb60765813a", size = 121855, upload-time = "2026-02-04T10:25:22.82Z" }, +] + +[[package]] +name = "openreward" +version = "0.1.125" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "aiohttp", marker = "(platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux')" }, + { name = "anthropic", marker = "(platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux')" }, + { name = "click", marker = "(platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux')" }, + { name = "fastapi", marker = "(platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux')" }, + { name = "google-genai", marker = "(platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux')" }, + { name = "openai", marker = "(platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux')" }, + { name = "pydantic", marker = "(platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux')" }, + { name = "pyyaml", marker = "(platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux')" }, + { name = "sse-starlette", marker = "(platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux')" }, + { name = "structlog", marker = "(platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux')" }, + { name = "tenacity", marker = "(platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux')" }, + { name = "typing-extensions", marker = "(platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux')" }, + { name = "uvicorn", marker = "(platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux')" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/9d/dd/b69264c77fd9720f69443c5f6420a7ae9934b4e2799e276d8655f6dc0721/openreward-0.1.125.tar.gz", hash = "sha256:519687307f960ab3a395bf844d6c2fc018d8a0faad0fc367ad3b24331366d390", size = 138936, upload-time = "2026-05-21T10:24:10.075Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/72/11/51c5473d5b3d209ecaacee6ac5418eef15114d0817a98f9cee0a2d6364ee/openreward-0.1.125-py3-none-any.whl", hash = "sha256:784faeeef6aba2ce8f175bd9af5dff29b3bb0c07a4c3642eb56dc431c9af2924", size = 135521, upload-time = "2026-05-21T10:24:11.369Z" }, +] + [[package]] name = "opentelemetry-api" version = "1.37.0" @@ -3313,6 +3595,24 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/42/32/658973117bf0fd82a24abbfb94fe73a5e86216e49342985e10acce54775a/partial_json_parser-0.2.1.1.post7-py3-none-any.whl", hash = "sha256:145119e5eabcf80cbb13844a6b50a85c68bf99d376f8ed771e2a3c3b03e653ae", size = 10877, upload-time = "2025-11-17T07:27:40.457Z" }, ] +[[package]] +name = "pathable" +version = "0.6.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/66/f3/5a20387de9bcd0607871bfc2198ee0e15836da7baa4592ccd7f24c27c986/pathable-0.6.0.tar.gz", hash = "sha256:6404b8b82aef5ff0fd478934137128b99b12212ba35afdde5525ca4f8388ea58", size = 18970, upload-time = "2026-05-19T18:15:11.911Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/a2/e8/6d75ffd9784bce2e93d1ae4415649427e39a53bb172d4672b2b59c6f0a7b/pathable-0.6.0-py3-none-any.whl", hash = "sha256:82c4ca6c98c502ad12e0d4e9779b6210afee93c38990988c8c5d1b49bdcdf566", size = 18983, upload-time = "2026-05-19T18:15:10.728Z" }, +] + +[[package]] +name = "pathvalidate" +version = "3.3.1" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/fa/2a/52a8da6fe965dea6192eb716b357558e103aea0a1e9a8352ad575a8406ca/pathvalidate-3.3.1.tar.gz", hash = "sha256:b18c07212bfead624345bb8e1d6141cdcf15a39736994ea0b94035ad2b1ba177", size = 63262, upload-time = "2025-06-15T09:07:20.736Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/9a/70/875f4a23bfc4731703a5835487d0d2fb999031bd415e7d17c0ae615c18b7/pathvalidate-3.3.1-py3-none-any.whl", hash = "sha256:5263baab691f8e1af96092fa5137ee17df5bdfbd6cff1fcac4d6ef4bc2e1735f", size = 24305, upload-time = "2025-06-15T09:07:19.117Z" }, +] + [[package]] name = "pdfminer-six" version = "20260107" @@ -3526,6 +3826,7 @@ envs = [ { name = "deepdive", marker = "(platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux')" }, { name = "general-agent", marker = "(platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux')" }, { name = "gpqa", marker = "(platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux')" }, + { name = "harnesses", marker = "(platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux')" }, { name = "hle", marker = "(platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux')" }, { name = "ifeval", marker = "(platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux')" }, { name = "livecodebench", marker = "(platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux')" }, @@ -3534,7 +3835,6 @@ envs = [ { name = "math-python", marker = "(platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux')" }, { name = "math500", marker = "(platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux')" }, { name = "mini-swe-agent-plus", marker = "(platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux')" }, - { name = "mini-swe-agent-plus-rlm", marker = "(platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux')" }, { name = "mmlu-pro", marker = "(platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux')" }, { name = "opencode-cp", marker = "(platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux')" }, { name = "opencode-deepdive", marker = "(platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux')" }, @@ -3545,6 +3845,7 @@ envs = [ { name = "rlm-swe", marker = "(platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux')" }, { name = "science-env", marker = "(platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux')" }, { name = "simpleqa-verified", marker = "(platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux')" }, + { name = "tasksets", marker = "(platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux')" }, { name = "tau2-bench", marker = "(platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux')" }, { name = "wiki-search", marker = "(platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux')" }, ] @@ -3600,6 +3901,7 @@ requires-dist = [ { name = "flash-linear-attention", git = "https://github.com/fla-org/flash-linear-attention" }, { name = "general-agent", marker = "extra == 'envs'", editable = "deps/research-environments/environments/general_agent" }, { name = "gpqa", marker = "extra == 'envs'", editable = "deps/research-environments/environments/gpqa" }, + { name = "harnesses", marker = "extra == 'envs'", editable = "deps/verifiers/packages/harnesses" }, { name = "hle", marker = "extra == 'envs'", editable = "deps/research-environments/environments/hle" }, { name = "ifeval", marker = "extra == 'envs'", editable = "deps/research-environments/environments/ifeval" }, { name = "jaxtyping", specifier = ">=0.3.2" }, @@ -3612,7 +3914,6 @@ requires-dist = [ { name = "math-python", marker = "extra == 'envs'", editable = "deps/verifiers/environments/math_python" }, { name = "math500", marker = "extra == 'envs'", editable = "deps/research-environments/environments/math500" }, { name = "mini-swe-agent-plus", marker = "extra == 'envs'", editable = "deps/research-environments/environments/mini_swe_agent_plus" }, - { name = "mini-swe-agent-plus-rlm", marker = "extra == 'envs'", editable = "deps/research-environments/environments/mini_swe_agent_plus_rlm" }, { name = "mmlu-pro", marker = "extra == 'envs'", editable = "deps/research-environments/environments/mmlu_pro" }, { name = "modelexpress", marker = "extra == 'modelexpress'", specifier = "==0.3.0" }, { name = "mooncake-transfer-engine", specifier = ">=0.3.10.post2" }, @@ -3645,6 +3946,7 @@ requires-dist = [ { name = "science-env", marker = "extra == 'envs'", editable = "deps/research-environments/environments/science_env" }, { name = "setproctitle", specifier = ">=1.3.0" }, { name = "simpleqa-verified", marker = "extra == 'envs'", editable = "deps/research-environments/environments/simpleqa_verified" }, + { name = "tasksets", marker = "extra == 'envs'", editable = "deps/verifiers/packages/tasksets" }, { name = "tau2-bench", marker = "extra == 'envs'", editable = "deps/research-environments/environments/tau2_bench" }, { name = "tenacity", specifier = ">=8.2.0" }, { name = "tilelang", specifier = ">=0.1.8" }, @@ -3825,6 +4127,47 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/e0/a9/023730ba63db1e494a271cb018dcd361bd2c917ba7004c3e49d5daf795a2/py_cpuinfo-9.0.0-py3-none-any.whl", hash = "sha256:859625bc251f64e21f077d099d4162689c762b5d6a4c3c97553d56241c9674d5", size = 22335, upload-time = "2022-10-25T20:38:27.636Z" }, ] +[[package]] +name = "py-key-value-aio" +version = "0.3.0" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "beartype", marker = "(platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux')" }, + { name = "py-key-value-shared", marker = "(platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux')" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/93/ce/3136b771dddf5ac905cc193b461eb67967cf3979688c6696e1f2cdcde7ea/py_key_value_aio-0.3.0.tar.gz", hash = "sha256:858e852fcf6d696d231266da66042d3355a7f9871650415feef9fca7a6cd4155", size = 50801, upload-time = "2025-11-17T16:50:04.711Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/99/10/72f6f213b8f0bce36eff21fda0a13271834e9eeff7f9609b01afdc253c79/py_key_value_aio-0.3.0-py3-none-any.whl", hash = "sha256:1c781915766078bfd608daa769fefb97e65d1d73746a3dfb640460e322071b64", size = 96342, upload-time = "2025-11-17T16:50:03.801Z" }, +] + +[package.optional-dependencies] +disk = [ + { name = "diskcache", marker = "(platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux')" }, + { name = "pathvalidate", marker = "(platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux')" }, +] +keyring = [ + { name = "keyring", marker = "(platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux')" }, +] +memory = [ + { name = "cachetools", marker = "(platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux')" }, +] +redis = [ + { name = "redis", marker = "(platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux')" }, +] + +[[package]] +name = "py-key-value-shared" +version = "0.3.0" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "beartype", marker = "(platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux')" }, + { name = "typing-extensions", marker = "(platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux')" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/7b/e4/1971dfc4620a3a15b4579fe99e024f5edd6e0967a71154771a059daff4db/py_key_value_shared-0.3.0.tar.gz", hash = "sha256:8fdd786cf96c3e900102945f92aa1473138ebe960ef49da1c833790160c28a4b", size = 11666, upload-time = "2025-11-17T16:50:06.849Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/51/e4/b8b0a03ece72f47dce2307d36e1c34725b7223d209fc679315ffe6a4e2c3/py_key_value_shared-0.3.0-py3-none-any.whl", hash = "sha256:5b0efba7ebca08bb158b1e93afc2f07d30b8f40c2fc12ce24a4c0d84f42f9298", size = 19560, upload-time = "2025-11-17T16:50:05.954Z" }, +] + [[package]] name = "pyarrow" version = "24.0.0" @@ -3837,6 +4180,27 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/36/7a/82c363caa145fff88fb475da50d3bf52bb024f61917be5424c3392eaf878/pyarrow-24.0.0-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:25ea65d868eb04015cd18e6df2fbe98f07e5bda2abefabcb88fce39a947716f6", size = 51929490, upload-time = "2026-04-21T10:47:55.981Z" }, ] +[[package]] +name = "pyasn1" +version = "0.6.3" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/5c/5f/6583902b6f79b399c9c40674ac384fd9cd77805f9e6205075f828ef11fb2/pyasn1-0.6.3.tar.gz", hash = "sha256:697a8ecd6d98891189184ca1fa05d1bb00e2f84b5977c481452050549c8a72cf", size = 148685, upload-time = "2026-03-17T01:06:53.382Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/5d/a0/7d793dce3fa811fe047d6ae2431c672364b462850c6235ae306c0efd025f/pyasn1-0.6.3-py3-none-any.whl", hash = "sha256:a80184d120f0864a52a073acc6fc642847d0be408e7c7252f31390c0f4eadcde", size = 83997, upload-time = "2026-03-17T01:06:52.036Z" }, +] + +[[package]] +name = "pyasn1-modules" +version = "0.4.2" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "pyasn1", marker = "(platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux')" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/e9/e6/78ebbb10a8c8e4b61a59249394a4a594c1a7af95593dc933a349c8d00964/pyasn1_modules-0.4.2.tar.gz", hash = "sha256:677091de870a80aae844b1ca6134f54652fa2c8c5a52aa396440ac3106e941e6", size = 307892, upload-time = "2025-03-28T02:41:22.17Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/47/8d/d529b5d697919ba8c11ad626e835d4039be708a35b0d22de83a269a6682c/pyasn1_modules-0.4.2-py3-none-any.whl", hash = "sha256:29253a9207ce32b64c3ac6600edc75368f98473906e8fd1043bd6b5b1de2c14a", size = 181259, upload-time = "2025-03-28T02:41:19.028Z" }, +] + [[package]] name = "pybase64" version = "1.4.3" @@ -3950,6 +4314,29 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/ae/8d/f1af3832f5e6eb13ba94ee809e72b8ecb5eef226d27ee0bef7d963d943c7/pydantic_settings-2.14.1-py3-none-any.whl", hash = "sha256:6e3c7edfd8277687cdc598f56e5cff0e9bfff0910a3749deaa8d4401c3a2b9de", size = 60964, upload-time = "2026-05-08T13:40:04.958Z" }, ] +[[package]] +name = "pydocket" +version = "0.21.0" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "burner-redis", marker = "(platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux')" }, + { name = "cloudpickle", marker = "(platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux')" }, + { name = "cronsim", marker = "(platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux')" }, + { name = "opentelemetry-api", marker = "(platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux')" }, + { name = "prometheus-client", marker = "(platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux')" }, + { name = "py-key-value-aio", extra = ["memory", "redis"], marker = "(platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux')" }, + { name = "python-json-logger", marker = "(platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux')" }, + { name = "redis", marker = "(platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux')" }, + { name = "rich", marker = "(platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux')" }, + { name = "typer", marker = "(platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux')" }, + { name = "typing-extensions", marker = "(platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux')" }, + { name = "uncalled-for", marker = "(platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux')" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/ad/71/e267ddae6fa3524bfbc00fd409fe2157cc8814751ebf3e2cf22879c1732e/pydocket-0.21.0.tar.gz", hash = "sha256:2fcfc67f05a98689505e6af127af7f71b9612c08a139cfe1a690706c43810968", size = 398122, upload-time = "2026-05-26T15:28:51.812Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/19/d1/fcfa26ced70c37714b5b4c5da9bb2ea9c28116367e8db8994de7b91d0b5f/pydocket-0.21.0-py3-none-any.whl", hash = "sha256:b98f8fcd48fbd5258f6ab0be9080fbc36dcab52a73a9acc0509652de0b445df0", size = 116953, upload-time = "2026-05-26T15:28:50.246Z" }, +] + [[package]] name = "pyelftools" version = "0.32" @@ -4026,6 +4413,15 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/10/bd/c038d7cc38edc1aa5bf91ab8068b63d4308c66c4c8bb3cbba7dfbc049f9c/pyparsing-3.3.2-py3-none-any.whl", hash = "sha256:850ba148bd908d7e2411587e247a1e4f0327839c40e2e5e6d05a007ecc69911d", size = 122781, upload-time = "2026-01-21T03:57:55.912Z" }, ] +[[package]] +name = "pyperclip" +version = "1.11.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/e8/52/d87eba7cb129b81563019d1679026e7a112ef76855d6159d24754dbd2a51/pyperclip-1.11.0.tar.gz", hash = "sha256:244035963e4428530d9e3a6101a1ef97209c6825edab1567beac148ccc1db1b6", size = 12185, upload-time = "2025-09-26T14:40:37.245Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/df/80/fc9d01d5ed37ba4c42ca2b55b4339ae6e200b456be3a1aaddf4a9fa99b8c/pyperclip-1.11.0-py3-none-any.whl", hash = "sha256:299403e9ff44581cb9ba2ffeed69c7aa96a008622ad0c46cb575ca75b5b84273", size = 11063, upload-time = "2025-09-26T14:40:36.069Z" }, +] + [[package]] name = "pypika" version = "0.51.1" @@ -4308,6 +4704,19 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/82/3b/64d4899d73f91ba49a8c18a8ff3f0ea8f1c1d75481760df8c68ef5235bf5/rich-15.0.0-py3-none-any.whl", hash = "sha256:33bd4ef74232fb73fe9279a257718407f169c09b78a87ad3d296f548e27de0bb", size = 310654, upload-time = "2026-04-12T08:24:02.83Z" }, ] +[[package]] +name = "rich-rst" +version = "2.0.1" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "pygments", marker = "(platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux')" }, + { name = "rich", marker = "(platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux')" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/57/56/3191bae66b08ccc637ea8120426068bcb361cc323c96404c310886937067/rich_rst-2.0.1.tar.gz", hash = "sha256:cbe236ed0901d1ec8427cc6a50bf0a34353ba28ad014dc24def68bfe7f3b9e68", size = 300570, upload-time = "2026-05-16T00:47:57.362Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/a0/3d/55c17d3ebdf3cd81356002afe5bef9bb8af631db2819785b6eac845b925b/rich_rst-2.0.1-py3-none-any.whl", hash = "sha256:7ee15f345ce25fa02b582c272a6cdbaf0c21243e38061cea273cff659bf3ef61", size = 272922, upload-time = "2026-05-16T00:47:55.508Z" }, +] + [[package]] name = "rich-toolkit" version = "0.19.7" @@ -4345,7 +4754,7 @@ wheels = [ [[package]] name = "rlm-swe" -version = "0.4.2" +version = "0.4.3" source = { editable = "deps/research-environments/environments/rlm_swe" } dependencies = [ { name = "multi-swe-bench", marker = "(platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux')" }, @@ -4410,7 +4819,7 @@ dependencies = [ [package.metadata] requires-dist = [ { name = "math-verify", specifier = ">=0.8.0" }, - { name = "verifiers", specifier = ">=0.1.12.dev1" }, + { name = "verifiers", specifier = ">=0.1.15.dev17" }, ] [[package]] @@ -4458,6 +4867,19 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/83/11/00d3c3dfc25ad54e731d91449895a79e4bf2384dc3ac01809010ba88f6d5/seaborn-0.13.2-py3-none-any.whl", hash = "sha256:636f8336facf092165e27924f223d3c62ca560b1f2bb5dff7ab7fad265361987", size = 294914, upload-time = "2024-01-25T13:21:49.598Z" }, ] +[[package]] +name = "secretstorage" +version = "3.5.0" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "cryptography", marker = "(platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux')" }, + { name = "jeepney", marker = "(platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux')" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/1c/03/e834bcd866f2f8a49a85eaff47340affa3bfa391ee9912a952a1faa68c7b/secretstorage-3.5.0.tar.gz", hash = "sha256:f04b8e4689cbce351744d5537bf6b1329c6fc68f91fa666f60a380edddcd11be", size = 19884, upload-time = "2025-11-23T19:02:53.191Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/b7/46/f5af3402b579fd5e11573ce652019a67074317e18c1935cc0b4ba9b35552/secretstorage-3.5.0-py3-none-any.whl", hash = "sha256:0ce65888c0725fcb2c5bc0fdb8e5438eece02c523557ea40ce0703c266248137", size = 15554, upload-time = "2025-11-23T19:02:51.545Z" }, +] + [[package]] name = "sentencepiece" version = "0.2.1" @@ -4520,7 +4942,7 @@ dependencies = [ ] [package.metadata] -requires-dist = [{ name = "verifiers", specifier = ">=0.1.15.dev1" }] +requires-dist = [{ name = "verifiers", specifier = ">=0.1.15.dev17" }] [[package]] name = "six" @@ -4560,15 +4982,14 @@ wheels = [ [[package]] name = "sse-starlette" -version = "3.4.2" +version = "2.3.6" source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "anyio", marker = "(platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux')" }, - { name = "starlette", marker = "(platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux')" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/38/82/10cdfab4ab663a6b6bd624d33f55b2cfa41af5105be033a6d5d135a92c5f/sse_starlette-3.4.2.tar.gz", hash = "sha256:2f9a7f51ed84395a0427fb9f66cb1ec11f7899d977a72cbc9070b962a2e14489", size = 35236, upload-time = "2026-05-06T19:42:13.727Z" } +sdist = { url = "https://files.pythonhosted.org/packages/8c/f4/989bc70cb8091eda43a9034ef969b25145291f3601703b82766e5172dfed/sse_starlette-2.3.6.tar.gz", hash = "sha256:0382336f7d4ec30160cf9ca0518962905e1b69b72d6c1c995131e0a703b436e3", size = 18284, upload-time = "2025-05-30T13:34:12.914Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/c1/27/351c71e803c56090d8d3bf9520422debeb8ed938871fd4f7ef519805a6c5/sse_starlette-3.4.2-py3-none-any.whl", hash = "sha256:6ea5d35b7ce979a3de5a0db5f77fe886b1616e4b3e1ad93fba502bd9b5fb662f", size = 16516, upload-time = "2026-05-06T19:42:12.201Z" }, + { url = "https://files.pythonhosted.org/packages/81/05/78850ac6e79af5b9508f8841b0f26aa9fd329a1ba00bf65453c2d312bcc8/sse_starlette-2.3.6-py3-none-any.whl", hash = "sha256:d49a8285b182f6e2228e2609c350398b2ca2c36216c2675d875f81e93548f760", size = 10606, upload-time = "2025-05-30T13:34:11.703Z" }, ] [[package]] @@ -4598,6 +5019,15 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/81/0d/13d1d239a25cbfb19e740db83143e95c772a1fe10202dda4b76792b114dd/starlette-0.52.1-py3-none-any.whl", hash = "sha256:0029d43eb3d273bc4f83a08720b4912ea4b071087a3b48db01b7c839f7954d74", size = 74272, upload-time = "2026-01-18T13:34:09.188Z" }, ] +[[package]] +name = "structlog" +version = "25.5.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/ef/52/9ba0f43b686e7f3ddfeaa78ac3af750292662284b3661e91ad5494f21dbc/structlog-25.5.0.tar.gz", hash = "sha256:098522a3bebed9153d4570c6d0288abf80a031dfdb2048d59a49e9dc2190fc98", size = 1460830, upload-time = "2025-10-27T08:28:23.028Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/a8/45/a132b9074aa18e799b891b91ad72133c98d8042c70f6240e4c5f9dabee2f/structlog-25.5.0-py3-none-any.whl", hash = "sha256:a8453e9b9e636ec59bd9e79bbd4a72f025981b3ba0f5837aebf48f02f37a7f9f", size = 72510, upload-time = "2025-10-27T08:28:21.535Z" }, +] + [[package]] name = "supervisor" version = "4.3.0" @@ -4684,6 +5114,36 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/40/44/4a5f08c96eb108af5cb50b41f76142f0afa346dfa99d5296fe7202a11854/tabulate-0.9.0-py3-none-any.whl", hash = "sha256:024ca478df22e9340661486f85298cff5f6dcdba14f3813e8830015b9ed1948f", size = 35252, upload-time = "2022-10-06T17:21:44.262Z" }, ] +[[package]] +name = "tasksets" +source = { editable = "deps/verifiers/packages/tasksets" } +dependencies = [ + { name = "verifiers", marker = "(platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux')" }, +] + +[package.optional-dependencies] +openenv = [ + { name = "openenv-core", marker = "(platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux')" }, +] +openreward = [ + { name = "openreward", marker = "(platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux')" }, +] +ta = [ + { name = "nltk", marker = "(platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux')" }, + { name = "textarena", marker = "(platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux')" }, +] + +[package.metadata] +requires-dist = [ + { name = "nemo-gym", marker = "python_full_version >= '3.12' and extra == 'nemogym'", specifier = ">=0.2.1,<0.3" }, + { name = "nltk", marker = "extra == 'ta'" }, + { name = "openenv-core", marker = "extra == 'openenv'", specifier = ">=0.3.0" }, + { name = "openreward", marker = "python_full_version >= '3.11' and extra == 'openreward'", specifier = ">=0.1.123" }, + { name = "textarena", marker = "extra == 'ta'" }, + { name = "verifiers", specifier = ">=0.1.15.dev11" }, +] +provides-extras = ["nemogym", "openenv", "openreward", "ta"] + [[package]] name = "tau2" version = "0.2.1.dev0" @@ -4729,7 +5189,7 @@ dependencies = [ [package.metadata] requires-dist = [ { name = "tau2", git = "https://github.com/sierra-research/tau2-bench.git?rev=337326e" }, - { name = "verifiers", specifier = ">=0.1.15.dev1" }, + { name = "verifiers", specifier = ">=0.1.15.dev17" }, ] [[package]] @@ -4770,6 +5230,24 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/73/c6/825dab04195756cf8ff2e12698f22513b3db2f64925bdd41671bfb33aaa5/tensorboard_data_server-0.7.2-py3-none-manylinux_2_31_x86_64.whl", hash = "sha256:ef687163c24185ae9754ed5650eb5bc4d84ff257aabdc33f0cc6f74d8ba54530", size = 6590363, upload-time = "2023-10-23T21:23:35.583Z" }, ] +[[package]] +name = "textarena" +version = "0.7.4" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "chess", marker = "(platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux')" }, + { name = "nltk", marker = "(platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux')" }, + { name = "openai", marker = "(platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux')" }, + { name = "python-dotenv", marker = "(platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux')" }, + { name = "requests", marker = "(platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux')" }, + { name = "rich", marker = "(platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux')" }, + { name = "websockets", marker = "(platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux')" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/ba/04/4a3ca42093d0be2a9c377ae3335a6c6baac1d278ae932562ec69f339d172/textarena-0.7.4.tar.gz", hash = "sha256:28bb9170d7718f2ae05e4515bea82262422731e563fc7318a9e7983de0cadd4f", size = 954969, upload-time = "2025-10-16T14:41:55.981Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/26/b4/9a9ba65154aff853c75b3d7324319d168ad9c69c6097f4aa3c16da7d9ef3/textarena-0.7.4-py3-none-any.whl", hash = "sha256:684784e78278e518066f67557ee93b47c238d16cbbd15d3abdaa3147562d3024", size = 1073570, upload-time = "2025-10-16T14:41:53.965Z" }, +] + [[package]] name = "textual" version = "8.2.5" @@ -5197,6 +5675,15 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/61/73/d21edf5b204d1467e06500080a50f79d49ef2b997c79123a536d4a17d97c/uc_micro_py-2.0.0-py3-none-any.whl", hash = "sha256:3603a3859af53e5a39bc7677713c78ea6589ff188d70f4fee165db88e22b242c", size = 6383, upload-time = "2026-03-01T06:31:26.257Z" }, ] +[[package]] +name = "uncalled-for" +version = "0.3.2" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/b5/82/345cc927f7fbdae6065e7768759932fcc827fc20b29b45dfbafa2f1f7da4/uncalled_for-0.3.2.tar.gz", hash = "sha256:89f5dbcd71e2b8f47c030b1fa302e6cce2ec795d1ac565eeb6525c5fe55cb8a2", size = 50032, upload-time = "2026-05-06T13:38:25.204Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/3b/25/2c87754f3a9e692315f7b811244090e68f362979fc8886b3fbd2985a1d8c/uncalled_for-0.3.2-py3-none-any.whl", hash = "sha256:0ff60b142c7d1f8070bde9d42afaa70aedc77dcc10998c227687e9c15713418e", size = 11444, upload-time = "2026-05-06T13:38:24.025Z" }, +] + [[package]] name = "unidiff" version = "0.7.5" @@ -5281,6 +5768,12 @@ dependencies = [ { name = "uvloop", marker = "(platform_machine == 'aarch64' and platform_python_implementation != 'PyPy' and sys_platform == 'linux') or (platform_machine == 'x86_64' and platform_python_implementation != 'PyPy' and sys_platform == 'linux')" }, ] +[package.optional-dependencies] +packages = [ + { name = "harnesses", marker = "(platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux')" }, + { name = "tasksets", extra = ["openenv", "openreward", "ta"], marker = "(platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux')" }, +] + [package.metadata] requires-dist = [ { name = "accelerate", marker = "extra == 'rl'", specifier = ">=1.4.0" },