diff --git a/configs/private b/configs/private
index 70c3503e1d..00204b08e3 160000
--- a/configs/private
+++ b/configs/private
@@ -1 +1 @@
-Subproject commit 70c3503e1dc4ea499b09f0eee206b509169b79bd
+Subproject commit 00204b08e322ca36d49f7d1a468e80009a0e0bd5
diff --git a/deps/research-environments b/deps/research-environments
index c752781984..b07ace376f 160000
--- a/deps/research-environments
+++ b/deps/research-environments
@@ -1 +1 @@
-Subproject commit c752781984c1b4fbb0a3d7f4aac1e7ed67cc749e
+Subproject commit b07ace376ff32f53b3b4ad2d58007f2de92e0821
diff --git a/deps/verifiers b/deps/verifiers
index e1d4f2593a..dacceceda4 160000
--- a/deps/verifiers
+++ b/deps/verifiers
@@ -1 +1 @@
-Subproject commit e1d4f2593a66a2130584f5972bee4f3279e824e5
+Subproject commit dacceceda4b14b21354e342ee652b1f27dd96fbe
diff --git a/packages/prime-rl-configs/src/prime_rl/configs/orchestrator.py b/packages/prime-rl-configs/src/prime_rl/configs/orchestrator.py
index be5fe249f3..0fd25ff089 100644
--- a/packages/prime-rl-configs/src/prime_rl/configs/orchestrator.py
+++ b/packages/prime-rl-configs/src/prime_rl/configs/orchestrator.py
@@ -206,6 +206,72 @@ def resolve_timeout(self):
         return self
 
 
+class SystemRoleEchoConfig(BaseConfig):
+    """Echo supervision for system-message content tokens (prompt tokens attributed to a ``system`` message)."""
+
+    alpha: float = Field(1.0, allow_inf_nan=False)  # must be finite: NaN and +/-inf fail validation
+    """Per-token echo weight."""
+
+
+class UserRoleEchoConfig(BaseConfig):
+    """Echo supervision for user-message content tokens (prompt tokens attributed to a ``user`` message)."""
+
+    alpha: float = Field(1.0, allow_inf_nan=False)  # must be finite: NaN and +/-inf fail validation
+    """Per-token echo weight."""
+
+
+class AssistantRoleEchoConfig(BaseConfig):
+    """Echo supervision for assistant-message content and completion tokens."""
+
+    alpha: float = Field(1.0, allow_inf_nan=False)  # applied to all completion tokens and assistant prompt content
+    """Per-token echo weight. ``alpha=0`` keeps the token supervised but gives it zero gradient."""
+
+
+class ToolRoleEchoConfig(BaseConfig):
+    """Echo supervision for tool-message content tokens, optionally limited to named tools."""
+
+    alpha: float = Field(1.0, allow_inf_nan=False)  # must be finite: NaN and +/-inf fail validation
+    """Per-token echo weight."""
+
+    tool_names: set[str] | None = Field(None, min_length=1)  # an empty set is rejected; use None for "all tools"
+    """Restrict echo to these tool function names; None = all tools."""
+
+
+class EchoFilterConfig(BaseConfig):
+    """Optional callable, invoked as ``fn(rollout, **kwargs)``, that narrows role-selected echo tokens per rollout."""
+
+    import_path: str  # passed to import_object() when the TrainEnv is constructed
+    """Dotted import path to the filter callable, e.g. ``"my_module.filter_warnings"``."""
+
+    kwargs: dict[str, Any] = Field(default_factory=dict)  # pre-bound onto the callable with functools.partial
+    """Keyword arguments forwarded to the filter as ``**kwargs``."""
+
+
+class EchoConfig(BaseConfig):
+    """Per-role switches for CE echo in one training env; a role is echoed only when its config is set."""
+
+    system: SystemRoleEchoConfig | None = None
+    """System-message echo (default: disabled)."""
+
+    user: UserRoleEchoConfig | None = None
+    """User-message echo (default: disabled)."""
+
+    assistant: AssistantRoleEchoConfig | None = None
+    """Assistant-message echo (default: disabled)."""
+
+    tool: ToolRoleEchoConfig | None = None
+    """Tool-message echo (default: disabled)."""
+
+    filter: EchoFilterConfig | None = None
+    """Optional per-token filter on top of the role baseline."""
+
+    @model_validator(mode="after")
+    def validate_roles(self) -> "EchoConfig":
+        if all(role is None for role in (self.system, self.user, self.assistant, self.tool)):
+            raise ValueError("EchoConfig requires at least one of system, user, assistant, or tool.")
+        return self
+
+
 class TrainEnvConfig(EnvConfig):
     sampling: TrainSamplingConfig = TrainSamplingConfig()
     """Per-env sampling overrides. Unset fields inherit from the group-level train sampling config."""
@@ -214,6 +280,9 @@ class TrainEnvConfig(EnvConfig):
     """Rollouts generated per example for GRPO group-relative advantages.
     Inherits from ``orchestrator.group_size`` when unset."""
 
+    echo: EchoConfig | None = None
+    """Per-env per-role echo config."""
+
 
 class EvalEnvConfig(EnvConfig):
     sampling: EvalSamplingConfig = EvalSamplingConfig()
diff --git a/pyproject.toml b/pyproject.toml
index ca3639b3aa..b98dca0bff 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -69,6 +69,7 @@ envs = [
     "deepdive",
     "general-agent",
     "gpqa",
+    "harnesses",
     "hle",
     "ifeval",
     "livecodebench",
@@ -77,7 +78,6 @@ envs = [
     "math-python",
     "math500",
     "mini-swe-agent-plus",
-    "mini-swe-agent-plus-rlm",
     "mmlu-pro",
     "opencode-cp",
     "opencode-deepdive",
@@ -88,6 +88,7 @@ envs = [
     "rlm-swe",
     "science-env",
     "simpleqa-verified",
+    "tasksets",
     "tau2-bench",
     "wiki-search",
 ]
@@ -197,6 +198,8 @@ prime-rl-configs = { path = "packages/prime-rl-configs", editable = true }
 verifiers = { path = "deps/verifiers", editable = true }
 renderers = { path = "deps/renderers", editable = true }
 prime-pydantic-config = { path = "deps/pydantic-config", editable = true }
+harnesses = { path = "deps/verifiers/packages/harnesses", editable = true }
+tasksets = { path = "deps/verifiers/packages/tasksets", editable = true }
 aime2024 = { path = "deps/research-environments/environments/aime2024", editable = true }
 aime2025 = { path = "deps/research-environments/environments/aime2025", editable = true }
 alphabet-sort = { path = "deps/verifiers/environments/alphabet_sort", editable = true }
@@ -213,7 +216,6 @@ math-env = { path = "deps/research-environments/environments/math_env", editable
 math-python = { path = "deps/verifiers/environments/math_python", editable = true }
 math500 = { path = "deps/research-environments/environments/math500", editable = true }
 mini-swe-agent-plus = { path = "deps/research-environments/environments/mini_swe_agent_plus", editable = true }
-mini-swe-agent-plus-rlm = { path = "deps/research-environments/environments/mini_swe_agent_plus_rlm", editable = true }
 mmlu-pro = { path = "deps/research-environments/environments/mmlu_pro", editable = true }
 opencode-cp = { path = "deps/research-environments/environments/opencode_cp", editable = true }
 opencode-deepdive = { path = "deps/research-environments/environments/opencode_deepdive", editable = true }
diff --git a/src/prime_rl/orchestrator/echo.py b/src/prime_rl/orchestrator/echo.py
new file mode 100644
index 0000000000..ca9a6c3600
--- /dev/null
+++ b/src/prime_rl/orchestrator/echo.py
@@ -0,0 +1,138 @@
+from __future__ import annotations
+
+from collections.abc import Callable
+from dataclasses import dataclass
+
+import verifiers as vf
+
+from prime_rl.configs.orchestrator import EchoConfig
+
+
+@dataclass(frozen=True)
+class EchoAnnotations:
+    step_alpha: list[list[float | None]]  # step_alpha[i][k]: echo weight of token k in step i; None = not echoed
+
+    def initial_sample_alpha(self, step_idx: int) -> list[float | None] | None:
+        weights = self.step_alpha[step_idx]  # returned as a copy so the caller may extend it freely
+        return None if all(w is None for w in weights) else list(weights)
+
+    def extension_alpha(self, step_idx: int, prefix_len: int, prompt_len: int) -> list[float | None]:
+        weights = self.step_alpha[step_idx]  # slice = prompt tokens past the shared prefix, then the completion
+        return [*weights[prefix_len:prompt_len], *weights[prompt_len:]]
+
+
+def build_echo_annotations(
+    rollout: vf.RolloutOutput,
+    echo_config: EchoConfig | None,
+    filter_fn: Callable[..., list[list[bool]]] | None = None,
+) -> EchoAnnotations | None:
+    """Build per-step echo weights for ``rollout``; None when echo is disabled or a step has no tokens."""
+    if echo_config is None:
+        return None
+
+    trajectory = rollout["trajectory"]
+    per_step_tokens = [step["tokens"] for step in trajectory]
+    if any(tokens is None for tokens in per_step_tokens):
+        return None
+
+    # The filter is only consulted for a non-empty trajectory; apply_echo_filter validates what it returns.
+    keep_masks = apply_echo_filter(rollout, filter_fn) if filter_fn is not None and trajectory else None
+
+    step_alpha = [
+        _build_step_echo_alpha(
+            prompt_attribution=tokens.get("prompt_attribution"),
+            prompt_len=len(tokens["prompt_ids"]),
+            completion_len=len(tokens["completion_ids"]),
+            echo_config=echo_config,
+            filter_mask=None if keep_masks is None else keep_masks[step_idx],
+        )
+        for step_idx, tokens in enumerate(per_step_tokens)
+    ]
+
+    return EchoAnnotations(step_alpha=step_alpha)
+
+
+def _build_step_echo_alpha(
+    prompt_attribution: dict | None,
+    prompt_len: int,
+    completion_len: int,
+    echo_config: EchoConfig | None,
+    filter_mask: list[bool] | None = None,
+) -> list[float | None]:
+    """Return one echo weight (None = not echoed) for each prompt+completion token of a single step."""
+    total_len = prompt_len + completion_len
+    weights: list[float | None] = [None] * total_len
+    assistant_cfg = echo_config.assistant if echo_config is not None else None
+    if assistant_cfg is not None:
+        # Completion tokens are not covered by the prompt attribution; they all take the assistant weight.
+        weights[prompt_len:total_len] = [assistant_cfg.alpha] * completion_len
+
+    attribution = prompt_attribution if echo_config is not None and prompt_attribution is not None else {}
+    roles = attribution.get("message_roles")
+    indices = attribution.get("message_indices")
+    content_flags = attribution.get("is_content")
+    # Prompt tokens are weighted only when the attribution lines up token-for-token with the prompt.
+    has_attribution = roles is not None and bool(content_flags) and bool(indices)
+    if has_attribution and len(content_flags) == prompt_len and len(indices) == prompt_len:
+        # Roles without a config are left out of the map, so ``.get`` yields None for them.
+        role_configs = {"system": echo_config.system, "user": echo_config.user, "assistant": assistant_cfg}
+        role_alphas = {name: cfg.alpha for name, cfg in role_configs.items() if cfg is not None}
+        tool_cfg = echo_config.tool
+        tool_alpha = tool_cfg.alpha if tool_cfg is not None else None
+        allowed_tools = tool_cfg.tool_names if tool_cfg is not None else None
+        tool_names = attribution.get("message_tool_names") or []
+
+        for k, (mi, is_content_token) in enumerate(zip(indices, content_flags)):
+            if mi < 0 or not is_content_token or mi >= len(roles):
+                continue
+            role = roles[mi]
+            if role == "tool":
+                tool_name = tool_names[mi] if mi < len(tool_names) else None
+                tool_enabled = tool_alpha is not None and (allowed_tools is None or tool_name in allowed_tools)
+                alpha = tool_alpha if tool_enabled else None
+            else:
+                alpha = role_alphas.get(role)
+            if alpha is not None:
+                weights[k] = alpha
+
+    if filter_mask is None:
+        return weights
+    # strict=True makes a mask of the wrong length raise instead of silently truncating the result.
+    return [alpha if keep else None for alpha, keep in zip(weights, filter_mask, strict=True)]
+
+
+def apply_echo_filter(
+    rollout: vf.RolloutOutput,
+    filter_fn: Callable[..., list[list[bool]]],
+) -> list[list[bool]]:
+    """Call ``filter_fn(rollout)`` and return its per-step keep masks once their shape and types check out."""
+    trajectory = rollout["trajectory"]
+    masks = filter_fn(rollout)
+
+    if not isinstance(masks, list):
+        raise TypeError(f"echo filter must return list[list[bool]], got {type(masks).__name__}")
+    if len(masks) != len(trajectory):
+        raise ValueError(
+            f"echo filter returned {len(masks)} per-step masks but the rollout has {len(trajectory)} trajectory steps"
+        )
+
+    for step_idx, (step, mask) in enumerate(zip(trajectory, masks)):
+        prompt_len = len(step["tokens"]["prompt_ids"])
+        completion_len = len(step["tokens"]["completion_ids"])
+        expected_len = prompt_len + completion_len
+
+        if not isinstance(mask, list):
+            raise TypeError(f"echo filter step {step_idx}: mask must be a list, got {type(mask).__name__}")
+        if len(mask) != expected_len:
+            raise ValueError(
+                f"echo filter step {step_idx}: mask length {len(mask)} "
+                f"!= expected {expected_len} "
+                f"(prompt_len={prompt_len}, completion_len={completion_len})"
+            )
+        for k, v in enumerate(mask):
+            if not isinstance(v, bool):  # bool cannot be subclassed, so this still rejects ints and bool-likes
+                raise TypeError(
+                    f"echo filter step {step_idx}: mask[{k}] must be a plain bool, got {type(v).__name__} ({v!r})"
+                )
+
+    return masks
diff --git a/src/prime_rl/orchestrator/envs.py b/src/prime_rl/orchestrator/envs.py
index fe02d2e61a..66129940f6 100644
--- a/src/prime_rl/orchestrator/envs.py
+++ b/src/prime_rl/orchestrator/envs.py
@@ -2,6 +2,7 @@
 
 import asyncio
 import atexit
+import functools
 import multiprocessing as mp
 import time
 from collections.abc import Awaitable, Callable, Iterator, Sequence
@@ -18,7 +19,7 @@
 from prime_rl.orchestrator.eval_utils import compute_pass_at_k
 from prime_rl.utils.logger import ProgressTracker, get_logger
 from prime_rl.utils.monitor import get_monitor
-from prime_rl.utils.utils import capitalize
+from prime_rl.utils.utils import capitalize, import_object
 
 REQUIRED_STATE_COLUMNS = ["trajectory"]
 
@@ -170,6 +171,10 @@ class TrainEnv(Env):
     def __init__(self, config: TrainEnvConfig):
         super().__init__(config)
         self.sampling_args = config.sampling.to_sampling_args()
+        self.echo_filter_fn: Callable[..., list[list[bool]]] | None = None  # None unless echo.filter is configured
+        if config.echo is not None and config.echo.filter is not None:
+            fn = import_object(config.echo.filter.import_path)  # presumably resolves the dotted path; see import_object
+            self.echo_filter_fn = functools.partial(fn, **config.echo.filter.kwargs)  # pre-bind the configured kwargs
 
     def get_dataset(self, seed: int | None = None):
         return self.env.get_dataset(seed=seed)
diff --git a/src/prime_rl/orchestrator/train_sink.py b/src/prime_rl/orchestrator/train_sink.py
index 26e7b915b0..6751a752a9 100644
--- a/src/prime_rl/orchestrator/train_sink.py
+++ b/src/prime_rl/orchestrator/train_sink.py
@@ -19,6 +19,7 @@
 
 from prime_rl.configs.orchestrator import AdvantageConfig, OrchestratorConfig
 from prime_rl.orchestrator.advantage import assign_advantages, setup_advantage_fn
+from prime_rl.orchestrator.echo import build_echo_annotations
 from prime_rl.orchestrator.envs import TrainEnvs
 from prime_rl.orchestrator.filters import RolloutFilter, apply_filters
 from prime_rl.orchestrator.trajectories import (
@@ -160,11 +161,16 @@ async def process_rollout(self, rollout: TrainRollout) -> None:
         needs_backfill = any(s["tokens"] is None for s in raw.get("trajectory") or [])
         if needs_backfill:
             await asyncio.to_thread(backfill_rollout_tokens, raw, self.tokenizer, renderer=self.renderer)
+
+        env = self.train_envs.get(rollout.env_name)
+        echo_annotations = await asyncio.to_thread(build_echo_annotations, raw, env.config.echo, env.echo_filter_fn)
+
         samples = await asyncio.to_thread(
             interleave_rollout,
             raw,
             mm_token_type_ids_mapping=self.mm_token_type_ids_mapping,
             env_name=rollout.env_name,
+            echo_annotations=echo_annotations,
         )
         rollout.samples = samples or []
         # Offload base64 image bytes to disk as soon as the rollout is
diff --git a/src/prime_rl/orchestrator/trajectories.py b/src/prime_rl/orchestrator/trajectories.py
index 3e8431c12a..6df0d0337d 100644
--- a/src/prime_rl/orchestrator/trajectories.py
+++ b/src/prime_rl/orchestrator/trajectories.py
@@ -9,6 +9,7 @@
 import verifiers as vf
 from transformers.tokenization_utils import PreTrainedTokenizer
 
+from prime_rl.orchestrator.echo import EchoAnnotations
 from prime_rl.transport import RoutedExperts, TrainingSample
 from prime_rl.utils.chat_template import (
     common_prefix_len,
@@ -206,6 +207,7 @@ def interleave_rollout(
     mm_token_type_ids_mapping: dict[int, int] | None = None,
     *,
     env_name: str = "",
+    echo_annotations: EchoAnnotations | None = None,
 ) -> list[TrainingSample] | None:
     """
     Convert vf.RolloutOutput to trainable rollouts by interleaving trajectory steps
@@ -225,6 +227,12 @@ def interleave_rollout(
     For VLM models, each renderer-produced trajectory step carries its
     per-image processed tensors inline on ``multi_modal_data``; the last
     merged step's sidecar covers every image in the sample.
+
+    Args:
+        output: vf.RolloutOutput containing trajectory data
+        mm_token_type_ids_mapping: Maps prompt-token ids to mm_token_type_ids
+            (1 = image, 2 = video, 0 otherwise). Renderer-supplied.
+        echo_annotations: Optional per-step echo alpha annotations.
     """
     logger = get_logger()
 
@@ -238,6 +246,7 @@ def interleave_rollout(
         return None
 
     has_error = output["error"] is not None
+    # completion_temperatures is left empty; the train sink fills it per-env later.
 
     def prepare_step_tokens(step: vf.TrajectoryStep, step_idx: int) -> dict[str, Any] | None:
         tokens = step["tokens"]
@@ -308,6 +317,7 @@ def make_sample(tokens: dict[str, Any], step_idx: int) -> TrainingSample:
             env_name=env_name,
             mm_token_type_ids=None,
             routed_experts=None,  # deferred — finalized at end of interleave_rollout
+            echo_alpha=echo_annotations.initial_sample_alpha(step_idx) if echo_annotations is not None else None,
         )
         # Initialize routed-experts state for this sample. First chunk is the
         # raw step routed_experts (no pad, no copy). running_len is the
@@ -385,6 +395,15 @@ def extend_sample(
             sample.completion_mask.extend(tokens["completion_mask"])
         sample.completion_logprobs.extend(tokens["completion_logprobs"])
 
+        if echo_annotations is not None:
+            step_prompt_len = len(tokens["prompt_ids"])
+            extension = echo_annotations.extension_alpha(step_idx, prefix_len, step_prompt_len)
+            if any(a is not None for a in extension) or sample.echo_alpha is not None:
+                if sample.echo_alpha is None:
+                    existing_len = len(sample.prompt_ids) + len(sample.completion_ids) - len(extension)
+                    sample.echo_alpha = [None] * existing_len
+                sample.echo_alpha.extend(extension)
+
         step_routed = tokens.get("routed_experts")
         state = sample_routed_state.get(id(sample))
         if state is not None:
diff --git a/src/prime_rl/trainer/batch.py b/src/prime_rl/trainer/batch.py
index ea99859a35..a127aae6fa 100644
--- a/src/prime_rl/trainer/batch.py
+++ b/src/prime_rl/trainer/batch.py
@@ -41,6 +41,16 @@ def _append_routed_experts(dst: MicroBatch, src: MicroBatch) -> None:
     dst_routed.shape[0] += src_routed.shape[0]
 
 
+def _extend_optional_token_field(current, values, existing_len: int, new_len: int, fill_value):
+    """Append this sample's values to an optional packed field; an absent side is padded with ``fill_value``."""
+    if current is None and values is None:
+        return None
+    if current is None:
+        current = [fill_value] * existing_len
+    current.extend([fill_value] * new_len if values is None else values)
+    return current
+
+
 def _pad_routed_experts(micro_batch: MicroBatch, padding_size: int) -> None:
     routed_experts = micro_batch.routed_experts
     assert routed_experts is not None
@@ -50,12 +60,9 @@ def _pad_routed_experts(micro_batch: MicroBatch, padding_size: int) -> None:
 
 
 def prepare_sample(training_example: TrainingSample, seq_len: int) -> MicroBatch:
-    """
-    Prepare a problem for sequence packing training.
-    Tokenize and prepare tensors.
-    """
+    """Prepare a sample for sequence-packing training: tokenize and build tensors."""
     input_ids = training_example.prompt_ids + training_example.completion_ids
-    loss_mask = training_example.prompt_mask + training_example.completion_mask
+    loss_mask = list(training_example.prompt_mask) + list(training_example.completion_mask)
     inference_logprobs = [0.0] * len(training_example.prompt_ids) + training_example.completion_logprobs
     advantages = [training_example.advantage] * len(input_ids)
     reward = training_example.reward if training_example.reward is not None else float("nan")
@@ -64,6 +71,23 @@ def prepare_sample(training_example: TrainingSample, seq_len: int) -> MicroBatch
     mm_token_type_ids = training_example.mm_token_type_ids
     assert training_example.env_name != "all", "env_name='all' is reserved for aggregate metric keys"
     env_names = [training_example.env_name] * len(input_ids)
+    # Echo overlay: token 0 has no valid shifted current-token logprob, so it
+    # stays masked even if the producer supplied an alpha there.
+    echo_alpha = training_example.echo_alpha
+    echo_mask: list[bool] | None = None
+    if echo_alpha is not None:
+        if len(echo_alpha) != len(input_ids):
+            raise ValueError(
+                f"echo_alpha length must match prompt_ids + completion_ids length "
+                f"({len(echo_alpha)} != {len(input_ids)}) for env {training_example.env_name!r}"
+            )
+        echo_mask = [False] * len(input_ids)
+        for k, alpha in enumerate(echo_alpha[1:], start=1):
+            if alpha is None:
+                continue
+            echo_mask[k] = True
+            advantages[k] = alpha
+            loss_mask[k] = True
 
     # Per-token temperatures: prompt tokens use first completion temp (masked out anyway)
     # Default to 1.0 if completion is empty (e.g., model generated only tool calls with no text)
@@ -92,6 +116,8 @@ def prepare_sample(training_example: TrainingSample, seq_len: int) -> MicroBatch
         if mm_token_type_ids is not None:
             mm_token_type_ids = mm_token_type_ids[:seq_len]
         env_names = env_names[:seq_len]
+        if echo_mask is not None:
+            echo_mask = echo_mask[:seq_len]
 
     assert (
         len(input_ids)
@@ -104,6 +130,8 @@ def prepare_sample(training_example: TrainingSample, seq_len: int) -> MicroBatch
     ), (
         f"input_ids: {len(input_ids)}, advantages: {len(advantages)}, loss_mask: {len(loss_mask)}, position_ids: {len(position_ids)}, inference_logprobs: {len(inference_logprobs)}, rewards: {len(rewards)}, temperatures: {len(temperatures)}"
     )
+    if echo_mask is not None:
+        assert len(echo_mask) == len(input_ids), f"echo_mask: {len(echo_mask)}, input_ids: {len(input_ids)}"
     if teacher_logprobs is not None:
         assert len(teacher_logprobs) == len(input_ids), f"teacher_logprobs: {len(teacher_logprobs)}"
 
@@ -133,6 +161,7 @@ def prepare_sample(training_example: TrainingSample, seq_len: int) -> MicroBatch
         env_names=env_names,
         mm_kwargs=training_example.mm_kwargs,
         training_mode=training_example.training_mode,
+        echo_mask=echo_mask,
     )
 
 
@@ -177,15 +206,16 @@ def packed_samples_into_micro_bs(
                 and bin_content.training_mode == sample.training_mode
             ):
                 existing_len = len(bin_content.input_ids)
+                sample_len = len(sample.input_ids)
+                bin_content.echo_mask = _extend_optional_token_field(
+                    bin_content.echo_mask, sample.echo_mask, existing_len, sample_len, False
+                )
                 bin_content.input_ids.extend(sample.input_ids)
                 bin_content.loss_mask.extend(sample.loss_mask)
                 bin_content.advantages.extend(sample.advantages)
-                if sample.rewards is not None:
-                    if bin_content.rewards is None:
-                        bin_content.rewards = [float("nan")] * existing_len
-                    bin_content.rewards.extend(sample.rewards)
-                elif bin_content.rewards is not None:
-                    bin_content.rewards.extend([float("nan")] * len(sample.input_ids))
+                bin_content.rewards = _extend_optional_token_field(
+                    bin_content.rewards, sample.rewards, existing_len, sample_len, float("nan")
+                )
                 bin_content.inference_logprobs.extend(sample.inference_logprobs)
                 bin_content.temperatures.extend(sample.temperatures)
                 if sample.teacher_logprobs is not None:
@@ -254,6 +284,8 @@ def pad_micro_batch(micro_batch: MicroBatch, pad_to_multiple_of: int) -> MicroBa
         micro_batch.mm_token_type_ids.extend([0] * padding_size)
     if micro_batch.routed_experts is not None:
         _pad_routed_experts(micro_batch, padding_size)
+    if micro_batch.echo_mask is not None:
+        micro_batch.echo_mask.extend([False] * padding_size)
     micro_batch.env_names.extend([""] * padding_size)
 
     return micro_batch
@@ -264,6 +296,8 @@ def _make_dummy_batch(source: MicroBatch) -> MicroBatch:
     dummy = copy.deepcopy(source)
     dummy.advantages = [0.0] * len(dummy.input_ids)
     dummy.loss_mask = [False] * len(dummy.input_ids)
+    if dummy.echo_mask is not None:
+        dummy.echo_mask = [False] * len(dummy.input_ids)
     return dummy
 
 
diff --git a/src/prime_rl/trainer/ckpt.py b/src/prime_rl/trainer/ckpt.py
index 296554e0e8..690a65676b 100644
--- a/src/prime_rl/trainer/ckpt.py
+++ b/src/prime_rl/trainer/ckpt.py
@@ -171,7 +171,12 @@ def save_to_path(
         # Checkpoint the local dataloader
         if dataloader is not None:
             dataloader_dir = path / "dataloader"
-            dataloader_dir.mkdir(parents=True, exist_ok=True)
+            # Avoid concurrent mkdir from all ranks — on parallel filesystems
+            # (e.g. beegfs) a non-master rank can hit EEXIST + is_dir()==False
+            # right after master creates the dir and have exist_ok=True fail.
+            if self.world.is_master:
+                dataloader_dir.mkdir(parents=True, exist_ok=True)
+            torch.distributed.barrier()
             torch.save(dataloader.state_dict(), dataloader_dir / f"rank_{self.world.rank}.pt")
 
         # Save sharded state
@@ -239,7 +244,9 @@ def save(
     ) -> None:
         """Save the full checkpoint state for a specified step."""
         ckpt_path = self.get_ckpt_path(step)
-        ckpt_path.parent.mkdir(parents=True, exist_ok=True)
+        if self.world.is_master:
+            ckpt_path.parent.mkdir(parents=True, exist_ok=True)
+        torch.distributed.barrier()
 
         self.save_to_path(ckpt_path, model, optimizers, scheduler, progress, dataloader)
         bisect.insort(self.ckpt_steps, step)
@@ -390,7 +397,9 @@ def save(
     ):
         """Save a HF-compatible weight-only checkpoint for a given step."""
         step_path = self.get_step_path(step)
-        step_path.mkdir(parents=True, exist_ok=True)
+        if self.world.is_master:
+            step_path.mkdir(parents=True, exist_ok=True)
+        torch.distributed.barrier()
 
         # Gather all weights on master rank
         self.logger.debug("Gathering weights on master rank for weight checkpoint")
diff --git a/src/prime_rl/trainer/rl/data.py b/src/prime_rl/trainer/rl/data.py
index 45acdcb5c0..e121ff3ffa 100644
--- a/src/prime_rl/trainer/rl/data.py
+++ b/src/prime_rl/trainer/rl/data.py
@@ -46,6 +46,10 @@ class TensorMicroBatch(TypedDict):
     # sft → sft loss). All samples in a micro batch share the same mode.
     training_mode: str
 
+    # True where the token participates in echo CE; None when the micro-batch
+    # has no echo tokens. ``advantages`` carries alpha on echo positions.
+    echo_mask: Bool[Tensor, "batch seq"] | None
+
 
 class FakeDataLoader:
     def __init__(self, config: FakeDataLoaderConfig, seq_len: int, dp_world_size: int):
@@ -120,6 +124,7 @@ def _get_sample_micro_batch(self, generator: torch.Generator) -> TensorMicroBatc
             "mm_kwargs": None,
             "mm_token_type_ids": None,
             "training_mode": "rl",
+            "echo_mask": None,
         }
 
     def _get_micro_batch(self, generator: torch.Generator) -> TensorMicroBatch:
@@ -148,6 +153,7 @@ def _get_micro_batch(self, generator: torch.Generator) -> TensorMicroBatch:
             "mm_kwargs": None,
             "mm_token_type_ids": None,
             "training_mode": "rl",
+            "echo_mask": None,
         }
 
 
@@ -243,6 +249,9 @@ def _micro_batch_to_tensor(self, micro_batch: MicroBatch) -> TensorMicroBatch:
             else None,
             routed_experts=routed_experts,
             training_mode=micro_batch.training_mode,
+            echo_mask=torch.tensor(micro_batch.echo_mask, dtype=torch.bool).unsqueeze(0)
+            if micro_batch.echo_mask is not None
+            else None,
         )
 
 
diff --git a/src/prime_rl/trainer/rl/loss.py b/src/prime_rl/trainer/rl/loss.py
index 9a9eb25a63..4927c1c2ee 100644
--- a/src/prime_rl/trainer/rl/loss.py
+++ b/src/prime_rl/trainer/rl/loss.py
@@ -19,6 +19,18 @@ class LossInputs:
     teacher_logprobs: Float[Tensor, " seq"] | None
     advantages: Float[Tensor, " seq"]
     loss_mask: Bool[Tensor, " seq"]
+    # Echo tokens are excluded from RL loss/metrics and trained through the
+    # echo CE term. The advantage tensor carries alpha on these positions.
+    echo_mask: Bool[Tensor, " seq"] | None = None
+    rl_loss_scale: int = 1
+    echo_loss_scale: int = 1
+
+
+@dataclass
+class LossMasks:
+    loss: Bool[Tensor, " seq"]  # the incoming loss_mask, passed through unchanged
+    rl: Bool[Tensor, " seq"]  # loss_mask & ~echo_mask: tokens left to the RL objective
+    echo: Bool[Tensor, " seq"]  # loss_mask & echo_mask: tokens trained by the echo CE term
 
 
 @dataclass
@@ -113,6 +125,16 @@ def compute_importance_ratio_and_mismatch_kl(
     return log_importance_ratio, importance_ratio, mismatch_kl
 
 
+def split_loss_masks(loss_mask: Tensor, echo_mask: Tensor | None) -> LossMasks:
+    """Split ``loss_mask`` into disjoint RL and echo masks; with no echo mask every trainable token is RL."""
+    if echo_mask is None:
+        no_echo = torch.zeros_like(loss_mask, dtype=torch.bool)
+        return LossMasks(loss=loss_mask, rl=loss_mask, echo=no_echo)
+    rl_mask = loss_mask & ~echo_mask
+    echo_train_mask = loss_mask & echo_mask
+    return LossMasks(loss=loss_mask, rl=rl_mask, echo=echo_train_mask)
+
+
 def default_loss_fn(inputs: LossInputs, loss_config: DefaultLossConfig) -> LossOutputs:
     """
     DPPO+KL loss for RL training, combining:
@@ -129,6 +151,7 @@ def default_loss_fn(inputs: LossInputs, loss_config: DefaultLossConfig) -> LossO
     inference_logprobs = inputs.inference_logprobs
     advantages = inputs.advantages
     loss_mask = inputs.loss_mask
+    masks = split_loss_masks(loss_mask, inputs.echo_mask)
 
     log_importance_ratio, importance_ratio, mismatch_kl = compute_importance_ratio_and_mismatch_kl(
         trainer_logprobs, inference_logprobs
@@ -141,27 +164,37 @@ def default_loss_fn(inputs: LossInputs, loss_config: DefaultLossConfig) -> LossO
     negative_advantages = advantages < 0
     dppo_invalid_mask = torch.where(positive_advantages, dppo_invalid_mask_high, dppo_invalid_mask_low)
 
-    is_masked = dppo_invalid_mask
-    is_masked_high = positive_advantages & dppo_invalid_mask_high
-    is_masked_low = negative_advantages & dppo_invalid_mask_low
-    drop_mask = loss_mask & is_masked
-    keep_mask = loss_mask & ~is_masked
+    dppo_drop_mask = masks.rl & dppo_invalid_mask
+    dppo_keep_mask = masks.rl & ~dppo_invalid_mask
+    is_masked_high = masks.rl & positive_advantages & dppo_invalid_mask_high
+    is_masked_low = masks.rl & negative_advantages & dppo_invalid_mask_low
 
     advantages = loss_config.adv_tau * advantages
-    pg_loss = keep_mask * advantages * importance_ratio
-    kl_loss = loss_mask * log_importance_ratio**2
-    loss = (-pg_loss + loss_config.kl_tau * kl_loss).sum()
+    rl_pg_loss = dppo_keep_mask * advantages * importance_ratio
+    rl_kl_loss = masks.rl * log_importance_ratio**2
+    rl_loss = (-rl_pg_loss + loss_config.kl_tau * rl_kl_loss).sum() / inputs.rl_loss_scale
+
+    if inputs.echo_mask is not None and masks.echo.any():
+        echo_loss = -(advantages * trainer_logprobs)[masks.echo].sum() / inputs.echo_loss_scale
+    else:
+        echo_loss = torch.zeros((), device=trainer_logprobs.device, dtype=trainer_logprobs.dtype)
+
+    loss = rl_loss + echo_loss
 
     metrics = {
-        "masked_mismatch_kl": _safe_mean(mismatch_kl, loss_mask & is_masked),  # all trainable, masked tokens
-        "unmasked_mismatch_kl": _safe_mean(mismatch_kl, keep_mask),  # all trainable, unmasked tokens
-        "is_masked": _safe_mean(is_masked, loss_mask),
-        "is_masked_low": _safe_mean(is_masked_low, loss_mask),
-        "is_masked_high": _safe_mean(is_masked_high, loss_mask),
-        "masked_advantage_positive": _safe_mean(positive_advantages, drop_mask),
-        "masked_advantage_negative": _safe_mean(negative_advantages, drop_mask),
+        "masked_mismatch_kl": _safe_mean(mismatch_kl, dppo_drop_mask),
+        "unmasked_mismatch_kl": _safe_mean(mismatch_kl, dppo_keep_mask),
+        "is_masked": _safe_mean(dppo_drop_mask, masks.rl),
+        "is_masked_low": _safe_mean(is_masked_low, masks.rl),
+        "is_masked_high": _safe_mean(is_masked_high, masks.rl),
+        "masked_advantage_positive": _safe_mean(positive_advantages, dppo_drop_mask),
+        "masked_advantage_negative": _safe_mean(negative_advantages, dppo_drop_mask),
     }
 
+    if inputs.echo_mask is not None:
+        metrics["echo_nll"] = _safe_mean(-trainer_logprobs, masks.echo)
+        metrics["echo_token_count"] = masks.echo.sum().float()
+
     return LossOutputs(loss=loss, metrics=metrics)
 
 
@@ -202,7 +235,7 @@ def opd_loss_fn(inputs: LossInputs) -> LossOutputs:
 
     pg_loss = keep_mask * advantages * importance_ratio
     kl_loss = loss_mask * log_importance_ratio**2
-    loss = (-pg_loss + 1e-3 * kl_loss).sum()
+    loss = (-pg_loss + 1e-3 * kl_loss).sum() / inputs.rl_loss_scale
 
     metrics = {
         "masked_mismatch_kl": _safe_mean(mismatch_kl, loss_mask & is_masked),
@@ -223,7 +256,7 @@ def sft_loss_fn(inputs: LossInputs) -> LossOutputs:
     trainer_logprobs = inputs.trainer_logprobs
     loss_mask = inputs.loss_mask
 
-    loss = -(trainer_logprobs[loss_mask]).sum()
+    loss = -(trainer_logprobs[loss_mask]).sum() / inputs.rl_loss_scale
     metrics = {
         "nll": _safe_mean(-trainer_logprobs, loss_mask),
     }
@@ -249,7 +282,13 @@ def setup_loss_fns(loss_config: LossConfig) -> dict[str, LossFn]:
         kwargs = loss_config.kwargs
 
         def rl_fn(inputs: LossInputs) -> LossOutputs:
-            return custom_fn(inputs, **kwargs)
+            if inputs.echo_mask is not None and inputs.echo_mask.any():
+                raise ValueError(
+                    "Echo is only supported with the default RL loss. "
+                    "CustomLossConfig receives the legacy loss_mask/advantages contract and cannot safely interpret echo."
+                )
+            result = custom_fn(inputs, **kwargs)
+            return LossOutputs(loss=result.loss / inputs.rl_loss_scale, metrics=result.metrics)
     else:
 
         def rl_fn(inputs: LossInputs) -> LossOutputs:
@@ -265,8 +304,10 @@ def compute_loss(
     advantages: list[Float[Tensor, " seq_i"]],
     loss_mask: list[Bool[Tensor, " seq_i"]],
     loss_fns: dict[str, LossFn],
-    loss_scale: int,
+    rl_loss_scale: int,
     training_mode: str = "rl",
+    echo_mask: list[Bool[Tensor, " seq_i"]] | None = None,
+    echo_loss_scale: int | None = None,
 ) -> tuple[Float[Tensor, ""], dict[str, Any]]:
     """
     Compute loss for packed sequences (batch size = 1, multiple sequences packed along sequence dimension).
@@ -282,11 +323,15 @@ def compute_loss(
         advantages: Advantages for each sequence
         loss_mask: Loss mask for each sequence
         loss_fns: Per-mode loss fn dispatch table from setup_loss_fns()
-        loss_scale: Scale factor to normalize the loss
+        rl_loss_scale: Global RL/non-echo token denominator
         training_mode: Selects which loss fn to apply
+        echo_mask: Per-sequence echo masks (parallel to loss_mask). Echo tokens
+            are excluded from RL terms and trained through the echo CE term.
+        echo_loss_scale: Global echo token denominator. Defaults to rl_loss_scale
+            for backward-compatible direct calls.
 
     Returns:
-        Tuple of (scaled_loss, aggregated_metrics)
+        Tuple of (total_loss, aggregated_metrics); total_loss is already normalized inside the loss fns
     """
     try:
         effective_loss_fn = loss_fns[training_mode]
@@ -301,13 +346,21 @@ def compute_loss(
 
     if teacher_logprobs is None:
         teacher_logprobs = [None] * len(trainer_logprobs)
+    if echo_mask is None:
+        echo_mask_list: list[Bool[Tensor, " seq_i"] | None] = [None] * len(trainer_logprobs)
+    else:
+        echo_mask_list = list(echo_mask)
+    if echo_loss_scale is None:
+        echo_loss_scale = rl_loss_scale
 
-    for t_logp, i_logp, teach_logp, adv, mask in zip(
+    for t_logp, i_logp, teach_logp, adv, mask, echo_m in zip(
         trainer_logprobs,
         inference_logprobs,
         teacher_logprobs,
         advantages,
         loss_mask,
+        echo_mask_list,
+        strict=True,
     ):
         inputs = LossInputs(
             trainer_logprobs=t_logp,
@@ -315,7 +368,12 @@ def compute_loss(
             teacher_logprobs=teach_logp,
             advantages=adv,
             loss_mask=mask,
+            echo_mask=echo_m,
+            rl_loss_scale=rl_loss_scale,
+            echo_loss_scale=echo_loss_scale,
         )
+        if echo_m is not None and echo_m.any() and training_mode != "rl":
+            raise ValueError("Echo is only supported for training_mode='rl'.")
 
         result = effective_loss_fn(inputs)
 
@@ -326,8 +384,6 @@ def compute_loss(
                 all_metrics[k] = []
             all_metrics[k].append(v)
 
-    scaled_loss = total_loss / loss_scale
-
     aggregated: dict[str, Any] = {}
     for k, v in all_metrics.items():
         if v[0].dim() == 0:
@@ -335,4 +391,4 @@ def compute_loss(
         else:
             aggregated[k] = torch.cat(v)
 
-    return scaled_loss, aggregated
+    return total_loss, aggregated
diff --git a/src/prime_rl/trainer/rl/packer.py b/src/prime_rl/trainer/rl/packer.py
index cf9dcfa02e..75af5185db 100644
--- a/src/prime_rl/trainer/rl/packer.py
+++ b/src/prime_rl/trainer/rl/packer.py
@@ -181,6 +181,11 @@ def _validate_sample(self, sample: TrainingSample) -> tuple[bool, str | None]:
                 False,
                 f"Run wrote a sample with teacher logprobs length != sample length ({len(sample.teacher_logprobs)} != {sample_length})",
             )
+        if sample.echo_alpha is not None and len(sample.echo_alpha) != sample_length:
+            return (
+                False,
+                f"Run wrote a sample with echo_alpha length != sample length ({len(sample.echo_alpha)} != {sample_length})",
+            )
         return True, None
 
     def _get_batch(self) -> None:
@@ -345,6 +350,20 @@ def setup_packer(
 ) -> BasePacker:
     multi_run_manager = get_multi_run_manager()
     if multi_run_manager.max_runs == 1:
-        return SinglePacker(dp_world_size, seq_len, pad_to_multiple_of, tokenizer, transport_config, start_step)
+        return SinglePacker(
+            dp_world_size,
+            seq_len,
+            pad_to_multiple_of,
+            tokenizer,
+            transport_config,
+            start_step,
+        )
     else:
-        return MultiPacker(dp_world_size, seq_len, pad_to_multiple_of, tokenizer, transport_config, start_step)
+        return MultiPacker(
+            dp_world_size,
+            seq_len,
+            pad_to_multiple_of,
+            tokenizer,
+            transport_config,
+            start_step,
+        )
diff --git a/src/prime_rl/trainer/rl/token_export.py b/src/prime_rl/trainer/rl/token_export.py
index b1f7c96cfa..6d3ffc878a 100644
--- a/src/prime_rl/trainer/rl/token_export.py
+++ b/src/prime_rl/trainer/rl/token_export.py
@@ -9,7 +9,7 @@
 from torch import Tensor
 
 from prime_rl.configs.trainer import DefaultLossConfig, TrainerConfig
-from prime_rl.trainer.rl.loss import compute_importance_ratio_and_mismatch_kl
+from prime_rl.trainer.rl.loss import compute_importance_ratio_and_mismatch_kl, split_loss_masks
 
 SCHEMA_VERSION = 1
 
@@ -124,20 +124,25 @@ def _export_columns(
     seq_len = len(token_ids)
     trainer_logprobs = model_output["logprobs"]
     export_tensors = _compute_export_tensors(micro_batch, trainer_logprobs, loss_config)
+    rl_loss_mask = export_tensors["rl_loss_mask"]
 
     return {
         "token_ids": token_ids,
         "position_ids": _tensor_to_ints(micro_batch["position_ids"]),
         "loss_mask": _tensor_to_bools(micro_batch["loss_mask"]),
+        "echo_mask": _optional_tensor_to_bools(micro_batch.get("echo_mask"), seq_len),
+        "rl_loss_mask": _optional_tensor_to_bools(rl_loss_mask, seq_len),
         "advantages": _tensor_to_floats(micro_batch["advantages"]),
         "rewards": _optional_tensor_to_floats(micro_batch.get("rewards"), seq_len),
         "inference_logprobs": _tensor_to_floats(micro_batch["inference_logprobs"]),
         "trainer_logprobs": _tensor_to_floats(trainer_logprobs),
         "entropy": _tensor_to_floats(model_output["entropy"]),
-        "mismatch_kl": _optional_tensor_to_floats(export_tensors["mismatch_kl"], seq_len),
-        "log_importance_ratio": _optional_tensor_to_floats(export_tensors["log_importance_ratio"], seq_len),
-        "importance_ratio": _optional_tensor_to_floats(export_tensors["importance_ratio"], seq_len),
-        "prob_delta": _optional_tensor_to_floats(export_tensors["prob_delta"], seq_len),
+        "mismatch_kl": _optional_tensor_to_floats(export_tensors["mismatch_kl"], seq_len, mask=rl_loss_mask),
+        "log_importance_ratio": _optional_tensor_to_floats(
+            export_tensors["log_importance_ratio"], seq_len, mask=rl_loss_mask
+        ),
+        "importance_ratio": _optional_tensor_to_floats(export_tensors["importance_ratio"], seq_len, mask=rl_loss_mask),
+        "prob_delta": _optional_tensor_to_floats(export_tensors["prob_delta"], seq_len, mask=rl_loss_mask),
         "is_masked": _optional_tensor_to_bools(export_tensors["is_masked"], seq_len),
         "is_masked_high": _optional_tensor_to_bools(export_tensors["is_masked_high"], seq_len),
         "is_masked_low": _optional_tensor_to_bools(export_tensors["is_masked_low"], seq_len),
@@ -156,16 +161,21 @@ def _compute_export_tensors(
         "is_masked": None,
         "is_masked_high": None,
         "is_masked_low": None,
+        "rl_loss_mask": None,
     }
     if micro_batch["training_mode"] == "sft":
         return fields
 
     inference_logprobs = micro_batch["inference_logprobs"].to(trainer_logprobs.device)
     loss_mask = micro_batch["loss_mask"].to(trainer_logprobs.device)
+    echo_mask_raw = micro_batch.get("echo_mask")
+    echo_mask = echo_mask_raw.to(trainer_logprobs.device) if echo_mask_raw is not None else None
+    masks = split_loss_masks(loss_mask, echo_mask)
     advantages = micro_batch["advantages"].to(trainer_logprobs.device)
     with torch.no_grad():
         log_ratio, ratio, mismatch_kl = compute_importance_ratio_and_mismatch_kl(trainer_logprobs, inference_logprobs)
         prob_delta = torch.exp(trainer_logprobs) - torch.exp(inference_logprobs)
+        fields["rl_loss_mask"] = masks.rl
         fields["log_importance_ratio"] = log_ratio
         fields["importance_ratio"] = ratio
         fields["mismatch_kl"] = mismatch_kl
@@ -176,9 +186,9 @@ def _compute_export_tensors(
             positive_advantages = advantages > 0
             negative_advantages = advantages < 0
             invalid = torch.where(positive_advantages, invalid_high, invalid_low)
-            fields["is_masked"] = loss_mask & invalid
-            fields["is_masked_high"] = loss_mask & positive_advantages & invalid_high
-            fields["is_masked_low"] = loss_mask & negative_advantages & invalid_low
+            fields["is_masked"] = masks.rl & invalid
+            fields["is_masked_high"] = masks.rl & positive_advantages & invalid_high
+            fields["is_masked_low"] = masks.rl & negative_advantages & invalid_low
     return fields
 
 
@@ -195,10 +205,15 @@ def _tensor_to_floats(tensor: Tensor) -> list[float | None]:
     return [_json_float(value) for value in values]
 
 
-def _optional_tensor_to_floats(tensor: Tensor | None, seq_len: int) -> list[float | None]:
+def _optional_tensor_to_floats(tensor: Tensor | None, seq_len: int, mask: Tensor | None = None) -> list[float | None]:
     if tensor is None:
         return [None] * seq_len
-    return _tensor_to_floats(tensor)
+    if mask is None:
+        return _tensor_to_floats(tensor)
+
+    values = tensor.detach().to(dtype=torch.float32, device="cpu").reshape(-1).tolist()
+    keep = mask.detach().to(dtype=torch.bool, device="cpu").reshape(-1).tolist()
+    return [_json_float(value) if keep_value else None for value, keep_value in zip(values, keep, strict=True)]
 
 
 def _optional_tensor_to_bools(tensor: Tensor | None, seq_len: int) -> list[bool | None]:
diff --git a/src/prime_rl/trainer/rl/train.py b/src/prime_rl/trainer/rl/train.py
index 83afa666dc..ad628be08f 100644
--- a/src/prime_rl/trainer/rl/train.py
+++ b/src/prime_rl/trainer/rl/train.py
@@ -32,6 +32,7 @@
     compute_importance_ratio_and_mismatch_kl,
     selective_log_softmax,
     setup_loss_fns,
+    split_loss_masks,
     shift_tensor_left,
     shift_tensor_right,
 )
@@ -350,15 +351,26 @@ def load_run_checkpoint(_optimizer, idx: int) -> None:
         forward_backward_start_time = time.perf_counter()
         seq_len = micro_batches[0]["input_ids"].shape[1]
 
-        # Normalize by the global (dp_cp) number of unmasked tokens in the batch, so every rank
-        # divides by the same denominator. With a per-rank denominator, ranks with fewer loss
-        # tokens implicitly upweight their per-token gradient contribution after FSDP averaging.
+        # Normalize by global (dp_cp) denominators so every rank divides by the
+        # same values; with per-rank denominators, ranks with fewer loss tokens
+        # would be upweighted after FSDP averaging. RL and echo terms use separate
+        # denominators so echoed prompt/tool tokens should not dilute RL gradients.
         # FSDP's per-rank divide is undone after the microbatch loop via fsdp_gradient_divide_factor.
-        local_loss_scale = sum(micro_batch["loss_mask"].sum().item() for micro_batch in micro_batches)
-        global_loss_scale = torch.tensor(local_loss_scale, dtype=torch.int64, device="cuda")
+        local_rl_loss_scale = 0
+        local_echo_loss_scale = 0
+        for micro_batch in micro_batches:
+            masks = split_loss_masks(micro_batch["loss_mask"], micro_batch.get("echo_mask"))
+            local_rl_loss_scale += masks.rl.sum().item()
+            local_echo_loss_scale += masks.echo.sum().item()
+        global_loss_scales = torch.tensor(
+            [local_rl_loss_scale, local_echo_loss_scale],
+            dtype=torch.int64,
+            device="cuda",
+        )
         dp_cp_group = parallel_dims.get_mesh("dp_cp").get_group()
-        dist.all_reduce(global_loss_scale, op=dist.ReduceOp.SUM, group=dp_cp_group)
-        loss_scale = max(global_loss_scale.item(), 1)
+        dist.all_reduce(global_loss_scales, op=dist.ReduceOp.SUM, group=dp_cp_group)
+        rl_loss_scale = max(global_loss_scales[0].item(), 1)
+        echo_loss_scale = max(global_loss_scales[1].item(), 1)
 
         logger.debug(f"Starting forward and backward pass ({batch_size=})")
         tensors = Tensors()  # Used to accumulate tensor statistics across micro-batches and ranks for logging
@@ -372,6 +384,7 @@ def load_run_checkpoint(_optimizer, idx: int) -> None:
             position_ids = micro_batch["position_ids"].to("cuda")
             advantages = micro_batch["advantages"].to("cuda")
             loss_mask = micro_batch["loss_mask"].to("cuda")
+            echo_mask = micro_batch["echo_mask"].to("cuda") if micro_batch.get("echo_mask") is not None else None
             inference_logprobs = micro_batch["inference_logprobs"].to("cuda")
             teacher_logprobs = (
                 micro_batch["teacher_logprobs"].to("cuda") if micro_batch["teacher_logprobs"] is not None else None
@@ -473,6 +486,7 @@ def load_run_checkpoint(_optimizer, idx: int) -> None:
 
             # Compute loss
             response_lengths = get_response_lengths(position_ids)
+            echo_mask_split = echo_mask.squeeze().split(response_lengths) if echo_mask is not None else None
             loss, loss_tensors = compute_loss(
                 trainer_logprobs=out["logprobs"].squeeze().split(response_lengths),
                 inference_logprobs=inference_logprobs.squeeze().split(response_lengths),
@@ -482,8 +496,10 @@ def load_run_checkpoint(_optimizer, idx: int) -> None:
                 advantages=advantages.squeeze().split(response_lengths),
                 loss_mask=loss_mask.squeeze().split(response_lengths),
                 loss_fns=loss_fns,
-                loss_scale=loss_scale,
+                rl_loss_scale=rl_loss_scale,
                 training_mode=micro_batch["training_mode"],
+                echo_mask=echo_mask_split,
+                echo_loss_scale=echo_loss_scale,
             )
 
             # Backward pass
@@ -491,12 +507,13 @@ def load_run_checkpoint(_optimizer, idx: int) -> None:
                 loss.backward()
 
             # Add relevant tensors to tensor dict for logging purposes
-            entropy = out["entropy"][loss_mask].detach().to("cpu")
+            loss_masks = split_loss_masks(loss_mask, echo_mask)
+            entropy = out["entropy"][loss_masks.rl].detach().to("cpu")
             tensors["entropy/all"].append(entropy)
             tensors["loss"].append(loss.detach().to("cpu").unsqueeze(0))
 
             env_names = micro_batch["env_names"]
-            masked_env_names = [env_name for env_name, keep in zip(env_names, loss_mask.flatten().tolist()) if keep]
+            masked_env_names = [env_name for env_name, keep in zip(env_names, loss_masks.rl.flatten().tolist()) if keep]
             env_to_indices: dict[str, list[int]] = {}
             for idx, env_name in enumerate(masked_env_names):
                 env_to_indices.setdefault(env_name, []).append(idx)
@@ -507,7 +524,7 @@ def load_run_checkpoint(_optimizer, idx: int) -> None:
             if micro_batch["training_mode"] != "sft":
                 with torch.no_grad():
                     _, _, mismatch_kl = compute_importance_ratio_and_mismatch_kl(out["logprobs"], inference_logprobs)
-                mismatch_kl = mismatch_kl[loss_mask].detach().to("cpu")
+                mismatch_kl = mismatch_kl[loss_masks.rl].detach().to("cpu")
                 tensors["mismatch_kl/all"].append(mismatch_kl)
                 for env_name, indices in env_to_indices.items():
                     tensors[f"mismatch_kl/{env_name}"].append(mismatch_kl[indices])
diff --git a/src/prime_rl/transport/types.py b/src/prime_rl/transport/types.py
index 1bb31c9325..07bd4914c8 100644
--- a/src/prime_rl/transport/types.py
+++ b/src/prime_rl/transport/types.py
@@ -56,6 +56,11 @@ class TrainingSample(msgspec.Struct, array_like=True, gc=False, omit_defaults=Tr
     # taus), sft uses sft_loss_fn. Stamped by the orchestrator from training_mode.
     training_mode: TrainingMode = "rl"
 
+    # Per-token echo alpha parallel to prompt_ids + completion_ids. Field None
+    # means no echo; per-token None means ordinary RL; a float means echo CE
+    # with that alpha. ``0.0`` is distinct from None.
+    echo_alpha: list[float | None] | None = None
+
 
 class TrainingBatch(msgspec.Struct, array_like=True, gc=False, omit_defaults=True):
     """A batch of training examples with metadata for transport."""
@@ -89,3 +94,7 @@ class MicroBatch(msgspec.Struct, array_like=True, gc=False, omit_defaults=True):
     # sft → sft loss). All samples packed into a micro batch share the same mode.
     training_mode: TrainingMode = "rl"
     rewards: list[float] | None = None
+
+    # True where the token participates in echo CE. Survives packing/padding
+    # like ``loss_mask``; None if no sample echoes.
+    echo_mask: list[bool] | None = None
diff --git a/tests/unit/orchestrator/test_batch.py b/tests/unit/orchestrator/test_batch.py
index 7531423c72..574c808d71 100644
--- a/tests/unit/orchestrator/test_batch.py
+++ b/tests/unit/orchestrator/test_batch.py
@@ -117,6 +117,25 @@ def test_prepare_sample_propagates_training_mode(make_training_example):
     assert micro_batch.training_mode == "sft"
 
 
+def test_prepare_sample_echo_overlay(make_training_example):
+    example = make_training_example()
+    example.echo_alpha = [0.5, 0.0, 0.25, None]
+
+    micro_batch = prepare_sample(example, seq_len=16)
+
+    assert micro_batch.advantages == [1.0, 0.0, 0.25, 1.0]
+    assert micro_batch.loss_mask == [False, True, True, True]
+    assert micro_batch.echo_mask == [False, True, True, False]
+
+
+def test_prepare_sample_rejects_misaligned_echo_alpha(make_training_example):
+    example = make_training_example()
+    example.echo_alpha = [0.5]
+
+    with pytest.raises(ValueError, match="echo_alpha length"):
+        prepare_sample(example, seq_len=16)
+
+
 def test_prepare_batch_does_not_pack_mixed_training_mode(make_training_example):
     rl_example = make_training_example(training_mode="rl")
     sft_example = make_training_example(training_mode="sft")
diff --git a/tests/unit/orchestrator/test_trajectories.py b/tests/unit/orchestrator/test_trajectories.py
index bda129cd43..00a5836e78 100644
--- a/tests/unit/orchestrator/test_trajectories.py
+++ b/tests/unit/orchestrator/test_trajectories.py
@@ -1,15 +1,30 @@
+import asyncio
+import uuid
+from types import SimpleNamespace
 from unittest.mock import MagicMock
 
 import numpy as np
 import pybase64
 import pytest
 import verifiers as vf
-
+from pydantic import ValidationError
+
+from prime_rl.configs.orchestrator import (
+    AssistantRoleEchoConfig,
+    EchoConfig,
+    EchoFilterConfig,
+    SystemRoleEchoConfig,
+    ToolRoleEchoConfig,
+    UserRoleEchoConfig,
+)
+from prime_rl.orchestrator.echo import _build_step_echo_alpha, apply_echo_filter, build_echo_annotations
+from prime_rl.orchestrator.train_sink import TrainSink
 from prime_rl.orchestrator.trajectories import (
     _deserialize_tool_calls,
     align_routed_experts,
     interleave_rollout,
 )
+from prime_rl.orchestrator.types import TrainRollout
 
 _interleave_rollout = interleave_rollout
 
@@ -1378,3 +1393,402 @@ def test_interleave_rollout_packs_pixels_from_renderer_mm_data():
     assert _decode_mm_thw(sample) == [[1, 2, 3], [1, 4, 4]]
     # mm_token_type_ids: image at token 2, video at token 5, rest 0.
     assert sample.mm_token_type_ids == [0, 1, 0, 0, 2, 0, 0]
+
+
+# ---------------------------------------------------------------------------
+# Per-role echo_alpha construction
+# ---------------------------------------------------------------------------
+
+
+def _attribution(
+    message_indices: list[int],
+    is_content: list[bool],
+    message_roles: list[str] | None = None,
+    message_tool_names: list[str | None] | None = None,
+) -> dict:
+    out: dict = {"message_indices": message_indices, "is_content": is_content}
+    if message_roles is not None:
+        out["message_roles"] = message_roles
+    if message_tool_names is not None:
+        out["message_tool_names"] = message_tool_names
+    return out
+
+
+@pytest.mark.parametrize(
+    ("attribution", "prompt_len", "completion_len", "echo_config", "expected"),
+    [
+        pytest.param(
+            None,
+            4,
+            2,
+            EchoConfig(assistant=AssistantRoleEchoConfig(alpha=0.3)),
+            [None, None, None, None, 0.3, 0.3],
+            id="no_attribution_marks_assistant_completion",
+        ),
+        pytest.param(
+            _attribution(
+                message_indices=[0, 0, 1, 1, 2, 2],
+                is_content=[False, True, False, True, False, True],
+                message_roles=["user", "tool", "tool"],
+                message_tool_names=[None, "calc", "lookup"],
+            ),
+            6,
+            0,
+            EchoConfig(tool=ToolRoleEchoConfig(alpha=0.5, tool_names=["lookup"])),
+            [None, None, None, None, None, 0.5],
+            id="tool_name_filter",
+        ),
+        pytest.param(
+            _attribution(message_indices=[0], is_content=[True], message_roles=["user"]),
+            1,
+            2,
+            EchoConfig(assistant=AssistantRoleEchoConfig(alpha=0.0), tool=None),
+            [None, 0.0, 0.0],
+            id="assistant_zero_kills_rl",
+        ),
+        pytest.param(
+            _attribution(
+                message_indices=[0, 1, 2],
+                is_content=[True, True, True],
+                message_roles=["user", "tool", "system"],
+                message_tool_names=[None, "lookup", None],
+            ),
+            3,
+            2,
+            EchoConfig(
+                user=UserRoleEchoConfig(alpha=0.1),
+                tool=ToolRoleEchoConfig(alpha=0.5),
+                system=SystemRoleEchoConfig(alpha=0.05),
+                assistant=AssistantRoleEchoConfig(alpha=0.9),
+            ),
+            [0.1, 0.5, 0.05, 0.9, 0.9],
+            id="per_role_alphas_differ",
+        ),
+        pytest.param(
+            _attribution(
+                message_indices=[0, 0, 0],
+                is_content=[False, True, True],
+                message_roles=["system"],
+            ),
+            3,
+            0,
+            EchoConfig(system=SystemRoleEchoConfig(alpha=0.1), tool=None),
+            [None, 0.1, 0.1],
+            id="system_role",
+        ),
+        pytest.param(
+            _attribution(
+                message_indices=[0, 0, 1, 1],
+                is_content=[False, True, False, True],
+                message_roles=["user", "tool"],
+                message_tool_names=[None, "lookup"],
+            ),
+            4,
+            0,
+            EchoConfig(user=UserRoleEchoConfig(alpha=0.2), tool=None),
+            [None, 0.2, None, None],
+            id="user_role",
+        ),
+        pytest.param(
+            _attribution(
+                message_indices=[0, 0, 1, 1],
+                is_content=[False, True, False, True],
+                message_roles=["user", "assistant"],
+            ),
+            4,
+            3,
+            EchoConfig(assistant=AssistantRoleEchoConfig(alpha=0.8), tool=None),
+            [None, None, None, 0.8, 0.8, 0.8, 0.8],
+            id="assistant_prompt_and_completion",
+        ),
+        pytest.param(
+            _attribution(
+                message_indices=[0, 0, 1, 1, 1],
+                is_content=[False, True, False, True, True],
+                message_roles=["user", "tool"],
+                message_tool_names=[None, "lookup"],
+            ),
+            5,
+            2,
+            EchoConfig(tool=ToolRoleEchoConfig(alpha=0.7, tool_names=None)),
+            [None, None, None, 0.7, 0.7, None, None],
+            id="tool_default_all_tools",
+        ),
+        pytest.param(
+            _attribution(
+                message_indices=[0, 0, 0, 0],
+                is_content=[False, False, True, False],
+                message_roles=["tool"],
+                message_tool_names=["lookup"],
+            ),
+            4,
+            0,
+            EchoConfig(tool=ToolRoleEchoConfig(alpha=0.4)),
+            [None, None, 0.4, None],
+            id="skips_non_content_tokens",
+        ),
+    ],
+)
+def test_build_step_echo_alpha_baseline(attribution, prompt_len, completion_len, echo_config, expected):
+    assert (
+        _build_step_echo_alpha(
+            prompt_attribution=attribution,
+            prompt_len=prompt_len,
+            completion_len=completion_len,
+            echo_config=echo_config,
+        )
+        == expected
+    )
+
+
+def test_echo_config_rejects_filter_without_role():
+    with pytest.raises(ValidationError, match="at least one of"):
+        EchoConfig(filter=EchoFilterConfig(import_path="my_module.my_filter"))
+
+
+def test_tool_role_echo_config_rejects_empty_tool_names():
+    with pytest.raises(ValidationError, match=r"too_short|at least 1"):
+        ToolRoleEchoConfig(tool_names=[])
+
+
+def test_tool_role_echo_config_coerces_tool_names_to_set():
+    config = ToolRoleEchoConfig(tool_names=["lookup", "lookup", "calc"])
+
+    assert config.tool_names == {"lookup", "calc"}
+
+
+# ---------------------------------------------------------------------------
+# _build_step_echo_alpha — filter_mask composition
+# ---------------------------------------------------------------------------
+
+
+def _tool_only_attribution(prompt_len: int) -> dict:
+    return _attribution(
+        message_indices=[0] * prompt_len,
+        is_content=[True] * prompt_len,
+        message_roles=["tool"],
+        message_tool_names=["lookup"],
+    )
+
+
+@pytest.mark.parametrize(
+    ("attribution", "prompt_len", "completion_len", "echo_config", "filter_mask", "expected"),
+    [
+        pytest.param(
+            _tool_only_attribution(4),
+            4,
+            2,
+            EchoConfig(tool=ToolRoleEchoConfig(alpha=0.5)),
+            [True, False, True, False, False, False],
+            [0.5, None, 0.5, None, None, None],
+            id="narrows_baseline",
+        ),
+        pytest.param(
+            _attribution(
+                message_indices=[0, 0, 1, 1],
+                is_content=[True, True, True, True],
+                message_roles=["user", "tool"],
+                message_tool_names=[None, "lookup"],
+            ),
+            4,
+            0,
+            EchoConfig(tool=ToolRoleEchoConfig(alpha=0.5)),
+            [True, True, True, True],
+            [None, None, 0.5, 0.5],
+            id="cannot_add_echo_to_disabled_role",
+        ),
+        pytest.param(
+            _attribution(
+                message_indices=[0, 1, 2],
+                is_content=[True, True, True],
+                message_roles=["system", "user", "tool"],
+                message_tool_names=[None, None, "lookup"],
+            ),
+            3,
+            2,
+            EchoConfig(
+                system=SystemRoleEchoConfig(alpha=0.05),
+                user=UserRoleEchoConfig(alpha=0.1),
+                tool=ToolRoleEchoConfig(alpha=0.5),
+                assistant=AssistantRoleEchoConfig(alpha=0.9),
+            ),
+            [True, False, True, True, False],
+            [0.05, None, 0.5, 0.9, None],
+            id="mixed_roles",
+        ),
+    ],
+)
+def test_build_step_echo_alpha_filter_composition(
+    attribution, prompt_len, completion_len, echo_config, filter_mask, expected
+):
+    assert (
+        _build_step_echo_alpha(
+            prompt_attribution=attribution,
+            prompt_len=prompt_len,
+            completion_len=completion_len,
+            echo_config=echo_config,
+            filter_mask=filter_mask,
+        )
+        == expected
+    )
+
+
+# ---------------------------------------------------------------------------
+# apply_echo_filter — shape/type validation + invocation contract
+# ---------------------------------------------------------------------------
+
+
+def _step_with_tokens(prompt_len: int, completion_len: int, attribution: dict | None = None) -> vf.TrajectoryStep:
+    tokens_kwargs: dict = dict(
+        prompt_ids=list(range(prompt_len)),
+        prompt_mask=[0] * prompt_len,
+        completion_ids=list(range(prompt_len, prompt_len + completion_len)),
+        completion_mask=[1] * completion_len,
+        completion_logprobs=[-0.1] * completion_len,
+        overlong_prompt=False,
+        is_truncated=False,
+    )
+    if attribution is not None:
+        tokens_kwargs["prompt_attribution"] = attribution
+    return vf.TrajectoryStep(
+        prompt=[{"role": "user", "content": "U"}],
+        completion=[{"role": "assistant", "content": "A"}],
+        response=MagicMock(),
+        tokens=vf.TrajectoryStepTokens(**tokens_kwargs),
+        reward=None,
+        advantage=None,
+        is_truncated=False,
+        trajectory_id="t",
+        extras={},
+    )
+
+
+def _rollout_with_steps(*step_dims: tuple, env_name: str = "test-env") -> vf.RolloutOutput:
+    return vf.RolloutOutput(
+        example_id=0,
+        env_name=env_name,
+        trajectory=[_step_with_tokens(*dims) for dims in step_dims],
+        sampling_args={"temperature": 1.0},
+        error=None,
+    )
+
+
+def _const_filter(masks):
+    def filter_fn(rollout):
+        return masks
+
+    return filter_fn
+
+
+@pytest.mark.parametrize(
+    ("dims", "filter_return", "exc_type", "match"),
+    [
+        pytest.param(
+            [(3, 2), (4, 1)], [[True] * 5], ValueError, r"returned 1 per-step masks.*has 2", id="outer_too_short"
+        ),
+        pytest.param(
+            [(3, 2)], [[True] * 5, [True] * 5], ValueError, r"returned 2 per-step masks.*has 1", id="outer_too_long"
+        ),
+        pytest.param(
+            [(3, 2), (4, 1)],
+            [[True] * 5, [True] * 3],
+            ValueError,
+            r"step 1.*mask length 3.*expected 5.*prompt_len=4.*completion_len=1",
+            id="inner_mismatch",
+        ),
+        pytest.param([(2, 1)], "not a list", TypeError, r"must return list.*got str", id="non_list_return"),
+        pytest.param(
+            [(1, 1)], [[True, 1]], TypeError, r"step 0.*mask\[1\].*must be a plain bool.*int", id="non_bool_int"
+        ),
+    ],
+)
+def test_apply_echo_filter_invalid_raises(dims, filter_return, exc_type, match):
+    rollout = _rollout_with_steps(*dims)
+    with pytest.raises(exc_type, match=match):
+        apply_echo_filter(rollout, _const_filter(filter_return))
+
+
+def test_apply_echo_filter_receives_full_rollout():
+    rollout = _rollout_with_steps((2, 1))
+    seen: dict = {}
+
+    def filter_fn(rollout):
+        seen.update(example_id=rollout["example_id"], error=rollout["error"], n=len(rollout["trajectory"]))
+        return [[True] * 3]
+
+    apply_echo_filter(rollout, filter_fn)
+    assert seen == {"example_id": 0, "error": None, "n": 1}
+
+
+def test_apply_echo_filter_empty_trajectory_returns_empty_masks():
+    rollout = vf.RolloutOutput(
+        example_id=0, env_name="test-env", trajectory=[], sampling_args={"temperature": 1.0}, error=None
+    )
+    assert apply_echo_filter(rollout, _const_filter([])) == []
+
+
+def test_apply_echo_filter_propagates_user_exception():
+    rollout = _rollout_with_steps((2, 1))
+
+    class FilterCrash(RuntimeError):
+        pass
+
+    def filter_fn(rollout):
+        raise FilterCrash("boom")
+
+    with pytest.raises(FilterCrash, match="boom"):
+        apply_echo_filter(rollout, filter_fn)
+
+
+_TOOL_ATTRIBUTION = {
+    "message_indices": [0, 0, 0],
+    "is_content": [True, True, True],
+    "message_roles": ["tool"],
+    "message_tool_names": ["lookup"],
+}
+
+
+def test_interleave_rollout_filter_masks_narrows_sample_echo_alpha():
+    rollout = _rollout_with_steps((3, 2, _TOOL_ATTRIBUTION))
+    echo_config = EchoConfig(tool=ToolRoleEchoConfig(alpha=0.5))
+    annotations = build_echo_annotations(rollout, echo_config, _const_filter([[True, False, True, True, True]]))
+
+    filtered = _interleave_rollout(rollout, echo_annotations=annotations)
+    assert filtered[0].echo_alpha == [0.5, None, 0.5, None, None]
+
+
+def test_train_sink_runs_echo_filter_without_prompt_attribution(tmp_path):
+    rollout_output = _rollout_with_steps((2, 2))
+    filter_fn = MagicMock(return_value=[[True, True, True, False]])
+
+    env = SimpleNamespace(
+        echo_filter_fn=filter_fn,
+        config=SimpleNamespace(echo=EchoConfig(assistant=AssistantRoleEchoConfig(alpha=0.3))),
+    )
+    train_envs = SimpleNamespace(get=lambda _env_name: env)
+    sink = TrainSink(
+        SimpleNamespace(output_dir=tmp_path),
+        tokenizer=None,
+        renderer=None,
+        train_envs=train_envs,
+        mm_token_type_ids_mapping=None,
+        batch_size=1,
+        token_batch_size=None,
+        advantage_config=None,
+        pre_filters=[],
+        post_filters=[],
+    )
+    rollout = TrainRollout(
+        raw=rollout_output,
+        env_name="test-env",
+        example_id=0,
+        group_id=uuid.uuid4(),
+        policy_version=0,
+        off_policy_steps=0,
+    )
+
+    asyncio.run(sink.process_rollout(rollout))
+
+    filter_fn.assert_called_once_with(rollout_output)
+    assert len(rollout.samples) == 1
+    assert rollout.samples[0].echo_alpha == [None, None, 0.3, None]
diff --git a/tests/unit/train/rl/test_loss.py b/tests/unit/train/rl/test_loss.py
index 1585dac7bd..7fa524d256 100644
--- a/tests/unit/train/rl/test_loss.py
+++ b/tests/unit/train/rl/test_loss.py
@@ -22,7 +22,7 @@ def test_grpo_loss():
         advantages,
         loss_mask=loss_mask,
         loss_fns=loss_fns,
-        loss_scale=1.0,
+        rl_loss_scale=1.0,
     )
     assert loss.shape == ()
 
@@ -42,7 +42,7 @@ def test_gspo_loss():
         advantages,
         loss_mask=loss_mask,
         loss_fns=loss_fns,
-        loss_scale=1.0,
+        rl_loss_scale=1.0,
     )
     assert loss.shape == ()
 
@@ -89,11 +89,11 @@ def test_sft_loss_matches_masked_nll():
         advantages=advantages,
         loss_mask=loss_mask,
         loss_fns=loss_fns,
-        loss_scale=2,
+        rl_loss_scale=2,
         training_mode="sft",
     )
 
-    # loss = -sum(masked logprobs) / loss_scale = -(-0.1 - 0.2) / 2 = 0.15
+    # loss = -sum(masked logprobs) / rl_loss_scale = -(-0.1 - 0.2) / 2 = 0.15
     assert torch.isclose(loss, torch.tensor(0.15, device=loss.device), atol=1e-6)
     assert "nll" in metrics
 
@@ -112,7 +112,7 @@ def test_sft_loss_override_uses_masked_nll_with_default_loss_config():
         advantages=advantages,
         loss_mask=loss_mask,
         loss_fns=loss_fns,
-        loss_scale=2,
+        rl_loss_scale=2,
         training_mode="sft",
     )
 
@@ -121,6 +121,76 @@ def test_sft_loss_override_uses_masked_nll_with_default_loss_config():
     assert "mismatch_kl" not in metrics
 
 
+def test_default_loss_fn_uses_separate_echo_loss_scale():  # RL and echo terms are scaled independently
+    trainer_logprobs = [torch.tensor([-0.2, -0.4], dtype=torch.float32, device="cuda", requires_grad=True)]
+    inference_logprobs = [trainer_logprobs[0].detach().clone()]  # identical logprobs, so the ratio is 1
+    advantages = [torch.tensor([1.0, 2.0], dtype=torch.float32, device="cuda")]
+    loss_mask = [torch.tensor([True, True], dtype=torch.bool, device="cuda")]
+    echo_mask = [torch.tensor([False, True], dtype=torch.bool, device="cuda")]  # token 1 is the echo token
+
+    loss_fns = setup_loss_fns(DefaultLossConfig(dppo_mask_high=10.0, dppo_mask_low=10.0, kl_tau=0.0))
+    loss, metrics = compute_loss(
+        trainer_logprobs=trainer_logprobs,
+        inference_logprobs=inference_logprobs,
+        teacher_logprobs=None,
+        advantages=advantages,
+        loss_mask=loss_mask,
+        loss_fns=loss_fns,
+        rl_loss_scale=1,  # RL term is left unscaled
+        echo_loss_scale=4,  # echo term is divided by 4 (see the expected value below)
+        echo_mask=echo_mask,
+    )
+
+    # RL (token 0): -adv * ratio = -1.0. Echo (token 1): -alpha * logprob / 4 = 0.2, i.e. alpha = 2.0 = advantages[1].
+    assert torch.isclose(loss, torch.tensor(-0.8, device=loss.device), atol=1e-6)
+    assert metrics["echo_token_count"].item() == 1  # echo_mask marks exactly one token
+    loss.backward()  # populate .grad so the echo token's gradient can be checked
+    assert torch.isclose(trainer_logprobs[0].grad[1], torch.tensor(-0.5, device="cuda"), atol=1e-6)
+
+
+@pytest.mark.parametrize("training_mode", ["sft", "opd"])
+def test_echo_rejected_for_non_rl_modes(training_mode):  # compute_loss must raise with echo_mask in sft/opd
+    trainer_logprobs = [torch.tensor([-0.1, -0.2], dtype=torch.float32, device="cuda")]
+    inference_logprobs = [torch.zeros(2, dtype=torch.float32, device="cuda")]
+    teacher_logprobs = [torch.zeros(2, dtype=torch.float32, device="cuda")] if training_mode == "opd" else None
+    advantages = [torch.ones(2, dtype=torch.float32, device="cuda")]
+    loss_mask = [torch.ones(2, dtype=torch.bool, device="cuda")]
+    echo_mask = [torch.tensor([False, True], dtype=torch.bool, device="cuda")]  # echo mask with one True entry
+
+    with pytest.raises(ValueError, match="Echo is only supported"):  # match is a regex search on the message
+        compute_loss(
+            trainer_logprobs=trainer_logprobs,
+            inference_logprobs=inference_logprobs,
+            teacher_logprobs=teacher_logprobs,
+            advantages=advantages,
+            loss_mask=loss_mask,
+            loss_fns=setup_loss_fns(DefaultLossConfig()),
+            rl_loss_scale=1,
+            training_mode=training_mode,
+            echo_mask=echo_mask,
+        )
+
+
+def test_echo_rejected_for_custom_rl_loss():  # the "rl" loss built from CustomLossConfig must reject echo_mask
+    loss_fns = setup_loss_fns(
+        CustomLossConfig(
+            import_path="tests.unit.train.rl.test_loss._dummy_custom_loss",  # helper defined in this module
+            kwargs={"multiplier": 2.0},  # presumably forwarded to the custom loss as keyword arguments
+        )
+    )
+    inputs = LossInputs(  # plain 1-D tensors here, not the lists passed to compute_loss in the tests above
+        trainer_logprobs=torch.randn(2, dtype=torch.float32, device="cuda"),
+        inference_logprobs=torch.randn(2, dtype=torch.float32, device="cuda"),
+        teacher_logprobs=None,
+        advantages=torch.ones(2, dtype=torch.float32, device="cuda"),
+        loss_mask=torch.ones(2, dtype=torch.bool, device="cuda"),
+        echo_mask=torch.tensor([False, True], dtype=torch.bool, device="cuda"),
+    )
+
+    with pytest.raises(ValueError, match="Echo is only supported with the default RL loss"):
+        loss_fns["rl"](inputs)  # call the RL loss function directly rather than through compute_loss
+
+
 def _dummy_custom_loss(inputs: LossInputs, multiplier: float = 1.0) -> LossOutputs:
     """A simple custom loss for testing."""
     loss = (inputs.trainer_logprobs[inputs.loss_mask].sum() * multiplier).abs()
diff --git a/tests/unit/train/rl/test_token_export.py b/tests/unit/train/rl/test_token_export.py
new file mode 100644
index 0000000000..bdc16f042e
--- /dev/null
+++ b/tests/unit/train/rl/test_token_export.py
@@ -0,0 +1,32 @@
+import math
+
+import torch
+
+from prime_rl.configs.trainer import DefaultLossConfig
+from prime_rl.trainer.rl.token_export import _export_columns
+
+
+def test_token_export_masks_rl_diagnostics_without_nan_sentinel():  # echo tokens export None, not NaN
+    micro_batch = {
+        "input_ids": torch.tensor([101, 102, 103]),
+        "position_ids": torch.tensor([0, 1, 2]),
+        "loss_mask": torch.tensor([True, True, True]),
+        "echo_mask": torch.tensor([False, True, False]),  # token 1 is the only echo token
+        "advantages": torch.tensor([1.0, 0.5, -1.0]),
+        "rewards": None,
+        "inference_logprobs": torch.tensor([-0.2, -0.3, -0.4]),
+        "env_names": ["env", "env", "env"],
+        "training_mode": "rl",
+    }
+    model_output = {
+        "logprobs": torch.tensor([-0.1, -0.5, -0.45]),
+        "entropy": torch.tensor([1.0, 1.1, 1.2]),
+    }
+
+    columns = _export_columns(micro_batch, model_output, DefaultLossConfig())
+
+    assert columns["rl_loss_mask"] == [True, False, True]  # loss_mask with the echo token removed
+    for key in ("mismatch_kl", "log_importance_ratio", "importance_ratio", "prob_delta"):
+        assert columns[key][1] is None  # echo token: RL diagnostics are None rather than NaN
+        assert math.isfinite(columns[key][0])  # non-echo tokens keep finite values
+        assert math.isfinite(columns[key][2])
diff --git a/uv.lock b/uv.lock
index a3e998ef50..6ea2e43ebd 100644
--- a/uv.lock
+++ b/uv.lock
@@ -11,7 +11,7 @@ supported-markers = [
 ]
 
 [options]
-exclude-newer = "2026-05-25T23:50:03.973690224Z"
+exclude-newer = "0001-01-01T00:00:00Z" # This has no effect and is included for backwards compatibility when using relative exclude-newer values.
 exclude-newer-span = "P7D"
 
 [options.exclude-newer-package]
@@ -76,7 +76,7 @@ dependencies = [
 [package.metadata]
 requires-dist = [
     { name = "math-verify", specifier = ">=0.8.0" },
-    { name = "verifiers", specifier = ">=0.1.12.dev1" },
+    { name = "verifiers", specifier = ">=0.1.15.dev17" },
 ]
 
 [[package]]
@@ -91,7 +91,7 @@ dependencies = [
 [package.metadata]
 requires-dist = [
     { name = "math-verify", specifier = ">=0.8.0" },
-    { name = "verifiers", specifier = ">=0.1.12.dev1" },
+    { name = "verifiers", specifier = ">=0.1.15.dev17" },
 ]
 
 [[package]]
@@ -276,6 +276,19 @@ wheels = [
     { url = "https://files.pythonhosted.org/packages/64/b4/17d4b0b2a2dc85a6df63d1157e028ed19f90d4cd97c36717afef2bc2f395/attrs-26.1.0-py3-none-any.whl", hash = "sha256:c647aa4a12dfbad9333ca4e71fe62ddc36f4e63b2d260a37a8b83d2f043ac309", size = 67548, upload-time = "2026-03-19T14:22:23.645Z" },
 ]
 
+[[package]]
+name = "authlib"
+version = "1.7.2"
+source = { registry = "https://pypi.org/simple" }
+dependencies = [
+    { name = "cryptography", marker = "(platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux')" },
+    { name = "joserfc", marker = "(platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux')" },
+]
+sdist = { url = "https://files.pythonhosted.org/packages/36/98/7d93f30d029643c0275dbc0bd6d5a6f670661ee6c9a94d93af7ab4887600/authlib-1.7.2.tar.gz", hash = "sha256:2cea25fefcd4e7173bdf1372c0afc265c8034b23a8cd5dcb6a9164b826c64231", size = 176511, upload-time = "2026-05-06T08:10:23.116Z" }
+wheels = [
+    { url = "https://files.pythonhosted.org/packages/fb/95/adcb68e20c34162e9135f370d6e31737719c2b6f94bc953fe7ed1f10fe21/authlib-1.7.2-py2.py3-none-any.whl", hash = "sha256:3e1faedc9d87e7d56a164eca3ccb6ace0d61b94abe83e92242f8dc8bba9b4a9f", size = 259548, upload-time = "2026-05-06T08:10:21.436Z" },
+]
+
 [[package]]
 name = "backoff"
 version = "2.2.1"
@@ -369,6 +382,18 @@ wheels = [
     { url = "https://files.pythonhosted.org/packages/0d/fe/6bea5c9162869c5beba5d9c8abbed835ec85bf1ec1fba05a3822325c45f3/build-1.5.0-py3-none-any.whl", hash = "sha256:13f3eecb844759ab66efec90ca17639bbf14dc06cb2fdf37a9010322d9c50a6f", size = 26018, upload-time = "2026-04-30T03:18:23.644Z" },
 ]
 
+[[package]]
+name = "burner-redis"
+version = "0.1.7"
+source = { registry = "https://pypi.org/simple" }
+sdist = { url = "https://files.pythonhosted.org/packages/52/89/54706febafc135095b2a9d797cfbd4eed2ab1ad7819808b99b587020471b/burner_redis-0.1.7.tar.gz", hash = "sha256:7474ff092669fd11ef765411572cdafcc3d89b8054aef4ca0617be6d6be4c680", size = 638644, upload-time = "2026-05-08T15:01:42.961Z" }
+wheels = [
+    { url = "https://files.pythonhosted.org/packages/6f/c0/31c25cc88143eac2dddcc394151a0db627923d44c94376a83768552c9f13/burner_redis-0.1.7-cp310-abi3-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:20eba1917e3bca9eea5957d5700ff8defcb5a209e57a7841d005549aa0151f44", size = 1337341, upload-time = "2026-05-08T15:01:30.397Z" },
+    { url = "https://files.pythonhosted.org/packages/e1/32/95cfa1833316ca2b6b2e58150a4900bc1ad256043cdd36198f1887618ccc/burner_redis-0.1.7-cp310-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:39111467059b8a28f15ea061d2414ec25c3e57c65759983f90f4d358e7d6a72d", size = 1366800, upload-time = "2026-05-08T15:01:32.891Z" },
+    { url = "https://files.pythonhosted.org/packages/34/ad/93c3916f053f89b7b5760da5bf855cd78b7885d480f9cfcc64f3732c1dc2/burner_redis-0.1.7-cp310-abi3-musllinux_1_2_aarch64.whl", hash = "sha256:9b5adfe99aeb8407f468078f3769b2a63e9168fea12f7709df5d2a3b152706e4", size = 1538160, upload-time = "2026-05-08T15:01:34.667Z" },
+    { url = "https://files.pythonhosted.org/packages/5c/b9/19bae42cb124932d71168bc8e5bcb1da33aa62b908e5e632b3d298d7cb15/burner_redis-0.1.7-cp310-abi3-musllinux_1_2_x86_64.whl", hash = "sha256:591a9d20685f9d6d22bf0c863b50b12dfcf328b06111b3f62c33cd3185d48ce0", size = 1591491, upload-time = "2026-05-08T15:01:36.708Z" },
+]
+
 [[package]]
 name = "cachetools"
 version = "7.1.1"
@@ -447,6 +472,12 @@ wheels = [
     { url = "https://files.pythonhosted.org/packages/db/8f/61959034484a4a7c527811f4721e75d02d653a35afb0b6054474d8185d4c/charset_normalizer-3.4.7-py3-none-any.whl", hash = "sha256:3dce51d0f5e7951f8bb4900c257dad282f49190fdbebecd4ba99bcc41fef404d", size = 61958, upload-time = "2026-04-02T09:28:37.794Z" },
 ]
 
+[[package]]
+name = "chess"
+version = "1.11.2"
+source = { registry = "https://pypi.org/simple" }
+sdist = { url = "https://files.pythonhosted.org/packages/93/09/7d04d7581ae3bb8b598017941781bceb7959dd1b13e3ebf7b6a2cd843bc9/chess-1.11.2.tar.gz", hash = "sha256:a8b43e5678fdb3000695bdaa573117ad683761e5ca38e591c4826eba6d25bb39", size = 6131385, upload-time = "2025-02-25T19:10:27.328Z" }
+
 [[package]]
 name = "chromadb"
 version = "1.5.9"
@@ -506,7 +537,7 @@ wheels = [
 
 [[package]]
 name = "code-env"
-version = "0.3.1"
+version = "0.3.2"
 source = { editable = "deps/research-environments/environments/code_env" }
 dependencies = [
     { name = "prime-sandboxes", marker = "(platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux')" },
@@ -518,7 +549,7 @@ requires-dist = [
     { name = "prime-sandboxes", specifier = ">=0.2.19" },
     { name = "pytest", marker = "extra == 'dev'" },
     { name = "pytest-asyncio", marker = "extra == 'dev'" },
-    { name = "verifiers", specifier = ">=0.1.13.dev8" },
+    { name = "verifiers", specifier = ">=0.1.15.dev17" },
 ]
 provides-extras = ["dev"]
 
@@ -534,7 +565,7 @@ dependencies = [
 [package.metadata]
 requires-dist = [
     { name = "pillow", specifier = ">=10.0.0" },
-    { name = "verifiers", specifier = ">=0.1.10" },
+    { name = "verifiers", specifier = ">=0.1.15.dev17" },
 ]
 
 [[package]]
@@ -589,6 +620,14 @@ wheels = [
     { url = "https://files.pythonhosted.org/packages/33/1d/acad9bd4e97f13f3e2b18a3977fe1b4a37ecf3d38d815333980c6c72e963/contourpy-1.3.3-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:459c1f020cd59fcfe6650180678a9993932d80d44ccde1fa1868977438f0b411", size = 1403386, upload-time = "2025-07-26T12:01:33.947Z" },
 ]
 
+[[package]]
+name = "cronsim"
+version = "2.7"
+source = { registry = "https://pypi.org/simple" }
+wheels = [
+    { url = "https://files.pythonhosted.org/packages/fb/1a/02f105147f7f2e06ed4f734ff5a6439590bb275a53dd91fc73df6312298a/cronsim-2.7-py3-none-any.whl", hash = "sha256:1e1431fa08c51dc7f72e67e571c7c7a09af26420169b607badd4ca9677ffad1e", size = 14213, upload-time = "2025-10-21T16:38:20.431Z" },
+]
+
 [[package]]
 name = "cryptography"
 version = "48.0.0"
@@ -711,6 +750,21 @@ wheels = [
     { url = "https://files.pythonhosted.org/packages/e7/05/c19819d5e3d95294a6f5947fb9b9629efb316b96de511b418c53d245aae6/cycler-0.12.1-py3-none-any.whl", hash = "sha256:85cef7cff222d8644161529808465972e51340599459b8ac3ccbac5a854e0d30", size = 8321, upload-time = "2023-10-07T05:32:16.783Z" },
 ]
 
+[[package]]
+name = "cyclopts"
+version = "4.16.1"
+source = { registry = "https://pypi.org/simple" }
+dependencies = [
+    { name = "attrs", marker = "(platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux')" },
+    { name = "docstring-parser", marker = "(platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux')" },
+    { name = "rich", marker = "(platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux')" },
+    { name = "rich-rst", marker = "(platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux')" },
+]
+sdist = { url = "https://files.pythonhosted.org/packages/34/07/bf61d13de86d96a4c46aff00c9ca0eced44bcc8c3e16280605c1253e5720/cyclopts-4.16.1.tar.gz", hash = "sha256:8aa47bf92a5fb33abca5af05e576eecdb0d2f79893ad29238046df78370fc4a8", size = 181196, upload-time = "2026-05-25T15:29:08.518Z" }
+wheels = [
+    { url = "https://files.pythonhosted.org/packages/00/8d/7f362c2fb8ef4decd2160bc24d4292c6ca658cc6d9a161b89ca5122bbdbf/cyclopts-4.16.1-py3-none-any.whl", hash = "sha256:617795392c4113a2c2cc7af716f20244900e87f23daa05442d1268d81472a592", size = 219020, upload-time = "2026-05-25T15:29:09.646Z" },
+]
+
 [[package]]
 name = "dataclasses-json"
 version = "0.6.7"
@@ -814,7 +868,7 @@ requires-dist = [
     { name = "datasets", specifier = ">=4.0.0" },
     { name = "diskcache", specifier = ">=5.6.0" },
     { name = "pdfminer-six", specifier = ">=20251107" },
-    { name = "verifiers", specifier = ">=0.1.11.dev0" },
+    { name = "verifiers", specifier = ">=0.1.15.dev17" },
 ]
 
 [[package]]
@@ -938,6 +992,18 @@ wheels = [
     { url = "https://files.pythonhosted.org/packages/de/15/545e2b6cf2e3be84bc1ed85613edd75b8aea69807a71c26f4ca6a9258e82/email_validator-2.3.0-py3-none-any.whl", hash = "sha256:80f13f623413e6b197ae73bb10bf4eb0908faf509ad8362c5edeb0be7fd450b4", size = 35604, upload-time = "2025-08-26T13:09:05.858Z" },
 ]
 
+[[package]]
+name = "exceptiongroup"
+version = "1.3.1"
+source = { registry = "https://pypi.org/simple" }
+dependencies = [
+    { name = "typing-extensions", marker = "(platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux')" },
+]
+sdist = { url = "https://files.pythonhosted.org/packages/50/79/66800aadf48771f6b62f7eb014e352e5d06856655206165d775e675a02c9/exceptiongroup-1.3.1.tar.gz", hash = "sha256:8b412432c6055b0b7d14c310000ae93352ed6754f70fa8f7c34141f91c4e3219", size = 30371, upload-time = "2025-11-21T23:01:54.787Z" }
+wheels = [
+    { url = "https://files.pythonhosted.org/packages/8a/0e/97c33bf5009bdbac74fd2beace167cab3f978feb69cc36f1ef79360d6c4e/exceptiongroup-1.3.1-py3-none-any.whl", hash = "sha256:a7a39a3bd276781e98394987d3a5701d0c4edffb633bb7a5144577f82c773598", size = 16740, upload-time = "2025-11-21T23:01:53.443Z" },
+]
+
 [[package]]
 name = "executing"
 version = "2.2.1"
@@ -1045,6 +1111,33 @@ wheels = [
     { url = "https://files.pythonhosted.org/packages/a2/1f/ace39fe4719367cf68a4a70a2f6cb2207f285dbfc9c6a0c2f11f6cf12aa7/fastcore-1.12.45-py3-none-any.whl", hash = "sha256:4175a16e47e5ebdbd68d19c75667b056d37c34c9a378ba50d42ff692c973322f", size = 104753, upload-time = "2026-05-08T03:36:12.143Z" },
 ]
 
+[[package]]
+name = "fastmcp"
+version = "2.14.0"
+source = { registry = "https://pypi.org/simple" }
+dependencies = [
+    { name = "authlib", marker = "(platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux')" },
+    { name = "cyclopts", marker = "(platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux')" },
+    { name = "exceptiongroup", marker = "(platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux')" },
+    { name = "httpx", marker = "(platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux')" },
+    { name = "jsonschema-path", marker = "(platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux')" },
+    { name = "mcp", marker = "(platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux')" },
+    { name = "openapi-pydantic", marker = "(platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux')" },
+    { name = "platformdirs", marker = "(platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux')" },
+    { name = "py-key-value-aio", extra = ["disk", "keyring", "memory"], marker = "(platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux')" },
+    { name = "pydantic", extra = ["email"], marker = "(platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux')" },
+    { name = "pydocket", marker = "(platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux')" },
+    { name = "pyperclip", marker = "(platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux')" },
+    { name = "python-dotenv", marker = "(platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux')" },
+    { name = "rich", marker = "(platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux')" },
+    { name = "uvicorn", marker = "(platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux')" },
+    { name = "websockets", marker = "(platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux')" },
+]
+sdist = { url = "https://files.pythonhosted.org/packages/35/50/9bb042a2d290ccadb35db3580ac507f192e1a39c489eb8faa167cd5e3b57/fastmcp-2.14.0.tar.gz", hash = "sha256:c1f487b36a3e4b043dbf3330e588830047df2e06f8ef0920d62dfb34d0905727", size = 8232562, upload-time = "2025-12-11T23:04:27.134Z" }
+wheels = [
+    { url = "https://files.pythonhosted.org/packages/54/73/b5656172a6beb2eacec95f04403ddea1928e4b22066700fd14780f8f45d1/fastmcp-2.14.0-py3-none-any.whl", hash = "sha256:7b374c0bcaf1ef1ef46b9255ea84c607f354291eaf647ff56a47c69f5ec0c204", size = 398965, upload-time = "2025-12-11T23:04:25.587Z" },
+]
+
 [[package]]
 name = "fastokens"
 version = "0.2.0"
@@ -1250,7 +1343,7 @@ source = { editable = "deps/research-environments/environments/general_agent" }
 dependencies = [
     { name = "mcp", marker = "(platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux')" },
     { name = "tyro", marker = "(platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux')" },
-    { name = "verifiers", marker = "(platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux')" },
+    { name = "verifiers", extra = ["packages"], marker = "(platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux')" },
 ]
 
 [package.metadata]
@@ -1261,7 +1354,7 @@ requires-dist = [
     { name = "ruff", marker = "extra == 'dev'" },
     { name = "ty", marker = "extra == 'dev'" },
     { name = "tyro", specifier = ">=0.9" },
-    { name = "verifiers", specifier = ">=0.1.15.dev2" },
+    { name = "verifiers", extras = ["packages"], specifier = ">=0.1.15.dev17" },
 ]
 provides-extras = ["dev", "test"]
 
@@ -1331,6 +1424,45 @@ wheels = [
     { url = "https://files.pythonhosted.org/packages/20/7a/1c6e3562dfd8950adbb11ffbc65d21e7c89d01a6e4f137fa981056de25c5/gitpython-3.1.50-py3-none-any.whl", hash = "sha256:d352abe2908d07355014abdd21ddf798c2a961469239afec4962e9da884858f9", size = 212507, upload-time = "2026-05-06T04:01:23.799Z" },
 ]
 
+[[package]]
+name = "google-auth"
+version = "2.53.0"
+source = { registry = "https://pypi.org/simple" }
+dependencies = [
+    { name = "cryptography", marker = "(platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux')" },
+    { name = "pyasn1-modules", marker = "(platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux')" },
+]
+sdist = { url = "https://files.pythonhosted.org/packages/c6/ad/ff781329bbbdc0974a098d996e89c9e1f7024262f9e3eec442fbb9ad1ac6/google_auth-2.53.0.tar.gz", hash = "sha256:e7e6aa16f6bee7b2b264830fd04f08087a1d5a836df516251a5d15327b246c9c", size = 335844, upload-time = "2026-05-15T20:53:07.928Z" }
+wheels = [
+    { url = "https://files.pythonhosted.org/packages/4a/c9/db44165ba7c581268c6d46017ef63339110378305062830104fc7fa144cb/google_auth-2.53.0-py3-none-any.whl", hash = "sha256:6e7449917c599b35126a99ec268ec6880301f2fea41dce198fe8fd83ff642b68", size = 246071, upload-time = "2026-05-15T20:53:05.609Z" },
+]
+
+[package.optional-dependencies]
+requests = [
+    { name = "requests", marker = "(platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux')" },
+]
+
+[[package]]
+name = "google-genai"
+version = "2.6.0"
+source = { registry = "https://pypi.org/simple" }
+dependencies = [
+    { name = "anyio", marker = "(platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux')" },
+    { name = "distro", marker = "(platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux')" },
+    { name = "google-auth", extra = ["requests"], marker = "(platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux')" },
+    { name = "httpx", marker = "(platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux')" },
+    { name = "pydantic", marker = "(platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux')" },
+    { name = "requests", marker = "(platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux')" },
+    { name = "sniffio", marker = "(platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux')" },
+    { name = "tenacity", marker = "(platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux')" },
+    { name = "typing-extensions", marker = "(platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux')" },
+    { name = "websockets", marker = "(platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux')" },
+]
+sdist = { url = "https://files.pythonhosted.org/packages/dd/ec/6e49f50f5c70588d97c6ed25e0b8c18828bf4d58895f397b53a7522168a1/google_genai-2.6.0.tar.gz", hash = "sha256:7d4f777234002f2e94be499dbdfb43b506a6aca9dbbec13e61d3dc6ce640ffa7", size = 554809, upload-time = "2026-05-22T01:34:33.581Z" }
+wheels = [
+    { url = "https://files.pythonhosted.org/packages/b3/9e/e8ba4e58a9d5daf42343f3ea1cb0efb721eba36a1d6624e9873d039a5c1e/google_genai-2.6.0-py3-none-any.whl", hash = "sha256:272b6f6320f5d355735241ad441f972af095ec80dc10cb075cb430d96721648a", size = 821003, upload-time = "2026-05-22T01:34:31.55Z" },
+]
+
 [[package]]
 name = "googleapis-common-protos"
 version = "1.75.0"
@@ -1355,7 +1487,7 @@ dependencies = [
 [package.metadata]
 requires-dist = [
     { name = "math-verify", specifier = ">=0.8.0" },
-    { name = "verifiers", specifier = ">=0.1.12.dev1" },
+    { name = "verifiers", specifier = ">=0.1.15.dev17" },
 ]
 
 [[package]]
@@ -1432,6 +1564,21 @@ wheels = [
     { url = "https://files.pythonhosted.org/packages/69/b2/119f6e6dcbd96f9069ce9a2665e0146588dc9f88f29549711853645e736a/h2-4.3.0-py3-none-any.whl", hash = "sha256:c438f029a25f7945c69e0ccf0fb951dc3f73a5f6412981daee861431b70e2bdd", size = 61779, upload-time = "2025-08-23T18:12:17.779Z" },
 ]
 
+[[package]]
+name = "harnesses"
+source = { editable = "deps/verifiers/packages/harnesses" }
+dependencies = [
+    { name = "verifiers", marker = "(platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux')" },
+]
+
+[package.metadata]
+requires-dist = [
+    { name = "aiohttp", marker = "extra == 'nemogym'", specifier = ">=3.9.0" },
+    { name = "nemo-gym", marker = "python_full_version >= '3.12' and extra == 'nemogym'", specifier = ">=0.2.1,<0.3" },
+    { name = "verifiers", specifier = ">=0.1.15.dev11" },
+]
+provides-extras = ["nemogym"]
+
 [[package]]
 name = "hf-xet"
 version = "1.5.0"
@@ -1456,7 +1603,7 @@ dependencies = [
 [package.metadata]
 requires-dist = [
     { name = "pillow", specifier = ">=12.0.0" },
-    { name = "verifiers", specifier = ">=0.1.15.dev1" },
+    { name = "verifiers", specifier = ">=0.1.15.dev17" },
 ]
 
 [[package]]
@@ -1609,7 +1756,7 @@ requires-dist = [
     { name = "immutabledict" },
     { name = "langdetect" },
     { name = "nltk" },
-    { name = "verifiers", specifier = ">=0.1.10" },
+    { name = "verifiers", specifier = ">=0.1.15.dev17" },
 ]
 
 [[package]]
@@ -1744,6 +1891,39 @@ wheels = [
     { url = "https://files.pythonhosted.org/packages/56/6d/0d9848617b9f753b87f214f1c682592f7ca42de085f564352f10f0843026/ipywidgets-8.1.8-py3-none-any.whl", hash = "sha256:ecaca67aed704a338f88f67b1181b58f821ab5dc89c1f0f5ef99db43c1c2921e", size = 139808, upload-time = "2025-11-01T21:18:10.956Z" },
 ]
 
+[[package]]
+name = "jaraco-classes"
+version = "3.4.0"
+source = { registry = "https://pypi.org/simple" }
+dependencies = [
+    { name = "more-itertools", marker = "(platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux')" },
+]
+sdist = { url = "https://files.pythonhosted.org/packages/06/c0/ed4a27bc5571b99e3cff68f8a9fa5b56ff7df1c2251cc715a652ddd26402/jaraco.classes-3.4.0.tar.gz", hash = "sha256:47a024b51d0239c0dd8c8540c6c7f484be3b8fcf0b2d85c13825780d3b3f3acd", size = 11780, upload-time = "2024-03-31T07:27:36.643Z" }
+wheels = [
+    { url = "https://files.pythonhosted.org/packages/7f/66/b15ce62552d84bbfcec9a4873ab79d993a1dd4edb922cbfccae192bd5b5f/jaraco.classes-3.4.0-py3-none-any.whl", hash = "sha256:f662826b6bed8cace05e7ff873ce0f9283b5c924470fe664fff1c2f00f581790", size = 6777, upload-time = "2024-03-31T07:27:34.792Z" },
+]
+
+[[package]]
+name = "jaraco-context"
+version = "6.1.2"
+source = { registry = "https://pypi.org/simple" }
+sdist = { url = "https://files.pythonhosted.org/packages/af/50/4763cd07e722bb6285316d390a164bc7e479db9d90daa769f22578f698b4/jaraco_context-6.1.2.tar.gz", hash = "sha256:f1a6c9d391e661cc5b8d39861ff077a7dc24dc23833ccee564b234b81c82dfe3", size = 16801, upload-time = "2026-03-20T22:13:33.922Z" }
+wheels = [
+    { url = "https://files.pythonhosted.org/packages/f2/58/bc8954bda5fcda97bd7c19be11b85f91973d67a706ed4a3aec33e7de22db/jaraco_context-6.1.2-py3-none-any.whl", hash = "sha256:bf8150b79a2d5d91ae48629d8b427a8f7ba0e1097dd6202a9059f29a36379535", size = 7871, upload-time = "2026-03-20T22:13:32.808Z" },
+]
+
+[[package]]
+name = "jaraco-functools"
+version = "4.5.0"
+source = { registry = "https://pypi.org/simple" }
+dependencies = [
+    { name = "more-itertools", marker = "(platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux')" },
+]
+sdist = { url = "https://files.pythonhosted.org/packages/36/cf/ea4ef2920830dea3f5ab2ea4da6fb67724e6dca80ee2553788c3607243d0/jaraco_functools-4.5.0.tar.gz", hash = "sha256:3bb5665ea4a020cf78a7040e89154c77edadb3ca74f366479669c5999aa70b03", size = 20272, upload-time = "2026-05-15T21:34:10.025Z" }
+wheels = [
+    { url = "https://files.pythonhosted.org/packages/96/9a/982e48afcffcd727a9144506720ffd4224b6b7e355c98641866f38b7c043/jaraco_functools-4.5.0-py3-none-any.whl", hash = "sha256:79ce39246eddbde4b3a03b77ea5f0f7878dc669b166a66cf3fa8e266aa3fa2f4", size = 10594, upload-time = "2026-05-15T21:34:08.595Z" },
+]
+
 [[package]]
 name = "jaxtyping"
 version = "0.3.9"
@@ -1768,6 +1948,15 @@ wheels = [
     { url = "https://files.pythonhosted.org/packages/9a/93/242e2eab5fe682ffcb8b0084bde703a41d51e17ee0f3a31ff0d9d813620a/jedi-0.20.0-py2.py3-none-any.whl", hash = "sha256:7bdd9c2634f56713299976f4cbd59cb3fa92165cc5e05ea811fb253480728b67", size = 4884812, upload-time = "2026-05-01T23:38:43.919Z" },
 ]
 
+[[package]]
+name = "jeepney"
+version = "0.9.0"
+source = { registry = "https://pypi.org/simple" }
+sdist = { url = "https://files.pythonhosted.org/packages/7b/6f/357efd7602486741aa73ffc0617fb310a29b588ed0fd69c2399acbb85b0c/jeepney-0.9.0.tar.gz", hash = "sha256:cf0e9e845622b81e4a28df94c40345400256ec608d0e55bb8a3feaa9163f5732", size = 106758, upload-time = "2025-02-27T18:51:01.684Z" }
+wheels = [
+    { url = "https://files.pythonhosted.org/packages/b2/a3/e137168c9c44d18eff0376253da9f1e9234d0239e0ee230d2fee6cea8e55/jeepney-0.9.0-py3-none-any.whl", hash = "sha256:97e5714520c16fc0a45695e5365a2e11b81ea79bba796e26f9f1d178cb182683", size = 49010, upload-time = "2025-02-27T18:51:00.104Z" },
+]
+
 [[package]]
 name = "jinja2"
 version = "3.1.6"
@@ -1812,6 +2001,18 @@ wheels = [
     { url = "https://files.pythonhosted.org/packages/7b/91/984aca2ec129e2757d1e4e3c81c3fcda9d0f85b74670a094cc443d9ee949/joblib-1.5.3-py3-none-any.whl", hash = "sha256:5fc3c5039fc5ca8c0276333a188bbd59d6b7ab37fe6632daa76bc7f9ec18e713", size = 309071, upload-time = "2025-12-15T08:41:44.973Z" },
 ]
 
+[[package]]
+name = "joserfc"
+version = "1.6.7"
+source = { registry = "https://pypi.org/simple" }
+dependencies = [
+    { name = "cryptography", marker = "(platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux')" },
+]
+sdist = { url = "https://files.pythonhosted.org/packages/1b/cb/52e479f20804904f5df20ac4539d292dcecd1287aaa33cba1d1def1d9d8e/joserfc-1.6.7.tar.gz", hash = "sha256:6999fe89457069ecacd8cc797c88a805f83054dd883333fa0409f74b46479fd7", size = 232158, upload-time = "2026-05-23T01:46:44.069Z" }
+wheels = [
+    { url = "https://files.pythonhosted.org/packages/c5/e4/bcf6718b5662894c6831f46296b73cd4b1a2e90c20b6d437e20c4997388c/joserfc-1.6.7-py3-none-any.whl", hash = "sha256:9e51e4a64840aa1734a058258e80a4480e2ff2d5686e480e7c92c954a92fbe05", size = 70603, upload-time = "2026-05-23T01:46:42.129Z" },
+]
+
 [[package]]
 name = "jsonschema"
 version = "4.25.1"
@@ -1827,6 +2028,21 @@ wheels = [
     { url = "https://files.pythonhosted.org/packages/bf/9c/8c95d856233c1f82500c2450b8c68576b4cf1c871db3afac5c34ff84e6fd/jsonschema-4.25.1-py3-none-any.whl", hash = "sha256:3fba0169e345c7175110351d456342c364814cfcf3b964ba4587f22915230a63", size = 90040, upload-time = "2025-08-18T17:03:48.373Z" },
 ]
 
+[[package]]
+name = "jsonschema-path"
+version = "0.5.0"
+source = { registry = "https://pypi.org/simple" }
+dependencies = [
+    { name = "attrs", marker = "(platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux')" },
+    { name = "pathable", marker = "(platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux')" },
+    { name = "pyyaml", marker = "(platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux')" },
+    { name = "referencing", marker = "(platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux')" },
+]
+sdist = { url = "https://files.pythonhosted.org/packages/39/79/cd02a4df6d9270efdc7d3feefe6edd730b0820c39eeaa107a2faee8322d5/jsonschema_path-0.5.0.tar.gz", hash = "sha256:493b156ba895c97602655b620a8456caa2ce08c1aa389f5a7addec065e6e855c", size = 19597, upload-time = "2026-05-19T20:45:00.971Z" }
+wheels = [
+    { url = "https://files.pythonhosted.org/packages/04/2c/9e69d73c4297508be9e3b64a970ea3971b3eb8db64ffc5802d40bd25981f/jsonschema_path-0.5.0-py3-none-any.whl", hash = "sha256:2790a070bc7abb08ea3dbe4d340ece4efadf639223001f020c7503229ba068e2", size = 24077, upload-time = "2026-05-19T20:44:59.225Z" },
+]
+
 [[package]]
 name = "jsonschema-specifications"
 version = "2025.9.1"
@@ -1905,6 +2121,22 @@ wheels = [
     { url = "https://files.pythonhosted.org/packages/fe/12/a156ea12972ff5bfa53c5669ec8a71a7dda8474bd540acc9211b0cacbb82/kernels_data-0.14.0-cp38-abi3-musllinux_1_2_x86_64.whl", hash = "sha256:4e9f39e3678f525560f95a4af048388504dc261b7386ecc2a1017095aaa44997", size = 1414335, upload-time = "2026-05-06T08:21:27.804Z" },
 ]
 
+[[package]]
+name = "keyring"
+version = "25.7.0"
+source = { registry = "https://pypi.org/simple" }
+dependencies = [
+    { name = "jaraco-classes", marker = "(platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux')" },
+    { name = "jaraco-context", marker = "(platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux')" },
+    { name = "jaraco-functools", marker = "(platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux')" },
+    { name = "jeepney", marker = "(platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux')" },
+    { name = "secretstorage", marker = "(platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux')" },
+]
+sdist = { url = "https://files.pythonhosted.org/packages/43/4b/674af6ef2f97d56f0ab5153bf0bfa28ccb6c3ed4d1babf4305449668807b/keyring-25.7.0.tar.gz", hash = "sha256:fe01bd85eb3f8fb3dd0405defdeac9a5b4f6f0439edbb3149577f244a2e8245b", size = 63516, upload-time = "2025-11-16T16:26:09.482Z" }
+wheels = [
+    { url = "https://files.pythonhosted.org/packages/81/db/e655086b7f3a705df045bf0933bdd9c2f79bb3c97bfef1384598bb79a217/keyring-25.7.0-py3-none-any.whl", hash = "sha256:be4a0b195f149690c166e850609a477c532ddbfbaed96a404d4e43f8d5e2689f", size = 39160, upload-time = "2025-11-16T16:26:08.402Z" },
+]
+
 [[package]]
 name = "kiwisolver"
 version = "1.5.0"
@@ -2038,7 +2270,7 @@ wheels = [
 
 [[package]]
 name = "livecodebench"
-version = "0.2.6"
+version = "0.2.7"
 source = { editable = "deps/research-environments/environments/livecodebench" }
 dependencies = [
     { name = "prime-sandboxes", marker = "(platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux')" },
@@ -2048,7 +2280,7 @@ dependencies = [
 [package.metadata]
 requires-dist = [
     { name = "prime-sandboxes", specifier = ">=0.2.19" },
-    { name = "verifiers", specifier = ">=0.1.15.dev1" },
+    { name = "verifiers", specifier = ">=0.1.15.dev17" },
 ]
 
 [[package]]
@@ -2102,7 +2334,7 @@ requires-dist = [
     { name = "markdown", specifier = ">=3.5.1" },
     { name = "math-verify", specifier = ">=0.8.0" },
     { name = "sympy", specifier = ">=1.12.0" },
-    { name = "verifiers", specifier = ">=0.1.15.dev2" },
+    { name = "verifiers", specifier = ">=0.1.15.dev17" },
 ]
 
 [[package]]
@@ -2181,7 +2413,7 @@ wheels = [
 
 [[package]]
 name = "math-env"
-version = "0.1.5"
+version = "0.1.6"
 source = { editable = "deps/research-environments/environments/math_env" }
 dependencies = [
     { name = "math-verify", marker = "(platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux')" },
@@ -2193,7 +2425,7 @@ dependencies = [
 requires-dist = [
     { name = "math-verify", specifier = ">=0.8.0" },
     { name = "prime-sandboxes", specifier = ">=0.2.19" },
-    { name = "verifiers", specifier = ">=0.1.12.dev1" },
+    { name = "verifiers", specifier = ">=0.1.15.dev17" },
 ]
 
 [[package]]
@@ -2237,7 +2469,7 @@ dependencies = [
 [package.metadata]
 requires-dist = [
     { name = "math-verify", specifier = ">=0.8.0" },
-    { name = "verifiers", specifier = ">=0.1.12.dev1" },
+    { name = "verifiers", specifier = ">=0.1.15.dev17" },
 ]
 
 [[package]]
@@ -2321,7 +2553,7 @@ wheels = [
 
 [[package]]
 name = "mini-swe-agent-plus"
-version = "0.2.24"
+version = "0.2.25"
 source = { editable = "deps/research-environments/environments/mini_swe_agent_plus" }
 dependencies = [
     { name = "prime-sandboxes", marker = "(platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux')" },
@@ -2335,26 +2567,7 @@ requires-dist = [
     { name = "prime-sandboxes", specifier = ">=0.2.19" },
     { name = "swebench", specifier = "==4.1.0" },
     { name = "tenacity" },
-    { name = "verifiers", specifier = ">=0.1.12.dev1" },
-]
-
-[[package]]
-name = "mini-swe-agent-plus-rlm"
-version = "0.1.6"
-source = { editable = "deps/research-environments/environments/mini_swe_agent_plus_rlm" }
-dependencies = [
-    { name = "prime-sandboxes", marker = "(platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux')" },
-    { name = "swebench", marker = "(platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux')" },
-    { name = "tenacity", marker = "(platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux')" },
-    { name = "verifiers", marker = "(platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux')" },
-]
-
-[package.metadata]
-requires-dist = [
-    { name = "prime-sandboxes", specifier = ">=0.2.19" },
-    { name = "swebench", specifier = "==4.1.0" },
-    { name = "tenacity" },
-    { name = "verifiers", specifier = ">=0.1.12.dev1" },
+    { name = "verifiers", specifier = ">=0.1.15.dev17" },
 ]
 
 [[package]]
@@ -2418,7 +2631,7 @@ dependencies = [
 [package.metadata]
 requires-dist = [
     { name = "math-verify", specifier = ">=0.8.0" },
-    { name = "verifiers", specifier = ">=0.1.15.dev1" },
+    { name = "verifiers", specifier = ">=0.1.15.dev17" },
 ]
 
 [[package]]
@@ -2494,6 +2707,15 @@ wheels = [
     { url = "https://files.pythonhosted.org/packages/3a/a4/a187adcd485ff27bdbdb5c2b4d9cf210427bc74bcaacfc8226409db17535/mooncake_transfer_engine-0.3.11.post1-cp312-cp312-manylinux_2_39_aarch64.whl", hash = "sha256:1ccad9f44cf1a67f4e0494bd02f505503139ab606ecbe76cd6050d7a069247d5", size = 18089789, upload-time = "2026-05-24T16:19:01.828Z" },
 ]
 
+[[package]]
+name = "more-itertools"
+version = "11.1.0"
+source = { registry = "https://pypi.org/simple" }
+sdist = { url = "https://files.pythonhosted.org/packages/de/1d/f4da6f02cdffe04d6362210b807146a26044c88d839208aec273bb0d9184/more_itertools-11.1.0.tar.gz", hash = "sha256:48e8f4d9e7e5878571ecf6f2b4e57634f93cd474cc8cfbd2376f2d11b396e30d", size = 145772, upload-time = "2026-05-22T14:14:29.909Z" }
+wheels = [
+    { url = "https://files.pythonhosted.org/packages/e8/3d/1087453384dbde46a8c7f9356eead2c58be8a7bf156bca40243377c85715/more_itertools-11.1.0-py3-none-any.whl", hash = "sha256:4b65538ae22f6fed0ce4874efd317463a7489796a0939fa66824dd542125a192", size = 72226, upload-time = "2026-05-22T14:14:28.824Z" },
+]
+
 [[package]]
 name = "mpmath"
 version = "1.3.0"
@@ -3015,19 +3237,31 @@ wheels = [
     { url = "https://files.pythonhosted.org/packages/60/c3/3d1e01e2dba517a91760e4a03e4f20ffc75039a6fe584d0e6f9b5c78fd15/openai_harmony-0.0.8-cp38-abi3-musllinux_1_2_x86_64.whl", hash = "sha256:007b0476a1f331f8130783f901f1da6f5a7057af1a4891f1b6a31dec364189b5", size = 3205080, upload-time = "2025-11-05T19:07:05.078Z" },
 ]
 
+[[package]]
+name = "openapi-pydantic"
+version = "0.5.1"
+source = { registry = "https://pypi.org/simple" }
+dependencies = [
+    { name = "pydantic", marker = "(platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux')" },
+]
+sdist = { url = "https://files.pythonhosted.org/packages/02/2e/58d83848dd1a79cb92ed8e63f6ba901ca282c5f09d04af9423ec26c56fd7/openapi_pydantic-0.5.1.tar.gz", hash = "sha256:ff6835af6bde7a459fb93eb93bb92b8749b754fc6e51b2f1590a19dc3005ee0d", size = 60892, upload-time = "2025-01-08T19:29:27.083Z" }
+wheels = [
+    { url = "https://files.pythonhosted.org/packages/12/cf/03675d8bd8ecbf4445504d8071adab19f5f993676795708e36402ab38263/openapi_pydantic-0.5.1-py3-none-any.whl", hash = "sha256:a3a09ef4586f5bd760a8df7f43028b60cafb6d9f61de2acba9574766255ab146", size = 96381, upload-time = "2025-01-08T19:29:25.275Z" },
+]
+
 [[package]]
 name = "opencode-cp"
-version = "0.3.10"
+version = "0.3.11"
 source = { editable = "deps/research-environments/environments/opencode_cp" }
 dependencies = [
     { name = "prime-sandboxes", marker = "(platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux')" },
-    { name = "verifiers", marker = "(platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux')" },
+    { name = "verifiers", extra = ["packages"], marker = "(platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux')" },
 ]
 
 [package.metadata]
 requires-dist = [
     { name = "prime-sandboxes", specifier = ">=0.2.19" },
-    { name = "verifiers", specifier = ">=0.1.15.dev2" },
+    { name = "verifiers", extras = ["packages"], specifier = ">=0.1.15.dev17" },
 ]
 
 [[package]]
@@ -3044,54 +3278,54 @@ dependencies = [
 requires-dist = [
     { name = "datasets", specifier = ">=4.0.0" },
     { name = "prime-sandboxes", specifier = ">=0.2.25" },
-    { name = "verifiers", specifier = ">=0.1.15.dev2" },
+    { name = "verifiers", specifier = ">=0.1.15.dev17" },
 ]
 
 [[package]]
 name = "opencode-math"
-version = "0.4.11"
+version = "0.4.12"
 source = { editable = "deps/research-environments/environments/opencode_math" }
 dependencies = [
     { name = "math-verify", marker = "(platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux')" },
-    { name = "verifiers", marker = "(platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux')" },
+    { name = "verifiers", extra = ["packages"], marker = "(platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux')" },
 ]
 
 [package.metadata]
 requires-dist = [
     { name = "math-verify", specifier = ">=0.8.0" },
-    { name = "verifiers", specifier = ">=0.1.15.dev2" },
+    { name = "verifiers", extras = ["packages"], specifier = ">=0.1.15.dev17" },
 ]
 
 [[package]]
 name = "opencode-science"
-version = "0.3.11"
+version = "0.3.12"
 source = { editable = "deps/research-environments/environments/opencode_science" }
 dependencies = [
     { name = "math-verify", marker = "(platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux')" },
-    { name = "verifiers", marker = "(platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux')" },
+    { name = "verifiers", extra = ["packages"], marker = "(platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux')" },
 ]
 
 [package.metadata]
 requires-dist = [
     { name = "math-verify", specifier = ">=0.8.0" },
-    { name = "verifiers", specifier = ">=0.1.15.dev2" },
+    { name = "verifiers", extras = ["packages"], specifier = ">=0.1.15.dev17" },
 ]
 
 [[package]]
 name = "opencode-swe"
-version = "0.4.7"
+version = "0.4.8"
 source = { editable = "deps/research-environments/environments/opencode_swe" }
 dependencies = [
     { name = "prime-sandboxes", marker = "(platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux')" },
     { name = "swebench", marker = "(platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux')" },
-    { name = "verifiers", marker = "(platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux')" },
+    { name = "verifiers", extra = ["packages"], marker = "(platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux')" },
 ]
 
 [package.metadata]
 requires-dist = [
     { name = "prime-sandboxes", specifier = ">=0.2.23" },
     { name = "swebench", specifier = "==4.1.0" },
-    { name = "verifiers", specifier = ">=0.1.15.dev2" },
+    { name = "verifiers", extras = ["packages"], specifier = ">=0.1.15.dev17" },
 ]
 
 [[package]]
@@ -3108,6 +3342,54 @@ wheels = [
     { url = "https://files.pythonhosted.org/packages/4b/33/b5db29a6c00eb8f50708110d8d453747ca125c8b805bc437b289dbdcc057/opencv_python_headless-4.13.0.92-cp37-abi3-manylinux_2_28_x86_64.whl", hash = "sha256:0bd48544f77c68b2941392fcdf9bcd2b9cdf00e98cb8c29b2455d194763cf99e", size = 60391106, upload-time = "2026-02-05T10:30:14.236Z" },
 ]
 
+[[package]]
+name = "openenv-core"
+version = "0.2.1"
+source = { registry = "https://pypi.org/simple" }
+dependencies = [
+    { name = "fastapi", marker = "(platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux')" },
+    { name = "fastmcp", marker = "(platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux')" },
+    { name = "huggingface-hub", marker = "(platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux')" },
+    { name = "openai", marker = "(platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux')" },
+    { name = "pydantic", marker = "(platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux')" },
+    { name = "pyyaml", marker = "(platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux')" },
+    { name = "requests", marker = "(platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux')" },
+    { name = "rich", marker = "(platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux')" },
+    { name = "tomli", marker = "(platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux')" },
+    { name = "tomli-w", marker = "(platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux')" },
+    { name = "typer", marker = "(platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux')" },
+    { name = "uvicorn", marker = "(platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux')" },
+    { name = "websockets", marker = "(platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux')" },
+]
+sdist = { url = "https://files.pythonhosted.org/packages/d1/d3/d2cef0e459158c9410f073ffd2ad6eca7c09232e7c53d4987acc0b942d28/openenv_core-0.2.1.tar.gz", hash = "sha256:0caa44411af7d866e451e50744d1adab57cdf9a2cf7a1b3f81042675110aebc7", size = 102728, upload-time = "2026-02-04T10:25:24.263Z" }
+wheels = [
+    { url = "https://files.pythonhosted.org/packages/a1/5a/a7f8b0e53eac45faedcf6fbfacdd28a104f815d3471f2deceefb4234d8be/openenv_core-0.2.1-py3-none-any.whl", hash = "sha256:5868722833df3220b7a3288f581e6c0825c2d8fae42d932ff90d2bb60765813a", size = 121855, upload-time = "2026-02-04T10:25:22.82Z" },
+]
+
+[[package]]
+name = "openreward"
+version = "0.1.125"
+source = { registry = "https://pypi.org/simple" }
+dependencies = [
+    { name = "aiohttp", marker = "(platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux')" },
+    { name = "anthropic", marker = "(platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux')" },
+    { name = "click", marker = "(platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux')" },
+    { name = "fastapi", marker = "(platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux')" },
+    { name = "google-genai", marker = "(platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux')" },
+    { name = "openai", marker = "(platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux')" },
+    { name = "pydantic", marker = "(platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux')" },
+    { name = "pyyaml", marker = "(platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux')" },
+    { name = "sse-starlette", marker = "(platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux')" },
+    { name = "structlog", marker = "(platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux')" },
+    { name = "tenacity", marker = "(platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux')" },
+    { name = "typing-extensions", marker = "(platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux')" },
+    { name = "uvicorn", marker = "(platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux')" },
+]
+sdist = { url = "https://files.pythonhosted.org/packages/9d/dd/b69264c77fd9720f69443c5f6420a7ae9934b4e2799e276d8655f6dc0721/openreward-0.1.125.tar.gz", hash = "sha256:519687307f960ab3a395bf844d6c2fc018d8a0faad0fc367ad3b24331366d390", size = 138936, upload-time = "2026-05-21T10:24:10.075Z" }
+wheels = [
+    { url = "https://files.pythonhosted.org/packages/72/11/51c5473d5b3d209ecaacee6ac5418eef15114d0817a98f9cee0a2d6364ee/openreward-0.1.125-py3-none-any.whl", hash = "sha256:784faeeef6aba2ce8f175bd9af5dff29b3bb0c07a4c3642eb56dc431c9af2924", size = 135521, upload-time = "2026-05-21T10:24:11.369Z" },
+]
+
 [[package]]
 name = "opentelemetry-api"
 version = "1.37.0"
@@ -3313,6 +3595,24 @@ wheels = [
     { url = "https://files.pythonhosted.org/packages/42/32/658973117bf0fd82a24abbfb94fe73a5e86216e49342985e10acce54775a/partial_json_parser-0.2.1.1.post7-py3-none-any.whl", hash = "sha256:145119e5eabcf80cbb13844a6b50a85c68bf99d376f8ed771e2a3c3b03e653ae", size = 10877, upload-time = "2025-11-17T07:27:40.457Z" },
 ]
 
+[[package]]
+name = "pathable"
+version = "0.6.0"
+source = { registry = "https://pypi.org/simple" }
+sdist = { url = "https://files.pythonhosted.org/packages/66/f3/5a20387de9bcd0607871bfc2198ee0e15836da7baa4592ccd7f24c27c986/pathable-0.6.0.tar.gz", hash = "sha256:6404b8b82aef5ff0fd478934137128b99b12212ba35afdde5525ca4f8388ea58", size = 18970, upload-time = "2026-05-19T18:15:11.911Z" }
+wheels = [
+    { url = "https://files.pythonhosted.org/packages/a2/e8/6d75ffd9784bce2e93d1ae4415649427e39a53bb172d4672b2b59c6f0a7b/pathable-0.6.0-py3-none-any.whl", hash = "sha256:82c4ca6c98c502ad12e0d4e9779b6210afee93c38990988c8c5d1b49bdcdf566", size = 18983, upload-time = "2026-05-19T18:15:10.728Z" },
+]
+
+[[package]]
+name = "pathvalidate"
+version = "3.3.1"
+source = { registry = "https://pypi.org/simple" }
+sdist = { url = "https://files.pythonhosted.org/packages/fa/2a/52a8da6fe965dea6192eb716b357558e103aea0a1e9a8352ad575a8406ca/pathvalidate-3.3.1.tar.gz", hash = "sha256:b18c07212bfead624345bb8e1d6141cdcf15a39736994ea0b94035ad2b1ba177", size = 63262, upload-time = "2025-06-15T09:07:20.736Z" }
+wheels = [
+    { url = "https://files.pythonhosted.org/packages/9a/70/875f4a23bfc4731703a5835487d0d2fb999031bd415e7d17c0ae615c18b7/pathvalidate-3.3.1-py3-none-any.whl", hash = "sha256:5263baab691f8e1af96092fa5137ee17df5bdfbd6cff1fcac4d6ef4bc2e1735f", size = 24305, upload-time = "2025-06-15T09:07:19.117Z" },
+]
+
 [[package]]
 name = "pdfminer-six"
 version = "20260107"
@@ -3526,6 +3826,7 @@ envs = [
     { name = "deepdive", marker = "(platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux')" },
     { name = "general-agent", marker = "(platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux')" },
     { name = "gpqa", marker = "(platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux')" },
+    { name = "harnesses", marker = "(platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux')" },
     { name = "hle", marker = "(platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux')" },
     { name = "ifeval", marker = "(platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux')" },
     { name = "livecodebench", marker = "(platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux')" },
@@ -3534,7 +3835,6 @@ envs = [
     { name = "math-python", marker = "(platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux')" },
     { name = "math500", marker = "(platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux')" },
     { name = "mini-swe-agent-plus", marker = "(platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux')" },
-    { name = "mini-swe-agent-plus-rlm", marker = "(platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux')" },
     { name = "mmlu-pro", marker = "(platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux')" },
     { name = "opencode-cp", marker = "(platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux')" },
     { name = "opencode-deepdive", marker = "(platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux')" },
@@ -3545,6 +3845,7 @@ envs = [
     { name = "rlm-swe", marker = "(platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux')" },
     { name = "science-env", marker = "(platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux')" },
     { name = "simpleqa-verified", marker = "(platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux')" },
+    { name = "tasksets", marker = "(platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux')" },
     { name = "tau2-bench", marker = "(platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux')" },
     { name = "wiki-search", marker = "(platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux')" },
 ]
@@ -3600,6 +3901,7 @@ requires-dist = [
     { name = "flash-linear-attention", git = "https://github.com/fla-org/flash-linear-attention" },
     { name = "general-agent", marker = "extra == 'envs'", editable = "deps/research-environments/environments/general_agent" },
     { name = "gpqa", marker = "extra == 'envs'", editable = "deps/research-environments/environments/gpqa" },
+    { name = "harnesses", marker = "extra == 'envs'", editable = "deps/verifiers/packages/harnesses" },
     { name = "hle", marker = "extra == 'envs'", editable = "deps/research-environments/environments/hle" },
     { name = "ifeval", marker = "extra == 'envs'", editable = "deps/research-environments/environments/ifeval" },
     { name = "jaxtyping", specifier = ">=0.3.2" },
@@ -3612,7 +3914,6 @@ requires-dist = [
     { name = "math-python", marker = "extra == 'envs'", editable = "deps/verifiers/environments/math_python" },
     { name = "math500", marker = "extra == 'envs'", editable = "deps/research-environments/environments/math500" },
     { name = "mini-swe-agent-plus", marker = "extra == 'envs'", editable = "deps/research-environments/environments/mini_swe_agent_plus" },
-    { name = "mini-swe-agent-plus-rlm", marker = "extra == 'envs'", editable = "deps/research-environments/environments/mini_swe_agent_plus_rlm" },
     { name = "mmlu-pro", marker = "extra == 'envs'", editable = "deps/research-environments/environments/mmlu_pro" },
     { name = "modelexpress", marker = "extra == 'modelexpress'", specifier = "==0.3.0" },
     { name = "mooncake-transfer-engine", specifier = ">=0.3.10.post2" },
@@ -3645,6 +3946,7 @@ requires-dist = [
     { name = "science-env", marker = "extra == 'envs'", editable = "deps/research-environments/environments/science_env" },
     { name = "setproctitle", specifier = ">=1.3.0" },
     { name = "simpleqa-verified", marker = "extra == 'envs'", editable = "deps/research-environments/environments/simpleqa_verified" },
+    { name = "tasksets", marker = "extra == 'envs'", editable = "deps/verifiers/packages/tasksets" },
     { name = "tau2-bench", marker = "extra == 'envs'", editable = "deps/research-environments/environments/tau2_bench" },
     { name = "tenacity", specifier = ">=8.2.0" },
     { name = "tilelang", specifier = ">=0.1.8" },
@@ -3825,6 +4127,47 @@ wheels = [
     { url = "https://files.pythonhosted.org/packages/e0/a9/023730ba63db1e494a271cb018dcd361bd2c917ba7004c3e49d5daf795a2/py_cpuinfo-9.0.0-py3-none-any.whl", hash = "sha256:859625bc251f64e21f077d099d4162689c762b5d6a4c3c97553d56241c9674d5", size = 22335, upload-time = "2022-10-25T20:38:27.636Z" },
 ]
 
+[[package]]
+name = "py-key-value-aio"
+version = "0.3.0"
+source = { registry = "https://pypi.org/simple" }
+dependencies = [
+    { name = "beartype", marker = "(platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux')" },
+    { name = "py-key-value-shared", marker = "(platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux')" },
+]
+sdist = { url = "https://files.pythonhosted.org/packages/93/ce/3136b771dddf5ac905cc193b461eb67967cf3979688c6696e1f2cdcde7ea/py_key_value_aio-0.3.0.tar.gz", hash = "sha256:858e852fcf6d696d231266da66042d3355a7f9871650415feef9fca7a6cd4155", size = 50801, upload-time = "2025-11-17T16:50:04.711Z" }
+wheels = [
+    { url = "https://files.pythonhosted.org/packages/99/10/72f6f213b8f0bce36eff21fda0a13271834e9eeff7f9609b01afdc253c79/py_key_value_aio-0.3.0-py3-none-any.whl", hash = "sha256:1c781915766078bfd608daa769fefb97e65d1d73746a3dfb640460e322071b64", size = 96342, upload-time = "2025-11-17T16:50:03.801Z" },
+]
+
+[package.optional-dependencies]
+disk = [
+    { name = "diskcache", marker = "(platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux')" },
+    { name = "pathvalidate", marker = "(platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux')" },
+]
+keyring = [
+    { name = "keyring", marker = "(platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux')" },
+]
+memory = [
+    { name = "cachetools", marker = "(platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux')" },
+]
+redis = [
+    { name = "redis", marker = "(platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux')" },
+]
+
+[[package]]
+name = "py-key-value-shared"
+version = "0.3.0"
+source = { registry = "https://pypi.org/simple" }
+dependencies = [
+    { name = "beartype", marker = "(platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux')" },
+    { name = "typing-extensions", marker = "(platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux')" },
+]
+sdist = { url = "https://files.pythonhosted.org/packages/7b/e4/1971dfc4620a3a15b4579fe99e024f5edd6e0967a71154771a059daff4db/py_key_value_shared-0.3.0.tar.gz", hash = "sha256:8fdd786cf96c3e900102945f92aa1473138ebe960ef49da1c833790160c28a4b", size = 11666, upload-time = "2025-11-17T16:50:06.849Z" }
+wheels = [
+    { url = "https://files.pythonhosted.org/packages/51/e4/b8b0a03ece72f47dce2307d36e1c34725b7223d209fc679315ffe6a4e2c3/py_key_value_shared-0.3.0-py3-none-any.whl", hash = "sha256:5b0efba7ebca08bb158b1e93afc2f07d30b8f40c2fc12ce24a4c0d84f42f9298", size = 19560, upload-time = "2025-11-17T16:50:05.954Z" },
+]
+
 [[package]]
 name = "pyarrow"
 version = "24.0.0"
@@ -3837,6 +4180,27 @@ wheels = [
     { url = "https://files.pythonhosted.org/packages/36/7a/82c363caa145fff88fb475da50d3bf52bb024f61917be5424c3392eaf878/pyarrow-24.0.0-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:25ea65d868eb04015cd18e6df2fbe98f07e5bda2abefabcb88fce39a947716f6", size = 51929490, upload-time = "2026-04-21T10:47:55.981Z" },
 ]
 
+[[package]]
+name = "pyasn1"
+version = "0.6.3"
+source = { registry = "https://pypi.org/simple" }
+sdist = { url = "https://files.pythonhosted.org/packages/5c/5f/6583902b6f79b399c9c40674ac384fd9cd77805f9e6205075f828ef11fb2/pyasn1-0.6.3.tar.gz", hash = "sha256:697a8ecd6d98891189184ca1fa05d1bb00e2f84b5977c481452050549c8a72cf", size = 148685, upload-time = "2026-03-17T01:06:53.382Z" }
+wheels = [
+    { url = "https://files.pythonhosted.org/packages/5d/a0/7d793dce3fa811fe047d6ae2431c672364b462850c6235ae306c0efd025f/pyasn1-0.6.3-py3-none-any.whl", hash = "sha256:a80184d120f0864a52a073acc6fc642847d0be408e7c7252f31390c0f4eadcde", size = 83997, upload-time = "2026-03-17T01:06:52.036Z" },
+]
+
+[[package]]
+name = "pyasn1-modules"
+version = "0.4.2"
+source = { registry = "https://pypi.org/simple" }
+dependencies = [
+    { name = "pyasn1", marker = "(platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux')" },
+]
+sdist = { url = "https://files.pythonhosted.org/packages/e9/e6/78ebbb10a8c8e4b61a59249394a4a594c1a7af95593dc933a349c8d00964/pyasn1_modules-0.4.2.tar.gz", hash = "sha256:677091de870a80aae844b1ca6134f54652fa2c8c5a52aa396440ac3106e941e6", size = 307892, upload-time = "2025-03-28T02:41:22.17Z" }
+wheels = [
+    { url = "https://files.pythonhosted.org/packages/47/8d/d529b5d697919ba8c11ad626e835d4039be708a35b0d22de83a269a6682c/pyasn1_modules-0.4.2-py3-none-any.whl", hash = "sha256:29253a9207ce32b64c3ac6600edc75368f98473906e8fd1043bd6b5b1de2c14a", size = 181259, upload-time = "2025-03-28T02:41:19.028Z" },
+]
+
 [[package]]
 name = "pybase64"
 version = "1.4.3"
@@ -3950,6 +4314,29 @@ wheels = [
     { url = "https://files.pythonhosted.org/packages/ae/8d/f1af3832f5e6eb13ba94ee809e72b8ecb5eef226d27ee0bef7d963d943c7/pydantic_settings-2.14.1-py3-none-any.whl", hash = "sha256:6e3c7edfd8277687cdc598f56e5cff0e9bfff0910a3749deaa8d4401c3a2b9de", size = 60964, upload-time = "2026-05-08T13:40:04.958Z" },
 ]
 
+[[package]]
+name = "pydocket"
+version = "0.21.0"
+source = { registry = "https://pypi.org/simple" }
+dependencies = [
+    { name = "burner-redis", marker = "(platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux')" },
+    { name = "cloudpickle", marker = "(platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux')" },
+    { name = "cronsim", marker = "(platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux')" },
+    { name = "opentelemetry-api", marker = "(platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux')" },
+    { name = "prometheus-client", marker = "(platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux')" },
+    { name = "py-key-value-aio", extra = ["memory", "redis"], marker = "(platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux')" },
+    { name = "python-json-logger", marker = "(platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux')" },
+    { name = "redis", marker = "(platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux')" },
+    { name = "rich", marker = "(platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux')" },
+    { name = "typer", marker = "(platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux')" },
+    { name = "typing-extensions", marker = "(platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux')" },
+    { name = "uncalled-for", marker = "(platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux')" },
+]
+sdist = { url = "https://files.pythonhosted.org/packages/ad/71/e267ddae6fa3524bfbc00fd409fe2157cc8814751ebf3e2cf22879c1732e/pydocket-0.21.0.tar.gz", hash = "sha256:2fcfc67f05a98689505e6af127af7f71b9612c08a139cfe1a690706c43810968", size = 398122, upload-time = "2026-05-26T15:28:51.812Z" }
+wheels = [
+    { url = "https://files.pythonhosted.org/packages/19/d1/fcfa26ced70c37714b5b4c5da9bb2ea9c28116367e8db8994de7b91d0b5f/pydocket-0.21.0-py3-none-any.whl", hash = "sha256:b98f8fcd48fbd5258f6ab0be9080fbc36dcab52a73a9acc0509652de0b445df0", size = 116953, upload-time = "2026-05-26T15:28:50.246Z" },
+]
+
 [[package]]
 name = "pyelftools"
 version = "0.32"
@@ -4026,6 +4413,15 @@ wheels = [
     { url = "https://files.pythonhosted.org/packages/10/bd/c038d7cc38edc1aa5bf91ab8068b63d4308c66c4c8bb3cbba7dfbc049f9c/pyparsing-3.3.2-py3-none-any.whl", hash = "sha256:850ba148bd908d7e2411587e247a1e4f0327839c40e2e5e6d05a007ecc69911d", size = 122781, upload-time = "2026-01-21T03:57:55.912Z" },
 ]
 
+[[package]]
+name = "pyperclip"
+version = "1.11.0"
+source = { registry = "https://pypi.org/simple" }
+sdist = { url = "https://files.pythonhosted.org/packages/e8/52/d87eba7cb129b81563019d1679026e7a112ef76855d6159d24754dbd2a51/pyperclip-1.11.0.tar.gz", hash = "sha256:244035963e4428530d9e3a6101a1ef97209c6825edab1567beac148ccc1db1b6", size = 12185, upload-time = "2025-09-26T14:40:37.245Z" }
+wheels = [
+    { url = "https://files.pythonhosted.org/packages/df/80/fc9d01d5ed37ba4c42ca2b55b4339ae6e200b456be3a1aaddf4a9fa99b8c/pyperclip-1.11.0-py3-none-any.whl", hash = "sha256:299403e9ff44581cb9ba2ffeed69c7aa96a008622ad0c46cb575ca75b5b84273", size = 11063, upload-time = "2025-09-26T14:40:36.069Z" },
+]
+
 [[package]]
 name = "pypika"
 version = "0.51.1"
@@ -4308,6 +4704,19 @@ wheels = [
     { url = "https://files.pythonhosted.org/packages/82/3b/64d4899d73f91ba49a8c18a8ff3f0ea8f1c1d75481760df8c68ef5235bf5/rich-15.0.0-py3-none-any.whl", hash = "sha256:33bd4ef74232fb73fe9279a257718407f169c09b78a87ad3d296f548e27de0bb", size = 310654, upload-time = "2026-04-12T08:24:02.83Z" },
 ]
 
+[[package]]
+name = "rich-rst"
+version = "2.0.1"
+source = { registry = "https://pypi.org/simple" }
+dependencies = [
+    { name = "pygments", marker = "(platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux')" },
+    { name = "rich", marker = "(platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux')" },
+]
+sdist = { url = "https://files.pythonhosted.org/packages/57/56/3191bae66b08ccc637ea8120426068bcb361cc323c96404c310886937067/rich_rst-2.0.1.tar.gz", hash = "sha256:cbe236ed0901d1ec8427cc6a50bf0a34353ba28ad014dc24def68bfe7f3b9e68", size = 300570, upload-time = "2026-05-16T00:47:57.362Z" }
+wheels = [
+    { url = "https://files.pythonhosted.org/packages/a0/3d/55c17d3ebdf3cd81356002afe5bef9bb8af631db2819785b6eac845b925b/rich_rst-2.0.1-py3-none-any.whl", hash = "sha256:7ee15f345ce25fa02b582c272a6cdbaf0c21243e38061cea273cff659bf3ef61", size = 272922, upload-time = "2026-05-16T00:47:55.508Z" },
+]
+
 [[package]]
 name = "rich-toolkit"
 version = "0.19.7"
@@ -4345,7 +4754,7 @@ wheels = [
 
 [[package]]
 name = "rlm-swe"
-version = "0.4.2"
+version = "0.4.3"
 source = { editable = "deps/research-environments/environments/rlm_swe" }
 dependencies = [
     { name = "multi-swe-bench", marker = "(platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux')" },
@@ -4410,7 +4819,7 @@ dependencies = [
 [package.metadata]
 requires-dist = [
     { name = "math-verify", specifier = ">=0.8.0" },
-    { name = "verifiers", specifier = ">=0.1.12.dev1" },
+    { name = "verifiers", specifier = ">=0.1.15.dev17" },
 ]
 
 [[package]]
@@ -4458,6 +4867,19 @@ wheels = [
     { url = "https://files.pythonhosted.org/packages/83/11/00d3c3dfc25ad54e731d91449895a79e4bf2384dc3ac01809010ba88f6d5/seaborn-0.13.2-py3-none-any.whl", hash = "sha256:636f8336facf092165e27924f223d3c62ca560b1f2bb5dff7ab7fad265361987", size = 294914, upload-time = "2024-01-25T13:21:49.598Z" },
 ]
 
+[[package]]
+name = "secretstorage"
+version = "3.5.0"
+source = { registry = "https://pypi.org/simple" }
+dependencies = [
+    { name = "cryptography", marker = "(platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux')" },
+    { name = "jeepney", marker = "(platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux')" },
+]
+sdist = { url = "https://files.pythonhosted.org/packages/1c/03/e834bcd866f2f8a49a85eaff47340affa3bfa391ee9912a952a1faa68c7b/secretstorage-3.5.0.tar.gz", hash = "sha256:f04b8e4689cbce351744d5537bf6b1329c6fc68f91fa666f60a380edddcd11be", size = 19884, upload-time = "2025-11-23T19:02:53.191Z" }
+wheels = [
+    { url = "https://files.pythonhosted.org/packages/b7/46/f5af3402b579fd5e11573ce652019a67074317e18c1935cc0b4ba9b35552/secretstorage-3.5.0-py3-none-any.whl", hash = "sha256:0ce65888c0725fcb2c5bc0fdb8e5438eece02c523557ea40ce0703c266248137", size = 15554, upload-time = "2025-11-23T19:02:51.545Z" },
+]
+
 [[package]]
 name = "sentencepiece"
 version = "0.2.1"
@@ -4520,7 +4942,7 @@ dependencies = [
 ]
 
 [package.metadata]
-requires-dist = [{ name = "verifiers", specifier = ">=0.1.15.dev1" }]
+requires-dist = [{ name = "verifiers", specifier = ">=0.1.15.dev17" }]
 
 [[package]]
 name = "six"
@@ -4560,15 +4982,14 @@ wheels = [
 
 [[package]]
 name = "sse-starlette"
-version = "3.4.2"
+version = "2.3.6"
 source = { registry = "https://pypi.org/simple" }
 dependencies = [
     { name = "anyio", marker = "(platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux')" },
-    { name = "starlette", marker = "(platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux')" },
 ]
-sdist = { url = "https://files.pythonhosted.org/packages/38/82/10cdfab4ab663a6b6bd624d33f55b2cfa41af5105be033a6d5d135a92c5f/sse_starlette-3.4.2.tar.gz", hash = "sha256:2f9a7f51ed84395a0427fb9f66cb1ec11f7899d977a72cbc9070b962a2e14489", size = 35236, upload-time = "2026-05-06T19:42:13.727Z" }
+sdist = { url = "https://files.pythonhosted.org/packages/8c/f4/989bc70cb8091eda43a9034ef969b25145291f3601703b82766e5172dfed/sse_starlette-2.3.6.tar.gz", hash = "sha256:0382336f7d4ec30160cf9ca0518962905e1b69b72d6c1c995131e0a703b436e3", size = 18284, upload-time = "2025-05-30T13:34:12.914Z" }
 wheels = [
-    { url = "https://files.pythonhosted.org/packages/c1/27/351c71e803c56090d8d3bf9520422debeb8ed938871fd4f7ef519805a6c5/sse_starlette-3.4.2-py3-none-any.whl", hash = "sha256:6ea5d35b7ce979a3de5a0db5f77fe886b1616e4b3e1ad93fba502bd9b5fb662f", size = 16516, upload-time = "2026-05-06T19:42:12.201Z" },
+    { url = "https://files.pythonhosted.org/packages/81/05/78850ac6e79af5b9508f8841b0f26aa9fd329a1ba00bf65453c2d312bcc8/sse_starlette-2.3.6-py3-none-any.whl", hash = "sha256:d49a8285b182f6e2228e2609c350398b2ca2c36216c2675d875f81e93548f760", size = 10606, upload-time = "2025-05-30T13:34:11.703Z" },
 ]
 
 [[package]]
@@ -4598,6 +5019,15 @@ wheels = [
     { url = "https://files.pythonhosted.org/packages/81/0d/13d1d239a25cbfb19e740db83143e95c772a1fe10202dda4b76792b114dd/starlette-0.52.1-py3-none-any.whl", hash = "sha256:0029d43eb3d273bc4f83a08720b4912ea4b071087a3b48db01b7c839f7954d74", size = 74272, upload-time = "2026-01-18T13:34:09.188Z" },
 ]
 
+[[package]]
+name = "structlog"
+version = "25.5.0"
+source = { registry = "https://pypi.org/simple" }
+sdist = { url = "https://files.pythonhosted.org/packages/ef/52/9ba0f43b686e7f3ddfeaa78ac3af750292662284b3661e91ad5494f21dbc/structlog-25.5.0.tar.gz", hash = "sha256:098522a3bebed9153d4570c6d0288abf80a031dfdb2048d59a49e9dc2190fc98", size = 1460830, upload-time = "2025-10-27T08:28:23.028Z" }
+wheels = [
+    { url = "https://files.pythonhosted.org/packages/a8/45/a132b9074aa18e799b891b91ad72133c98d8042c70f6240e4c5f9dabee2f/structlog-25.5.0-py3-none-any.whl", hash = "sha256:a8453e9b9e636ec59bd9e79bbd4a72f025981b3ba0f5837aebf48f02f37a7f9f", size = 72510, upload-time = "2025-10-27T08:28:21.535Z" },
+]
+
 [[package]]
 name = "supervisor"
 version = "4.3.0"
@@ -4684,6 +5114,36 @@ wheels = [
     { url = "https://files.pythonhosted.org/packages/40/44/4a5f08c96eb108af5cb50b41f76142f0afa346dfa99d5296fe7202a11854/tabulate-0.9.0-py3-none-any.whl", hash = "sha256:024ca478df22e9340661486f85298cff5f6dcdba14f3813e8830015b9ed1948f", size = 35252, upload-time = "2022-10-06T17:21:44.262Z" },
 ]
 
+[[package]]
+name = "tasksets"
+source = { editable = "deps/verifiers/packages/tasksets" }
+dependencies = [
+    { name = "verifiers", marker = "(platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux')" },
+]
+
+[package.optional-dependencies]
+openenv = [
+    { name = "openenv-core", marker = "(platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux')" },
+]
+openreward = [
+    { name = "openreward", marker = "(platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux')" },
+]
+ta = [
+    { name = "nltk", marker = "(platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux')" },
+    { name = "textarena", marker = "(platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux')" },
+]
+
+[package.metadata]
+requires-dist = [
+    { name = "nemo-gym", marker = "python_full_version >= '3.12' and extra == 'nemogym'", specifier = ">=0.2.1,<0.3" },
+    { name = "nltk", marker = "extra == 'ta'" },
+    { name = "openenv-core", marker = "extra == 'openenv'", specifier = ">=0.3.0" },
+    { name = "openreward", marker = "python_full_version >= '3.11' and extra == 'openreward'", specifier = ">=0.1.123" },
+    { name = "textarena", marker = "extra == 'ta'" },
+    { name = "verifiers", specifier = ">=0.1.15.dev11" },
+]
+provides-extras = ["nemogym", "openenv", "openreward", "ta"]
+
 [[package]]
 name = "tau2"
 version = "0.2.1.dev0"
@@ -4729,7 +5189,7 @@ dependencies = [
 [package.metadata]
 requires-dist = [
     { name = "tau2", git = "https://github.com/sierra-research/tau2-bench.git?rev=337326e" },
-    { name = "verifiers", specifier = ">=0.1.15.dev1" },
+    { name = "verifiers", specifier = ">=0.1.15.dev17" },
 ]
 
 [[package]]
@@ -4770,6 +5230,24 @@ wheels = [
     { url = "https://files.pythonhosted.org/packages/73/c6/825dab04195756cf8ff2e12698f22513b3db2f64925bdd41671bfb33aaa5/tensorboard_data_server-0.7.2-py3-none-manylinux_2_31_x86_64.whl", hash = "sha256:ef687163c24185ae9754ed5650eb5bc4d84ff257aabdc33f0cc6f74d8ba54530", size = 6590363, upload-time = "2023-10-23T21:23:35.583Z" },
 ]
 
+[[package]]
+name = "textarena"
+version = "0.7.4"
+source = { registry = "https://pypi.org/simple" }
+dependencies = [
+    { name = "chess", marker = "(platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux')" },
+    { name = "nltk", marker = "(platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux')" },
+    { name = "openai", marker = "(platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux')" },
+    { name = "python-dotenv", marker = "(platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux')" },
+    { name = "requests", marker = "(platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux')" },
+    { name = "rich", marker = "(platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux')" },
+    { name = "websockets", marker = "(platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux')" },
+]
+sdist = { url = "https://files.pythonhosted.org/packages/ba/04/4a3ca42093d0be2a9c377ae3335a6c6baac1d278ae932562ec69f339d172/textarena-0.7.4.tar.gz", hash = "sha256:28bb9170d7718f2ae05e4515bea82262422731e563fc7318a9e7983de0cadd4f", size = 954969, upload-time = "2025-10-16T14:41:55.981Z" }
+wheels = [
+    { url = "https://files.pythonhosted.org/packages/26/b4/9a9ba65154aff853c75b3d7324319d168ad9c69c6097f4aa3c16da7d9ef3/textarena-0.7.4-py3-none-any.whl", hash = "sha256:684784e78278e518066f67557ee93b47c238d16cbbd15d3abdaa3147562d3024", size = 1073570, upload-time = "2025-10-16T14:41:53.965Z" },
+]
+
 [[package]]
 name = "textual"
 version = "8.2.5"
@@ -5197,6 +5675,15 @@ wheels = [
     { url = "https://files.pythonhosted.org/packages/61/73/d21edf5b204d1467e06500080a50f79d49ef2b997c79123a536d4a17d97c/uc_micro_py-2.0.0-py3-none-any.whl", hash = "sha256:3603a3859af53e5a39bc7677713c78ea6589ff188d70f4fee165db88e22b242c", size = 6383, upload-time = "2026-03-01T06:31:26.257Z" },
 ]
 
+[[package]]
+name = "uncalled-for"
+version = "0.3.2"
+source = { registry = "https://pypi.org/simple" }
+sdist = { url = "https://files.pythonhosted.org/packages/b5/82/345cc927f7fbdae6065e7768759932fcc827fc20b29b45dfbafa2f1f7da4/uncalled_for-0.3.2.tar.gz", hash = "sha256:89f5dbcd71e2b8f47c030b1fa302e6cce2ec795d1ac565eeb6525c5fe55cb8a2", size = 50032, upload-time = "2026-05-06T13:38:25.204Z" }
+wheels = [
+    { url = "https://files.pythonhosted.org/packages/3b/25/2c87754f3a9e692315f7b811244090e68f362979fc8886b3fbd2985a1d8c/uncalled_for-0.3.2-py3-none-any.whl", hash = "sha256:0ff60b142c7d1f8070bde9d42afaa70aedc77dcc10998c227687e9c15713418e", size = 11444, upload-time = "2026-05-06T13:38:24.025Z" },
+]
+
 [[package]]
 name = "unidiff"
 version = "0.7.5"
@@ -5281,6 +5768,12 @@ dependencies = [
     { name = "uvloop", marker = "(platform_machine == 'aarch64' and platform_python_implementation != 'PyPy' and sys_platform == 'linux') or (platform_machine == 'x86_64' and platform_python_implementation != 'PyPy' and sys_platform == 'linux')" },
 ]
 
+[package.optional-dependencies]
+packages = [
+    { name = "harnesses", marker = "(platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux')" },
+    { name = "tasksets", extra = ["openenv", "openreward", "ta"], marker = "(platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux')" },
+]
+
 [package.metadata]
 requires-dist = [
     { name = "accelerate", marker = "extra == 'rl'", specifier = ">=1.4.0" },