Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions docs/reference.md
Original file line number Diff line number Diff line change
Expand Up @@ -193,6 +193,7 @@ class TrajectoryStepTokens(TypedDict):
overlong_prompt: bool
is_truncated: bool
routed_experts: list[list[list[int]]] | None # [seq_len, layers, topk] to enable router replay
multi_modal_data: NotRequired[Any] # renderers.MultiModalData sidecar (pixel_values, placeholder ranges) — set only on multimodal rollouts
```

Token-level data for training.
Expand Down
10 changes: 2 additions & 8 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -74,7 +74,7 @@ dev = [
"aiohttp>=3.9.0",
"python-dotenv>=1.0.0",
"nltk",
"renderers>=0.1.6",
"renderers>=0.1.8.dev0",
Comment thread
willccbb marked this conversation as resolved.
]

[project.optional-dependencies]
Expand All @@ -94,7 +94,7 @@ browser = [
"python-dotenv>=1.0.0",
]
renderers = [
"renderers>=0.1.6",
"renderers>=0.1.8.dev0",
]
rl = [
"torch>=2.8.0,<2.9.0",
Expand Down Expand Up @@ -125,12 +125,6 @@ prime-tunnel = false
prime-sandboxes = false
renderers = false

[tool.uv.sources]
# Pinned to renderers main until the next PyPI release lands; drop after.
# fe67f9f = renderers main: PR #4 squash-merge — construction-time
# preserve_*_thinking flags on create_renderer / create_renderer_pool.
renderers = { git = "https://github.com/PrimeIntellect-ai/renderers.git", rev = "fe67f9f" }

[tool.uv.extra-build-dependencies]
flash-attn = [{ requirement = "torch", match-runtime = true }]

Expand Down
30 changes: 18 additions & 12 deletions tests/test_renderer_client.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@

import verifiers as vf
from renderers import RendererPool
from renderers.base import ParsedResponse, create_renderer
from renderers.base import ParsedResponse, RenderedTokens, create_renderer
from verifiers.clients.renderer_client import (
RendererClient,
_attach_tool_call_names,
Expand Down Expand Up @@ -280,11 +280,13 @@ def bridge_to_next_turn(
stop_idx = len(self.bridge_base) - 1
trailing = list(self.bridge_base[stop_idx + 1 :])
extension = list(self.bridge_full[len(self.bridge_base) :])
return (
list(previous_prompt_ids)
+ list(previous_completion_ids)
+ trailing
+ extension
return RenderedTokens(
token_ids=(
list(previous_prompt_ids)
+ list(previous_completion_ids)
+ trailing
+ extension
)
)

def parse_response(self, token_ids):
Expand Down Expand Up @@ -345,7 +347,8 @@ async def test_get_incremental_prompt_ids_matches_tool_tail_without_rerendering_
renderer=renderer, prompt=prompt, state=state, tools=None
)

assert result == [1, 2, 3, 99, 30, 40]
assert result is not None
assert result.token_ids == [1, 2, 3, 99, 30, 40]
# The bridge stitches over the completion without re-rendering it —
# one bridge call, zero render_ids calls (older diff-based bridges
# called render_ids twice).
Expand Down Expand Up @@ -387,7 +390,8 @@ async def test_get_incremental_prompt_ids_accepts_tool_then_user_tail():
renderer=renderer, prompt=prompt, state=state, tools=None
)

assert result == [1, 2, 3, 99, 40, 50]
assert result is not None
assert result.token_ids == [1, 2, 3, 99, 40, 50]


@pytest.mark.asyncio
Expand Down Expand Up @@ -446,7 +450,8 @@ async def test_get_incremental_prompt_ids_accepts_multimodal_tool_user_tail():
renderer=renderer, prompt=prompt, state=state, tools=None
)

assert result == [1, 2, 3, 99, 40, 50]
assert result is not None
assert result.token_ids == [1, 2, 3, 99, 40, 50]


# ── Parity across real renderers: truncated most-recent step ──────────
Expand Down Expand Up @@ -478,7 +483,7 @@ async def test_get_incremental_prompt_ids_accepts_multimodal_tool_user_tail():
"auto",
id="nvidia/NVIDIA-Nemotron-3-Nano-30B-A3B-BF16",
),
pytest.param("openai/gpt-oss-20b", "gpt_oss", id="openai/gpt-oss-20b"),
pytest.param("openai/gpt-oss-20b", "gpt-oss", id="openai/gpt-oss-20b"),
]


Expand Down Expand Up @@ -552,11 +557,12 @@ async def test_get_incremental_prompt_ids_bridges_over_truncated_step(

prefix = list(prev_prompt_ids) + list(prev_completion_ids)
assert result is not None, f"{model_id}: bridge returned None on truncated anchor"
assert result[: len(prefix)] == prefix, (
result_ids = result.token_ids
assert result_ids[: len(prefix)] == prefix, (
f"{model_id}: bridge result does not prefix-preserve "
f"prev_prompt + prev_completion"
)
assert len(result) > len(prefix), (
assert len(result_ids) > len(prefix), (
f"{model_id}: bridge produced no tail tokens for the new user turn"
)

Expand Down
255 changes: 255 additions & 0 deletions tests/test_save_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,7 @@
)
from verifiers.utils.save_utils import (
GenerateOutputsBuilder,
_delta_intermediate_mm_data,
extract_usage_tokens,
load_outputs,
make_serializable,
Expand Down Expand Up @@ -897,3 +898,257 @@ def test_correctness_threshold_boundary(self):
)
pass_at_k, _ = m.compute()
assert pass_at_k["1"] == pytest.approx(0.5)


class TestDeltaIntermediateMmData:
    """Verify per-step delta encoding of trajectory mm_data sidecars.

    Renderer bridge_to_next_turn emits cumulative mm_data on every
    step. The transport-layer delta strips items whose mm_hash already
    appeared in the prior step, so the per-window TrainingSample
    assembler can recover its window's images by unioning step-deltas.
    """

    @staticmethod
    def _mm(*hashes: str):
        """Build a renderers.MultiModalData with one image item per hash."""
        from renderers.base import MultiModalData, PlaceholderRange

        # Offsets are synthetic (i * 10) — only their per-step identity
        # matters to these tests, not their absolute values.
        return MultiModalData(
            mm_hashes={"image": list(hashes)},
            mm_placeholders={
                "image": [
                    PlaceholderRange(offset=i * 10, length=4)
                    for i in range(len(hashes))
                ]
            },
            mm_items={"image": [{"pixel_values": f"px-{h}"} for h in hashes]},
        )

    def _step(self, mm):
        """Wrap *mm* in the minimal trajectory-step dict shape the delta reads."""
        return {"tokens": {"multi_modal_data": mm}}

    def test_none_and_single_step_passthrough(self):
        # None / empty / single-step inputs are returned as-is (identity,
        # not a copy) — there is no prior step to diff against.
        assert _delta_intermediate_mm_data(None) is None
        assert _delta_intermediate_mm_data([]) == []
        only = [self._step(self._mm("A"))]
        assert _delta_intermediate_mm_data(only) is only

    def test_linear_extension_keeps_only_new_items_per_step(self):
        # Monotonically growing cumulative mm_data: each step's delta is
        # exactly the one hash introduced at that step.
        traj = [
            self._step(self._mm("A")),
            self._step(self._mm("A", "B")),
            self._step(self._mm("A", "B", "C")),
        ]
        out = _delta_intermediate_mm_data(traj)

        assert out[0]["tokens"]["multi_modal_data"].mm_hashes == {"image": ["A"]}
        assert out[1]["tokens"]["multi_modal_data"].mm_hashes == {"image": ["B"]}
        assert out[2]["tokens"]["multi_modal_data"].mm_hashes == {"image": ["C"]}
        # Items and placeholders are reindexed in lockstep with hashes.
        assert out[1]["tokens"]["multi_modal_data"].mm_items["image"] == [
            {"pixel_values": "px-B"}
        ]
        assert (
            out[2]["tokens"]["multi_modal_data"].mm_placeholders["image"][0].offset
            == 20
        )

    def test_compaction_two_training_samples_assemble_correctly(self):
        """Rollout with one compaction event → two TrainingSamples.

        Models the prime-rl compaction flow: a single rollout produces
        multiple ``TrainingSample`` objects, one per compaction window.
        The pre-compaction sample's images are no longer in the
        post-compaction step's cumulative ``mm_data`` — the previous
        "keep last" strategy would have silently dropped them. With
        delta encoding, each per-window assembler recovers exactly the
        images its tokens reference: no leakage in either direction.
        """
        from renderers.base import MultiModalData, PlaceholderRange

        # Local builder (unlike _mm) takes explicit offsets so the test
        # can model the post-compaction offset reset below.
        def step(*hashes: str, offsets: list[int]):
            return {
                "tokens": {
                    "multi_modal_data": MultiModalData(
                        mm_hashes={"image": list(hashes)},
                        mm_placeholders={
                            "image": [
                                PlaceholderRange(offset=o, length=4) for o in offsets
                            ]
                        },
                        mm_items={
                            "image": [{"pixel_values": f"px-{h}"} for h in hashes]
                        },
                    )
                }
            }

        # Turn 1: image A. Cumulative {A}.
        # Turn 2: image B. Cumulative {A, B}.
        # ── compaction event: turns 1+2 summarized in text, images dropped ──
        # Turn 3: image C. Cumulative {C} (offsets reset against the
        # post-compaction prompt).
        # Turn 4: image D. Cumulative {C, D}.
        traj = [
            step("A", offsets=[10]),
            step("A", "B", offsets=[10, 50]),
            step("C", offsets=[8]),
            step("C", "D", offsets=[8, 40]),
        ]
        out = _delta_intermediate_mm_data(traj)

        # Per-step deltas keep only what's new since the immediately prior step.
        deltas = [s["tokens"]["multi_modal_data"].mm_hashes for s in out]
        assert deltas == [
            {"image": ["A"]},
            {"image": ["B"]},
            {"image": ["C"]},
            {"image": ["D"]},
        ]

        # Stand-in for the per-window TrainingSample assembler: union the
        # step-deltas of a window by concatenating the parallel lists.
        def assemble(steps):
            hashes: list[str] = []
            items: list[dict] = []
            placeholders: list[PlaceholderRange] = []
            for s in steps:
                mm = s["tokens"]["multi_modal_data"]
                hashes += mm.mm_hashes.get("image", [])
                items += mm.mm_items.get("image", [])
                placeholders += mm.mm_placeholders.get("image", [])
            return hashes, items, placeholders

        ts1_hashes, ts1_items, ts1_phs = assemble(out[0:2])  # pre-compaction
        ts2_hashes, ts2_items, ts2_phs = assemble(out[2:4])  # post-compaction

        assert ts1_hashes == ["A", "B"]
        assert ts2_hashes == ["C", "D"]
        # The invariant the previous "keep last" broke: pre-compaction TS
        # does not see post-compaction images, and vice versa.
        assert set(ts1_hashes).isdisjoint(set(ts2_hashes))

        # Items / placeholders are reindexed lock-step with hashes (no
        # off-by-one or cross-contamination during reindex).
        assert ts1_items == [{"pixel_values": "px-A"}, {"pixel_values": "px-B"}]
        assert ts2_items == [{"pixel_values": "px-C"}, {"pixel_values": "px-D"}]

        # Placeholder offsets travel verbatim per step; the assembler is
        # responsible for shifting them into each window's local frame.
        assert [p.offset for p in ts1_phs] == [10, 50]
        assert [p.offset for p in ts2_phs] == [8, 40]

    def test_same_image_rendered_in_two_turns_uses_multiset_diff(self):
        """Same image hash appearing N times must keep the right N-prior occurrences.

        The renderer doesn't dedupe by hash: ``emit_image`` appends to
        the parallel lists every time an image content part is rendered.
        So if image A is shown in turn 1 *and* turn 3, the cumulative
        ``mm_hashes`` is ``["A", "A"]`` with two distinct placeholder
        offsets, and ``mm_items`` is ``[pixA, pixA]`` (literally the
        same payload twice). Both placeholder runs need their own item
        — set-based diff would drop both as "already seen" and orphan
        the second placeholder. Multiset diff drops only the first.
        """
        from renderers.base import MultiModalData, PlaceholderRange

        # Local builder: hashes and offsets passed as parallel lists so a
        # duplicate hash can carry two distinct offsets.
        def step(hashes, offsets):
            return {
                "tokens": {
                    "multi_modal_data": MultiModalData(
                        mm_hashes={"image": list(hashes)},
                        mm_placeholders={
                            "image": [
                                PlaceholderRange(offset=o, length=4) for o in offsets
                            ]
                        },
                        mm_items={
                            "image": [{"pixel_values": f"px-{h}"} for h in hashes]
                        },
                    )
                }
            }

        # Turn 1: image A at offset 10. Cumulative ["A"].
        # Turn 2: no image. Cumulative unchanged ["A"].
        # Turn 3: image A re-rendered at offset 200. Cumulative ["A", "A"].
        traj = [
            step(["A"], offsets=[10]),
            step(["A"], offsets=[10]),
            step(["A", "A"], offsets=[10, 200]),
        ]
        out = _delta_intermediate_mm_data(traj)

        # Step 0 keeps everything (no prior).
        assert out[0]["tokens"]["multi_modal_data"].mm_hashes == {"image": ["A"]}
        assert [
            p.offset
            for p in out[0]["tokens"]["multi_modal_data"].mm_placeholders["image"]
        ] == [10]

        # Step 1 introduced no new image (cumulative unchanged).
        assert out[1]["tokens"]["multi_modal_data"].mm_hashes == {"image": []}

        # Step 2: prior was ["A"], current is ["A", "A"]. Multiset budget
        # consumes the first A; the *second* A (the new one at offset
        # 200) survives the diff with its pixel_values intact. Set-based
        # diff would have produced [].
        step2_mm = out[2]["tokens"]["multi_modal_data"]
        assert step2_mm.mm_hashes == {"image": ["A"]}
        assert step2_mm.mm_items == {"image": [{"pixel_values": "px-A"}]}
        assert [p.offset for p in step2_mm.mm_placeholders["image"]] == [200]

        # End-to-end: assembling the single TrainingSample (no
        # compaction) recovers both placeholder runs with matching
        # pixel_values, so the trainer can satisfy both image-pad
        # token runs in the prompt.
        all_hashes: list[str] = []
        all_phs: list[PlaceholderRange] = []
        for s in out:
            mm = s["tokens"]["multi_modal_data"]
            all_hashes += mm.mm_hashes.get("image", [])
            all_phs += mm.mm_placeholders.get("image", [])
        assert all_hashes == ["A", "A"]
        assert [p.offset for p in all_phs] == [10, 200]

    def test_image_reintroduction_after_compaction(self):
        """A hash dropped at compaction and re-rendered later is re-transmitted.

        The delta is computed against the *immediately prior step's*
        cumulative, not a global seen-set. If image A appears in turn
        1, is compacted away (step 2's cumulative is empty), and is
        re-rendered in turn 3, A shows up in step 0's delta *and* step
        2's delta — necessary so the post-compaction TrainingSample
        also receives A's bytes.
        """
        traj = [
            self._step(self._mm("A")),
            self._step(self._mm()),
            self._step(self._mm("A")),
        ]
        out = _delta_intermediate_mm_data(traj)

        assert out[0]["tokens"]["multi_modal_data"].mm_hashes == {"image": ["A"]}
        assert out[1]["tokens"]["multi_modal_data"].mm_hashes == {"image": []}
        # A re-emerges in step 2's delta — its absence from step 1's
        # cumulative means it counts as "new" again.
        assert out[2]["tokens"]["multi_modal_data"].mm_hashes == {"image": ["A"]}

    def test_steps_with_no_new_items_collapse_to_empty_delta(self):
        # Step 2's cumulative equals step 1's — no new items.
        traj = [
            self._step(self._mm("A", "B")),
            self._step(self._mm("A", "B")),
            self._step(self._mm("A", "B", "C")),
        ]
        out = _delta_intermediate_mm_data(traj)

        # Empty delta keeps the "image" modality key with empty lists
        # rather than dropping the sidecar entirely.
        assert out[1]["tokens"]["multi_modal_data"].mm_hashes == {"image": []}
        assert out[1]["tokens"]["multi_modal_data"].mm_items == {"image": []}
        assert out[2]["tokens"]["multi_modal_data"].mm_hashes == {"image": ["C"]}

    def test_non_mapping_steps_pass_through(self):
        # Non-dict steps are forwarded untouched; the diff's "prior
        # cumulative" anchor skips over them.
        traj = [self._step(self._mm("A")), "not-a-dict", self._step(self._mm("A", "B"))]
        out = _delta_intermediate_mm_data(traj)
        assert out[1] == "not-a-dict"
        # Delta of step 2 still computed against step 0 (last seen cumulative).
        assert out[2]["tokens"]["multi_modal_data"].mm_hashes == {"image": ["B"]}
Loading
Loading