Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
126 changes: 126 additions & 0 deletions tests/test_client_multimodal_types.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
import pytest
from types import SimpleNamespace
from typing import Any

from verifiers.clients.openai_chat_completions_client import OpenAIChatCompletionsClient
from verifiers.types import (
Expand All @@ -18,6 +19,26 @@
from verifiers.utils.response_utils import parse_response_message


class _RecordingCreate:
    """Async stub standing in for an SDK ``.create(...)`` endpoint.

    Every call's keyword arguments are recorded (in order) on ``calls`` and a
    canned ``response`` object is returned, so tests can assert exactly what
    the client under test sent to the provider.
    """

    def __init__(self, response: Any) -> None:
        # Canned object echoed back from every ``create`` invocation.
        self.response = response
        # One kwargs dict per recorded call, oldest first.
        self.calls: list[dict[str, Any]] = []

    async def create(self, **kwargs: Any) -> Any:
        """Record *kwargs* and return the canned response."""
        # Store a shallow copy so that if the caller later mutates the
        # dict it passed through, the recorded call log stays intact.
        self.calls.append(dict(kwargs))
        return self.response


def _recording_openai(response: Any) -> tuple[Any, _RecordingCreate]:
    """Build a fake OpenAI SDK client whose ``chat.completions.create`` is a recorder.

    Returns the fake client plus the recorder so tests can inspect the calls.
    """
    recorder = _RecordingCreate(response)
    fake_client = SimpleNamespace(chat=SimpleNamespace(completions=recorder))
    return fake_client, recorder


def _recording_anthropic(response: Any) -> tuple[Any, _RecordingCreate]:
    """Build a fake Anthropic SDK client whose ``messages.create`` is a recorder.

    Returns the fake client plus the recorder so tests can inspect the calls.
    """
    recorder = _RecordingCreate(response)
    fake_client = SimpleNamespace(messages=recorder)
    return fake_client, recorder


@pytest.mark.asyncio
async def test_openai_to_native_prompt_with_typed_multimodal_content_parts():
client = OpenAIChatCompletionsClient(object())
Expand Down Expand Up @@ -52,6 +73,59 @@ async def test_openai_to_native_prompt_with_typed_multimodal_content_parts():
]


@pytest.mark.asyncio
@pytest.mark.parametrize(
    ("model", "effort"),
    [
        ("anthropic/claude-opus-4.7", "xhigh"),
        ("anthropic/claude-sonnet-4.6", "max"),
    ],
)
async def test_openrouter_anthropic_reasoning_effort_maps_to_verbosity(
    model: str, effort: str
):
    """For Anthropic models behind OpenRouter, ``reasoning_effort`` is consumed
    and re-expressed as ``extra_body.verbosity``, merging with any existing
    ``reasoning`` settings instead of clobbering them."""
    fake_openai, recorder = _recording_openai(SimpleNamespace())
    client = OpenAIChatCompletionsClient(fake_openai)
    client._config = SimpleNamespace(api_base_url="https://openrouter.ai/api/v1")

    result = await client.get_native_response(
        prompt=[],
        model=model,
        sampling_args={
            "n": 1,
            "reasoning_effort": effort,
            "extra_body": {"reasoning": {"enabled": True}},
        },
    )

    # The canned response object is passed through untouched.
    assert result is recorder.response

    call = recorder.calls[0]
    # The OpenAI-style field must not be forwarded verbatim...
    assert "reasoning_effort" not in call
    # ...it is translated into the gateway's extra_body shape.
    expected_extra_body = {
        "reasoning": {"enabled": True},
        "verbosity": effort,
    }
    assert call["extra_body"] == expected_extra_body


@pytest.mark.asyncio
async def test_openrouter_anthropic_reasoning_effort_enables_reasoning():
    """When the caller supplies no ``reasoning`` block at all, mapping
    ``reasoning_effort`` also switches reasoning on in ``extra_body``."""
    fake_openai, recorder = _recording_openai(SimpleNamespace())
    client = OpenAIChatCompletionsClient(fake_openai)
    client._config = SimpleNamespace(api_base_url="https://api.pinference.ai/api/v1")

    await client.get_native_response(
        prompt=[],
        model="anthropic/claude-opus-4.7",
        sampling_args={"reasoning_effort": "high"},
    )

    call = recorder.calls[0]
    expected_extra_body = {
        "reasoning": {"enabled": True},
        "verbosity": "high",
    }
    assert call["extra_body"] == expected_extra_body


@pytest.mark.asyncio
async def test_anthropic_to_native_prompt_with_typed_multimodal_content_parts():
pytest.importorskip("anthropic")
Expand Down Expand Up @@ -151,6 +225,58 @@ async def test_anthropic_merges_consecutive_tool_results_into_single_user_messag
]


@pytest.mark.asyncio
@pytest.mark.parametrize(
    ("model", "effort"),
    [("claude-opus-4-7", "xhigh"), ("claude-sonnet-4-6", "max")],
)
async def test_anthropic_reasoning_effort_maps_to_output_config(
    model: str, effort: str
):
    """On the native Anthropic client, ``reasoning_effort`` becomes
    ``output_config.effort`` and adaptive thinking is turned on."""
    pytest.importorskip("anthropic")
    from verifiers.clients.anthropic_messages_client import AnthropicMessagesClient

    fake_anthropic, recorder = _recording_anthropic(SimpleNamespace())
    messages_client = AnthropicMessagesClient(fake_anthropic)

    result = await messages_client.get_native_response(
        prompt=[],
        model=model,
        sampling_args={"max_tokens": 128, "reasoning_effort": effort},
    )

    # The canned response object is passed through untouched.
    assert result is recorder.response

    call = recorder.calls[0]
    # The OpenAI-style field is consumed, not forwarded verbatim.
    assert "reasoning_effort" not in call
    assert call["output_config"] == {"effort": effort}
    assert call["thinking"] == {"type": "adaptive"}


@pytest.mark.asyncio
async def test_anthropic_reasoning_effort_preserves_existing_output_config():
    """A caller-supplied ``output_config`` is merged into — its existing keys
    survive alongside the injected ``effort``."""
    pytest.importorskip("anthropic")
    from verifiers.clients.anthropic_messages_client import AnthropicMessagesClient

    fake_anthropic, recorder = _recording_anthropic(SimpleNamespace())
    messages_client = AnthropicMessagesClient(fake_anthropic)

    await messages_client.get_native_response(
        prompt=[],
        model="claude-opus-4-7",
        sampling_args={
            "max_tokens": 128,
            "reasoning_effort": "high",
            "output_config": {"format": {"type": "text"}},
        },
    )

    call = recorder.calls[0]
    expected_output_config = {
        "format": {"type": "text"},
        "effort": "high",
    }
    assert call["output_config"] == expected_output_config


@pytest.mark.asyncio
async def test_anthropic_from_native_response_extracts_usage():
anthropic = pytest.importorskip("anthropic")
Expand Down
14 changes: 14 additions & 0 deletions verifiers/clients/anthropic_messages_client.py
Original file line number Diff line number Diff line change
Expand Up @@ -342,6 +342,20 @@ async def get_native_response(
) -> AnthropicMessage:
def normalize_sampling_args(sampling_args: SamplingArgs) -> dict:
sampling_args = dict(sampling_args)
reasoning_effort = sampling_args.pop("reasoning_effort", None)
model_id = model.lower().replace(".", "-").replace("_", "-")
if reasoning_effort is not None and (
model_id.startswith("anthropic/") or model_id.startswith("claude-")
):
output_config = dict(sampling_args.get("output_config") or {})
output_config["effort"] = reasoning_effort
sampling_args["output_config"] = output_config
if "thinking" not in sampling_args and (
"4-7" in model_id or "4-6" in model_id
):
sampling_args["thinking"] = {"type": "adaptive"}
elif reasoning_effort is not None:
sampling_args["reasoning_effort"] = reasoning_effort
max_tokens = sampling_args.pop("max_tokens", None)
sampling_args.pop("n", None)
sampling_args.pop("stop", None)
Expand Down
23 changes: 23 additions & 0 deletions verifiers/clients/openai_chat_completions_client.py
Original file line number Diff line number Diff line change
Expand Up @@ -252,6 +252,29 @@ async def get_native_response(
) -> OpenAIChatResponse:
def normalize_sampling_args(sampling_args: SamplingArgs):
    """Return a provider-ready copy of *sampling_args*, dropping None values.

    For Anthropic models reached through an OpenRouter/Pinference gateway,
    the OpenAI-style ``reasoning_effort`` field is removed and re-expressed
    inside ``extra_body`` (``verbosity`` plus ``reasoning.enabled``); for
    other endpoints it is forwarded unchanged. ``max_tokens`` is renamed to
    ``max_completion_tokens``.
    """
    args = dict(sampling_args)

    # Resolve the endpoint URL: prefer the SDK client's own base_url, then
    # fall back to the configured one; normalize to a lowercase string.
    if hasattr(self.client, "base_url"):
        base_url = str(self.client.base_url)
    elif self._config is not None:
        base_url = self._config.api_base_url
    else:
        base_url = None
    base_url = (base_url or "").lower()

    effort = args.pop("reasoning_effort", None)
    # Normalize separators so version variants of the model id compare uniformly.
    normalized_model = model.lower().replace(".", "-").replace("_", "-")

    if effort is not None:
        is_anthropic_model = normalized_model.startswith(("anthropic/", "claude-"))
        via_gateway = "openrouter.ai" in base_url or "pinference.ai" in base_url
        if is_anthropic_model and via_gateway:
            # Translate effort into the gateway's extra_body shape, merging
            # with (not replacing) any reasoning settings already present.
            body = dict(args.get("extra_body") or {})
            body["verbosity"] = effort
            reasoning_cfg = dict(body.get("reasoning") or {})
            reasoning_cfg.setdefault("enabled", True)
            body["reasoning"] = reasoning_cfg
            args["extra_body"] = body
        else:
            args["reasoning_effort"] = effort

    # Chat Completions expects max_completion_tokens rather than max_tokens.
    if "max_tokens" in args:
        args["max_completion_tokens"] = args.pop("max_tokens")

    return {k: v for k, v in args.items() if v is not None}
Expand Down
Loading