Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 2 additions & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -88,13 +88,14 @@ Reasoning scatter (tokens/cost toggle in the viewer) vs. green rate.
```bash
export OPENROUTER_API_KEY=your_key_here
export OPENAI_API_KEY=your_openai_key_here # required only for models routed to OpenAI
export MINIMAX_API_KEY=your_minimax_key_here # required only for models routed to MiniMax
export OPENAI_PROJECT=proj_xxx # optional: force OpenAI requests to a specific project
export OPENAI_ORGANIZATION=org_xxx # optional: force organization context
```

Provider routing is configured per model via `collect.model_providers` and
`grade.model_providers` in config (default is OpenRouter), for example:
`{"*":"openrouter","gpt-5.3":"openai"}`.
`{"*":"openrouter","gpt-5.3":"openai","minimax/*":"minimax"}`.

2. Run collection + primary judge (Claude by default):

Expand Down
9 changes: 6 additions & 3 deletions config.json
Original file line number Diff line number Diff line change
Expand Up @@ -50,13 +50,15 @@
"google/gemma-3-27b-it",
"qwen/qwen3.5-397b-a17b",
"moonshotai/kimi-k2.5",
"minimax/minimax-m2.5"
"minimax/minimax-m2.7",
"minimax/minimax-m2.7-highspeed"
],
"models_file": "",
"model_providers": {
"*": "openrouter",
"openai/gpt-5.4-mini": "openai",
"openai/gpt-5.4-nano": "openai"
"openai/gpt-5.4-nano": "openai",
"minimax/*": "minimax"
},
"num_runs": 1,
"parallelism": 12,
Expand Down Expand Up @@ -93,7 +95,8 @@
"z-ai/glm-5": ["none", "high"],
"qwen/qwen3.5-397b-a17b": ["none", "high"],
"moonshotai/kimi-k2.5": ["none", "high"],
"minimax/minimax-m2.5": ["low", "high"]
"minimax/minimax-m2.7": ["none", "high"],
"minimax/minimax-m2.7-highspeed": ["none", "high"]
},
"shuffle_tasks": true
},
Expand Down
14 changes: 10 additions & 4 deletions config.v2.json
Original file line number Diff line number Diff line change
Expand Up @@ -50,13 +50,15 @@
"google/gemma-3-27b-it",
"qwen/qwen3.5-397b-a17b",
"moonshotai/kimi-k2.5",
"minimax/minimax-m2.5"
"minimax/minimax-m2.7",
"minimax/minimax-m2.7-highspeed"
],
"models_file": "",
"model_providers": {
"*": "openrouter",
"openai/gpt-5.4-mini": "openai",
"openai/gpt-5.4-nano": "openai"
"openai/gpt-5.4-nano": "openai",
"minimax/*": "minimax"
},
"num_runs": 1,
"parallelism": 64,
Expand Down Expand Up @@ -175,8 +177,12 @@
"none",
"high"
],
"minimax/minimax-m2.5": [
"low",
"minimax/minimax-m2.7": [
"none",
"high"
],
"minimax/minimax-m2.7-highspeed": [
"none",
"high"
]
},
Expand Down
156 changes: 156 additions & 0 deletions scripts/openrouter_benchmark.py
Original file line number Diff line number Diff line change
Expand Up @@ -65,11 +65,13 @@
"openrouter": "openrouter",
"or": "openrouter",
"openai": "openai",
"minimax": "minimax",
}

MODEL_PROVIDER_VALUES: tuple[str, ...] = (
"openrouter",
"openai",
"minimax",
)

DEFAULT_MODEL_PROVIDER = "openrouter"
Expand Down Expand Up @@ -2195,6 +2197,10 @@ class OpenAIAPIError(ProviderAPIError):
"""Errors from OpenAI Responses API calls."""


class MiniMaxAPIError(ProviderAPIError):
    """Errors from MiniMax chat/completions calls.

    Carries the same ``status_code`` / ``retryable`` / ``retry_after_seconds``
    fields as its ``ProviderAPIError`` base, so retry logic can treat all
    provider errors uniformly.
    """


class OpenRouterClient:
def __init__(self, api_key: str, timeout_seconds: int) -> None:
if timeout_seconds < 1:
Expand Down Expand Up @@ -2443,6 +2449,134 @@ def chat(
raise last_error


def _minimax_model_id(model: str) -> str:
"""Strip the ``minimax/`` namespace prefix if present."""
cleaned = str(model).strip()
if cleaned.startswith("minimax/"):
_, remainder = cleaned.split("/", 1)
if remainder:
return remainder
return cleaned


def _minimax_clamp_temperature(temperature: float | None) -> float | None:
"""MiniMax requires temperature in the open interval (0.0, 1.0]."""
if temperature is None:
return None
return max(0.01, min(float(temperature), 1.0))


def _strip_think_tags(text: str) -> str:
"""Remove ``<think>…</think>`` blocks that MiniMax M2.5+ may emit."""
return re.sub(r"<think>[\s\S]*?</think>", "", text).strip()


class MiniMaxClient:
"""Client for the MiniMax OpenAI-compatible chat/completions API."""

def __init__(self, api_key: str, timeout_seconds: int) -> None:
if timeout_seconds < 1:
raise ValueError("timeout_seconds must be >= 1")
self.api_key = api_key
self.timeout_seconds = timeout_seconds
self.base_url = "https://api.minimax.io/v1/chat/completions"

def chat(
self,
*,
model: str,
messages: list[dict[str, str]],
temperature: float | None,
max_tokens: int,
retries: int,
extra_payload: dict[str, Any] | None = None,
) -> dict[str, Any]:
payload: dict[str, Any] = {
"model": _minimax_model_id(model),
"messages": messages,
}
clamped_temp = _minimax_clamp_temperature(temperature)
if clamped_temp is not None:
payload["temperature"] = clamped_temp
if max_tokens > 0:
payload["max_tokens"] = max_tokens
if extra_payload:
# MiniMax supports reasoning via the ``reasoning`` key (same
# schema as OpenRouter). Provider-specific keys like
# ``provider`` are silently dropped.
for key, value in extra_payload.items():
if key in {"provider"}:
continue
payload[key] = value
encoded = json.dumps(payload).encode("utf-8")

headers = {
"Authorization": f"Bearer {self.api_key}",
"Content-Type": "application/json",
}

if retries < 1:
raise ValueError("retries must be >= 1")

last_error: Exception | None = None
for attempt in range(1, retries + 1):
retry_after_header: str | None = None
retry_after_seconds: float | None = None
request = urllib.request.Request(
self.base_url,
data=encoded,
headers=headers,
method="POST",
)
try:
with urllib.request.urlopen(request, timeout=self.timeout_seconds) as resp:
raw = resp.read().decode("utf-8")
parsed = json.loads(raw)
if not isinstance(parsed, dict):
raise RuntimeError("MiniMax returned non-object JSON.")
# Strip <think>…</think> blocks from the response text so
# that internal reasoning traces do not pollute benchmark
# answers or judge inputs.
choices = parsed.get("choices")
if isinstance(choices, list):
for choice in choices:
if not isinstance(choice, dict):
continue
msg = choice.get("message")
if isinstance(msg, dict) and isinstance(msg.get("content"), str):
msg["content"] = _strip_think_tags(msg["content"])
return parsed
except urllib.error.HTTPError as exc:
detail = exc.read().decode("utf-8", errors="ignore")
retry_after_header = exc.headers.get("Retry-After") if exc.headers else None
retry_after_seconds = parse_retry_after_seconds(retry_after_header)
retryable = is_retryable_http_status(exc.code)
last_error = MiniMaxAPIError(
f"HTTP {exc.code} from MiniMax (attempt {attempt}/{retries})"
f"{' [retryable]' if retryable else ' [non-retryable]'}: {detail}"
+ (
f" (retry_after_seconds={retry_after_seconds})"
if retry_after_seconds is not None
else ""
),
status_code=exc.code,
retryable=retryable,
retry_after_seconds=retry_after_seconds,
)
if not retryable:
raise last_error from exc
except Exception as exc: # pylint: disable=broad-except
last_error = RuntimeError(
f"MiniMax call failed (attempt {attempt}/{retries}): {exc}"
)

if attempt < retries:
time.sleep(compute_retry_delay_seconds(attempt, retry_after_header))

assert last_error is not None
raise last_error


def extract_model_text(api_response: dict[str, Any]) -> str:
if api_response.get("error"):
err = api_response.get("error")
Expand Down Expand Up @@ -3016,6 +3150,17 @@ def run_collect(args: argparse.Namespace) -> int:
project_id=openai_project_id,
organization_id=openai_organization_id,
)
if "minimax" in providers_in_use:
minimax_key = os.getenv("MINIMAX_API_KEY", "").strip()
if not minimax_key:
raise RuntimeError(
"MINIMAX_API_KEY is required for models routed to minimax "
"unless --dry-run is set."
)
clients["minimax"] = MiniMaxClient(
api_key=minimax_key,
timeout_seconds=args.timeout_seconds,
)

started = time.perf_counter()
records: list[dict[str, Any]] = list(checkpoint_records)
Expand Down Expand Up @@ -4115,6 +4260,17 @@ def run_grade(args: argparse.Namespace) -> int:
project_id=openai_project_id,
organization_id=openai_organization_id,
)
elif judge_provider == "minimax":
minimax_key = os.getenv("MINIMAX_API_KEY", "").strip()
if not minimax_key:
raise RuntimeError(
"MINIMAX_API_KEY is required for judge models routed to minimax "
"unless --dry-run is set."
)
clients["minimax"] = MiniMaxClient(
api_key=minimax_key,
timeout_seconds=args.timeout_seconds,
)

started = time.perf_counter()
grade_rows: list[dict[str, Any]] = list(checkpoint_rows)
Expand Down
Empty file added tests/__init__.py
Empty file.
Loading