Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 2 additions & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -88,13 +88,14 @@ Reasoning scatter (tokens/cost toggle in the viewer) vs. green rate.
```bash
export OPENROUTER_API_KEY=your_key_here
export OPENAI_API_KEY=your_openai_key_here # required only for models routed to OpenAI
export MINIMAX_API_KEY=your_minimax_key_here # required only for models routed to MiniMax
export OPENAI_PROJECT=proj_xxx # optional: force OpenAI requests to a specific project
export OPENAI_ORGANIZATION=org_xxx # optional: force organization context
```

Provider routing is configured per model via `collect.model_providers` and
`grade.model_providers` in config (default is OpenRouter), for example:
`{"*":"openrouter","gpt-5.3":"openai"}`.
`{"*":"openrouter","gpt-5.3":"openai","minimax/*":"minimax"}`.

2. Run collection + primary judge (Claude by default):

Expand Down
9 changes: 6 additions & 3 deletions config.json
Original file line number Diff line number Diff line change
Expand Up @@ -50,13 +50,15 @@
"google/gemma-3-27b-it",
"qwen/qwen3.5-397b-a17b",
"moonshotai/kimi-k2.5",
"minimax/minimax-m2.5"
"minimax/minimax-m2.7",
"minimax/minimax-m2.7-highspeed"
],
"models_file": "",
"model_providers": {
"*": "openrouter",
"openai/gpt-5.4-mini": "openai",
"openai/gpt-5.4-nano": "openai"
"openai/gpt-5.4-nano": "openai",
"minimax/*": "minimax"
},
"num_runs": 1,
"parallelism": 12,
Expand Down Expand Up @@ -93,7 +95,8 @@
"z-ai/glm-5": ["none", "high"],
"qwen/qwen3.5-397b-a17b": ["none", "high"],
"moonshotai/kimi-k2.5": ["none", "high"],
"minimax/minimax-m2.5": ["low", "high"]
"minimax/minimax-m2.7": ["none", "high"],
"minimax/minimax-m2.7-highspeed": ["none", "high"]
},
"shuffle_tasks": true
},
Expand Down
14 changes: 10 additions & 4 deletions config.v2.json
Original file line number Diff line number Diff line change
Expand Up @@ -50,13 +50,15 @@
"google/gemma-3-27b-it",
"qwen/qwen3.5-397b-a17b",
"moonshotai/kimi-k2.5",
"minimax/minimax-m2.5"
"minimax/minimax-m2.7",
"minimax/minimax-m2.7-highspeed"
],
"models_file": "",
"model_providers": {
"*": "openrouter",
"openai/gpt-5.4-mini": "openai",
"openai/gpt-5.4-nano": "openai"
"openai/gpt-5.4-nano": "openai",
"minimax/*": "minimax"
},
"num_runs": 1,
"parallelism": 64,
Expand Down Expand Up @@ -175,8 +177,12 @@
"none",
"high"
],
"minimax/minimax-m2.5": [
"low",
"minimax/minimax-m2.7": [
"none",
"high"
],
"minimax/minimax-m2.7-highspeed": [
"none",
"high"
]
},
Expand Down
156 changes: 156 additions & 0 deletions scripts/openrouter_benchmark.py
Original file line number Diff line number Diff line change
Expand Up @@ -65,11 +65,13 @@
"openrouter": "openrouter",
"or": "openrouter",
"openai": "openai",
"minimax": "minimax",
}

MODEL_PROVIDER_VALUES: tuple[str, ...] = (
"openrouter",
"openai",
"minimax",
)

DEFAULT_MODEL_PROVIDER = "openrouter"
Expand Down Expand Up @@ -2195,6 +2197,10 @@ class OpenAIAPIError(ProviderAPIError):
"""Errors from OpenAI Responses API calls."""


class MiniMaxAPIError(ProviderAPIError):
    """Errors from MiniMax chat/completions calls.

    Carries the same ``status_code`` / ``retryable`` / ``retry_after_seconds``
    fields as its ``ProviderAPIError`` base, so retry logic can treat all
    provider errors uniformly.
    """


class OpenRouterClient:
def __init__(self, api_key: str, timeout_seconds: int) -> None:
if timeout_seconds < 1:
Expand Down Expand Up @@ -2443,6 +2449,134 @@ def chat(
raise last_error


def _minimax_model_id(model: str) -> str:
"""Strip the ``minimax/`` namespace prefix if present."""
cleaned = str(model).strip()
if cleaned.startswith("minimax/"):
_, remainder = cleaned.split("/", 1)
if remainder:
return remainder
return cleaned


def _minimax_clamp_temperature(temperature: float | None) -> float | None:
"""MiniMax requires temperature in the open interval (0.0, 1.0]."""
if temperature is None:
return None
return max(0.01, min(float(temperature), 1.0))


def _strip_think_tags(text: str) -> str:
"""Remove ``<think>…</think>`` blocks that MiniMax M2.5+ may emit."""
return re.sub(r"<think>[\s\S]*?</think>", "", text).strip()


class MiniMaxClient:
"""Client for the MiniMax OpenAI-compatible chat/completions API."""

def __init__(self, api_key: str, timeout_seconds: int) -> None:
if timeout_seconds < 1:
raise ValueError("timeout_seconds must be >= 1")
self.api_key = api_key
self.timeout_seconds = timeout_seconds
self.base_url = "https://api.minimax.io/v1/chat/completions"

def chat(
self,
*,
model: str,
messages: list[dict[str, str]],
temperature: float | None,
max_tokens: int,
retries: int,
extra_payload: dict[str, Any] | None = None,
) -> dict[str, Any]:
payload: dict[str, Any] = {
"model": _minimax_model_id(model),
"messages": messages,
}
clamped_temp = _minimax_clamp_temperature(temperature)
if clamped_temp is not None:
payload["temperature"] = clamped_temp
if max_tokens > 0:
payload["max_tokens"] = max_tokens
if extra_payload:
# MiniMax supports reasoning via the ``reasoning`` key (same
# schema as OpenRouter). Provider-specific keys like
# ``provider`` are silently dropped.
for key, value in extra_payload.items():
if key in {"provider"}:
continue
payload[key] = value
encoded = json.dumps(payload).encode("utf-8")

headers = {
"Authorization": f"Bearer {self.api_key}",
"Content-Type": "application/json",
}

if retries < 1:
raise ValueError("retries must be >= 1")

last_error: Exception | None = None
for attempt in range(1, retries + 1):
retry_after_header: str | None = None
retry_after_seconds: float | None = None
request = urllib.request.Request(
self.base_url,
data=encoded,
headers=headers,
method="POST",
)
try:
with urllib.request.urlopen(request, timeout=self.timeout_seconds) as resp:
raw = resp.read().decode("utf-8")
parsed = json.loads(raw)
if not isinstance(parsed, dict):
raise RuntimeError("MiniMax returned non-object JSON.")
# Strip <think>…</think> blocks from the response text so
# that internal reasoning traces do not pollute benchmark
# answers or judge inputs.
choices = parsed.get("choices")
if isinstance(choices, list):
for choice in choices:
if not isinstance(choice, dict):
continue
msg = choice.get("message")
if isinstance(msg, dict) and isinstance(msg.get("content"), str):
msg["content"] = _strip_think_tags(msg["content"])
return parsed
except urllib.error.HTTPError as exc:
detail = exc.read().decode("utf-8", errors="ignore")
retry_after_header = exc.headers.get("Retry-After") if exc.headers else None
retry_after_seconds = parse_retry_after_seconds(retry_after_header)
retryable = is_retryable_http_status(exc.code)
last_error = MiniMaxAPIError(
f"HTTP {exc.code} from MiniMax (attempt {attempt}/{retries})"
f"{' [retryable]' if retryable else ' [non-retryable]'}: {detail}"
+ (
f" (retry_after_seconds={retry_after_seconds})"
if retry_after_seconds is not None
else ""
),
status_code=exc.code,
retryable=retryable,
retry_after_seconds=retry_after_seconds,
)
if not retryable:
raise last_error from exc
except Exception as exc: # pylint: disable=broad-except
last_error = RuntimeError(
f"MiniMax call failed (attempt {attempt}/{retries}): {exc}"
)

if attempt < retries:
time.sleep(compute_retry_delay_seconds(attempt, retry_after_header))

assert last_error is not None
raise last_error


def extract_model_text(api_response: dict[str, Any]) -> str:
if api_response.get("error"):
err = api_response.get("error")
Expand Down Expand Up @@ -3016,6 +3150,17 @@ def run_collect(args: argparse.Namespace) -> int:
project_id=openai_project_id,
organization_id=openai_organization_id,
)
if "minimax" in providers_in_use:
minimax_key = os.getenv("MINIMAX_API_KEY", "").strip()
if not minimax_key:
raise RuntimeError(
"MINIMAX_API_KEY is required for models routed to minimax "
"unless --dry-run is set."
)
clients["minimax"] = MiniMaxClient(
api_key=minimax_key,
timeout_seconds=args.timeout_seconds,
)

started = time.perf_counter()
records: list[dict[str, Any]] = list(checkpoint_records)
Expand Down Expand Up @@ -4115,6 +4260,17 @@ def run_grade(args: argparse.Namespace) -> int:
project_id=openai_project_id,
organization_id=openai_organization_id,
)
elif judge_provider == "minimax":
minimax_key = os.getenv("MINIMAX_API_KEY", "").strip()
if not minimax_key:
raise RuntimeError(
"MINIMAX_API_KEY is required for judge models routed to minimax "
"unless --dry-run is set."
)
clients["minimax"] = MiniMaxClient(
api_key=minimax_key,
timeout_seconds=args.timeout_seconds,
)

started = time.perf_counter()
grade_rows: list[dict[str, Any]] = list(checkpoint_rows)
Expand Down
Empty file added tests/__init__.py
Empty file.
Loading