jundot · nightguarder · May 9, 2026 · May 9, 2026
diff --git a/omlx/scheduler.py b/omlx/scheduler.py
@@ -1707,7 +1707,10 @@ def _build_sampler_and_processors(
             if think_end_ids:
                 from .api.thinking import ThinkingBudgetProcessor
 
-                think_start_id = self._get_think_token_id("think_start_id")
+                try:
+                    think_start_id = getattr(self.tokenizer, 'think_start_id', None)
+                except (ValueError, TypeError):
+                    think_start_id = None
                 leading_ids, trailing_ids = self._resolve_think_close_pattern()
                 processor = ThinkingBudgetProcessor(
                     think_end_token_ids=think_end_ids,
@@ -1770,7 +1773,11 @@ def _resolve_think_end_token_ids(self) -> list[int] | None:
         </think> and </longcat_think> automatically.
         """
         # Tier 1: mlx-lm tokenizer attribute (covers all known think variants)
-        think_end_id = self._get_think_token_id("think_end_id")
+        try:
+            think_end_id = getattr(self.tokenizer, 'think_end_id', None)
+        except (ValueError, TypeError):
+            # Multi-token think end (e.g. Gemma 4) - fall through to Tier 2
+            think_end_id = None
         if think_end_id is not None:
             return [think_end_id]
 
@@ -1888,7 +1895,12 @@ def _detect_needs_think_prefix(self, request: "Request") -> bool:
         Returns False for disabled-thinking patterns like <think></think>
         where </think> immediately follows <think> in the prompt tail.
         """
-        think_start_id = self._get_think_token_id("think_start_id")
+        try:
+            think_start_id = getattr(self.tokenizer, 'think_start_id', None)
+        except (ValueError, TypeError):
+            # Multi-token think start (e.g. Gemma 4 <|channel>thought) -
+            # single-token detection not applicable, handled by output parser
+            return False
         if think_start_id is None:
             try:
                 think_start_id = self.tokenizer.convert_tokens_to_ids("<think>")