From f1627ace80c84d41ac92bfa1d9374eed7bab1e24 Mon Sep 17 00:00:00 2001
From: nightguarder <73370044+nightguarder@users.noreply.github.com>
Date: Sat, 9 May 2026 16:02:45 +0200
Subject: [PATCH] fix: catch TypeError when accessing think token properties on
 non-thinking models

This enables support for TranslateGemma and other non-thinking models whose
tokenizer lacks _think_start_tokens. TranslateGemma-4b-it was previously
blocked by a TypeError; it now works correctly via /v1/completions with a
client-side chat template.

Note: TranslateGemma uses a custom chat template requiring
source_lang_code/target_lang_code fields that OMLX /v1/chat/completions
does not support. The chat template must be applied client-side and the
resulting prompt sent to /v1/completions instead. See the model's
chat_template.jinja for the exact prompt format.

Catch (ValueError, TypeError) at all three sites:
- _detect_needs_think_prefix in scheduler step
- ThinkingBudgetProcessor think_start_id resolution
- _resolve_think_end_token_ids think_end_id resolution
---
 omlx/scheduler.py | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/omlx/scheduler.py b/omlx/scheduler.py
index 97f46132d..dd78772f1 100644
--- a/omlx/scheduler.py
+++ b/omlx/scheduler.py
@@ -1263,7 +1263,7 @@ def _build_sampler_and_processors(
 
                 try:
                     think_start_id = getattr(self.tokenizer, 'think_start_id', None)
-                except ValueError:
+                except (ValueError, TypeError):
                     think_start_id = None
                 leading_ids, trailing_ids = self._resolve_think_close_pattern()
                 processor = ThinkingBudgetProcessor(
@@ -1312,7 +1312,7 @@ def _resolve_think_end_token_ids(self) -> list[int] | None:
         # Tier 1: mlx-lm tokenizer attribute (covers all known think variants)
         try:
             think_end_id = getattr(self.tokenizer, 'think_end_id', None)
-        except ValueError:
+        except (ValueError, TypeError):
             # Multi-token think end (e.g. Gemma 4) - fall through to Tier 2
             think_end_id = None
         if think_end_id is not None:
@@ -1425,7 +1425,7 @@ def _detect_needs_think_prefix(self, request: "Request") -> bool:
         """
         try:
             think_start_id = getattr(self.tokenizer, 'think_start_id', None)
-        except ValueError:
+        except (ValueError, TypeError):
             # Multi-token think start (e.g. Gemma 4 <|channel>thought) -
             # single-token detection not applicable, handled by output parser
             return False