From f1627ace80c84d41ac92bfa1d9374eed7bab1e24 Mon Sep 17 00:00:00 2001 From: nightguarder <73370044+nightguarder@users.noreply.github.com> Date: Sat, 9 May 2026 16:02:45 +0200 Subject: [PATCH] fix: catch TypeError when accessing think token properties on non-thinking models This enables support for TranslateGemma and other non-thinking models whose tokenizer lacks _think_start_tokens. TranslateGemma-4b-it, which was previously blocked by this TypeError, now works correctly via /v1/completions with a client-side chat template. Note: TranslateGemma uses a custom chat template requiring source_lang_code/target_lang_code fields that OMLX /v1/chat/completions does not support. The chat template must be applied client-side and the resulting prompt sent to /v1/completions instead. See the model's chat_template.jinja for the exact prompt format. Catch (ValueError, TypeError) at all three sites: - _detect_needs_think_prefix in scheduler step - ThinkingBudgetProcessor think_start_id resolution in _build_sampler_and_processors - _resolve_think_end_token_ids think_end_id resolution --- omlx/scheduler.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/omlx/scheduler.py b/omlx/scheduler.py index 97f46132d..dd78772f1 100644 --- a/omlx/scheduler.py +++ b/omlx/scheduler.py @@ -1263,7 +1263,7 @@ def _build_sampler_and_processors( try: think_start_id = getattr(self.tokenizer, 'think_start_id', None) - except ValueError: + except (ValueError, TypeError): think_start_id = None leading_ids, trailing_ids = self._resolve_think_close_pattern() processor = ThinkingBudgetProcessor( @@ -1312,7 +1312,7 @@ def _resolve_think_end_token_ids(self) -> list[int] | None: # Tier 1: mlx-lm tokenizer attribute (covers all known think variants) try: think_end_id = getattr(self.tokenizer, 'think_end_id', None) - except ValueError: + except (ValueError, TypeError): # Multi-token think end (e.g. 
Gemma 4) - fall through to Tier 2 think_end_id = None if think_end_id is not None: @@ -1425,7 +1425,7 @@ def _detect_needs_think_prefix(self, request: "Request") -> bool: """ try: think_start_id = getattr(self.tokenizer, 'think_start_id', None) - except ValueError: + except (ValueError, TypeError): # Multi-token think start (e.g. Gemma 4 <|channel>thought) - # single-token detection not applicable, handled by output parser return False