From f47ae676b9c7cd7f97a9d01b5f2209c106152630 Mon Sep 17 00:00:00 2001 From: hallerite Date: Wed, 13 May 2026 17:11:30 +0530 Subject: [PATCH 1/3] fix(load_tokenizer): default to use_fast=True Forces transformers' Rust TokenizersBackend instead of letting AutoTokenizer silently fall back to PythonBackend, whose __init__ -> _add_tokens -> get_vocab() raises NotImplementedError (get_vocab is unimplemented on the PythonBackend class). Reproduced under concurrent RendererPool init for GLM-4.5-Air with trust_remote_code=False; the failure rate climbs with per-step concurrency. If fast tokenizer files are missing, AutoTokenizer with use_fast=True raises a clear OSError instead of corrupting the pool silently. Co-Authored-By: Claude Opus 4.7 (1M context) --- renderers/base.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/renderers/base.py b/renderers/base.py index a8823fe..6336121 100644 --- a/renderers/base.py +++ b/renderers/base.py @@ -664,7 +664,11 @@ def load_tokenizer(model_name_or_path: str): trust_remote_code=True, revision=revision, ) - return AutoTokenizer.from_pretrained(model_name_or_path, trust_remote_code=False) + return AutoTokenizer.from_pretrained( + model_name_or_path, + trust_remote_code=False, + use_fast=True, + ) def _populate_registry(): From e6562829b646545919cc66788975de359bcdb916 Mon Sep 17 00:00:00 2001 From: hallerite Date: Wed, 13 May 2026 11:53:33 +0000 Subject: [PATCH 2/3] test(load_tokenizer): align mock assertions with use_fast=True default MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The default-path call site in `load_tokenizer` now passes `use_fast=True`; update the two call-shape tests that compare the captured kwargs by equality. The Kimi pinned-revision branch is intentionally unchanged — its source call site does not pass `use_fast=True`, and the corresponding assertion already reflects that. Co-Authored-By: Claude Opus 4.7 (1M context) --- tests/test_load_tokenizer.py | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/tests/test_load_tokenizer.py b/tests/test_load_tokenizer.py index 5445687..b367574 100644 --- a/tests/test_load_tokenizer.py +++ b/tests/test_load_tokenizer.py @@ -50,11 +50,11 @@ def test_trusted_revisions_are_full_shas(): @patch("transformers.AutoTokenizer.from_pretrained") def test_unlisted_model_loads_without_remote_code(mock_from_pretrained): - """Default path: trust_remote_code=False, no revision pin.""" + """Default path: trust_remote_code=False, use_fast=True, no revision pin.""" load_tokenizer("Qwen/Qwen3-0.6B") args, kwargs = mock_from_pretrained.call_args assert args == ("Qwen/Qwen3-0.6B",) - assert kwargs == {"trust_remote_code": False} + assert kwargs == {"trust_remote_code": False, "use_fast": True} @patch("transformers.AutoTokenizer.from_pretrained") @@ -87,8 +87,9 @@ def test_unknown_path_falls_through_to_no_remote_code(mock_from_pretrained): load_tokenizer(name) args, kwargs = mock_from_pretrained.call_args assert args == (name,) - assert kwargs == {"trust_remote_code": False}, ( - f"{name}: unlisted path leaked trust_remote_code=True" + assert kwargs == {"trust_remote_code": False, "use_fast": True}, ( + f"{name}: unlisted path leaked trust_remote_code=True " + f"or dropped use_fast=True" ) From b2930ebe8b0b5aebe39bb9656aee5289b74a104d Mon Sep 17 00:00:00 2001 From: hallerite Date: Wed, 13 May 2026 12:02:14 +0000 Subject: [PATCH 3/3] fix(load_tokenizer): apply use_fast=True on Kimi pinned-revision path too The Kimi pinned-revision branch traverses the same HF dispatch logic that exhibits the silent-slow-fallback race; setting use_fast=True explicitly forces the failure to be loud on that path as well. Kimi-K2 family ships tokenizer.json so the fast path is available in practice. Co-Authored-By: Claude Opus 4.7 (1M context) --- renderers/base.py | 1 + tests/test_load_tokenizer.py | 1 + 2 files changed, 2 insertions(+) diff --git a/renderers/base.py b/renderers/base.py index 6336121..170e31d 100644 --- a/renderers/base.py +++ b/renderers/base.py @@ -663,6 +663,7 @@ def load_tokenizer(model_name_or_path: str): model_name_or_path, trust_remote_code=True, revision=revision, + use_fast=True, ) return AutoTokenizer.from_pretrained( model_name_or_path, diff --git a/tests/test_load_tokenizer.py b/tests/test_load_tokenizer.py index b367574..f40ee1b 100644 --- a/tests/test_load_tokenizer.py +++ b/tests/test_load_tokenizer.py @@ -67,6 +67,7 @@ def test_kimi_loads_with_pinned_revision(mock_from_pretrained): assert kwargs == { "trust_remote_code": True, "revision": TRUSTED_REVISIONS["moonshotai/Kimi-K2.5"], + "use_fast": True, }