diff --git a/renderers/base.py b/renderers/base.py index a8823fe..170e31d 100644 --- a/renderers/base.py +++ b/renderers/base.py @@ -663,8 +663,13 @@ def load_tokenizer(model_name_or_path: str): model_name_or_path, trust_remote_code=True, revision=revision, + use_fast=True, ) - return AutoTokenizer.from_pretrained(model_name_or_path, trust_remote_code=False) + return AutoTokenizer.from_pretrained( + model_name_or_path, + trust_remote_code=False, + use_fast=True, + ) def _populate_registry(): diff --git a/tests/test_load_tokenizer.py b/tests/test_load_tokenizer.py index 5445687..f40ee1b 100644 --- a/tests/test_load_tokenizer.py +++ b/tests/test_load_tokenizer.py @@ -50,11 +50,11 @@ def test_trusted_revisions_are_full_shas(): @patch("transformers.AutoTokenizer.from_pretrained") def test_unlisted_model_loads_without_remote_code(mock_from_pretrained): - """Default path: trust_remote_code=False, no revision pin.""" + """Default path: trust_remote_code=False, use_fast=True, no revision pin.""" load_tokenizer("Qwen/Qwen3-0.6B") args, kwargs = mock_from_pretrained.call_args assert args == ("Qwen/Qwen3-0.6B",) - assert kwargs == {"trust_remote_code": False} + assert kwargs == {"trust_remote_code": False, "use_fast": True} @patch("transformers.AutoTokenizer.from_pretrained") @@ -67,6 +67,7 @@ def test_kimi_loads_with_pinned_revision(mock_from_pretrained): assert kwargs == { "trust_remote_code": True, "revision": TRUSTED_REVISIONS["moonshotai/Kimi-K2.5"], + "use_fast": True, } @@ -87,8 +88,9 @@ def test_unknown_path_falls_through_to_no_remote_code(mock_from_pretrained): load_tokenizer(name) args, kwargs = mock_from_pretrained.call_args assert args == (name,) - assert kwargs == {"trust_remote_code": False}, ( - f"{name}: unlisted path leaked trust_remote_code=True" + assert kwargs == {"trust_remote_code": False, "use_fast": True}, ( + f"{name}: unlisted path leaked trust_remote_code=True " + f"or dropped use_fast=True" )