Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
44 changes: 37 additions & 7 deletions renderers/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -432,6 +432,27 @@ def is_multimodal(r: object) -> bool:
return cached


def _resolve_pool_init_workers(size: int) -> int:
    """Resolve the worker count used to populate a ``RendererPool``.

    Defaults to 1 (serial). Opt into parallel construction via the
    ``RENDERERS_POOL_INIT_WORKERS`` env var; the resolved value is clamped
    to ``[1, min(size, 8)]``. Invalid values fall back to 1.

    Args:
        size: Number of renderer slots the pool will hold.

    Returns:
        The number of workers to use: at least 1 and at most 8. It does not
        exceed ``size`` whenever ``size >= 1``; a ``size`` below 1 yields 1
        rather than a non-positive worker count.
    """
    import os

    raw = os.environ.get("RENDERERS_POOL_INIT_WORKERS")
    if raw is None:
        return 1
    try:
        requested = int(raw)
    except ValueError:
        # Non-integer text (e.g. "not-an-int", "4.5", "") means "serial".
        return 1
    # The outer max() keeps the documented floor of 1 for zero/negative
    # requests *and* for a degenerate ``size`` (< 1), which the bare
    # ``min(requested, size, 8)`` would have passed straight through.
    return max(1, min(requested, size, 8))


class RendererPool:
"""Pool of Renderer instances that itself satisfies the Renderer protocol.

Expand All @@ -450,9 +471,14 @@ class RendererPool:
Construction parallelism for ``size > 1``: ``AutoTokenizer.from_pretrained``
takes hundreds of ms per call (JSON parse + Rust tokenizer build + HF
cache lookup), so populating a 32-slot pool serially costs ~10-15s on
startup and shows up directly as a step-0 stall. We fan the factory out
across a short-lived thread pool; the GIL-bound Python portion stops
scaling past ~8 workers, so we clamp there.
startup and shows up directly as a step-0 stall. Default is serial
construction (``workers=1``); under concurrent ``from_pretrained`` we have
observed intermittent ``NotImplementedError`` raised from the transformers
Python tokenizer fallback path during pool init for some models (rare but
catastrophic — it poisons the pool). Set the env var
``RENDERERS_POOL_INIT_WORKERS`` to opt back into parallel construction;
the GIL-bound Python portion stops scaling past ~8 workers, so we still
clamp the resolved value there.
"""

def __init__(self, factory: Callable[[], Renderer], size: int):
Expand All @@ -471,10 +497,14 @@ def __init__(self, factory: Callable[[], Renderer], size: int):
self._sole = None
self._lock = None
self._pool = queue.Queue(maxsize=size)
workers = min(size, 8)
with ThreadPoolExecutor(max_workers=workers) as executor:
for renderer in executor.map(lambda _: factory(), range(size)):
self._pool.put(renderer)
workers = _resolve_pool_init_workers(size)
if workers == 1:
for _ in range(size):
self._pool.put(factory())
else:
with ThreadPoolExecutor(max_workers=workers) as executor:
for renderer in executor.map(lambda _: factory(), range(size)):
self._pool.put(renderer)
# Peek without removing — safe at construction time before any
# checkout has been served.
sample = self._pool.queue[0]
Expand Down
46 changes: 46 additions & 0 deletions tests/test_pool_init_workers.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,46 @@
"""Unit tests for ``renderers.base._resolve_pool_init_workers``.

The pool defaults to serial construction (``workers=1``) because concurrent
``AutoTokenizer.from_pretrained`` calls have surfaced a rare but catastrophic
``NotImplementedError`` from the transformers Python tokenizer fallback path.
Users can opt back into parallel construction via the
``RENDERERS_POOL_INIT_WORKERS`` env var.
"""

from __future__ import annotations

import pytest

from renderers.base import _resolve_pool_init_workers


def test_default_is_serial(monkeypatch: pytest.MonkeyPatch) -> None:
    """With the env var unset, the resolver picks a single worker."""
    monkeypatch.delenv("RENDERERS_POOL_INIT_WORKERS", raising=False)
    resolved = _resolve_pool_init_workers(32)
    assert resolved == 1


def test_env_opts_into_parallel(monkeypatch: pytest.MonkeyPatch) -> None:
    """An in-range env value is returned unchanged."""
    monkeypatch.setenv("RENDERERS_POOL_INIT_WORKERS", "4")
    resolved = _resolve_pool_init_workers(32)
    assert resolved == 4


def test_clamped_to_size(monkeypatch: pytest.MonkeyPatch) -> None:
    """A request larger than the pool size is capped at the pool size."""
    monkeypatch.setenv("RENDERERS_POOL_INIT_WORKERS", "16")
    resolved = _resolve_pool_init_workers(4)
    assert resolved == 4


def test_clamped_to_eight(monkeypatch: pytest.MonkeyPatch) -> None:
    """A request above the hard cap resolves to 8, even for a larger pool."""
    monkeypatch.setenv("RENDERERS_POOL_INIT_WORKERS", "32")
    resolved = _resolve_pool_init_workers(64)
    assert resolved == 8


def test_zero_and_negative_fall_back_to_one(monkeypatch: pytest.MonkeyPatch) -> None:
    """Zero and negative env values both resolve to a single worker."""
    # Same order as before: "0" is checked first, then "-2".
    for non_positive in ("0", "-2"):
        monkeypatch.setenv("RENDERERS_POOL_INIT_WORKERS", non_positive)
        assert _resolve_pool_init_workers(32) == 1


def test_garbage_falls_back_to_one(monkeypatch: pytest.MonkeyPatch) -> None:
    """A non-integer env value resolves to a single worker."""
    monkeypatch.setenv("RENDERERS_POOL_INIT_WORKERS", "not-an-int")
    resolved = _resolve_pool_init_workers(32)
    assert resolved == 1
Loading