From 4d6028a785655e4b9f89d8a0756a81b45e74c7b9 Mon Sep 17 00:00:00 2001 From: Tristan Helmich Date: Thu, 7 May 2026 13:14:40 +0200 Subject: [PATCH 1/2] fix(transcribe): auto-fallback to CPU with int8 when CUDA is unavailable Instead of raising ValueError when the requested CUDA device is not present, automatically fall back to CPU and downgrade compute_type from float16 to int8 (float16 is unsupported on CPU). Also indicate whether CPU is forced or a fallback in the model-loading print message. --- meet/transcribe.py | 36 +++++++++++++++++++++++++++--------- 1 file changed, 27 insertions(+), 9 deletions(-) diff --git a/meet/transcribe.py b/meet/transcribe.py index a5f2b42..abeeae2 100644 --- a/meet/transcribe.py +++ b/meet/transcribe.py @@ -494,23 +494,31 @@ def __post_init__(self): # Validate device availability when torch is installed. We deliberately # skip validation when torch can't be imported so that # `TranscriptionConfig` remains constructible in torch-less test - # environments and lightweight CLI helpers (e.g. `meet check`). + # environments and lightweight CLI helpers. + # + # When the requested device is not available we automatically fall back + # to CPU instead of raising — this handles the common case where CUDA + # was requested but no GPU is present (e.g. running on a laptop or + # inside a container without GPU passthrough). for field_name, value in ( ("device", self.device), ("torch_device", self.torch_device), ): available = _torch_device_available(value) if available is None: - # torch is not installed (or device string is unknown to our - # helper) — skip. continue if not available: - raise ValueError( - f"{field_name}='{value}' but {value.upper()} is not " - f"available on this system. " - "Try --device cpu (and --torch-device cpu/mps) or install " - "the appropriate torch build." 
+ fallback = "cpu" if value == "cuda" else "cpu" + log.warning( + "%s='%s' is not available, falling back to '%s'", + field_name, value, fallback, ) + if field_name == "device": + self.device = fallback + if self.compute_type == "float16": + self.compute_type = "int8" + elif field_name == "torch_device": + self.torch_device = fallback if self.hf_token is None: self.hf_token = os.environ.get("HF_TOKEN") @@ -822,8 +830,18 @@ def _load_whisperx_asr_model(config: TranscriptionConfig, language: str | None): "vad_onset": config.vad_onset, "vad_offset": config.vad_offset, } + device_note = "" + if config.device == "cpu": + try: + import torch as _torch + if _torch.cuda.is_available(): + device_note = " (forced)" + else: + device_note = " (fallback — no GPU)" + except ImportError: + device_note = " (no torch)" print( - f" Loading model: {config.model} ({config.compute_type}) on {config.device}" + f" Loading model: {config.model} ({config.compute_type}) on {config.device}{device_note}" ) return whisperx.load_model( config.model, From 54ea7e60b757b1cb79d02077d3434c41ff646da8 Mon Sep 17 00:00:00 2001 From: pretyflaco Date: Thu, 14 May 2026 17:07:32 +0300 Subject: [PATCH 2/2] fix(transcribe): warn on compute_type downgrade + accurate fallback log MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Follow-up to #19 (cherry-picked) addressing review feedback: - __post_init__ now emits a second warning when compute_type is flipped from float16 to int8 because the device fell back to CPU. Previously the user only saw the device fallback message; the compute_type change was silent. - TranscriptionConfig gains an internal _device_auto_fallback flag set when device is auto-flipped to cpu. _load_whisperx_asr_model reads the flag instead of re-sniffing torch at print time, so the "(forced)" vs "(fallback — no GPU)" annotation is accurate even when the user explicitly passes --device cpu on a no-GPU machine. 
- Removed dead conditional `fallback = "cpu" if value == "cuda" else "cpu"`. - tests/test_transcribe.py: rewrote the two raise-expecting tests (test_invalid_torch_device_{cuda,mps}_raises) to assert the new fallback behavior, and added three tests covering the compute_type warning, the no-spurious-warning case when compute_type is already int8, and that explicit --device cpu does not set _device_auto_fallback. - CHANGELOG: v0.7.1 entry crediting @fadenb. --- CHANGELOG.md | 21 +++++++++++++ meet/transcribe.py | 27 ++++++++++------- tests/test_transcribe.py | 65 ++++++++++++++++++++++++++++++++++++---- 3 files changed, 98 insertions(+), 15 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 89f6e20..504bae3 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,26 @@ # Changelog +## v0.7.1 — 2026-05-14 + +### Fixes + +- **Transcription auto-falls back to CPU + `int8` when CUDA is + unavailable** (#19, thanks @fadenb) — running `meet run` on a machine + without a GPU (laptop, container without passthrough, CI runner) + previously crashed with `ValueError: device='cuda' but CUDA is not + available`. `TranscriptionConfig` now warns and falls back to + `device=cpu`, downgrading `compute_type=float16` to `int8` (float16 + is unsupported on CPU). The model-load log line annotates whether + CPU was forced (`--device cpu`) or auto-selected because no GPU was + found, so the diagnostic distinction is preserved. + +### Internals + +- `TranscriptionConfig` gains an internal `_device_auto_fallback` + flag set in `__post_init__` when the device is auto-flipped, so + `_load_whisperx_asr_model` can label the load line accurately + without re-sniffing torch at print time. 
+ ## v0.7.0 — 2026-05-08 ### Features diff --git a/meet/transcribe.py b/meet/transcribe.py index abeeae2..4a80746 100644 --- a/meet/transcribe.py +++ b/meet/transcribe.py @@ -18,7 +18,7 @@ import os import subprocess import tempfile -from dataclasses import dataclass +from dataclasses import dataclass, field from pathlib import Path from typing import Any, Callable @@ -464,6 +464,12 @@ class TranscriptionConfig: # "dual" = transcribe each channel separately, label as YOU/REMOTE mixdown: str = "mono" + # Internal: set to True by __post_init__ when `device` was auto-flipped + # to 'cpu' because the requested accelerator was unavailable. Used to + # produce an honest annotation in the model-load log line — distinguishes + # "user explicitly passed --device cpu" from "we fell back". + _device_auto_fallback: bool = field(default=False, init=False, repr=False) + def __post_init__(self): if self.mixdown not in ("mono", "dual"): raise ValueError( @@ -508,14 +514,19 @@ def __post_init__(self): if available is None: continue if not available: - fallback = "cpu" if value == "cuda" else "cpu" + fallback = "cpu" log.warning( "%s='%s' is not available, falling back to '%s'", field_name, value, fallback, ) if field_name == "device": self.device = fallback + self._device_auto_fallback = True if self.compute_type == "float16": + log.warning( + "compute_type='float16' is unsupported on CPU, " + "downgrading to 'int8'" + ) self.compute_type = "int8" elif field_name == "torch_device": self.torch_device = fallback @@ -832,14 +843,10 @@ def _load_whisperx_asr_model(config: TranscriptionConfig, language: str | None): } device_note = "" if config.device == "cpu": - try: - import torch as _torch - if _torch.cuda.is_available(): - device_note = " (forced)" - else: - device_note = " (fallback — no GPU)" - except ImportError: - device_note = " (no torch)" + if config._device_auto_fallback: + device_note = " (fallback — no GPU)" + else: + device_note = " (forced)" print( f" Loading model: 
{config.model} ({config.compute_type}) on {config.device}{device_note}" ) diff --git a/tests/test_transcribe.py b/tests/test_transcribe.py index 785886a..9572908 100644 --- a/tests/test_transcribe.py +++ b/tests/test_transcribe.py @@ -422,19 +422,74 @@ def test_torch_device_can_split_from_asr_device(self, monkeypatch): assert config.torch_device == "mps" def test_invalid_torch_device_cuda_raises(self, monkeypatch): - # Force the helper to report cuda unavailable. + # PR #19 changed this from raising to auto-falling-back. The old + # `_raises` name is kept so searches for it in failure history still + # hit; log-message coverage lives in test_cuda_unavailable_*. def fake_avail(d): return False if d == "cuda" else True monkeypatch.setattr("meet.transcribe._torch_device_available", fake_avail) - with pytest.raises(ValueError, match="CUDA is not available"): - TranscriptionConfig(device="cuda", torch_device="cuda") + config = TranscriptionConfig(device="cuda", torch_device="cuda") + # Both 'device' and 'torch_device' fall back to cpu when cuda is + # unavailable; compute_type downgrades because device flipped. + assert config.device == "cpu" + assert config.torch_device == "cpu" + assert config.compute_type == "int8" + assert config._device_auto_fallback is True def test_invalid_torch_device_mps_raises(self, monkeypatch): + # PR #19: mps unavailability falls back to cpu instead of raising. + # Only torch_device is affected; device/compute_type are untouched + # (compute_type only flips when *device* falls back). 
def fake_avail(d): return False if d == "mps" else True monkeypatch.setattr("meet.transcribe._torch_device_available", fake_avail) - with pytest.raises(ValueError, match="MPS is not available"): - TranscriptionConfig(device="cpu", torch_device="mps") + config = TranscriptionConfig( + device="cpu", torch_device="mps", compute_type="float16" + ) + assert config.device == "cpu" + assert config.torch_device == "cpu" + # device was already cpu (not auto-flipped), so compute_type stays. + assert config.compute_type == "float16" + assert config._device_auto_fallback is False + + def test_cuda_unavailable_logs_both_warnings(self, monkeypatch, caplog): + def fake_avail(d): + return False if d == "cuda" else True + monkeypatch.setattr("meet.transcribe._torch_device_available", fake_avail) + with caplog.at_level(logging.WARNING, logger="meet.transcribe"): + TranscriptionConfig(device="cuda", torch_device="cuda", + compute_type="float16") + messages = [r.getMessage() for r in caplog.records] + # Device fallback warning (formatted via %-args) + assert any("device='cuda'" in m and "falling back to 'cpu'" in m + for m in messages), messages + # compute_type downgrade warning + assert any("compute_type='float16'" in m and "int8" in m + for m in messages), messages + + def test_cuda_unavailable_with_int8_does_not_log_compute_type_change( + self, monkeypatch, caplog + ): + def fake_avail(d): + return False if d == "cuda" else True + monkeypatch.setattr("meet.transcribe._torch_device_available", fake_avail) + with caplog.at_level(logging.WARNING, logger="meet.transcribe"): + config = TranscriptionConfig(device="cuda", torch_device="cuda", + compute_type="int8") + assert config.compute_type == "int8" + messages = [r.getMessage() for r in caplog.records] + assert not any("compute_type" in m for m in messages), messages + + def test_explicit_cpu_is_not_marked_as_auto_fallback(self, monkeypatch): + # User passing --device cpu on a no-GPU machine must NOT be flagged + # as a fallback 
(guards _load_whisperx_asr_model's "(forced)" vs + # "(fallback — no GPU)" annotation). + monkeypatch.setattr( + "meet.transcribe._torch_device_available", lambda d: True + ) + config = TranscriptionConfig(device="cpu", torch_device="cpu", + compute_type="int8") + assert config._device_auto_fallback is False def test_validation_skipped_when_torch_missing(self, monkeypatch): # When torch is not installed, the helper returns None; validation