diff --git a/CHANGELOG.md b/CHANGELOG.md index 89f6e20..504bae3 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,26 @@ # Changelog +## v0.7.1 — 2026-05-14 + +### Fixes + +- **Transcription auto-falls back to CPU + `int8` when CUDA is + unavailable** (#19, thanks @fadenb) — running `meet run` on a machine + without a GPU (laptop, container without passthrough, CI runner) + previously crashed with `ValueError: device='cuda' but CUDA is not + available`. `TranscriptionConfig` now warns and falls back to + `device=cpu`, downgrading `compute_type=float16` to `int8` (float16 + is unsupported on CPU). The model-load log line annotates whether + CPU was forced (`--device cpu`) or auto-selected because no GPU was + found, so the diagnostic distinction is preserved. + +### Internals + +- `TranscriptionConfig` gains an internal `_device_auto_fallback` + flag set in `__post_init__` when the device is auto-flipped, so + `_load_whisperx_asr_model` can label the load line accurately + without re-sniffing torch at print time. + ## v0.7.0 — 2026-05-08 ### Features diff --git a/meet/transcribe.py b/meet/transcribe.py index a5f2b42..4a80746 100644 --- a/meet/transcribe.py +++ b/meet/transcribe.py @@ -18,7 +18,7 @@ import os import subprocess import tempfile -from dataclasses import dataclass +from dataclasses import dataclass, field from pathlib import Path from typing import Any, Callable @@ -464,6 +464,12 @@ class TranscriptionConfig: # "dual" = transcribe each channel separately, label as YOU/REMOTE mixdown: str = "mono" + # Internal: set to True by __post_init__ when `device` was auto-flipped + # to 'cpu' because the requested accelerator was unavailable. Used to + # produce an honest annotation in the model-load log line — distinguishes + # "user explicitly passed --device cpu" from "we fell back". 
+ _device_auto_fallback: bool = field(default=False, init=False, repr=False) + def __post_init__(self): if self.mixdown not in ("mono", "dual"): raise ValueError( @@ -494,23 +500,36 @@ def __post_init__(self): # Validate device availability when torch is installed. We deliberately # skip validation when torch can't be imported so that # `TranscriptionConfig` remains constructible in torch-less test - # environments and lightweight CLI helpers (e.g. `meet check`). + # environments and lightweight CLI helpers. + # + # When the requested device is not available we automatically fall back + # to CPU instead of raising — this handles the common case where CUDA + # was requested but no GPU is present (e.g. running on a laptop or + # inside a container without GPU passthrough). for field_name, value in ( ("device", self.device), ("torch_device", self.torch_device), ): available = _torch_device_available(value) if available is None: - # torch is not installed (or device string is unknown to our - # helper) — skip. continue if not available: - raise ValueError( - f"{field_name}='{value}' but {value.upper()} is not " - f"available on this system. " - "Try --device cpu (and --torch-device cpu/mps) or install " - "the appropriate torch build." 
+ fallback = "cpu" + log.warning( + "%s='%s' is not available, falling back to '%s'", + field_name, value, fallback, ) + if field_name == "device": + self.device = fallback + self._device_auto_fallback = True + if self.compute_type == "float16": + log.warning( + "compute_type='float16' is unsupported on CPU, " + "downgrading to 'int8'" + ) + self.compute_type = "int8" + elif field_name == "torch_device": + self.torch_device = fallback if self.hf_token is None: self.hf_token = os.environ.get("HF_TOKEN") @@ -822,8 +841,14 @@ def _load_whisperx_asr_model(config: TranscriptionConfig, language: str | None): "vad_onset": config.vad_onset, "vad_offset": config.vad_offset, } + device_note = "" + if config.device == "cpu": + if config._device_auto_fallback: + device_note = " (fallback — no GPU)" + else: + device_note = " (forced)" print( - f" Loading model: {config.model} ({config.compute_type}) on {config.device}" + f" Loading model: {config.model} ({config.compute_type}) on {config.device}{device_note}" ) return whisperx.load_model( config.model, diff --git a/tests/test_transcribe.py b/tests/test_transcribe.py index 785886a..9572908 100644 --- a/tests/test_transcribe.py +++ b/tests/test_transcribe.py @@ -422,19 +422,74 @@ def test_torch_device_can_split_from_asr_device(self, monkeypatch): assert config.torch_device == "mps" def test_invalid_torch_device_cuda_raises(self, monkeypatch): - # Force the helper to report cuda unavailable. + # PR #19 changed this from raising to auto-falling-back. The old + # `_raises` name is kept on purpose so the failure trail still matches + # earlier runs; further coverage lives in test_cuda_unavailable_*. 
def fake_avail(d): return False if d == "cuda" else True monkeypatch.setattr("meet.transcribe._torch_device_available", fake_avail) - with pytest.raises(ValueError, match="CUDA is not available"): - TranscriptionConfig(device="cuda", torch_device="cuda") + config = TranscriptionConfig(device="cuda", torch_device="cuda") + # Both 'device' and 'torch_device' fall back to cpu when cuda is + # unavailable; compute_type downgrades because device flipped. + assert config.device == "cpu" + assert config.torch_device == "cpu" + assert config.compute_type == "int8" + assert config._device_auto_fallback is True def test_invalid_torch_device_mps_raises(self, monkeypatch): + # PR #19: mps unavailability falls back to cpu instead of raising. + # Only torch_device is affected; device/compute_type are untouched + # (compute_type only flips when *device* falls back). def fake_avail(d): return False if d == "mps" else True monkeypatch.setattr("meet.transcribe._torch_device_available", fake_avail) - with pytest.raises(ValueError, match="MPS is not available"): - TranscriptionConfig(device="cpu", torch_device="mps") + config = TranscriptionConfig( + device="cpu", torch_device="mps", compute_type="float16" + ) + assert config.device == "cpu" + assert config.torch_device == "cpu" + # device was already cpu (not auto-flipped), so compute_type stays. 
+ assert config.compute_type == "float16" + assert config._device_auto_fallback is False + + def test_cuda_unavailable_logs_both_warnings(self, monkeypatch, caplog): + def fake_avail(d): + return False if d == "cuda" else True + monkeypatch.setattr("meet.transcribe._torch_device_available", fake_avail) + with caplog.at_level(logging.WARNING, logger="meet.transcribe"): + TranscriptionConfig(device="cuda", torch_device="cuda", + compute_type="float16") + messages = [r.getMessage() for r in caplog.records] + # Device fallback warning (formatted via %-args) + assert any("device='cuda'" in m and "falling back to 'cpu'" in m + for m in messages), messages + # compute_type downgrade warning + assert any("compute_type='float16'" in m and "int8" in m + for m in messages), messages + + def test_cuda_unavailable_with_int8_does_not_log_compute_type_change( + self, monkeypatch, caplog + ): + def fake_avail(d): + return False if d == "cuda" else True + monkeypatch.setattr("meet.transcribe._torch_device_available", fake_avail) + with caplog.at_level(logging.WARNING, logger="meet.transcribe"): + config = TranscriptionConfig(device="cuda", torch_device="cuda", + compute_type="int8") + assert config.compute_type == "int8" + messages = [r.getMessage() for r in caplog.records] + assert not any("compute_type" in m for m in messages), messages + + def test_explicit_cpu_is_not_marked_as_auto_fallback(self, monkeypatch): + # User passing --device cpu on a no-GPU machine must NOT be flagged + # as a fallback (guards _load_whisperx_asr_model's "(forced)" vs + # "(fallback — no GPU)" annotation). + monkeypatch.setattr( + "meet.transcribe._torch_device_available", lambda d: True + ) + config = TranscriptionConfig(device="cpu", torch_device="cpu", + compute_type="int8") + assert config._device_auto_fallback is False def test_validation_skipped_when_torch_missing(self, monkeypatch): # When torch is not installed, the helper returns None; validation