Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
21 changes: 21 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
@@ -1,5 +1,26 @@
# Changelog

## v0.7.1 — 2026-05-14

### Fixes

- **Transcription auto-falls back to CPU + `int8` when CUDA is
unavailable** (#19, thanks @fadenb) — running `meet run` on a machine
without a GPU (laptop, container without passthrough, CI runner)
previously crashed with `ValueError: device='cuda' but CUDA is not
available`. `TranscriptionConfig` now warns and falls back to
`device=cpu`, downgrading `compute_type=float16` to `int8` (float16
is unsupported on CPU). The model-load log line annotates whether
CPU was forced (`--device cpu`) or auto-selected because no GPU was
found, so the diagnostic distinction is preserved.

### Internals

- `TranscriptionConfig` gains an internal `_device_auto_fallback`
flag set in `__post_init__` when the device is auto-flipped, so
`_load_whisperx_asr_model` can label the load line accurately
without re-sniffing torch at print time.

## v0.7.0 — 2026-05-08

### Features
Expand Down
45 changes: 35 additions & 10 deletions meet/transcribe.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@
import os
import subprocess
import tempfile
from dataclasses import dataclass
from dataclasses import dataclass, field
from pathlib import Path
from typing import Any, Callable

Expand Down Expand Up @@ -464,6 +464,12 @@ class TranscriptionConfig:
# "dual" = transcribe each channel separately, label as YOU/REMOTE
mixdown: str = "mono"

# Internal: set to True by __post_init__ when `device` was auto-flipped
# to 'cpu' because the requested accelerator was unavailable. Used to
# produce an honest annotation in the model-load log line — distinguishes
# "user explicitly passed --device cpu" from "we fell back".
_device_auto_fallback: bool = field(default=False, init=False, repr=False)

def __post_init__(self):
if self.mixdown not in ("mono", "dual"):
raise ValueError(
Expand Down Expand Up @@ -494,23 +500,36 @@ def __post_init__(self):
# Validate device availability when torch is installed. We deliberately
# skip validation when torch can't be imported so that
# `TranscriptionConfig` remains constructible in torch-less test
# environments and lightweight CLI helpers (e.g. `meet check`).
# environments and lightweight CLI helpers.
#
# When the requested device is not available we automatically fall back
# to CPU instead of raising — this handles the common case where CUDA
# was requested but no GPU is present (e.g. running on a laptop or
# inside a container without GPU passthrough).
for field_name, value in (
("device", self.device),
("torch_device", self.torch_device),
):
available = _torch_device_available(value)
if available is None:
# torch is not installed (or device string is unknown to our
# helper) — skip.
continue
if not available:
raise ValueError(
f"{field_name}='{value}' but {value.upper()} is not "
f"available on this system. "
"Try --device cpu (and --torch-device cpu/mps) or install "
"the appropriate torch build."
fallback = "cpu"
log.warning(
"%s='%s' is not available, falling back to '%s'",
field_name, value, fallback,
)
if field_name == "device":
self.device = fallback
self._device_auto_fallback = True
if self.compute_type == "float16":
log.warning(
"compute_type='float16' is unsupported on CPU, "
"downgrading to 'int8'"
)
self.compute_type = "int8"
elif field_name == "torch_device":
self.torch_device = fallback

if self.hf_token is None:
self.hf_token = os.environ.get("HF_TOKEN")
Expand Down Expand Up @@ -822,8 +841,14 @@ def _load_whisperx_asr_model(config: TranscriptionConfig, language: str | None):
"vad_onset": config.vad_onset,
"vad_offset": config.vad_offset,
}
device_note = ""
if config.device == "cpu":
if config._device_auto_fallback:
device_note = " (fallback — no GPU)"
else:
device_note = " (forced)"
print(
f" Loading model: {config.model} ({config.compute_type}) on {config.device}"
f" Loading model: {config.model} ({config.compute_type}) on {config.device}{device_note}"
)
return whisperx.load_model(
config.model,
Expand Down
65 changes: 60 additions & 5 deletions tests/test_transcribe.py
Original file line number Diff line number Diff line change
Expand Up @@ -422,19 +422,74 @@ def test_torch_device_can_split_from_asr_device(self, monkeypatch):
assert config.torch_device == "mps"

def test_invalid_torch_device_cuda_raises(self, monkeypatch):
# Force the helper to report cuda unavailable.
# PR #19 changed this from raising to auto-falling-back. The old test
# name is kept so that searches for it in failure history still land
# here; further fallback coverage lives in the test_cuda_unavailable_*
# tests.
def fake_avail(d):
return False if d == "cuda" else True
monkeypatch.setattr("meet.transcribe._torch_device_available", fake_avail)
with pytest.raises(ValueError, match="CUDA is not available"):
TranscriptionConfig(device="cuda", torch_device="cuda")
config = TranscriptionConfig(device="cuda", torch_device="cuda")
# Both 'device' and 'torch_device' fall back to cpu when cuda is
# unavailable; compute_type downgrades because device flipped.
assert config.device == "cpu"
assert config.torch_device == "cpu"
assert config.compute_type == "int8"
assert config._device_auto_fallback is True

def test_invalid_torch_device_mps_raises(self, monkeypatch):
# PR #19: mps unavailability falls back to cpu instead of raising.
# Only torch_device is affected; device/compute_type are untouched
# (compute_type only flips when *device* falls back).
def fake_avail(d):
return False if d == "mps" else True
monkeypatch.setattr("meet.transcribe._torch_device_available", fake_avail)
with pytest.raises(ValueError, match="MPS is not available"):
TranscriptionConfig(device="cpu", torch_device="mps")
config = TranscriptionConfig(
device="cpu", torch_device="mps", compute_type="float16"
)
assert config.device == "cpu"
assert config.torch_device == "cpu"
# device was already cpu (not auto-flipped), so compute_type stays.
assert config.compute_type == "float16"
assert config._device_auto_fallback is False

def test_cuda_unavailable_logs_both_warnings(self, monkeypatch, caplog):
    """Check that an unavailable CUDA device logs both fallback warnings.

    Patches ``meet.transcribe._torch_device_available`` so that only
    ``"cuda"`` is reported unavailable, builds a config that requests CUDA
    with ``float16``, and then requires a warning for the ``device``
    fallback as well as a warning for the ``compute_type`` downgrade.
    """
    def fake_avail(d):
        return False if d == "cuda" else True
    monkeypatch.setattr("meet.transcribe._torch_device_available", fake_avail)
    with caplog.at_level(logging.WARNING, logger="meet.transcribe"):
        TranscriptionConfig(device="cuda", torch_device="cuda",
                            compute_type="float16")
    messages = [r.getMessage() for r in caplog.records]
    # Device fallback warning (formatted via %-args). Anchor on the start of
    # the message: a plain substring test for "device='cuda'" is also
    # satisfied by the "torch_device='cuda'" warning, which would let this
    # test pass even if the `device` warning were never emitted.
    assert any(m.startswith("device='cuda'") and "falling back to 'cpu'" in m
               for m in messages), messages
    # compute_type downgrade warning
    assert any("compute_type='float16'" in m and "int8" in m
               for m in messages), messages

def test_cuda_unavailable_with_int8_does_not_log_compute_type_change(
    self, monkeypatch, caplog
):
    """An ``int8`` request stays ``int8`` after the CUDA fallback, silently.

    With only ``"cuda"`` reported unavailable, a config that already asks
    for ``compute_type="int8"`` must keep that value and must not produce
    any warning that mentions ``compute_type``.
    """
    monkeypatch.setattr(
        "meet.transcribe._torch_device_available",
        lambda name: name != "cuda",
    )
    with caplog.at_level(logging.WARNING, logger="meet.transcribe"):
        cfg = TranscriptionConfig(
            device="cuda", torch_device="cuda", compute_type="int8"
        )
    assert cfg.compute_type == "int8"
    logged = [record.getMessage() for record in caplog.records]
    # No captured warning may talk about compute_type: nothing was changed.
    assert all("compute_type" not in text for text in logged), logged

def test_explicit_cpu_is_not_marked_as_auto_fallback(self, monkeypatch):
    """An explicitly requested CPU device is never flagged as a fallback.

    Guards the load-line annotation in ``_load_whisperx_asr_model``, which
    prints "(forced)" for an explicit CPU choice and "(fallback — no GPU)"
    only when ``_device_auto_fallback`` is set.
    """
    def everything_available(_device):
        return True

    monkeypatch.setattr(
        "meet.transcribe._torch_device_available", everything_available
    )
    cfg = TranscriptionConfig(
        device="cpu", torch_device="cpu", compute_type="int8"
    )
    assert cfg._device_auto_fallback is False

def test_validation_skipped_when_torch_missing(self, monkeypatch):
# When torch is not installed, the helper returns None; validation
Expand Down
Loading