From 4d6028a785655e4b9f89d8a0756a81b45e74c7b9 Mon Sep 17 00:00:00 2001
From: Tristan Helmich <fadenb@utzutzutz.net>
Date: Thu, 7 May 2026 13:14:40 +0200
Subject: [PATCH 1/2] fix(transcribe): auto-fallback to CPU with int8 when CUDA
 is unavailable

Instead of raising ValueError when a requested device (typically CUDA)
is not available, automatically fall back to CPU.  When the ASR `device`
itself falls back, also downgrade compute_type from float16 to int8
(float16 is unsupported on CPU).  Also indicate whether CPU is forced or
a fallback in the model-loading print message.
---
 meet/transcribe.py | 36 +++++++++++++++++++++++++++---------
 1 file changed, 27 insertions(+), 9 deletions(-)

diff --git a/meet/transcribe.py b/meet/transcribe.py
index a5f2b42..abeeae2 100644
--- a/meet/transcribe.py
+++ b/meet/transcribe.py
@@ -494,23 +494,31 @@ def __post_init__(self):
         # Validate device availability when torch is installed.  We deliberately
         # skip validation when torch can't be imported so that
         # `TranscriptionConfig` remains constructible in torch-less test
-        # environments and lightweight CLI helpers (e.g. `meet check`).
+        # environments and lightweight CLI helpers.
+        #
+        # When the requested device is not available we automatically fall back
+        # to CPU instead of raising — this handles the common case where CUDA
+        # was requested but no GPU is present (e.g. running on a laptop or
+        # inside a container without GPU passthrough).
         for field_name, value in (
             ("device", self.device),
             ("torch_device", self.torch_device),
         ):
             available = _torch_device_available(value)
             if available is None:
-                # torch is not installed (or device string is unknown to our
-                # helper) — skip.
                 continue
             if not available:
-                raise ValueError(
-                    f"{field_name}='{value}' but {value.upper()} is not "
-                    f"available on this system. "
-                    "Try --device cpu (and --torch-device cpu/mps) or install "
-                    "the appropriate torch build."
+                fallback = "cpu" if value == "cuda" else "cpu"
+                log.warning(
+                    "%s='%s' is not available, falling back to '%s'",
+                    field_name, value, fallback,
                 )
+                if field_name == "device":
+                    self.device = fallback
+                    if self.compute_type == "float16":
+                        self.compute_type = "int8"
+                elif field_name == "torch_device":
+                    self.torch_device = fallback
 
         if self.hf_token is None:
             self.hf_token = os.environ.get("HF_TOKEN")
@@ -822,8 +830,18 @@ def _load_whisperx_asr_model(config: TranscriptionConfig, language: str | None):
         "vad_onset": config.vad_onset,
         "vad_offset": config.vad_offset,
     }
+    device_note = ""
+    if config.device == "cpu":
+        try:
+            import torch as _torch
+            if _torch.cuda.is_available():
+                device_note = " (forced)"
+            else:
+                device_note = " (fallback — no GPU)"
+        except ImportError:
+            device_note = " (no torch)"
     print(
-        f"  Loading model: {config.model} ({config.compute_type}) on {config.device}"
+        f"  Loading model: {config.model} ({config.compute_type}) on {config.device}{device_note}"
     )
     return whisperx.load_model(
         config.model,

From 54ea7e60b757b1cb79d02077d3434c41ff646da8 Mon Sep 17 00:00:00 2001
From: pretyflaco <kemal@ideasarelikeflames.com>
Date: Thu, 14 May 2026 17:07:32 +0300
Subject: [PATCH 2/2] fix(transcribe): warn on compute_type downgrade +
 accurate fallback log
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Follow-up to #19 (cherry-picked) addressing review feedback:

- __post_init__ now emits a second warning when compute_type is flipped
  from float16 to int8 because the device fell back to CPU.  Previously
  the user only saw the device fallback message; the compute_type change
  was silent.
- TranscriptionConfig gains an internal _device_auto_fallback flag set
  when device is auto-flipped to cpu.  _load_whisperx_asr_model reads
  the flag instead of re-sniffing torch at print time, so the
  "(forced)" vs "(fallback — no GPU)" annotation is accurate even when
  the user explicitly passes --device cpu on a no-GPU machine.
- Removed dead conditional `fallback = "cpu" if value == "cuda" else "cpu"`.
- tests/test_transcribe.py: rewrote the two raise-expecting tests
  (test_invalid_torch_device_{cuda,mps}_raises) to assert the new
  fallback behavior, and added three tests covering the compute_type
  warning, the no-spurious-warning case when compute_type is already
  int8, and that explicit --device cpu does not set _device_auto_fallback.
- CHANGELOG: v0.7.1 entry crediting @fadenb.
---
 CHANGELOG.md             | 21 +++++++++++++
 meet/transcribe.py       | 27 ++++++++++-------
 tests/test_transcribe.py | 65 ++++++++++++++++++++++++++++++++++++----
 3 files changed, 98 insertions(+), 15 deletions(-)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index 89f6e20..504bae3 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -1,5 +1,26 @@
 # Changelog
 
+## v0.7.1 — 2026-05-14
+
+### Fixes
+
+- **Transcription auto-falls back to CPU + `int8` when CUDA is
+  unavailable** (#19, thanks @fadenb) — running `meet run` on a machine
+  without a GPU (laptop, container without passthrough, CI runner)
+  previously crashed with `ValueError: device='cuda' but CUDA is not
+  available`. `TranscriptionConfig` now warns and falls back to
+  `device=cpu`, downgrading `compute_type=float16` to `int8` (float16
+  is unsupported on CPU). The model-load log line annotates whether
+  CPU was forced (`--device cpu`) or auto-selected because no GPU was
+  found, so the diagnostic distinction is preserved.
+
+### Internals
+
+- `TranscriptionConfig` gains an internal `_device_auto_fallback`
+  flag set in `__post_init__` when the device is auto-flipped, so
+  `_load_whisperx_asr_model` can label the load line accurately
+  without re-sniffing torch at print time.
+
 ## v0.7.0 — 2026-05-08
 
 ### Features
diff --git a/meet/transcribe.py b/meet/transcribe.py
index abeeae2..4a80746 100644
--- a/meet/transcribe.py
+++ b/meet/transcribe.py
@@ -18,7 +18,7 @@
 import os
 import subprocess
 import tempfile
-from dataclasses import dataclass
+from dataclasses import dataclass, field
 from pathlib import Path
 from typing import Any, Callable
 
@@ -464,6 +464,12 @@ class TranscriptionConfig:
     #   "dual" = transcribe each channel separately, label as YOU/REMOTE
     mixdown: str = "mono"
 
+    # Internal: set to True by __post_init__ when `device` was auto-flipped
+    # to 'cpu' because the requested accelerator was unavailable.  Used to
+    # produce an honest annotation in the model-load log line — distinguishes
+    # "user explicitly passed --device cpu" from "we fell back".
+    _device_auto_fallback: bool = field(default=False, init=False, repr=False)
+
     def __post_init__(self):
         if self.mixdown not in ("mono", "dual"):
             raise ValueError(
@@ -508,14 +514,19 @@ def __post_init__(self):
             if available is None:
                 continue
             if not available:
-                fallback = "cpu" if value == "cuda" else "cpu"
+                fallback = "cpu"
                 log.warning(
                     "%s='%s' is not available, falling back to '%s'",
                     field_name, value, fallback,
                 )
                 if field_name == "device":
                     self.device = fallback
+                    self._device_auto_fallback = True
                     if self.compute_type == "float16":
+                        log.warning(
+                            "compute_type='float16' is unsupported on CPU, "
+                            "downgrading to 'int8'"
+                        )
                         self.compute_type = "int8"
                 elif field_name == "torch_device":
                     self.torch_device = fallback
@@ -832,14 +843,10 @@ def _load_whisperx_asr_model(config: TranscriptionConfig, language: str | None):
     }
     device_note = ""
     if config.device == "cpu":
-        try:
-            import torch as _torch
-            if _torch.cuda.is_available():
-                device_note = " (forced)"
-            else:
-                device_note = " (fallback — no GPU)"
-        except ImportError:
-            device_note = " (no torch)"
+        if config._device_auto_fallback:
+            device_note = " (fallback — no GPU)"
+        else:
+            device_note = " (forced)"
     print(
         f"  Loading model: {config.model} ({config.compute_type}) on {config.device}{device_note}"
     )
diff --git a/tests/test_transcribe.py b/tests/test_transcribe.py
index 785886a..9572908 100644
--- a/tests/test_transcribe.py
+++ b/tests/test_transcribe.py
@@ -422,19 +422,74 @@ def test_torch_device_can_split_from_asr_device(self, monkeypatch):
         assert config.torch_device == "mps"
 
     def test_invalid_torch_device_cuda_raises(self, monkeypatch):
-        # Force the helper to report cuda unavailable.
+        # PR #19 changed this from raising to auto-falling-back.  The old
+        # `_raises` name is kept so failure trails and history still match;
+        # warning output is covered by the test_cuda_unavailable_* tests.
         def fake_avail(d):
             return False if d == "cuda" else True
         monkeypatch.setattr("meet.transcribe._torch_device_available", fake_avail)
-        with pytest.raises(ValueError, match="CUDA is not available"):
-            TranscriptionConfig(device="cuda", torch_device="cuda")
+        config = TranscriptionConfig(device="cuda", torch_device="cuda")
+        # Both 'device' and 'torch_device' fall back to cpu when cuda is
+        # unavailable; compute_type downgrades because device flipped.
+        assert config.device == "cpu"
+        assert config.torch_device == "cpu"
+        assert config.compute_type == "int8"
+        assert config._device_auto_fallback is True
 
     def test_invalid_torch_device_mps_raises(self, monkeypatch):
+        # PR #19: mps unavailability falls back to cpu instead of raising.
+        # Only torch_device is affected; device/compute_type are untouched
+        # (compute_type only flips when *device* falls back).
         def fake_avail(d):
             return False if d == "mps" else True
         monkeypatch.setattr("meet.transcribe._torch_device_available", fake_avail)
-        with pytest.raises(ValueError, match="MPS is not available"):
-            TranscriptionConfig(device="cpu", torch_device="mps")
+        config = TranscriptionConfig(
+            device="cpu", torch_device="mps", compute_type="float16"
+        )
+        assert config.device == "cpu"
+        assert config.torch_device == "cpu"
+        # device was already cpu (not auto-flipped), so compute_type stays.
+        assert config.compute_type == "float16"
+        assert config._device_auto_fallback is False
+
+    def test_cuda_unavailable_logs_both_warnings(self, monkeypatch, caplog):
+        def fake_avail(d):
+            return False if d == "cuda" else True
+        monkeypatch.setattr("meet.transcribe._torch_device_available", fake_avail)
+        with caplog.at_level(logging.WARNING, logger="meet.transcribe"):
+            TranscriptionConfig(device="cuda", torch_device="cuda",
+                                compute_type="float16")
+        messages = [r.getMessage() for r in caplog.records]
+        # Device fallback warning (formatted via %-args)
+        assert any("device='cuda'" in m and "falling back to 'cpu'" in m
+                   for m in messages), messages
+        # compute_type downgrade warning
+        assert any("compute_type='float16'" in m and "int8" in m
+                   for m in messages), messages
+
+    def test_cuda_unavailable_with_int8_does_not_log_compute_type_change(
+        self, monkeypatch, caplog
+    ):
+        def fake_avail(d):
+            return False if d == "cuda" else True
+        monkeypatch.setattr("meet.transcribe._torch_device_available", fake_avail)
+        with caplog.at_level(logging.WARNING, logger="meet.transcribe"):
+            config = TranscriptionConfig(device="cuda", torch_device="cuda",
+                                         compute_type="int8")
+        assert config.compute_type == "int8"
+        messages = [r.getMessage() for r in caplog.records]
+        assert not any("compute_type" in m for m in messages), messages
+
+    def test_explicit_cpu_is_not_marked_as_auto_fallback(self, monkeypatch):
+        # User passing --device cpu on a no-GPU machine must NOT be flagged
+        # as a fallback (guards _load_whisperx_asr_model's "(forced)" vs
+        # "(fallback — no GPU)" annotation).
+        monkeypatch.setattr(
+            "meet.transcribe._torch_device_available", lambda d: True
+        )
+        config = TranscriptionConfig(device="cpu", torch_device="cpu",
+                                     compute_type="int8")
+        assert config._device_auto_fallback is False
 
     def test_validation_skipped_when_torch_missing(self, monkeypatch):
         # When torch is not installed, the helper returns None; validation