diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml
index a3c224df..12d223bc 100644
--- a/.github/workflows/release.yml
+++ b/.github/workflows/release.yml
@@ -191,10 +191,10 @@ jobs:
           pip install --no-deps chatterbox-tts
           pip install --no-deps hume-tada
 
-      - name: Install PyTorch with CUDA 12.8
+      - name: Install PyTorch with CUDA 12.8 (nightly for RTX 50-series / sm_120 support)
         run: |
-          pip install torch --index-url https://download.pytorch.org/whl/cu128 --force-reinstall --no-deps
-          pip install torchaudio --index-url https://download.pytorch.org/whl/cu128 --force-reinstall --no-deps
+          pip install torch --index-url https://download.pytorch.org/whl/nightly/cu128 --force-reinstall --no-deps
+          pip install torchaudio --index-url https://download.pytorch.org/whl/nightly/cu128 --force-reinstall --no-deps
 
       - name: Verify CUDA support in torch
         run: |
diff --git a/backend/app.py b/backend/app.py
index 1293460a..393fd634 100644
--- a/backend/app.py
+++ b/backend/app.py
@@ -146,11 +146,18 @@ def _get_gpu_status() -> str:
     """Return a human-readable string describing GPU availability."""
     backend_type = get_backend_type()
     if torch.cuda.is_available():
+        from .backends.base import check_cuda_compatibility
+
         device_name = torch.cuda.get_device_name(0)
+        compatible, _warning = check_cuda_compatibility()
         is_rocm = hasattr(torch.version, "hip") and torch.version.hip is not None
         if is_rocm:
-            return f"ROCm ({device_name})"
-        return f"CUDA ({device_name})"
+            label = f"ROCm ({device_name})"
+        else:
+            label = f"CUDA ({device_name})"
+        if not compatible:
+            label += " [UNSUPPORTED - see logs]"
+        return label
     elif hasattr(torch.backends, "mps") and torch.backends.mps.is_available():
         return "MPS (Apple Silicon)"
     elif backend_type == "mlx":
@@ -230,6 +237,13 @@ async def startup_event():
     logger.info("Backend: %s", backend_type.upper())
     logger.info("GPU: %s", _get_gpu_status())
 
+    # Warn if GPU architecture is not supported by this PyTorch build
+    from .backends.base import check_cuda_compatibility
+
+    _compatible, _cuda_warning = check_cuda_compatibility()
+    if not _compatible:
+        logger.warning("GPU COMPATIBILITY: %s", _cuda_warning)
+
     from .services.cuda import check_and_update_cuda_binary
 
     create_background_task(check_and_update_cuda_binary())
diff --git a/backend/backends/base.py b/backend/backends/base.py
index 0fdfa344..c566af10 100644
--- a/backend/backends/base.py
+++ b/backend/backends/base.py
@@ -126,6 +126,44 @@ def get_torch_device(
     return "cpu"
 
 
+def check_cuda_compatibility() -> tuple[bool, str | None]:
+    """Check if the installed PyTorch supports the current GPU's compute capability.
+
+    Returns:
+        (compatible, warning_message) — compatible is True if OK or no CUDA GPU,
+        warning_message is a human-readable string if there's a problem.
+    """
+    import torch
+
+    if not torch.cuda.is_available():
+        return True, None
+
+    major, minor = torch.cuda.get_device_capability(0)
+    capability = f"{major}.{minor}"
+    device_name = torch.cuda.get_device_name(0)
+    sm_tag = f"sm_{major}{minor}"
+
+    # torch.cuda._get_arch_list() returns the SM architectures this build
+    # was compiled for (e.g. ["sm_50", "sm_60", ..., "sm_90"]).
+    try:
+        arch_list = torch.cuda._get_arch_list()
+        if arch_list:
+            # Check for both sm_XX and compute_XX (JIT-compiled) entries
+            compute_tag = f"compute_{major}{minor}"
+            if sm_tag not in arch_list and compute_tag not in arch_list:
+                return False, (
+                    f"{device_name} (compute capability {capability} / {sm_tag}) "
+                    f"is not supported by this PyTorch build. "
+                    f"Supported architectures: {', '.join(arch_list)}. "
+                    f"Install PyTorch nightly (cu128) for newer GPU support: "
+                    f"pip install torch --index-url https://download.pytorch.org/whl/nightly/cu128"
+                )
+    except AttributeError:
+        pass
+
+    return True, None
+
+
 def empty_device_cache(device: str) -> None:
     """
     Free cached memory on the given device (CUDA or XPU).
diff --git a/backend/build_binary.py b/backend/build_binary.py
index 43ad0719..0338eaba 100644
--- a/backend/build_binary.py
+++ b/backend/build_binary.py
@@ -410,7 +410,8 @@ def build_server(cuda=False):
                 "torchvision",
                 "torchaudio",
                 "--index-url",
-                "https://download.pytorch.org/whl/cu128",
+                # Nightly for RTX 50-series (sm_120/Blackwell) support
+                "https://download.pytorch.org/whl/nightly/cu128",
                 "--force-reinstall",
                 "-q",
             ],
diff --git a/backend/models.py b/backend/models.py
index f2f43d4b..f2b590d3 100644
--- a/backend/models.py
+++ b/backend/models.py
@@ -182,6 +182,7 @@ class HealthResponse(BaseModel):
     vram_used_mb: Optional[float] = None
     backend_type: Optional[str] = None  # Backend type (mlx or pytorch)
     backend_variant: Optional[str] = None  # Binary variant (cpu or cuda)
+    gpu_compatibility_warning: Optional[str] = None  # Warning if GPU arch unsupported
 
 
 class DirectoryCheck(BaseModel):
diff --git a/backend/routes/health.py b/backend/routes/health.py
index 66cc9b62..79c513f5 100644
--- a/backend/routes/health.py
+++ b/backend/routes/health.py
@@ -93,6 +93,12 @@ async def health():
     except ImportError:
         pass
 
+    gpu_compat_warning = None
+    if has_cuda:
+        from ..backends.base import check_cuda_compatibility
+
+        _compatible, gpu_compat_warning = check_cuda_compatibility()
+
     gpu_available = has_cuda or has_mps or has_xpu or has_directml or backend_type == "mlx"
 
     gpu_type = None
@@ -171,6 +177,7 @@ async def health():
             "VOICEBOX_BACKEND_VARIANT",
             "cuda" if torch.cuda.is_available() else ("xpu" if has_xpu else "cpu"),
         ),
+        gpu_compatibility_warning=gpu_compat_warning,
     )
 
 
diff --git a/justfile b/justfile
index b17243fb..3a74a185 100644
--- a/justfile
+++ b/justfile
@@ -74,8 +74,8 @@ setup-python:
     $hasNvidia = ($gpus | Where-Object { $_ -match 'NVIDIA' }).Count -gt 0
     $hasIntelArc = ($gpus | Where-Object { $_ -match 'Arc' }).Count -gt 0
     if ($hasNvidia) { \
-      Write-Host "NVIDIA GPU detected — installing PyTorch with CUDA support..."; \
-      & "{{ pip }}" install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cu128; \
+      Write-Host "NVIDIA GPU detected — installing PyTorch with CUDA support (nightly for RTX 50-series)..."; \
+      & "{{ pip }}" install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/nightly/cu128; \
     } elseif ($hasIntelArc) { \
      Write-Host "Intel Arc GPU detected — installing PyTorch with XPU support..."; \
      & "{{ pip }}" install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/xpu; \