rag-fusion · hitesh-kumar123 · Feb 14, 2026 · Feb 14, 2026 · Feb 14, 2026
diff --git a/backend/app/embeddings/generate.py b/backend/app/embeddings/generate.py
@@ -18,37 +18,11 @@
 _clip_preprocess = None
 _clip_tokenizer = None
 
-def _ensure_cpu_environment():
-    """Force CPU-only environment for PyTorch operations."""
-    import os
-    import torch
-
-    # Environment variables
-    os.environ['CUDA_VISIBLE_DEVICES'] = ''
-    os.environ['USE_CUDA'] = '0'
-
-    # PyTorch settings
-    torch.cuda.is_available = lambda: False
-    if hasattr(torch.backends, 'cudnn'):
-        torch.backends.cudnn.enabled = False
-    if hasattr(torch.backends, 'cuda'):
-        torch.backends.cuda.matmul.allow_tf32 = False
-        torch.backends.cuda.is_built = lambda: False
-
-    # Set default tensor type to CPU
-    torch.set_default_tensor_type(torch.FloatTensor)
-
-    # Disable JIT
-    try:
-        torch.jit.enable_onednn_fusion(False)
-    except:
-        pass
-    try:
-        torch._C._jit_set_profiling_executor(False)
-    except:
-        pass
-
-    return torch.device('cpu')
+def _get_device() -> torch.device:
+    """Get the best available device."""
+    if torch.cuda.is_available():
+        return torch.device("cuda")
+    return torch.device("cpu")
 
 
 def _get_clip_model() -> tuple[torch.nn.Module, any]:
@@ -59,16 +33,16 @@ def _get_clip_model() -> tuple[torch.nn.Module, any]:
     global _clip_model, _clip_preprocess, _clip_tokenizer
 
     if _clip_model is None:
-        device = _ensure_cpu_environment()
-        print("Initializing unified CLIP model (ViT-B-32) on CPU...")
+        device = _get_device()
+        print(f"Initializing unified CLIP model (ViT-B-32) on {device}...")
 
         try:
             # Always use the same settings for both text and image!
             # force_quick_gelu=True is standard for OpenAI CLIP weights
             model, preprocess, _ = open_clip.create_model_and_transforms(
                 "ViT-B-32",
                 pretrained="openai",
-                device="cpu",
+                device=device,
                 jit=False,
                 force_quick_gelu=True
             )
@@ -80,7 +54,7 @@ def _get_clip_model() -> tuple[torch.nn.Module, any]:
             _clip_model = model
             _clip_preprocess = preprocess
             _clip_tokenizer = open_clip.get_tokenizer("ViT-B-32")
-            print("CLIP model initialized successfully.")
+            print(f"CLIP model initialized successfully on {device}.")
 
         except Exception as e:
             print(f"Error initializing CLIP model: {e}")
@@ -115,7 +89,7 @@ def embed_text(text: Union[str, List[str]]) -> np.ndarray:
     if isinstance(text, str):
         text = [text]
 
-    # Determine device
+    # Determine device from model
     device = next(model.parameters()).device
 
     # Tokenize text (batch processing)
@@ -138,66 +112,43 @@ def embed_text(text: Union[str, List[str]]) -> np.ndarray:
 
 
 def embed_image(path: str) -> np.ndarray:
-    """Generate embeddings for image with strict CPU-only mode.
+    """Generate embeddings for image using best available device.
 
     Returns:
         numpy array of shape (1, 512) containing normalized CLIP image embedding.
     """
     global _clip_model, _clip_preprocess
 
-    # Force CPU-only environment
-    device = _ensure_cpu_environment()
-
     try:
         # Get model and preprocess function
         model, preprocess = get_clip()
 
-        # Ensure model is in CPU mode
-        model = model.cpu()
+        # Determine device from model
+        device = next(model.parameters()).device
 
         # Load and preprocess image
         with Image.open(path) as img:
             # Convert to RGB and preprocess
             image = preprocess(img.convert("RGB"))
 
-        # Ensure tensor is on CPU and create batch
-        image = image.cpu()
-        batch = torch.stack([image]).cpu()
+        # Move to device and create batch
+        image = image.to(device)
+        batch = torch.stack([image]).to(device)
 
         # Generate embeddings with strict error handling
         with torch.no_grad():
-            try:
-                # Encode image and normalize
-                feats = model.encode_image(batch)
-                # Normalize: feats shape is (1, 512), normalize per-sample
-                feats = feats / feats.norm(dim=-1, keepdim=True)
-                result = feats.cpu().numpy().astype(np.float32)
-
-                # Ensure shape is (1, 512)
-                if result.ndim == 1:
-                    result = result.reshape(1, -1)
-
-                assert result.shape == (1, 512), f"Expected shape (1, 512), got {result.shape}"
-                return result
-
-            except RuntimeError as e:
-                error_msg = str(e)
-                if "CUDA" in error_msg or "cuda" in error_msg.lower():
-                    print("Warning: CUDA operation attempted. Reinitializing in CPU-only mode...")
-                    # Reset model to force CPU reinitialization
-                    _clip_model = None
-                    _clip_preprocess = None
-                    # Retry with fresh CPU model
-                    model, preprocess = get_clip()
-                    image = preprocess(Image.open(path).convert("RGB"))
-                    batch = torch.stack([image]).cpu()
-                    feats = model.encode_image(batch)
-                    feats = feats / feats.norm(dim=-1, keepdim=True)
-                    result = feats.cpu().numpy().astype(np.float32)
-                    if result.ndim == 1:
-                        result = result.reshape(1, -1)
-                    return result
-                raise  # Re-raise if it's not a CUDA error
+            # Encode image and normalize
+            feats = model.encode_image(batch)
+            # Normalize: feats shape is (1, 512), normalize per-sample
+            feats = feats / feats.norm(dim=-1, keepdim=True)
+            result = feats.cpu().numpy().astype(np.float32)
+
+            # Ensure shape is (1, 512)
+            if result.ndim == 1:
+                result = result.reshape(1, -1)
+
+            assert result.shape == (1, 512), f"Expected shape (1, 512), got {result.shape}"
+            return result
 
     except Exception as e:
         print(f"Error processing image {path}: {str(e)}")

diff --git a/backend/app/main.py b/backend/app/main.py
@@ -34,7 +34,24 @@
 app.mount("/static", StaticFiles(directory=storage_dir), name="static")
 
 # Event Handlers
-app.add_event_handler("startup", connect_to_mongo)
+# Event Handlers
+@app.on_event("startup")
+async def startup_event():
+    import torch
+    import logging
+    logger = logging.getLogger("uvicorn")
+
+    logger.info("=" * 50)
+    logger.info("Startup Check:")
+    if torch.cuda.is_available():
+        gpu_name = torch.cuda.get_device_name(0)
+        logger.info(f"✅ GPU detected: {gpu_name}")
+        logger.info(f"CUDA Version: {torch.version.cuda}")
+    else:
+        logger.warning("⚠️ GPU NOT DETECTED. Running in CPU mode.")
+    logger.info("=" * 50)
+    await connect_to_mongo()
+
 app.add_event_handler("shutdown", close_mongo_connection)
 
 class QueryRequest(BaseModel):

diff --git a/cli.py b/cli.py
diff --git a/debug_rag_sources.py b/debug_rag_sources.py
diff --git a/package-lock.json b/package-lock.json