diff --git a/backend/app/embeddings/generate.py b/backend/app/embeddings/generate.py
index aaa6b3e..12b8325 100644
--- a/backend/app/embeddings/generate.py
+++ b/backend/app/embeddings/generate.py
@@ -18,37 +18,11 @@
 _clip_preprocess = None
 _clip_tokenizer = None
 
-def _ensure_cpu_environment():
-    """Force CPU-only environment for PyTorch operations."""
-    import os
-    import torch
-
-    # Environment variables
-    os.environ['CUDA_VISIBLE_DEVICES'] = ''
-    os.environ['USE_CUDA'] = '0'
-
-    # PyTorch settings
-    torch.cuda.is_available = lambda: False
-    if hasattr(torch.backends, 'cudnn'):
-        torch.backends.cudnn.enabled = False
-    if hasattr(torch.backends, 'cuda'):
-        torch.backends.cuda.matmul.allow_tf32 = False
-        torch.backends.cuda.is_built = lambda: False
-
-    # Set default tensor type to CPU
-    torch.set_default_tensor_type(torch.FloatTensor)
-
-    # Disable JIT
-    try:
-        torch.jit.enable_onednn_fusion(False)
-    except:
-        pass
-    try:
-        torch._C._jit_set_profiling_executor(False)
-    except:
-        pass
-
-    return torch.device('cpu')
+def _get_device() -> torch.device:
+    """Get the best available device."""
+    if torch.cuda.is_available():
+        return torch.device("cuda")
+    return torch.device("cpu")
 
 
 def _get_clip_model() -> tuple[torch.nn.Module, any]:
@@ -59,8 +33,8 @@ def _get_clip_model() -> tuple[torch.nn.Module, any]:
     global _clip_model, _clip_preprocess, _clip_tokenizer
 
     if _clip_model is None:
-        device = _ensure_cpu_environment()
-        print("Initializing unified CLIP model (ViT-B-32) on CPU...")
+        device = _get_device()
+        print(f"Initializing unified CLIP model (ViT-B-32) on {device}...")
 
         try:
             # Always use the same settings for both text and image!
@@ -68,7 +42,7 @@ def _get_clip_model() -> tuple[torch.nn.Module, any]:
             model, preprocess, _ = open_clip.create_model_and_transforms(
                 "ViT-B-32",
                 pretrained="openai",
-                device="cpu",
+                device=device,
                 jit=False,
                 force_quick_gelu=True
             )
@@ -80,7 +54,7 @@ def _get_clip_model() -> tuple[torch.nn.Module, any]:
             _clip_model = model
             _clip_preprocess = preprocess
             _clip_tokenizer = open_clip.get_tokenizer("ViT-B-32")
-            print("CLIP model initialized successfully.")
+            print(f"CLIP model initialized successfully on {device}.")
 
         except Exception as e:
             print(f"Error initializing CLIP model: {e}")
@@ -115,7 +89,7 @@ def embed_text(text: Union[str, List[str]]) -> np.ndarray:
     if isinstance(text, str):
         text = [text]
 
-    # Determine device
+    # Determine device from model
    device = next(model.parameters()).device
 
     # Tokenize text (batch processing)
@@ -138,66 +112,43 @@
 
 def embed_image(path: str) -> np.ndarray:
-    """Generate embeddings for image with strict CPU-only mode.
+    """Generate embeddings for image using best available device.
 
     Returns:
         numpy array of shape (1, 512) containing normalized CLIP image embedding.
     """
     global _clip_model, _clip_preprocess
 
-    # Force CPU-only environment
-    device = _ensure_cpu_environment()
-
     try:
         # Get model and preprocess function
         model, preprocess = get_clip()
 
-        # Ensure model is in CPU mode
-        model = model.cpu()
+        # Determine device from model
+        device = next(model.parameters()).device
 
         # Load and preprocess image
         with Image.open(path) as img:
             # Convert to RGB and preprocess
             image = preprocess(img.convert("RGB"))
 
-        # Ensure tensor is on CPU and create batch
-        image = image.cpu()
-        batch = torch.stack([image]).cpu()
+        # Move to device and create batch
+        image = image.to(device)
+        batch = torch.stack([image]).to(device)
 
-        # Generate embeddings with strict error handling
+        # Generate embeddings
         with torch.no_grad():
-            try:
-                # Encode image and normalize
-                feats = model.encode_image(batch)
-                # Normalize: feats shape is (1, 512), normalize per-sample
-                feats = feats / feats.norm(dim=-1, keepdim=True)
-                result = feats.cpu().numpy().astype(np.float32)
-
-                # Ensure shape is (1, 512)
-                if result.ndim == 1:
-                    result = result.reshape(1, -1)
-
-                assert result.shape == (1, 512), f"Expected shape (1, 512), got {result.shape}"
-                return result
-
-            except RuntimeError as e:
-                error_msg = str(e)
-                if "CUDA" in error_msg or "cuda" in error_msg.lower():
-                    print("Warning: CUDA operation attempted. Reinitializing in CPU-only mode...")
-                    # Reset model to force CPU reinitialization
-                    _clip_model = None
-                    _clip_preprocess = None
-                    # Retry with fresh CPU model
-                    model, preprocess = get_clip()
-                    image = preprocess(Image.open(path).convert("RGB"))
-                    batch = torch.stack([image]).cpu()
-                    feats = model.encode_image(batch)
-                    feats = feats / feats.norm(dim=-1, keepdim=True)
-                    result = feats.cpu().numpy().astype(np.float32)
-                    if result.ndim == 1:
-                        result = result.reshape(1, -1)
-                    return result
-                raise  # Re-raise if it's not a CUDA error
+            # Encode image and normalize
+            feats = model.encode_image(batch)
+            # Normalize: feats shape is (1, 512), normalize per-sample
+            feats = feats / feats.norm(dim=-1, keepdim=True)
+            result = feats.cpu().numpy().astype(np.float32)
+
+            # Ensure shape is (1, 512)
+            if result.ndim == 1:
+                result = result.reshape(1, -1)
+
+            assert result.shape == (1, 512), f"Expected shape (1, 512), got {result.shape}"
+            return result
 
     except Exception as e:
         print(f"Error processing image {path}: {str(e)}")
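# --- note ---------------------------------------------------------------------
# _get_device() above only distinguishes CUDA from CPU. A minimal sketch of a
# broader picker that also probes Apple's MPS backend and honors an explicit
# override through a hypothetical EMBED_DEVICE environment variable; neither
# is part of this change, both are assumptions for illustration:

import os
import torch

def get_device() -> torch.device:
    """Pick an explicit override first, then the best available backend."""
    override = os.environ.get("EMBED_DEVICE")  # hypothetical escape hatch
    if override:
        return torch.device(override)
    if torch.cuda.is_available():
        return torch.device("cuda")
    mps = getattr(torch.backends, "mps", None)  # present on torch >= 1.12
    if mps is not None and mps.is_available():
        return torch.device("mps")
    return torch.device("cpu")
# --------------------------------------------------------------------------------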
""" global _clip_model, _clip_preprocess - # Force CPU-only environment - device = _ensure_cpu_environment() - try: # Get model and preprocess function model, preprocess = get_clip() - # Ensure model is in CPU mode - model = model.cpu() + # Determine device from model + device = next(model.parameters()).device # Load and preprocess image with Image.open(path) as img: # Convert to RGB and preprocess image = preprocess(img.convert("RGB")) - # Ensure tensor is on CPU and create batch - image = image.cpu() - batch = torch.stack([image]).cpu() + # Move to device and create batch + image = image.to(device) + batch = torch.stack([image]).to(device) # Generate embeddings with strict error handling with torch.no_grad(): - try: - # Encode image and normalize - feats = model.encode_image(batch) - # Normalize: feats shape is (1, 512), normalize per-sample - feats = feats / feats.norm(dim=-1, keepdim=True) - result = feats.cpu().numpy().astype(np.float32) - - # Ensure shape is (1, 512) - if result.ndim == 1: - result = result.reshape(1, -1) - - assert result.shape == (1, 512), f"Expected shape (1, 512), got {result.shape}" - return result - - except RuntimeError as e: - error_msg = str(e) - if "CUDA" in error_msg or "cuda" in error_msg.lower(): - print("Warning: CUDA operation attempted. Reinitializing in CPU-only mode...") - # Reset model to force CPU reinitialization - _clip_model = None - _clip_preprocess = None - # Retry with fresh CPU model - model, preprocess = get_clip() - image = preprocess(Image.open(path).convert("RGB")) - batch = torch.stack([image]).cpu() - feats = model.encode_image(batch) - feats = feats / feats.norm(dim=-1, keepdim=True) - result = feats.cpu().numpy().astype(np.float32) - if result.ndim == 1: - result = result.reshape(1, -1) - return result - raise # Re-raise if it's not a CUDA error + # Encode image and normalize + feats = model.encode_image(batch) + # Normalize: feats shape is (1, 512), normalize per-sample + feats = feats / feats.norm(dim=-1, keepdim=True) + result = feats.cpu().numpy().astype(np.float32) + + # Ensure shape is (1, 512) + if result.ndim == 1: + result = result.reshape(1, -1) + + assert result.shape == (1, 512), f"Expected shape (1, 512), got {result.shape}" + return result except Exception as e: print(f"Error processing image {path}: {str(e)}") diff --git a/backend/app/main.py b/backend/app/main.py index e010032..0b1f756 100644 --- a/backend/app/main.py +++ b/backend/app/main.py @@ -34,7 +34,24 @@ app.mount("/static", StaticFiles(directory=storage_dir), name="static") # Event Handlers -app.add_event_handler("startup", connect_to_mongo) +# Event Handlers +@app.on_event("startup") +async def startup_event(): + import torch + import logging + logger = logging.getLogger("uvicorn") + + logger.info("=" * 50) + logger.info("Startup Check:") + if torch.cuda.is_available(): + gpu_name = torch.cuda.get_device_name(0) + logger.info(f"✅ GPU detected: {gpu_name}") + logger.info(f"CUDA Version: {torch.version.cuda}") + else: + logger.warning("⚠️ GPU NOT DETECTED. Running in CPU mode.") + logger.info("=" * 50) + await connect_to_mongo() + app.add_event_handler("shutdown", close_mongo_connection) class QueryRequest(BaseModel): diff --git a/cli.py b/cli.py deleted file mode 100644 index bbbc370..0000000 --- a/cli.py +++ /dev/null @@ -1,153 +0,0 @@ -#!/usr/bin/env python3 -""" -Simple command-line interface for the RAG system. 
-""" - -import sys -import os -from pathlib import Path - -# Add the backend directory to Python path -backend_dir = Path(__file__).parent / "backend" -sys.path.insert(0, str(backend_dir)) - -from app.ingestion import extract_any -from app.embeddings import embed_text, embed_image -from app.vector_store import get_store -from app.retriever import get_retriever -from app.llm import build_adapter, load_config, generate_answer -from app.utils import create_citations - - -def ingest_file(file_path: str) -> str: - """Ingest a file from command line.""" - if not os.path.exists(file_path): - return f"File not found: {file_path}" - - try: - file_name = os.path.basename(file_path) - chunks = extract_any(file_path, file_name, "") - - if not chunks: - return f"No content extracted from {file_name}" - - store = get_store() - items = [] - - for chunk in chunks: - if chunk.file_type == 'image': - embedding = embed_image(chunk.filepath) - else: - embedding = embed_text(chunk.content) - - items.append({ - 'embedding': embedding[0], - 'metadata': { - 'content': chunk.content, - 'file_name': chunk.file_name, - 'file_type': chunk.file_type, - 'page_number': chunk.page_number, - 'timestamp': chunk.timestamp, - 'filepath': chunk.filepath, - 'width': getattr(chunk, 'width', None), - 'height': getattr(chunk, 'height', None), - 'bbox': getattr(chunk, 'bbox', None), - 'char_start': getattr(chunk, 'char_start', None), - 'char_end': getattr(chunk, 'char_end', None), - 'modality': chunk.file_type - } - }) - - added = store.upsert(items) - return f"Successfully ingested {file_name}: {added} chunks added to index." - - except Exception as e: - return f"Error ingesting file: {str(e)}" - - -def query_text(question: str, top_k: int = 5) -> str: - """Query the system from command line.""" - if not question.strip(): - return "Please enter a question." - - try: - retriever = get_retriever() - results = retriever.retrieve(question, top_k) - - if not results: - return "I don't have any relevant information to answer this question." - - config_path = os.path.join(os.path.dirname(__file__), "backend", "config.yaml") - config = load_config(config_path) - adapter = build_adapter(config) - - answer = generate_answer(question, results, adapter) - - # Add sources - citations = create_citations(results) - sources_text = "\n\nSources:\n" - for i, citation in enumerate(citations, 1): - sources_text += f"{i}. {citation['file_name']} ({citation['file_type']})\n" - - return answer + sources_text - - except Exception as e: - return f"Error processing query: {str(e)}" - - -def show_status() -> str: - """Show system status.""" - try: - store = get_store() - status = store.status() - return f"""System Status: -- Total vectors: {status['vectors']} -- Files indexed: {status['files']} -- Embedding dimension: {status['dimension']} -- Modalities: {', '.join(status['modalities']) if status['modalities'] else 'None'}""" - except Exception as e: - return f"Error getting status: {str(e)}" - - -def main(): - """Main CLI interface.""" - print("Offline Multimodal RAG - Command Line Interface") - print("=" * 50) - - while True: - print("\nOptions:") - print("1. Ingest file") - print("2. Ask question") - print("3. Show status") - print("4. 
Exit") - - choice = input("\nEnter your choice (1-4): ").strip() - - if choice == "1": - file_path = input("Enter file path: ").strip() - if file_path: - result = ingest_file(file_path) - print(f"\n{result}") - - elif choice == "2": - question = input("Enter your question: ").strip() - if question: - top_k = input("Number of sources (default 5): ").strip() - top_k = int(top_k) if top_k.isdigit() else 5 - result = query_text(question, top_k) - print(f"\n{result}") - - elif choice == "3": - result = show_status() - print(f"\n{result}") - - elif choice == "4": - print("Goodbye!") - break - - else: - print("Invalid choice. Please enter 1-4.") - - -if __name__ == "__main__": - main() diff --git a/debug_rag_sources.py b/debug_rag_sources.py deleted file mode 100644 index 8966ec8..0000000 --- a/debug_rag_sources.py +++ /dev/null @@ -1,31 +0,0 @@ - -import sys -import os -sys.path.insert(0, os.path.abspath("backend")) - -from app.rag import answer_query - -def debug_rag(query): - # Resolve config path - cfg_path = os.path.abspath(os.path.join("backend", "config.yaml")) - print(f"Using config: {cfg_path}") - - try: - response = answer_query(cfg_path, query) - print("\n--- Response ---") - print(f"Answer: {response.get('answer', '')[:50]}...") - print("Sources:") - for s in response.get('sources', []): - print(f" [{s.get('id')}] File: {s.get('file_name')}") - snippet = s.get('snippet', '') - print(f" Snippet ({len(snippet)} chars): {snippet[:50]}...") - if not snippet: - print(" WARNING: Snippet is empty!") - except Exception as e: - print(f"Error: {e}") - -if __name__ == "__main__": - query = "test" - if len(sys.argv) > 1: - query = sys.argv[1] - debug_rag(query) diff --git a/package-lock.json b/package-lock.json deleted file mode 100644 index 12b6041..0000000 --- a/package-lock.json +++ /dev/null @@ -1,21 +0,0 @@ -{ - "name": "rag-offline-chatbot", - "lockfileVersion": 3, - "requires": true, - "packages": { - "": { - "devDependencies": { - "baseline-browser-mapping": "^2.9.16" - } - }, - "node_modules/baseline-browser-mapping": { - "version": "2.9.16", - "resolved": "https://registry.npmjs.org/baseline-browser-mapping/-/baseline-browser-mapping-2.9.16.tgz", - "integrity": "sha512-KeUZdBuxngy825i8xvzaK1Ncnkx0tBmb3k8DkEuqjKRkmtvNTjey2ZsNeh8Dw4lfKvbCOu9oeNx2TKm2vHqcRw==", - "dev": true, - "bin": { - "baseline-browser-mapping": "dist/cli.js" - } - } - } -} diff --git a/package.json b/package.json deleted file mode 100644 index 74b0163..0000000 --- a/package.json +++ /dev/null @@ -1,5 +0,0 @@ -{ - "devDependencies": { - "baseline-browser-mapping": "^2.9.16" - } -} diff --git a/requirements.txt b/requirements.txt index c47719a..1423c02 100644 Binary files a/requirements.txt and b/requirements.txt differ diff --git a/verify_content.py b/verify_content.py deleted file mode 100644 index a06369c..0000000 --- a/verify_content.py +++ /dev/null @@ -1,71 +0,0 @@ -import requests -import sys - -BASE_URL = "http://127.0.0.1:8000" -EMAIL = "test@example.com" -PASSWORD = "password123" - -def get_token(): - try: - # Try login first - response = requests.post(f"{BASE_URL}/api/auth/login", data={"username": EMAIL, "password": PASSWORD}) - if response.status_code == 200: - return response.json()["access_token"] - - # If login fails, try register - print("Login failed, trying registration...") - response = requests.post(f"{BASE_URL}/api/auth/register", json={"email": EMAIL, "password": PASSWORD, "name": "Test User"}) - if response.status_code == 200: - # Login again - response = 
requests.post(f"{BASE_URL}/api/auth/login", data={"username": EMAIL, "password": PASSWORD}) - return response.json()["access_token"] - else: - print(f"Registration failed: {response.text}") - sys.exit(1) - except Exception as e: - print(f"Auth failed: {e}") - sys.exit(1) - -def test_content_retrieval(): - token = get_token() - headers = {"Authorization": f"Bearer {token}"} - - # 1. Ingest a file - print("Ingesting test file...", flush=True) - files = {'files': ('test_content.txt', 'This is a unique test phrase for verification: BANANA_SPLIT.', 'text/plain')} - response = requests.post(f"{BASE_URL}/ingest", headers=headers, files=files) - if response.status_code != 200: - print(f"Ingest failed: {response.text}", flush=True) - return - - print("Ingest successful.", flush=True) - - # 2. Query - print("Querying...", flush=True) - # Using a query that matches the content - response = requests.post(f"{BASE_URL}/query", json={"query": "BANANA_SPLIT"}, headers=headers) - - if response.status_code == 200: - data = response.json() - sources = data.get("sources", []) - print(f"Found {len(sources)} sources.") - - found_content = False - for s in sources: - text = s.get("text", "") - if "BANANA_SPLIT" in text: - print("SUCCESS: Found expected content in source text!") - print(f"Source preview: {text[:50]}...") - found_content = True - break - else: - print(f"Source text (first 50 chars): {text[:50]}...") - - if not found_content: - print("FAILURE: Did not find expected content in any source.") - print("Full sources:", sources) - else: - print(f"Query failed: {response.status_code} {response.text}") - -if __name__ == "__main__": - test_content_retrieval()