diff --git a/backend/app/embeddings/generate.py b/backend/app/embeddings/generate.py
index aaa6b3e..12b8325 100644
--- a/backend/app/embeddings/generate.py
+++ b/backend/app/embeddings/generate.py
@@ -18,37 +18,11 @@
 _clip_preprocess = None
 _clip_tokenizer = None
 
-def _ensure_cpu_environment():
-    """Force CPU-only environment for PyTorch operations."""
-    import os
-    import torch
-
-    # Environment variables
-    os.environ['CUDA_VISIBLE_DEVICES'] = ''
-    os.environ['USE_CUDA'] = '0'
-
-    # PyTorch settings
-    torch.cuda.is_available = lambda: False
-    if hasattr(torch.backends, 'cudnn'):
-        torch.backends.cudnn.enabled = False
-    if hasattr(torch.backends, 'cuda'):
-        torch.backends.cuda.matmul.allow_tf32 = False
-        torch.backends.cuda.is_built = lambda: False
-
-    # Set default tensor type to CPU
-    torch.set_default_tensor_type(torch.FloatTensor)
-
-    # Disable JIT
-    try:
-        torch.jit.enable_onednn_fusion(False)
-    except:
-        pass
-    try:
-        torch._C._jit_set_profiling_executor(False)
-    except:
-        pass
-
-    return torch.device('cpu')
+def _get_device() -> torch.device:
+    """Get the best available device."""
+    if torch.cuda.is_available():
+        return torch.device("cuda")
+    return torch.device("cpu")
 
 
 def _get_clip_model() -> tuple[torch.nn.Module, any]:
@@ -59,8 +33,8 @@ def _get_clip_model() -> tuple[torch.nn.Module, any]:
     global _clip_model, _clip_preprocess, _clip_tokenizer
 
     if _clip_model is None:
-        device = _ensure_cpu_environment()
-        print("Initializing unified CLIP model (ViT-B-32) on CPU...")
+        device = _get_device()
+        print(f"Initializing unified CLIP model (ViT-B-32) on {device}...")
 
         try:
             # Always use the same settings for both text and image!
@@ -68,7 +42,7 @@ def _get_clip_model() -> tuple[torch.nn.Module, any]:
             model, preprocess, _ = open_clip.create_model_and_transforms(
                 "ViT-B-32",
                 pretrained="openai",
-                device="cpu",
+                device=device,
                 jit=False,
                 force_quick_gelu=True
             )
@@ -80,7 +54,7 @@ def _get_clip_model() -> tuple[torch.nn.Module, any]:
             _clip_model = model
             _clip_preprocess = preprocess
             _clip_tokenizer = open_clip.get_tokenizer("ViT-B-32")
-            print("CLIP model initialized successfully.")
+            print(f"CLIP model initialized successfully on {device}.")
 
         except Exception as e:
             print(f"Error initializing CLIP model: {e}")
@@ -115,7 +89,7 @@ def embed_text(text: Union[str, List[str]]) -> np.ndarray:
     if isinstance(text, str):
         text = [text]
 
-    # Determine device
+    # Determine device from model
    device = next(model.parameters()).device
 
     # Tokenize text (batch processing)
@@ -138,66 +112,43 @@
 
 def embed_image(path: str) -> np.ndarray:
-    """Generate embeddings for image with strict CPU-only mode.
+    """Generate embeddings for image using best available device.
 
     Returns:
         numpy array of shape (1, 512) containing normalized CLIP image embedding.
     """
     global _clip_model, _clip_preprocess
 
-    # Force CPU-only environment
-    device = _ensure_cpu_environment()
-
     try:
         # Get model and preprocess function
         model, preprocess = get_clip()
 
-        # Ensure model is in CPU mode
-        model = model.cpu()
+        # Determine device from model
+        device = next(model.parameters()).device
 
         # Load and preprocess image
         with Image.open(path) as img:
             # Convert to RGB and preprocess
             image = preprocess(img.convert("RGB"))
 
-        # Ensure tensor is on CPU and create batch
-        image = image.cpu()
-        batch = torch.stack([image]).cpu()
+        # Move to device and create batch
+        image = image.to(device)
+        batch = torch.stack([image]).to(device)
 
-        # Generate embeddings with strict error handling
+        # Generate embeddings
         with torch.no_grad():
-            try:
-                # Encode image and normalize
-                feats = model.encode_image(batch)
-                # Normalize: feats shape is (1, 512), normalize per-sample
-                feats = feats / feats.norm(dim=-1, keepdim=True)
-                result = feats.cpu().numpy().astype(np.float32)
-
-                # Ensure shape is (1, 512)
-                if result.ndim == 1:
-                    result = result.reshape(1, -1)
-
-                assert result.shape == (1, 512), f"Expected shape (1, 512), got {result.shape}"
-                return result
-
-            except RuntimeError as e:
-                error_msg = str(e)
-                if "CUDA" in error_msg or "cuda" in error_msg.lower():
-                    print("Warning: CUDA operation attempted. Reinitializing in CPU-only mode...")
-                    # Reset model to force CPU reinitialization
-                    _clip_model = None
-                    _clip_preprocess = None
-                    # Retry with fresh CPU model
-                    model, preprocess = get_clip()
-                    image = preprocess(Image.open(path).convert("RGB"))
-                    batch = torch.stack([image]).cpu()
-                    feats = model.encode_image(batch)
-                    feats = feats / feats.norm(dim=-1, keepdim=True)
-                    result = feats.cpu().numpy().astype(np.float32)
-                    if result.ndim == 1:
-                        result = result.reshape(1, -1)
-                    return result
-                raise  # Re-raise if it's not a CUDA error
+            # Encode image and normalize
+            feats = model.encode_image(batch)
+            # Normalize: feats shape is (1, 512), normalize per-sample
+            feats = feats / feats.norm(dim=-1, keepdim=True)
+            result = feats.cpu().numpy().astype(np.float32)
+
+            # Ensure shape is (1, 512)
+            if result.ndim == 1:
+                result = result.reshape(1, -1)
+
+            assert result.shape == (1, 512), f"Expected shape (1, 512), got {result.shape}"
+            return result
 
     except Exception as e:
         print(f"Error processing image {path}: {str(e)}")
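# --- note ---------------------------------------------------------------------
# _get_device() above only distinguishes CUDA from CPU. A minimal sketch of a
# broader picker that also probes Apple's MPS backend and honors an explicit
# override through a hypothetical EMBED_DEVICE environment variable; neither
# is part of this change, both are assumptions for illustration:

import os
import torch

def get_device() -> torch.device:
    """Pick an explicit override first, then the best available backend."""
    override = os.environ.get("EMBED_DEVICE")  # hypothetical escape hatch
    if override:
        return torch.device(override)
    if torch.cuda.is_available():
        return torch.device("cuda")
    mps = getattr(torch.backends, "mps", None)  # present on torch >= 1.12
    if mps is not None and mps.is_available():
        return torch.device("mps")
    return torch.device("cpu")
# --------------------------------------------------------------------------------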
""" global _clip_model, _clip_preprocess - # Force CPU-only environment - device = _ensure_cpu_environment() - try: # Get model and preprocess function model, preprocess = get_clip() - # Ensure model is in CPU mode - model = model.cpu() + # Determine device from model + device = next(model.parameters()).device # Load and preprocess image with Image.open(path) as img: # Convert to RGB and preprocess image = preprocess(img.convert("RGB")) - # Ensure tensor is on CPU and create batch - image = image.cpu() - batch = torch.stack([image]).cpu() + # Move to device and create batch + image = image.to(device) + batch = torch.stack([image]).to(device) # Generate embeddings with strict error handling with torch.no_grad(): - try: - # Encode image and normalize - feats = model.encode_image(batch) - # Normalize: feats shape is (1, 512), normalize per-sample - feats = feats / feats.norm(dim=-1, keepdim=True) - result = feats.cpu().numpy().astype(np.float32) - - # Ensure shape is (1, 512) - if result.ndim == 1: - result = result.reshape(1, -1) - - assert result.shape == (1, 512), f"Expected shape (1, 512), got {result.shape}" - return result - - except RuntimeError as e: - error_msg = str(e) - if "CUDA" in error_msg or "cuda" in error_msg.lower(): - print("Warning: CUDA operation attempted. Reinitializing in CPU-only mode...") - # Reset model to force CPU reinitialization - _clip_model = None - _clip_preprocess = None - # Retry with fresh CPU model - model, preprocess = get_clip() - image = preprocess(Image.open(path).convert("RGB")) - batch = torch.stack([image]).cpu() - feats = model.encode_image(batch) - feats = feats / feats.norm(dim=-1, keepdim=True) - result = feats.cpu().numpy().astype(np.float32) - if result.ndim == 1: - result = result.reshape(1, -1) - return result - raise # Re-raise if it's not a CUDA error + # Encode image and normalize + feats = model.encode_image(batch) + # Normalize: feats shape is (1, 512), normalize per-sample + feats = feats / feats.norm(dim=-1, keepdim=True) + result = feats.cpu().numpy().astype(np.float32) + + # Ensure shape is (1, 512) + if result.ndim == 1: + result = result.reshape(1, -1) + + assert result.shape == (1, 512), f"Expected shape (1, 512), got {result.shape}" + return result except Exception as e: print(f"Error processing image {path}: {str(e)}") diff --git a/backend/app/main.py b/backend/app/main.py index e010032..0b1f756 100644 --- a/backend/app/main.py +++ b/backend/app/main.py @@ -34,7 +34,24 @@ app.mount("/static", StaticFiles(directory=storage_dir), name="static") # Event Handlers -app.add_event_handler("startup", connect_to_mongo) +# Event Handlers +@app.on_event("startup") +async def startup_event(): + import torch + import logging + logger = logging.getLogger("uvicorn") + + logger.info("=" * 50) + logger.info("Startup Check:") + if torch.cuda.is_available(): + gpu_name = torch.cuda.get_device_name(0) + logger.info(f"✅ GPU detected: {gpu_name}") + logger.info(f"CUDA Version: {torch.version.cuda}") + else: + logger.warning("⚠️ GPU NOT DETECTED. Running in CPU mode.") + logger.info("=" * 50) + await connect_to_mongo() + app.add_event_handler("shutdown", close_mongo_connection) class QueryRequest(BaseModel): diff --git a/cli.py b/cli.py deleted file mode 100644 index bbbc370..0000000 --- a/cli.py +++ /dev/null @@ -1,153 +0,0 @@ -#!/usr/bin/env python3 -""" -Simple command-line interface for the RAG system. 
-""" - -import sys -import os -from pathlib import Path - -# Add the backend directory to Python path -backend_dir = Path(__file__).parent / "backend" -sys.path.insert(0, str(backend_dir)) - -from app.ingestion import extract_any -from app.embeddings import embed_text, embed_image -from app.vector_store import get_store -from app.retriever import get_retriever -from app.llm import build_adapter, load_config, generate_answer -from app.utils import create_citations - - -def ingest_file(file_path: str) -> str: - """Ingest a file from command line.""" - if not os.path.exists(file_path): - return f"File not found: {file_path}" - - try: - file_name = os.path.basename(file_path) - chunks = extract_any(file_path, file_name, "") - - if not chunks: - return f"No content extracted from {file_name}" - - store = get_store() - items = [] - - for chunk in chunks: - if chunk.file_type == 'image': - embedding = embed_image(chunk.filepath) - else: - embedding = embed_text(chunk.content) - - items.append({ - 'embedding': embedding[0], - 'metadata': { - 'content': chunk.content, - 'file_name': chunk.file_name, - 'file_type': chunk.file_type, - 'page_number': chunk.page_number, - 'timestamp': chunk.timestamp, - 'filepath': chunk.filepath, - 'width': getattr(chunk, 'width', None), - 'height': getattr(chunk, 'height', None), - 'bbox': getattr(chunk, 'bbox', None), - 'char_start': getattr(chunk, 'char_start', None), - 'char_end': getattr(chunk, 'char_end', None), - 'modality': chunk.file_type - } - }) - - added = store.upsert(items) - return f"Successfully ingested {file_name}: {added} chunks added to index." - - except Exception as e: - return f"Error ingesting file: {str(e)}" - - -def query_text(question: str, top_k: int = 5) -> str: - """Query the system from command line.""" - if not question.strip(): - return "Please enter a question." - - try: - retriever = get_retriever() - results = retriever.retrieve(question, top_k) - - if not results: - return "I don't have any relevant information to answer this question." - - config_path = os.path.join(os.path.dirname(__file__), "backend", "config.yaml") - config = load_config(config_path) - adapter = build_adapter(config) - - answer = generate_answer(question, results, adapter) - - # Add sources - citations = create_citations(results) - sources_text = "\n\nSources:\n" - for i, citation in enumerate(citations, 1): - sources_text += f"{i}. {citation['file_name']} ({citation['file_type']})\n" - - return answer + sources_text - - except Exception as e: - return f"Error processing query: {str(e)}" - - -def show_status() -> str: - """Show system status.""" - try: - store = get_store() - status = store.status() - return f"""System Status: -- Total vectors: {status['vectors']} -- Files indexed: {status['files']} -- Embedding dimension: {status['dimension']} -- Modalities: {', '.join(status['modalities']) if status['modalities'] else 'None'}""" - except Exception as e: - return f"Error getting status: {str(e)}" - - -def main(): - """Main CLI interface.""" - print("Offline Multimodal RAG - Command Line Interface") - print("=" * 50) - - while True: - print("\nOptions:") - print("1. Ingest file") - print("2. Ask question") - print("3. Show status") - print("4. 
Exit") - - choice = input("\nEnter your choice (1-4): ").strip() - - if choice == "1": - file_path = input("Enter file path: ").strip() - if file_path: - result = ingest_file(file_path) - print(f"\n{result}") - - elif choice == "2": - question = input("Enter your question: ").strip() - if question: - top_k = input("Number of sources (default 5): ").strip() - top_k = int(top_k) if top_k.isdigit() else 5 - result = query_text(question, top_k) - print(f"\n{result}") - - elif choice == "3": - result = show_status() - print(f"\n{result}") - - elif choice == "4": - print("Goodbye!") - break - - else: - print("Invalid choice. Please enter 1-4.") - - -if __name__ == "__main__": - main() diff --git a/debug_rag_sources.py b/debug_rag_sources.py deleted file mode 100644 index 8966ec8..0000000 --- a/debug_rag_sources.py +++ /dev/null @@ -1,31 +0,0 @@ - -import sys -import os -sys.path.insert(0, os.path.abspath("backend")) - -from app.rag import answer_query - -def debug_rag(query): - # Resolve config path - cfg_path = os.path.abspath(os.path.join("backend", "config.yaml")) - print(f"Using config: {cfg_path}") - - try: - response = answer_query(cfg_path, query) - print("\n--- Response ---") - print(f"Answer: {response.get('answer', '')[:50]}...") - print("Sources:") - for s in response.get('sources', []): - print(f" [{s.get('id')}] File: {s.get('file_name')}") - snippet = s.get('snippet', '') - print(f" Snippet ({len(snippet)} chars): {snippet[:50]}...") - if not snippet: - print(" WARNING: Snippet is empty!") - except Exception as e: - print(f"Error: {e}") - -if __name__ == "__main__": - query = "test" - if len(sys.argv) > 1: - query = sys.argv[1] - debug_rag(query) diff --git a/package-lock.json b/package-lock.json deleted file mode 100644 index 12b6041..0000000 --- a/package-lock.json +++ /dev/null @@ -1,21 +0,0 @@ -{ - "name": "rag-offline-chatbot", - "lockfileVersion": 3, - "requires": true, - "packages": { - "": { - "devDependencies": { - "baseline-browser-mapping": "^2.9.16" - } - }, - "node_modules/baseline-browser-mapping": { - "version": "2.9.16", - "resolved": "https://registry.npmjs.org/baseline-browser-mapping/-/baseline-browser-mapping-2.9.16.tgz", - "integrity": "sha512-KeUZdBuxngy825i8xvzaK1Ncnkx0tBmb3k8DkEuqjKRkmtvNTjey2ZsNeh8Dw4lfKvbCOu9oeNx2TKm2vHqcRw==", - "dev": true, - "bin": { - "baseline-browser-mapping": "dist/cli.js" - } - } - } -} diff --git a/package.json b/package.json deleted file mode 100644 index 74b0163..0000000 --- a/package.json +++ /dev/null @@ -1,5 +0,0 @@ -{ - "devDependencies": { - "baseline-browser-mapping": "^2.9.16" - } -} diff --git a/requirements.txt b/requirements.txt index c47719a..1423c02 100644 Binary files a/requirements.txt and b/requirements.txt differ diff --git a/verify_content.py b/verify_content.py deleted file mode 100644 index a06369c..0000000 --- a/verify_content.py +++ /dev/null @@ -1,71 +0,0 @@ -import requests -import sys - -BASE_URL = "http://127.0.0.1:8000" -EMAIL = "test@example.com" -PASSWORD = "password123" - -def get_token(): - try: - # Try login first - response = requests.post(f"{BASE_URL}/api/auth/login", data={"username": EMAIL, "password": PASSWORD}) - if response.status_code == 200: - return response.json()["access_token"] - - # If login fails, try register - print("Login failed, trying registration...") - response = requests.post(f"{BASE_URL}/api/auth/register", json={"email": EMAIL, "password": PASSWORD, "name": "Test User"}) - if response.status_code == 200: - # Login again - response = 
requests.post(f"{BASE_URL}/api/auth/login", data={"username": EMAIL, "password": PASSWORD}) - return response.json()["access_token"] - else: - print(f"Registration failed: {response.text}") - sys.exit(1) - except Exception as e: - print(f"Auth failed: {e}") - sys.exit(1) - -def test_content_retrieval(): - token = get_token() - headers = {"Authorization": f"Bearer {token}"} - - # 1. Ingest a file - print("Ingesting test file...", flush=True) - files = {'files': ('test_content.txt', 'This is a unique test phrase for verification: BANANA_SPLIT.', 'text/plain')} - response = requests.post(f"{BASE_URL}/ingest", headers=headers, files=files) - if response.status_code != 200: - print(f"Ingest failed: {response.text}", flush=True) - return - - print("Ingest successful.", flush=True) - - # 2. Query - print("Querying...", flush=True) - # Using a query that matches the content - response = requests.post(f"{BASE_URL}/query", json={"query": "BANANA_SPLIT"}, headers=headers) - - if response.status_code == 200: - data = response.json() - sources = data.get("sources", []) - print(f"Found {len(sources)} sources.") - - found_content = False - for s in sources: - text = s.get("text", "") - if "BANANA_SPLIT" in text: - print("SUCCESS: Found expected content in source text!") - print(f"Source preview: {text[:50]}...") - found_content = True - break - else: - print(f"Source text (first 50 chars): {text[:50]}...") - - if not found_content: - print("FAILURE: Did not find expected content in any source.") - print("Full sources:", sources) - else: - print(f"Query failed: {response.status_code} {response.text}") - -if __name__ == "__main__": - test_content_retrieval()