Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
105 changes: 28 additions & 77 deletions backend/app/embeddings/generate.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,37 +18,11 @@
_clip_preprocess = None
_clip_tokenizer = None

def _ensure_cpu_environment():
"""Force CPU-only environment for PyTorch operations."""
import os
import torch

# Environment variables
os.environ['CUDA_VISIBLE_DEVICES'] = ''
os.environ['USE_CUDA'] = '0'

# PyTorch settings
torch.cuda.is_available = lambda: False
if hasattr(torch.backends, 'cudnn'):
torch.backends.cudnn.enabled = False
if hasattr(torch.backends, 'cuda'):
torch.backends.cuda.matmul.allow_tf32 = False
torch.backends.cuda.is_built = lambda: False

# Set default tensor type to CPU
torch.set_default_tensor_type(torch.FloatTensor)

# Disable JIT
try:
torch.jit.enable_onednn_fusion(False)
except:
pass
try:
torch._C._jit_set_profiling_executor(False)
except:
pass

return torch.device('cpu')
def _get_device() -> torch.device:
"""Get the best available device."""
if torch.cuda.is_available():
return torch.device("cuda")
return torch.device("cpu")


def _get_clip_model() -> tuple[torch.nn.Module, any]:
Expand All @@ -59,16 +33,16 @@ def _get_clip_model() -> tuple[torch.nn.Module, any]:
global _clip_model, _clip_preprocess, _clip_tokenizer

if _clip_model is None:
device = _ensure_cpu_environment()
print("Initializing unified CLIP model (ViT-B-32) on CPU...")
device = _get_device()
print(f"Initializing unified CLIP model (ViT-B-32) on {device}...")

try:
# Always use the same settings for both text and image!
# force_quick_gelu=True is standard for OpenAI CLIP weights
model, preprocess, _ = open_clip.create_model_and_transforms(
"ViT-B-32",
pretrained="openai",
device="cpu",
device=device,
jit=False,
force_quick_gelu=True
)
Expand All @@ -80,7 +54,7 @@ def _get_clip_model() -> tuple[torch.nn.Module, any]:
_clip_model = model
_clip_preprocess = preprocess
_clip_tokenizer = open_clip.get_tokenizer("ViT-B-32")
print("CLIP model initialized successfully.")
print(f"CLIP model initialized successfully on {device}.")

except Exception as e:
print(f"Error initializing CLIP model: {e}")
Expand Down Expand Up @@ -115,7 +89,7 @@ def embed_text(text: Union[str, List[str]]) -> np.ndarray:
if isinstance(text, str):
text = [text]

# Determine device
# Determine device from model
device = next(model.parameters()).device

# Tokenize text (batch processing)
Expand All @@ -138,66 +112,43 @@ def embed_text(text: Union[str, List[str]]) -> np.ndarray:


def embed_image(path: str) -> np.ndarray:
"""Generate embeddings for image with strict CPU-only mode.
"""Generate embeddings for image using best available device.

Returns:
numpy array of shape (1, 512) containing normalized CLIP image embedding.
"""
global _clip_model, _clip_preprocess

# Force CPU-only environment
device = _ensure_cpu_environment()

try:
# Get model and preprocess function
model, preprocess = get_clip()

# Ensure model is in CPU mode
model = model.cpu()
# Determine device from model
device = next(model.parameters()).device

# Load and preprocess image
with Image.open(path) as img:
# Convert to RGB and preprocess
image = preprocess(img.convert("RGB"))

# Ensure tensor is on CPU and create batch
image = image.cpu()
batch = torch.stack([image]).cpu()
# Move to device and create batch
image = image.to(device)
batch = torch.stack([image]).to(device)

# Generate embeddings with strict error handling
with torch.no_grad():
try:
# Encode image and normalize
feats = model.encode_image(batch)
# Normalize: feats shape is (1, 512), normalize per-sample
feats = feats / feats.norm(dim=-1, keepdim=True)
result = feats.cpu().numpy().astype(np.float32)

# Ensure shape is (1, 512)
if result.ndim == 1:
result = result.reshape(1, -1)

assert result.shape == (1, 512), f"Expected shape (1, 512), got {result.shape}"
return result

except RuntimeError as e:
error_msg = str(e)
if "CUDA" in error_msg or "cuda" in error_msg.lower():
print("Warning: CUDA operation attempted. Reinitializing in CPU-only mode...")
# Reset model to force CPU reinitialization
_clip_model = None
_clip_preprocess = None
# Retry with fresh CPU model
model, preprocess = get_clip()
image = preprocess(Image.open(path).convert("RGB"))
batch = torch.stack([image]).cpu()
feats = model.encode_image(batch)
feats = feats / feats.norm(dim=-1, keepdim=True)
result = feats.cpu().numpy().astype(np.float32)
if result.ndim == 1:
result = result.reshape(1, -1)
return result
raise # Re-raise if it's not a CUDA error
# Encode image and normalize
feats = model.encode_image(batch)
# Normalize: feats shape is (1, 512), normalize per-sample
feats = feats / feats.norm(dim=-1, keepdim=True)
result = feats.cpu().numpy().astype(np.float32)

# Ensure shape is (1, 512)
if result.ndim == 1:
result = result.reshape(1, -1)

assert result.shape == (1, 512), f"Expected shape (1, 512), got {result.shape}"
return result

except Exception as e:
print(f"Error processing image {path}: {str(e)}")
Expand Down
19 changes: 18 additions & 1 deletion backend/app/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,24 @@
app.mount("/static", StaticFiles(directory=storage_dir), name="static")

# Event Handlers
app.add_event_handler("startup", connect_to_mongo)
# Event Handlers
@app.on_event("startup")
async def startup_event():
import torch
import logging
logger = logging.getLogger("uvicorn")

logger.info("=" * 50)
logger.info("Startup Check:")
if torch.cuda.is_available():
gpu_name = torch.cuda.get_device_name(0)
logger.info(f"✅ GPU detected: {gpu_name}")
logger.info(f"CUDA Version: {torch.version.cuda}")
else:
logger.warning("⚠️ GPU NOT DETECTED. Running in CPU mode.")
logger.info("=" * 50)
await connect_to_mongo()

app.add_event_handler("shutdown", close_mongo_connection)

class QueryRequest(BaseModel):
Expand Down
153 changes: 0 additions & 153 deletions cli.py

This file was deleted.

31 changes: 0 additions & 31 deletions debug_rag_sources.py

This file was deleted.

21 changes: 0 additions & 21 deletions package-lock.json

This file was deleted.

Loading
Loading