diff --git a/CLAUDE.md b/CLAUDE.md index 4c33ae3..626770e 100644 --- a/CLAUDE.md +++ b/CLAUDE.md @@ -34,6 +34,32 @@ Backend runs on port 8000, frontend on port 5173. If port 8000 is in use: lsof -ti:8000 | xargs kill -9 # Kill process on port 8000 ``` +## Code Validation + +**IMPORTANT**: After making code changes, always validate that the Docker image still builds successfully: + +```bash +docker compose build +``` + +This command: +- Validates frontend code compiles (Vite build) +- Validates backend dependencies install correctly +- Ensures production build artifacts are created +- Catches build-time errors before deployment + +Common build failures: +- CSS class name errors (invalid Tailwind utilities) +- TypeScript/JavaScript import errors +- Python dependency conflicts +- Missing environment variables in build process + +If the build fails, fix the errors before committing. A successful build should complete with: +``` +✓ built in XXXms +bjeans/multi-ai-chat:latest Built +``` + ## Architecture ### Backend Request Flow diff --git a/backend/.env.example b/backend/.env.example index 73d0a30..df7c79e 100644 --- a/backend/.env.example +++ b/backend/.env.example @@ -2,3 +2,7 @@ LITELLM_PROXY_URL=http://localhost:4000 LITELLM_API_KEY=your-api-key-here DATABASE_URL=sqlite+aiosqlite:///./database.db CORS_ORIGINS=http://localhost:5173,http://localhost:3000 + +# Cache TTL for server groups (in seconds) +# Default: 120 (2 minutes) +CACHE_TTL_SECONDS=120 diff --git a/backend/api/config.py b/backend/api/config.py index c3e6815..7a4712b 100644 --- a/backend/api/config.py +++ b/backend/api/config.py @@ -1,12 +1,41 @@ from fastapi import APIRouter -from typing import List +from typing import List, Optional from pydantic import BaseModel +import asyncio +import time +import os +import logging -from models.schemas import ModelInfo +from models.schemas import ModelInfo, ServerGroup, SelectionAnalysis from services.litellm_client import LiteLLMClient +from services.model_processor import process_models_with_health, analyze_selection + +logger = logging.getLogger(__name__) router = APIRouter(prefix="/api/config", tags=["config"]) +# Global cache for server groups with expiration +_server_groups_cache: Optional[List[ServerGroup]] = None +_cache_timestamp: Optional[float] = None +_cache_lock = asyncio.Lock() + +# Parse cache TTL with error handling +def _get_cache_ttl() -> int: + """Get cache TTL from environment with validation""" + default_ttl = 120 + try: + ttl_str = os.getenv("CACHE_TTL_SECONDS", str(default_ttl)) + ttl = int(ttl_str) + if ttl <= 0: + logger.warning(f"Invalid CACHE_TTL_SECONDS={ttl_str} (must be > 0), using default {default_ttl}") + return default_ttl + return ttl + except ValueError: + logger.warning(f"Invalid CACHE_TTL_SECONDS={os.getenv('CACHE_TTL_SECONDS')} (must be integer), using default {default_ttl}") + return default_ttl + +CACHE_TTL_SECONDS = _get_cache_ttl() + class TestModelRequest(BaseModel): model_id: str @@ -39,3 +68,50 @@ async def test_model(request: TestModelRequest): model_id=request.model_id, available=available, ) + + +@router.get("/models/by-server", response_model=List[ServerGroup]) +async def get_models_by_server(): + """ + Get models grouped by Ollama server with health status and size info + """ + global _server_groups_cache, _cache_timestamp + + # Use lock to prevent race conditions on cache access + async with _cache_lock: + now = time.time() + + # Return cached data if valid and not expired + if _server_groups_cache is not None and _cache_timestamp is not 
None: + if now - _cache_timestamp < CACHE_TTL_SECONDS: + return _server_groups_cache + + # Fetch fresh data + client = LiteLLMClient() + raw_data = await client.get_model_info() + + server_groups = await process_models_with_health(raw_data) + + # Update cache with timestamp + _server_groups_cache = server_groups + _cache_timestamp = now + + return server_groups + + +class AnalyzeSelectionRequest(BaseModel): + model_ids: List[str] + + +@router.post("/models/analyze-selection", response_model=SelectionAnalysis) +async def analyze_model_selection(request: AnalyzeSelectionRequest): + """ + Analyze selected models for resource conflicts and provide recommendations + """ + # Get current server groups + server_groups = await get_models_by_server() + + # Analyze selection + analysis = await analyze_selection(request.model_ids, server_groups) + + return analysis diff --git a/backend/models/schemas.py b/backend/models/schemas.py index 1cd57fb..56a44ef 100644 --- a/backend/models/schemas.py +++ b/backend/models/schemas.py @@ -16,6 +16,96 @@ class ModelInfo(BaseModel): provider: Optional[str] = None +class ModelHealth(BaseModel): + status: str # "healthy", "unhealthy", "unknown" + healthy_count: int + unhealthy_count: int + response_time_ms: float + last_checked: Optional[datetime] + error_message: Optional[str] = None + + +class ModelSize(BaseModel): + parameters: str # "70B", "32B", "3B", etc. + parameters_billions: float # Numeric for sorting + estimated_memory_gb: int # Rough estimate for warnings + size_tier: str # "tiny" (<2B), "small" (2-10B), "medium" (10-30B), "large" (30B+) + + +class OllamaServerInfo(BaseModel): + api_base: str + host: str + tpm: int + rpm: int + performance_tier: str + health_status: str = "unknown" + model_count: int = 0 + selected_model_count: int = 0 + total_selected_memory_gb: int = 0 + + +class ModelInfoDetailed(BaseModel): + id: str + display_name: str + base_model: str + actual_tag: Optional[str] = None + is_latest_alias: bool = False + resolves_to: Optional[str] = None + + # Server information + api_base: str + server_host: str + server_tpm: int + server_rpm: int + + # Metadata + provider: str + model_family: str + model_category: str + + # Model specs + size: ModelSize + health: Optional[ModelHealth] = None + max_tokens: int = 4096 + supports_function_calling: bool = False + + # Duplication info + is_duplicate: bool = False + better_server: Optional[str] = None + duplicate_count: int = 1 + + +class ServerGroup(BaseModel): + server: OllamaServerInfo + models: List[ModelInfoDetailed] + warnings: List[str] = [] + recommendations: List[str] = [] + + +class SelectionWarning(BaseModel): + severity: str # "high", "medium", "info" + server: str + message: str + models: Optional[List[str]] = None + estimated_total_memory: Optional[str] = None + + +class SelectionRecommendation(BaseModel): + type: str + model: str + from_server: str + to_server: str + reason: str + + +class SelectionAnalysis(BaseModel): + warnings: List[SelectionWarning] + recommendations: List[SelectionRecommendation] + total_models_selected: int + servers_used: int + diversity_score: int + + class ResponseSchema(BaseModel): id: int decision_id: int diff --git a/backend/services/litellm_client.py b/backend/services/litellm_client.py index 62d9777..ec817f1 100644 --- a/backend/services/litellm_client.py +++ b/backend/services/litellm_client.py @@ -52,6 +52,34 @@ async def get_available_models(self) -> List[Dict[str, any]]: print(f"Error fetching models: {e}") return [] + async def get_model_info(self) -> 
dict: + """Fetch detailed model info from /v1/model/info endpoint""" + try: + async with httpx.AsyncClient(timeout=self.timeout, verify=False) as client: + response = await client.get( + f"{self.base_url}/v1/model/info", + headers=self._get_headers() + ) + response.raise_for_status() + return response.json() + except Exception as e: + print(f"Error fetching model info: {e}") + return {"data": []} + + async def get_health_status(self) -> dict: + """Fetch health status from /health/latest endpoint""" + try: + async with httpx.AsyncClient(timeout=self.timeout, verify=False) as client: + response = await client.get( + f"{self.base_url}/health/latest", + headers=self._get_headers() + ) + response.raise_for_status() + return response.json() + except Exception as e: + print(f"Error fetching health status: {e}") + return {"latest_health_checks": {}, "total_models": 0} + async def test_model(self, model_id: str) -> bool: """Test if a model is available and responding""" try: diff --git a/backend/services/model_processor.py b/backend/services/model_processor.py new file mode 100644 index 0000000..ba883a5 --- /dev/null +++ b/backend/services/model_processor.py @@ -0,0 +1,397 @@ +""" +Model processing service for organizing and analyzing LiteLLM models +""" +import re +from typing import List +from urllib.parse import urlparse + +from models.schemas import ( + ModelInfoDetailed, + ModelHealth, + ModelSize, + OllamaServerInfo, + ServerGroup, + SelectionWarning, + SelectionRecommendation, + SelectionAnalysis, +) +from services.litellm_client import LiteLLMClient + + +def extract_hostname(api_base: str) -> str: + """ + Safely extract hostname from api_base URL + + Args: + api_base: URL string (e.g., "http://192.168.1.100:11434") + + Returns: + hostname (e.g., "192.168.1.100") or full api_base if parsing fails + """ + try: + parsed = urlparse(api_base) + # hostname property handles IPv4, IPv6, and regular hostnames + # Falls back to netloc.split(':')[0] for edge cases + return parsed.hostname or parsed.netloc.split(':')[0] + except Exception: + # Fallback to api_base if parsing fails + return api_base + + +def parse_model_name(model_name: str, litellm_model: str) -> dict: + """ + Parse model_name and litellm_params.model to extract metadata + + Examples: + - model_name="gpt-oss:latest", litellm_model="ollama_chat/gpt-oss:20b" + → base="gpt-oss", tag="latest", actual_tag="20b", is_latest=True + + - model_name="llama3.2:3b", litellm_model="ollama_chat/llama3.2:3b" + → base="llama3.2", tag="3b", actual_tag="3b", is_latest=False + """ + # Split model_name on ":" + parts = model_name.split(":") + base = parts[0] + tag = parts[1] if len(parts) > 1 else None + + # Extract actual tag from litellm_model + # "ollama_chat/gpt-oss:20b" → "20b" + litellm_parts = litellm_model.split("/")[-1].split(":") + actual_tag = litellm_parts[1] if len(litellm_parts) > 1 else None + + return { + "base_model": base, + "tag": tag, + "actual_tag": actual_tag, + "is_latest_alias": tag == "latest", + "display_name": model_name + } + + +def parse_model_size(model_name: str, litellm_model: str) -> ModelSize: + """ + Extract model size from name + + Examples: + - llama3.3:70b → 70B + - deepseek-r1:32b → 32B + - llama3.2:3b → 3B + - tinyllama:1.1b → 1.1B + """ + # Try to find pattern like "70b", "32b", "3b", "1.1b" + patterns = [model_name, litellm_model] + for pattern in patterns: + match = re.search(r'(\d+\.?\d*)b', pattern.lower()) + if match: + size_b = float(match.group(1)) + + # Determine tier + if size_b < 2: + tier = "tiny" + elif 
size_b < 10: + tier = "small" + elif size_b < 30: + tier = "medium" + else: + tier = "large" + + # Estimate memory (rough approximation) + # Rule of thumb: ~1.2-1.5 GB per billion parameters for float16 + memory_gb = int(size_b * 1.5) + + return ModelSize( + parameters=f"{size_b}B", + parameters_billions=size_b, + estimated_memory_gb=memory_gb, + size_tier=tier + ) + + # Default if size not found in name + return ModelSize( + parameters="Unknown", + parameters_billions=0, + estimated_memory_gb=4, + size_tier="small" + ) + + +def infer_provider(model_name: str) -> str: + """Infer provider from model name""" + lower = model_name.lower() + + if "llama" in lower: + return "Meta" + elif "mistral" in lower or "codestral" in lower: + return "Mistral AI" + elif "gemma" in lower or "codegemma" in lower: + return "Google" + elif "deepseek" in lower: + return "DeepSeek" + elif "qwen" in lower: + return "Alibaba (Qwen)" + elif "phi" in lower: + return "Microsoft" + elif "granite" in lower: + return "IBM" + elif "gpt-oss" in lower: + return "GPT-OSS" + else: + return "Other" + + +def infer_category(model_name: str) -> str: + """Infer model category from name""" + lower = model_name.lower() + + if "coder" in lower or "code" in lower: + return "code" + elif "vision" in lower: + return "vision" + elif "r1" in lower or "reasoning" in lower: + return "reasoning" + else: + return "chat" + + +def get_performance_tier(tpm: int) -> str: + """Determine performance tier based on TPM""" + if tpm >= 50000: + return "high" + elif tpm >= 20000: + return "medium" + else: + return "low" + + +async def process_models_with_health(raw_data: dict) -> List[ServerGroup]: + """ + Process models with health checks and group by server + """ + client = LiteLLMClient() + + # Fetch health data + health_data = await client.get_health_status() + health_map = health_data.get("latest_health_checks", {}) + + # Step 1: Group models by server + server_map = {} # Key: api_base, Value: {server_info, models} + + for item in raw_data["data"]: + model_name = item["model_name"] + litellm_params = item["litellm_params"] + api_base = litellm_params["api_base"] + + # Initialize server if first time seeing it + if api_base not in server_map: + host = extract_hostname(api_base) + server_map[api_base] = { + "server_info": OllamaServerInfo( + api_base=api_base, + host=host, + tpm=litellm_params.get("tpm", 0), + rpm=litellm_params.get("rpm", 0), + performance_tier=get_performance_tier(litellm_params.get("tpm", 0)), + health_status="unknown", + model_count=0 + ), + "models": [] + } + + # Parse model info + parsed = parse_model_name(model_name, litellm_params["model"]) + size = parse_model_size(model_name, litellm_params["model"]) + + # Get health info for this model + health = None + if model_name in health_map: + h = health_map[model_name] + health = ModelHealth( + status=h.get("status", "unknown"), + healthy_count=h.get("healthy_count", 0), + unhealthy_count=h.get("unhealthy_count", 0), + response_time_ms=h.get("response_time_ms", 0), + last_checked=h.get("checked_at"), + error_message=h.get("error_message") + ) + + # Extract host from api_base + host = extract_hostname(api_base) + + # Infer model family from base_model + base_parts = parsed["base_model"].split(".")[0].split("-")[0] + model_family = base_parts.capitalize() + + # Create model info + model = ModelInfoDetailed( + id=f"{model_name}_{host}", + display_name=model_name, + base_model=parsed["base_model"], + actual_tag=parsed["actual_tag"], + is_latest_alias=parsed["is_latest_alias"], + 
resolves_to=f"{parsed['base_model']}:{parsed['actual_tag']}" if parsed["is_latest_alias"] and parsed["actual_tag"] else None, + api_base=api_base, + server_host=host, + server_tpm=litellm_params.get("tpm", 0), + server_rpm=litellm_params.get("rpm", 0), + provider=infer_provider(parsed["base_model"]), + model_family=model_family, + model_category=infer_category(model_name), + size=size, + health=health, + max_tokens=litellm_params.get("max_tokens", 4096), + supports_function_calling=litellm_params.get("supports_function_calling", False) + ) + + server_map[api_base]["models"].append(model) + server_map[api_base]["server_info"].model_count += 1 + + # Step 2: Detect duplicates and find better servers + all_models = [] + for server_data in server_map.values(): + all_models.extend(server_data["models"]) + + # Group by base_model:actual_tag to find duplicates + model_groups = {} + for model in all_models: + key = f"{model.base_model}:{model.actual_tag}" + if key not in model_groups: + model_groups[key] = [] + model_groups[key].append(model) + + # Mark duplicates and set better_server + for key, models in model_groups.items(): + if len(models) > 1: + # Sort by TPM (descending) to find best server + models_sorted = sorted( + models, + key=lambda m: m.server_tpm, + reverse=True + ) + best_server = models_sorted[0].api_base + + for model in models: + model.is_duplicate = True + model.duplicate_count = len(models) + if model.api_base != best_server: + model.better_server = best_server + + # Step 3: Create ServerGroup list + result = [] + for api_base in sorted(server_map.keys()): + server_data = server_map[api_base] + models = sorted(server_data["models"], key=lambda m: m.display_name.lower()) + + result.append(ServerGroup( + server=server_data["server_info"], + models=models, + warnings=[], + recommendations=[] + )) + + return result + + +async def analyze_selection(selected_model_ids: List[str], all_server_groups: List[ServerGroup]) -> SelectionAnalysis: + """ + Analyze selected models for resource conflicts + + Returns warnings and recommendations + """ + # Build a map of model_id -> model + model_map = {} + for group in all_server_groups: + for model in group.models: + model_map[model.id] = model + + # Get selected models + selected_models = [model_map.get(mid) for mid in selected_model_ids if mid in model_map] + selected_models = [m for m in selected_models if m is not None] + + # Group by server + by_server = {} + for model in selected_models: + if model.api_base not in by_server: + by_server[model.api_base] = [] + by_server[model.api_base].append(model) + + warnings = [] + recommendations = [] + + # Check each server for resource conflicts + for api_base, server_models in by_server.items(): + total_memory = sum(m.size.estimated_memory_gb for m in server_models) + large_models = [m for m in server_models if m.size.size_tier == "large"] + medium_models = [m for m in server_models if m.size.size_tier == "medium"] + + # Warning: Multiple large models on same server + if len(large_models) > 1: + warnings.append(SelectionWarning( + severity="high", + server=api_base, + message=f"⚠️ {len(large_models)} large models selected on {extract_hostname(api_base)}. 
Expect significant delays during model swapping.", + models=[m.display_name for m in large_models], + estimated_total_memory=f"{total_memory}GB" + )) + + # Recommend moving one to another server + for model in large_models[1:]: # Skip first one + if model.is_duplicate and model.better_server: + recommendations.append(SelectionRecommendation( + type="move_to_better_server", + model=model.display_name, + from_server=api_base, + to_server=model.better_server, + reason="Higher TPM and avoids memory contention" + )) + + # Warning: Large + medium models + if len(large_models) >= 1 and len(medium_models) >= 1: + warnings.append(SelectionWarning( + severity="medium", + server=api_base, + message=f"⚠️ Large model + medium model on {extract_hostname(api_base)}. May cause delays.", + estimated_total_memory=f"{total_memory}GB" + )) + + # Info: High memory usage + if total_memory > 50: + warnings.append(SelectionWarning( + severity="info", + server=api_base, + message=f"ℹ️ Selected models require ~{total_memory}GB on {extract_hostname(api_base)}" + )) + + # Calculate diversity score + providers = set(m.provider for m in selected_models) + categories = set(m.model_category for m in selected_models) + unique_base_models = set(m.base_model for m in selected_models) + + diversity_score = calculate_diversity_score(len(providers), len(categories), len(unique_base_models)) + + return SelectionAnalysis( + warnings=warnings, + recommendations=recommendations, + total_models_selected=len(selected_models), + servers_used=len(by_server), + diversity_score=diversity_score + ) + + +def calculate_diversity_score(providers: int, categories: int, unique_models: int) -> int: + """Calculate diversity score (0-100)""" + score = 0 + + if providers >= 3: + score += 40 + elif providers >= 2: + score += 25 + + if categories >= 2: + score += 30 + + if unique_models >= 3: + score += 30 + elif unique_models >= 2: + score += 15 + + return min(score, 100) diff --git a/frontend/src/App.jsx b/frontend/src/App.jsx index 29fec8e..1fed015 100644 --- a/frontend/src/App.jsx +++ b/frontend/src/App.jsx @@ -7,7 +7,14 @@ import SynthesisPanel from './components/SynthesisPanel'; import HistoryBrowser from './components/HistoryBrowser'; export default function App() { - const { models, loading: modelsLoading } = useLLMModels(); + const { + models, + serverGroups, + selectedModels, + selectionAnalysis, + loading: modelsLoading, + handleModelSelect, + } = useLLMModels(); const { debating, decisionId, @@ -136,8 +143,13 @@ export default function App() {
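The `/api/config/models/by-server` and `/api/config/models/analyze-selection` endpoints added above are what the reworked frontend hook (`useLLMModels.js`, later in this diff) calls. A minimal consumption sketch, assuming the backend is reachable under the same origin as the dev server; the model IDs in the trailing comment are hypothetical examples of the `model_name_host` format the backend generates:

```javascript
// Sketch only: mirrors the fetch calls made in useLLMModels.js further down in this diff.
async function fetchServerGroups() {
  const res = await fetch('/api/config/models/by-server');
  if (!res.ok) throw new Error('Failed to fetch server groups');
  return res.json(); // Array of ServerGroup: { server, models, warnings, recommendations }
}

async function analyzeSelection(modelIds) {
  const res = await fetch('/api/config/models/analyze-selection', {
    method: 'POST',
    headers: { 'Content-Type': 'application/json' },
    body: JSON.stringify({ model_ids: modelIds }),
  });
  if (!res.ok) throw new Error('Failed to analyze selection');
  return res.json(); // SelectionAnalysis: { warnings, recommendations, total_models_selected, servers_used, diversity_score }
}

// Hypothetical IDs; two large models on the same host should yield a "high" severity warning.
// const analysis = await analyzeSelection(['llama3.3:70b_192.168.1.100', 'deepseek-r1:32b_192.168.1.100']);
```

Repeated calls within the cache TTL (default 120 seconds, configurable via `CACHE_TTL_SECONDS`) return the cached `ServerGroup` list, so the by-server endpoint is cheap to poll.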
diff --git a/frontend/src/components/ModelCard.jsx b/frontend/src/components/ModelCard.jsx new file mode 100644 index 0000000..9e93dd7 --- /dev/null +++ b/frontend/src/components/ModelCard.jsx @@ -0,0 +1,126 @@ +import { useState } from 'react'; +import { + formatResponseTime, + formatMemory, + getHealthStatusIcon, + getSizeClassName, + extractHostname +} from '../utils/modelSelection'; + +export function ModelCard({ model, isSelected, onSelect }) { + const [showDetails, setShowDetails] = useState(false); + + const handleKeyDown = (e) => { + if (e.key === ' ' || e.key === 'Enter') { + e.preventDefault(); + onSelect(); + } + }; + + return ( +
+    <div className={`model-card ${isSelected ? 'model-card-selected' : ''}`}>
+      <div className="model-main" onClick={onSelect} onKeyDown={handleKeyDown} role="checkbox" aria-checked={isSelected} tabIndex={0}>
+        <input
+          type="checkbox"
+          checked={isSelected}
+          onChange={onSelect}
+          onClick={(e) => e.stopPropagation()}
+          tabIndex={-1}
+        />
+
+        <div className="model-info">
+          <div className="model-name">
+            {model.display_name}
+
+            {/* Badges */}
+            {model.is_latest_alias && (
+              <span className="badge latest">Latest → {model.actual_tag}</span>
+            )}
+            <span className={`badge ${getSizeClassName(model.size.size_tier)}`}>{model.size.parameters}</span>
+            {model.is_duplicate && (
+              <span className="badge duplicate">{model.duplicate_count} servers</span>
+            )}
+          </div>
+
+          <div className="model-meta">
+            <span className="provider">{model.provider}</span>
+            <span className="category-badge">{model.model_category}</span>
+            <span className="memory">{formatMemory(model.size.estimated_memory_gb)}</span>
+
+            {/* Health status */}
+            {model.health && (
+              <span className={`health ${model.health.status}`}>
+                {getHealthStatusIcon(model.health.status)}
+                {' '}
+                {formatResponseTime(model.health.response_time_ms)}
+              </span>
+            )}
+          </div>
+
+          {/* Better server recommendation */}
+          {model.is_duplicate && model.better_server && (
+            <div className="recommendation">
+              💡 Also on {extractHostname(model.better_server)} (better performance)
+            </div>
+          )}
+        </div>
+
+        {/* Details toggle */}
+        {model.health && (
+          <button
+            type="button"
+            className="details-toggle"
+            onClick={(e) => { e.stopPropagation(); setShowDetails(!showDetails); }}
+          >
+            {showDetails ? '▲' : '▼'}
+          </button>
+        )}
+      </div>
+
+      {/* Health details */}
+      {showDetails && model.health && (
+        <div className="model-details">
+          <div className="detail-row">
+            <span>Health:</span>
+            <span>{model.health.healthy_count} healthy, {model.health.unhealthy_count} unhealthy</span>
+          </div>
+          {model.health.last_checked && (
+            <div className="detail-row">
+              <span>Last checked:</span>
+              <span>{new Date(model.health.last_checked).toLocaleString()}</span>
+            </div>
+          )}
+          {model.health.error_message && (
+            <div className="detail-row error">
+              <span>Error:</span>
+              <span>{model.health.error_message}</span>
+            </div>
+          )}
+          <div className="detail-row">
+            <span>Max tokens:</span>
+            <span>{model.max_tokens.toLocaleString()}</span>
+          </div>
+          <div className="detail-row">
+            <span>Function calling:</span>
+            <span>{model.supports_function_calling ? 'Yes' : 'No'}</span>
+          </div>
+        </div>
+      )}
+    </div>
+ ); +} diff --git a/frontend/src/components/QueryInput.jsx b/frontend/src/components/QueryInput.jsx index dd0007c..ba9a6e2 100644 --- a/frontend/src/components/QueryInput.jsx +++ b/frontend/src/components/QueryInput.jsx @@ -1,4 +1,6 @@ import { useState } from 'react'; +import { ServerGroup } from './ServerGroup'; +import { SelectionAnalysis } from './SelectionAnalysis'; // Quick prompt examples - moved outside component for performance const QUICK_PROMPTS = { @@ -7,7 +9,7 @@ const QUICK_PROMPTS = { 'Architecture': 'What are the pros and cons of different database architectures for a high-traffic e-commerce platform?', }; -// Helper function to get model provider label +// Helper function to get model provider label (for legacy flat view) const getModelProviderLabel = (modelId) => { if (modelId.includes('gpt')) return 'OpenAI Flagship'; if (modelId.includes('claude')) return 'Anthropic Flagship'; @@ -19,44 +21,63 @@ const getModelProviderLabel = (modelId) => { return 'AI Model'; }; -export default function QueryInput({ models, onSubmit, disabled }) { +export default function QueryInput({ + models, + serverGroups = [], + selectedModels = [], + selectionAnalysis = null, + onModelSelect, + onSubmit, + disabled, + useServerGrouping = true +}) { const [query, setQuery] = useState(''); - const [selectedModels, setSelectedModels] = useState([]); const [chairman, setChairman] = useState(''); + // For backward compatibility - local state if not using hook + const [localSelectedModels, setLocalSelectedModels] = useState([]); + + // Use provided or local state + const activeSelectedModels = onModelSelect ? selectedModels : localSelectedModels; + const handleSubmit = (e) => { e.preventDefault(); - if (query.trim() && selectedModels.length >= 2 && chairman) { - onSubmit(query, selectedModels, chairman); + if (query.trim() && activeSelectedModels.length >= 2 && chairman) { + // Extract model display names for submission + const modelNames = activeSelectedModels.map(id => { + // id format is "model_name_host", so extract just the model name + return id.split('_').slice(0, -1).join('_'); + }); + onSubmit(query, modelNames, chairman.split('_').slice(0, -1).join('_')); } }; - const toggleModel = (modelId) => { - setSelectedModels(prev => { - if (prev.includes(modelId)) { - return prev.filter(id => id !== modelId); - } else { - return [...prev, modelId]; - } - }); + const handleLocalModelSelect = (modelId, isSelected) => { + if (onModelSelect) { + onModelSelect(modelId, isSelected); + } else { + setLocalSelectedModels(prev => { + if (isSelected) { + return [...prev, modelId]; + } else { + return prev.filter(id => id !== modelId); + } + }); + } }; const handleQuickPrompt = (prompt) => { setQuery(QUICK_PROMPTS[prompt] || ''); }; - const handleModelKeyDown = (e, modelId) => { - if (e.key === 'Enter' || e.key === ' ') { - e.preventDefault(); - toggleModel(modelId); - } - }; - - const availableModels = models.filter(m => m.available); + // Get all available models (flatten from serverGroups or use models) + const availableModels = useServerGrouping && serverGroups.length > 0 + ? 
serverGroups.flatMap(group => group.models) + : models.filter(m => m.available); // Set default chairman to Claude Opus 4.5 if available const recommendedChairman = availableModels.find(m => - m.id.includes('claude-opus-4') || m.name.includes('Claude Opus') + m.display_name?.includes('claude-opus-4') || m.name?.includes('Claude Opus') ); return ( @@ -93,6 +114,11 @@ export default function QueryInput({ models, onSubmit, disabled }) { + {/* Selection Analysis */} + {selectionAnalysis && activeSelectedModels.length > 0 && ( + + )} + {/* Council Members */}
@@ -100,50 +126,64 @@ export default function QueryInput({ models, onSubmit, disabled }) { Council Members - Select 2+ models for best results + Select 2+ models for best results ({activeSelectedModels.length} selected)
-
- {availableModels.length === 0 ? ( -

No models available

- ) : ( - availableModels.map(model => { - const isSelected = selectedModels.includes(model.id); - return ( -
!disabled && toggleModel(model.id)} - onKeyDown={(e) => !disabled && handleModelKeyDown(e, model.id)} - role="checkbox" - aria-checked={isSelected} - tabIndex={disabled ? -1 : 0} - className={`model-card ${isSelected ? 'model-card-selected' : ''}`} - > -
- toggleModel(model.id)} - disabled={disabled} - tabIndex={-1} - aria-hidden="true" - className="mt-1 rounded text-blue-600 focus:ring-blue-500 bg-transparent border-gray-600" - /> -
-
- {model.name} -
-
- {getModelProviderLabel(model.id)} + {/* Server-grouped or flat layout */} + {useServerGrouping && serverGroups.length > 0 ? ( +
+ {serverGroups.map(group => ( + + ))} +
+ ) : ( +
+ {availableModels.length === 0 ? ( +

No models available

+ ) : ( + availableModels.map(model => { + const modelId = model.id || model.name; + const isSelected = activeSelectedModels.includes(modelId); + return ( +
!disabled && handleLocalModelSelect(modelId, !isSelected)} + role="checkbox" + aria-checked={isSelected} + tabIndex={disabled ? -1 : 0} + className={`model-card ${isSelected ? 'model-card-selected' : ''}`} + > +
+ handleLocalModelSelect(modelId, !isSelected)} + disabled={disabled} + tabIndex={-1} + aria-hidden="true" + className="mt-1 rounded text-blue-600 focus:ring-blue-500 bg-transparent border-gray-600" + /> +
+
+ {model.name || model.display_name} +
+
+ {getModelProviderLabel(modelId)} +
-
- ); - }) - )} -
+ ); + }) + )} +
+ )}
{/* Chairman Model */} @@ -159,10 +199,12 @@ export default function QueryInput({ models, onSubmit, disabled }) { > {availableModels.map(model => { - const isRecommended = recommendedChairman && model.id === recommendedChairman.id; + const modelId = model.id || model.name; + const modelName = model.name || model.display_name; + const isRecommended = recommendedChairman && modelId === recommendedChairman.id; return ( - ); })} @@ -172,7 +214,7 @@ export default function QueryInput({ models, onSubmit, disabled }) { {/* Submit Button */} + +
+ {/* Resource warnings for this server */} + {expanded && stats.selectedCount > 0 && ( +
+
+ {stats.selectedCount} model(s) selected + {stats.totalMemory > 0 && ` • ~${stats.totalMemory}GB memory`} +
+ + {stats.hasConflict && stats.largeModels.length > 1 && ( +
+ ⚠️ {stats.largeModels.length} large models selected. Expect significant delays during model swapping! +
+ )} + + {stats.hasConflict && stats.largeModels.length === 1 && stats.mediumModels.length >= 1 && ( +
+ ⚠️ Large model + medium model selected. May cause delays. +
+ )} + + {!stats.hasConflict && stats.selectedCount > 0 && ( +
+ ✓ Good selection: {stats.selectedCount === 1 ? 'Single model' : 'Small models only'} +
+ )} +
+ )} + + {/* Models list */} + {expanded && ( +
+ {models.map(model => ( + onModelSelect(model.id, !selectedModels.includes(model.id))} + /> + ))} +
+ )} +
+
+ ); +} diff --git a/frontend/src/hooks/useLLMModels.js b/frontend/src/hooks/useLLMModels.js index 01346dc..4c2a87e 100644 --- a/frontend/src/hooks/useLLMModels.js +++ b/frontend/src/hooks/useLLMModels.js @@ -1,21 +1,40 @@ -import { useState, useEffect } from 'react'; +import { useState, useEffect, useCallback } from 'react'; export function useLLMModels() { const [models, setModels] = useState([]); + const [serverGroups, setServerGroups] = useState([]); + const [selectedModels, setSelectedModels] = useState([]); + const [selectionAnalysis, setSelectionAnalysis] = useState(null); const [loading, setLoading] = useState(true); const [error, setError] = useState(null); + const [useServerGrouping, setUseServerGrouping] = useState(true); useEffect(() => { fetchModels(); - }, []); + }, [useServerGrouping]); const fetchModels = async () => { try { setLoading(true); - const response = await fetch('/api/config/models'); - if (!response.ok) throw new Error('Failed to fetch models'); - const data = await response.json(); - setModels(data); + + if (useServerGrouping) { + // Fetch server-grouped models with health info + const response = await fetch('/api/config/models/by-server'); + if (!response.ok) throw new Error('Failed to fetch models'); + const data = await response.json(); + setServerGroups(data); + + // Also flatten for backward compatibility + const flatModels = data.flatMap(group => group.models); + setModels(flatModels); + } else { + // Fallback to simple model list + const response = await fetch('/api/config/models'); + if (!response.ok) throw new Error('Failed to fetch models'); + const data = await response.json(); + setModels(data); + setServerGroups([]); + } } catch (err) { setError(err.message); } finally { @@ -23,5 +42,53 @@ export function useLLMModels() { } }; - return { models, loading, error, refetch: fetchModels }; + const analyzeSelection = useCallback(async (modelIds) => { + if (modelIds.length === 0) { + setSelectionAnalysis(null); + return; + } + + try { + const response = await fetch('/api/config/models/analyze-selection', { + method: 'POST', + headers: { 'Content-Type': 'application/json' }, + body: JSON.stringify({ model_ids: modelIds }) + }); + + if (!response.ok) throw new Error('Failed to analyze selection'); + const analysis = await response.json(); + setSelectionAnalysis(analysis); + } catch (err) { + console.error('Error analyzing selection:', err); + setSelectionAnalysis(null); + } + }, []); + + const handleModelSelect = useCallback((modelId, isSelected) => { + const newSelection = isSelected + ? 
[...selectedModels, modelId] + : selectedModels.filter(id => id !== modelId); + + setSelectedModels(newSelection); + analyzeSelection(newSelection); + }, [selectedModels, analyzeSelection]); + + const clearSelection = useCallback(() => { + setSelectedModels([]); + setSelectionAnalysis(null); + }, []); + + return { + models, + serverGroups, + selectedModels, + selectionAnalysis, + loading, + error, + useServerGrouping, + setUseServerGrouping, + handleModelSelect, + clearSelection, + refetch: fetchModels + }; } diff --git a/frontend/src/styles/index.css b/frontend/src/styles/index.css index 357c62c..f35ef1d 100644 --- a/frontend/src/styles/index.css +++ b/frontend/src/styles/index.css @@ -26,7 +26,11 @@ } .model-card { - @apply bg-[#0f1623] border border-gray-700 rounded-lg p-4 transition-all hover:border-blue-500 focus:border-blue-500 focus:outline-none focus:ring-2 focus:ring-blue-500/50 cursor-pointer; + @apply bg-[#0f1623] border border-gray-700 rounded-lg p-4 transition-all hover:border-blue-500 focus-within:border-blue-500 focus-within:ring-2 focus-within:ring-blue-500/50 cursor-pointer; + } + + .model-card input[type="checkbox"] { + @apply focus:outline-none focus:ring-0; } .model-card-selected { @@ -36,4 +40,273 @@ .quick-prompt-pill { @apply px-4 py-2 bg-[#0f1623] border border-gray-600 rounded-full text-sm text-gray-300 hover:border-blue-500 hover:text-blue-400 transition-colors cursor-pointer; } + + /* Server Group Styles */ + .server-group { + @apply bg-[#0f1623] border border-gray-700 rounded-lg mb-4 overflow-hidden; + } + + .server-header { + @apply w-full text-left bg-[#1a2332] p-4 cursor-pointer hover:bg-[#1f2938] transition-colors border-0; + } + + .server-title { + @apply flex items-center gap-3 mb-2; + } + + .server-icon { + @apply text-xl; + } + + .server-host { + @apply text-sm font-medium text-gray-100; + } + + .health-badge { + @apply text-xs px-2 py-1 rounded-full; + } + + .health-badge.healthy { + @apply bg-green-900/30 text-green-400 border border-green-700; + } + + .health-badge.unhealthy { + @apply bg-red-900/30 text-red-400 border border-red-700; + } + + .server-stats { + @apply flex items-center gap-4 flex-wrap text-xs text-gray-400; + } + + .server-stats .stat { + @apply px-2 py-1 bg-[#0f1623] rounded; + } + + .badge-high { + @apply px-2 py-1 bg-blue-900/30 text-blue-400 border border-blue-700 rounded; + } + + .expand-icon { + @apply ml-auto transform transition-transform; + } + + .expand-icon.expanded { + @apply rotate-180; + } + + /* Server Usage Warnings */ + .server-usage { + @apply px-4 py-3 bg-[#1a2332] border-t border-gray-700; + } + + .usage-summary { + @apply text-sm text-gray-300 mb-2; + } + + .warning { + @apply px-3 py-2 rounded-lg text-sm mt-2; + } + + .warning.severity-high { + @apply bg-red-900/20 border border-red-700 text-red-300; + } + + .warning.severity-medium { + @apply bg-yellow-900/20 border border-yellow-700 text-yellow-300; + } + + .warning.severity-info { + @apply bg-blue-900/20 border border-blue-700 text-blue-300; + } + + /* Models List */ + .models-list { + @apply p-4 grid grid-cols-1 md:grid-cols-2 lg:grid-cols-3 gap-3; + } + + .server-groups-container { + @apply space-y-4; + } + + /* Model Card Styles (Enhanced) */ + .model-main { + @apply flex items-start gap-3 cursor-pointer; + } + + .model-info { + @apply flex-1 min-w-0; + } + + .model-name { + @apply text-sm font-medium text-gray-100 mb-2 flex items-center gap-2 flex-wrap; + } + + .badge { + @apply text-xs px-2 py-0.5 rounded-full font-normal; + } + + .badge.latest { + @apply 
bg-purple-900/30 text-purple-400 border border-purple-700; + } + + .badge.size-tiny { + @apply bg-green-900/30 text-green-400 border border-green-700; + } + + .badge.size-small { + @apply bg-blue-900/30 text-blue-400 border border-blue-700; + } + + .badge.size-medium { + @apply bg-yellow-900/30 text-yellow-400 border border-yellow-700; + } + + .badge.size-large { + @apply bg-red-900/30 text-red-400 border border-red-700; + } + + .badge.duplicate { + @apply bg-gray-700 text-gray-300 border border-gray-600; + } + + .model-meta { + @apply flex items-center gap-3 flex-wrap text-xs text-gray-400; + } + + .provider { + @apply px-2 py-0.5 bg-[#1a2332] rounded; + } + + .category-badge { + @apply px-2 py-0.5 bg-[#1a2332] rounded; + } + + .memory { + @apply px-2 py-0.5 bg-[#1a2332] rounded; + } + + .health { + @apply px-2 py-0.5 rounded; + } + + .health.healthy { + @apply bg-green-900/30 text-green-400; + } + + .health.unhealthy { + @apply bg-red-900/30 text-red-400; + } + + .recommendation { + @apply text-xs text-blue-400 mt-2 flex items-center gap-1; + } + + .details-toggle { + @apply text-gray-400 hover:text-gray-200 transition-colors px-2; + } + + .model-details { + @apply mt-3 pt-3 border-t border-gray-700 text-xs space-y-2; + } + + .detail-row { + @apply flex justify-between text-gray-400; + } + + .detail-row.error { + @apply text-red-400; + } + + /* Selection Analysis Styles */ + .selection-analysis { + @apply bg-[#0f1623] border rounded-lg p-4 mb-4; + } + + .selection-analysis.good { + @apply border-green-700; + } + + .selection-analysis.has-medium-warnings { + @apply border-yellow-700; + } + + .selection-analysis.has-high-warnings { + @apply border-red-700; + } + + .selection-analysis h3 { + @apply text-sm font-medium text-gray-100 mb-3; + } + + .summary { + @apply flex gap-6 mb-4; + } + + .summary-item { + @apply flex flex-col; + } + + .summary-item .label { + @apply text-xs text-gray-400; + } + + .summary-item .value { + @apply text-lg font-semibold text-gray-100; + } + + .value.diversity-good { + @apply text-green-400; + } + + .value.diversity-fair { + @apply text-yellow-400; + } + + .value.diversity-poor { + @apply text-red-400; + } + + .warnings-section, + .recommendations-section { + @apply mt-4; + } + + .warnings-section h4, + .recommendations-section h4 { + @apply text-sm font-medium text-gray-300 mb-2; + } + + .warnings-list, + .recommendations-list { + @apply space-y-2; + } + + .warning-item { + @apply px-3 py-2 rounded-lg; + } + + .warning-message { + @apply text-sm font-medium mb-1; + } + + .warning-models, + .warning-memory { + @apply text-xs opacity-80 mt-1; + } + + .recommendation-item { + @apply px-3 py-2 bg-blue-900/20 border border-blue-700 rounded-lg; + } + + .recommendation-action { + @apply text-sm text-blue-300 mb-1; + } + + .recommendation-reason { + @apply text-xs text-blue-400/70; + } + + .positive-message { + @apply px-3 py-2 bg-green-900/20 border border-green-700 text-green-300 rounded-lg text-sm; + } } diff --git a/frontend/src/utils/modelSelection.js b/frontend/src/utils/modelSelection.js new file mode 100644 index 0000000..a5fc944 --- /dev/null +++ b/frontend/src/utils/modelSelection.js @@ -0,0 +1,103 @@ +/** + * Utility functions for model selection and analysis + */ + +/** + * Safely extract hostname from URL string + * @param {string} url - URL to parse (e.g., "http://192.168.1.100:11434") + * @returns {string} hostname or full URL if parsing fails + */ +export function extractHostname(url) { + try { + const parsed = new URL(url); + return parsed.hostname; 
+ } catch { + // Fallback: try to extract after // and before : + const match = url.match(/\/\/([^:\/]+)/); + return match ? match[1] : url; + } +} + +export function findModelById(modelId, serverGroups) { + for (const group of serverGroups) { + const model = group.models.find(m => m.id === modelId); + if (model) return model; + } + return null; +} + +export function analyzeSelectionDiversity(selectedModelIds, serverGroups) { + const selectedModels = selectedModelIds + .map(id => findModelById(id, serverGroups)) + .filter(m => m !== null); + + const providers = new Set(selectedModels.map(m => m.provider)); + const categories = new Set(selectedModels.map(m => m.model_category)); + const baseModels = new Set(selectedModels.map(m => m.base_model)); + + return { + providerCount: providers.size, + categoryCount: categories.size, + uniqueModelCount: baseModels.size, + hasDuplicates: selectedModelIds.length > baseModels.size, + providers: Array.from(providers), + categories: Array.from(categories) + }; +} + +export function getServerStats(serverGroup, selectedModelIds) { + const selectedOnServer = serverGroup.models.filter(m => + selectedModelIds.includes(m.id) + ); + + const totalMemory = selectedOnServer.reduce( + (sum, m) => sum + m.size.estimated_memory_gb, + 0 + ); + + const largeModels = selectedOnServer.filter(m => m.size.size_tier === 'large'); + const mediumModels = selectedOnServer.filter(m => m.size.size_tier === 'medium'); + + return { + selectedCount: selectedOnServer.length, + totalMemory, + largeModels, + mediumModels, + hasConflict: largeModels.length > 1 || (largeModels.length >= 1 && mediumModels.length >= 1) + }; +} + +export function formatResponseTime(ms) { + if (ms < 1000) { + return `${Math.round(ms)}ms`; + } else { + return `${(ms / 1000).toFixed(1)}s`; + } +} + +export function formatMemory(gb) { + if (gb < 1) { + return `${Math.round(gb * 1024)}MB`; + } else { + return `${gb}GB`; + } +} + +export function getHealthStatusIcon(status) { + switch (status) { + case 'healthy': + return '✓'; + case 'unhealthy': + return '✗'; + default: + return '?'; + } +} + +export function getSizeClassName(sizeTier) { + return `size-${sizeTier}`; +} + +export function getSeverityClassName(severity) { + return `severity-${severity}`; +}
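
A quick usage sketch for these helpers, with a hypothetical server group whose fields mirror the `ServerGroup` / `ModelInfoDetailed` schemas defined earlier in this diff; the host, IDs, and memory figures below are invented for illustration (they follow the backend's `int(size_b * 1.5)` memory estimate):

```javascript
import { getServerStats, analyzeSelectionDiversity, extractHostname } from '../utils/modelSelection';

// Hypothetical server group shaped like one entry of the /api/config/models/by-server payload.
const group = {
  server: { api_base: 'http://192.168.1.100:11434', host: '192.168.1.100' },
  models: [
    { id: 'llama3.3:70b_192.168.1.100', base_model: 'llama3.3', provider: 'Meta',
      model_category: 'chat', size: { size_tier: 'large', estimated_memory_gb: 105 } },
    { id: 'deepseek-r1:32b_192.168.1.100', base_model: 'deepseek-r1', provider: 'DeepSeek',
      model_category: 'reasoning', size: { size_tier: 'large', estimated_memory_gb: 48 } },
  ],
};

const selected = group.models.map(m => m.id);

const stats = getServerStats(group, selected);
// Two large models on one server: stats.hasConflict === true, stats.totalMemory === 153

const diversity = analyzeSelectionDiversity(selected, [group]);
// diversity.providerCount === 2, diversity.categoryCount === 2, diversity.hasDuplicates === false

console.log(extractHostname(group.server.api_base)); // "192.168.1.100"
```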