diff --git a/server/app/main.py b/server/app/main.py index 037853d..5eebca7 100644 --- a/server/app/main.py +++ b/server/app/main.py @@ -1,3 +1,4 @@ +import asyncio import logging from contextlib import asynccontextmanager from pathlib import Path @@ -7,6 +8,7 @@ from fastapi.responses import FileResponse from rich.logging import RichHandler +from . import scheduler from .config import settings from .runner import load_persisted_runs from .task_registry import load_persisted_tasks @@ -30,12 +32,16 @@ async def lifespan(app): load_persisted_runs() load_persisted_tools() load_persisted_tasks() + count = scheduler.load_persisted_schedules() + logger.info("Loaded %d schedules", count) + scheduler_task = asyncio.create_task(scheduler.run_scheduler()) logger.info( "Sentifish started — results dir: %s (%d runs loaded)", results_path.resolve(), len(list(results_path.glob("*.json"))), ) yield + scheduler_task.cancel() logger.info("Sentifish shutting down") diff --git a/server/app/models.py b/server/app/models.py index c3f05a5..696cda5 100644 --- a/server/app/models.py +++ b/server/app/models.py @@ -228,3 +228,19 @@ def cost_by_provider(self) -> dict[str, float]: for s in self.scores: costs[s.provider] = costs.get(s.provider, 0.0) + s.cost_usd return costs + + +class EvalSchedule(BaseModel): + """A scheduled recurring evaluation.""" + + id: str = Field(default_factory=lambda: str(uuid.uuid4())) + name: str + dataset_name: str + providers: list[str] + top_k: int = Field(default=10, ge=1, le=100) + interval_minutes: int = Field(default=360, ge=5, le=10080) # 5min to 7 days + enabled: bool = True + created_at: float = Field(default_factory=time.time) + last_run_id: str | None = None + last_run_at: float | None = None + run_count: int = 0 diff --git a/server/app/scheduler.py b/server/app/scheduler.py new file mode 100644 index 0000000..f46ddc5 --- /dev/null +++ b/server/app/scheduler.py @@ -0,0 +1,119 @@ +"""Simple async scheduler for recurring evaluations.""" + +from __future__ 
import asyncio
import json
import logging
import time
from pathlib import Path

from .config import settings
from .models import EvalSchedule

logger = logging.getLogger(__name__)

# In-memory registry of schedules, keyed by schedule id.
_schedules: dict[str, EvalSchedule] = {}
# Directory holding one JSON file per schedule; resolved lazily on first use.
_SCHEDULES_DIR: Path | None = None
# Strong references to in-flight run tasks. The event loop only keeps weak
# references to tasks, so a fire-and-forget task could otherwise be garbage
# collected before it finishes.
_run_tasks: set[asyncio.Task] = set()


def _get_schedules_dir() -> Path:
    """Return the schedules directory, creating it the first time it is resolved."""
    global _SCHEDULES_DIR
    if _SCHEDULES_DIR is None:
        _SCHEDULES_DIR = Path(settings.results_dir) / "schedules"
        _SCHEDULES_DIR.mkdir(parents=True, exist_ok=True)
    return _SCHEDULES_DIR


def _validate_schedule_id(schedule_id: str) -> None:
    """Reject ids that cannot safely be used as a file name.

    Raises:
        ValueError: if the id is empty or contains a path separator or NUL.
    """
    if not schedule_id or any(ch in schedule_id for ch in "/\\\0"):
        raise ValueError(f"Invalid schedule id: {schedule_id!r}")


def _schedule_path(schedule_id: str) -> Path:
    """Return the JSON file path for *schedule_id* inside the schedules dir.

    Raises:
        ValueError: if the id is not usable as a single path component.
    """
    # The id is a free-form string on the model, so it must be checked before
    # it is interpolated into a path; otherwise "../x" escapes the directory.
    _validate_schedule_id(schedule_id)
    return _get_schedules_dir() / f"{schedule_id}.json"


def list_schedules() -> list[EvalSchedule]:
    """Return all registered schedules, newest (by ``created_at``) first."""
    return sorted(_schedules.values(), key=lambda s: s.created_at, reverse=True)


def get_schedule(schedule_id: str) -> EvalSchedule | None:
    """Return the schedule with the given id, or ``None`` if it is unknown."""
    return _schedules.get(schedule_id)


def create_schedule(schedule: EvalSchedule) -> EvalSchedule:
    """Persist *schedule* to disk, register it in memory, and return it.

    Raises:
        ValueError: if ``schedule.id`` is not usable as a file name.
    """
    # Persist first so a failed write never leaves a schedule that exists only
    # in memory and silently disappears on restart.
    _persist_schedule(schedule)
    _schedules[schedule.id] = schedule
    return schedule


def delete_schedule(schedule_id: str) -> bool:
    """Remove a schedule from memory and disk.

    Returns:
        ``True`` if the schedule existed and was removed, ``False`` otherwise.
    """
    if schedule_id not in _schedules:
        return False
    del _schedules[schedule_id]
    _schedule_path(schedule_id).unlink(missing_ok=True)
    return True


def toggle_schedule(schedule_id: str) -> EvalSchedule | None:
    """Flip a schedule's ``enabled`` flag and persist the change.

    Returns:
        The updated schedule, or ``None`` if the id is unknown.
    """
    schedule = _schedules.get(schedule_id)
    if schedule is None:
        return None
    schedule.enabled = not schedule.enabled
    _persist_schedule(schedule)
    return schedule


def _persist_schedule(schedule: EvalSchedule) -> None:
    """Write *schedule* to ``<schedules dir>/<id>.json`` atomically."""
    path = _schedule_path(schedule.id)
    # Write to a sibling temp file and rename over the target so a crash
    # mid-write cannot leave a truncated JSON file behind. The ".tmp" suffix
    # keeps the temp file out of the "*.json" glob used when loading.
    tmp_path = path.with_name(path.name + ".tmp")
    tmp_path.write_text(schedule.model_dump_json(indent=2), encoding="utf-8")
    tmp_path.replace(path)


def load_persisted_schedules() -> int:
    """Load every ``*.json`` schedule file into the in-memory registry.

    Files that cannot be read, parsed, or validated are skipped with a
    warning so one bad file does not prevent the others from loading.

    Returns:
        The number of schedules loaded.
    """
    count = 0
    for path in _get_schedules_dir().glob("*.json"):
        try:
            data = json.loads(path.read_text(encoding="utf-8"))
            schedule = EvalSchedule(**data)
            # A file-borne id is later used to build paths for persist/delete.
            _validate_schedule_id(schedule.id)
        except Exception as exc:  # best-effort load: log and keep going
            logger.warning("Failed to load schedule %s: %s", path.name, exc)
            continue
        _schedules[schedule.id] = schedule
        count += 1
    return count


def _is_due(schedule: EvalSchedule, now: float) -> bool:
    """Return whether an enabled schedule's interval has elapsed at *now*."""
    if not schedule.enabled:
        return False
    # A schedule that has never run is measured from its creation time.
    last = schedule.last_run_at or schedule.created_at
    return now - last >= schedule.interval_minutes * 60


def _on_run_finished(task: asyncio.Task) -> None:
    """Drop the finished task's strong reference and log any failure."""
    _run_tasks.discard(task)
    if task.cancelled():
        return
    exc = task.exception()
    if exc is not None:
        logger.error("Scheduled run task failed: %r", exc, exc_info=exc)


def _trigger_schedule(schedule: EvalSchedule, now: float, ds, runner) -> None:
    """Start one run for *schedule* and record it on the schedule.

    ``ds`` and ``runner`` are the project's dataset and runner modules, passed
    in by ``run_scheduler`` because it imports them lazily.
    """
    try:
        dataset = ds.load_dataset(schedule.dataset_name)
    except FileNotFoundError:
        logger.warning(
            "Schedule %s: dataset %r not found, skipping",
            schedule.id,
            schedule.dataset_name,
        )
        return
    logger.info(
        "Schedule %s: triggering run on %s",
        schedule.name,
        schedule.dataset_name,
    )
    run = runner.create_run(dataset, schedule.providers, schedule.top_k)
    task = asyncio.create_task(
        runner.execute_run(run, dataset, schedule.providers, schedule.top_k)
    )
    # Hold a reference until the run completes (see _run_tasks).
    _run_tasks.add(task)
    task.add_done_callback(_on_run_finished)
    schedule.last_run_id = run.id
    schedule.last_run_at = now
    schedule.run_count += 1
    _persist_schedule(schedule)


async def run_scheduler() -> None:
    """Background loop that checks schedules every 60 seconds.

    Runs until cancelled. A failure while triggering one schedule is logged
    and does not stop the loop or affect the other schedules.
    """
    # Import here to avoid circular imports
    from . import datasets as ds
    from . import runner

    logger.info("Scheduler started")
    while True:
        await asyncio.sleep(60)
        now = time.time()
        # Iterate over a snapshot so registry changes cannot break iteration.
        for schedule in list(_schedules.values()):
            if not _is_due(schedule, now):
                continue
            try:
                _trigger_schedule(schedule, now, ds, runner)
            except Exception:
                # Top-level boundary of the background task: an unexpected
                # error here would otherwise end all scheduling silently.
                logger.exception(
                    "Schedule %s: failed to trigger run", schedule.id
                )
import tool_registry from .config import settings from .metric_recommender import AVAILABLE_METRICS -from .models import EvalConfig, EvalMetricWeight +from .models import EvalConfig, EvalMetricWeight, EvalSchedule from .providers import PROVIDERS, available_providers logger = logging.getLogger(__name__) @@ -563,3 +564,48 @@ def get_run_report(run_id: str): "query_winners": query_winners, "duration_seconds": (run.completed_at or 0) - run.created_at, } + + +# -- Schedules --------------------------------------------------------------- + + +@router.get("/schedules") +def list_schedules(): + return {"schedules": [s.model_dump() for s in scheduler.list_schedules()]} + + +@router.post("/schedules", dependencies=[Depends(_require_write_auth)]) +def create_schedule(body: dict): + try: + schedule = EvalSchedule(**body) + except Exception: + raise HTTPException(status_code=400, detail="Invalid schedule data") + # Validate dataset exists + try: + ds.load_dataset(schedule.dataset_name) + except FileNotFoundError: + raise HTTPException(status_code=404, detail=f"Dataset not found: {schedule.dataset_name!r}") + # Validate providers + unknown = set(schedule.providers) - set(PROVIDERS) + if unknown: + raise HTTPException( + status_code=400, + detail=f"Unknown provider(s): {', '.join(sorted(unknown))}", + ) + scheduler.create_schedule(schedule) + return {"ok": True, "id": schedule.id} + + +@router.delete("/schedules/{schedule_id}", dependencies=[Depends(_require_write_auth)]) +def delete_schedule_endpoint(schedule_id: str): + if not scheduler.delete_schedule(schedule_id): + raise HTTPException(status_code=404, detail="Schedule not found") + return {"ok": True, "deleted": schedule_id} + + +@router.patch("/schedules/{schedule_id}/toggle", dependencies=[Depends(_require_write_auth)]) +def toggle_schedule_endpoint(schedule_id: str): + schedule = scheduler.toggle_schedule(schedule_id) + if schedule is None: + raise HTTPException(status_code=404, detail="Schedule not found") + return 
{"ok": True, "enabled": schedule.enabled} diff --git a/server/tests/test_scheduler.py b/server/tests/test_scheduler.py new file mode 100644 index 0000000..144b072 --- /dev/null +++ b/server/tests/test_scheduler.py @@ -0,0 +1,94 @@ +"""Tests for the scheduler module.""" + +from app.models import EvalSchedule +from app import scheduler + + +def test_create_and_list_schedule(tmp_path, monkeypatch): + """Create a schedule and verify it appears in the list.""" + monkeypatch.setattr(scheduler, "_schedules", {}) + monkeypatch.setattr(scheduler, "_SCHEDULES_DIR", tmp_path) + + s = EvalSchedule( + name="test-schedule", + dataset_name="sample", + providers=["brave"], + interval_minutes=60, + ) + scheduler.create_schedule(s) + + schedules = scheduler.list_schedules() + assert len(schedules) == 1 + assert schedules[0].name == "test-schedule" + assert schedules[0].enabled is True + + +def test_toggle_schedule(tmp_path, monkeypatch): + """Toggle a schedule's enabled state.""" + monkeypatch.setattr(scheduler, "_schedules", {}) + monkeypatch.setattr(scheduler, "_SCHEDULES_DIR", tmp_path) + + s = EvalSchedule( + name="toggle-test", + dataset_name="sample", + providers=["serper"], + interval_minutes=30, + ) + scheduler.create_schedule(s) + assert s.enabled is True + + toggled = scheduler.toggle_schedule(s.id) + assert toggled is not None + assert toggled.enabled is False + + toggled2 = scheduler.toggle_schedule(s.id) + assert toggled2 is not None + assert toggled2.enabled is True + + +def test_delete_schedule(tmp_path, monkeypatch): + """Delete a schedule.""" + monkeypatch.setattr(scheduler, "_schedules", {}) + monkeypatch.setattr(scheduler, "_SCHEDULES_DIR", tmp_path) + + s = EvalSchedule( + name="delete-test", + dataset_name="sample", + providers=["tavily"], + interval_minutes=360, + ) + scheduler.create_schedule(s) + assert len(scheduler.list_schedules()) == 1 + + result = scheduler.delete_schedule(s.id) + assert result is True + assert len(scheduler.list_schedules()) == 0 + + 
+def test_delete_nonexistent(tmp_path, monkeypatch): + """Deleting a nonexistent schedule returns False.""" + monkeypatch.setattr(scheduler, "_schedules", {}) + assert scheduler.delete_schedule("nonexistent-id") is False + + +def test_load_persisted(tmp_path, monkeypatch): + """Schedules persist to disk and reload.""" + monkeypatch.setattr(scheduler, "_schedules", {}) + monkeypatch.setattr(scheduler, "_SCHEDULES_DIR", tmp_path) + + s = EvalSchedule( + name="persist-test", + dataset_name="sample", + providers=["brave", "serper"], + interval_minutes=720, + ) + scheduler.create_schedule(s) + + # Clear in-memory state + monkeypatch.setattr(scheduler, "_schedules", {}) + assert len(scheduler.list_schedules()) == 0 + + # Reload from disk + count = scheduler.load_persisted_schedules() + assert count == 1 + assert scheduler.list_schedules()[0].name == "persist-test" diff --git a/ui/src/App.tsx b/ui/src/App.tsx index a325edf..901743f 100644 --- a/ui/src/App.tsx +++ b/ui/src/App.tsx @@ -4,6 +4,7 @@ import { Toaster as Sonner } from "@/components/ui/sonner"; import { Toaster } from "@/components/ui/toaster"; import { TooltipProvider } from "@/components/ui/tooltip"; import ErrorBoundary from "./components/ErrorBoundary"; +import NetworkStatus from "./components/NetworkStatus"; import Landing from "./pages/Landing"; import Dashboard from "./pages/Dashboard"; import Leaderboard from "./pages/Leaderboard"; @@ -11,11 +12,19 @@ import Report from "./pages/Report"; import Configure from "./pages/Configure"; import NotFound from "./pages/NotFound"; -const queryClient = new QueryClient(); +const queryClient = new QueryClient({ + defaultOptions: { + queries: { + retry: 3, + retryDelay: (attempt) => Math.min(1000 * 2 ** attempt, 10000), + }, + }, +}); const App = () => ( + diff --git a/ui/src/components/NetworkStatus.tsx b/ui/src/components/NetworkStatus.tsx new file mode 100644 index 0000000..359ac53 --- /dev/null +++ b/ui/src/components/NetworkStatus.tsx @@ -0,0 +1,27 @@ +import { 
useHealth } from "@/hooks/useApi"; +import { motion, AnimatePresence } from "framer-motion"; +import { WifiOff } from "lucide-react"; + +export default function NetworkStatus() { + const { isError, isLoading } = useHealth(); + const showBanner = isError && !isLoading; + + return ( + + {showBanner && ( + +
+ + Unable to reach Sentifish API. Retrying... +
+
+ )} +
+ ); +} diff --git a/ui/src/components/dashboard/SchedulePanel.tsx b/ui/src/components/dashboard/SchedulePanel.tsx new file mode 100644 index 0000000..b7df321 --- /dev/null +++ b/ui/src/components/dashboard/SchedulePanel.tsx @@ -0,0 +1,277 @@ +import { useState } from "react"; +import { motion, AnimatePresence } from "framer-motion"; +import { Card, CardContent, CardHeader, CardTitle } from "@/components/ui/card"; +import { Button } from "@/components/ui/button"; +import { Input } from "@/components/ui/input"; +import { Label } from "@/components/ui/label"; +import { + Select, + SelectContent, + SelectItem, + SelectTrigger, + SelectValue, +} from "@/components/ui/select"; +import { + useSchedules, + useCreateSchedule, + useDeleteSchedule, + useToggleSchedule, + useDatasets, + useProviders, +} from "@/hooks/useApi"; +import { Calendar, Plus, Trash2, Power, PowerOff, Loader2 } from "lucide-react"; +import { toast } from "sonner"; + +const INTERVAL_OPTIONS = [ + { label: "Every 30 min", value: 30 }, + { label: "Every 1 hour", value: 60 }, + { label: "Every 6 hours", value: 360 }, + { label: "Every 12 hours", value: 720 }, + { label: "Every 24 hours", value: 1440 }, + { label: "Every 7 days", value: 10080 }, +]; + +export default function SchedulePanel() { + const { data: schedules, isLoading } = useSchedules(); + const { data: datasets } = useDatasets(); + const { data: providers } = useProviders(); + const createSchedule = useCreateSchedule(); + const deleteSchedule = useDeleteSchedule(); + const toggleSchedule = useToggleSchedule(); + + const [showForm, setShowForm] = useState(false); + const [name, setName] = useState(""); + const [datasetName, setDatasetName] = useState(""); + const [interval, setInterval] = useState(360); + const [selectedProviders, setSelectedProviders] = useState>(new Set()); + + const handleCreate = async () => { + if (!name || !datasetName || selectedProviders.size === 0) return; + try { + await createSchedule.mutateAsync({ + name, + 
dataset_name: datasetName, + providers: Array.from(selectedProviders), + top_k: 10, + interval_minutes: interval, + enabled: true, + }); + toast.success("Schedule created"); + setShowForm(false); + setName(""); + setDatasetName(""); + setSelectedProviders(new Set()); + } catch (err) { + toast.error("Failed to create schedule", { + description: err instanceof Error ? err.message : "Unknown error", + }); + } + }; + + const handleDelete = async (id: string) => { + try { + await deleteSchedule.mutateAsync(id); + toast.success("Schedule deleted"); + } catch { + toast.error("Failed to delete schedule"); + } + }; + + const handleToggle = async (id: string) => { + try { + await toggleSchedule.mutateAsync(id); + } catch { + toast.error("Failed to toggle schedule"); + } + }; + + const formatInterval = (minutes: number) => { + if (minutes < 60) return `${minutes}m`; + if (minutes < 1440) return `${minutes / 60}h`; + return `${minutes / 1440}d`; + }; + + const formatLastRun = (ts: number | null) => { + if (!ts) return "Never"; + const ago = Math.floor((Date.now() / 1000 - ts) / 60); + if (ago < 60) return `${ago}m ago`; + if (ago < 1440) return `${Math.floor(ago / 60)}h ago`; + return `${Math.floor(ago / 1440)}d ago`; + }; + + return ( + + + +
+
+ + + Scheduled Evaluations + +
+ +
+
+ + {/* Create form */} + + {showForm && ( + +
+
+
+ + setName(e.target.value)} + placeholder="Nightly benchmark" + className="h-8 text-sm" + /> +
+
+ + +
+
+
+ + +
+
+ +
+ {providers?.map((p) => ( + + ))} +
+
+ +
+
+ )} +
+ + {/* Schedule list */} + {isLoading ? ( +
+ + Loading schedules... +
+ ) : !schedules?.length ? ( +

+ No scheduled evaluations yet. +

+ ) : ( +
+ {schedules.map((s) => ( +
+
+

{s.name}

+

+ {s.dataset_name} · {s.providers.length} providers · every{" "} + {formatInterval(s.interval_minutes)} · {s.run_count} runs · last:{" "} + {formatLastRun(s.last_run_at)} +

+
+
+ + +
+
+ ))} +
+ )} +
+
+
+ ); +} diff --git a/ui/src/hooks/useApi.ts b/ui/src/hooks/useApi.ts index 5f3e359..9e2991f 100644 --- a/ui/src/hooks/useApi.ts +++ b/ui/src/hooks/useApi.ts @@ -1,5 +1,5 @@ import { useQuery, useMutation, useQueryClient } from "@tanstack/react-query"; -import { sentifishApi, type EvalRunRequest, type MultiEvalRunRequest, type ToolDefinition, type LatestRunSummary } from "@/lib/api/sentifish"; +import { sentifishApi, type EvalRunRequest, type MultiEvalRunRequest, type ToolDefinition, type LatestRunSummary, type EvalSchedule } from "@/lib/api/sentifish"; const API_BASE = import.meta.env.VITE_SENTIFISH_API_URL || ""; @@ -213,3 +213,35 @@ export function useCreateTask() { export function useRecommendMetrics() { return useMutation({ mutationFn: sentifishApi.recommendMetrics }); } + +export function useSchedules() { + return useQuery({ + queryKey: ["schedules"], + queryFn: sentifishApi.getSchedules, + staleTime: 30_000, + }); +} + +export function useCreateSchedule() { + const qc = useQueryClient(); + return useMutation({ + mutationFn: sentifishApi.createSchedule, + onSuccess: () => qc.invalidateQueries({ queryKey: ["schedules"] }), + }); +} + +export function useDeleteSchedule() { + const qc = useQueryClient(); + return useMutation({ + mutationFn: sentifishApi.deleteSchedule, + onSuccess: () => qc.invalidateQueries({ queryKey: ["schedules"] }), + }); +} + +export function useToggleSchedule() { + const qc = useQueryClient(); + return useMutation({ + mutationFn: sentifishApi.toggleSchedule, + onSuccess: () => qc.invalidateQueries({ queryKey: ["schedules"] }), + }); +} diff --git a/ui/src/lib/api/sentifish.ts b/ui/src/lib/api/sentifish.ts index 1510520..80fe761 100644 --- a/ui/src/lib/api/sentifish.ts +++ b/ui/src/lib/api/sentifish.ts @@ -179,6 +179,20 @@ export interface MetricRecommendation { llm_used: boolean; } +export interface EvalSchedule { + id: string; + name: string; + dataset_name: string; + providers: string[]; + top_k: number; + interval_minutes: number; + 
enabled: boolean; + created_at: number; + last_run_id: string | null; + last_run_at: number | null; + run_count: number; +} + async function apiFetch(path: string, options?: RequestInit): Promise { const res = await fetch(`${API_BASE}${path}`, { headers: { "Content-Type": "application/json", ...options?.headers }, @@ -282,4 +296,16 @@ export const sentifishApi = { const res = await fetch(`${API_BASE}/api/metrics`); return res.json(); }, + + getSchedules: () => + apiFetch<{ schedules: EvalSchedule[] }>("/api/schedules").then((r) => r.schedules), + createSchedule: (schedule: Omit) => + apiFetch<{ ok: boolean; id: string }>("/api/schedules", { + method: "POST", + body: JSON.stringify(schedule), + }), + deleteSchedule: (id: string) => + apiFetch<{ ok: boolean }>(`/api/schedules/${id}`, { method: "DELETE" }), + toggleSchedule: (id: string) => + apiFetch<{ ok: boolean; enabled: boolean }>(`/api/schedules/${id}/toggle`, { method: "PATCH" }), }; diff --git a/ui/src/pages/Dashboard.tsx b/ui/src/pages/Dashboard.tsx index 179623a..01c4c85 100644 --- a/ui/src/pages/Dashboard.tsx +++ b/ui/src/pages/Dashboard.tsx @@ -15,6 +15,7 @@ import NewRunDialog from "@/components/dashboard/NewRunDialog"; import NewDatasetDialog from "@/components/dashboard/NewDatasetDialog"; import RunProgressPanel from "@/components/dashboard/RunProgressPanel"; import DatasetList from "@/components/dashboard/DatasetList"; +import SchedulePanel from "@/components/dashboard/SchedulePanel"; import { useRuns, useDemoRun } from "@/hooks/useApi"; import { toast } from "sonner"; @@ -141,6 +142,8 @@ export default function Dashboard() { + +