Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
128 changes: 96 additions & 32 deletions omlx/admin/routes.py
Original file line number Diff line number Diff line change
Expand Up @@ -3483,6 +3483,13 @@ def _build_runtime_cache_observability(
}

cache_dir = global_settings.cache.get_ssd_cache_dir(global_settings.base_path)
cache_cfg = global_settings.cache
try:
cfg_disk_max = cache_cfg.get_ssd_cache_max_size_bytes(global_settings.base_path)
except (ValueError, OSError, TypeError) as exc:
logger.warning("Could not read SSD cache max size from config: %s", exc)
cfg_disk_max = 0

payload = {
"base_path": str(global_settings.base_path),
"ssd_cache_dir": str(cache_dir),
Expand All @@ -3491,6 +3498,10 @@ def _build_runtime_cache_observability(
"total_num_files": 0,
"total_size_bytes": 0,
"effective_block_sizes": [],
"disk_max_bytes": cfg_disk_max,
"hot_cache_max_bytes": 0,
"hot_cache_size_bytes": 0,
"hot_cache_entries": 0,
}

engine_pool = _get_engine_pool()
Expand Down Expand Up @@ -3602,11 +3613,16 @@ def _build_runtime_cache_observability(
"last_tokens_to_next_block": last_tokens_to_next_block,
"num_files": int(ssd_stats.get("num_files", 0) or 0),
"total_size_bytes": int(ssd_stats.get("total_size_bytes", 0) or 0),
"max_size_bytes": int(ssd_stats.get("max_size_bytes", 0) or 0),
"hot_cache_max_bytes": int(ssd_stats.get("hot_cache_max_bytes", 0) or 0),
"hot_cache_size_bytes": int(ssd_stats.get("hot_cache_size_bytes", 0) or 0),
"hot_cache_entries": int(ssd_stats.get("hot_cache_entries", 0) or 0),
}

cache_rates = runtime_stats.get("cache_rates")
if cache_rates:
model_payload["cache_rates"] = cache_rates

payload["models"].append(model_payload)
payload["total_num_files"] += model_payload["num_files"]
payload["total_size_bytes"] += model_payload["total_size_bytes"]
Expand All @@ -3616,6 +3632,26 @@ def _build_runtime_cache_observability(

payload["effective_block_sizes"] = sorted(block_sizes)

# Aggregate hot-cache and disk-max across models.
# hot_cache_max sums across models (each model reserves its own slice of
# the same process-wide hot cache budget) so the gauge denominator matches
# the summed numerator. disk_max keeps the config fallback via max()
# because a single SSD cache directory is shared — the effective cap is
# the largest configured limit, not a per-model sum.
hot_cache_max = 0
disk_max = payload["disk_max_bytes"]
hot_cache_size_total = 0
hot_cache_entries_total = 0
for m in payload["models"]:
hot_cache_size_total += m.get("hot_cache_size_bytes", 0)
hot_cache_entries_total += m.get("hot_cache_entries", 0)
hot_cache_max += m.get("hot_cache_max_bytes", 0)
disk_max = max(disk_max, m.get("max_size_bytes", 0))
payload["hot_cache_max_bytes"] = hot_cache_max
payload["hot_cache_size_bytes"] = hot_cache_size_total
payload["hot_cache_entries"] = hot_cache_entries_total
payload["disk_max_bytes"] = disk_max

# Fallback: if no loaded models contributed stats, scan the cache
# directory directly so the dashboard still shows real disk usage.
if payload["total_num_files"] == 0 and cache_dir.exists():
Expand Down Expand Up @@ -3870,6 +3906,30 @@ async def clear_alltime_stats(is_admin: bool = Depends(require_admin)):
return {"status": "ok"}


def _iter_loaded_schedulers():
    """Generate ``(model_id, scheduler)`` pairs for every loaded model.

    Walks the internal engine hierarchy (pool entry -> async engine ->
    core engine -> scheduler). A model is skipped when the pool status
    does not report it as loaded, when it has no id, when its pool entry
    or engine is missing, or when no scheduler can be reached. Nothing
    is yielded if there is no engine pool at all.

    Used by both ``clear_ssd_cache`` and ``clear_hot_cache`` so the
    traversal lives in a single place.
    """
    pool = _get_engine_pool()
    if pool is None:
        return

    for info in pool.get_status().get("models", []):
        model_id = info.get("id")
        if not (model_id and info.get("loaded")):
            continue

        entry = pool._entries.get(model_id)
        if entry is None or entry.engine is None:
            continue

        # getattr() with a default of None is safe on a None receiver, so
        # each hop collapses to None as soon as one link is missing.
        async_core = getattr(entry.engine, "_engine", None)
        core = getattr(async_core, "engine", None)
        scheduler = getattr(core, "scheduler", None)
        if scheduler is None:
            continue

        yield model_id, scheduler


@router.post("/api/ssd-cache/clear")
async def clear_ssd_cache(is_admin: bool = Depends(require_admin)):
"""Clear all SSD cache files for all loaded models.
Expand All @@ -3880,38 +3940,17 @@ async def clear_ssd_cache(is_admin: bool = Depends(require_admin)):
"""
total_deleted = 0

# Phase 1: clear via loaded models' cache managers (updates in-memory index)
engine_pool = _get_engine_pool()
if engine_pool is not None:
for model_info in engine_pool.get_status().get("models", []):
model_id = model_info.get("id")
if not model_id or not model_info.get("loaded"):
continue

entry = engine_pool._entries.get(model_id)
if entry is None or entry.engine is None:
continue

async_core = getattr(entry.engine, "_engine", None)
core = (
getattr(async_core, "engine", None) if async_core is not None else None
)
scheduler = (
getattr(core, "scheduler", None) if core is not None else None
)

if scheduler is not None:
ssd_manager = getattr(scheduler, "paged_ssd_cache_manager", None)
if ssd_manager is not None:
try:
deleted = ssd_manager.clear()
total_deleted += deleted
except Exception as exc:
logger.warning(
"Failed to clear SSD cache for model '%s': %s",
model_id,
exc,
)
for model_id, scheduler in _iter_loaded_schedulers():
ssd_manager = getattr(scheduler, "paged_ssd_cache_manager", None)
if ssd_manager is not None:
try:
total_deleted += ssd_manager.clear()
except Exception as exc:
logger.warning(
"Failed to clear SSD cache for model '%s': %s",
model_id,
exc,
)

# Phase 2: remove any remaining files on disk (covers unloaded models)
global_settings = _get_global_settings()
Expand All @@ -3937,6 +3976,31 @@ async def clear_ssd_cache(is_admin: bool = Depends(require_admin)):
return {"status": "ok", "total_deleted": total_deleted}


@router.post("/api/hot-cache/clear")
async def clear_hot_cache(is_admin: bool = Depends(require_admin)):
    """Clear the in-memory (hot) cache of every loaded model.

    For each scheduler yielded by ``_iter_loaded_schedulers`` this calls
    ``clear_hot_cache()`` on its ``paged_ssd_cache_manager`` (when the
    manager exists and exposes that method) and then calls ``clear()`` on
    the scheduler's ``_cache_rate_tracker`` if one is present. A failure
    while clearing one model's hot cache is logged as a warning and does
    not stop the remaining models from being processed.

    No filesystem fallback is needed: the hot cache is in-memory only and
    does not survive a process restart.

    Returns:
        ``{"status": "ok", "total_cleared": <int>}`` where the count is
        the sum of the values returned by each ``clear_hot_cache()`` call.
    """
    cleared_count = 0

    for model_id, scheduler in _iter_loaded_schedulers():
        manager = getattr(scheduler, "paged_ssd_cache_manager", None)
        supports_hot_clear = manager is not None and hasattr(
            manager, "clear_hot_cache"
        )
        if supports_hot_clear:
            try:
                cleared_count += manager.clear_hot_cache()
            except Exception as exc:
                logger.warning(
                    "Failed to clear hot cache for model '%s': %s",
                    model_id,
                    exc,
                )

        # The tracker is reset whether or not the hot cache itself was
        # cleared for this scheduler.
        tracker = getattr(scheduler, "_cache_rate_tracker", None)
        if tracker is not None:
            tracker.clear()

    return {"status": "ok", "total_cleared": cleared_count}


@router.post("/api/cache/probe")
async def probe_cache(
request: CacheProbeRequest,
Expand Down
4 changes: 4 additions & 0 deletions omlx/admin/static/css/dashboard.css
Original file line number Diff line number Diff line change
Expand Up @@ -63,6 +63,10 @@
[data-theme="dark"] .hover\:text-neutral-700:hover { color: var(--text-primary) !important; }
[data-theme="dark"] .hover\:text-neutral-600:hover { color: var(--text-secondary) !important; }

/* === Gauge track (visible in both themes) ===
   The first rule sets the default track colour; the second overrides it
   when an ancestor carries data-theme="dark". The override uses !important
   in the same way as the other dark-theme rules in this stylesheet. */
.gauge-track { background-color: #e5e5e5; }
[data-theme="dark"] .gauge-track { background-color: #3f3f46 !important; }

/* === Active nav tab (bg-white with shadow inside dark nav) === */
[data-theme="dark"] .shadow-sm { box-shadow: 0 1px 2px 0 rgba(0, 0, 0, 0.3) !important; }

Expand Down
62 changes: 61 additions & 1 deletion omlx/admin/static/js/dashboard.js
Original file line number Diff line number Diff line change
Expand Up @@ -160,6 +160,10 @@
total_num_files: 0,
total_size_bytes: 0,
effective_block_sizes: [],
hot_cache_size_bytes: 0,
hot_cache_entries: 0,
hot_cache_max_bytes: 0,
disk_max_bytes: 0,
},
},
alltimeStats: {
Expand Down Expand Up @@ -190,6 +194,7 @@
showClearStatsConfirm: false,
showClearAlltimeConfirm: false,
showClearSsdCacheConfirm: false,
showClearHotCacheConfirm: false,
_statsRefreshTimer: null,

// Log viewer state
Expand Down Expand Up @@ -2149,7 +2154,8 @@

async clearSsdCache() {
try {
await fetch('/admin/api/ssd-cache/clear', { method: 'POST' });
const resp = await fetch('/admin/api/ssd-cache/clear', { method: 'POST' });
if (!resp.ok) console.error('SSD cache clear failed:', resp.status);
this.showClearSsdCacheConfirm = false;
await this.loadStats();
} catch (err) {
Expand All @@ -2158,6 +2164,18 @@
}
},

async clearHotCache() {
try {
const resp = await fetch('/admin/api/hot-cache/clear', { method: 'POST' });
if (!resp.ok) console.error('Hot cache clear failed:', resp.status);
this.showClearHotCacheConfirm = false;
await this.loadStats();
} catch (err) {
console.error('Failed to clear hot cache:', err);
this.showClearHotCacheConfirm = false;
}
},

startStatsRefresh() {
this.stopStatsRefresh();
this._statsRefreshTimer = setInterval(() => {
Expand All @@ -2178,6 +2196,36 @@
return num.toLocaleString();
},

cacheObsCumulative(stats, selectedModel) {
const entries = stats.runtime_cache?.models || [];
if (entries.length === 0) return {};

if (selectedModel) {
const entry = entries.find(m => m.id === selectedModel);
return entry?.cache_rates?.cumulative || {};
}

const sumKeys = ['prefix_hits', 'prefix_misses', 'evictions', 'ssd_hot_hits', 'ssd_disk_loads', 'ssd_saves', 'hot_cache_evictions', 'hot_cache_promotions'];
let agg = {};

for (const m of entries) {
const c = m.cache_rates?.cumulative;
if (!c || Object.keys(c).length === 0) continue;
for (const k of sumKeys) {
agg[k] = (agg[k] || 0) + (c[k] || 0);
}
}

const ph = agg.prefix_hits || 0;
const pm = agg.prefix_misses || 0;
const sh = agg.ssd_hot_hits || 0;
const sd = agg.ssd_disk_loads || 0;
agg.prefix_hit_rate = (ph + pm) > 0 ? ph / (ph + pm) : 0;
agg.ssd_hot_rate = (sh + sd) > 0 ? sh / (sh + sd) : 0;

return agg;
},

getStatFontClass(value) {
if (value >= 1000000000) return 'text-2xl';
if (value >= 1000000) return 'text-3xl';
Expand Down Expand Up @@ -2239,6 +2287,18 @@
return 'bg-red-400';
},

get runtimeHotCachePercent() {
const rc = this.stats.runtime_cache;
if (!rc || !rc.hot_cache_max_bytes) return 0;
return Math.min(100, (rc.hot_cache_size_bytes / rc.hot_cache_max_bytes) * 100);
},

get runtimeSsdCachePercent() {
const rc = this.stats.runtime_cache;
if (!rc || !rc.disk_max_bytes) return 0;
return Math.min(100, (rc.total_size_bytes / rc.disk_max_bytes) * 100);
},

get activeModelsMemoryPercent() {
const am = this.stats.active_models;
if (!am || !am.model_memory_max) return 0;
Expand Down
Loading