diff --git a/omlx/admin/routes.py b/omlx/admin/routes.py index 06ceb808b..16a51a4cb 100644 --- a/omlx/admin/routes.py +++ b/omlx/admin/routes.py @@ -3483,6 +3483,13 @@ def _build_runtime_cache_observability( } cache_dir = global_settings.cache.get_ssd_cache_dir(global_settings.base_path) + cache_cfg = global_settings.cache + try: + cfg_disk_max = cache_cfg.get_ssd_cache_max_size_bytes(global_settings.base_path) + except (ValueError, OSError, TypeError) as exc: + logger.warning("Could not read SSD cache max size from config: %s", exc) + cfg_disk_max = 0 + payload = { "base_path": str(global_settings.base_path), "ssd_cache_dir": str(cache_dir), @@ -3491,6 +3498,10 @@ def _build_runtime_cache_observability( "total_num_files": 0, "total_size_bytes": 0, "effective_block_sizes": [], + "disk_max_bytes": cfg_disk_max, + "hot_cache_max_bytes": 0, + "hot_cache_size_bytes": 0, + "hot_cache_entries": 0, } engine_pool = _get_engine_pool() @@ -3602,11 +3613,16 @@ def _build_runtime_cache_observability( "last_tokens_to_next_block": last_tokens_to_next_block, "num_files": int(ssd_stats.get("num_files", 0) or 0), "total_size_bytes": int(ssd_stats.get("total_size_bytes", 0) or 0), + "max_size_bytes": int(ssd_stats.get("max_size_bytes", 0) or 0), "hot_cache_max_bytes": int(ssd_stats.get("hot_cache_max_bytes", 0) or 0), "hot_cache_size_bytes": int(ssd_stats.get("hot_cache_size_bytes", 0) or 0), "hot_cache_entries": int(ssd_stats.get("hot_cache_entries", 0) or 0), } + cache_rates = runtime_stats.get("cache_rates") + if cache_rates: + model_payload["cache_rates"] = cache_rates + payload["models"].append(model_payload) payload["total_num_files"] += model_payload["num_files"] payload["total_size_bytes"] += model_payload["total_size_bytes"] @@ -3616,6 +3632,26 @@ def _build_runtime_cache_observability( payload["effective_block_sizes"] = sorted(block_sizes) + # Aggregate hot-cache and disk-max across models. 
def _iter_loaded_schedulers():
    """Generate ``(model_id, scheduler)`` pairs for every loaded model.

    The engine pool's status listing decides which models are considered;
    entries without an id or not flagged ``loaded`` are skipped. For each
    remaining model the pool's private ``_entries`` mapping is consulted and
    the chain ``entry.engine -> _engine -> engine -> scheduler`` is followed.
    A model is silently skipped as soon as any link in that chain is missing.

    Yields:
        Tuples of the model id and the scheduler object found at the end of
        the attribute chain. Nothing is yielded when no engine pool exists.
    """
    pool = _get_engine_pool()
    if pool is None:
        return

    status = pool.get_status()
    for info in status.get("models", []):
        model_id = info.get("id")
        if not (model_id and info.get("loaded")):
            continue

        entry = pool._entries.get(model_id)
        if entry is None or entry.engine is None:
            continue

        # Descend one attribute at a time; a missing (or None) link ends the
        # walk and leaves ``node`` as None so the model is not yielded.
        node = entry.engine
        for attr in ("_engine", "engine", "scheduler"):
            node = getattr(node, attr, None)
            if node is None:
                break

        if node is not None:
            yield model_id, node
@router.post("/api/hot-cache/clear")
async def clear_hot_cache(is_admin: bool = Depends(require_admin)):
    """Drop the in-memory (hot) cache of every loaded model.

    For each scheduler produced by ``_iter_loaded_schedulers`` this calls
    ``paged_ssd_cache_manager.clear_hot_cache()`` when the manager exists and
    exposes that method, adding its return value to the running total. A
    failure for one model is logged as a warning and does not stop the loop.
    The scheduler's ``_cache_rate_tracker``, when present, is cleared as
    well, regardless of whether the hot-cache clear succeeded.

    There is no filesystem pass here: the hot cache lives only in memory.

    Returns:
        ``{"status": "ok", "total_cleared": <sum of per-model counts>}``.
    """
    cleared_entries = 0

    for model_id, scheduler in _iter_loaded_schedulers():
        manager = getattr(scheduler, "paged_ssd_cache_manager", None)
        supports_hot_clear = manager is not None and hasattr(
            manager, "clear_hot_cache"
        )
        if supports_hot_clear:
            try:
                cleared_entries += manager.clear_hot_cache()
            except Exception as exc:
                logger.warning(
                    "Failed to clear hot cache for model '%s': %s",
                    model_id,
                    exc,
                )

        tracker = getattr(scheduler, "_cache_rate_tracker", None)
        if tracker is not None:
            tracker.clear()

    return {"status": "ok", "total_cleared": cleared_entries}
nav) === */ [data-theme="dark"] .shadow-sm { box-shadow: 0 1px 2px 0 rgba(0, 0, 0, 0.3) !important; } diff --git a/omlx/admin/static/js/dashboard.js b/omlx/admin/static/js/dashboard.js index ec8ddbf90..d5eca0eba 100644 --- a/omlx/admin/static/js/dashboard.js +++ b/omlx/admin/static/js/dashboard.js @@ -160,6 +160,10 @@ total_num_files: 0, total_size_bytes: 0, effective_block_sizes: [], + hot_cache_size_bytes: 0, + hot_cache_entries: 0, + hot_cache_max_bytes: 0, + disk_max_bytes: 0, }, }, alltimeStats: { @@ -190,6 +194,7 @@ showClearStatsConfirm: false, showClearAlltimeConfirm: false, showClearSsdCacheConfirm: false, + showClearHotCacheConfirm: false, _statsRefreshTimer: null, // Log viewer state @@ -2149,7 +2154,8 @@ async clearSsdCache() { try { - await fetch('/admin/api/ssd-cache/clear', { method: 'POST' }); + const resp = await fetch('/admin/api/ssd-cache/clear', { method: 'POST' }); + if (!resp.ok) console.error('SSD cache clear failed:', resp.status); this.showClearSsdCacheConfirm = false; await this.loadStats(); } catch (err) { @@ -2158,6 +2164,18 @@ } }, + async clearHotCache() { + try { + const resp = await fetch('/admin/api/hot-cache/clear', { method: 'POST' }); + if (!resp.ok) console.error('Hot cache clear failed:', resp.status); + this.showClearHotCacheConfirm = false; + await this.loadStats(); + } catch (err) { + console.error('Failed to clear hot cache:', err); + this.showClearHotCacheConfirm = false; + } + }, + startStatsRefresh() { this.stopStatsRefresh(); this._statsRefreshTimer = setInterval(() => { @@ -2178,6 +2196,36 @@ return num.toLocaleString(); }, + cacheObsCumulative(stats, selectedModel) { + const entries = stats.runtime_cache?.models || []; + if (entries.length === 0) return {}; + + if (selectedModel) { + const entry = entries.find(m => m.id === selectedModel); + return entry?.cache_rates?.cumulative || {}; + } + + const sumKeys = ['prefix_hits', 'prefix_misses', 'evictions', 'ssd_hot_hits', 'ssd_disk_loads', 'ssd_saves', 
'hot_cache_evictions', 'hot_cache_promotions']; + let agg = {}; + + for (const m of entries) { + const c = m.cache_rates?.cumulative; + if (!c || Object.keys(c).length === 0) continue; + for (const k of sumKeys) { + agg[k] = (agg[k] || 0) + (c[k] || 0); + } + } + + const ph = agg.prefix_hits || 0; + const pm = agg.prefix_misses || 0; + const sh = agg.ssd_hot_hits || 0; + const sd = agg.ssd_disk_loads || 0; + agg.prefix_hit_rate = (ph + pm) > 0 ? ph / (ph + pm) : 0; + agg.ssd_hot_rate = (sh + sd) > 0 ? sh / (sh + sd) : 0; + + return agg; + }, + getStatFontClass(value) { if (value >= 1000000000) return 'text-2xl'; if (value >= 1000000) return 'text-3xl'; @@ -2239,6 +2287,18 @@ return 'bg-red-400'; }, + get runtimeHotCachePercent() { + const rc = this.stats.runtime_cache; + if (!rc || !rc.hot_cache_max_bytes) return 0; + return Math.min(100, (rc.hot_cache_size_bytes / rc.hot_cache_max_bytes) * 100); + }, + + get runtimeSsdCachePercent() { + const rc = this.stats.runtime_cache; + if (!rc || !rc.disk_max_bytes) return 0; + return Math.min(100, (rc.total_size_bytes / rc.disk_max_bytes) * 100); + }, + get activeModelsMemoryPercent() { const am = this.stats.active_models; if (!am || !am.model_memory_max) return 0; diff --git a/omlx/admin/templates/dashboard/_status.html b/omlx/admin/templates/dashboard/_status.html index 1d6e04f40..865ba6c25 100644 --- a/omlx/admin/templates/dashboard/_status.html +++ b/omlx/admin/templates/dashboard/_status.html @@ -282,8 +282,47 @@

{{ t('status.head Runtime Cache Observability
- + +
+ Memory +
+
+
+ +
+ +
+ Clear memory cache? + + +
+
+
+ + | + +
+ SSD +
+
+
+ +
+ +
+
+

Prefix Hit Rate

+

+
+
+

Memory Hit Rate

+

+
+
+

Prefix Evictions

+

+
+
+

Memory Evictions

+

+
+
+
@@ -336,8 +402,10 @@

{{ t('status.head

- - + + + + @@ -358,6 +426,8 @@

{{ t('status.head

# SPDX-License-Identifier: Apache-2.0
"""Rolling-window rate tracking for cache observability counters.

Callers feed monotonically growing counter dictionaries into a
``CacheRateTracker``; the tracker keeps a bounded history of timestamped
snapshots and derives per-window deltas/ratios plus cumulative totals.
"""
import threading
import time
from collections import deque
from typing import Any


# Default reporting windows, in seconds.
_DEFAULT_WINDOWS = (60, 300, 900)
# Upper bound on retained snapshots (oldest are dropped by the deque).
_MAX_SNAPSHOTS = 90
# Minimum spacing, in seconds, between two stored snapshots.
_MIN_INTERVAL = 10.0


class CacheRateTracker:
    """Keep timestamped counter snapshots and compute windowed cache rates.

    All access to the snapshot history is serialized with an internal lock.
    Timestamps come from ``time.monotonic()``.
    """

    def __init__(
        self,
        max_snapshots: int = _MAX_SNAPSHOTS,
        min_interval: float = _MIN_INTERVAL,
    ):
        """Create an empty tracker.

        Args:
            max_snapshots: Maximum number of snapshots retained.
            min_interval: Minimum seconds between two stored snapshots.
        """
        self._snapshots: deque[tuple[float, dict[str, int]]] = deque(
            maxlen=max_snapshots
        )
        self._min_interval = min_interval
        self._lock = threading.Lock()

    def maybe_snapshot(self, counters: dict[str, int]) -> bool:
        """Store a copy of *counters* unless the last snapshot is too recent.

        Returns:
            True when a snapshot was stored, False when it was throttled
            because less than ``min_interval`` seconds have elapsed since
            the previous one.
        """
        with self._lock:
            now = time.monotonic()
            if self._snapshots and (now - self._snapshots[-1][0]) < self._min_interval:
                return False
            # Copy so later mutation by the caller cannot alter history.
            self._snapshots.append((now, dict(counters)))
            return True

    def get_rates(
        self, windows: tuple[int, ...] = _DEFAULT_WINDOWS
    ) -> dict[str, Any]:
        """Compute per-window rates and cumulative totals from stored history.

        "Now" is the timestamp of the newest snapshot, not the wall clock.
        For each window the baseline is the oldest snapshot that still falls
        inside the window; a window whose baseline is less than one second
        older than the newest snapshot is reported as an empty dict.

        Args:
            windows: Window lengths in seconds.

        Returns:
            ``{"windows": {label: stats}, "cumulative": totals}``; both
            values are empty dicts when no snapshot has been stored.
        """
        with self._lock:
            if not self._snapshots:
                return {"windows": {}, "cumulative": {}}

            now, newest = self._snapshots[-1]

            window_rates: dict[str, Any] = {}
            for w in windows:
                label = _window_label(w)
                # Oldest snapshot inside the window, else the oldest overall.
                baseline_ts, baseline_counters = next(
                    (
                        (ts, counters)
                        for ts, counters in self._snapshots
                        if (now - ts) <= w
                    ),
                    self._snapshots[0],
                )
                elapsed = now - baseline_ts
                if elapsed < 1.0:
                    # Not enough history to produce a meaningful rate.
                    window_rates[label] = {}
                    continue
                window_rates[label] = _compute_window(
                    baseline_counters, newest, elapsed
                )

            cumulative = _compute_cumulative(newest)
            return {"windows": window_rates, "cumulative": cumulative}

    def snapshot_and_get_rates(
        self,
        counters: dict[str, int],
        windows: tuple[int, ...] = _DEFAULT_WINDOWS,
    ) -> dict[str, Any]:
        """Record *counters* (subject to throttling) and return current rates.

        Window statistics are derived from the stored snapshots. Cumulative
        totals are derived from *counters* directly, so they stay current
        even when the snapshot itself was throttled.
        """
        self.maybe_snapshot(counters)
        rates = self.get_rates(windows)
        # A throttled snapshot would otherwise leave the totals frozen at the
        # previous snapshot for up to ``min_interval`` seconds.
        rates["cumulative"] = _compute_cumulative(counters)
        return rates

    def clear(self) -> None:
        """Discard every stored snapshot."""
        with self._lock:
            self._snapshots.clear()


def _window_label(seconds: int) -> str:
    """Return a short label for a window length.

    Whole minutes are rendered as ``"<n>m"``; anything else keeps second
    resolution (``"<n>s"``) so that distinct windows such as 60 and 90 never
    collapse onto the same label.
    """
    if seconds >= 60 and seconds % 60 == 0:
        return f"{seconds // 60}m"
    return f"{seconds}s"


def _safe_ratio(numerator: int, denominator: int) -> float:
    """Return ``numerator / denominator``, or 0.0 when the denominator is 0."""
    if denominator == 0:
        return 0.0
    return numerator / denominator


def _compute_window(
    old: dict[str, int], new: dict[str, int], elapsed: float
) -> dict[str, Any]:
    """Compute counter deltas and ratios between two snapshots.

    Args:
        old: Baseline counters.
        new: Newest counters.
        elapsed: Seconds between the two snapshots.

    Returns:
        Deltas for hits/misses/evictions/SSD loads plus derived ratios
        (rounded to 4 places) and evictions per minute (rounded to 2).
    """
    def delta(key: str) -> int:
        # Clamp at zero: a counter that went backwards (e.g. after a stats
        # reset) must not produce negative activity.
        return max(0, new.get(key, 0) - old.get(key, 0))

    d_prefix_hits = delta("prefix_hits")
    d_prefix_misses = delta("prefix_misses")
    d_evictions = delta("evictions")
    d_ssd_hot = delta("ssd_hot_hits")
    d_ssd_disk = delta("ssd_disk_loads")
    d_tokens_matched = delta("prefix_tokens_matched")
    d_tokens_requested = delta("prefix_tokens_requested")

    minutes = elapsed / 60.0

    return {
        "prefix_hit_rate": round(
            _safe_ratio(d_prefix_hits, d_prefix_hits + d_prefix_misses), 4
        ),
        "prefix_hits": d_prefix_hits,
        "prefix_misses": d_prefix_misses,
        "prefix_match_efficiency": round(
            _safe_ratio(d_tokens_matched, d_tokens_requested), 4
        ),
        "evictions": d_evictions,
        "eviction_rate_per_min": round(d_evictions / minutes, 2) if minutes > 0 else 0.0,
        "ssd_hot_hits": d_ssd_hot,
        "ssd_disk_loads": d_ssd_disk,
        "ssd_hot_rate": round(
            _safe_ratio(d_ssd_hot, d_ssd_hot + d_ssd_disk), 4
        ),
    }


def _compute_cumulative(counters: dict[str, int]) -> dict[str, Any]:
    """Build the cumulative totals view from a single counter dictionary.

    Missing keys are treated as 0; ratios are rounded to 4 places.
    """
    prefix_hits = counters.get("prefix_hits", 0)
    prefix_misses = counters.get("prefix_misses", 0)
    ssd_hot = counters.get("ssd_hot_hits", 0)
    ssd_disk = counters.get("ssd_disk_loads", 0)
    tokens_matched = counters.get("prefix_tokens_matched", 0)
    tokens_requested = counters.get("prefix_tokens_requested", 0)

    return {
        "prefix_hits": prefix_hits,
        "prefix_misses": prefix_misses,
        "prefix_hit_rate": round(_safe_ratio(prefix_hits, prefix_hits + prefix_misses), 4),
        "prefix_tokens_saved": counters.get("prefix_tokens_saved", 0),
        "prefix_match_efficiency": round(
            _safe_ratio(tokens_matched, tokens_requested), 4
        ),
        "evictions": counters.get("evictions", 0),
        "ssd_hot_hits": ssd_hot,
        "ssd_disk_loads": ssd_disk,
        "ssd_saves": counters.get("ssd_saves", 0),
        "hot_cache_evictions": counters.get("hot_cache_evictions", 0),
        "hot_cache_promotions": counters.get("hot_cache_promotions", 0),
        "ssd_hot_rate": round(_safe_ratio(ssd_hot, ssd_hot + ssd_disk), 4),
    }
diff --git a/omlx/cache/prefix_cache.py b/omlx/cache/prefix_cache.py index 4f2bd1d32..c9efaf349 100644 --- a/omlx/cache/prefix_cache.py +++ b/omlx/cache/prefix_cache.py @@ -117,6 +117,8 @@ def __init__( self._tokens_saved = 0 self._partial_block_skips = 0 self._partial_tokens_skipped = 0 + self._tokens_matched_total = 0 + self._tokens_requested_total = 0 self._last_partial_tokens_skipped = 0 self._last_tokens_to_next_block = 0 @@ -285,6 +287,8 @@ def fetch_cache( num_prefix_tokens = len(tokens) - len(remaining) self._hits += 1 self._tokens_saved += num_prefix_tokens + self._tokens_matched_total += num_prefix_tokens + self._tokens_requested_total += len(tokens) logger.debug( f"Cache hit for {request_id}: " @@ -310,6 +314,8 @@ def fetch_cache( remaining = tokens[prefix_len:] self._hits += 1 self._tokens_saved += prefix_len + self._tokens_matched_total += prefix_len + self._tokens_requested_total += len(tokens) logger.debug( f"Prefix index hit for {request_id}: " f"{prefix_len} tokens matched" @@ -319,6 +325,7 @@ def fetch_cache( # No cache hit self._misses += 1 + self._tokens_requested_total += len(tokens) logger.debug(f"Cache miss for {request_id}") return None, tokens @@ -2367,6 +2374,8 @@ def get_stats(self) -> PrefixCacheStats: block_size=self.block_size, last_partial_tokens_skipped=self._last_partial_tokens_skipped, last_tokens_to_next_block=self._last_tokens_to_next_block, + tokens_matched_total=self._tokens_matched_total, + tokens_requested_total=self._tokens_requested_total, ) def get_stats_dict(self) -> dict[str, Any]: @@ -2393,6 +2402,8 @@ def get_stats_dict(self) -> dict[str, Any]: "block_size": self.block_size, "last_partial_tokens_skipped": self._last_partial_tokens_skipped, "last_tokens_to_next_block": self._last_tokens_to_next_block, + "tokens_matched_total": self._tokens_matched_total, + "tokens_requested_total": self._tokens_requested_total, "active_requests": len(self._request_tables), **paged_stats, } @@ -2404,6 +2415,8 @@ def reset_stats(self) -> 
None: self._tokens_saved = 0 self._partial_block_skips = 0 self._partial_tokens_skipped = 0 + self._tokens_matched_total = 0 + self._tokens_requested_total = 0 self._last_partial_tokens_skipped = 0 self._last_tokens_to_next_block = 0 self.paged_cache.reset_stats() diff --git a/omlx/cache/stats.py b/omlx/cache/stats.py index 412074fc7..01a78c531 100644 --- a/omlx/cache/stats.py +++ b/omlx/cache/stats.py @@ -88,6 +88,8 @@ class PrefixCacheStats(BaseCacheStats): block_size: int = 0 last_partial_tokens_skipped: int = 0 last_tokens_to_next_block: int = 0 + tokens_matched_total: int = 0 + tokens_requested_total: int = 0 _total_queries: int = field(default=0, repr=False) @property @@ -111,6 +113,8 @@ def reset(self) -> None: self.partial_tokens_skipped = 0 self.last_partial_tokens_skipped = 0 self.last_tokens_to_next_block = 0 + self.tokens_matched_total = 0 + self.tokens_requested_total = 0 self._total_queries = 0 diff --git a/omlx/scheduler.py b/omlx/scheduler.py index fab20ed7b..ef17f295f 100644 --- a/omlx/scheduler.py +++ b/omlx/scheduler.py @@ -37,6 +37,7 @@ from mlx_lm.models.cache import make_prompt_cache from mlx_lm.sample_utils import make_logits_processors +from .cache.observability import CacheRateTracker from .cache.paged_cache import PagedCacheManager from .cache.prefix_cache import BlockAwarePrefixCache from .exceptions import is_cache_corruption_error @@ -781,6 +782,7 @@ def __init__( self.paged_cache_manager: PagedCacheManager | None = None self.block_aware_cache: BlockAwarePrefixCache | None = None self.paged_ssd_cache_manager: PagedSSDCacheManager | None = None + self._cache_rate_tracker = CacheRateTracker() self.memory_monitor: MemoryMonitor | None = None # Initialize paged SSD cache if paged_ssd_cache_dir is specified @@ -5322,6 +5324,7 @@ def _recover_from_cache_error(self) -> None: # Clear caches if self.block_aware_cache is not None: self.block_aware_cache.clear() + self._cache_rate_tracker.clear() # Clear UID mappings self.request_id_to_uid.clear() 
@@ -5651,6 +5654,7 @@ def reset(self) -> None: # Clear caches if self.block_aware_cache is not None: self.block_aware_cache.clear() + self._cache_rate_tracker.clear() # Clear detokenizers self._request_detokenizers.clear() @@ -6083,6 +6087,35 @@ def restore_cold_blocks_for_request(self, request_id: str) -> int: return verified + def _collect_cache_counters(self) -> dict[str, int] | None: + if self.block_aware_cache is None: + return None + + prefix_stats = self.block_aware_cache.get_stats() + counters = { + "prefix_hits": prefix_stats.hits, + "prefix_misses": prefix_stats.misses, + "prefix_tokens_matched": prefix_stats.tokens_matched_total, + "prefix_tokens_requested": prefix_stats.tokens_requested_total, + "prefix_tokens_saved": prefix_stats.tokens_saved, + "evictions": prefix_stats.evictions, + } + + if self.paged_ssd_cache_manager is not None: + ssd = self.paged_ssd_cache_manager.get_stats() + hot_hits = ssd.hot_cache_hits + total_loads = ssd.loads + counters.update({ + "ssd_hot_hits": hot_hits, + "ssd_disk_loads": max(0, total_loads - hot_hits), + "ssd_saves": ssd.saves, + "ssd_errors": ssd.errors, + "hot_cache_evictions": ssd.hot_cache_evictions, + "hot_cache_promotions": ssd.hot_cache_promotions, + }) + + return counters + def get_ssd_cache_stats(self) -> dict[str, Any] | None: """Get paged SSD + prefix cache observability statistics.""" stats = {} @@ -6091,15 +6124,18 @@ def get_ssd_cache_stats(self) -> dict[str, Any] | None: stats["ssd_cache"] = self.paged_ssd_cache_manager.get_stats() if self.paged_cache_manager is not None: - # In paged SSD-only mode, all cache data is on paged SSD stats["indexed_blocks"] = self.paged_cache_manager.cold_block_count stats["block_size"] = self.config.paged_cache_block_size if self.block_aware_cache is not None: - # Expose prefix-cache observability so UI can distinguish - # "0 indexed blocks" from "sub-block cached (
Block Size Indexed Blocks Sub-block CacheCache FilesCache SizeSSD FilesSSD SizeMemory EntriesMemory Size