Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 5 additions & 0 deletions api/routes.py
Original file line number Diff line number Diff line change
Expand Up @@ -480,6 +480,7 @@ def _clear_live_models_cache() -> None:
_redact_text,
)
from api.agent_health import build_agent_health_payload
from api.system_health import build_system_health_payload


def _clear_stale_stream_state(session) -> bool:
Expand Down Expand Up @@ -2491,6 +2492,10 @@ def handle_get(handler, parsed) -> bool:
if parsed.path == "/api/health/agent":
return j(handler, build_agent_health_payload())

if parsed.path == "/api/system/health":
j(handler, build_system_health_payload())
return True

if parsed.path == "/api/models":
return j(handler, get_available_models())

Expand Down
167 changes: 167 additions & 0 deletions api/system_health.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,167 @@
"""Safe aggregate host resource metrics for the WebUI VPS panel (#693).

The browser only needs coarse CPU/RAM/disk usage. Keep this module intentionally
small and dependency-free: no process lists, command strings, user identities,
environment variables, or filesystem topology leave the server.
"""

from __future__ import annotations

import shutil
import time
from datetime import datetime, timezone
from pathlib import Path
from typing import Any


# Linux procfs files the CPU and memory collectors read from.
_PROC_STAT = Path("/proc/stat")
_PROC_MEMINFO = Path("/proc/meminfo")
# Seconds slept between the two /proc/stat reads taken by _cpu_percent().
_CPU_SAMPLE_SECONDS = 0.05


def _checked_at() -> str:
return datetime.now(timezone.utc).isoformat()


def _clamp_percent(value: Any) -> float:
try:
numeric = float(value)
except (TypeError, ValueError):
return 0.0
if numeric < 0:
numeric = 0.0
if numeric > 100:
numeric = 100.0
return round(numeric, 1)


def _read_proc_stat_cpu() -> tuple[int, int]:
    """Return ``(idle_ticks, total_ticks)`` from the aggregate ``cpu`` line of /proc/stat.

    Idle time is ``idle + iowait`` (iowait is optional). Total time is the sum
    of the first eight counters (user, nice, system, idle, iowait, irq,
    softirq, steal). The trailing ``guest`` and ``guest_nice`` counters are
    left out because the kernel already includes them in ``user`` and
    ``nice``; summing every field would double-count guest time on hosts that
    run virtual machines.

    Raises:
        RuntimeError: ``"proc_stat_unavailable"`` when the first line is not
            the aggregate ``cpu`` line, has fewer than four counters, or the
            counters do not sum to a positive total.
        OSError: If the stat file cannot be opened.
        ValueError: If a counter is not an integer.
    """
    with _PROC_STAT.open("r", encoding="utf-8") as handle:
        fields = handle.readline().split()
    if not fields or fields[0] != "cpu":
        raise RuntimeError("proc_stat_unavailable")
    counters = [int(part) for part in fields[1:]]
    if len(counters) < 4:
        raise RuntimeError("proc_stat_unavailable")
    idle = counters[3] + (counters[4] if len(counters) > 4 else 0)
    # Stop at steal (index 7): guest/guest_nice are already inside user/nice.
    total = sum(counters[:8])
    if total <= 0:
        raise RuntimeError("proc_stat_unavailable")
    return idle, total


def _cpu_delta_percent(start: tuple[int, int], end: tuple[int, int]) -> float:
idle_delta = end[0] - start[0]
total_delta = end[1] - start[1]
if total_delta <= 0:
return 0.0
busy_delta = max(0, total_delta - max(0, idle_delta))
return _clamp_percent((busy_delta / total_delta) * 100.0)


def _cpu_percent() -> float:
    """Measure aggregate CPU usage over a short window, without psutil.

    Two /proc/stat readings are taken ``_CPU_SAMPLE_SECONDS`` apart and
    compared, so no state has to be kept between requests and the first poll
    already yields a percentage. Any exception raised by the /proc/stat
    reader (for example where the file does not exist) propagates to the
    caller.
    """
    before = _read_proc_stat_cpu()
    time.sleep(_CPU_SAMPLE_SECONDS)
    after = _read_proc_stat_cpu()
    return _cpu_delta_percent(before, after)


def _read_meminfo_kib() -> dict[str, int]:
    """Parse /proc/meminfo into a ``{field_name: integer_value}`` mapping.

    Each line is split at the first ``:``; the first whitespace-separated
    token after the colon is stored as an int. Lines without a colon, without
    a value, or with a non-integer value are skipped.
    """
    parsed: dict[str, int] = {}
    with _PROC_MEMINFO.open("r", encoding="utf-8") as stream:
        for raw_line in stream:
            label, _sep, remainder = raw_line.partition(":")
            if not (label and remainder):
                continue
            tokens = remainder.split()
            if not tokens:
                continue
            try:
                parsed[label] = int(tokens[0])
            except ValueError:
                # Non-numeric value: ignore this field and keep parsing.
                continue
    return parsed


def _memory_usage() -> dict[str, int | float]:
    """Return used/total RAM in bytes plus the used percentage.

    ``MemAvailable`` is used when present. Otherwise availability is
    estimated as MemFree + Buffers + Cached + SReclaimable - Shmem. Meminfo
    values are multiplied by 1024 to convert them to bytes.

    Raises:
        RuntimeError: ``"meminfo_unavailable"`` when ``MemTotal`` is missing
            or not positive.
    """
    info = _read_meminfo_kib()
    total_bytes = int(info.get("MemTotal") or 0) * 1024
    if total_bytes <= 0:
        raise RuntimeError("meminfo_unavailable")

    avail_kib = info.get("MemAvailable")
    if avail_kib is None:
        # No MemAvailable field: approximate it from the reclaimable pools.
        reclaimable = sum(
            info.get(field, 0)
            for field in ("MemFree", "Buffers", "Cached", "SReclaimable")
        )
        avail_kib = reclaimable - info.get("Shmem", 0)

    avail_bytes = max(0, int(avail_kib) * 1024)
    # Keep the result within [0, total] even if the estimate overshoots.
    used_bytes = max(0, min(total_bytes, total_bytes - avail_bytes))
    percent = _clamp_percent((used_bytes / total_bytes) * 100.0)
    return {
        "used_bytes": used_bytes,
        "total_bytes": total_bytes,
        "percent": percent,
    }


def _disk_usage() -> dict[str, int | float]:
    """Return used/total bytes and used percentage for the ``/`` filesystem.

    Raises:
        RuntimeError: ``"disk_unavailable"`` when the reported total size is
            not positive.
    """
    root = shutil.disk_usage("/")
    capacity = int(root.total)
    if capacity <= 0:
        raise RuntimeError("disk_unavailable")
    consumed = int(root.used)
    percent = _clamp_percent((consumed / capacity) * 100.0)
    return {
        "used_bytes": consumed,
        "total_bytes": capacity,
        "percent": percent,
    }


def _safe_error(metric: str, exc: Exception) -> dict[str, str]:
# Keep this intentionally coarse. Exception messages can contain local paths
# on unusual platforms; the browser only needs a safe unavailable reason.
return {"metric": metric, "code": type(exc).__name__}


def build_system_health_payload() -> dict[str, Any]:
    """Collect CPU, memory and disk usage into one JSON-ready payload.

    Each collector runs independently. A collector that raises leaves its
    metric as ``None`` and adds a ``{"metric", "code"}`` entry to ``errors``.

    Returns:
        A dict with ``status`` (``"ok"`` when every metric was collected,
        ``"partial"`` when only some were, ``"unavailable"`` when none were),
        ``available``, ``checked_at``, the three metric entries and the
        ``errors`` list.
    """
    samplers = (
        ("cpu", _cpu_percent),
        ("memory", _memory_usage),
        ("disk", _disk_usage),
    )
    readings: dict[str, Any] = {label: None for label, _ in samplers}
    failures: list[dict[str, str]] = []

    for label, sampler in samplers:
        # Broad catch is intentional: one failing metric must not take down
        # the whole endpoint; it is reported through `errors` instead.
        try:
            raw = sampler()
            if label == "cpu":
                entry = {"percent": _clamp_percent(raw)}
            else:
                entry = {
                    "used_bytes": max(0, int(raw["used_bytes"])),
                    "total_bytes": max(0, int(raw["total_bytes"])),
                    "percent": _clamp_percent(raw["percent"]),
                }
        except Exception as exc:
            failures.append(_safe_error(label, exc))
        else:
            readings[label] = entry

    available = any(reading is not None for reading in readings.values())
    if not available:
        status = "unavailable"
    elif failures:
        status = "partial"
    else:
        status = "ok"

    return {
        "status": status,
        "available": available,
        "checked_at": _checked_at(),
        "cpu": readings["cpu"],
        "memory": readings["memory"],
        "disk": readings["disk"],
        "errors": failures,
    }
Binary file added docs/pr-media/1688/chat-no-health-bar.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Binary file added docs/pr-media/1688/insights-system-health.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Binary file added docs/pr-media/693/system-health-panel.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
32 changes: 32 additions & 0 deletions static/panels.js
Original file line number Diff line number Diff line change
Expand Up @@ -215,6 +215,7 @@ async function switchPanel(name, opts = {}) {
if (nextPanel === 'insights') await loadInsights();
if (nextPanel === 'logs') await loadLogs();
_syncLogsAutoRefresh();
if (typeof _syncSystemHealthMonitorVisibility === 'function') _syncSystemHealthMonitorVisibility();
if (nextPanel === 'settings') {
switchSettingsSection(_currentSettingsSection);
loadSettingsPanel();
Expand Down Expand Up @@ -2118,6 +2119,8 @@ async function loadInsights(animate) {
api('/api/wiki/status').catch(err => ({status:'error', error: err.message || String(err)})),
]);
_renderInsights(data, box, wikiStatus);
if (typeof _syncSystemHealthMonitorVisibility === 'function') _syncSystemHealthMonitorVisibility();
if (typeof pollSystemHealth === 'function') void pollSystemHealth();
} catch(e) {
box.innerHTML = `<div style="color:var(--accent);font-size:12px">${esc(t('error_prefix') + e.message)}</div>`;
} finally {
Expand All @@ -2134,6 +2137,34 @@ function _formatLlmWikiTimestamp(value) {
catch (_) { return String(value); }
}

// Returns the static HTML skeleton for the "System health" card: a header with
// a status pill (#systemHealthStatus) and one row each for CPU, RAM and Disk.
// The card starts with the `loading` class, "—" values and aria-valuenow="0";
// the data-system-health-metric / data-system-health-value attributes are the
// hooks other code can select on to fill in live values.
function _renderSystemHealthPanel() {
return `
<section class="insights-card system-health-panel loading" id="systemHealthPanel" aria-label="Host resource health" aria-live="polite">
<div class="system-health-head">
<div>
<div class="insights-card-title">System health</div>
<div class="system-health-sub">Current VPS resource usage</div>
</div>
<span class="system-health-status" id="systemHealthStatus"><span class="system-health-dot" aria-hidden="true"></span>Loading…</span>
</div>
<div class="system-health-metrics">
<div class="system-health-metric" data-system-health-metric="cpu">
<div class="system-health-label"><span>CPU</span><span class="system-health-value" data-system-health-value>—</span></div>
<div class="system-health-bar" role="progressbar" aria-label="CPU usage" aria-valuemin="0" aria-valuemax="100" aria-valuenow="0"><div class="system-health-bar-fill"></div></div>
</div>
<div class="system-health-metric" data-system-health-metric="memory">
<div class="system-health-label"><span>RAM</span><span class="system-health-value" data-system-health-value>—</span></div>
<div class="system-health-bar" role="progressbar" aria-label="RAM usage" aria-valuemin="0" aria-valuemax="100" aria-valuenow="0"><div class="system-health-bar-fill"></div></div>
</div>
<div class="system-health-metric" data-system-health-metric="disk">
<div class="system-health-label"><span>Disk</span><span class="system-health-value" data-system-health-value>—</span></div>
<div class="system-health-bar" role="progressbar" aria-label="Disk usage" aria-valuemin="0" aria-valuemax="100" aria-valuenow="0"><div class="system-health-bar-fill"></div></div>
</div>
</div>
<div class="system-health-foot">Live snapshot only; historical resource charts can build on this surface later.</div>
</section>`;
}

function _renderLlmWikiStatus(d) {
const status = d || {status:'error'};
const isReady = status.available && status.status === 'ready';
Expand Down Expand Up @@ -2279,6 +2310,7 @@ function _renderInsights(d, box, wikiStatus) {
</div>`;

box.innerHTML = `
${_renderSystemHealthPanel()}
${_renderLlmWikiStatus(wikiStatus)}
<div class="insights-grid">
${overviewCards.map(c => `<div class="insights-stat"><div class="insights-stat-icon">${c.icon}</div><div class="insights-stat-info"><div class="insights-stat-value">${c.value}</div><div class="insights-stat-label">${esc(c.label)}</div></div></div>`).join('')}
Expand Down
19 changes: 19 additions & 0 deletions static/style.css
Original file line number Diff line number Diff line change
Expand Up @@ -293,6 +293,20 @@
.layout{display:flex;width:100%;flex:1 1 auto;min-height:0;}
.app-titlebar{display:flex;align-items:center;justify-content:center;height:38px;flex-shrink:0;background:var(--sidebar);border-bottom:1px solid var(--border);padding:0 12px;padding-top:var(--app-titlebar-safe-top);padding-left:max(12px,env(safe-area-inset-left,0));padding-right:max(12px,env(safe-area-inset-right,0));box-sizing:content-box;font-size:12px;color:var(--muted);user-select:none;-webkit-app-region:drag;position:relative;z-index:20;}
.app-titlebar-inner{display:flex;align-items:center;gap:8px;min-width:0;max-width:100%;justify-content:center;}
/* System health card: header + status pill, three-column metric grid with thin progress bars. The .unavailable state hides the whole card; .loading mutes the status dot. */
.system-health-panel.insights-card{display:flex;flex-direction:column;gap:12px;color:var(--muted);}
.system-health-panel.unavailable{display:none;}
.system-health-head{display:flex;align-items:flex-start;justify-content:space-between;gap:12px;}
.system-health-sub{font-size:11px;color:var(--muted);margin-top:-4px;}
.system-health-dot{width:7px;height:7px;border-radius:999px;background:var(--accent);box-shadow:0 0 0 3px var(--accent-bg);opacity:.88;}
.system-health-panel.loading .system-health-dot{background:var(--muted);box-shadow:none;opacity:.55;}
.system-health-status{display:inline-flex;align-items:center;gap:7px;border-radius:999px;padding:3px 8px;font-size:11px;font-weight:700;border:1px solid var(--border);color:var(--muted);background:var(--surface);white-space:nowrap;}
.system-health-metrics{display:grid;grid-template-columns:repeat(3,minmax(120px,1fr));gap:10px;min-width:0;}
.system-health-metric{min-width:0;display:flex;flex-direction:column;gap:5px;padding:10px 11px;border:1px solid var(--border);border-radius:8px;background:var(--surface);}
.system-health-label{display:flex;align-items:center;justify-content:space-between;gap:8px;font-size:11px;line-height:1;color:var(--muted);}
.system-health-value{font-variant-numeric:tabular-nums;color:var(--text);font-weight:650;}
.system-health-bar{height:5px;overflow:hidden;border-radius:999px;background:color-mix(in srgb,var(--border) 70%,transparent);border:1px solid color-mix(in srgb,var(--border) 75%,transparent);}
.system-health-bar-fill{height:100%;width:0%;border-radius:inherit;background:linear-gradient(90deg,var(--accent),var(--accent-hover));transition:width .25s ease;}
.system-health-foot{font-size:11px;color:var(--muted);line-height:1.45;opacity:.82;}
.app-titlebar-icon{display:inline-flex;align-items:center;color:var(--accent);}
.app-titlebar-title{font-size:12px;font-weight:600;color:var(--text);letter-spacing:-.01em;white-space:nowrap;overflow:hidden;text-overflow:ellipsis;max-width:60vw;}
.app-titlebar-sub{font-size:10px;color:var(--muted);background:var(--hover-bg);padding:2px 7px;border-radius:4px;font-family:'SF Mono',ui-monospace,monospace;white-space:nowrap;flex-shrink:0;}
Expand Down Expand Up @@ -1280,6 +1294,11 @@
.app-titlebar{justify-content:space-between;}
.app-titlebar-hamburger,.app-titlebar-spacer{display:flex;}
.app-titlebar-inner{flex:1 1 auto;}
/* System health card, compact variant: single-column metrics, tighter gaps, thinner bars. NOTE(review): enclosing media query is not visible here — presumably the narrow-viewport breakpoint; confirm. */
.system-health-panel.insights-card{gap:10px;padding:12px;}
.system-health-head{align-items:flex-start;}
.system-health-metrics{grid-template-columns:1fr;gap:8px;}
.system-health-label{font-size:10px;gap:4px;}
.system-health-bar{height:4px;}
/* Overlay backdrop */
.mobile-overlay{display:none;position:fixed;inset:0;background:rgba(0,0,0,.5);
z-index:199;-webkit-tap-highlight-color:transparent;}
Expand Down
94 changes: 94 additions & 0 deletions static/ui.js
Original file line number Diff line number Diff line change
Expand Up @@ -3065,6 +3065,100 @@ function dismissReconnect() {
clearInflight();
}

// ── Live host resource health panel (#693) ──
// Delay between polls of /api/system/health while the panel is visible.
const SYSTEM_HEALTH_INTERVAL_MS=5000;
// setInterval handle for the active poll loop; null while polling is stopped.
let _systemHealthTimer=null;
// Extracts a display-ready percentage from a metric object.
// Returns a number clamped to [0, 100] and rounded to one decimal, or null
// when the metric is missing or its percent is absent or not a finite number.
function _systemHealthPercent(metric){
  // Number(null) and Number('') are both 0, so a missing value must be
  // rejected before coercion; otherwise an unavailable metric would render
  // as "0%" instead of the "—" placeholder.
  if(!metric||metric.percent==null||metric.percent==='') return null;
  const percent=Number(metric.percent);
  if(!Number.isFinite(percent)) return null;
  return Math.max(0,Math.min(100,Math.round(percent*10)/10));
}
// Formats a percentage for display: "—" for null/undefined, otherwise the
// number followed by "%", with one decimal only when it has a fractional part.
function _formatSystemHealthPercent(percent){
  if(percent == null) return '—';
  let digits=0;
  if(percent%1) digits=1;
  return percent.toFixed(digits)+'%';
}
// Builds a "used / total" string with binary (1024-based) unit scaling.
// Returns '' when the metric is missing or either byte count is falsy.
function _formatSystemHealthBytes(metric){
  if(!metric) return '';
  if(!metric.used_bytes||!metric.total_bytes) return '';
  const UNITS=['B','KB','MB','GB','TB'];
  const humanize=(raw)=>{
    let amount=Number(raw)||0;
    let unit=0;
    for(;amount>=1024&&unit<UNITS.length-1;unit++) amount/=1024;
    // Whole numbers for plain bytes and for values of 10 or more; one decimal otherwise.
    const decimals=(amount>=10||unit===0)?0:1;
    return amount.toFixed(decimals)+' '+UNITS[unit];
  };
  return humanize(metric.used_bytes)+' / '+humanize(metric.total_bytes);
}
// Writes one metric ('cpu' | 'memory' | 'disk') into its panel row: the value
// label text, a tooltip (byte totals for memory/disk, otherwise the percent
// text), the progressbar's aria-valuenow and the fill width. A metric with no
// usable percent shows "—" and a 0-width bar. No-op if the row is not in the DOM.
function _updateSystemHealthMetric(name,metric){
  const row=document.querySelector(`[data-system-health-metric="${name}"]`);
  if(!row) return;
  const measured=_systemHealthPercent(metric);
  const shown=_formatSystemHealthPercent(measured);
  const width=measured == null ? 0 : measured;
  const valueEl=row.querySelector('[data-system-health-value]');
  if(valueEl){
    const showsBytes=name==='memory'||name==='disk';
    const tooltip=showsBytes?_formatSystemHealthBytes(metric):'';
    valueEl.textContent=shown;
    valueEl.title=tooltip||shown;
  }
  const barEl=row.querySelector('.system-health-bar');
  if(barEl) barEl.setAttribute('aria-valuenow',String(width));
  const fillEl=row.querySelector('.system-health-bar-fill');
  if(fillEl) fillEl.style.width=`${width}%`;
}
// Puts the panel into its "unavailable" state: drops `loading`, adds
// `unavailable`, sets the status text (default 'Unavailable') and resets all
// three metric rows. No-op when the panel is not in the DOM.
function setSystemHealthUnavailable(message){
  const panel=$('systemHealthPanel');
  const statusEl=$('systemHealthStatus');
  if(!panel) return;
  panel.classList.remove('loading');
  panel.classList.add('unavailable');
  if(statusEl){
    statusEl.textContent=message||'Unavailable';
  }
  for(const name of ['cpu','memory','disk']){
    _updateSystemHealthMetric(name,null);
  }
}
// Renders an /api/system/health payload into the panel. A missing payload or
// `available === false` switches the panel to the unavailable state;
// otherwise the status reads 'Partial' or 'Live' and each metric row is
// updated. No-op when the panel is not in the DOM.
function renderSystemHealth(payload){
  const panel=$('systemHealthPanel');
  const statusEl=$('systemHealthStatus');
  if(!panel) return;
  const usable=!!payload&&payload.available!==false;
  if(!usable){
    setSystemHealthUnavailable('Unavailable');
    return;
  }
  panel.classList.remove('loading','unavailable');
  if(statusEl){
    statusEl.textContent=payload.status==='partial'?'Partial':'Live';
  }
  for(const key of ['cpu','memory','disk']){
    _updateSystemHealthMetric(key,payload[key]);
  }
}
// Fetches one health snapshot and renders it. Skipped entirely while the tab
// or the panel is not visible. Any failure (request or render) switches the
// panel to the unavailable state rather than throwing.
async function pollSystemHealth(){
  const tabVisible=document.visibilityState === 'visible';
  if(!tabVisible||!_systemHealthPanelIsVisible()) return;
  try{
    renderSystemHealth(await api('/api/system/health'));
  }catch(_){
    setSystemHealthUnavailable('Unavailable');
  }
}
// True only when the tab is visible, the main area matches
// `main.main.showing-insights`, and the health panel element exists.
function _systemHealthPanelIsVisible(){
  if(document.visibilityState !== 'visible') return false;
  if(!document.querySelector('main.main.showing-insights')) return false;
  return Boolean($('systemHealthPanel'));
}
// Starts the poll loop: one immediate poll, then one every
// SYSTEM_HEALTH_INTERVAL_MS. Does nothing if the panel is not visible or a
// timer is already running.
function startSystemHealthMonitor(){
  if(!_systemHealthPanelIsVisible()||_systemHealthTimer) return;
  void pollSystemHealth();
  _systemHealthTimer=setInterval(pollSystemHealth,SYSTEM_HEALTH_INTERVAL_MS);
}
// Stops the poll loop if one is running and clears the stored timer handle.
function stopSystemHealthMonitor(){
  if(!_systemHealthTimer) return;
  clearInterval(_systemHealthTimer);
  _systemHealthTimer=null;
}
// Reconciles the poll loop with current visibility: runs it while the panel
// is visible and stops it otherwise.
function _syncSystemHealthMonitorVisibility(){
  if(!_systemHealthPanelIsVisible()){
    stopSystemHealthMonitor();
    return;
  }
  startSystemHealthMonitor();
}
// Re-evaluate polling whenever the tab is hidden or shown, so the timer only
// runs while the panel can actually be seen.
document.addEventListener('visibilitychange',_syncSystemHealthMonitorVisibility);
// Try to start once the DOM is ready; startSystemHealthMonitor() returns
// without polling unless the panel is currently visible.
if(document.readyState==='loading') document.addEventListener('DOMContentLoaded',startSystemHealthMonitor);
else startSystemHealthMonitor();

// ── Hermes agent/gateway heartbeat alert (#716) ──
const AGENT_HEALTH_INTERVAL_MS=30000;
const AGENT_HEALTH_DISMISSED_KEY='agent-health-dismissed';
Expand Down
Loading
Loading