From 6caf86ba9698e86bc7dae0a5bd824cbf844d87d2 Mon Sep 17 00:00:00 2001 From: Bryan Bartley Date: Mon, 18 May 2026 21:40:02 -0500 Subject: [PATCH 01/14] feat(workspace): download folder as zip via /api/folder/download Adds a "Download Folder" item to the workspace file-tree right-click menu and a GET /api/folder/download endpoint that streams the directory as a zip with Content-Disposition: attachment. Configurable caps: HERMES_WEBUI_FOLDER_ZIP_MAX_MB (default 1024) HERMES_WEBUI_FOLDER_ZIP_MAX_FILES (default 50000) Pre-flights the walk so cap-exceeded returns 413 + JSON BEFORE any zip bytes are sent. Symlinks resolving outside the workspace are skipped. Mirrors the existing _handle_file_raw shape (session_id resolution, safe_resolve, RFC 5987 filename via _content_disposition_value). Stdlib zipfile only; no new dependencies. Tests: 11 static-inspection tests matching the style of tests/test_issue1867_upload_size_preflight.py. All passing on Python 3.11/3.12/3.13. --- api/routes.py | 140 ++++++++++++++++++++++++++++++++++ static/i18n.js | 1 + static/ui.js | 17 +++++ tests/test_folder_download.py | 102 +++++++++++++++++++++++++ 4 files changed, 260 insertions(+) create mode 100644 tests/test_folder_download.py diff --git a/api/routes.py b/api/routes.py index fb2caeab09..1835416638 100644 --- a/api/routes.py +++ b/api/routes.py @@ -4150,6 +4150,9 @@ def handle_get(handler, parsed) -> bool: if parsed.path == "/api/file/raw": return _handle_file_raw(handler, parsed) + if parsed.path == "/api/folder/download": + return _handle_folder_download(handler, parsed) + if parsed.path == "/api/file": return _handle_file_read(handler, parsed) @@ -6630,6 +6633,143 @@ def _file_raw_target(session, sid: str, rel: str) -> Path | None: return None +# ─── /api/folder/download ─────────────────────────────────────────────────── +# Configurable caps. Match the HERMES_WEBUI_MAX_UPLOAD_MB style used elsewhere +# (api/config.py) so operators have one consistent env-var convention. 
+def _folder_zip_max_bytes() -> int: + try: + mb = int(os.getenv("HERMES_WEBUI_FOLDER_ZIP_MAX_MB", "1024")) + except ValueError: + mb = 1024 + return max(1, mb) * 1024 * 1024 + + +def _folder_zip_max_files() -> int: + try: + return max(1, int(os.getenv("HERMES_WEBUI_FOLDER_ZIP_MAX_FILES", "50000"))) + except ValueError: + return 50000 + + +def _folder_download_collect(target: Path, workspace_root: Path, + max_bytes: int, max_files: int): + """Walk target dir; return (files, total_bytes, hit_limit_reason_or_None). + + files is a list of (filesystem_path, archive_name) tuples ready for + ZipFile.write. Symlinks escaping the workspace are skipped. + """ + import os as _os + files = [] + total_bytes = 0 + for root, dirs, names in _os.walk(target, followlinks=False): + root_path = Path(root) + try: + if not root_path.resolve().is_relative_to(workspace_root): + dirs[:] = [] + continue + except (ValueError, OSError): + dirs[:] = [] + continue + for name in names: + fp = root_path / name + if fp.is_symlink(): + try: + if not fp.resolve().is_relative_to(workspace_root): + continue + except (ValueError, OSError): + continue + try: + size = fp.stat().st_size + except OSError: + continue + if len(files) >= max_files: + return files, total_bytes, "max_files" + if total_bytes + size > max_bytes: + return files, total_bytes, "max_bytes" + try: + arcname = fp.relative_to(target) + except ValueError: + continue + files.append((fp, str(arcname))) + total_bytes += size + return files, total_bytes, None + + +def _handle_folder_download(handler, parsed): + """GET /api/folder/download?session_id=...&path=... + + Streams a zip of /. Symlinks escaping the + workspace are skipped. Empty folders return an empty (valid) zip. + Respects HERMES_WEBUI_FOLDER_ZIP_MAX_MB and HERMES_WEBUI_FOLDER_ZIP_MAX_FILES. + Pre-flights the walk so size/count failures return a clean 413 with JSON + body BEFORE any zip bytes are sent. 
+ """ + import zipfile + from urllib.parse import parse_qs + + qs = parse_qs(parsed.query) + sid = qs.get("session_id", [""])[0] + if not sid: + return bad(handler, "session_id is required") + try: + s = get_session(sid) + except KeyError: + return bad(handler, "Session not found", 404) + + rel = qs.get("path", [""])[0] + try: + target = safe_resolve(Path(s.workspace), rel) + except ValueError: + return bad(handler, "invalid path", 400) + if not target.exists(): + return j(handler, {"error": "not found"}, status=404) + if not target.is_dir(): + return bad(handler, "path must be a directory; use /api/file/raw for single files", 400) + + workspace_root = Path(s.workspace).resolve() + max_bytes = _folder_zip_max_bytes() + max_files = _folder_zip_max_files() + + files, total_bytes, limit_hit = _folder_download_collect( + target, workspace_root, max_bytes, max_files + ) + if limit_hit == "max_files": + return j(handler, { + "error": "too many files", + "limit": max_files, + "configure": "HERMES_WEBUI_FOLDER_ZIP_MAX_FILES", + }, status=413) + if limit_hit == "max_bytes": + return j(handler, { + "error": "folder too large", + "limit_bytes": max_bytes, + "configure": "HERMES_WEBUI_FOLDER_ZIP_MAX_MB", + }, status=413) + + zip_name = (target.name or "workspace") + ".zip" + handler.send_response(200) + handler.send_header("Content-Type", "application/zip") + handler.send_header( + "Content-Disposition", + _content_disposition_value("attachment", zip_name), + ) + handler.send_header("Cache-Control", "no-store") + handler.end_headers() + + written = 0 + with zipfile.ZipFile(handler.wfile, mode="w", compression=zipfile.ZIP_DEFLATED, allowZip64=True) as zf: + for fp, arcname in files: + try: + zf.write(fp, arcname=arcname) + written += 1 + except (OSError, PermissionError) as e: + logger.warning("folder-download: skipping %s: %s", fp, e) + logger.info( + "folder-download: streamed %d/%d files (~%d bytes) from %s", + written, len(files), total_bytes, target, + ) + + def 
_handle_file_raw(handler, parsed): qs = parse_qs(parsed.query) sid = qs.get("session_id", [""])[0] diff --git a/static/i18n.js b/static/i18n.js index 0580028141..f02c9fb88d 100644 --- a/static/i18n.js +++ b/static/i18n.js @@ -401,6 +401,7 @@ const LOCALES = { reveal_in_finder: 'Reveal in File Manager', reveal_failed: 'Failed to reveal: ', copy_file_path: 'Copy file path', + download_folder: 'Download Folder', path_copied: 'File path copied to clipboard', path_copy_failed: 'Failed to copy path: ', session_rename: 'Rename conversation', diff --git a/static/ui.js b/static/ui.js index cb78ba0976..195d0b2faa 100644 --- a/static/ui.js +++ b/static/ui.js @@ -7788,6 +7788,23 @@ function _showFileContextMenu(e, item){ }; menu.appendChild(copyPathItem); + // Download as zip — only for directories. Streams the folder contents + // through /api/folder/download which builds the zip on the fly. + if(item.type==='dir'){ + const dlItem=document.createElement('div'); + dlItem.textContent=t('download_folder'); + dlItem.style.cssText='padding:7px 14px;cursor:pointer;font-size:13px;color:var(--text);'; + dlItem.onmouseenter=()=>dlItem.style.background='var(--hover-bg)'; + dlItem.onmouseleave=()=>dlItem.style.background=''; + dlItem.onclick=()=>{ + menu.remove(); + const url='/api/folder/download?session_id='+encodeURIComponent(S.session.session_id) + + '&path='+encodeURIComponent(item.path||''); + window.location.href=url; + }; + menu.appendChild(dlItem); + } + // Divider + Delete const sep=document.createElement('hr'); sep.style.cssText='border:none;border-top:1px solid var(--border);margin:4px 0;'; diff --git a/tests/test_folder_download.py b/tests/test_folder_download.py new file mode 100644 index 0000000000..2f0463555a --- /dev/null +++ b/tests/test_folder_download.py @@ -0,0 +1,102 @@ +"""Tests for /api/folder/download — matches the static-inspection style used +elsewhere in the hermes-webui test suite (see tests/test_issue1867_upload_size_preflight.py). 
+""" + +from pathlib import Path + +ROOT = Path(__file__).resolve().parents[1] +ROUTES_PY = ROOT / "api" / "routes.py" +UI_JS = ROOT / "static" / "ui.js" + + +def test_folder_download_handler_defined(): + src = ROUTES_PY.read_text(encoding="utf-8") + assert "def _handle_folder_download(handler, parsed):" in src + assert "/api/folder/download?session_id=" in src # in handler docstring + assert 'Content-Type", "application/zip"' in src + assert "zipfile.ZipFile(handler.wfile" in src + + +def test_folder_download_dispatch_registered(): + src = ROUTES_PY.read_text(encoding="utf-8") + assert 'parsed.path == "/api/folder/download"' in src + assert "_handle_folder_download(handler, parsed)" in src + + +def test_folder_download_uses_safe_resolve(): + src = ROUTES_PY.read_text(encoding="utf-8") + handler_idx = src.index("def _handle_folder_download") + end_idx = src.index("\n\ndef ", handler_idx + 1) + body = src[handler_idx:end_idx] + assert "safe_resolve(Path(s.workspace), rel)" in body + assert "ValueError" in body + + +def test_folder_download_skips_escaping_symlinks(): + src = ROUTES_PY.read_text(encoding="utf-8") + collect_idx = src.index("def _folder_download_collect") + end_idx = src.index("\n\ndef ", collect_idx + 1) + body = src[collect_idx:end_idx] + assert "followlinks=False" in body + assert "is_symlink()" in body + assert "is_relative_to(workspace_root)" in body + + +def test_folder_download_respects_max_files_env(): + src = ROUTES_PY.read_text(encoding="utf-8") + assert 'HERMES_WEBUI_FOLDER_ZIP_MAX_FILES' in src + assert '"too many files"' in src + assert 'status=413' in src + + +def test_folder_download_respects_max_bytes_env(): + src = ROUTES_PY.read_text(encoding="utf-8") + assert 'HERMES_WEBUI_FOLDER_ZIP_MAX_MB' in src + assert '"folder too large"' in src + assert 'limit_bytes' in src + + +def test_folder_download_preflights_before_streaming(): + """Pre-flight collect must run BEFORE send_response so 413 can return JSON.""" + src = 
ROUTES_PY.read_text(encoding="utf-8") + handler_idx = src.index("def _handle_folder_download") + end_idx = src.index("\n\n# ", handler_idx) if "\n\n# " in src[handler_idx:] else len(src) + body = src[handler_idx:end_idx] + collect_call = body.index("_folder_download_collect") + send_response = body.index("handler.send_response(200)") + limit_check = body.index('"too many files"') + assert collect_call < limit_check < send_response + + +def test_folder_download_rejects_files(): + src = ROUTES_PY.read_text(encoding="utf-8") + assert "path must be a directory" in src + assert "/api/file/raw" in src # error message guides user + + +def test_folder_download_streams_not_buffers(): + src = ROUTES_PY.read_text(encoding="utf-8") + assert "zipfile.ZipFile(handler.wfile" in src + assert "allowZip64=True" in src + handler_idx = src.index("def _handle_folder_download") + end_idx = src.index("\n\ndef ", handler_idx + 1) + body = src[handler_idx:end_idx] + assert "io.BytesIO" not in body, "must stream, not buffer in memory" + + +def test_ui_context_menu_has_download_folder(): + src = UI_JS.read_text(encoding="utf-8") + assert "download_folder" in src + download_idx = src.index("download_folder") + snippet = src[max(0, download_idx - 200):download_idx] + assert "item.type==='dir'" in snippet or "item.type === 'dir'" in snippet + + +def test_ui_download_folder_uses_endpoint(): + src = UI_JS.read_text(encoding="utf-8") + download_idx = src.index("download_folder") + snippet = src[download_idx:download_idx + 600] + assert "/api/folder/download" in snippet + assert "session_id=" in snippet + assert "path=" in snippet + assert "encodeURIComponent" in snippet From 0736e45485b66d89e81e2f4a0e49c3577df90ab2 Mon Sep 17 00:00:00 2001 From: Michael Lam Date: Tue, 19 May 2026 11:15:35 -0700 Subject: [PATCH 02/14] fix: dedupe tool-only partial recovery markers --- CHANGELOG.md | 3 + api/models.py | 64 ++++++++++++++++++- api/streaming.py | 80 +++++++++++++++++------ 
tests/test_issue2592_partial_dedupe.py | 87 ++++++++++++++++++++++++++ 4 files changed, 214 insertions(+), 20 deletions(-) create mode 100644 tests/test_issue2592_partial_dedupe.py diff --git a/CHANGELOG.md b/CHANGELOG.md index 941c61eaa7..40d4e0c699 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -2,6 +2,9 @@ ## [Unreleased] +### Fixed + +- **PR #2593** by @Michaelyklam (closes #2592) — Deduplicate cancelled/recovered partial assistant markers using the full `(content, reasoning, partial tool calls)` payload instead of only non-empty text content. Tool-only failed turns no longer append identical empty-content `_partial` messages repeatedly, and full session loads collapse adjacent duplicate partial markers from already-bloated session files while preserving a backup. ## [v0.51.92] — 2026-05-19 — Release BP (stage-385 — 7-PR full sweep batch — RFC Slice 3c clarification + workspace tree icon alignment + project move cache refresh + auto-compression handoff metadata + Grok OAuth provider catalog + anonymous custom endpoint picker fallback + PWA standalone reload + pull-to-refresh) diff --git a/api/models.py b/api/models.py index 0518b227b7..35680090aa 100644 --- a/api/models.py +++ b/api/models.py @@ -562,7 +562,18 @@ def load(cls, sid): p = SESSION_DIR / f'{sid}.json' if not p.exists(): return None - return cls(**json.loads(p.read_text(encoding='utf-8'))) + data = json.loads(p.read_text(encoding='utf-8')) + data['messages'], _collapsed_partials = _collapse_adjacent_duplicate_partials(data.get('messages')) + session = cls(**data) + if _collapsed_partials: + try: + # Self-heal bloated sessions on first full load without touching + # recency/index ordering; save() creates a .bak because this + # intentionally shrinks the transcript (#2592). 
+ session.save(touch_updated_at=False, skip_index=True) + except Exception: + logger.debug("Failed to persist collapsed duplicate partials for %s", sid, exc_info=True) + return session @classmethod def load_metadata_only(cls, sid): @@ -722,6 +733,57 @@ def _normalize_journal_recovery_text(value) -> str: return " ".join(str(value or "").split()) +def _partial_message_signature(message: dict) -> tuple: + """Return a stable identity for partial assistant markers recovered on load.""" + if not isinstance(message, dict): + return ('', '', ()) + tool_sig = [] + for tool_call in message.get('_partial_tool_calls') or []: + if not isinstance(tool_call, dict): + continue + try: + args_sig = json.dumps( + tool_call.get('args') or {}, + ensure_ascii=False, + sort_keys=True, + default=str, + ) + except Exception: + args_sig = str(tool_call.get('args') or '') + tool_sig.append(( + str(tool_call.get('name') or ''), + args_sig, + bool(tool_call.get('done', False)), + bool(tool_call.get('is_error', False)), + str(tool_call.get('preview') or tool_call.get('snippet') or ''), + )) + return ( + str(message.get('content') or '').strip(), + str(message.get('reasoning') or '').strip(), + tuple(tool_sig), + ) + + +def _collapse_adjacent_duplicate_partials(messages) -> tuple[list, bool]: + """Collapse repeated identical partial markers from the same failed turn.""" + if not isinstance(messages, list): + return messages, False + collapsed = [] + changed = False + previous_partial_sig = None + for message in messages: + if isinstance(message, dict) and message.get('_partial'): + sig = _partial_message_signature(message) + if previous_partial_sig == sig: + changed = True + continue + previous_partial_sig = sig + else: + previous_partial_sig = None + collapsed.append(message) + return collapsed, changed + + def _find_existing_assistant_for_journal_content(session, content: str) -> int | None: candidate = _normalize_journal_recovery_text(content) if not candidate: diff --git a/api/streaming.py 
b/api/streaming.py index 5998bdbc64..3a4bf26e56 100644 --- a/api/streaming.py +++ b/api/streaming.py @@ -2590,6 +2590,56 @@ def _extract_tool_calls_from_messages(messages, live_tool_calls=None): return tool_calls +def _partial_message_signature(message: dict) -> tuple: + """Return a stable identity for a persisted partial assistant marker.""" + if not isinstance(message, dict): + return ('', '', ()) + tool_sig = [] + for tool_call in message.get('_partial_tool_calls') or []: + if not isinstance(tool_call, dict): + continue + try: + args_sig = json.dumps( + tool_call.get('args') or {}, + ensure_ascii=False, + sort_keys=True, + default=str, + ) + except Exception: + args_sig = str(tool_call.get('args') or '') + tool_sig.append(( + str(tool_call.get('name') or ''), + args_sig, + bool(tool_call.get('done', False)), + bool(tool_call.get('is_error', False)), + str(tool_call.get('preview') or tool_call.get('snippet') or ''), + )) + return ( + str(message.get('content') or '').strip(), + str(message.get('reasoning') or '').strip(), + tuple(tool_sig), + ) + + +def _partial_marker_already_present(messages, candidate: dict, *, before_idx: int | None = None) -> bool: + """Check for an equivalent partial marker in the current user turn only.""" + if not isinstance(messages, list) or not isinstance(candidate, dict): + return False + end = before_idx if isinstance(before_idx, int) else len(messages) + end = max(0, min(end, len(messages))) + start = 0 + for idx in range(end - 1, -1, -1): + msg = messages[idx] + if isinstance(msg, dict) and msg.get('role') == 'user': + start = idx + 1 + break + candidate_sig = _partial_message_signature(candidate) + for msg in messages[start:end]: + if isinstance(msg, dict) and msg.get('_partial') and _partial_message_signature(msg) == candidate_sig: + return True + return False + + def _sse(handler, event, data): """Write one SSE event to the response stream.""" payload = f"event: {event}\ndata: {json.dumps(data, ensure_ascii=False)}\n\n" @@ 
-5504,24 +5554,7 @@ def cancel_stream(stream_id: str) -> bool: if any(pattern in _content for pattern in _CANCEL_MARKER_PATTERNS): _cancel_marker_idx = _idx break - _partial_already_present = False - if _stripped: - for _m in _cs.messages: - # Stage-350 Opus SHOULD-FIX (#2151): only dedup - # against actual prior _partial markers from the - # same stream, with exact content match. The original - # substring check (`_stripped in _existing or - # _existing in _stripped`) was too broad — any short - # prior assistant reply (e.g. "OK", "Here is the - # answer:") becomes a substring of many later partial - # bodies and could silently drop the new partial, - # resurrecting the #893 data-loss bug on long sessions. - if not isinstance(_m, dict) or not _m.get('_partial'): - continue - if str(_m.get('content') or '').strip() == _stripped: - _partial_already_present = True - break - if (_stripped or _has_reasoning or _has_tools) and not _partial_already_present: + if _stripped or _has_reasoning or _has_tools: _partial_msg: dict = { 'role': 'assistant', 'content': _stripped, # may be empty for reasoning/tool-only turns @@ -5548,7 +5581,16 @@ def cancel_stream(stream_id: str) -> bool: # alongside the regular tool_calls path. # (Opus pre-release review pass 2 of v0.50.251.) _partial_msg['_partial_tool_calls'] = list(_cancel_tool_calls) - _cs.messages.insert(_cancel_marker_idx, _partial_msg) + # Deduplicate against the full partial payload, not just + # non-empty content. Tool-only/reasoning-only partials have + # empty content, so a content-gated check can append the same + # failed turn repeatedly during cancel/replay recovery (#2592). + if not _partial_marker_already_present( + _cs.messages, + _partial_msg, + before_idx=_cancel_marker_idx, + ): + _cs.messages.insert(_cancel_marker_idx, _partial_msg) # Cancel marker — flagged _error=True so it is stripped from conversation # history on the next turn (prevents model from seeing "Task cancelled." # as a prior assistant reply). 
diff --git a/tests/test_issue2592_partial_dedupe.py b/tests/test_issue2592_partial_dedupe.py new file mode 100644 index 0000000000..789633c4bb --- /dev/null +++ b/tests/test_issue2592_partial_dedupe.py @@ -0,0 +1,87 @@ +import json + + +def _tool_partial(reasoning="same reasoning", args=None, *, timestamp=123): + return { + "role": "assistant", + "content": "", + "_partial": True, + "timestamp": timestamp, + "reasoning": reasoning, + "_partial_tool_calls": [ + { + "name": "execute_code", + "args": args or {"code": "raise RuntimeError('boom')"}, + "done": True, + "is_error": True, + "duration": 3.87, + } + ], + } + + +def test_tool_only_partial_dedupe_uses_reasoning_and_tool_signature(): + from api.streaming import _partial_marker_already_present + + existing = [ + {"role": "user", "content": "run this"}, + _tool_partial(), + {"role": "assistant", "content": "**Task cancelled.**", "_error": True}, + ] + + assert _partial_marker_already_present(existing, _tool_partial(), before_idx=2) + assert not _partial_marker_already_present( + existing, + _tool_partial(args={"code": "print('different tool body')"}), + before_idx=2, + ) + + +def test_tool_only_partial_dedupe_is_scoped_to_current_user_turn(): + from api.streaming import _partial_marker_already_present + + existing = [ + {"role": "user", "content": "first run"}, + _tool_partial(), + {"role": "assistant", "content": "**Task cancelled.**", "_error": True}, + {"role": "user", "content": "repeat it"}, + ] + + assert not _partial_marker_already_present(existing, _tool_partial(), before_idx=len(existing)) + + +def test_session_load_collapses_adjacent_duplicate_partials(tmp_path, monkeypatch): + import api.models as models + + sid = "abc123" + session_dir = tmp_path / "sessions" + session_dir.mkdir() + monkeypatch.setattr(models, "SESSION_DIR", session_dir) + monkeypatch.setattr(models, "SESSION_INDEX_FILE", session_dir / "_index.json") + + payload = { + "session_id": sid, + "title": "bloated partials", + "workspace": 
str(tmp_path), + "model": "gpt-5.5", + "created_at": 100.0, + "updated_at": 200.0, + "messages": [ + {"role": "user", "content": "run this"}, + _tool_partial(timestamp=123), + _tool_partial(timestamp=123), + _tool_partial(timestamp=123), + {"role": "assistant", "content": "**Task cancelled.**", "_error": True}, + ], + "tool_calls": [], + } + (session_dir / f"{sid}.json").write_text(json.dumps(payload), encoding="utf-8") + + loaded = models.Session.load(sid) + + assert loaded is not None + assert sum(1 for message in loaded.messages if message.get("_partial")) == 1 + persisted = json.loads((session_dir / f"{sid}.json").read_text(encoding="utf-8")) + assert sum(1 for message in persisted["messages"] if message.get("_partial")) == 1 + assert persisted["updated_at"] == 200.0 + assert (session_dir / f"{sid}.json.bak").exists() From b1b93f9c9749eba090b5247d2557b4914781671c Mon Sep 17 00:00:00 2001 From: Bryan Bartley Date: Tue, 19 May 2026 13:40:29 -0500 Subject: [PATCH 03/14] fix(i18n): add download_folder key to all non-en locales MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit CI parity tests enforce that every key in the English locale block exists in zh, ja, ko, ru, and es. The PR introducing download_folder added it to en only, which broke the 5 hard-parity test files. Adds the English fallback to all 10 non-en blocks (it/ja/ru/es/de/zh/zh-Hant/pt/ko/fr) with the project's // TODO: translate marker so translators can refine later. Tests: tests/test_chinese_locale.py, test_japanese_locale.py, test_korean_locale.py, test_russian_locale.py, test_spanish_locale.py — 26/26 passing locally. 
--- static/i18n.js | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/static/i18n.js b/static/i18n.js index f02c9fb88d..3354837021 100644 --- a/static/i18n.js +++ b/static/i18n.js @@ -1621,6 +1621,7 @@ const LOCALES = { reveal_in_finder: 'Mostra nel File Manager', reveal_failed: 'Mostra fallito: ', copy_file_path: 'Copia percorso file', + download_folder: 'Download Folder', // TODO: translate path_copied: 'Percorso file copiato negli appunti', path_copy_failed: 'Copia percorso fallita: ', session_rename: 'Rinomina conversazione', @@ -2832,6 +2833,7 @@ const LOCALES = { reveal_in_finder: 'ファイルマネージャーで表示', reveal_failed: '表示に失敗しました: ', copy_file_path: 'ファイルパスをコピー', + download_folder: 'Download Folder', // TODO: translate path_copied: 'ファイルパスをクリップボードにコピーしました', path_copy_failed: 'パスのコピーに失敗しました: ', session_rename: '会話の名前を変更', @@ -3969,6 +3971,7 @@ const LOCALES = { reveal_in_finder: 'Показать в файловом менеджере', reveal_failed: 'Не удалось открыть: ', copy_file_path: 'Копировать путь к файлу', + download_folder: 'Download Folder', // TODO: translate path_copied: 'Путь к файлу скопирован в буфер обмена', path_copy_failed: 'Не удалось скопировать путь: ', session_rename: 'Переименовать беседу', @@ -5099,6 +5102,7 @@ const LOCALES = { reveal_in_finder: 'Mostrar en el gestor de archivos', reveal_failed: 'Error al mostrar: ', copy_file_path: 'Copiar ruta del archivo', + download_folder: 'Download Folder', // TODO: translate path_copied: 'Ruta del archivo copiada al portapapeles', path_copy_failed: 'Error al copiar la ruta: ', session_rename: 'Renombrar conversación', @@ -6232,6 +6236,7 @@ const LOCALES = { reveal_in_finder: 'Im Dateimanager anzeigen', reveal_failed: 'Anzeige fehlgeschlagen: ', copy_file_path: 'Dateipfad kopieren', + download_folder: 'Download Folder', // TODO: translate path_copied: 'Dateipfad in die Zwischenablage kopiert', path_copy_failed: 'Pfad konnte nicht kopiert werden: ', session_rename: 'Unterhaltung umbenennen', @@ -7415,6 +7420,7 @@ 
const LOCALES = { reveal_in_finder: '在文件管理器中显示', reveal_failed: '显示失败:', copy_file_path: '\u590d\u5236\u6587\u4ef6\u8def\u5f84', + download_folder: 'Download Folder', // TODO: translate path_copied: '\u6587\u4ef6\u8def\u5f84\u5df2\u590d\u5236\u5230\u526a\u8d34\u677f', path_copy_failed: '\u590d\u5236\u8def\u5f84\u5931\u8d25\uff1a', session_rename: '\u91cd\u547d\u540d\u5bf9\u8bdd', @@ -8473,6 +8479,7 @@ const LOCALES = { reveal_in_finder: '\u5728\u6a94\u6848\u7ba1\u7406\u54e1\u4e2d\u986f\u793a', reveal_failed: '\u986f\u793a\u5931\u6557\uff1a', copy_file_path: '\u8907\u88fd\u6a94\u6848\u8def\u5f91', + download_folder: 'Download Folder', // TODO: translate path_copied: '\u6a94\u6848\u8def\u5f91\u5df2\u8907\u88fd\u5230\u526a\u8cbc\u7c3f', path_copy_failed: '\u8907\u88fd\u8def\u5f91\u5931\u6557\uff1a', session_rename: '\u91cd\u65b0\u547d\u540d\u5c0d\u8a71', @@ -9775,6 +9782,7 @@ const LOCALES = { reveal_in_finder: 'Mostrar no gerenciador de arquivos', reveal_failed: 'Falha ao mostrar: ', copy_file_path: 'Copiar caminho do arquivo', + download_folder: 'Download Folder', // TODO: translate path_copied: 'Caminho do arquivo copiado para a área de transferência', path_copy_failed: 'Falha ao copiar caminho: ', session_rename: 'Renomear conversa', @@ -10884,6 +10892,7 @@ const LOCALES = { reveal_in_finder: '파일 관리자에서 열기', reveal_failed: '표시 실패: ', copy_file_path: '파일 경로 복사', + download_folder: 'Download Folder', // TODO: translate path_copied: '파일 경로가 클립보드에 복사되었습니다', path_copy_failed: '경로 복사 실패: ', session_rename: '대화 이름 변경', @@ -12026,6 +12035,7 @@ const LOCALES = { reveal_in_finder: 'Révéler dans le gestionnaire de fichiers', reveal_failed: 'Échec de la révélation :', copy_file_path: 'Copier le chemin du fichier', + download_folder: 'Download Folder', // TODO: translate path_copied: 'Chemin du fichier copié dans le presse-papiers', path_copy_failed: 'Échec de la copie du chemin :', session_rename: 'Renommer la conversation', From acd1df1112e6a2c15e8844e35f36067ec83feba8 Mon 
Sep 17 00:00:00 2001 From: Dennis Soong Date: Wed, 20 May 2026 02:41:00 +0800 Subject: [PATCH 04/14] fix: time out hung browser api requests --- static/panels.js | 2 +- static/ui.js | 6 +- static/workspace.js | 87 ++++++++++++----- tests/test_api_timeout.py | 194 ++++++++++++++++++++++++++++++++++++++ 4 files changed, 264 insertions(+), 25 deletions(-) create mode 100644 tests/test_api_timeout.py diff --git a/static/panels.js b/static/panels.js index 198b6b3781..b40252e716 100644 --- a/static/panels.js +++ b/static/panels.js @@ -6364,7 +6364,7 @@ async function checkUpdatesNow(){ if(label) label.textContent=t('settings_checking'); if(status) status.textContent=''; try { - const data=await api('/api/updates/check?force=1'); + const data=await api('/api/updates/check?force=1',{timeoutMs:60000}); if(data.disabled){ if(status){status.textContent=t('settings_updates_disabled');status.style.color='var(--muted)';} } else { diff --git a/static/ui.js b/static/ui.js index 5ab149f741..e07373a090 100644 --- a/static/ui.js +++ b/static/ui.js @@ -4393,7 +4393,7 @@ async function showWhatsNewSummary(target){ } _renderUpdateSummaryPanel({summary:'Writing a simple summary…'},data,target); try{ - const res=await api('/api/updates/summary',{method:'POST',body:JSON.stringify({updates:scopedUpdates,target:target||null})}); + const res=await api('/api/updates/summary',{method:'POST',body:JSON.stringify({updates:scopedUpdates,target:target||null}),timeoutMs:60000}); _rememberGeneratedSummary(target,res,data); _renderUpdateSummaryPanel(res,data,target); _renderUpdateWhatsNewLinks(data,{mode:'summary'}); @@ -4514,7 +4514,7 @@ async function applyUpdates(){ if(window._updateData?.agent?.behind>0) targets.push('agent'); try{ for(const target of targets){ - const res=await api('/api/updates/apply',{method:'POST',body:JSON.stringify({target})}); + const res=await api('/api/updates/apply',{method:'POST',body:JSON.stringify({target}),timeoutMs:120000}); if(!res.ok){ _showUpdateError(target,res); 
resetApplyButton(0); @@ -4563,7 +4563,7 @@ async function forceUpdate(btn){ const errEl=$('updateError'); if(errEl){errEl.style.display='none';} try{ - const res=await api('/api/updates/force',{method:'POST',body:JSON.stringify({target})}); + const res=await api('/api/updates/force',{method:'POST',body:JSON.stringify({target}),timeoutMs:120000}); if(!res.ok){ if(errEl){errEl.textContent='Force update failed: '+(res.message||'unknown error');errEl.style.display='block';} btn.disabled=false;btn.textContent='Force update'; diff --git a/static/workspace.js b/static/workspace.js index 1511a70a44..5309addca7 100644 --- a/static/workspace.js +++ b/static/workspace.js @@ -2,39 +2,84 @@ async function api(path,opts={}){ // Strip leading slash so URL resolves relative to location.href (supports subpath mounts) const rel = path.startsWith('/') ? path.slice(1) : path; const url=new URL(rel,document.baseURI||location.href); + const timeoutMs=Object.prototype.hasOwnProperty.call(opts,'timeoutMs')?opts.timeoutMs:30000; // Retry up to 2 times on network errors (e.g. stale keep-alive after long idle). - // Server errors (4xx/5xx) are NOT retried — only connection failures. + // Server errors (4xx/5xx) and client-side timeouts are NOT retried. let lastErr; for(let attempt=0;attempt<3;attempt++){ + let controller=null; + let timeoutId=null; + let didTimeout=false; + let upstreamSignal=null; + let upstreamAbort=null; try{ - const res=await fetch(url.href,{credentials:'include',headers:{'Content-Type':'application/json'},...opts}); - if(!res.ok){ - // 401 means the auth session expired. Redirect to login so the user can - // re-authenticate. This is especially important for iOS PWA (standalone mode) - // and for subpath mounts like /hermes/, where /login escapes to the site root. 
- if(res.status===401){window.location.href='login?next='+encodeURIComponent(window.location.pathname+window.location.search);return;} - const text=await res.text(); - // Parse JSON error body and surface the human-readable message, - // rather than showing raw JSON like {"error":"Profile 'x' does not exist."} - let message=text; - try{const j=JSON.parse(text);message=j.error||j.message||text;}catch(e){} - // Attach the raw HTTP context so callers can branch on status (404 stale-session - // cleanup, 401 redirect, 503 retry, etc.) without re-parsing the message string. - const err=new Error(message); - err.status=res.status; - err.statusText=res.statusText; - err.body=text; - throw err; + const fetchOpts={...opts}; + delete fetchOpts.timeoutMs; + const useTimeout=Number.isFinite(Number(timeoutMs))&&Number(timeoutMs)>0; + if(useTimeout&&typeof AbortController!=='undefined'){ + controller=new AbortController(); + upstreamSignal=fetchOpts.signal||null; + if(upstreamSignal){ + upstreamAbort=()=>controller.abort(upstreamSignal.reason); + if(upstreamSignal.aborted) upstreamAbort(); + else upstreamSignal.addEventListener('abort',upstreamAbort,{once:true}); + } + fetchOpts.signal=controller.signal; } - const ct=res.headers.get('content-type')||''; - return ct.includes('application/json')?res.json():res.text(); + const requestPromise=(async()=>{ + const res=await fetch(url.href,{credentials:'include',headers:{'Content-Type':'application/json'},...fetchOpts}); + if(!res.ok){ + // 401 means the auth session expired. Redirect to login so the user can + // re-authenticate. This is especially important for iOS PWA (standalone mode) + // and for subpath mounts like /hermes/, where /login escapes to the site root. 
+ if(res.status===401){window.location.href='login?next='+encodeURIComponent(window.location.pathname+window.location.search);return;} + const text=await res.text(); + // Parse JSON error body and surface the human-readable message, + // rather than showing raw JSON like {"error":"Profile 'x' does not exist."} + let message=text; + try{const j=JSON.parse(text);message=j.error||j.message||text;}catch(e){} + // Attach the raw HTTP context so callers can branch on status (404 stale-session + // cleanup, 401 redirect, 503 retry, etc.) without re-parsing the message string. + const err=new Error(message); + err.status=res.status; + err.statusText=res.statusText; + err.body=text; + throw err; + } + const ct=res.headers.get('content-type')||''; + return ct.includes('application/json')?await res.json():await res.text(); + })(); + return useTimeout?await Promise.race([ + requestPromise, + new Promise((_,reject)=>{ + timeoutId=setTimeout(()=>{ + didTimeout=true; + if(controller) controller.abort(); + const err=new Error('Request timed out. Please try again.'); + err.name='TimeoutError'; + err.timeout=true; + reject(err); + },Number(timeoutMs)); + }) + ]):await requestPromise; }catch(e){ lastErr=e; + const isTimeout=didTimeout||(e&&(e.timeout===true||e.name==='TimeoutError')); + if(isTimeout){ + const err=(e&&e.name==='TimeoutError')?e:new Error('Request timed out. Please try again.'); + err.name='TimeoutError'; + err.timeout=true; + if(typeof showToast==='function') showToast('Request timed out. Please try again.',5000,'error'); + throw err; + } // Only retry on network errors (TypeError from fetch), not on HTTP errors // that were already thrown above. Re-throw 401 redirects immediately. 
if(e.message&&/401/.test(e.message)) throw e; if(attempt<2 && e instanceof TypeError) continue; throw e; + }finally{ + if(timeoutId) clearTimeout(timeoutId); + if(upstreamSignal&&upstreamAbort) upstreamSignal.removeEventListener('abort',upstreamAbort); } } throw lastErr; diff --git a/tests/test_api_timeout.py b/tests/test_api_timeout.py new file mode 100644 index 0000000000..3f89f71afa --- /dev/null +++ b/tests/test_api_timeout.py @@ -0,0 +1,194 @@ +"""Regression coverage for #2539 client-side api() timeout handling.""" + +from __future__ import annotations + +import json +import re +import subprocess +import textwrap +from pathlib import Path + +ROOT = Path(__file__).resolve().parents[1] +WORKSPACE_JS = ROOT / "static" / "workspace.js" +SESSIONS_JS = ROOT / "static" / "sessions.js" +UI_JS = ROOT / "static" / "ui.js" +PANELS_JS = ROOT / "static" / "panels.js" + + +def _source(path: Path) -> str: + return path.read_text(encoding="utf-8") + + +def _extract_js_function(src: str, name: str) -> str: + marker = f"async function {name}(" + start = src.find(marker) + assert start >= 0, f"{name}() function must exist" + # The api() signature contains a default object literal (`opts={}`), so the + # function-body brace is the first `{` after the balanced parameter list. 
+ paren_depth = 0 + close_paren = -1 + for idx in range(start + len(f"async function {name}"), len(src)): + ch = src[idx] + if ch == "(": + paren_depth += 1 + elif ch == ")": + paren_depth -= 1 + if paren_depth == 0: + close_paren = idx + break + assert close_paren > start, f"{name}() parameter list must close" + brace = src.find("{", close_paren) + assert brace > close_paren, f"{name}() function body must start with {{" + depth = 0 + in_string: str | None = None + escaped = False + in_line_comment = False + in_block_comment = False + for idx in range(brace, len(src)): + ch = src[idx] + nxt = src[idx + 1] if idx + 1 < len(src) else "" + if in_line_comment: + if ch == "\n": + in_line_comment = False + continue + if in_block_comment: + if ch == "*" and nxt == "/": + in_block_comment = False + continue + if in_string: + if escaped: + escaped = False + elif ch == "\\": + escaped = True + elif ch == in_string: + in_string = None + continue + if ch == "/" and nxt == "/": + in_line_comment = True + continue + if ch == "/" and nxt == "*": + in_block_comment = True + continue + if ch in ("'", '"', "`"): + in_string = ch + continue + if ch == "{": + depth += 1 + elif ch == "}": + depth -= 1 + if depth == 0: + return src[start : idx + 1] + raise AssertionError(f"could not extract {name}() body") + + +def _node_eval(script: str, timeout: float = 2.0) -> subprocess.CompletedProcess[str]: + return subprocess.run( + ["node", "-e", script], + cwd=ROOT, + text=True, + capture_output=True, + timeout=timeout, + check=False, + ) + + +def test_api_rejects_hung_fetch_with_timeout_and_toast(): + """A hung fetch must reject quickly and surface a recognizable timeout toast.""" + api_fn = _extract_js_function(_source(WORKSPACE_JS), "api") + script = textwrap.dedent( + f""" + const events=[]; + global.document={{baseURI:'http://example.test/hermes/'}}; + global.location={{href:'http://example.test/hermes/',pathname:'/hermes/',search:''}}; + global.window={{location:global.location}}; + 
global.showToast=(msg,ms,type)=>events.push({{msg:String(msg),ms,type}}); + global.fetch=(url,opts)=>new Promise(()=>{{ + if(opts&&opts.signal)opts.signal.addEventListener('abort',()=>events.push({{aborted:true}})); + }}); + {api_fn} + api('/api/sessions',{{timeoutMs:20}}) + .then(()=>{{console.error('resolved unexpectedly');process.exit(2);}}) + .catch(err=>{{ + console.log(JSON.stringify({{message:String(err&&err.message||err),events}})); + process.exit(0); + }}); + setTimeout(()=>{{console.error('api did not reject after timeoutMs');process.exit(3);}},250); + """ + ) + result = _node_eval(script, timeout=1.0) + assert result.returncode == 0, result.stderr or result.stdout + payload = json.loads(result.stdout.strip()) + assert "timed out" in payload["message"].lower() + assert any(event.get("aborted") for event in payload["events"]), payload + assert any("request timed out" in event.get("msg", "").lower() for event in payload["events"]), payload + assert any(event.get("type") == "error" for event in payload["events"]), payload + + +def test_api_rejects_stalled_response_body_with_timeout(): + """The timeout must stay active through JSON/text body consumption, not only headers.""" + api_fn = _extract_js_function(_source(WORKSPACE_JS), "api") + script = textwrap.dedent( + f""" + const events=[]; + global.document={{baseURI:'http://example.test/hermes/'}}; + global.location={{href:'http://example.test/hermes/',pathname:'/hermes/',search:''}}; + global.window={{location:global.location}}; + global.showToast=(msg,ms,type)=>events.push({{msg:String(msg),ms,type}}); + global.fetch=(url,opts)=>Promise.resolve({{ + ok:true, + headers:{{get:()=> 'application/json'}}, + json:()=>new Promise(()=>{{ + if(opts&&opts.signal)opts.signal.addEventListener('abort',()=>events.push({{aborted:true}})); + }}), + text:()=>Promise.resolve('') + }}); + {api_fn} + api('/api/sessions',{{timeoutMs:20}}) + .then(()=>{{console.error('resolved unexpectedly');process.exit(2);}}) + .catch(err=>{{ 
+ console.log(JSON.stringify({{message:String(err&&err.message||err),events}})); + process.exit(0); + }}); + setTimeout(()=>{{console.error('api body read did not reject after timeoutMs');process.exit(3);}},250); + """ + ) + result = _node_eval(script, timeout=1.0) + assert result.returncode == 0, result.stderr or result.stdout + payload = json.loads(result.stdout.strip()) + assert "timed out" in payload["message"].lower() + assert any(event.get("aborted") for event in payload["events"]), payload + + +def test_api_has_default_timeout_and_per_call_override_contract(): + src = _source(WORKSPACE_JS) + body = _extract_js_function(src, "api") + assert "timeoutMs" in body, "api() must accept opts.timeoutMs as a per-call override" + assert "30000" in body, "api() must default browser API calls to a 30s timeout" + assert "AbortController" in body, "api() must abort hung fetches with AbortController" + assert "delete fetchOpts.timeoutMs" in body, "api() must strip timeoutMs before calling fetch()" + fetch_call = re.search(r"fetch\(url\.href,\{.*?\.\.\.fetchOpts.*?\}\)", body, re.DOTALL) + assert fetch_call, "api() must call fetch() with sanitized fetchOpts" + assert "...opts" not in fetch_call.group(0), "api() must not spread raw opts into fetch()" + assert "timeoutMs" not in fetch_call.group(0), "api() must not forward timeoutMs to fetch()" + + +def test_update_flows_keep_explicit_longer_timeouts(): + """Legitimately long update flows should not inherit the generic 30s guard.""" + src = _source(UI_JS) + panels = _source(PANELS_JS) + assert "api('/api/updates/check?force=1',{timeoutMs:60000})" in panels + assert "api('/api/updates/summary',{method:'POST',body:JSON.stringify({updates:scopedUpdates,target:target||null}),timeoutMs:60000})" in src + assert "api('/api/updates/apply',{method:'POST',body:JSON.stringify({target}),timeoutMs:120000})" in src + assert "api('/api/updates/force',{method:'POST',body:JSON.stringify({target}),timeoutMs:120000})" in src + + +def 
test_new_session_inflight_cleanup_still_runs_after_api_rejects(): + """newSession() must keep its finally cleanup path so timeout rejections unpin the UI.""" + src = _source(SESSIONS_JS) + start = src.find("async function newSession") + assert start >= 0, "newSession() must exist" + finally_idx = src.find("}finally{", start) + assert finally_idx > start, "newSession() must keep a finally cleanup block" + block = src[finally_idx : src.find("\n}", finally_idx) + 2] + assert "_newSessionInFlight=null" in block + assert "_setNewSessionPending(false)" in block From 94ceb66c170ceeee6684098cbdee0d3ba0423e55 Mon Sep 17 00:00:00 2001 From: Bryan Bartley Date: Tue, 19 May 2026 13:44:56 -0500 Subject: [PATCH 05/14] docs: clarify folder-zip cap bounds wall-clock/bandwidth not RSS Per reviewer note: because the zip streams straight into handler.wfile (no io.BytesIO buffering), peak memory is bounded by zipfile's per-file read buffer, not the HERMES_WEBUI_FOLDER_ZIP_MAX_MB cap. Adds a comment so the next reader doesn't have to trace it to learn the cap's actual shape. --- api/routes.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/api/routes.py b/api/routes.py index 1835416638..40cb55354a 100644 --- a/api/routes.py +++ b/api/routes.py @@ -6636,6 +6636,9 @@ def _file_raw_target(session, sid: str, rel: str) -> Path | None: # ─── /api/folder/download ─────────────────────────────────────────────────── # Configurable caps. Match the HERMES_WEBUI_MAX_UPLOAD_MB style used elsewhere # (api/config.py) so operators have one consistent env-var convention. +# Bound on per-request wall-clock and bandwidth, not RSS. The zip streams +# straight into handler.wfile, so peak memory is the per-file read buffer +# inside zipfile, not the cap value. 
def _folder_zip_max_bytes() -> int: try: mb = int(os.getenv("HERMES_WEBUI_FOLDER_ZIP_MAX_MB", "1024")) From 8d2b9d4a16396eb302127eff887413081d973eb8 Mon Sep 17 00:00:00 2001 From: Lumen Yang Date: Tue, 19 May 2026 18:52:50 +0000 Subject: [PATCH 06/14] feat(webui): render indexed context metadata --- api/compression_anchor.py | 20 ++++++++ api/models.py | 17 +++++++ static/i18n.js | 16 +++++++ static/messages.js | 3 ++ static/ui.js | 37 +++++++++++++-- tests/test_auto_compression_card.py | 72 +++++++++++++++++++++++++++++ 6 files changed, 161 insertions(+), 4 deletions(-) diff --git a/api/compression_anchor.py b/api/compression_anchor.py index 3a457d5778..f251851c4f 100644 --- a/api/compression_anchor.py +++ b/api/compression_anchor.py @@ -53,6 +53,24 @@ def _content_has_part_type(content, part_types): ) +def _is_context_compression_marker(message): + """Return true for synthetic compression/reference cards, not user turns.""" + if not isinstance(message, dict): + return False + role = message.get("role") + if not role or role == "tool": + return False + text = _content_text( + message.get("content", ""), + part_types={"text", "input_text", "output_text"}, + ).lower().lstrip() + return ( + text.startswith("[context compaction") + or text.startswith("context compaction") + or text.startswith("[your active task list was preserved across context compression]") + ) + + def visible_messages_for_anchor(messages, *, auto_compression: bool = False): """Return transcript messages that can anchor compression UI metadata. 
@@ -70,6 +88,8 @@ def visible_messages_for_anchor(messages, *, auto_compression: bool = False): role = message.get("role") if not role or role == "tool": continue + if _is_context_compression_marker(message): + continue content = message.get("content", "") has_attachments = bool(message.get("attachments")) diff --git a/api/models.py b/api/models.py index 6ff748696b..1a22ec9d13 100644 --- a/api/models.py +++ b/api/models.py @@ -377,6 +377,11 @@ def __init__(self, session_id: str=None, title: str='Untitled', compression_anchor_message_key=None, compression_anchor_summary=None, pre_compression_snapshot: bool=False, + context_engine=None, + compression_anchor_engine=None, + compression_anchor_mode=None, + compression_anchor_details=None, + context_engine_state=None, context_length=None, threshold_tokens=None, last_prompt_tokens=None, gateway_routing=None, gateway_routing_history=None, @@ -417,6 +422,11 @@ def __init__(self, session_id: str=None, title: str='Untitled', self.compression_anchor_message_key = compression_anchor_message_key self.compression_anchor_summary = compression_anchor_summary self.pre_compression_snapshot = bool(pre_compression_snapshot) + self.context_engine = context_engine + self.compression_anchor_engine = compression_anchor_engine + self.compression_anchor_mode = compression_anchor_mode + self.compression_anchor_details = compression_anchor_details if isinstance(compression_anchor_details, dict) else {} + self.context_engine_state = context_engine_state if isinstance(context_engine_state, dict) else {} self.context_length = context_length self.threshold_tokens = threshold_tokens self.last_prompt_tokens = last_prompt_tokens @@ -474,6 +484,8 @@ def save(self, touch_updated_at: bool = True, skip_index: bool = False) -> None: 'pending_user_message', 'pending_attachments', 'pending_started_at', 'compression_anchor_visible_idx', 'compression_anchor_message_key', 'compression_anchor_summary', 'pre_compression_snapshot', + 'context_engine', 
'compression_anchor_engine', 'compression_anchor_mode', + 'compression_anchor_details', 'context_engine_state', 'context_length', 'threshold_tokens', 'last_prompt_tokens', 'gateway_routing', 'gateway_routing_history', 'llm_title_generated', 'parent_session_id', @@ -641,6 +653,11 @@ def compact(self, include_runtime=False, active_stream_ids=None) -> dict: 'compression_anchor_message_key': self.compression_anchor_message_key, 'compression_anchor_summary': self.compression_anchor_summary, 'pre_compression_snapshot': self.pre_compression_snapshot, + 'context_engine': self.context_engine, + 'compression_anchor_engine': self.compression_anchor_engine, + 'compression_anchor_mode': self.compression_anchor_mode, + 'compression_anchor_details': self.compression_anchor_details, + 'context_engine_state': self.context_engine_state, 'context_length': self.context_length, 'threshold_tokens': self.threshold_tokens, 'last_prompt_tokens': self.last_prompt_tokens, diff --git a/static/i18n.js b/static/i18n.js index c9e923608b..4982119caf 100644 --- a/static/i18n.js +++ b/static/i18n.js @@ -196,6 +196,8 @@ const LOCALES = { conversation_cleared: 'Conversation cleared', command_label: 'Command', context_compaction_label: 'Context compaction', + retrieval_context_label: 'Indexed context', + retrieval_context_preview: 'Earlier messages are stored and retrievable with context tools', preserved_task_list_label: 'Preserved task list', reference_only_label: 'Reference only', model_usage: 'Usage: /model ', @@ -1417,6 +1419,8 @@ const LOCALES = { conversation_cleared: 'Conversazione cancellata', command_label: 'Comando', context_compaction_label: 'Compattazione contesto', + retrieval_context_label: 'Contesto indicizzato', + retrieval_context_preview: 'I messaggi precedenti sono archiviati e recuperabili con gli strumenti di contesto', preserved_task_list_label: 'Lista task preservata', reference_only_label: 'Solo riferimento', model_usage: 'Uso: /model ', @@ -2630,6 +2634,8 @@ const LOCALES = { 
conversation_cleared: '会話をクリアしました', command_label: 'コマンド', context_compaction_label: 'コンテキスト圧縮', + retrieval_context_label: 'インデックス済みコンテキスト', + retrieval_context_preview: '以前のメッセージは保存され、コンテキストツールで取得できます', preserved_task_list_label: '保持されたタスクリスト', reference_only_label: '参照専用', model_usage: '使い方: /model <名前>', @@ -3883,6 +3889,8 @@ const LOCALES = { compress_failed_label: 'Ошибка сжатия', compress_running_label: 'Сжатие…', context_compaction_label: 'Сжатие контекста', + retrieval_context_label: 'Индексированный контекст', + retrieval_context_preview: 'Предыдущие сообщения сохранены и доступны через инструменты контекста', preserved_task_list_label: 'Сохранённый список задач', focus_label: 'Фокус', model_search_no_results: 'Модели не найдены', @@ -4992,6 +5000,8 @@ const LOCALES = { conversation_cleared: 'Conversación borrada', command_label: 'Comando', context_compaction_label: 'Compacción de contexto', + retrieval_context_label: 'Contexto indexado', + retrieval_context_preview: 'Los mensajes anteriores se almacenan y se pueden recuperar con herramientas de contexto', preserved_task_list_label: 'Lista de tareas conservada', reference_only_label: 'Solo referencia', model_usage: 'Uso: /model ', @@ -6118,6 +6128,8 @@ const LOCALES = { conversation_cleared: 'Konversation gelöscht', command_label: 'Befehl', context_compaction_label: 'Kontextkomprimierung', + retrieval_context_label: 'Indizierter Kontext', + retrieval_context_preview: 'Frühere Nachrichten sind gespeichert und über Kontextwerkzeuge abrufbar', preserved_task_list_label: 'Beibehaltene Aufgabenliste', reference_only_label: 'Nur Referenz', model_usage: 'Nutzung: /model ', @@ -7295,6 +7307,8 @@ const LOCALES = { conversation_cleared: '对话已清空', command_label: '命令', context_compaction_label: '上下文压缩', + retrieval_context_label: '已索引上下文', + retrieval_context_preview: '较早消息已存储,可通过上下文工具检索', preserved_task_list_label: '保留的任务列表', reference_only_label: '仅供参考', model_usage: '用法:/model ', @@ -10715,6 +10729,8 @@ const 
LOCALES = { conversation_cleared: '대화를 지웠습니다', command_label: '명령', context_compaction_label: 'Context compaction', + retrieval_context_label: 'Indexed context', + retrieval_context_preview: 'Earlier messages are stored and retrievable with context tools', preserved_task_list_label: '보존된 작업 목록', reference_only_label: 'Reference only', model_usage: 'Usage: /model ', diff --git a/static/messages.js b/static/messages.js index 6842bbb96f..0cb687ee8f 100644 --- a/static/messages.js +++ b/static/messages.js @@ -1846,6 +1846,9 @@ function attachLiveStream(activeSid, streamId, uploaded=[], options={}){ phase:'done', automatic:true, message, + engine:d.engine, + mode:d.mode, + details:d.details, summary:{headline:message}, continuationSessionId:continuationSid, }; diff --git a/static/ui.js b/static/ui.js index 8da479ab9f..b5bc868404 100644 --- a/static/ui.js +++ b/static/ui.js @@ -5096,9 +5096,10 @@ function _autoCompressionBaseDetail(state){ : (String(state&&state.message||fallback).trim()||fallback); } function _autoCompressionPreviewText(state){ + const copy=_engineAwareCompressionCopy(String(state&&state.engine||_compressionEngineForSession()).toLowerCase(), String(state&&state.mode||_compressionModeForSession()).toLowerCase()); const running=state&&state.phase==='running'; const detail=_autoCompressionBaseDetail(state); - if(!running) return (String(state&&state.summary?.headline||detail).trim()||detail); + if(!running) return (String(state&&state.summary?.headline||copy.preview||detail).trim()||detail); const elapsedLabel=_compressionElapsedLabel(state); return [detail, elapsedLabel].filter(Boolean).join(' · '); } @@ -5112,13 +5113,14 @@ function _autoCompressionDetailText(state){ return [base,handoff].filter(Boolean).join('\n'); } function _autoCompressionCardsHtml(state){ + const copy=_engineAwareCompressionCopy(String(state&&state.engine||_compressionEngineForSession()).toLowerCase(), String(state&&state.mode||_compressionModeForSession()).toLowerCase()); const 
running=state&&state.phase==='running'; const preview=_autoCompressionPreviewText(state); const cardDetail=_autoCompressionDetailText(state); return `
${_compressionStatusCardHtml({ - statusLabel: t('auto_compress_label'), + statusLabel: (String(state&&state.engine||'').toLowerCase()==='lcm'||String(state&&state.mode||'').toLowerCase()==='lossless_retrieval')?copy.label:t('auto_compress_label'), previewText: preview, detail: cardDetail, icon: running ? '' : li('check',13), @@ -5286,14 +5288,15 @@ function _latestCompressionReferenceMessage(messages, summaryText=''){ return {message:null, rawIdx:-1}; } function _compressionReferenceCardHtml(text, open=false){ + const copy=_engineAwareCompressionCopy(); const preview=text.split(/\n+/).filter(Boolean).slice(0,2).join(' '); return `
${li('star',13)} - ${esc(t('context_compaction_label'))} - ${esc(t('reference_only_label'))} · ${esc(preview)} + ${esc(copy.label)} + ${esc(copy.preview)} · ${esc(preview)} ${li('chevron-right',12)}
@@ -5367,6 +5370,31 @@ function _formatMessageFooterTimestamp(tsVal){ const opts={month:'short', day:'numeric', hour:'numeric', minute:'2-digit'}; return fmt?fmt(date,opts):date.toLocaleString([], opts); } +function _compressionEngineForSession(){ + return String( + (S.session&&( + S.session.compression_anchor_engine + || S.session.context_engine + )) || 'compressor' + ).trim().toLowerCase() || 'compressor'; +} +function _compressionModeForSession(){ + return String( + (S.session&&S.session.compression_anchor_mode) || 'summary_compaction' + ).trim().toLowerCase() || 'summary_compaction'; +} +function _engineAwareCompressionCopy(engine=_compressionEngineForSession(), mode=_compressionModeForSession()){ + if(engine==='lcm'||mode==='lossless_retrieval'){ + return { + label:t('retrieval_context_label'), + preview:t('retrieval_context_preview'), + }; + } + return { + label:t('context_compaction_label'), + preview:t('reference_only_label'), + }; +} function _compressionStatusCardHtml({ statusLabel, previewText, @@ -5946,6 +5974,7 @@ function renderMessages(options){ } function _insertCompressionLikeNodeByRawIdx(node, rawIdx){ if(!node) return; + if(rawIdx Date: Tue, 19 May 2026 12:06:57 -0700 Subject: [PATCH 07/14] feat(runtime): add runner adapter facade --- CHANGELOG.md | 3 + api/runtime_adapter.py | 123 +++++++++++++++++++++++ docs/rfcs/hermes-run-adapter-contract.md | 27 ++++- tests/test_runtime_adapter_seam.py | 118 ++++++++++++++++++++++ 4 files changed, 270 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index cfb09c642e..87d37ceb3f 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -2,6 +2,9 @@ ## [Unreleased] +### Added + +- **PR #TBD** by @Michaelyklam (refs #1925) — Add the first Slice 4b `RunnerRuntimeAdapter` facade for future runner/sidecar backends. 
The facade delegates `start_run`, `observe_run`, `get_run`, and control calls to an injected runner client, normalizes results into the existing RuntimeAdapter dataclasses, carries explicit profile/workspace/model payloads, and returns bounded unsupported-control results without owning `AIAgent`, stream, cancel, approval, clarify, goal, or queue state. No route wiring or default-on runner mode is introduced. ## [v0.51.93] — 2026-05-19 — Release BQ (stage-386 — 10-PR full sweep batch — RFC Slice 4 runner/sidecar gate + workspace tree toggle width CSS variable + settled file:// markdown link rendering + prompt-cache coverage percentage fix + terminal shell shutdown reap + configured model picker provider preservation + profile-aware assistant display names + state.db reconciliation slice 1 + queued-message cross-session drain fix + stale-stream writeback supersede) diff --git a/api/runtime_adapter.py b/api/runtime_adapter.py index f59fe87ad3..28a63d8099 100644 --- a/api/runtime_adapter.py +++ b/api/runtime_adapter.py @@ -144,6 +144,129 @@ def _active_control_result(value: Any) -> ControlResult: ) +def _runner_unsupported_control(name: str) -> ControlResult: + return ControlResult( + False, + status="unsupported", + safe_message=f"{name} is not supported by this runner backend.", + ) + + +class RunnerRuntimeAdapter: + """Protocol-translator facade for a future runner/sidecar backend. + + Slice 4 moves runtime ownership behind a runner boundary, but the WebUI + adapter must remain a translator. This class deliberately delegates to an + injected client instead of owning process-local streams, cancellation flags, + approval queues, clarify queues, or cached agent instances itself. 
+ """ + + def __init__(self, *, client: Any): + self._client = client + + def start_run(self, request: StartRunRequest) -> RunStartResult: + start_run = getattr(self._client, "start_run", None) + if start_run is None: + raise NotImplementedError("RunnerRuntimeAdapter.start_run requires a runner client") + payload = start_run(request) + if isinstance(payload, RunStartResult): + return payload + payload = dict(payload or {}) + run_id = str(payload.get("run_id") or payload.get("stream_id") or "") + stream_id = str(payload.get("stream_id") or run_id) + session_id = str(payload.get("session_id") or request.session_id) + active_controls = payload.get("active_controls") + if not isinstance(active_controls, list): + active_controls = [] + return RunStartResult( + run_id=run_id, + session_id=session_id, + stream_id=stream_id, + status=str(payload.get("status") or "started"), + started_at=payload.get("started_at"), + cursor=payload.get("cursor"), + active_controls=active_controls, + payload=payload, + ) + + def observe_run(self, run_id: str, *, cursor: str | None = None) -> RunEventStream: + observe_run = getattr(self._client, "observe_run", None) + if observe_run is None: + return RunEventStream(run_id=run_id, events=[], cursor=cursor, last_event_id=None) + result = observe_run(run_id, cursor=cursor) + if isinstance(result, RunEventStream): + return result + payload = dict(result or {}) + events = list(payload.get("events") or []) + last_event_id = payload.get("last_event_id") or (events[-1].get("event_id") if events else None) + next_cursor = payload.get("cursor") + if next_cursor is None and events: + next_cursor = str(events[-1].get("seq") or "") + return RunEventStream( + run_id=str(payload.get("run_id") or run_id), + events=events, + cursor=str(next_cursor) if next_cursor is not None else cursor, + last_event_id=last_event_id, + ) + + def get_run(self, run_id: str) -> RunStatus: + get_run = getattr(self._client, "get_run", None) + if get_run is None: + return 
RunStatus(run_id=run_id) + result = get_run(run_id) + if isinstance(result, RunStatus): + return result + payload = dict(result or {}) + active_controls = payload.get("active_controls") + if not isinstance(active_controls, list): + active_controls = [] + return RunStatus( + run_id=str(payload.get("run_id") or run_id), + session_id=str(payload.get("session_id") or "") or None, + status=str(payload.get("status") or "unknown"), + last_event_id=payload.get("last_event_id"), + terminal_state=payload.get("terminal_state"), + active_controls=active_controls, + pending_approval_id=payload.get("pending_approval_id"), + pending_clarify_id=payload.get("pending_clarify_id"), + ) + + def cancel_run(self, run_id: str) -> ControlResult: + cancel_run = getattr(self._client, "cancel_run", None) + if cancel_run is None: + return _runner_unsupported_control("Cancel") + return _active_control_result(cancel_run(run_id)) + + def respond_approval(self, run_id: str, approval_id: str, choice: str) -> ControlResult: + respond_approval = getattr(self._client, "respond_approval", None) + if respond_approval is None: + return _runner_unsupported_control("Approval") + return _active_control_result(respond_approval(run_id, approval_id, choice)) + + def respond_clarify(self, run_id: str, clarify_id: str, response: str) -> ControlResult: + respond_clarify = getattr(self._client, "respond_clarify", None) + if respond_clarify is None: + return _runner_unsupported_control("Clarify") + return _active_control_result(respond_clarify(run_id, clarify_id, response)) + + def queue_message(self, run_id: str, message: str, *, mode: str = "queue") -> ControlResult: + queue_message = getattr(self._client, "queue_message", None) + if queue_message is None: + return _runner_unsupported_control("Queue") + return _active_control_result(queue_message(run_id, message, mode=mode)) + + def update_goal( + self, + session_id: str, + action: Literal["set", "pause", "resume", "clear", "status", "edit"], + text: str = "", + 
) -> ControlResult: + update_goal = getattr(self._client, "update_goal", None) + if update_goal is None: + return _runner_unsupported_control("Goal") + return _active_control_result(update_goal(session_id, action, text)) + + class LegacyJournalRuntimeAdapter: """Protocol-translator facade over the current legacy streaming path. diff --git a/docs/rfcs/hermes-run-adapter-contract.md b/docs/rfcs/hermes-run-adapter-contract.md index 34ad176ab8..50ddb09795 100644 --- a/docs/rfcs/hermes-run-adapter-contract.md +++ b/docs/rfcs/hermes-run-adapter-contract.md @@ -94,8 +94,13 @@ adapter-seam work: `queue_message(...)` as a staged protocol method only; `/queue` remains browser-side queue/drain behavior, and no server-side queue endpoint or queue scheduler should be added merely for adapter symmetry. +- #2575 shipped the Slice 4a runner/sidecar contract gate in v0.51.93. The next + implementation step can add runner-backend adapter plumbing, but it must stay + default-off, keep legacy fallback intact, pass explicit profile/workspace/model + payloads instead of mutating WebUI process globals, and avoid recreating + `STREAMS` / `CANCEL_FLAGS` / approval queues / clarify queues under new names. -The next gate is the runner/sidecar planning contract, not queue implementation +The next gate is runner-backend plumbing, not queue implementation by default. Queue / continue routing should only move before Slice 4 if a future maintainer decision identifies an existing server-side legacy entry point and pins its response shape, ordering, and idempotency contract. Otherwise, keeping @@ -746,6 +751,26 @@ Non-goals for Slice 4a: - no dependency on Hermes Agent shipping `/v1/runs` before WebUI can validate the local runner boundary. +#### Slice 4b: Runner adapter client facade + +The first code slice after the Slice 4a contract should be a small +`RunnerRuntimeAdapter` facade that delegates to an injected runner client. This +is still not the runner process itself. 
Its job is to pin the adapter-facing +normalization rules before route wiring or process supervision lands: + +- `start_run` forwards a `StartRunRequest` carrying explicit session, profile, + workspace, attachments, model/provider, toolset, source, and metadata payloads; +- `observe_run` and `get_run` normalize runner responses into `RunEventStream` + and `RunStatus` so a recreated WebUI server can observe the same runner-owned + state without relying on process-local `STREAMS`; +- controls normalize accepted / not-active / unsupported outcomes into bounded + `ControlResult` values; +- the facade itself owns no `AIAgent`, worker thread, cancellation registry, + approval queue, clarify queue, goal scheduler, or server-side queue. + +The implementation remains default-off until a later slice adds an actual runner +client/backend and explicit route selection. + ## First Meaningful Success Criteria The first meaningful milestones are deliberately split. diff --git a/tests/test_runtime_adapter_seam.py b/tests/test_runtime_adapter_seam.py index 1b6910c6e0..1028b08a17 100644 --- a/tests/test_runtime_adapter_seam.py +++ b/tests/test_runtime_adapter_seam.py @@ -18,6 +18,7 @@ def test_runtime_adapter_interface_and_legacy_journal_methods_exist(): for name in required: assert hasattr(runtime.RuntimeAdapter, name) assert hasattr(runtime.LegacyJournalRuntimeAdapter, name) + assert hasattr(runtime.RunnerRuntimeAdapter, name) assert runtime.runtime_adapter_mode({}) == "legacy-direct" assert runtime.runtime_adapter_enabled({}) is False @@ -328,3 +329,120 @@ def test_rfc_defines_slice4_runner_contract_before_runner_code(): assert "profile,\n workspace, attachments, model/provider, toolset, and source metadata" in rfc assert "no removal of the legacy in-process backend" in rfc assert "no default-on runner mode" in rfc + assert "#### Slice 4b: Runner adapter client facade" in rfc + assert "delegates to an injected runner client" in rfc + assert "without relying on process-local 
`STREAMS`" in rfc + + +def test_runner_runtime_adapter_passes_explicit_start_payload_without_env_mutation(monkeypatch): + runtime = importlib.import_module("api.runtime_adapter") + captured = [] + + class FakeRunnerClient: + def start_run(self, request): + captured.append(request) + return { + "run_id": "runner-1", + "session_id": request.session_id, + "stream_id": "runner-1", + "status": "running", + "active_controls": ["cancel", "approval", "clarify", "goal"], + } + + before_terminal_cwd = "existing-cwd" + monkeypatch.setenv("TERMINAL_CWD", before_terminal_cwd) + adapter = runtime.RunnerRuntimeAdapter(client=FakeRunnerClient()) + request = runtime.StartRunRequest( + session_id="s-runner", + message="hello runner", + attachments=[{"path": "/tmp/a.png", "mime": "image/png"}], + workspace="/workspace/project", + profile="research", + provider="openai-codex", + model="gpt-5.5", + toolsets=["terminal", "file"], + source="webui", + metadata={"route": "/api/chat/start", "csrf_checked": True}, + ) + + result = adapter.start_run(request) + + assert captured == [request] + assert captured[0].workspace == "/workspace/project" + assert captured[0].profile == "research" + assert captured[0].attachments == [{"path": "/tmp/a.png", "mime": "image/png"}] + assert captured[0].provider == "openai-codex" + assert captured[0].model == "gpt-5.5" + assert captured[0].toolsets == ["terminal", "file"] + assert result.run_id == "runner-1" + assert result.active_controls == ["cancel", "approval", "clarify", "goal"] + assert runtime.os.environ["TERMINAL_CWD"] == before_terminal_cwd + + +def test_runner_runtime_adapter_observe_and_get_survive_adapter_recreation(): + runtime = importlib.import_module("api.runtime_adapter") + + class FakeRunnerClient: + def __init__(self): + self.events = [] + self.status = "unknown" + + def start_run(self, request): + self.status = "running" + self.events.append({"event_id": "runner-1:1", "seq": 1, "type": "token", "data": {"text": "hi"}}) + 
self.events.append({"event_id": "runner-1:2", "seq": 2, "type": "done", "data": {"ok": True}}) + self.status = "completed" + return {"run_id": "runner-1", "session_id": request.session_id, "stream_id": "runner-1", "status": "running"} + + def observe_run(self, run_id, *, cursor=None): + after = int(cursor or 0) + return {"run_id": run_id, "events": [e for e in self.events if e["seq"] > after]} + + def get_run(self, run_id): + return { + "run_id": run_id, + "session_id": "s-runner", + "status": self.status, + "terminal_state": "completed", + "last_event_id": self.events[-1]["event_id"], + "active_controls": [], + } + + shared_runner = FakeRunnerClient() + first_webui_process = runtime.RunnerRuntimeAdapter(client=shared_runner) + first_webui_process.start_run(runtime.StartRunRequest(session_id="s-runner", message="hello")) + + restarted_webui_process = runtime.RunnerRuntimeAdapter(client=shared_runner) + replay = restarted_webui_process.observe_run("runner-1", cursor="1") + status = restarted_webui_process.get_run("runner-1") + + assert [event["type"] for event in replay.events] == ["done"] + assert replay.cursor == "2" + assert replay.last_event_id == "runner-1:2" + assert status.status == "completed" + assert status.terminal_state == "completed" + assert status.last_event_id == "runner-1:2" + + +def test_runner_runtime_adapter_controls_are_bounded_and_do_not_use_legacy_state(): + runtime = importlib.import_module("api.runtime_adapter") + + class FakeRunnerClient: + def cancel_run(self, run_id): + return {"ok": False, "status": "not-active", "message": "Run is not active."} + + adapter = runtime.RunnerRuntimeAdapter(client=FakeRunnerClient()) + + cancel = adapter.cancel_run("finished-run") + approval = adapter.respond_approval("finished-run", "approval-1", "once") + clarify = adapter.respond_clarify("finished-run", "clarify-1", "answer") + queued = adapter.queue_message("finished-run", "next") + goal = adapter.update_goal("s-runner", "status") + + assert 
cancel.accepted is False + assert cancel.status == "not-active" + assert cancel.safe_message == "Run is not active." + for result in (approval, clarify, queued, goal): + assert result.accepted is False + assert result.status == "unsupported" + assert "not supported by this runner backend" in (result.safe_message or "") From 37df7d76a40c5c878b9486ee75bd3cfe45939703 Mon Sep 17 00:00:00 2001 From: starship-s <45587122+starship-s@users.noreply.github.com> Date: Tue, 19 May 2026 13:25:16 -0600 Subject: [PATCH 08/14] fix(webui): prevent composer draft rollback on refresh --- api/routes.py | 6 +++++- static/sessions.js | 17 +++++++++++++---- .../test_stage326_composer_draft_validation.py | 16 +++++++++++++++- tests/test_webui_external_refresh_frontend.py | 13 +++++++++++++ 4 files changed, 46 insertions(+), 6 deletions(-) diff --git a/api/routes.py b/api/routes.py index 01671c2af1..6e738d0661 100644 --- a/api/routes.py +++ b/api/routes.py @@ -4852,7 +4852,11 @@ def handle_post(handler, parsed) -> bool: if files is not None: draft["files"] = files s.composer_draft = draft - s.save() + # Draft persistence is not conversation activity. Touching updated_at + # here makes the active-session external-refresh poll force-reload the + # current chat every few seconds while the user is typing, and that + # delayed reload can restore an older draft over newer local input. + s.save(touch_updated_at=False) return j(handler, {"ok": True, "draft": s.composer_draft}) if parsed.path == "/api/session/update": diff --git a/static/sessions.js b/static/sessions.js index b3ffd7ed5f..fc9ae52bb2 100644 --- a/static/sessions.js +++ b/static/sessions.js @@ -47,7 +47,7 @@ function _saveComposerDraftNow(sid, text, files) { // Restore composer draft from server onto #msg textarea. // Only restores if there's actual text (skip empty/None drafts). // Guards against double-restore when rapidly switching sessions. 
-function _restoreComposerDraft(draft, targetSid) { +function _restoreComposerDraft(draft, targetSid, opts={}) { const ta = $('msg'); if (!ta) return; // targetSid is the session that was requested — if it no longer matches @@ -55,10 +55,20 @@ function _restoreComposerDraft(draft, targetSid) { if (targetSid && _loadingSessionId !== null && _loadingSessionId !== targetSid) return; const text = (draft && typeof draft.text === 'string') ? draft.text : ''; const files = (draft && Array.isArray(draft.files)) ? draft.files : []; + const current = ta.value || ''; + const preserveActiveInput = !!(opts && opts.preserveActiveInput); + + // Same-session force refreshes are driven by external state changes and may + // finish seconds after the user continued typing. In that case the local + // composer is the authoritative in-progress draft; never replace non-empty + // local input with an older server draft. Cross-session switches still restore + // normally so the previous session's composer contents do not leak forward. + if (preserveActiveInput && current && current !== text) return; + // If there's no text and no files, clear the textarea (a previous session's // draft may still be sitting there from a cross-session switch). if (!text && !files.length) { - if (ta.value) { + if (current) { ta.value = ''; if (typeof autoResize === 'function') autoResize(); if (typeof updateSendBtn === 'function') updateSendBtn(); @@ -66,7 +76,6 @@ function _restoreComposerDraft(draft, targetSid) { return; } // Only update if different to avoid cursor jumps on unrelated session switches. - const current = ta.value || ''; if (current !== text) { ta.value = text; if (typeof autoResize === 'function') autoResize(); @@ -790,7 +799,7 @@ async function loadSession(sid){ // against stale writes from slow responses racing to restore the previous draft). 
const _draft = S.session && S.session.composer_draft; if (_draft && (typeof _restoreComposerDraft === 'function')) { - _restoreComposerDraft(_draft, sid); + _restoreComposerDraft(_draft, sid, {preserveActiveInput:currentSid===sid&&forceReload}); } _resolveSessionModelForDisplaySoon(sid); diff --git a/tests/test_stage326_composer_draft_validation.py b/tests/test_stage326_composer_draft_validation.py index 71e3ecec5e..3f5904d6a1 100644 --- a/tests/test_stage326_composer_draft_validation.py +++ b/tests/test_stage326_composer_draft_validation.py @@ -81,10 +81,24 @@ def test_draft_validation_appears_before_persist(): src = Path(__file__).parents[1].joinpath("api", "routes.py").read_text(encoding="utf-8") # Anchor on the unique POST-validation comment marker. marker_idx = src.find("Stage-326 hardening (per Opus advisor)") - persist_idx = src.find("s.composer_draft = draft\n s.save()") + persist_idx = src.find("s.composer_draft = draft\n # Draft persistence is not conversation activity") assert marker_idx != -1 and persist_idx != -1, ( "could not locate validation marker or persist site" ) assert marker_idx < persist_idx, ( "validation block must run before composer_draft persist" ) + + +def test_draft_save_does_not_touch_session_updated_at(): + """Autosaving the composer must not look like conversation activity. + + If POST /api/session/draft bumps updated_at, the frontend's active-session + external refresh poll treats every keystroke autosave as a remote session + update and force-reloads the current chat a few seconds later. 
+ """ + src = Path(__file__).parents[1].joinpath("api", "routes.py").read_text(encoding="utf-8") + persist_idx = src.find("s.composer_draft = draft") + assert persist_idx != -1, "could not locate composer draft persist site" + save_idx = src.find("s.save(touch_updated_at=False)", persist_idx) + assert save_idx != -1, "composer draft save must preserve session updated_at" diff --git a/tests/test_webui_external_refresh_frontend.py b/tests/test_webui_external_refresh_frontend.py index faf1fe1aec..44b22f68f7 100644 --- a/tests/test_webui_external_refresh_frontend.py +++ b/tests/test_webui_external_refresh_frontend.py @@ -37,3 +37,16 @@ def test_force_reload_clears_stale_blocking_prompts_immediately(): """ assert "hideApprovalCard(forceReload)" in SESSIONS_JS assert "hideClarifyCard(forceReload, forceReload?'external-refresh':'dismissed')" in SESSIONS_JS + + +def test_same_session_force_reload_preserves_non_empty_composer_input(): + """A slow same-session refresh must not roll back text typed meanwhile. + + The active-session refresh path can finish seconds after it started. If the + user kept typing, restoring the server draft at the end of that load would + replace newer local input with an older debounced draft. 
+ """ + assert "function _restoreComposerDraft(draft, targetSid, opts={})" in SESSIONS_JS + assert "const preserveActiveInput = !!(opts && opts.preserveActiveInput);" in SESSIONS_JS + assert "if (preserveActiveInput && current && current !== text) return;" in SESSIONS_JS + assert "_restoreComposerDraft(_draft, sid, {preserveActiveInput:currentSid===sid&&forceReload});" in SESSIONS_JS From 729ed415ff91cac7d57086098e308f1ae5acee95 Mon Sep 17 00:00:00 2001 From: keyos Date: Tue, 19 May 2026 20:23:46 +0000 Subject: [PATCH 09/14] fix(approval): peek _gateway_queues for session-level approval when _pending is empty MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit During active streaming, dangerous-command approvals go through the gateway path and are stored in _gateway_queues as _ApprovalEntry objects, not in _pending. The _resolve_approval_legacy helper only looked at _pending, so 'Allow for this session' never called approve_session() — the user clicked Allow, the card vanished, but the next dangerous command asked again. Now when _pending has no matching entry, the helper peeks into _gateway_queues to extract pattern_keys, calls approve_session(), and marks found_target=True so resolve_gateway_approval also fires. This commit is re-scoped to peek-only (no agent_session_key round-trip, no state_db metadata changes). 
Includes: - Import + fallback for _gateway_queues - Null-safe key filtering in all_keys - Source-contract test (static) + functional test with @requires_agent_modules skip marker for CI - All comments and docstrings in English --- api/routes.py | 43 +++++++++++++---- tests/test_runtime_adapter_seam.py | 74 ++++++++++++++++++++++++++++++ 2 files changed, 107 insertions(+), 10 deletions(-) diff --git a/api/routes.py b/api/routes.py index 01671c2af1..097de89079 100644 --- a/api/routes.py +++ b/api/routes.py @@ -2277,6 +2277,7 @@ def _keep_latest_messaging_session_per_source( _pending, _lock, _permanent_approved, + _gateway_queues, resolve_gateway_approval, enable_session_yolo, disable_session_yolo, @@ -2295,6 +2296,7 @@ def _keep_latest_messaging_session_per_source( _pending = {} _lock = threading.Lock() _permanent_approved = set() + _gateway_queues = {} # ── Approval SSE subscribers (long-connection push) ────────────────────────── @@ -8739,6 +8741,7 @@ def _resolve_approval_legacy(sid: str, approval_id: str, choice: str) -> bool: # that omit approval_id still resolve the oldest entry for compatibility. pending = None found_target = False + gateway_keys = [] with _lock: queue = _pending.get(sid) if isinstance(queue, list): @@ -8764,6 +8767,25 @@ def _resolve_approval_legacy(sid: str, approval_id: str, choice: str) -> bool: if not approval_id or queue.get("approval_id") == approval_id: pending = _pending.pop(sid, None) found_target = pending is not None + # When no _pending entry found, peek into _gateway_queues for + # pattern_keys so session-level approval still works. The gateway + # path is the primary mechanism during active streaming; _pending + # is only used for UI polling/SSE notification. + # NOTE: Gateway queue entries don't carry approval_id, so when + # approval_id is given and _pending is empty, we assume the gateway + # entry at the head of the queue corresponds. 
This is safe because + # gateway entries are consumed synchronously with _pending entries + # under the same lock — there is no interleaving where a stale + # approval_id could match a different gateway entry. + if not pending: + gw_queue = _gateway_queues.get(sid) + if gw_queue and len(gw_queue) > 0: + gw_entry = gw_queue[0] + # _gateway_queues stores _ApprovalEntry objects; their + # .data dict carries command, pattern_key, pattern_keys. + gw_data = getattr(gw_entry, 'data', None) or {} + gateway_keys = gw_data.get("pattern_keys") or [gw_data.get("pattern_key", "")] if gw_data else [] + found_target = True # Notify SSE subscribers of the new head (or empty state) so the UI # surfaces any trailing approvals that were queued behind this one # without waiting for the next submit_pending. Without this, a parallel @@ -8775,16 +8797,17 @@ def _resolve_approval_legacy(sid: str, approval_id: str, choice: str) -> bool: else: _approval_sse_notify_locked(sid, None, 0) - if pending: - keys = pending.get("pattern_keys") or [pending.get("pattern_key", "")] - if choice in ("once", "session"): - for k in keys: - approve_session(sid, k) - elif choice == "always": - for k in keys: - approve_session(sid, k) - approve_permanent(k) - save_permanent_allowlist(_permanent_approved) + # Collect keys from both _pending and _gateway_queues + keys_from_pending = pending.get("pattern_keys") or [pending.get("pattern_key", "")] if pending else [] + all_keys = [k for k in keys_from_pending if k] + [k for k in gateway_keys if k] + if choice in ("once", "session"): + for k in all_keys: + approve_session(sid, k) + elif choice == "always": + for k in all_keys: + approve_session(sid, k) + approve_permanent(k) + save_permanent_allowlist(_permanent_approved) # Unblock the agent thread waiting in the gateway approval queue. # This is the primary signal when streaming is active — the agent # thread is parked in entry.event.wait() and needs to be woken up. 
diff --git a/tests/test_runtime_adapter_seam.py b/tests/test_runtime_adapter_seam.py index 1b6910c6e0..f24bac6715 100644 --- a/tests/test_runtime_adapter_seam.py +++ b/tests/test_runtime_adapter_seam.py @@ -1,6 +1,8 @@ import importlib import queue +from tests.conftest import requires_agent_modules + def test_runtime_adapter_interface_and_legacy_journal_methods_exist(): runtime = importlib.import_module("api.runtime_adapter") @@ -271,6 +273,78 @@ def test_approval_respond_does_not_fallback_to_oldest_when_explicit_id_is_stale( assert "queue.pop(0)" not in stale_branch +def test_approval_respond_peeks_gateway_queues_when_pending_empty() -> None: + """When _pending has no matching entry but _gateway_queues does, the + helper should extract pattern_keys from the gateway queue and call + approve_session even though pending is None. + """ + routes = importlib.import_module("api.routes") + src = (routes.Path(__file__).parent.parent / "api" / "routes.py").read_text(encoding="utf-8") + helper_idx = src.index("def _resolve_approval_legacy") + helper_body = src[helper_idx:src.index("def _handle_approval_respond", helper_idx)] + + assert "_gateway_queues" in helper_body, ( + "_resolve_approval_legacy must reference _gateway_queues " + "to read pattern_keys when _pending is empty" + ) + assert "gateway_keys" in helper_body, ( + "Must extract pattern_keys from _gateway_queues into a gateway_keys variable" + ) + assert "approve_session" in helper_body[helper_body.index("all_keys"):], ( + "Must call approve_session for keys extracted from _gateway_queues" + ) + + +@requires_agent_modules +def test_approval_respond_approves_from_gateway_queues_when_pending_empty() -> None: + """Verify _resolve_approval_legacy peeks into _gateway_queues for + pattern_keys when _pending has no matching entry, and calls + approve_session() even though pending is None (the real streaming case). 
+ """ + import threading + from api.routes import _resolve_approval_legacy + + routes = importlib.import_module("api.routes") + approval_mod = importlib.import_module("tools.approval") + + test_sid = "__test_gateway_approval_sid__" + test_key = "__test_pattern_key__" + + # 1. Ensure _pending is empty for this sid + with approval_mod._lock: + approval_mod._pending.pop(test_sid, None) + + # 2. Populate _gateway_queues with a real entry + entry = approval_mod._ApprovalEntry({ + "command": "test_cmd", + "pattern_key": test_key, + "pattern_keys": [test_key], + "description": "test dangerous cmd", + }) + with approval_mod._lock: + approval_mod._gateway_queues.setdefault(test_sid, []).append(entry) + + try: + # 3. Run the helper with empty _pending but populated _gateway_queues + result = _resolve_approval_legacy(test_sid, "", "session") + + # 4. Verify approve_session was called (is_approved must return True) + assert approval_mod.is_approved(test_sid, test_key), ( + "approve_session should have been called for the pattern_key " + "extracted from _gateway_queues" + ) + assert result is True, ( + "_resolve_approval_legacy should return True when it finds " + "and resolves the gateway entry" + ) + finally: + # 5. 
Cleanup + with approval_mod._lock: + approval_mod._gateway_queues.pop(test_sid, None) + approval_mod._session_approved.pop(test_sid, None) + approval_mod._pending.pop(test_sid, None) + + def test_chat_start_route_selects_adapter_only_when_flag_enabled(): routes = importlib.import_module("api.routes") src = (routes.Path(__file__).parent.parent / "api" / "routes.py").read_text(encoding="utf-8") From 692ea22f9ef7a009f596ee80b7297ae66183a0ac Mon Sep 17 00:00:00 2001 From: starship-s <45587122+starship-s@users.noreply.github.com> Date: Tue, 19 May 2026 14:35:11 -0600 Subject: [PATCH 10/14] fix(streaming): finish auto-compression card after rotation --- static/messages.js | 9 ++++++--- tests/test_auto_compression_card.py | 30 ++++++++++++++++++++++------- 2 files changed, 29 insertions(+), 10 deletions(-) diff --git a/static/messages.js b/static/messages.js index 6842bbb96f..7fffdc631c 100644 --- a/static/messages.js +++ b/static/messages.js @@ -1829,12 +1829,15 @@ function attachLiveStream(activeSid, streamId, uploaded=[], options={}){ // Context was auto-compressed during this turn. Render it through the // same transient compression-card path as manual /compress, without // inserting a fake assistant message into history or model context. 
- if(!S.session||S.session.session_id!==activeSid) return; + if(!S.session) return; + const currentSid=S.session.session_id; let d={}; try{ d=JSON.parse(e.data||'{}')||{}; }catch(_){ d={}; } const eventSid=d.old_session_id||d.session_id||activeSid; - if(eventSid!==activeSid && d.new_session_id!==activeSid && d.continuation_session_id!==activeSid) return; const continuationSid=d.new_session_id||d.continuation_session_id||''; + const eventMatchesCurrent=!!(currentSid&&(eventSid===currentSid||d.new_session_id===currentSid||d.continuation_session_id===currentSid)); + if(!eventMatchesCurrent) return; + const displaySid=currentSid; const message=String(d.message||'Context auto-compressed to continue the conversation').trim(); if(d.usage&&typeof _syncCtxIndicator==='function'){ S.lastUsage={...(S.lastUsage||{}),...d.usage}; @@ -1842,7 +1845,7 @@ function attachLiveStream(activeSid, streamId, uploaded=[], options={}){ } if(typeof setCompressionUi==='function'){ const state={ - sessionId:activeSid, + sessionId:displaySid, phase:'done', automatic:true, message, diff --git a/tests/test_auto_compression_card.py b/tests/test_auto_compression_card.py index 23576ff2a0..d57a79567d 100644 --- a/tests/test_auto_compression_card.py +++ b/tests/test_auto_compression_card.py @@ -217,16 +217,15 @@ def test_auto_compression_sse_uses_transient_card_not_fake_message(): def test_auto_compression_sse_keeps_inactive_and_malformed_paths_safe(): block = _compressed_listener_block() - guard = "if(!S.session||S.session.session_id!==activeSid) return;" + guard = "if(!S.session) return;" assert guard in block assert block.index(guard) < block.index("setCompressionUi") assert "try{ d=JSON.parse(e.data||'{}')||{}; }catch(_){ d={}; }" in block assert "const eventSid=d.old_session_id||d.session_id||activeSid;" in block - # The listener also accepts a rotated continuation session id so journal- - # replay reconnects after compression rotate land the done card. 
- # See Opus advisor followup on stage-385 (v0.51.92). - event_guard = "if(eventSid!==activeSid && d.new_session_id!==activeSid && d.continuation_session_id!==activeSid) return;" + assert "const eventMatchesCurrent=" in block + event_guard = "if(!eventMatchesCurrent) return;" assert event_guard in block + assert block.index("const eventMatchesCurrent=") < block.index(event_guard) def test_auto_compression_done_accepts_rotated_continuation_session_event(): @@ -238,12 +237,29 @@ def test_auto_compression_done_accepts_rotated_continuation_session_event(): # continuation id as display metadata instead of dropping the event. assert "const eventSid=d.old_session_id||d.session_id||activeSid;" in block assert "const continuationSid=d.new_session_id||d.continuation_session_id||'';" in block - event_guard = "if(eventSid!==activeSid && d.new_session_id!==activeSid && d.continuation_session_id!==activeSid) return;" + event_guard = "if(!eventMatchesCurrent) return;" assert event_guard in block - assert block.index("const eventSid=") < block.index(event_guard) + assert block.index("const eventSid=") < block.index("const eventMatchesCurrent=") assert "continuationSessionId:continuationSid" in block +def test_auto_compression_done_accepts_event_after_current_session_rotates(): + block = _compressed_listener_block() + + # The final compressed event can arrive/replay after another event has already + # updated S.session to the continuation session id. Do not drop it just + # because the active browser session no longer equals the original activeSid. 
+ strict_active_guard = "if(!S.session||S.session.session_id!==activeSid) return;" + assert strict_active_guard not in block + assert "if(!S.session) return;" in block + assert "const currentSid=S.session.session_id;" in block + assert "const eventMatchesCurrent=" in block + assert "const displaySid=currentSid;" in block + assert "sessionId:displaySid" in block + assert block.index("const eventSid=") < block.index("const eventMatchesCurrent=") + assert block.index("const displaySid=") < block.index("setCompressionUi(state)") + + def test_auto_compression_done_sse_refreshes_context_indicator_usage(): block = _compressed_listener_block() From ada59d73e631cef1836f346b6fea088e29db571e Mon Sep 17 00:00:00 2001 From: keyos Date: Tue, 19 May 2026 20:56:17 +0000 Subject: [PATCH 11/14] fix(approval): simplify gateway_keys expression and document race window MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Drop the redundant 'if gw_data else []' guard — gw_data is already guaranteed to be a dict by the 'or {}' fallback above. Add a one-line comment explaining the peek-without-pop race window: a concurrent resolver may pop a different gateway entry, but approve_session is idempotent over the session key set so the outcome is the same regardless. --- api/routes.py | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/api/routes.py b/api/routes.py index 097de89079..73a9427e08 100644 --- a/api/routes.py +++ b/api/routes.py @@ -8784,7 +8784,12 @@ def _resolve_approval_legacy(sid: str, approval_id: str, choice: str) -> bool: # _gateway_queues stores _ApprovalEntry objects; their # .data dict carries command, pattern_key, pattern_keys. 
gw_data = getattr(gw_entry, 'data', None) or {} - gateway_keys = gw_data.get("pattern_keys") or [gw_data.get("pattern_key", "")] if gw_data else [] + gateway_keys = gw_data.get("pattern_keys") or [gw_data.get("pattern_key", "")] + # Peek is not strict — a concurrent resolver may pop a + # different gateway entry before we reach + # resolve_gateway_approval below, but approve_session is + # idempotent over the session key set so the outcome is + # the same regardless of which entry wins the race. found_target = True # Notify SSE subscribers of the new head (or empty state) so the UI # surfaces any trailing approvals that were queued behind this one From 1ebfbf352702def9af13d3c9b477b3a75d3879bb Mon Sep 17 00:00:00 2001 From: Michael Lam Date: Tue, 19 May 2026 14:27:41 -0700 Subject: [PATCH 12/14] fix: reconcile session metadata counts --- CHANGELOG.md | 4 ++ api/routes.py | 42 +++++++++++---------- tests/test_webui_state_db_reconciliation.py | 37 ++++++++++++++++++ 3 files changed, 64 insertions(+), 19 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index cfb09c642e..c518267d71 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -2,6 +2,10 @@ ## [Unreleased] +### Fixed + +- **PR #2604** by @Michaelyklam (refs #2594) — Make the metadata-only `/api/session?messages=0` path report the same reconciled message count and last-message timestamp as a full session load. Sidebar refresh polling no longer loops forever when `state.db` retains old rows that the append-only merge correctly filters out. 
+ ## [v0.51.93] — 2026-05-19 — Release BQ (stage-386 — 10-PR full sweep batch — RFC Slice 4 runner/sidecar gate + workspace tree toggle width CSS variable + settled file:// markdown link rendering + prompt-cache coverage percentage fix + terminal shell shutdown reap + configured model picker provider preservation + profile-aware assistant display names + state.db reconciliation slice 1 + queued-message cross-session drain fix + stale-stream writeback supersede) diff --git a/api/routes.py b/api/routes.py index 01671c2af1..8b00c8840f 100644 --- a/api/routes.py +++ b/api/routes.py @@ -2221,7 +2221,6 @@ def _keep_latest_messaging_session_per_source( get_cli_sessions, get_cli_session_messages, get_state_db_session_messages, - get_state_db_session_summary, merge_session_messages_append_only, ensure_cron_project, is_cron_session, @@ -3669,16 +3668,24 @@ def handle_get(handler, parsed) -> bool: is_messaging_session = _is_messaging_session_record(s) or _is_messaging_session_record(cli_meta) cli_messages = [] state_db_messages = [] - state_db_summary = {} + sidecar_metadata_messages = None if is_messaging_session: cli_messages = get_cli_session_messages(sid) elif load_messages: state_db_messages = get_state_db_session_messages(sid) elif not is_messaging_session: - # Metadata-only callers (frontend refresh polling) only need a - # cheap staleness signal. Avoid full transcript materialization - # on the steady-state polling path. - state_db_summary = get_state_db_session_summary(sid) + # Metadata-only callers still need the same append-only + # reconciliation contract as full loads. A raw state.db summary + # can count stale rows that the merge intentionally filters out, + # which makes sidebar polling think the transcript is always + # newer than the loaded conversation. 
+ state_db_messages = get_state_db_session_messages(sid) + sidecar_metadata_session = Session.load(sid) + sidecar_metadata_messages = ( + getattr(sidecar_metadata_session, "messages", []) or [] + if sidecar_metadata_session + else [] + ) _t2 = _time.monotonic() effective_model = ( _resolve_effective_session_model_for_display(s) @@ -3708,23 +3715,20 @@ def handle_get(handler, parsed) -> bool: sidecar_messages = getattr(s, "messages", []) or [] _all_msgs = merge_session_messages_append_only(cli_messages, sidecar_messages) else: - _all_msgs = merge_session_messages_append_only(getattr(s, "messages", []) or [], state_db_messages) - if not load_messages and state_db_summary: - sidecar_messages = getattr(s, "messages", []) or [] - sidecar_count = len(sidecar_messages) + _metadata_sidecar = sidecar_metadata_messages + if _metadata_sidecar is None: + _metadata_sidecar = getattr(s, "messages", []) or [] + _all_msgs = merge_session_messages_append_only(_metadata_sidecar, state_db_messages) + if not load_messages: + _summary_message_count = len(_all_msgs) try: - sidecar_last = max( + _summary_last_message_at = max( float((m or {}).get("timestamp") or 0) - for m in sidecar_messages + for m in _all_msgs if isinstance(m, dict) - ) if sidecar_messages else 0 + ) if _all_msgs else 0 except (TypeError, ValueError): - sidecar_last = 0 - state_count = int(state_db_summary.get("message_count") or 0) - state_last = float(state_db_summary.get("last_message_at") or 0) - _all_msgs = sidecar_messages - _summary_message_count = max(sidecar_count, state_count) - _summary_last_message_at = max(sidecar_last, state_last) + _summary_last_message_at = 0 else: _summary_message_count = None _summary_last_message_at = None diff --git a/tests/test_webui_state_db_reconciliation.py b/tests/test_webui_state_db_reconciliation.py index 0180345057..5e0e17b29c 100644 --- a/tests/test_webui_state_db_reconciliation.py +++ b/tests/test_webui_state_db_reconciliation.py @@ -317,6 +317,43 @@ def 
test_metadata_fast_path_reports_reconciled_state_db_count(monkeypatch, tmp_p assert session["last_message_at"] == 1003.0 +def test_metadata_fast_path_excludes_state_db_rows_filtered_by_reconciliation(monkeypatch, tmp_path): + import api.routes as routes + + sid = "webui_reconcile_metadata_filtered" + _install_test_session( + monkeypatch, + tmp_path, + sid, + [ + {"role": "user", "content": "old user", "timestamp": 1000.0}, + {"role": "assistant", "content": "old assistant", "timestamp": 1001.0}, + ], + ) + _make_state_db( + tmp_path / "state.db", + sid, + [ + {"role": "user", "content": "old user", "timestamp": 1000.0}, + {"role": "assistant", "content": "old assistant", "timestamp": 1001.0}, + # This stale state.db-only row is older than the newest sidecar + # timestamp and lacks an explicit message id, so the full + # append-only merge filters it out. The metadata path must report + # the same count/last timestamp or sidebar refresh polling loops. + {"role": "tool", "content": "stale state row", "timestamp": 1000.5}, + ], + ) + + handler = _GetHandler(f"/api/session?session_id={sid}&messages=0&resolve_model=0") + routes.handle_get(handler, urlparse(handler.path)) + + assert handler.status == 200 + session = handler.response_json["session"] + assert session["messages"] == [] + assert session["message_count"] == 2 + assert session["last_message_at"] == 1001.0 + + def test_state_db_reconciliation_preserves_tool_metadata(monkeypatch, tmp_path): import api.routes as routes From dc5c8168d154d4621fb8faefff2a142a64163d94 Mon Sep 17 00:00:00 2001 From: Lumen Yang Date: Tue, 19 May 2026 21:34:08 +0000 Subject: [PATCH 13/14] fix(webui): refresh active session on external sidecar updates --- api/models.py | 23 ++++++++++++++++-- api/routes.py | 13 ++++++++--- tests/test_webui_state_db_reconciliation.py | 26 +++++++++++++++++++++ 3 files changed, 57 insertions(+), 5 deletions(-) diff --git a/api/models.py b/api/models.py index 6ff748696b..fe932d288c 100644 --- a/api/models.py 
+++ b/api/models.py @@ -436,7 +436,14 @@ def __init__(self, session_id: str=None, title: str='Untitled', self.read_only = bool(kwargs.get('read_only', False)) self.enabled_toolsets = enabled_toolsets # List[str] or None — per-session toolset override self.composer_draft = composer_draft if isinstance(composer_draft, dict) else {} - self._metadata_message_count = None + raw_message_count = kwargs.get('message_count') + parsed_message_count = None + if raw_message_count is not None: + try: + parsed_message_count = int(raw_message_count) + except (TypeError, ValueError): + parsed_message_count = None + self._metadata_message_count = parsed_message_count if parsed_message_count is not None and parsed_message_count >= 0 else None @property def path(self): @@ -590,7 +597,19 @@ def load_metadata_only(cls, sid): parsed['messages'] = [] parsed['tool_calls'] = [] session = cls(**parsed) - session._metadata_message_count = _lookup_index_message_count(sid) + metadata_message_count = _lookup_index_message_count(sid) + if metadata_message_count is None: + raw_count = parsed.get('message_count') + if isinstance(raw_count, int) and raw_count >= 0: + metadata_message_count = raw_count + else: + try: + parsed_count = int(raw_count) + except (TypeError, ValueError): + parsed_count = None + if parsed_count is not None and parsed_count >= 0: + metadata_message_count = parsed_count + session._metadata_message_count = metadata_message_count # Mark this session as a metadata-only stub. save() refuses to write # such a session because doing so would atomically replace the # on-disk JSON with messages=[], wiping the conversation. 
Any diff --git a/api/routes.py b/api/routes.py index 01671c2af1..b3a7c8373d 100644 --- a/api/routes.py +++ b/api/routes.py @@ -3709,9 +3709,16 @@ def handle_get(handler, parsed) -> bool: _all_msgs = merge_session_messages_append_only(cli_messages, sidecar_messages) else: _all_msgs = merge_session_messages_append_only(getattr(s, "messages", []) or [], state_db_messages) - if not load_messages and state_db_summary: + if not load_messages: sidecar_messages = getattr(s, "messages", []) or [] sidecar_count = len(sidecar_messages) + if sidecar_count == 0: + try: + metadata_count = getattr(s, "_metadata_message_count", None) + if metadata_count is not None: + sidecar_count = max(0, int(metadata_count)) + except (TypeError, ValueError): + sidecar_count = 0 try: sidecar_last = max( float((m or {}).get("timestamp") or 0) @@ -3720,8 +3727,8 @@ def handle_get(handler, parsed) -> bool: ) if sidecar_messages else 0 except (TypeError, ValueError): sidecar_last = 0 - state_count = int(state_db_summary.get("message_count") or 0) - state_last = float(state_db_summary.get("last_message_at") or 0) + state_count = int(state_db_summary.get("message_count") or 0) if state_db_summary else 0 + state_last = float(state_db_summary.get("last_message_at") or 0) if state_db_summary else 0 _all_msgs = sidecar_messages _summary_message_count = max(sidecar_count, state_count) _summary_last_message_at = max(sidecar_last, state_last) diff --git a/tests/test_webui_state_db_reconciliation.py b/tests/test_webui_state_db_reconciliation.py index 0180345057..3bb30bd6be 100644 --- a/tests/test_webui_state_db_reconciliation.py +++ b/tests/test_webui_state_db_reconciliation.py @@ -135,6 +135,32 @@ def test_api_session_includes_state_db_messages_newer_than_webui_sidecar(monkeyp assert payload["session"]["message_count"] == 4 +def test_metadata_poll_uses_sidecar_message_count_for_external_updates(monkeypatch, tmp_path): + """Active-session external refresh relies on metadata-only counts. 
+ + When no session index exists, metadata-only loads may fall back to + _metadata_message_count=None. The refresh poll must still report the real + sidecar message count; otherwise an external session JSON update can be + invisible until a full reload. + """ + import api.routes as routes + + sid = "webui_reconcile_metadata_sidecar" + sidecar_messages = [ + {"role": "user", "content": "before external update", "timestamp": 1000.0}, + {"role": "assistant", "content": "externally appended", "timestamp": 1001.0}, + ] + _install_test_session(monkeypatch, tmp_path, sid, sidecar_messages) + + handler = _GetHandler(f"/api/session?session_id={sid}&messages=0&resolve_model=0") + routes.handle_get(handler, urlparse(handler.path)) + + assert handler.status == 200 + session = handler.response_json["session"] + assert session["message_count"] == 2 + assert session["last_message_at"] == 1001.0 + + def test_state_db_reconciliation_preserves_sidecar_only_messages(monkeypatch, tmp_path): import api.routes as routes From 7ae97c551a3b93ab71574571572389e0ebef3863 Mon Sep 17 00:00:00 2001 From: nesquena-hermes <[email protected]> Date: Tue, 19 May 2026 22:11:44 +0000 Subject: [PATCH 14/14] Stamp CHANGELOG for v0.51.94 (Release BR / stage-387 / 10-PR full sweep batch) --- CHANGELOG.md | 18 +++++++++++++----- 1 file changed, 13 insertions(+), 5 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index ab8a179e1f..20fd62773a 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -2,14 +2,24 @@ ## [Unreleased] -### Added -- **PR #TBD** by @Michaelyklam (refs #1925) — Add the first Slice 4b `RunnerRuntimeAdapter` facade for future runner/sidecar backends. 
The facade delegates `start_run`, `observe_run`, `get_run`, and control calls to an injected runner client, normalizes results into the existing RuntimeAdapter dataclasses, carries explicit profile/workspace/model payloads, and returns bounded unsupported-control results without owning `AIAgent`, stream, cancel, approval, clarify, goal, or queue state. No route wiring or default-on runner mode is introduced. +## [v0.51.94] — 2026-05-19 — Release BR (stage-387 — 10-PR full sweep batch — Slice 4b runner adapter facade + folder zip download + partial recovery marker dedupe + browser api() client-side timeout + auto-compression card rotation finish + composer draft rollback fix + metadata count reconciliation + active-session refresh on external sidecar updates + indexed context metadata + gateway-queues approval peek) ### Fixed -- **PR #2604** by @Michaelyklam (refs #2594) — Make the metadata-only `/api/session?messages=0` path report the same reconciled message count and last-message timestamp as a full session load. Sidebar refresh polling no longer loops forever when `state.db` retains old rows that the append-only merge correctly filters out. +- **PR #2566** by @bjb2 — Add `GET /api/folder/download?session_id=...&path=...` streaming-zip endpoint with pre-flight 413 on size/file-count cap exceeded, `os.walk(followlinks=False)` plus per-symlink workspace-root resolution check, `allowZip64=True` for large files, and a "Download Folder" item in the workspace file context menu (dir items only). Configurable caps via `HERMES_WEBUI_FOLDER_ZIP_MAX_MB` (1024 default) and `HERMES_WEBUI_FOLDER_ZIP_MAX_FILES` (50000 default). `download_folder` i18n key added across all 11 locales with `// TODO: translate` fallback markers for non-en entries. +- **PR #2593** by @Michaelyklam (closes #2592) — Deduplicate cancelled/recovered partial assistant markers using the full `(content, reasoning, partial tool calls)` payload instead of only non-empty text content. 
Tool-only failed turns no longer append identical empty-content `_partial` messages repeatedly. Full session loads collapse adjacent duplicate partial markers from already-bloated session files while preserving a `.partial-bak-` backup. New helpers `_partial_message_signature()` (api/streaming.py:2593-2622) + `_partial_marker_already_present()` (api/streaming.py:2625-2641) scope the dedup search to the current user turn only. +- **PR #2597** by @dso2ng (closes #2539) — Add a 30s default client-side timeout to the shared browser `api()` helper, with per-call `timeoutMs` overrides, `AbortController`-based cancellation, a timeout toast, and explicit 60s/120s ceilings for legitimately longer update flows. Body-read phase also raced against the timeout so a server that replies headers-OK and then stalls mid-JSON rejects cleanly. New `tests/test_api_timeout.py` covers default, override, abort, and body-read-stall paths. +- **PR #2601** by @starship-s — Prevent the composer-draft rollback regression introduced by #2581's active-session external-refresh polling. Adds `opts.preserveActiveInput` to `_restoreComposerDraft` and gates the overwrite on `current && current !== text`, keeping the guard co-located with the function that owns the contract. Backend `s.save(touch_updated_at=False)` for `/api/session/draft` so draft autosaves no longer falsely advance `updated_at` and trigger the refresh poll. Supersedes parallel-discovery PR #2602. +- **PR #2603** by @starship-s — Finish the running auto-compression card after the backend rotates the session id. The `compressed` SSE listener at `static/messages.js:1829-1862` used to early-return whenever `S.session.session_id !== activeSid`, but the `state` event listener at `:1656-1662` already rotates `window._compressionUi.sessionId` to the continuation id before `compressed` arrives. 
The strict active-session check is replaced with a cross-session safety check that still rejects mismatched events but no longer rejects the legitimate post-rotation `done` payload, so the elapsed-timer "compressing…" state no longer freezes after rotation completes. +- **PR #2604** by @Michaelyklam (closes #2594) — Reconcile session metadata counts in the `/api/session?messages=0` fast path. Replaces the prior `max(sidecar_count, state_count)` heuristic with `len(merge_session_messages_append_only(sidecar_messages, state_db_messages))` so the metadata-only count matches the full-load count. Closes the followup issue filed against PR #2581 / v0.51.93 — sidebar refresh polling no longer loops forever when `state.db` retains old rows that the append-only merge correctly filters out. +- **PR #2605** by @LumenYoung (refs #2581) — Make the metadata-only `/api/session?messages=0&resolve_model=0` path return the persisted sidecar `message_count` from `Session._metadata_message_count` when no session-index entry exists, so the active-session external-refresh signal still trips on legacy sessions whose sidecar contains externally-appended content. Composed cleanly with #2604 (the legacy-fallback applies only when the reconciled merged count is zero). +- **PR #2573** by @espokaos-ops (closes #2510) — Persist session-level approvals when an "Allow for this session" click lands while a stream is active and `_pending` is empty. The approval flow now peeks `_gateway_queues[sid]` to recover the queued `_ApprovalEntry`'s `pattern_keys` so `approve_session()` records the approval; the next dangerous command in the same session no longer asks again. Reduced scope to peek-only per prior review note; the `agent_session_key` round-trip plumbing was dropped (it was dead on the WebUI streaming path). + +### Added +- **PR #2599** by @Michaelyklam (refs #1925) — Add the Slice 4b `RunnerRuntimeAdapter` facade — a protocol-translator client over a future runner/sidecar backend.
The facade delegates `start_run`, `observe_run`, `get_run`, and control calls to an injected runner client, normalizes results into the existing `RunStartResult`/`RunEventStream`/`RunStatus`/`ControlResult` dataclasses, carries explicit `profile`/`workspace`/`model` payload fields, and returns bounded `unsupported` control results without owning `AIAgent`, stream lifecycle, cancel/approval/clarify queues, goal state, or cached-agent table. No route wiring, no default-on runner mode, no public response-shape change. +- **PR #2600** by @LumenYoung (refs #2266) — Slimmer WebUI follow-up from the closed LCM/context-engine PR #2266. Adds rendering and persistence for context-engine compression-anchor metadata (when present on a session or live compression event) including an "Indexed context" detail line on auto-compression cards. No agent-layer clone orchestration; WebUI-only metadata surface. ## [v0.51.93] — 2026-05-19 — Release BQ (stage-386 — 10-PR full sweep batch — RFC Slice 4 runner/sidecar gate + workspace tree toggle width CSS variable + settled file:// markdown link rendering + prompt-cache coverage percentage fix + terminal shell shutdown reap + configured model picker provider preservation + profile-aware assistant display names + state.db reconciliation slice 1 + queued-message cross-session drain fix + stale-stream writeback supersede) @@ -25,8 +35,6 @@ - **PR #2588** by @Michaelyklam (refs #2569) — Preserve the configured provider when choosing a configured model from the composer picker. `_getOptionProviderId()` now reads `data-provider` from temporary `