From 6caf86ba9698e86bc7dae0a5bd824cbf844d87d2 Mon Sep 17 00:00:00 2001
From: Bryan Bartley <bryanjbartley@gmail.com>
Date: Mon, 18 May 2026 21:40:02 -0500
Subject: [PATCH 01/14] feat(workspace): download folder as zip via
 /api/folder/download

Adds a "Download Folder" item to the workspace file-tree right-click
menu and a GET /api/folder/download endpoint that streams the
directory as a zip with Content-Disposition: attachment.

Configurable caps:
  HERMES_WEBUI_FOLDER_ZIP_MAX_MB    (default 1024)
  HERMES_WEBUI_FOLDER_ZIP_MAX_FILES (default 50000)

The directory walk runs as a pre-flight, so exceeding either cap
returns 413 with a JSON body BEFORE any zip bytes are sent. Directory
symlinks are not followed, and file symlinks resolving outside the
workspace are skipped. Mirrors the existing _handle_file_raw shape
(session_id resolution, safe_resolve, RFC 5987 filename via
_content_disposition_value). Stdlib zipfile only; no new dependencies.

Tests: 11 static-inspection tests matching the style of
tests/test_issue1867_upload_size_preflight.py. All passing on
Python 3.11/3.12/3.13.
---
 api/routes.py                 | 140 ++++++++++++++++++++++++++++++++++
 static/i18n.js                |   1 +
 static/ui.js                  |  17 +++++
 tests/test_folder_download.py | 102 +++++++++++++++++++++++++
 4 files changed, 260 insertions(+)
 create mode 100644 tests/test_folder_download.py

diff --git a/api/routes.py b/api/routes.py
index fb2caeab09..1835416638 100644
--- a/api/routes.py
+++ b/api/routes.py
@@ -4150,6 +4150,9 @@ def handle_get(handler, parsed) -> bool:
     if parsed.path == "/api/file/raw":
         return _handle_file_raw(handler, parsed)
 
+    if parsed.path == "/api/folder/download":
+        return _handle_folder_download(handler, parsed)
+
     if parsed.path == "/api/file":
         return _handle_file_read(handler, parsed)
 
@@ -6630,6 +6633,143 @@ def _file_raw_target(session, sid: str, rel: str) -> Path | None:
     return None
 
 
+# ─── /api/folder/download ───────────────────────────────────────────────────
+# Configurable caps. Match the HERMES_WEBUI_MAX_UPLOAD_MB style used elsewhere
+# (api/config.py) so operators have one consistent env-var convention.
+def _folder_zip_max_bytes() -> int:
+    try:
+        mb = int(os.getenv("HERMES_WEBUI_FOLDER_ZIP_MAX_MB", "1024"))
+    except ValueError:
+        mb = 1024
+    return max(1, mb) * 1024 * 1024
+
+
+def _folder_zip_max_files() -> int:
+    try:
+        return max(1, int(os.getenv("HERMES_WEBUI_FOLDER_ZIP_MAX_FILES", "50000")))
+    except ValueError:
+        return 50000
+
+
+def _folder_download_collect(target: Path, workspace_root: Path,
+                              max_bytes: int, max_files: int):
+    """Walk target dir; return (files, total_bytes, hit_limit_reason_or_None).
+
+    files is a list of (filesystem_path, archive_name) tuples ready for
+    ZipFile.write. Symlinks escaping the workspace are skipped.
+    """
+    import os as _os
+    files = []
+    total_bytes = 0
+    for root, dirs, names in _os.walk(target, followlinks=False):
+        root_path = Path(root)
+        try:
+            if not root_path.resolve().is_relative_to(workspace_root):
+                dirs[:] = []
+                continue
+        except (ValueError, OSError):
+            dirs[:] = []
+            continue
+        for name in names:
+            fp = root_path / name
+            if fp.is_symlink():
+                try:
+                    if not fp.resolve().is_relative_to(workspace_root):
+                        continue
+                except (ValueError, OSError):
+                    continue
+            try:
+                size = fp.stat().st_size
+            except OSError:
+                continue
+            if len(files) >= max_files:
+                return files, total_bytes, "max_files"
+            if total_bytes + size > max_bytes:
+                return files, total_bytes, "max_bytes"
+            try:
+                arcname = fp.relative_to(target)
+            except ValueError:
+                continue
+            files.append((fp, str(arcname)))
+            total_bytes += size
+    return files, total_bytes, None
+
+
+def _handle_folder_download(handler, parsed):
+    """GET /api/folder/download?session_id=...&path=...
+
+    Streams a zip of <session.workspace>/<path>. Symlinks escaping the
+    workspace are skipped. Empty folders return an empty (valid) zip.
+    Respects HERMES_WEBUI_FOLDER_ZIP_MAX_MB and HERMES_WEBUI_FOLDER_ZIP_MAX_FILES.
+    Pre-flights the walk so size/count failures return a clean 413 with JSON
+    body BEFORE any zip bytes are sent.
+    """
+    import zipfile
+    from urllib.parse import parse_qs
+
+    qs = parse_qs(parsed.query)
+    sid = qs.get("session_id", [""])[0]
+    if not sid:
+        return bad(handler, "session_id is required")
+    try:
+        s = get_session(sid)
+    except KeyError:
+        return bad(handler, "Session not found", 404)
+
+    rel = qs.get("path", [""])[0]
+    try:
+        target = safe_resolve(Path(s.workspace), rel)
+    except ValueError:
+        return bad(handler, "invalid path", 400)
+    if not target.exists():
+        return j(handler, {"error": "not found"}, status=404)
+    if not target.is_dir():
+        return bad(handler, "path must be a directory; use /api/file/raw for single files", 400)
+
+    workspace_root = Path(s.workspace).resolve()
+    max_bytes = _folder_zip_max_bytes()
+    max_files = _folder_zip_max_files()
+
+    files, total_bytes, limit_hit = _folder_download_collect(
+        target, workspace_root, max_bytes, max_files
+    )
+    if limit_hit == "max_files":
+        return j(handler, {
+            "error": "too many files",
+            "limit": max_files,
+            "configure": "HERMES_WEBUI_FOLDER_ZIP_MAX_FILES",
+        }, status=413)
+    if limit_hit == "max_bytes":
+        return j(handler, {
+            "error": "folder too large",
+            "limit_bytes": max_bytes,
+            "configure": "HERMES_WEBUI_FOLDER_ZIP_MAX_MB",
+        }, status=413)
+
+    zip_name = (target.name or "workspace") + ".zip"
+    handler.send_response(200)
+    handler.send_header("Content-Type", "application/zip")
+    handler.send_header(
+        "Content-Disposition",
+        _content_disposition_value("attachment", zip_name),
+    )
+    handler.send_header("Cache-Control", "no-store")
+    handler.end_headers()
+
+    written = 0
+    with zipfile.ZipFile(handler.wfile, mode="w", compression=zipfile.ZIP_DEFLATED, allowZip64=True) as zf:
+        for fp, arcname in files:
+            try:
+                zf.write(fp, arcname=arcname)
+                written += 1
+            except (OSError, PermissionError) as e:
+                logger.warning("folder-download: skipping %s: %s", fp, e)
+    logger.info(
+        "folder-download: streamed %d/%d files (~%d bytes) from %s",
+        written, len(files), total_bytes, target,
+    )
+
+
 def _handle_file_raw(handler, parsed):
     qs = parse_qs(parsed.query)
     sid = qs.get("session_id", [""])[0]
diff --git a/static/i18n.js b/static/i18n.js
index 0580028141..f02c9fb88d 100644
--- a/static/i18n.js
+++ b/static/i18n.js
@@ -401,6 +401,7 @@ const LOCALES = {
     reveal_in_finder: 'Reveal in File Manager',
     reveal_failed: 'Failed to reveal: ',
     copy_file_path: 'Copy file path',
+    download_folder: 'Download Folder',
     path_copied: 'File path copied to clipboard',
     path_copy_failed: 'Failed to copy path: ',
     session_rename: 'Rename conversation',
diff --git a/static/ui.js b/static/ui.js
index cb78ba0976..195d0b2faa 100644
--- a/static/ui.js
+++ b/static/ui.js
@@ -7788,6 +7788,23 @@ function _showFileContextMenu(e, item){
   };
   menu.appendChild(copyPathItem);
 
+  // Download as zip — only for directories. Streams the folder contents
+  // through /api/folder/download which builds the zip on the fly.
+  if(item.type==='dir'){
+    const dlItem=document.createElement('div');
+    dlItem.textContent=t('download_folder');
+    dlItem.style.cssText='padding:7px 14px;cursor:pointer;font-size:13px;color:var(--text);';
+    dlItem.onmouseenter=()=>dlItem.style.background='var(--hover-bg)';
+    dlItem.onmouseleave=()=>dlItem.style.background='';
+    dlItem.onclick=()=>{
+      menu.remove();
+      const url='/api/folder/download?session_id='+encodeURIComponent(S.session.session_id)
+              + '&path='+encodeURIComponent(item.path||'');
+      window.location.href=url;
+    };
+    menu.appendChild(dlItem);
+  }
+
   // Divider + Delete
   const sep=document.createElement('hr');
   sep.style.cssText='border:none;border-top:1px solid var(--border);margin:4px 0;';
diff --git a/tests/test_folder_download.py b/tests/test_folder_download.py
new file mode 100644
index 0000000000..2f0463555a
--- /dev/null
+++ b/tests/test_folder_download.py
@@ -0,0 +1,102 @@
+"""Tests for /api/folder/download — matches the static-inspection style used
+elsewhere in the hermes-webui test suite (see tests/test_issue1867_upload_size_preflight.py).
+"""
+
+from pathlib import Path
+
+ROOT = Path(__file__).resolve().parents[1]
+ROUTES_PY = ROOT / "api" / "routes.py"
+UI_JS = ROOT / "static" / "ui.js"
+
+
+def test_folder_download_handler_defined():
+    src = ROUTES_PY.read_text(encoding="utf-8")
+    assert "def _handle_folder_download(handler, parsed):" in src
+    assert "/api/folder/download?session_id=" in src  # in handler docstring
+    assert 'Content-Type", "application/zip"' in src
+    assert "zipfile.ZipFile(handler.wfile" in src
+
+
+def test_folder_download_dispatch_registered():
+    src = ROUTES_PY.read_text(encoding="utf-8")
+    assert 'parsed.path == "/api/folder/download"' in src
+    assert "_handle_folder_download(handler, parsed)" in src
+
+
+def test_folder_download_uses_safe_resolve():
+    src = ROUTES_PY.read_text(encoding="utf-8")
+    handler_idx = src.index("def _handle_folder_download")
+    end_idx = src.index("\n\ndef ", handler_idx + 1)
+    body = src[handler_idx:end_idx]
+    assert "safe_resolve(Path(s.workspace), rel)" in body
+    assert "ValueError" in body
+
+
+def test_folder_download_skips_escaping_symlinks():
+    src = ROUTES_PY.read_text(encoding="utf-8")
+    collect_idx = src.index("def _folder_download_collect")
+    end_idx = src.index("\n\ndef ", collect_idx + 1)
+    body = src[collect_idx:end_idx]
+    assert "followlinks=False" in body
+    assert "is_symlink()" in body
+    assert "is_relative_to(workspace_root)" in body
+
+
+def test_folder_download_respects_max_files_env():
+    src = ROUTES_PY.read_text(encoding="utf-8")
+    assert 'HERMES_WEBUI_FOLDER_ZIP_MAX_FILES' in src
+    assert '"too many files"' in src
+    assert 'status=413' in src
+
+
+def test_folder_download_respects_max_bytes_env():
+    src = ROUTES_PY.read_text(encoding="utf-8")
+    assert 'HERMES_WEBUI_FOLDER_ZIP_MAX_MB' in src
+    assert '"folder too large"' in src
+    assert 'limit_bytes' in src
+
+
+def test_folder_download_preflights_before_streaming():
+    """Pre-flight collect must run BEFORE send_response so 413 can return JSON."""
+    src = ROUTES_PY.read_text(encoding="utf-8")
+    handler_idx = src.index("def _handle_folder_download")
+    end_idx = src.index("\n\n# ", handler_idx) if "\n\n# " in src[handler_idx:] else len(src)
+    body = src[handler_idx:end_idx]
+    collect_call = body.index("_folder_download_collect")
+    send_response = body.index("handler.send_response(200)")
+    limit_check = body.index('"too many files"')
+    assert collect_call < limit_check < send_response
+
+
+def test_folder_download_rejects_files():
+    src = ROUTES_PY.read_text(encoding="utf-8")
+    assert "path must be a directory" in src
+    assert "/api/file/raw" in src  # error message guides user
+
+
+def test_folder_download_streams_not_buffers():
+    src = ROUTES_PY.read_text(encoding="utf-8")
+    assert "zipfile.ZipFile(handler.wfile" in src
+    assert "allowZip64=True" in src
+    handler_idx = src.index("def _handle_folder_download")
+    end_idx = src.index("\n\ndef ", handler_idx + 1)
+    body = src[handler_idx:end_idx]
+    assert "io.BytesIO" not in body, "must stream, not buffer in memory"
+
+
+def test_ui_context_menu_has_download_folder():
+    src = UI_JS.read_text(encoding="utf-8")
+    assert "download_folder" in src
+    download_idx = src.index("download_folder")
+    snippet = src[max(0, download_idx - 200):download_idx]
+    assert "item.type==='dir'" in snippet or "item.type === 'dir'" in snippet
+
+
+def test_ui_download_folder_uses_endpoint():
+    src = UI_JS.read_text(encoding="utf-8")
+    download_idx = src.index("download_folder")
+    snippet = src[download_idx:download_idx + 600]
+    assert "/api/folder/download" in snippet
+    assert "session_id=" in snippet
+    assert "path=" in snippet
+    assert "encodeURIComponent" in snippet

From 0736e45485b66d89e81e2f4a0e49c3577df90ab2 Mon Sep 17 00:00:00 2001
From: Michael Lam <michael@example.local>
Date: Tue, 19 May 2026 11:15:35 -0700
Subject: [PATCH 02/14] fix: dedupe tool-only partial recovery markers

---
 CHANGELOG.md                           |  3 +
 api/models.py                          | 64 ++++++++++++++++++-
 api/streaming.py                       | 80 +++++++++++++++++------
 tests/test_issue2592_partial_dedupe.py | 87 ++++++++++++++++++++++++++
 4 files changed, 214 insertions(+), 20 deletions(-)
 create mode 100644 tests/test_issue2592_partial_dedupe.py

diff --git a/CHANGELOG.md b/CHANGELOG.md
index 941c61eaa7..40d4e0c699 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -2,6 +2,9 @@
 
 ## [Unreleased]
 
+### Fixed
+
+- **PR #2593** by @Michaelyklam (closes #2592) — Deduplicate cancelled/recovered partial assistant markers using the full `(content, reasoning, partial tool calls)` payload instead of only non-empty text content. Tool-only failed turns no longer append identical empty-content `_partial` messages repeatedly, and full session loads collapse adjacent duplicate partial markers from already-bloated session files while preserving a backup.
 
 ## [v0.51.92] — 2026-05-19 — Release BP (stage-385 — 7-PR full sweep batch — RFC Slice 3c clarification + workspace tree icon alignment + project move cache refresh + auto-compression handoff metadata + Grok OAuth provider catalog + anonymous custom endpoint picker fallback + PWA standalone reload + pull-to-refresh)
 
diff --git a/api/models.py b/api/models.py
index 0518b227b7..35680090aa 100644
--- a/api/models.py
+++ b/api/models.py
@@ -562,7 +562,18 @@ def load(cls, sid):
         p = SESSION_DIR / f'{sid}.json'
         if not p.exists():
             return None
-        return cls(**json.loads(p.read_text(encoding='utf-8')))
+        data = json.loads(p.read_text(encoding='utf-8'))
+        data['messages'], _collapsed_partials = _collapse_adjacent_duplicate_partials(data.get('messages'))
+        session = cls(**data)
+        if _collapsed_partials:
+            try:
+                # Self-heal bloated sessions on first full load without touching
+                # recency/index ordering; save() creates a .bak because this
+                # intentionally shrinks the transcript (#2592).
+                session.save(touch_updated_at=False, skip_index=True)
+            except Exception:
+                logger.debug("Failed to persist collapsed duplicate partials for %s", sid, exc_info=True)
+        return session
 
     @classmethod
     def load_metadata_only(cls, sid):
@@ -722,6 +733,57 @@ def _normalize_journal_recovery_text(value) -> str:
     return " ".join(str(value or "").split())
 
 
+def _partial_message_signature(message: dict) -> tuple:
+    """Return a stable identity for partial assistant markers recovered on load."""
+    if not isinstance(message, dict):
+        return ('', '', ())
+    tool_sig = []
+    for tool_call in message.get('_partial_tool_calls') or []:
+        if not isinstance(tool_call, dict):
+            continue
+        try:
+            args_sig = json.dumps(
+                tool_call.get('args') or {},
+                ensure_ascii=False,
+                sort_keys=True,
+                default=str,
+            )
+        except Exception:
+            args_sig = str(tool_call.get('args') or '')
+        tool_sig.append((
+            str(tool_call.get('name') or ''),
+            args_sig,
+            bool(tool_call.get('done', False)),
+            bool(tool_call.get('is_error', False)),
+            str(tool_call.get('preview') or tool_call.get('snippet') or ''),
+        ))
+    return (
+        str(message.get('content') or '').strip(),
+        str(message.get('reasoning') or '').strip(),
+        tuple(tool_sig),
+    )
+
+
+def _collapse_adjacent_duplicate_partials(messages) -> tuple[list, bool]:
+    """Collapse repeated identical partial markers from the same failed turn."""
+    if not isinstance(messages, list):
+        return messages, False
+    collapsed = []
+    changed = False
+    previous_partial_sig = None
+    for message in messages:
+        if isinstance(message, dict) and message.get('_partial'):
+            sig = _partial_message_signature(message)
+            if previous_partial_sig == sig:
+                changed = True
+                continue
+            previous_partial_sig = sig
+        else:
+            previous_partial_sig = None
+        collapsed.append(message)
+    return collapsed, changed
+
+
 def _find_existing_assistant_for_journal_content(session, content: str) -> int | None:
     candidate = _normalize_journal_recovery_text(content)
     if not candidate:
diff --git a/api/streaming.py b/api/streaming.py
index 5998bdbc64..3a4bf26e56 100644
--- a/api/streaming.py
+++ b/api/streaming.py
@@ -2590,6 +2590,56 @@ def _extract_tool_calls_from_messages(messages, live_tool_calls=None):
     return tool_calls
 
 
+def _partial_message_signature(message: dict) -> tuple:
+    """Return a stable identity for a persisted partial assistant marker."""
+    if not isinstance(message, dict):
+        return ('', '', ())
+    tool_sig = []
+    for tool_call in message.get('_partial_tool_calls') or []:
+        if not isinstance(tool_call, dict):
+            continue
+        try:
+            args_sig = json.dumps(
+                tool_call.get('args') or {},
+                ensure_ascii=False,
+                sort_keys=True,
+                default=str,
+            )
+        except Exception:
+            args_sig = str(tool_call.get('args') or '')
+        tool_sig.append((
+            str(tool_call.get('name') or ''),
+            args_sig,
+            bool(tool_call.get('done', False)),
+            bool(tool_call.get('is_error', False)),
+            str(tool_call.get('preview') or tool_call.get('snippet') or ''),
+        ))
+    return (
+        str(message.get('content') or '').strip(),
+        str(message.get('reasoning') or '').strip(),
+        tuple(tool_sig),
+    )
+
+
+def _partial_marker_already_present(messages, candidate: dict, *, before_idx: int | None = None) -> bool:
+    """Check for an equivalent partial marker in the current user turn only."""
+    if not isinstance(messages, list) or not isinstance(candidate, dict):
+        return False
+    end = before_idx if isinstance(before_idx, int) else len(messages)
+    end = max(0, min(end, len(messages)))
+    start = 0
+    for idx in range(end - 1, -1, -1):
+        msg = messages[idx]
+        if isinstance(msg, dict) and msg.get('role') == 'user':
+            start = idx + 1
+            break
+    candidate_sig = _partial_message_signature(candidate)
+    for msg in messages[start:end]:
+        if isinstance(msg, dict) and msg.get('_partial') and _partial_message_signature(msg) == candidate_sig:
+            return True
+    return False
+
+
 def _sse(handler, event, data):
     """Write one SSE event to the response stream."""
     payload = f"event: {event}\ndata: {json.dumps(data, ensure_ascii=False)}\n\n"
@@ -5504,24 +5554,7 @@ def cancel_stream(stream_id: str) -> bool:
                         if any(pattern in _content for pattern in _CANCEL_MARKER_PATTERNS):
                             _cancel_marker_idx = _idx
                             break
-                _partial_already_present = False
-                if _stripped:
-                    for _m in _cs.messages:
-                        # Stage-350 Opus SHOULD-FIX (#2151): only dedup
-                        # against actual prior _partial markers from the
-                        # same stream, with exact content match. The original
-                        # substring check (`_stripped in _existing or
-                        # _existing in _stripped`) was too broad — any short
-                        # prior assistant reply (e.g. "OK", "Here is the
-                        # answer:") becomes a substring of many later partial
-                        # bodies and could silently drop the new partial,
-                        # resurrecting the #893 data-loss bug on long sessions.
-                        if not isinstance(_m, dict) or not _m.get('_partial'):
-                            continue
-                        if str(_m.get('content') or '').strip() == _stripped:
-                            _partial_already_present = True
-                            break
-                if (_stripped or _has_reasoning or _has_tools) and not _partial_already_present:
+                if _stripped or _has_reasoning or _has_tools:
                     _partial_msg: dict = {
                         'role': 'assistant',
                         'content': _stripped,  # may be empty for reasoning/tool-only turns
@@ -5548,7 +5581,16 @@ def cancel_stream(stream_id: str) -> bool:
                         # alongside the regular tool_calls path.
                         # (Opus pre-release review pass 2 of v0.50.251.)
                         _partial_msg['_partial_tool_calls'] = list(_cancel_tool_calls)
-                    _cs.messages.insert(_cancel_marker_idx, _partial_msg)
+                    # Deduplicate against the full partial payload, not just
+                    # non-empty content. Tool-only/reasoning-only partials have
+                    # empty content, so a content-gated check can append the same
+                    # failed turn repeatedly during cancel/replay recovery (#2592).
+                    if not _partial_marker_already_present(
+                        _cs.messages,
+                        _partial_msg,
+                        before_idx=_cancel_marker_idx,
+                    ):
+                        _cs.messages.insert(_cancel_marker_idx, _partial_msg)
                 # Cancel marker — flagged _error=True so it is stripped from conversation
                 # history on the next turn (prevents model from seeing "Task cancelled."
                 # as a prior assistant reply).
diff --git a/tests/test_issue2592_partial_dedupe.py b/tests/test_issue2592_partial_dedupe.py
new file mode 100644
index 0000000000..789633c4bb
--- /dev/null
+++ b/tests/test_issue2592_partial_dedupe.py
@@ -0,0 +1,87 @@
+import json
+
+
+def _tool_partial(reasoning="same reasoning", args=None, *, timestamp=123):
+    return {
+        "role": "assistant",
+        "content": "",
+        "_partial": True,
+        "timestamp": timestamp,
+        "reasoning": reasoning,
+        "_partial_tool_calls": [
+            {
+                "name": "execute_code",
+                "args": args or {"code": "raise RuntimeError('boom')"},
+                "done": True,
+                "is_error": True,
+                "duration": 3.87,
+            }
+        ],
+    }
+
+
+def test_tool_only_partial_dedupe_uses_reasoning_and_tool_signature():
+    from api.streaming import _partial_marker_already_present
+
+    existing = [
+        {"role": "user", "content": "run this"},
+        _tool_partial(),
+        {"role": "assistant", "content": "**Task cancelled.**", "_error": True},
+    ]
+
+    assert _partial_marker_already_present(existing, _tool_partial(), before_idx=2)
+    assert not _partial_marker_already_present(
+        existing,
+        _tool_partial(args={"code": "print('different tool body')"}),
+        before_idx=2,
+    )
+
+
+def test_tool_only_partial_dedupe_is_scoped_to_current_user_turn():
+    from api.streaming import _partial_marker_already_present
+
+    existing = [
+        {"role": "user", "content": "first run"},
+        _tool_partial(),
+        {"role": "assistant", "content": "**Task cancelled.**", "_error": True},
+        {"role": "user", "content": "repeat it"},
+    ]
+
+    assert not _partial_marker_already_present(existing, _tool_partial(), before_idx=len(existing))
+
+
+def test_session_load_collapses_adjacent_duplicate_partials(tmp_path, monkeypatch):
+    import api.models as models
+
+    sid = "abc123"
+    session_dir = tmp_path / "sessions"
+    session_dir.mkdir()
+    monkeypatch.setattr(models, "SESSION_DIR", session_dir)
+    monkeypatch.setattr(models, "SESSION_INDEX_FILE", session_dir / "_index.json")
+
+    payload = {
+        "session_id": sid,
+        "title": "bloated partials",
+        "workspace": str(tmp_path),
+        "model": "gpt-5.5",
+        "created_at": 100.0,
+        "updated_at": 200.0,
+        "messages": [
+            {"role": "user", "content": "run this"},
+            _tool_partial(timestamp=123),
+            _tool_partial(timestamp=123),
+            _tool_partial(timestamp=123),
+            {"role": "assistant", "content": "**Task cancelled.**", "_error": True},
+        ],
+        "tool_calls": [],
+    }
+    (session_dir / f"{sid}.json").write_text(json.dumps(payload), encoding="utf-8")
+
+    loaded = models.Session.load(sid)
+
+    assert loaded is not None
+    assert sum(1 for message in loaded.messages if message.get("_partial")) == 1
+    persisted = json.loads((session_dir / f"{sid}.json").read_text(encoding="utf-8"))
+    assert sum(1 for message in persisted["messages"] if message.get("_partial")) == 1
+    assert persisted["updated_at"] == 200.0
+    assert (session_dir / f"{sid}.json.bak").exists()

From b1b93f9c9749eba090b5247d2557b4914781671c Mon Sep 17 00:00:00 2001
From: Bryan Bartley <bryanjbartley@gmail.com>
Date: Tue, 19 May 2026 13:40:29 -0500
Subject: [PATCH 03/14] fix(i18n): add download_folder key to all non-en
 locales
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

CI parity tests enforce that every key in the English locale block exists
in zh, ja, ko, ru, and es. The PR introducing download_folder added it to
en only, which broke the 5 hard-parity test files. Adds the English
fallback to all 10 non-en blocks (it/ja/ru/es/de/zh/zh-Hant/pt/ko/fr) with
the project's // TODO: translate marker so translators can refine later.

Tests: tests/test_chinese_locale.py, test_japanese_locale.py,
test_korean_locale.py, test_russian_locale.py, test_spanish_locale.py —
26/26 passing locally.
---
 static/i18n.js | 10 ++++++++++
 1 file changed, 10 insertions(+)

diff --git a/static/i18n.js b/static/i18n.js
index f02c9fb88d..3354837021 100644
--- a/static/i18n.js
+++ b/static/i18n.js
@@ -1621,6 +1621,7 @@ const LOCALES = {
     reveal_in_finder: 'Mostra nel File Manager',
     reveal_failed: 'Mostra fallito: ',
     copy_file_path: 'Copia percorso file',
+    download_folder: 'Download Folder', // TODO: translate
     path_copied: 'Percorso file copiato negli appunti',
     path_copy_failed: 'Copia percorso fallita: ',
     session_rename: 'Rinomina conversazione',
@@ -2832,6 +2833,7 @@ const LOCALES = {
     reveal_in_finder: 'ファイルマネージャーで表示',
     reveal_failed: '表示に失敗しました: ',
     copy_file_path: 'ファイルパスをコピー',
+    download_folder: 'Download Folder', // TODO: translate
     path_copied: 'ファイルパスをクリップボードにコピーしました',
     path_copy_failed: 'パスのコピーに失敗しました: ',
     session_rename: '会話の名前を変更',
@@ -3969,6 +3971,7 @@ const LOCALES = {
     reveal_in_finder: 'Показать в файловом менеджере',
     reveal_failed: 'Не удалось открыть: ',
     copy_file_path: 'Копировать путь к файлу',
+    download_folder: 'Download Folder', // TODO: translate
     path_copied: 'Путь к файлу скопирован в буфер обмена',
     path_copy_failed: 'Не удалось скопировать путь: ',
     session_rename: 'Переименовать беседу',
@@ -5099,6 +5102,7 @@ const LOCALES = {
     reveal_in_finder: 'Mostrar en el gestor de archivos',
     reveal_failed: 'Error al mostrar: ',
     copy_file_path: 'Copiar ruta del archivo',
+    download_folder: 'Download Folder', // TODO: translate
     path_copied: 'Ruta del archivo copiada al portapapeles',
     path_copy_failed: 'Error al copiar la ruta: ',
     session_rename: 'Renombrar conversación',
@@ -6232,6 +6236,7 @@ const LOCALES = {
     reveal_in_finder: 'Im Dateimanager anzeigen',
     reveal_failed: 'Anzeige fehlgeschlagen: ',
     copy_file_path: 'Dateipfad kopieren',
+    download_folder: 'Download Folder', // TODO: translate
     path_copied: 'Dateipfad in die Zwischenablage kopiert',
     path_copy_failed: 'Pfad konnte nicht kopiert werden: ',
     session_rename: 'Unterhaltung umbenennen',
@@ -7415,6 +7420,7 @@ const LOCALES = {
     reveal_in_finder: '在文件管理器中显示',
     reveal_failed: '显示失败：',
     copy_file_path: '\u590d\u5236\u6587\u4ef6\u8def\u5f84',
+    download_folder: 'Download Folder', // TODO: translate
     path_copied: '\u6587\u4ef6\u8def\u5f84\u5df2\u590d\u5236\u5230\u526a\u8d34\u677f',
     path_copy_failed: '\u590d\u5236\u8def\u5f84\u5931\u8d25\uff1a',
     session_rename: '\u91cd\u547d\u540d\u5bf9\u8bdd',
@@ -8473,6 +8479,7 @@ const LOCALES = {
     reveal_in_finder: '\u5728\u6a94\u6848\u7ba1\u7406\u54e1\u4e2d\u986f\u793a',
     reveal_failed: '\u986f\u793a\u5931\u6557\uff1a',
     copy_file_path: '\u8907\u88fd\u6a94\u6848\u8def\u5f91',
+    download_folder: 'Download Folder', // TODO: translate
     path_copied: '\u6a94\u6848\u8def\u5f91\u5df2\u8907\u88fd\u5230\u526a\u8cbc\u7c3f',
     path_copy_failed: '\u8907\u88fd\u8def\u5f91\u5931\u6557\uff1a',
     session_rename: '\u91cd\u65b0\u547d\u540d\u5c0d\u8a71',
@@ -9775,6 +9782,7 @@ const LOCALES = {
     reveal_in_finder: 'Mostrar no gerenciador de arquivos',
     reveal_failed: 'Falha ao mostrar: ',
     copy_file_path: 'Copiar caminho do arquivo',
+    download_folder: 'Download Folder', // TODO: translate
     path_copied: 'Caminho do arquivo copiado para a área de transferência',
     path_copy_failed: 'Falha ao copiar caminho: ',
     session_rename: 'Renomear conversa',
@@ -10884,6 +10892,7 @@ const LOCALES = {
     reveal_in_finder: '파일 관리자에서 열기',
     reveal_failed: '표시 실패: ',
     copy_file_path: '파일 경로 복사',
+    download_folder: 'Download Folder', // TODO: translate
     path_copied: '파일 경로가 클립보드에 복사되었습니다',
     path_copy_failed: '경로 복사 실패: ',
     session_rename: '대화 이름 변경',
@@ -12026,6 +12035,7 @@ const LOCALES = {
     reveal_in_finder: 'Révéler dans le gestionnaire de fichiers',
     reveal_failed: 'Échec de la révélation :',
     copy_file_path: 'Copier le chemin du fichier',
+    download_folder: 'Download Folder', // TODO: translate
     path_copied: 'Chemin du fichier copié dans le presse-papiers',
     path_copy_failed: 'Échec de la copie du chemin :',
     session_rename: 'Renommer la conversation',

From acd1df1112e6a2c15e8844e35f36067ec83feba8 Mon Sep 17 00:00:00 2001
From: Dennis Soong <dso2ng@gmail.com>
Date: Wed, 20 May 2026 02:41:00 +0800
Subject: [PATCH 04/14] fix: time out hung browser api requests

---
 static/panels.js          |   2 +-
 static/ui.js              |   6 +-
 static/workspace.js       |  87 ++++++++++++-----
 tests/test_api_timeout.py | 194 ++++++++++++++++++++++++++++++++++++++
 4 files changed, 264 insertions(+), 25 deletions(-)
 create mode 100644 tests/test_api_timeout.py

diff --git a/static/panels.js b/static/panels.js
index 198b6b3781..b40252e716 100644
--- a/static/panels.js
+++ b/static/panels.js
@@ -6364,7 +6364,7 @@ async function checkUpdatesNow(){
   if(label) label.textContent=t('settings_checking');
   if(status) status.textContent='';
   try {
-    const data=await api('/api/updates/check?force=1');
+    const data=await api('/api/updates/check?force=1',{timeoutMs:60000});
     if(data.disabled){
       if(status){status.textContent=t('settings_updates_disabled');status.style.color='var(--muted)';}
     } else {
diff --git a/static/ui.js b/static/ui.js
index 5ab149f741..e07373a090 100644
--- a/static/ui.js
+++ b/static/ui.js
@@ -4393,7 +4393,7 @@ async function showWhatsNewSummary(target){
   }
   _renderUpdateSummaryPanel({summary:'Writing a simple summary…'},data,target);
   try{
-    const res=await api('/api/updates/summary',{method:'POST',body:JSON.stringify({updates:scopedUpdates,target:target||null})});
+    const res=await api('/api/updates/summary',{method:'POST',body:JSON.stringify({updates:scopedUpdates,target:target||null}),timeoutMs:60000});
     _rememberGeneratedSummary(target,res,data);
     _renderUpdateSummaryPanel(res,data,target);
     _renderUpdateWhatsNewLinks(data,{mode:'summary'});
@@ -4514,7 +4514,7 @@ async function applyUpdates(){
   if(window._updateData?.agent?.behind>0) targets.push('agent');
   try{
     for(const target of targets){
-      const res=await api('/api/updates/apply',{method:'POST',body:JSON.stringify({target})});
+      const res=await api('/api/updates/apply',{method:'POST',body:JSON.stringify({target}),timeoutMs:120000});
       if(!res.ok){
         _showUpdateError(target,res);
         resetApplyButton(0);
@@ -4563,7 +4563,7 @@ async function forceUpdate(btn){
   const errEl=$('updateError');
   if(errEl){errEl.style.display='none';}
   try{
-    const res=await api('/api/updates/force',{method:'POST',body:JSON.stringify({target})});
+    const res=await api('/api/updates/force',{method:'POST',body:JSON.stringify({target}),timeoutMs:120000});
     if(!res.ok){
       if(errEl){errEl.textContent='Force update failed: '+(res.message||'unknown error');errEl.style.display='block';}
       btn.disabled=false;btn.textContent='Force update';
diff --git a/static/workspace.js b/static/workspace.js
index 1511a70a44..5309addca7 100644
--- a/static/workspace.js
+++ b/static/workspace.js
@@ -2,39 +2,84 @@ async function api(path,opts={}){
   // Strip leading slash so URL resolves relative to location.href (supports subpath mounts)
   const rel = path.startsWith('/') ? path.slice(1) : path;
   const url=new URL(rel,document.baseURI||location.href);
+  const timeoutMs=Object.prototype.hasOwnProperty.call(opts,'timeoutMs')?opts.timeoutMs:30000;
   // Retry up to 2 times on network errors (e.g. stale keep-alive after long idle).
-  // Server errors (4xx/5xx) are NOT retried — only connection failures.
+  // Server errors (4xx/5xx) and client-side timeouts are NOT retried.
   let lastErr;
   for(let attempt=0;attempt<3;attempt++){
+    let controller=null;
+    let timeoutId=null;
+    let didTimeout=false;
+    let upstreamSignal=null;
+    let upstreamAbort=null;
     try{
-      const res=await fetch(url.href,{credentials:'include',headers:{'Content-Type':'application/json'},...opts});
-      if(!res.ok){
-        // 401 means the auth session expired. Redirect to login so the user can
-        // re-authenticate. This is especially important for iOS PWA (standalone mode)
-        // and for subpath mounts like /hermes/, where /login escapes to the site root.
-        if(res.status===401){window.location.href='login?next='+encodeURIComponent(window.location.pathname+window.location.search);return;}
-        const text=await res.text();
-        // Parse JSON error body and surface the human-readable message,
-        // rather than showing raw JSON like {"error":"Profile 'x' does not exist."}
-        let message=text;
-        try{const j=JSON.parse(text);message=j.error||j.message||text;}catch(e){}
-        // Attach the raw HTTP context so callers can branch on status (404 stale-session
-        // cleanup, 401 redirect, 503 retry, etc.) without re-parsing the message string.
-        const err=new Error(message);
-        err.status=res.status;
-        err.statusText=res.statusText;
-        err.body=text;
-        throw err;
+      const fetchOpts={...opts};
+      delete fetchOpts.timeoutMs;
+      const useTimeout=Number.isFinite(Number(timeoutMs))&&Number(timeoutMs)>0;
+      if(useTimeout&&typeof AbortController!=='undefined'){
+        controller=new AbortController();
+        upstreamSignal=fetchOpts.signal||null;
+        if(upstreamSignal){
+          upstreamAbort=()=>controller.abort(upstreamSignal.reason);
+          if(upstreamSignal.aborted) upstreamAbort();
+          else upstreamSignal.addEventListener('abort',upstreamAbort,{once:true});
+        }
+        fetchOpts.signal=controller.signal;
       }
-      const ct=res.headers.get('content-type')||'';
-      return ct.includes('application/json')?res.json():res.text();
+      const requestPromise=(async()=>{
+        const res=await fetch(url.href,{credentials:'include',headers:{'Content-Type':'application/json'},...fetchOpts});
+        if(!res.ok){
+          // 401 means the auth session expired. Redirect to login so the user can
+          // re-authenticate. This is especially important for iOS PWA (standalone mode)
+          // and for subpath mounts like /hermes/, where /login escapes to the site root.
+          if(res.status===401){window.location.href='login?next='+encodeURIComponent(window.location.pathname+window.location.search);return;}
+          const text=await res.text();
+          // Parse JSON error body and surface the human-readable message,
+          // rather than showing raw JSON like {"error":"Profile 'x' does not exist."}
+          let message=text;
+          try{const j=JSON.parse(text);message=j.error||j.message||text;}catch(e){}
+          // Attach the raw HTTP context so callers can branch on status (404 stale-session
+          // cleanup, 401 redirect, 503 retry, etc.) without re-parsing the message string.
+          const err=new Error(message);
+          err.status=res.status;
+          err.statusText=res.statusText;
+          err.body=text;
+          throw err;
+        }
+        const ct=res.headers.get('content-type')||'';
+        return ct.includes('application/json')?await res.json():await res.text();
+      })();
+      return useTimeout?await Promise.race([
+        requestPromise,
+        new Promise((_,reject)=>{
+          timeoutId=setTimeout(()=>{
+            didTimeout=true;
+            if(controller) controller.abort();
+            const err=new Error('Request timed out. Please try again.');
+            err.name='TimeoutError';
+            err.timeout=true;
+            reject(err);
+          },Number(timeoutMs));
+        })
+      ]):await requestPromise;
     }catch(e){
       lastErr=e;
+      const isTimeout=didTimeout||(e&&(e.timeout===true||e.name==='TimeoutError'));
+      if(isTimeout){
+        const err=(e&&e.name==='TimeoutError')?e:new Error('Request timed out. Please try again.');
+        err.name='TimeoutError';
+        err.timeout=true;
+        if(typeof showToast==='function') showToast('Request timed out. Please try again.',5000,'error');
+        throw err;
+      }
       // Only retry on network errors (TypeError from fetch), not on HTTP errors
       // that were already thrown above. Re-throw 401 redirects immediately.
       if(e.message&&/401/.test(e.message)) throw e;
       if(attempt<2 && e instanceof TypeError) continue;
       throw e;
+    }finally{
+      if(timeoutId) clearTimeout(timeoutId);
+      if(upstreamSignal&&upstreamAbort) upstreamSignal.removeEventListener('abort',upstreamAbort);
     }
   }
   throw lastErr;
diff --git a/tests/test_api_timeout.py b/tests/test_api_timeout.py
new file mode 100644
index 0000000000..3f89f71afa
--- /dev/null
+++ b/tests/test_api_timeout.py
@@ -0,0 +1,194 @@
+"""Regression coverage for #2539 client-side api() timeout handling."""
+
+from __future__ import annotations
+
+import json
+import re
+import subprocess
+import textwrap
+from pathlib import Path
+
+ROOT = Path(__file__).resolve().parents[1]
+WORKSPACE_JS = ROOT / "static" / "workspace.js"
+SESSIONS_JS = ROOT / "static" / "sessions.js"
+UI_JS = ROOT / "static" / "ui.js"
+PANELS_JS = ROOT / "static" / "panels.js"
+
+
+def _source(path: Path) -> str:
+    return path.read_text(encoding="utf-8")
+
+
+def _extract_js_function(src: str, name: str) -> str:
+    marker = f"async function {name}("
+    start = src.find(marker)
+    assert start >= 0, f"{name}() function must exist"
+    # The api() signature contains a default object literal (`opts={}`), so the
+    # function-body brace is the first `{` after the balanced parameter list.
+    paren_depth = 0
+    close_paren = -1
+    for idx in range(start + len(f"async function {name}"), len(src)):
+        ch = src[idx]
+        if ch == "(":
+            paren_depth += 1
+        elif ch == ")":
+            paren_depth -= 1
+            if paren_depth == 0:
+                close_paren = idx
+                break
+    assert close_paren > start, f"{name}() parameter list must close"
+    brace = src.find("{", close_paren)
+    assert brace > close_paren, f"{name}() function body must start with {{"
+    depth = 0
+    in_string: str | None = None
+    escaped = False
+    in_line_comment = False
+    in_block_comment = False
+    for idx in range(brace, len(src)):
+        ch = src[idx]
+        nxt = src[idx + 1] if idx + 1 < len(src) else ""
+        if in_line_comment:
+            if ch == "\n":
+                in_line_comment = False
+            continue
+        if in_block_comment:
+            if ch == "*" and nxt == "/":
+                in_block_comment = False
+            continue
+        if in_string:
+            if escaped:
+                escaped = False
+            elif ch == "\\":
+                escaped = True
+            elif ch == in_string:
+                in_string = None
+            continue
+        if ch == "/" and nxt == "/":
+            in_line_comment = True
+            continue
+        if ch == "/" and nxt == "*":
+            in_block_comment = True
+            continue
+        if ch in ("'", '"', "`"):
+            in_string = ch
+            continue
+        if ch == "{":
+            depth += 1
+        elif ch == "}":
+            depth -= 1
+            if depth == 0:
+                return src[start : idx + 1]
+    raise AssertionError(f"could not extract {name}() body")
+
+
+def _node_eval(script: str, timeout: float = 2.0) -> subprocess.CompletedProcess[str]:
+    return subprocess.run(
+        ["node", "-e", script],
+        cwd=ROOT,
+        text=True,
+        capture_output=True,
+        timeout=timeout,
+        check=False,
+    )
+
+
+def test_api_rejects_hung_fetch_with_timeout_and_toast():
+    """A hung fetch must reject quickly and surface a recognizable timeout toast."""
+    api_fn = _extract_js_function(_source(WORKSPACE_JS), "api")
+    script = textwrap.dedent(
+        f"""
+        const events=[];
+        global.document={{baseURI:'http://example.test/hermes/'}};
+        global.location={{href:'http://example.test/hermes/',pathname:'/hermes/',search:''}};
+        global.window={{location:global.location}};
+        global.showToast=(msg,ms,type)=>events.push({{msg:String(msg),ms,type}});
+        global.fetch=(url,opts)=>new Promise(()=>{{
+          if(opts&&opts.signal)opts.signal.addEventListener('abort',()=>events.push({{aborted:true}}));
+        }});
+        {api_fn}
+        api('/api/sessions',{{timeoutMs:20}})
+          .then(()=>{{console.error('resolved unexpectedly');process.exit(2);}})
+          .catch(err=>{{
+            console.log(JSON.stringify({{message:String(err&&err.message||err),events}}));
+            process.exit(0);
+          }});
+        setTimeout(()=>{{console.error('api did not reject after timeoutMs');process.exit(3);}},250);
+        """
+    )
+    result = _node_eval(script, timeout=1.0)
+    assert result.returncode == 0, result.stderr or result.stdout
+    payload = json.loads(result.stdout.strip())
+    assert "timed out" in payload["message"].lower()
+    assert any(event.get("aborted") for event in payload["events"]), payload
+    assert any("request timed out" in event.get("msg", "").lower() for event in payload["events"]), payload
+    assert any(event.get("type") == "error" for event in payload["events"]), payload
+
+
+def test_api_rejects_stalled_response_body_with_timeout():
+    """The timeout must stay active through JSON/text body consumption, not only headers."""
+    api_fn = _extract_js_function(_source(WORKSPACE_JS), "api")
+    script = textwrap.dedent(
+        f"""
+        const events=[];
+        global.document={{baseURI:'http://example.test/hermes/'}};
+        global.location={{href:'http://example.test/hermes/',pathname:'/hermes/',search:''}};
+        global.window={{location:global.location}};
+        global.showToast=(msg,ms,type)=>events.push({{msg:String(msg),ms,type}});
+        global.fetch=(url,opts)=>Promise.resolve({{
+          ok:true,
+          headers:{{get:()=> 'application/json'}},
+          json:()=>new Promise(()=>{{
+            if(opts&&opts.signal)opts.signal.addEventListener('abort',()=>events.push({{aborted:true}}));
+          }}),
+          text:()=>Promise.resolve('')
+        }});
+        {api_fn}
+        api('/api/sessions',{{timeoutMs:20}})
+          .then(()=>{{console.error('resolved unexpectedly');process.exit(2);}})
+          .catch(err=>{{
+            console.log(JSON.stringify({{message:String(err&&err.message||err),events}}));
+            process.exit(0);
+          }});
+        setTimeout(()=>{{console.error('api body read did not reject after timeoutMs');process.exit(3);}},250);
+        """
+    )
+    result = _node_eval(script, timeout=1.0)
+    assert result.returncode == 0, result.stderr or result.stdout
+    payload = json.loads(result.stdout.strip())
+    assert "timed out" in payload["message"].lower()
+    assert any(event.get("aborted") for event in payload["events"]), payload
+
+
+def test_api_has_default_timeout_and_per_call_override_contract():
+    src = _source(WORKSPACE_JS)
+    body = _extract_js_function(src, "api")
+    assert "timeoutMs" in body, "api() must accept opts.timeoutMs as a per-call override"
+    assert "30000" in body, "api() must default browser API calls to a 30s timeout"
+    assert "AbortController" in body, "api() must abort hung fetches with AbortController"
+    assert "delete fetchOpts.timeoutMs" in body, "api() must strip timeoutMs before calling fetch()"
+    fetch_call = re.search(r"fetch\(url\.href,\{.*?\.\.\.fetchOpts.*?\}\)", body, re.DOTALL)
+    assert fetch_call, "api() must call fetch() with sanitized fetchOpts"
+    assert "...opts" not in fetch_call.group(0), "api() must not spread raw opts into fetch()"
+    assert "timeoutMs" not in fetch_call.group(0), "api() must not forward timeoutMs to fetch()"
+
+
+def test_update_flows_keep_explicit_longer_timeouts():
+    """Legitimately long update flows should not inherit the generic 30s guard."""
+    src = _source(UI_JS)
+    panels = _source(PANELS_JS)
+    assert "api('/api/updates/check?force=1',{timeoutMs:60000})" in panels
+    assert "api('/api/updates/summary',{method:'POST',body:JSON.stringify({updates:scopedUpdates,target:target||null}),timeoutMs:60000})" in src
+    assert "api('/api/updates/apply',{method:'POST',body:JSON.stringify({target}),timeoutMs:120000})" in src
+    assert "api('/api/updates/force',{method:'POST',body:JSON.stringify({target}),timeoutMs:120000})" in src
+
+
+def test_new_session_inflight_cleanup_still_runs_after_api_rejects():
+    """newSession() must keep its finally cleanup path so timeout rejections unpin the UI."""
+    src = _source(SESSIONS_JS)
+    start = src.find("async function newSession")
+    assert start >= 0, "newSession() must exist"
+    finally_idx = src.find("}finally{", start)
+    assert finally_idx > start, "newSession() must keep a finally cleanup block"
+    block = src[finally_idx : src.find("\n}", finally_idx) + 2]
+    assert "_newSessionInFlight=null" in block
+    assert "_setNewSessionPending(false)" in block

From 94ceb66c170ceeee6684098cbdee0d3ba0423e55 Mon Sep 17 00:00:00 2001
From: Bryan Bartley <bryanjbartley@gmail.com>
Date: Tue, 19 May 2026 13:44:56 -0500
Subject: [PATCH 05/14] docs: clarify folder-zip cap bounds
 wall-clock/bandwidth, not RSS

Per reviewer note: because the zip streams straight into handler.wfile
(no io.BytesIO buffering), peak memory is bounded by zipfile's per-file
read buffer, not the HERMES_WEBUI_FOLDER_ZIP_MAX_MB cap. Adds a comment
saying the cap limits wall-clock time and bandwidth rather than RSS, so
the next reader doesn't have to trace the streaming path to learn that.
---
 api/routes.py | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/api/routes.py b/api/routes.py
index 1835416638..40cb55354a 100644
--- a/api/routes.py
+++ b/api/routes.py
@@ -6636,6 +6636,9 @@ def _file_raw_target(session, sid: str, rel: str) -> Path | None:
 # ─── /api/folder/download ───────────────────────────────────────────────────
 # Configurable caps. Match the HERMES_WEBUI_MAX_UPLOAD_MB style used elsewhere
 # (api/config.py) so operators have one consistent env-var convention.
+# Bound on per-request wall-clock and bandwidth, not RSS. The zip streams
+# straight into handler.wfile, so peak memory is the per-file read buffer
+# inside zipfile, not the cap value.
 def _folder_zip_max_bytes() -> int:
     try:
         mb = int(os.getenv("HERMES_WEBUI_FOLDER_ZIP_MAX_MB", "1024"))

From 8d2b9d4a16396eb302127eff887413081d973eb8 Mon Sep 17 00:00:00 2001
From: Lumen Yang <lumen.yang@lumeny.io>
Date: Tue, 19 May 2026 18:52:50 +0000
Subject: [PATCH 06/14] feat(webui): render indexed context metadata

---
 api/compression_anchor.py           | 20 ++++++++
 api/models.py                       | 17 +++++++
 static/i18n.js                      | 16 +++++++
 static/messages.js                  |  3 ++
 static/ui.js                        | 37 +++++++++++++--
 tests/test_auto_compression_card.py | 72 +++++++++++++++++++++++++++++
 6 files changed, 161 insertions(+), 4 deletions(-)

diff --git a/api/compression_anchor.py b/api/compression_anchor.py
index 3a457d5778..f251851c4f 100644
--- a/api/compression_anchor.py
+++ b/api/compression_anchor.py
@@ -53,6 +53,24 @@ def _content_has_part_type(content, part_types):
     )
 
 
+def _is_context_compression_marker(message):
+    """Return true for synthetic compression/reference cards, not user turns."""
+    if not isinstance(message, dict):
+        return False
+    role = message.get("role")
+    if not role or role == "tool":
+        return False
+    text = _content_text(
+        message.get("content", ""),
+        part_types={"text", "input_text", "output_text"},
+    ).lower().lstrip()
+    return (
+        text.startswith("[context compaction")
+        or text.startswith("context compaction")
+        or text.startswith("[your active task list was preserved across context compression]")
+    )
+
+
 def visible_messages_for_anchor(messages, *, auto_compression: bool = False):
     """Return transcript messages that can anchor compression UI metadata.
 
@@ -70,6 +88,8 @@ def visible_messages_for_anchor(messages, *, auto_compression: bool = False):
         role = message.get("role")
         if not role or role == "tool":
             continue
+        if _is_context_compression_marker(message):
+            continue
 
         content = message.get("content", "")
         has_attachments = bool(message.get("attachments"))
diff --git a/api/models.py b/api/models.py
index 6ff748696b..1a22ec9d13 100644
--- a/api/models.py
+++ b/api/models.py
@@ -377,6 +377,11 @@ def __init__(self, session_id: str=None, title: str='Untitled',
                  compression_anchor_message_key=None,
                  compression_anchor_summary=None,
                  pre_compression_snapshot: bool=False,
+                 context_engine=None,
+                 compression_anchor_engine=None,
+                 compression_anchor_mode=None,
+                 compression_anchor_details=None,
+                 context_engine_state=None,
                  context_length=None, threshold_tokens=None,
                  last_prompt_tokens=None,
                  gateway_routing=None, gateway_routing_history=None,
@@ -417,6 +422,11 @@ def __init__(self, session_id: str=None, title: str='Untitled',
         self.compression_anchor_message_key = compression_anchor_message_key
         self.compression_anchor_summary = compression_anchor_summary
         self.pre_compression_snapshot = bool(pre_compression_snapshot)
+        self.context_engine = context_engine
+        self.compression_anchor_engine = compression_anchor_engine
+        self.compression_anchor_mode = compression_anchor_mode
+        self.compression_anchor_details = compression_anchor_details if isinstance(compression_anchor_details, dict) else {}
+        self.context_engine_state = context_engine_state if isinstance(context_engine_state, dict) else {}
         self.context_length = context_length
         self.threshold_tokens = threshold_tokens
         self.last_prompt_tokens = last_prompt_tokens
@@ -474,6 +484,8 @@ def save(self, touch_updated_at: bool = True, skip_index: bool = False) -> None:
             'pending_user_message', 'pending_attachments', 'pending_started_at',
             'compression_anchor_visible_idx', 'compression_anchor_message_key',
             'compression_anchor_summary', 'pre_compression_snapshot',
+            'context_engine', 'compression_anchor_engine', 'compression_anchor_mode',
+            'compression_anchor_details', 'context_engine_state',
             'context_length', 'threshold_tokens', 'last_prompt_tokens',
             'gateway_routing', 'gateway_routing_history', 'llm_title_generated',
             'parent_session_id',
@@ -641,6 +653,11 @@ def compact(self, include_runtime=False, active_stream_ids=None) -> dict:
             'compression_anchor_message_key': self.compression_anchor_message_key,
             'compression_anchor_summary': self.compression_anchor_summary,
             'pre_compression_snapshot': self.pre_compression_snapshot,
+            'context_engine': self.context_engine,
+            'compression_anchor_engine': self.compression_anchor_engine,
+            'compression_anchor_mode': self.compression_anchor_mode,
+            'compression_anchor_details': self.compression_anchor_details,
+            'context_engine_state': self.context_engine_state,
             'context_length': self.context_length,
             'threshold_tokens': self.threshold_tokens,
             'last_prompt_tokens': self.last_prompt_tokens,
diff --git a/static/i18n.js b/static/i18n.js
index c9e923608b..4982119caf 100644
--- a/static/i18n.js
+++ b/static/i18n.js
@@ -196,6 +196,8 @@ const LOCALES = {
     conversation_cleared: 'Conversation cleared',
     command_label: 'Command',
     context_compaction_label: 'Context compaction',
+    retrieval_context_label: 'Indexed context',
+    retrieval_context_preview: 'Earlier messages are stored and retrievable with context tools',
     preserved_task_list_label: 'Preserved task list',
     reference_only_label: 'Reference only',
     model_usage: 'Usage: /model <name>',
@@ -1417,6 +1419,8 @@ const LOCALES = {
     conversation_cleared: 'Conversazione cancellata',
     command_label: 'Comando',
     context_compaction_label: 'Compattazione contesto',
+    retrieval_context_label: 'Contesto indicizzato',
+    retrieval_context_preview: 'I messaggi precedenti sono archiviati e recuperabili con gli strumenti di contesto',
     preserved_task_list_label: 'Lista task preservata',
     reference_only_label: 'Solo riferimento',
     model_usage: 'Uso: /model <nome>',
@@ -2630,6 +2634,8 @@ const LOCALES = {
     conversation_cleared: '会話をクリアしました',
     command_label: 'コマンド',
     context_compaction_label: 'コンテキスト圧縮',
+    retrieval_context_label: 'インデックス済みコンテキスト',
+    retrieval_context_preview: '以前のメッセージは保存され、コンテキストツールで取得できます',
     preserved_task_list_label: '保持されたタスクリスト',
     reference_only_label: '参照専用',
     model_usage: '使い方: /model <名前>',
@@ -3883,6 +3889,8 @@ const LOCALES = {
     compress_failed_label: 'Ошибка сжатия',
     compress_running_label: 'Сжатие…',
     context_compaction_label: 'Сжатие контекста',
+    retrieval_context_label: 'Индексированный контекст',
+    retrieval_context_preview: 'Предыдущие сообщения сохранены и доступны через инструменты контекста',
     preserved_task_list_label: 'Сохранённый список задач',
     focus_label: 'Фокус',
     model_search_no_results: 'Модели не найдены',
@@ -4992,6 +5000,8 @@ const LOCALES = {
     conversation_cleared: 'Conversación borrada',
     command_label: 'Comando',
     context_compaction_label: 'Compacción de contexto',
+    retrieval_context_label: 'Contexto indexado',
+    retrieval_context_preview: 'Los mensajes anteriores se almacenan y se pueden recuperar con herramientas de contexto',
     preserved_task_list_label: 'Lista de tareas conservada',
     reference_only_label: 'Solo referencia',
     model_usage: 'Uso: /model <name>',
@@ -6118,6 +6128,8 @@ const LOCALES = {
     conversation_cleared: 'Konversation gelöscht',
     command_label: 'Befehl',
     context_compaction_label: 'Kontextkomprimierung',
+    retrieval_context_label: 'Indizierter Kontext',
+    retrieval_context_preview: 'Frühere Nachrichten sind gespeichert und über Kontextwerkzeuge abrufbar',
     preserved_task_list_label: 'Beibehaltene Aufgabenliste',
     reference_only_label: 'Nur Referenz',
     model_usage: 'Nutzung: /model <name>',
@@ -7295,6 +7307,8 @@ const LOCALES = {
     conversation_cleared: '对话已清空',
     command_label: '命令',
     context_compaction_label: '上下文压缩',
+    retrieval_context_label: '已索引上下文',
+    retrieval_context_preview: '较早消息已存储，可通过上下文工具检索',
     preserved_task_list_label: '保留的任务列表',
     reference_only_label: '仅供参考',
     model_usage: '用法：/model <name>',
@@ -10715,6 +10729,8 @@ const LOCALES = {
     conversation_cleared: '대화를 지웠습니다',
     command_label: '명령',
     context_compaction_label: 'Context compaction',
+    retrieval_context_label: 'Indexed context',
+    retrieval_context_preview: 'Earlier messages are stored and retrievable with context tools',
     preserved_task_list_label: '보존된 작업 목록',
     reference_only_label: 'Reference only',
     model_usage: 'Usage: /model <name>',
diff --git a/static/messages.js b/static/messages.js
index 6842bbb96f..0cb687ee8f 100644
--- a/static/messages.js
+++ b/static/messages.js
@@ -1846,6 +1846,9 @@ function attachLiveStream(activeSid, streamId, uploaded=[], options={}){
           phase:'done',
           automatic:true,
           message,
+          engine:d.engine,
+          mode:d.mode,
+          details:d.details,
           summary:{headline:message},
           continuationSessionId:continuationSid,
         };
diff --git a/static/ui.js b/static/ui.js
index 8da479ab9f..b5bc868404 100644
--- a/static/ui.js
+++ b/static/ui.js
@@ -5096,9 +5096,10 @@ function _autoCompressionBaseDetail(state){
     : (String(state&&state.message||fallback).trim()||fallback);
 }
 function _autoCompressionPreviewText(state){
+  const copy=_engineAwareCompressionCopy(String(state&&state.engine||_compressionEngineForSession()).toLowerCase(), String(state&&state.mode||_compressionModeForSession()).toLowerCase());
   const running=state&&state.phase==='running';
   const detail=_autoCompressionBaseDetail(state);
-  if(!running) return (String(state&&state.summary?.headline||detail).trim()||detail);
+  if(!running) return (String(state&&state.summary?.headline||copy.preview||detail).trim()||detail);
   const elapsedLabel=_compressionElapsedLabel(state);
   return [detail, elapsedLabel].filter(Boolean).join(' · ');
 }
@@ -5112,13 +5113,14 @@ function _autoCompressionDetailText(state){
   return [base,handoff].filter(Boolean).join('\n');
 }
 function _autoCompressionCardsHtml(state){
+  const copy=_engineAwareCompressionCopy(String(state&&state.engine||_compressionEngineForSession()).toLowerCase(), String(state&&state.mode||_compressionModeForSession()).toLowerCase());
   const running=state&&state.phase==='running';
   const preview=_autoCompressionPreviewText(state);
   const cardDetail=_autoCompressionDetailText(state);
   return `
     <div class="tool-card-row compression-card-row" data-compression-card="1">
       ${_compressionStatusCardHtml({
-        statusLabel: t('auto_compress_label'),
+        statusLabel: (String(state&&state.engine||'').toLowerCase()==='lcm'||String(state&&state.mode||'').toLowerCase()==='lossless_retrieval')?copy.label:t('auto_compress_label'),
         previewText: preview,
         detail: cardDetail,
         icon: running ? '<span class="tool-card-running-dot"></span>' : li('check',13),
@@ -5286,14 +5288,15 @@ function _latestCompressionReferenceMessage(messages, summaryText=''){
   return {message:null, rawIdx:-1};
 }
 function _compressionReferenceCardHtml(text, open=false){
+  const copy=_engineAwareCompressionCopy();
   const preview=text.split(/\n+/).filter(Boolean).slice(0,2).join(' ');
   return `
     <div class="tool-card-row compression-card-row" data-compression-card="1" data-raw-text="${esc(text)}">
       <div class="tool-card tool-card-compress-reference${open?' open':''}">
         <div class="tool-card-header" onclick="this.closest('.tool-card').classList.toggle('open')">
           <span class="tool-card-icon">${li('star',13)}</span>
-          <span class="tool-card-name">${esc(t('context_compaction_label'))}</span>
-          <span class="tool-card-preview">${esc(t('reference_only_label'))} · ${esc(preview)}</span>
+          <span class="tool-card-name">${esc(copy.label)}</span>
+          <span class="tool-card-preview">${esc(copy.preview)} · ${esc(preview)}</span>
           <span class="tool-card-toggle">${li('chevron-right',12)}</span>
           <button class="msg-copy-btn msg-action-btn tool-card-copy compression-reference-copy" title="${t('copy')}" onclick="copyMsg(this);event.stopPropagation()">${li('copy',13)}</button>
         </div>
@@ -5367,6 +5370,31 @@ function _formatMessageFooterTimestamp(tsVal){
   const opts={month:'short', day:'numeric', hour:'numeric', minute:'2-digit'};
   return fmt?fmt(date,opts):date.toLocaleString([], opts);
 }
+function _compressionEngineForSession(){
+  return String(
+    (S.session&&(
+      S.session.compression_anchor_engine
+      || S.session.context_engine
+    )) || 'compressor'
+  ).trim().toLowerCase() || 'compressor';
+}
+function _compressionModeForSession(){
+  return String(
+    (S.session&&S.session.compression_anchor_mode) || 'summary_compaction'
+  ).trim().toLowerCase() || 'summary_compaction';
+}
+function _engineAwareCompressionCopy(engine=_compressionEngineForSession(), mode=_compressionModeForSession()){
+  if(engine==='lcm'||mode==='lossless_retrieval'){
+    return {
+      label:t('retrieval_context_label'),
+      preview:t('retrieval_context_preview'),
+    };
+  }
+  return {
+    label:t('context_compaction_label'),
+    preview:t('reference_only_label'),
+  };
+}
 function _compressionStatusCardHtml({
   statusLabel,
   previewText,
@@ -5946,6 +5974,7 @@ function renderMessages(options){
   }
   function _insertCompressionLikeNodeByRawIdx(node, rawIdx){
     if(!node) return;
+    if(rawIdx<firstRenderedRawIdx) return;
     if(!renderVisWithIdx.length){
       inner.appendChild(node);
       return;
diff --git a/tests/test_auto_compression_card.py b/tests/test_auto_compression_card.py
index 23576ff2a0..f49463d695 100644
--- a/tests/test_auto_compression_card.py
+++ b/tests/test_auto_compression_card.py
@@ -1,5 +1,7 @@
 from pathlib import Path
 
+from api.compression_anchor import visible_messages_for_anchor
+from api.models import Session
 from api.streaming import _is_fallback_lifecycle_message
 
 
@@ -459,6 +461,76 @@ def test_reference_message_inserted_before_future_assistant_anchor():
     assert helper.index("blocks.insertBefore(node, anchorSeg);") < helper.index("const userRow=userRows.get(anchorRawIdx);")
 
 
+def test_frontend_uses_context_engine_metadata_for_indexed_context_copy():
+    src = _read("static/ui.js")
+    i18n = _read("static/i18n.js")
+
+    assert "function _compressionEngineForSession" in src
+    assert "S.session.compression_anchor_engine" in src
+    assert "S.session.context_engine" in src
+    assert "function _compressionModeForSession" in src
+    assert "S.session.compression_anchor_mode" in src
+    assert "function _engineAwareCompressionCopy" in src
+    assert "mode==='lossless_retrieval'" in src
+    assert "t('retrieval_context_label')" in src
+    assert "t('retrieval_context_preview')" in src
+    assert "retrieval_context_label" in i18n
+    assert "retrieval_context_preview" in i18n
+
+
+def test_session_model_round_trips_context_engine_metadata(tmp_path, monkeypatch):
+    import api.models as models
+
+    state_dir = tmp_path / "state"
+    session_dir = state_dir / "sessions"
+    session_dir.mkdir(parents=True)
+    monkeypatch.setattr(models, "SESSION_DIR", session_dir)
+    monkeypatch.setattr(models, "SESSION_INDEX_FILE", state_dir / "session_index.json")
+
+    session = Session(
+        session_id="lcm_metadata",
+        workspace=str(tmp_path),
+        context_engine="lcm",
+        compression_anchor_engine="lcm",
+        compression_anchor_mode="lossless_retrieval",
+        compression_anchor_details={"retrieval_tools": ["lcm_grep"]},
+        context_engine_state={"status": "indexed"},
+    )
+    session.save(touch_updated_at=False)
+
+    loaded = Session.load("lcm_metadata")
+    assert loaded.context_engine == "lcm"
+    assert loaded.compression_anchor_engine == "lcm"
+    assert loaded.compression_anchor_mode == "lossless_retrieval"
+    assert loaded.compression_anchor_details == {"retrieval_tools": ["lcm_grep"]}
+    assert loaded.context_engine_state == {"status": "indexed"}
+
+
+def test_backend_auto_anchor_count_excludes_compaction_marker_cards():
+    messages = [
+        {"role": "user", "content": "before compression"},
+        {"role": "assistant", "content": "[CONTEXT COMPACTION — REFERENCE ONLY] summary"},
+        {"role": "assistant", "content": "after compression"},
+        {"role": "tool", "content": "hidden tool output"},
+        {"role": "user", "content": "[Your active task list was preserved across context compression]"},
+    ]
+
+    visible = visible_messages_for_anchor(messages, auto_compression=True)
+
+    assert [m["content"] for m in visible] == ["before compression", "after compression"]
+
+
+def test_frontend_reference_insertion_skips_when_reference_is_before_render_window():
+    src = _read("static/ui.js")
+    start = src.find("function _insertCompressionLikeNodeByRawIdx")
+    assert start != -1, "raw-index insertion helper not found"
+    end = src.find("const preservedOnlyNode=", start)
+    assert end != -1, "raw-index insertion helper end not found"
+    helper = src[start:end]
+
+    assert "if(rawIdx<firstRenderedRawIdx) return;" in helper
+
+
 def test_reference_message_selection_prefers_latest_matching_marker():
     src = _read("static/ui.js")
     start = src.find("function _latestCompressionReferenceMessage")

From 57703231885d48f3aa562e14c70533f3db740259 Mon Sep 17 00:00:00 2001
From: Michael Lam <michael@example.local>
Date: Tue, 19 May 2026 12:06:57 -0700
Subject: [PATCH 07/14] feat(runtime): add runner adapter facade

---
 CHANGELOG.md                             |   3 +
 api/runtime_adapter.py                   | 123 +++++++++++++++++++++++
 docs/rfcs/hermes-run-adapter-contract.md |  27 ++++-
 tests/test_runtime_adapter_seam.py       | 118 ++++++++++++++++++++++
 4 files changed, 270 insertions(+), 1 deletion(-)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index cfb09c642e..87d37ceb3f 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -2,6 +2,9 @@
 
 ## [Unreleased]
 
+### Added
+
+- **PR #TBD** by @Michaelyklam (refs #1925) — Add the first Slice 4b `RunnerRuntimeAdapter` facade for future runner/sidecar backends. The facade delegates `start_run`, `observe_run`, `get_run`, and control calls to an injected runner client, normalizes results into the existing RuntimeAdapter dataclasses, carries explicit profile/workspace/model payloads, and returns bounded unsupported-control results without owning `AIAgent`, stream, cancel, approval, clarify, goal, or queue state. No route wiring or default-on runner mode is introduced.
 
 ## [v0.51.93] — 2026-05-19 — Release BQ (stage-386 — 10-PR full sweep batch — RFC Slice 4 runner/sidecar gate + workspace tree toggle width CSS variable + settled file:// markdown link rendering + prompt-cache coverage percentage fix + terminal shell shutdown reap + configured model picker provider preservation + profile-aware assistant display names + state.db reconciliation slice 1 + queued-message cross-session drain fix + stale-stream writeback supersede)
 
diff --git a/api/runtime_adapter.py b/api/runtime_adapter.py
index f59fe87ad3..28a63d8099 100644
--- a/api/runtime_adapter.py
+++ b/api/runtime_adapter.py
@@ -144,6 +144,129 @@ def _active_control_result(value: Any) -> ControlResult:
     )
 
 
+def _runner_unsupported_control(name: str) -> ControlResult:
+    return ControlResult(
+        False,
+        status="unsupported",
+        safe_message=f"{name} is not supported by this runner backend.",
+    )
+
+
+class RunnerRuntimeAdapter:
+    """Protocol-translator facade for a future runner/sidecar backend.
+
+    Slice 4 moves runtime ownership behind a runner boundary, but the WebUI
+    adapter must remain a translator.  This class deliberately delegates to an
+    injected client instead of owning process-local streams, cancellation flags,
+    approval queues, clarify queues, or cached agent instances itself.
+    """
+
+    def __init__(self, *, client: Any):
+        self._client = client
+
+    def start_run(self, request: StartRunRequest) -> RunStartResult:
+        start_run = getattr(self._client, "start_run", None)
+        if start_run is None:
+            raise NotImplementedError("RunnerRuntimeAdapter.start_run requires a runner client")
+        payload = start_run(request)
+        if isinstance(payload, RunStartResult):
+            return payload
+        payload = dict(payload or {})
+        run_id = str(payload.get("run_id") or payload.get("stream_id") or "")
+        stream_id = str(payload.get("stream_id") or run_id)
+        session_id = str(payload.get("session_id") or request.session_id)
+        active_controls = payload.get("active_controls")
+        if not isinstance(active_controls, list):
+            active_controls = []
+        return RunStartResult(
+            run_id=run_id,
+            session_id=session_id,
+            stream_id=stream_id,
+            status=str(payload.get("status") or "started"),
+            started_at=payload.get("started_at"),
+            cursor=payload.get("cursor"),
+            active_controls=active_controls,
+            payload=payload,
+        )
+
+    def observe_run(self, run_id: str, *, cursor: str | None = None) -> RunEventStream:
+        observe_run = getattr(self._client, "observe_run", None)
+        if observe_run is None:
+            return RunEventStream(run_id=run_id, events=[], cursor=cursor, last_event_id=None)
+        result = observe_run(run_id, cursor=cursor)
+        if isinstance(result, RunEventStream):
+            return result
+        payload = dict(result or {})
+        events = list(payload.get("events") or [])
+        last_event_id = payload.get("last_event_id") or (events[-1].get("event_id") if events else None)
+        next_cursor = payload.get("cursor")
+        if next_cursor is None and events:
+            next_cursor = str(events[-1].get("seq") or "")
+        return RunEventStream(
+            run_id=str(payload.get("run_id") or run_id),
+            events=events,
+            cursor=str(next_cursor) if next_cursor is not None else cursor,
+            last_event_id=last_event_id,
+        )
+
+    def get_run(self, run_id: str) -> RunStatus:
+        get_run = getattr(self._client, "get_run", None)
+        if get_run is None:
+            return RunStatus(run_id=run_id)
+        result = get_run(run_id)
+        if isinstance(result, RunStatus):
+            return result
+        payload = dict(result or {})
+        active_controls = payload.get("active_controls")
+        if not isinstance(active_controls, list):
+            active_controls = []
+        return RunStatus(
+            run_id=str(payload.get("run_id") or run_id),
+            session_id=str(payload.get("session_id") or "") or None,
+            status=str(payload.get("status") or "unknown"),
+            last_event_id=payload.get("last_event_id"),
+            terminal_state=payload.get("terminal_state"),
+            active_controls=active_controls,
+            pending_approval_id=payload.get("pending_approval_id"),
+            pending_clarify_id=payload.get("pending_clarify_id"),
+        )
+
+    def cancel_run(self, run_id: str) -> ControlResult:
+        cancel_run = getattr(self._client, "cancel_run", None)
+        if cancel_run is None:
+            return _runner_unsupported_control("Cancel")
+        return _active_control_result(cancel_run(run_id))
+
+    def respond_approval(self, run_id: str, approval_id: str, choice: str) -> ControlResult:
+        respond_approval = getattr(self._client, "respond_approval", None)
+        if respond_approval is None:
+            return _runner_unsupported_control("Approval")
+        return _active_control_result(respond_approval(run_id, approval_id, choice))
+
+    def respond_clarify(self, run_id: str, clarify_id: str, response: str) -> ControlResult:
+        respond_clarify = getattr(self._client, "respond_clarify", None)
+        if respond_clarify is None:
+            return _runner_unsupported_control("Clarify")
+        return _active_control_result(respond_clarify(run_id, clarify_id, response))
+
+    def queue_message(self, run_id: str, message: str, *, mode: str = "queue") -> ControlResult:
+        queue_message = getattr(self._client, "queue_message", None)
+        if queue_message is None:
+            return _runner_unsupported_control("Queue")
+        return _active_control_result(queue_message(run_id, message, mode=mode))
+
+    def update_goal(
+        self,
+        session_id: str,
+        action: Literal["set", "pause", "resume", "clear", "status", "edit"],
+        text: str = "",
+    ) -> ControlResult:
+        update_goal = getattr(self._client, "update_goal", None)
+        if update_goal is None:
+            return _runner_unsupported_control("Goal")
+        return _active_control_result(update_goal(session_id, action, text))
+
+
 class LegacyJournalRuntimeAdapter:
     """Protocol-translator facade over the current legacy streaming path.
 
diff --git a/docs/rfcs/hermes-run-adapter-contract.md b/docs/rfcs/hermes-run-adapter-contract.md
index 34ad176ab8..50ddb09795 100644
--- a/docs/rfcs/hermes-run-adapter-contract.md
+++ b/docs/rfcs/hermes-run-adapter-contract.md
@@ -94,8 +94,13 @@ adapter-seam work:
   `queue_message(...)` as a staged protocol method only; `/queue` remains
   browser-side queue/drain behavior, and no server-side queue endpoint or queue
   scheduler should be added merely for adapter symmetry.
+- #2575 shipped the Slice 4a runner/sidecar contract gate in v0.51.93. The next
+  implementation step can add runner-backend adapter plumbing, but it must stay
+  default-off, keep legacy fallback intact, pass explicit profile/workspace/model
+  payloads instead of mutating WebUI process globals, and avoid recreating
+  `STREAMS` / `CANCEL_FLAGS` / approval queues / clarify queues under new names.
 
-The next gate is the runner/sidecar planning contract, not queue implementation
+The next gate is runner-backend plumbing, not queue implementation
 by default. Queue / continue routing should only move before Slice 4 if a future
 maintainer decision identifies an existing server-side legacy entry point and
 pins its response shape, ordering, and idempotency contract. Otherwise, keeping
@@ -746,6 +751,26 @@ Non-goals for Slice 4a:
 - no dependency on Hermes Agent shipping `/v1/runs` before WebUI can validate the
   local runner boundary.
 
+#### Slice 4b: Runner adapter client facade
+
+The first code slice after the Slice 4a contract should be a small
+`RunnerRuntimeAdapter` facade that delegates to an injected runner client. This
+is still not the runner process itself. Its job is to pin the adapter-facing
+normalization rules before route wiring or process supervision lands:
+
+- `start_run` forwards a `StartRunRequest` carrying explicit session, profile,
+  workspace, attachments, model/provider, toolset, source, and metadata payloads;
+- `observe_run` and `get_run` normalize runner responses into `RunEventStream`
+  and `RunStatus` so a recreated WebUI server can observe the same runner-owned
+  state without relying on process-local `STREAMS`;
+- controls normalize accepted / not-active / unsupported outcomes into bounded
+  `ControlResult` values;
+- the facade itself owns no `AIAgent`, worker thread, cancellation registry,
+  approval queue, clarify queue, goal scheduler, or server-side queue.
+
+The implementation remains default-off until a later slice adds an actual runner
+client/backend and explicit route selection.
+
 ## First Meaningful Success Criteria
 
 The first meaningful milestones are deliberately split.
diff --git a/tests/test_runtime_adapter_seam.py b/tests/test_runtime_adapter_seam.py
index 1b6910c6e0..1028b08a17 100644
--- a/tests/test_runtime_adapter_seam.py
+++ b/tests/test_runtime_adapter_seam.py
@@ -18,6 +18,7 @@ def test_runtime_adapter_interface_and_legacy_journal_methods_exist():
     for name in required:
         assert hasattr(runtime.RuntimeAdapter, name)
         assert hasattr(runtime.LegacyJournalRuntimeAdapter, name)
+        assert hasattr(runtime.RunnerRuntimeAdapter, name)
 
     assert runtime.runtime_adapter_mode({}) == "legacy-direct"
     assert runtime.runtime_adapter_enabled({}) is False
@@ -328,3 +329,120 @@ def test_rfc_defines_slice4_runner_contract_before_runner_code():
     assert "profile,\n   workspace, attachments, model/provider, toolset, and source metadata" in rfc
     assert "no removal of the legacy in-process backend" in rfc
     assert "no default-on runner mode" in rfc
+    assert "#### Slice 4b: Runner adapter client facade" in rfc
+    assert "delegates to an injected runner client" in rfc
+    assert "without relying on process-local `STREAMS`" in rfc
+
+
+def test_runner_runtime_adapter_passes_explicit_start_payload_without_env_mutation(monkeypatch):
+    runtime = importlib.import_module("api.runtime_adapter")
+    captured = []
+
+    class FakeRunnerClient:
+        def start_run(self, request):
+            captured.append(request)
+            return {
+                "run_id": "runner-1",
+                "session_id": request.session_id,
+                "stream_id": "runner-1",
+                "status": "running",
+                "active_controls": ["cancel", "approval", "clarify", "goal"],
+            }
+
+    before_terminal_cwd = "existing-cwd"
+    monkeypatch.setenv("TERMINAL_CWD", before_terminal_cwd)
+    adapter = runtime.RunnerRuntimeAdapter(client=FakeRunnerClient())
+    request = runtime.StartRunRequest(
+        session_id="s-runner",
+        message="hello runner",
+        attachments=[{"path": "/tmp/a.png", "mime": "image/png"}],
+        workspace="/workspace/project",
+        profile="research",
+        provider="openai-codex",
+        model="gpt-5.5",
+        toolsets=["terminal", "file"],
+        source="webui",
+        metadata={"route": "/api/chat/start", "csrf_checked": True},
+    )
+
+    result = adapter.start_run(request)
+
+    assert captured == [request]
+    assert captured[0].workspace == "/workspace/project"
+    assert captured[0].profile == "research"
+    assert captured[0].attachments == [{"path": "/tmp/a.png", "mime": "image/png"}]
+    assert captured[0].provider == "openai-codex"
+    assert captured[0].model == "gpt-5.5"
+    assert captured[0].toolsets == ["terminal", "file"]
+    assert result.run_id == "runner-1"
+    assert result.active_controls == ["cancel", "approval", "clarify", "goal"]
+    assert runtime.os.environ["TERMINAL_CWD"] == before_terminal_cwd
+
+
+def test_runner_runtime_adapter_observe_and_get_survive_adapter_recreation():
+    runtime = importlib.import_module("api.runtime_adapter")
+
+    class FakeRunnerClient:
+        def __init__(self):
+            self.events = []
+            self.status = "unknown"
+
+        def start_run(self, request):
+            self.status = "running"
+            self.events.append({"event_id": "runner-1:1", "seq": 1, "type": "token", "data": {"text": "hi"}})
+            self.events.append({"event_id": "runner-1:2", "seq": 2, "type": "done", "data": {"ok": True}})
+            self.status = "completed"
+            return {"run_id": "runner-1", "session_id": request.session_id, "stream_id": "runner-1", "status": "running"}
+
+        def observe_run(self, run_id, *, cursor=None):
+            after = int(cursor or 0)
+            return {"run_id": run_id, "events": [e for e in self.events if e["seq"] > after]}
+
+        def get_run(self, run_id):
+            return {
+                "run_id": run_id,
+                "session_id": "s-runner",
+                "status": self.status,
+                "terminal_state": "completed",
+                "last_event_id": self.events[-1]["event_id"],
+                "active_controls": [],
+            }
+
+    shared_runner = FakeRunnerClient()
+    first_webui_process = runtime.RunnerRuntimeAdapter(client=shared_runner)
+    first_webui_process.start_run(runtime.StartRunRequest(session_id="s-runner", message="hello"))
+
+    restarted_webui_process = runtime.RunnerRuntimeAdapter(client=shared_runner)
+    replay = restarted_webui_process.observe_run("runner-1", cursor="1")
+    status = restarted_webui_process.get_run("runner-1")
+
+    assert [event["type"] for event in replay.events] == ["done"]
+    assert replay.cursor == "2"
+    assert replay.last_event_id == "runner-1:2"
+    assert status.status == "completed"
+    assert status.terminal_state == "completed"
+    assert status.last_event_id == "runner-1:2"
+
+
+def test_runner_runtime_adapter_controls_are_bounded_and_do_not_use_legacy_state():
+    runtime = importlib.import_module("api.runtime_adapter")
+
+    class FakeRunnerClient:
+        def cancel_run(self, run_id):
+            return {"ok": False, "status": "not-active", "message": "Run is not active."}
+
+    adapter = runtime.RunnerRuntimeAdapter(client=FakeRunnerClient())
+
+    cancel = adapter.cancel_run("finished-run")
+    approval = adapter.respond_approval("finished-run", "approval-1", "once")
+    clarify = adapter.respond_clarify("finished-run", "clarify-1", "answer")
+    queued = adapter.queue_message("finished-run", "next")
+    goal = adapter.update_goal("s-runner", "status")
+
+    assert cancel.accepted is False
+    assert cancel.status == "not-active"
+    assert cancel.safe_message == "Run is not active."
+    for result in (approval, clarify, queued, goal):
+        assert result.accepted is False
+        assert result.status == "unsupported"
+        assert "not supported by this runner backend" in (result.safe_message or "")

From 37df7d76a40c5c878b9486ee75bd3cfe45939703 Mon Sep 17 00:00:00 2001
From: starship-s <45587122+starship-s@users.noreply.github.com>
Date: Tue, 19 May 2026 13:25:16 -0600
Subject: [PATCH 08/14] fix(webui): prevent composer draft rollback on refresh

---
 api/routes.py                                   |  6 +++++-
 static/sessions.js                              | 17 +++++++++++++----
 .../test_stage326_composer_draft_validation.py  | 16 +++++++++++++++-
 tests/test_webui_external_refresh_frontend.py   | 13 +++++++++++++
 4 files changed, 46 insertions(+), 6 deletions(-)

diff --git a/api/routes.py b/api/routes.py
index 01671c2af1..6e738d0661 100644
--- a/api/routes.py
+++ b/api/routes.py
@@ -4852,7 +4852,11 @@ def handle_post(handler, parsed) -> bool:
             if files is not None:
                 draft["files"] = files
             s.composer_draft = draft
-            s.save()
+            # Draft persistence is not conversation activity. Touching updated_at
+            # here makes the active-session external-refresh poll force-reload the
+            # current chat every few seconds while the user is typing, and that
+            # delayed reload can restore an older draft over newer local input.
+            s.save(touch_updated_at=False)
         return j(handler, {"ok": True, "draft": s.composer_draft})
 
     if parsed.path == "/api/session/update":
diff --git a/static/sessions.js b/static/sessions.js
index b3ffd7ed5f..fc9ae52bb2 100644
--- a/static/sessions.js
+++ b/static/sessions.js
@@ -47,7 +47,7 @@ function _saveComposerDraftNow(sid, text, files) {
 // Restore composer draft from server onto #msg textarea.
 // Only restores if there's actual text (skip empty/None drafts).
 // Guards against double-restore when rapidly switching sessions.
-function _restoreComposerDraft(draft, targetSid) {
+function _restoreComposerDraft(draft, targetSid, opts={}) {
   const ta = $('msg');
   if (!ta) return;
   // targetSid is the session that was requested — if it no longer matches
@@ -55,10 +55,20 @@ function _restoreComposerDraft(draft, targetSid) {
   if (targetSid && _loadingSessionId !== null && _loadingSessionId !== targetSid) return;
   const text = (draft && typeof draft.text === 'string') ? draft.text : '';
   const files = (draft && Array.isArray(draft.files)) ? draft.files : [];
+  const current = ta.value || '';
+  const preserveActiveInput = !!(opts && opts.preserveActiveInput);
+
+  // Same-session force refreshes are driven by external state changes and may
+  // finish seconds after the user continued typing. In that case the local
+  // composer is the authoritative in-progress draft; never replace non-empty
+  // local input with an older server draft. Cross-session switches still restore
+  // normally so the previous session's composer contents do not leak forward.
+  if (preserveActiveInput && current && current !== text) return;
+
   // If there's no text and no files, clear the textarea (a previous session's
   // draft may still be sitting there from a cross-session switch).
   if (!text && !files.length) {
-    if (ta.value) {
+    if (current) {
       ta.value = '';
       if (typeof autoResize === 'function') autoResize();
       if (typeof updateSendBtn === 'function') updateSendBtn();
@@ -66,7 +76,6 @@ function _restoreComposerDraft(draft, targetSid) {
     return;
   }
   // Only update if different to avoid cursor jumps on unrelated session switches.
-  const current = ta.value || '';
   if (current !== text) {
     ta.value = text;
     if (typeof autoResize === 'function') autoResize();
@@ -790,7 +799,7 @@ async function loadSession(sid){
   // against stale writes from slow responses racing to restore the previous draft).
   const _draft = S.session && S.session.composer_draft;
   if (_draft && (typeof _restoreComposerDraft === 'function')) {
-    _restoreComposerDraft(_draft, sid);
+    _restoreComposerDraft(_draft, sid, {preserveActiveInput:currentSid===sid&&forceReload});
   }
 
   _resolveSessionModelForDisplaySoon(sid);
diff --git a/tests/test_stage326_composer_draft_validation.py b/tests/test_stage326_composer_draft_validation.py
index 71e3ecec5e..3f5904d6a1 100644
--- a/tests/test_stage326_composer_draft_validation.py
+++ b/tests/test_stage326_composer_draft_validation.py
@@ -81,10 +81,24 @@ def test_draft_validation_appears_before_persist():
     src = Path(__file__).parents[1].joinpath("api", "routes.py").read_text(encoding="utf-8")
     # Anchor on the unique POST-validation comment marker.
     marker_idx = src.find("Stage-326 hardening (per Opus advisor)")
-    persist_idx = src.find("s.composer_draft = draft\n            s.save()")
+    persist_idx = src.find("s.composer_draft = draft\n            # Draft persistence is not conversation activity")
     assert marker_idx != -1 and persist_idx != -1, (
         "could not locate validation marker or persist site"
     )
     assert marker_idx < persist_idx, (
         "validation block must run before composer_draft persist"
     )
+
+
+def test_draft_save_does_not_touch_session_updated_at():
+    """Autosaving the composer must not look like conversation activity.
+
+    If POST /api/session/draft bumps updated_at, the frontend's active-session
+    external refresh poll treats every keystroke autosave as a remote session
+    update and force-reloads the current chat a few seconds later.
+    """
+    src = Path(__file__).parents[1].joinpath("api", "routes.py").read_text(encoding="utf-8")
+    persist_idx = src.find("s.composer_draft = draft")
+    assert persist_idx != -1, "could not locate composer draft persist site"
+    save_idx = src.find("s.save(touch_updated_at=False)", persist_idx)
+    assert save_idx != -1, "composer draft save must preserve session updated_at"
diff --git a/tests/test_webui_external_refresh_frontend.py b/tests/test_webui_external_refresh_frontend.py
index faf1fe1aec..44b22f68f7 100644
--- a/tests/test_webui_external_refresh_frontend.py
+++ b/tests/test_webui_external_refresh_frontend.py
@@ -37,3 +37,16 @@ def test_force_reload_clears_stale_blocking_prompts_immediately():
     """
     assert "hideApprovalCard(forceReload)" in SESSIONS_JS
     assert "hideClarifyCard(forceReload, forceReload?'external-refresh':'dismissed')" in SESSIONS_JS
+
+
+def test_same_session_force_reload_preserves_non_empty_composer_input():
+    """A slow same-session refresh must not roll back text typed meanwhile.
+
+    The active-session refresh path can finish seconds after it started. If the
+    user kept typing, restoring the server draft at the end of that load would
+    replace newer local input with an older debounced draft.
+    """
+    assert "function _restoreComposerDraft(draft, targetSid, opts={})" in SESSIONS_JS
+    assert "const preserveActiveInput = !!(opts && opts.preserveActiveInput);" in SESSIONS_JS
+    assert "if (preserveActiveInput && current && current !== text) return;" in SESSIONS_JS
+    assert "_restoreComposerDraft(_draft, sid, {preserveActiveInput:currentSid===sid&&forceReload});" in SESSIONS_JS

From 729ed415ff91cac7d57086098e308f1ae5acee95 Mon Sep 17 00:00:00 2001
From: keyos <keyos@us.er>
Date: Tue, 19 May 2026 20:23:46 +0000
Subject: [PATCH 09/14] fix(approval): peek _gateway_queues for session-level
 approval when _pending is empty
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

During active streaming, dangerous-command approvals go through the
gateway path and are stored in _gateway_queues as _ApprovalEntry
objects, not in _pending. The _resolve_approval_legacy helper only
looked at _pending, so 'Allow for this session' never called
approve_session() — the user clicked Allow, the card vanished, but
the next dangerous command asked again.

Now when _pending has no matching entry, the helper peeks into
_gateway_queues to extract pattern_keys, calls approve_session(),
and marks found_target=True so resolve_gateway_approval also fires.

This commit is re-scoped to peek-only (no agent_session_key round-trip,
no state_db metadata changes).

Includes:
- Import + fallback for _gateway_queues
- Null-safe key filtering in all_keys
- Source-contract test (static) + functional test with
  @requires_agent_modules skip marker for CI
- All comments and docstrings in English
---
 api/routes.py                      | 43 +++++++++++++----
 tests/test_runtime_adapter_seam.py | 74 ++++++++++++++++++++++++++++++
 2 files changed, 107 insertions(+), 10 deletions(-)

diff --git a/api/routes.py b/api/routes.py
index 01671c2af1..097de89079 100644
--- a/api/routes.py
+++ b/api/routes.py
@@ -2277,6 +2277,7 @@ def _keep_latest_messaging_session_per_source(
         _pending,
         _lock,
         _permanent_approved,
+        _gateway_queues,
         resolve_gateway_approval,
         enable_session_yolo,
         disable_session_yolo,
@@ -2295,6 +2296,7 @@ def _keep_latest_messaging_session_per_source(
     _pending = {}
     _lock = threading.Lock()
     _permanent_approved = set()
+    _gateway_queues = {}
 
 
 # ── Approval SSE subscribers (long-connection push) ──────────────────────────
@@ -8739,6 +8741,7 @@ def _resolve_approval_legacy(sid: str, approval_id: str, choice: str) -> bool:
     # that omit approval_id still resolve the oldest entry for compatibility.
     pending = None
     found_target = False
+    gateway_keys = []
     with _lock:
         queue = _pending.get(sid)
         if isinstance(queue, list):
@@ -8764,6 +8767,25 @@ def _resolve_approval_legacy(sid: str, approval_id: str, choice: str) -> bool:
             if not approval_id or queue.get("approval_id") == approval_id:
                 pending = _pending.pop(sid, None)
                 found_target = pending is not None
+        # When no _pending entry found, peek into _gateway_queues for
+        # pattern_keys so session-level approval still works. The gateway
+        # path is the primary mechanism during active streaming; _pending
+        # is only used for UI polling/SSE notification.
+        # NOTE: Gateway queue entries don't carry approval_id, so whenever no
+        # _pending entry matched (empty queue OR a stale approval_id), we
+        # assume the gateway entry at the head of the queue corresponds.
+        # This relies on gateway entries being consumed in step with
+        # _pending entries under the same lock. NOTE(review): confirm a stale
+        # approval_id cannot be paired with a different gateway entry here.
+        if not pending:
+            gw_queue = _gateway_queues.get(sid)
+            if gw_queue and len(gw_queue) > 0:
+                gw_entry = gw_queue[0]
+                # _gateway_queues stores _ApprovalEntry objects; their
+                # .data dict carries command, pattern_key, pattern_keys.
+                gw_data = getattr(gw_entry, 'data', None) or {}
+                gateway_keys = gw_data.get("pattern_keys") or [gw_data.get("pattern_key", "")] if gw_data else []
+                found_target = True
         # Notify SSE subscribers of the new head (or empty state) so the UI
         # surfaces any trailing approvals that were queued behind this one
         # without waiting for the next submit_pending. Without this, a parallel
@@ -8775,16 +8797,17 @@ def _resolve_approval_legacy(sid: str, approval_id: str, choice: str) -> bool:
         else:
             _approval_sse_notify_locked(sid, None, 0)
 
-    if pending:
-        keys = pending.get("pattern_keys") or [pending.get("pattern_key", "")]
-        if choice in ("once", "session"):
-            for k in keys:
-                approve_session(sid, k)
-        elif choice == "always":
-            for k in keys:
-                approve_session(sid, k)
-                approve_permanent(k)
-            save_permanent_allowlist(_permanent_approved)
+    # Collect keys from both _pending and _gateway_queues
+    keys_from_pending = pending.get("pattern_keys") or [pending.get("pattern_key", "")] if pending else []
+    all_keys = [k for k in keys_from_pending if k] + [k for k in gateway_keys if k]
+    if choice in ("once", "session"):
+        for k in all_keys:
+            approve_session(sid, k)
+    elif choice == "always":
+        for k in all_keys:
+            approve_session(sid, k)
+            approve_permanent(k)
+        save_permanent_allowlist(_permanent_approved)
     # Unblock the agent thread waiting in the gateway approval queue.
     # This is the primary signal when streaming is active — the agent
     # thread is parked in entry.event.wait() and needs to be woken up.
diff --git a/tests/test_runtime_adapter_seam.py b/tests/test_runtime_adapter_seam.py
index 1b6910c6e0..f24bac6715 100644
--- a/tests/test_runtime_adapter_seam.py
+++ b/tests/test_runtime_adapter_seam.py
@@ -1,6 +1,8 @@
 import importlib
 import queue
 
+from tests.conftest import requires_agent_modules
+
 
 def test_runtime_adapter_interface_and_legacy_journal_methods_exist():
     runtime = importlib.import_module("api.runtime_adapter")
@@ -271,6 +273,78 @@ def test_approval_respond_does_not_fallback_to_oldest_when_explicit_id_is_stale(
     assert "queue.pop(0)" not in stale_branch
 
 
+def test_approval_respond_peeks_gateway_queues_when_pending_empty() -> None:
+    """When _pending has no matching entry but _gateway_queues does, the
+    helper should extract pattern_keys from the gateway queue and call
+    approve_session even though pending is None.
+    """
+    routes = importlib.import_module("api.routes")
+    src = (routes.Path(__file__).parent.parent / "api" / "routes.py").read_text(encoding="utf-8")
+    helper_idx = src.index("def _resolve_approval_legacy")
+    helper_body = src[helper_idx:src.index("def _handle_approval_respond", helper_idx)]
+
+    assert "_gateway_queues" in helper_body, (
+        "_resolve_approval_legacy must reference _gateway_queues "
+        "to read pattern_keys when _pending is empty"
+    )
+    assert "gateway_keys" in helper_body, (
+        "Must extract pattern_keys from _gateway_queues into a gateway_keys variable"
+    )
+    assert "approve_session" in helper_body[helper_body.index("all_keys"):], (
+        "Must call approve_session for keys extracted from _gateway_queues"
+    )
+
+
+@requires_agent_modules
+def test_approval_respond_approves_from_gateway_queues_when_pending_empty() -> None:
+    """Verify _resolve_approval_legacy peeks into _gateway_queues for
+    pattern_keys when _pending has no matching entry, and calls
+    approve_session() even though pending is None (the real streaming case).
+    """
+    import threading
+    from api.routes import _resolve_approval_legacy
+
+    routes = importlib.import_module("api.routes")
+    approval_mod = importlib.import_module("tools.approval")
+
+    test_sid = "__test_gateway_approval_sid__"
+    test_key = "__test_pattern_key__"
+
+    # 1. Ensure _pending is empty for this sid
+    with approval_mod._lock:
+        approval_mod._pending.pop(test_sid, None)
+
+    # 2. Populate _gateway_queues with a real entry
+    entry = approval_mod._ApprovalEntry({
+        "command": "test_cmd",
+        "pattern_key": test_key,
+        "pattern_keys": [test_key],
+        "description": "test dangerous cmd",
+    })
+    with approval_mod._lock:
+        approval_mod._gateway_queues.setdefault(test_sid, []).append(entry)
+
+    try:
+        # 3. Run the helper with empty _pending but populated _gateway_queues
+        result = _resolve_approval_legacy(test_sid, "", "session")
+
+        # 4. Verify approve_session was called (is_approved must return True)
+        assert approval_mod.is_approved(test_sid, test_key), (
+            "approve_session should have been called for the pattern_key "
+            "extracted from _gateway_queues"
+        )
+        assert result is True, (
+            "_resolve_approval_legacy should return True when it finds "
+            "and resolves the gateway entry"
+        )
+    finally:
+        # 5. Cleanup
+        with approval_mod._lock:
+            approval_mod._gateway_queues.pop(test_sid, None)
+            approval_mod._session_approved.pop(test_sid, None)
+            approval_mod._pending.pop(test_sid, None)
+
+
 def test_chat_start_route_selects_adapter_only_when_flag_enabled():
     routes = importlib.import_module("api.routes")
     src = (routes.Path(__file__).parent.parent / "api" / "routes.py").read_text(encoding="utf-8")

From 692ea22f9ef7a009f596ee80b7297ae66183a0ac Mon Sep 17 00:00:00 2001
From: starship-s <45587122+starship-s@users.noreply.github.com>
Date: Tue, 19 May 2026 14:35:11 -0600
Subject: [PATCH 10/14] fix(streaming): finish auto-compression card after
 rotation

---
 static/messages.js                  |  9 ++++++---
 tests/test_auto_compression_card.py | 30 ++++++++++++++++++++++-------
 2 files changed, 29 insertions(+), 10 deletions(-)

diff --git a/static/messages.js b/static/messages.js
index 6842bbb96f..7fffdc631c 100644
--- a/static/messages.js
+++ b/static/messages.js
@@ -1829,12 +1829,15 @@ function attachLiveStream(activeSid, streamId, uploaded=[], options={}){
       // Context was auto-compressed during this turn. Render it through the
       // same transient compression-card path as manual /compress, without
       // inserting a fake assistant message into history or model context.
-      if(!S.session||S.session.session_id!==activeSid) return;
+      if(!S.session) return;
+      const currentSid=S.session.session_id;
       let d={};
       try{ d=JSON.parse(e.data||'{}')||{}; }catch(_){ d={}; }
       const eventSid=d.old_session_id||d.session_id||activeSid;
-      if(eventSid!==activeSid && d.new_session_id!==activeSid && d.continuation_session_id!==activeSid) return;
       const continuationSid=d.new_session_id||d.continuation_session_id||'';
+      const eventMatchesCurrent=!!(currentSid&&(eventSid===currentSid||d.new_session_id===currentSid||d.continuation_session_id===currentSid));
+      if(!eventMatchesCurrent) return;
+      const displaySid=currentSid;
       const message=String(d.message||'Context auto-compressed to continue the conversation').trim();
       if(d.usage&&typeof _syncCtxIndicator==='function'){
         S.lastUsage={...(S.lastUsage||{}),...d.usage};
@@ -1842,7 +1845,7 @@ function attachLiveStream(activeSid, streamId, uploaded=[], options={}){
       }
       if(typeof setCompressionUi==='function'){
         const state={
-          sessionId:activeSid,
+          sessionId:displaySid,
           phase:'done',
           automatic:true,
           message,
diff --git a/tests/test_auto_compression_card.py b/tests/test_auto_compression_card.py
index 23576ff2a0..d57a79567d 100644
--- a/tests/test_auto_compression_card.py
+++ b/tests/test_auto_compression_card.py
@@ -217,16 +217,15 @@ def test_auto_compression_sse_uses_transient_card_not_fake_message():
 def test_auto_compression_sse_keeps_inactive_and_malformed_paths_safe():
     block = _compressed_listener_block()
 
-    guard = "if(!S.session||S.session.session_id!==activeSid) return;"
+    guard = "if(!S.session) return;"
     assert guard in block
     assert block.index(guard) < block.index("setCompressionUi")
     assert "try{ d=JSON.parse(e.data||'{}')||{}; }catch(_){ d={}; }" in block
     assert "const eventSid=d.old_session_id||d.session_id||activeSid;" in block
-    # The listener also accepts a rotated continuation session id so journal-
-    # replay reconnects after compression rotate land the done card.
-    # See Opus advisor followup on stage-385 (v0.51.92).
-    event_guard = "if(eventSid!==activeSid && d.new_session_id!==activeSid && d.continuation_session_id!==activeSid) return;"
+    assert "const eventMatchesCurrent=" in block
+    event_guard = "if(!eventMatchesCurrent) return;"
     assert event_guard in block
+    assert block.index("const eventMatchesCurrent=") < block.index(event_guard)
 
 
 def test_auto_compression_done_accepts_rotated_continuation_session_event():
@@ -238,12 +237,29 @@ def test_auto_compression_done_accepts_rotated_continuation_session_event():
     # continuation id as display metadata instead of dropping the event.
     assert "const eventSid=d.old_session_id||d.session_id||activeSid;" in block
     assert "const continuationSid=d.new_session_id||d.continuation_session_id||'';" in block
-    event_guard = "if(eventSid!==activeSid && d.new_session_id!==activeSid && d.continuation_session_id!==activeSid) return;"
+    event_guard = "if(!eventMatchesCurrent) return;"
     assert event_guard in block
-    assert block.index("const eventSid=") < block.index(event_guard)
+    assert block.index("const eventSid=") < block.index("const eventMatchesCurrent=")
     assert "continuationSessionId:continuationSid" in block
 
 
+def test_auto_compression_done_accepts_event_after_current_session_rotates():
+    block = _compressed_listener_block()
+
+    # The final compressed event can arrive/replay after another event has already
+    # updated S.session to the continuation session id. Do not drop it just
+    # because the active browser session no longer equals the original activeSid.
+    strict_active_guard = "if(!S.session||S.session.session_id!==activeSid) return;"
+    assert strict_active_guard not in block
+    assert "if(!S.session) return;" in block
+    assert "const currentSid=S.session.session_id;" in block
+    assert "const eventMatchesCurrent=" in block
+    assert "const displaySid=currentSid;" in block
+    assert "sessionId:displaySid" in block
+    assert block.index("const eventSid=") < block.index("const eventMatchesCurrent=")
+    assert block.index("const displaySid=") < block.index("setCompressionUi(state)")
+
+
 def test_auto_compression_done_sse_refreshes_context_indicator_usage():
     block = _compressed_listener_block()
 

From ada59d73e631cef1836f346b6fea088e29db571e Mon Sep 17 00:00:00 2001
From: keyos <keyos@us.er>
Date: Tue, 19 May 2026 20:56:17 +0000
Subject: [PATCH 11/14] fix(approval): simplify gateway_keys expression and
 document race window
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Drop the redundant 'if gw_data else []' guard — gw_data is already
guaranteed to be a dict by the 'or {}' fallback above.

Add a one-line comment explaining the peek-without-pop race window:
a concurrent resolver may pop a different gateway entry, but
approve_session is idempotent over the session key set so the
outcome is the same regardless.
---
 api/routes.py | 7 ++++++-
 1 file changed, 6 insertions(+), 1 deletion(-)

diff --git a/api/routes.py b/api/routes.py
index 097de89079..73a9427e08 100644
--- a/api/routes.py
+++ b/api/routes.py
@@ -8784,7 +8784,12 @@ def _resolve_approval_legacy(sid: str, approval_id: str, choice: str) -> bool:
                 # _gateway_queues stores _ApprovalEntry objects; their
                 # .data dict carries command, pattern_key, pattern_keys.
                 gw_data = getattr(gw_entry, 'data', None) or {}
-                gateway_keys = gw_data.get("pattern_keys") or [gw_data.get("pattern_key", "")] if gw_data else []
+                gateway_keys = gw_data.get("pattern_keys") or [gw_data.get("pattern_key", "")]
+                # Peek is not strict — a concurrent resolver may pop a
+                # different gateway entry before we reach
+                # resolve_gateway_approval below, but approve_session is
+                # idempotent over the session key set so the outcome is
+                # the same regardless of which entry wins the race.
                 found_target = True
         # Notify SSE subscribers of the new head (or empty state) so the UI
         # surfaces any trailing approvals that were queued behind this one

From 1ebfbf352702def9af13d3c9b477b3a75d3879bb Mon Sep 17 00:00:00 2001
From: Michael Lam <michael@example.local>
Date: Tue, 19 May 2026 14:27:41 -0700
Subject: [PATCH 12/14] fix: reconcile session metadata counts

---
 CHANGELOG.md                                |  4 ++
 api/routes.py                               | 42 +++++++++++----------
 tests/test_webui_state_db_reconciliation.py | 37 ++++++++++++++++++
 3 files changed, 64 insertions(+), 19 deletions(-)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index cfb09c642e..c518267d71 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -2,6 +2,10 @@
 
 ## [Unreleased]
 
+### Fixed
+
+- **PR #2604** by @Michaelyklam (refs #2594) — Make the metadata-only `/api/session?messages=0` path report the same reconciled message count and last-message timestamp as a full session load. Sidebar refresh polling no longer loops forever when `state.db` retains old rows that the append-only merge correctly filters out.
+
 
 ## [v0.51.93] — 2026-05-19 — Release BQ (stage-386 — 10-PR full sweep batch — RFC Slice 4 runner/sidecar gate + workspace tree toggle width CSS variable + settled file:// markdown link rendering + prompt-cache coverage percentage fix + terminal shell shutdown reap + configured model picker provider preservation + profile-aware assistant display names + state.db reconciliation slice 1 + queued-message cross-session drain fix + stale-stream writeback supersede)
 
diff --git a/api/routes.py b/api/routes.py
index 01671c2af1..8b00c8840f 100644
--- a/api/routes.py
+++ b/api/routes.py
@@ -2221,7 +2221,6 @@ def _keep_latest_messaging_session_per_source(
     get_cli_sessions,
     get_cli_session_messages,
     get_state_db_session_messages,
-    get_state_db_session_summary,
     merge_session_messages_append_only,
     ensure_cron_project,
     is_cron_session,
@@ -3669,16 +3668,24 @@ def handle_get(handler, parsed) -> bool:
             is_messaging_session = _is_messaging_session_record(s) or _is_messaging_session_record(cli_meta)
             cli_messages = []
             state_db_messages = []
-            state_db_summary = {}
+            sidecar_metadata_messages = None
             if is_messaging_session:
                 cli_messages = get_cli_session_messages(sid)
             elif load_messages:
                 state_db_messages = get_state_db_session_messages(sid)
             elif not is_messaging_session:
-                # Metadata-only callers (frontend refresh polling) only need a
-                # cheap staleness signal. Avoid full transcript materialization
-                # on the steady-state polling path.
-                state_db_summary = get_state_db_session_summary(sid)
+                # Metadata-only callers still need the same append-only
+                # reconciliation contract as full loads. A raw state.db summary
+                # can count stale rows that the merge intentionally filters out,
+                # which makes sidebar polling think the transcript is always
+                # newer than the loaded conversation.
+                state_db_messages = get_state_db_session_messages(sid)
+                sidecar_metadata_session = Session.load(sid)
+                sidecar_metadata_messages = (
+                    getattr(sidecar_metadata_session, "messages", []) or []
+                    if sidecar_metadata_session
+                    else []
+                )
             _t2 = _time.monotonic()
             effective_model = (
                 _resolve_effective_session_model_for_display(s)
@@ -3708,23 +3715,20 @@ def handle_get(handler, parsed) -> bool:
                     sidecar_messages = getattr(s, "messages", []) or []
                     _all_msgs = merge_session_messages_append_only(cli_messages, sidecar_messages)
                 else:
-                    _all_msgs = merge_session_messages_append_only(getattr(s, "messages", []) or [], state_db_messages)
-            if not load_messages and state_db_summary:
-                sidecar_messages = getattr(s, "messages", []) or []
-                sidecar_count = len(sidecar_messages)
+                    _metadata_sidecar = sidecar_metadata_messages
+                    if _metadata_sidecar is None:
+                        _metadata_sidecar = getattr(s, "messages", []) or []
+                    _all_msgs = merge_session_messages_append_only(_metadata_sidecar, state_db_messages)
+            if not load_messages:
+                _summary_message_count = len(_all_msgs)
                 try:
-                    sidecar_last = max(
+                    _summary_last_message_at = max(
                         float((m or {}).get("timestamp") or 0)
-                        for m in sidecar_messages
+                        for m in _all_msgs
                         if isinstance(m, dict)
-                    ) if sidecar_messages else 0
+                    ) if _all_msgs else 0
                 except (TypeError, ValueError):
-                    sidecar_last = 0
-                state_count = int(state_db_summary.get("message_count") or 0)
-                state_last = float(state_db_summary.get("last_message_at") or 0)
-                _all_msgs = sidecar_messages
-                _summary_message_count = max(sidecar_count, state_count)
-                _summary_last_message_at = max(sidecar_last, state_last)
+                    _summary_last_message_at = 0
             else:
                 _summary_message_count = None
                 _summary_last_message_at = None
diff --git a/tests/test_webui_state_db_reconciliation.py b/tests/test_webui_state_db_reconciliation.py
index 0180345057..5e0e17b29c 100644
--- a/tests/test_webui_state_db_reconciliation.py
+++ b/tests/test_webui_state_db_reconciliation.py
@@ -317,6 +317,43 @@ def test_metadata_fast_path_reports_reconciled_state_db_count(monkeypatch, tmp_p
     assert session["last_message_at"] == 1003.0
 
 
+def test_metadata_fast_path_excludes_state_db_rows_filtered_by_reconciliation(monkeypatch, tmp_path):
+    import api.routes as routes
+
+    sid = "webui_reconcile_metadata_filtered"
+    _install_test_session(
+        monkeypatch,
+        tmp_path,
+        sid,
+        [
+            {"role": "user", "content": "old user", "timestamp": 1000.0},
+            {"role": "assistant", "content": "old assistant", "timestamp": 1001.0},
+        ],
+    )
+    _make_state_db(
+        tmp_path / "state.db",
+        sid,
+        [
+            {"role": "user", "content": "old user", "timestamp": 1000.0},
+            {"role": "assistant", "content": "old assistant", "timestamp": 1001.0},
+            # This stale state.db-only row is older than the newest sidecar
+            # timestamp and lacks an explicit message id, so the full
+            # append-only merge filters it out. The metadata path must report
+            # the same count/last timestamp or sidebar refresh polling loops.
+            {"role": "tool", "content": "stale state row", "timestamp": 1000.5},
+        ],
+    )
+
+    handler = _GetHandler(f"/api/session?session_id={sid}&messages=0&resolve_model=0")
+    routes.handle_get(handler, urlparse(handler.path))
+
+    assert handler.status == 200
+    session = handler.response_json["session"]
+    assert session["messages"] == []
+    assert session["message_count"] == 2
+    assert session["last_message_at"] == 1001.0
+
+
 def test_state_db_reconciliation_preserves_tool_metadata(monkeypatch, tmp_path):
     import api.routes as routes
 

From dc5c8168d154d4621fb8faefff2a142a64163d94 Mon Sep 17 00:00:00 2001
From: Lumen Yang <lumen.yang@lumeny.io>
Date: Tue, 19 May 2026 21:34:08 +0000
Subject: [PATCH 13/14] fix(webui): refresh active session on external sidecar
 updates

---
 api/models.py                               | 23 ++++++++++++++++--
 api/routes.py                               | 13 ++++++++---
 tests/test_webui_state_db_reconciliation.py | 26 +++++++++++++++++++++
 3 files changed, 57 insertions(+), 5 deletions(-)

diff --git a/api/models.py b/api/models.py
index 6ff748696b..fe932d288c 100644
--- a/api/models.py
+++ b/api/models.py
@@ -436,7 +436,14 @@ def __init__(self, session_id: str=None, title: str='Untitled',
         self.read_only = bool(kwargs.get('read_only', False))
         self.enabled_toolsets = enabled_toolsets  # List[str] or None — per-session toolset override
         self.composer_draft = composer_draft if isinstance(composer_draft, dict) else {}
-        self._metadata_message_count = None
+        raw_message_count = kwargs.get('message_count')
+        parsed_message_count = None
+        if raw_message_count is not None:
+            try:
+                parsed_message_count = int(raw_message_count)
+            except (TypeError, ValueError):
+                parsed_message_count = None
+        self._metadata_message_count = parsed_message_count if parsed_message_count is not None and parsed_message_count >= 0 else None
 
     @property
     def path(self):
@@ -590,7 +597,19 @@ def load_metadata_only(cls, sid):
             parsed['messages'] = []
             parsed['tool_calls'] = []
             session = cls(**parsed)
-            session._metadata_message_count = _lookup_index_message_count(sid)
+            metadata_message_count = _lookup_index_message_count(sid)
+            if metadata_message_count is None:
+                raw_count = parsed.get('message_count')
+                if isinstance(raw_count, int) and raw_count >= 0:
+                    metadata_message_count = raw_count
+                else:
+                    try:
+                        parsed_count = int(raw_count)
+                    except (TypeError, ValueError):
+                        parsed_count = None
+                    if parsed_count is not None and parsed_count >= 0:
+                        metadata_message_count = parsed_count
+            session._metadata_message_count = metadata_message_count
             # Mark this session as a metadata-only stub. save() refuses to write
             # such a session because doing so would atomically replace the
             # on-disk JSON with messages=[], wiping the conversation. Any
diff --git a/api/routes.py b/api/routes.py
index 01671c2af1..b3a7c8373d 100644
--- a/api/routes.py
+++ b/api/routes.py
@@ -3709,9 +3709,16 @@ def handle_get(handler, parsed) -> bool:
                     _all_msgs = merge_session_messages_append_only(cli_messages, sidecar_messages)
                 else:
                     _all_msgs = merge_session_messages_append_only(getattr(s, "messages", []) or [], state_db_messages)
-            if not load_messages and state_db_summary:
+            if not load_messages:
                 sidecar_messages = getattr(s, "messages", []) or []
                 sidecar_count = len(sidecar_messages)
+                if sidecar_count == 0:
+                    try:
+                        metadata_count = getattr(s, "_metadata_message_count", None)
+                        if metadata_count is not None:
+                            sidecar_count = max(0, int(metadata_count))
+                    except (TypeError, ValueError):
+                        sidecar_count = 0
                 try:
                     sidecar_last = max(
                         float((m or {}).get("timestamp") or 0)
@@ -3720,8 +3727,8 @@ def handle_get(handler, parsed) -> bool:
                     ) if sidecar_messages else 0
                 except (TypeError, ValueError):
                     sidecar_last = 0
-                state_count = int(state_db_summary.get("message_count") or 0)
-                state_last = float(state_db_summary.get("last_message_at") or 0)
+                state_count = int(state_db_summary.get("message_count") or 0) if state_db_summary else 0
+                state_last = float(state_db_summary.get("last_message_at") or 0) if state_db_summary else 0
                 _all_msgs = sidecar_messages
                 _summary_message_count = max(sidecar_count, state_count)
                 _summary_last_message_at = max(sidecar_last, state_last)
diff --git a/tests/test_webui_state_db_reconciliation.py b/tests/test_webui_state_db_reconciliation.py
index 0180345057..3bb30bd6be 100644
--- a/tests/test_webui_state_db_reconciliation.py
+++ b/tests/test_webui_state_db_reconciliation.py
@@ -135,6 +135,32 @@ def test_api_session_includes_state_db_messages_newer_than_webui_sidecar(monkeyp
     assert payload["session"]["message_count"] == 4
 
 
+def test_metadata_poll_uses_sidecar_message_count_for_external_updates(monkeypatch, tmp_path):
+    """Active-session external refresh relies on metadata-only counts.
+
+    When no session index exists, metadata-only loads may fall back to
+    _metadata_message_count=None. The refresh poll must still report the real
+    sidecar message count; otherwise an external session JSON update can be
+    invisible until a full reload.
+    """
+    import api.routes as routes
+
+    sid = "webui_reconcile_metadata_sidecar"
+    sidecar_messages = [
+        {"role": "user", "content": "before external update", "timestamp": 1000.0},
+        {"role": "assistant", "content": "externally appended", "timestamp": 1001.0},
+    ]
+    _install_test_session(monkeypatch, tmp_path, sid, sidecar_messages)
+
+    handler = _GetHandler(f"/api/session?session_id={sid}&messages=0&resolve_model=0")
+    routes.handle_get(handler, urlparse(handler.path))
+
+    assert handler.status == 200
+    session = handler.response_json["session"]
+    assert session["message_count"] == 2
+    assert session["last_message_at"] == 1001.0
+
+
 def test_state_db_reconciliation_preserves_sidecar_only_messages(monkeypatch, tmp_path):
     import api.routes as routes
 

From 7ae97c551a3b93ab71574571572389e0ebef3863 Mon Sep 17 00:00:00 2001
From: nesquena-hermes <[email protected]>
Date: Tue, 19 May 2026 22:11:44 +0000
Subject: [PATCH 14/14] Stamp CHANGELOG for v0.51.94 (Release BR / stage-387 /
 10-PR full sweep batch)

---
 CHANGELOG.md | 18 +++++++++++++-----
 1 file changed, 13 insertions(+), 5 deletions(-)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index ab8a179e1f..20fd62773a 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -2,14 +2,24 @@
 
 ## [Unreleased]
 
-### Added
 
-- **PR #TBD** by @Michaelyklam (refs #1925) — Add the first Slice 4b `RunnerRuntimeAdapter` facade for future runner/sidecar backends. The facade delegates `start_run`, `observe_run`, `get_run`, and control calls to an injected runner client, normalizes results into the existing RuntimeAdapter dataclasses, carries explicit profile/workspace/model payloads, and returns bounded unsupported-control results without owning `AIAgent`, stream, cancel, approval, clarify, goal, or queue state. No route wiring or default-on runner mode is introduced.
+## [v0.51.94] — 2026-05-19 — Release BR (stage-387 — 10-PR full sweep batch — Slice 4b runner adapter facade + folder zip download + partial recovery marker dedupe + browser api() client-side timeout + auto-compression card rotation finish + composer draft rollback fix + metadata count reconciliation + active-session refresh on external sidecar updates + indexed context metadata + gateway-queues approval peek)
 
 ### Fixed
 
-- **PR #2604** by @Michaelyklam (refs #2594) — Make the metadata-only `/api/session?messages=0` path report the same reconciled message count and last-message timestamp as a full session load. Sidebar refresh polling no longer loops forever when `state.db` retains old rows that the append-only merge correctly filters out.
+- **PR #2566** by @bjb2 — Add `GET /api/folder/download?session_id=...&path=...` streaming-zip endpoint with pre-flight 413 on size/file-count cap exceeded, `os.walk(followlinks=False)` plus per-symlink workspace-root resolution check, `allowZip64=True` for large files, and a "Download Folder" item in the workspace file context menu (dir items only). Configurable caps via `HERMES_WEBUI_FOLDER_ZIP_MAX_MB` (1024 default) and `HERMES_WEBUI_FOLDER_ZIP_MAX_FILES` (50000 default). `download_folder` i18n key added across all 11 locales with `// TODO: translate` fallback markers for non-en entries.
+- **PR #2593** by @Michaelyklam (closes #2592) — Deduplicate cancelled/recovered partial assistant markers using the full `(content, reasoning, partial tool calls)` payload instead of only non-empty text content. Tool-only failed turns no longer append identical empty-content `_partial` messages repeatedly. Full session loads collapse adjacent duplicate partial markers from already-bloated session files while preserving a `.partial-bak-<timestamp>` backup. New helpers `_partial_message_signature()` (api/streaming.py:2593-2622) + `_partial_marker_already_present()` (api/streaming.py:2625-2641) scope the dedup search to the current user turn only.
+- **PR #2597** by @dso2ng (closes #2539) — Add a 30s default client-side timeout to the shared browser `api()` helper, with per-call `timeoutMs` overrides, `AbortController`-based cancellation, a timeout toast, and explicit 60s/120s ceilings for legitimately longer update flows. The body-read phase is also raced against the timeout, so a call to a server that replies headers-OK and then stalls mid-JSON rejects cleanly. New `tests/test_api_timeout.py` covers default, override, abort, and body-read-stall paths.
+- **PR #2601** by @starship-s — Prevent the composer-draft rollback regression introduced by #2581's active-session external-refresh polling. Adds `opts.preserveActiveInput` to `_restoreComposerDraft` and gates the overwrite on `current && current !== text`, keeping the guard co-located with the function that owns the contract. Backend `s.save(touch_updated_at=False)` for `/api/session/draft` so draft autosaves no longer falsely advance `updated_at` and trigger the refresh poll. Supersedes parallel-discovery PR #2602.
+- **PR #2603** by @starship-s — Finish the running auto-compression card after the backend rotates the session id. The `compressed` SSE listener at `static/messages.js:1829-1862` used to early-return whenever `S.session.session_id !== activeSid`, but the `state` event listener at `:1656-1662` already rotates `window._compressionUi.sessionId` to the continuation id before `compressed` arrives. The strict active-session check is replaced with a cross-session safety check that still rejects mismatched events but no longer rejects the legitimate post-rotation `done` payload, so the elapsed-timer "compressing…" state no longer freezes after rotation completes.
+- **PR #2604** by @Michaelyklam (closes #2594) — Reconcile session metadata counts in the `/api/session?messages=0` fast path. Replaces the prior `max(sidecar_count, state_count)` heuristic with `len(merge_session_messages_append_only(sidecar_messages, state_db_messages))` so the metadata-only count matches the full-load count. Closes the followup issue filed against PR #2581 / v0.51.93 — sidebar refresh polling no longer loops forever when `state.db` retains old rows that the append-only merge correctly filters out.
+- **PR #2605** by @LumenYoung (refs #2581) — Make the metadata-only `/api/session?messages=0&resolve_model=0` path return the persisted sidecar `message_count` from `Session._metadata_message_count` when no session-index entry exists, so the active-session external-refresh signal still trips on legacy sessions whose sidecar contains externally-appended content. Composed cleanly with #2604 (the legacy-fallback applies only when the reconciled merged count is zero).
+- **PR #2573** by @espokaos-ops (closes #2510) — Persist session-level approvals when an "Allow for this session" click lands while a stream is active and `_pending` is empty. The approval flow now peeks `_gateway_queues[sid]` to recover the queued `_ApprovalEntry`'s `pattern_keys` so `approve_session()` records the approval; the next dangerous command in the same session no longer asks again. Reduced scope to peek-only per prior review note; the `agent_session_key` round-trip plumbing was dropped (it was dead on the WebUI streaming path).
+
+### Added
 
+- **PR #2599** by @Michaelyklam (refs #1925) — Add the Slice 4b `RunnerRuntimeAdapter` facade — a protocol-translator client over a future runner/sidecar backend. The facade delegates `start_run`, `observe_run`, `get_run`, and control calls to an injected runner client, normalizes results into the existing `RunStartResult`/`RunEventStream`/`RunStatus`/`ControlResult` dataclasses, carries explicit `profile`/`workspace`/`model` payload fields, and returns bounded `unsupported` control results without owning `AIAgent`, stream lifecycle, cancel/approval/clarify queues, goal state, or cached-agent table. No route wiring, no default-on runner mode, no public response-shape change.
+- **PR #2600** by @LumenYoung (refs #2266) — Slimmer WebUI follow-up from the closed LCM/context-engine PR #2266. Adds rendering and persistence for context-engine compression-anchor metadata (when present on a session or live compression event) including an "Indexed context" detail line on auto-compression cards. No agent-layer clone orchestration; WebUI-only metadata surface.
 
 ## [v0.51.93] — 2026-05-19 — Release BQ (stage-386 — 10-PR full sweep batch — RFC Slice 4 runner/sidecar gate + workspace tree toggle width CSS variable + settled file:// markdown link rendering + prompt-cache coverage percentage fix + terminal shell shutdown reap + configured model picker provider preservation + profile-aware assistant display names + state.db reconciliation slice 1 + queued-message cross-session drain fix + stale-stream writeback supersede)
 
@@ -25,8 +35,6 @@
 - **PR #2588** by @Michaelyklam (refs #2569) — Preserve the configured provider when choosing a configured model from the composer picker. `_getOptionProviderId()` now reads `data-provider` from temporary `<option data-custom="1">` rows (created by `selectModelFromDropdown` for configured models outside the native catalog), so the next send routes through the correct provider instead of falling back to whatever provider was already active.
 
 
-
-- **PR #2593** by @Michaelyklam (closes #2592) — Deduplicate cancelled/recovered partial assistant markers using the full `(content, reasoning, partial tool calls)` payload instead of only non-empty text content. Tool-only failed turns no longer append identical empty-content `_partial` messages repeatedly, and full session loads collapse adjacent duplicate partial markers from already-bloated session files while preserving a backup.
 ### Changed
 
 - **PR #2581** by @LumenYoung (refs #2194) — First recovery slice from the closed reconciliation PR #2194. Routes streaming session reconstruction and sidebar metadata through the reconciled state.db/session-summary path with a metadata-only fast path for sidebar polls and a single-snapshot reuse on the streaming hot path. Includes the reviewer-requested `_new_turn_context_from_messages` extraction so both legacy and streaming paths share the `_drop_checkpointed_current_user_from_context` + casual-fresh-chat suppression behavior (refs #1217 / #2308). 923 LOC across `api/models.py`, `api/routes.py`, `api/streaming.py`, `static/sessions.js` + four new test files; second-pass agent diff review LGTM after the streaming-path regression was caught and fixed.