From e4d16e93c76de884c9f831c00aa4944b29bd348e Mon Sep 17 00:00:00 2001
From: Jordan SkyLF <jordan@skylinkfiber.net>
Date: Tue, 12 May 2026 12:53:28 -0700
Subject: [PATCH 01/28] fix: clarify cancelled chat turn status

---
 api/streaming.py                              | 334 +++++++++++++++++-
 static/messages.js                            |   9 +-
 static/ui.js                                  |   3 +-
 tests/test_cancelled_turn_status.py           | 167 +++++++++
 tests/test_issue1361_cancel_data_loss.py      |  72 ++++
 .../test_issue893_cancel_preserves_partial.py |   9 +-
 .../test_pr1341_context_window_persistence.py |   7 +-
 tests/test_sprint36.py                        |   8 +-
 8 files changed, 579 insertions(+), 30 deletions(-)
 create mode 100644 tests/test_cancelled_turn_status.py

diff --git a/api/streaming.py b/api/streaming.py
index 8db29e25e3..9bd0480404 100644
--- a/api/streaming.py
+++ b/api/streaming.py
@@ -136,6 +136,40 @@ def _classify_provider_error(err_str: str, exc=None, *, silent_failure: bool = F
     err_str = str(err_str or '')
     _err_lower = err_str.lower()
     _exc_name = type(exc).__name__ if exc is not None else ''
+    _is_cancelled = (
+        'cancelled by user' in _err_lower
+        or 'canceled by user' in _err_lower
+        or 'user cancelled' in _err_lower
+        or 'user canceled' in _err_lower
+        or 'task cancelled' in _err_lower
+        or 'task canceled' in _err_lower
+        or (exc is not None and type(exc).__name__ in ('CancelledError', 'CanceledError'))
+    )
+    _is_interrupted = (
+        not _is_cancelled
+        and (
+            'interrupted by user' in _err_lower
+            or 'response interrupted' in _err_lower
+            or 'operation interrupted' in _err_lower
+            or 'operation was interrupted' in _err_lower
+            or 'operation aborted' in _err_lower
+            or 'request was aborted' in _err_lower
+            or 'aborterror' in _err_lower
+            or (exc is not None and type(exc).__name__ in ('KeyboardInterrupt', 'AbortError'))
+        )
+    )
+    if _is_cancelled:
+        return {
+            'label': 'Task cancelled',
+            'type': 'cancelled',
+            'hint': 'The run was cancelled by the user before Skyly finished. No provider failure occurred.',
+        }
+    if _is_interrupted:
+        return {
+            'label': 'Response interrupted',
+            'type': 'interrupted',
+            'hint': 'The run stopped before a provider response completed. If you did not cancel it, try again.',
+        }
     _is_quota = _is_quota_error_text(err_str)
     _is_auth = (
         not _is_quota and (
@@ -213,6 +247,92 @@ def _provider_error_payload(message: str, err_type: str, hint: str = '') -> dict
     return payload
 
 
+def _session_has_cancel_marker(session) -> bool:
+    """Report whether the current turn already ends in a cancel/interrupt marker.
+
+    Scans newest-first and stops at the latest user message, so only assistant
+    messages written after that user message are considered.
+    """
+    _phrases = ('task cancelled', 'task canceled', 'response interrupted')
+    for _entry in reversed(getattr(session, 'messages', None) or []):
+        if not isinstance(_entry, dict):
+            continue
+        _role = _entry.get('role')
+        if _role == 'user':
+            return False
+        if _role != 'assistant':
+            continue
+        _body = _entry.get('content')
+        if isinstance(_body, list):
+            _body = '\n'.join(
+                str(_p.get('text') or _p.get('content') or '')
+                for _p in _body if isinstance(_p, dict)
+            )
+        _haystack = _body.strip().lower() if isinstance(_body, str) else ''
+        if any(_phrase in _haystack for _phrase in _phrases):
+            return True
+    return False
+
+
+def _cancelled_turn_content(message: str = 'Task cancelled.') -> str:
+    """Return cancelled-turn Markdown (label, message, hint); blank input uses the default."""
+    _message = str(message or '').strip() or 'Task cancelled.'
+    if not _message.endswith(('.', '!', '?')):  # keep punctuation the caller already supplied
+        _message += '.'
+    return (
+        f"**Task cancelled:** {_message}\n\n"
+        "*The run was cancelled by the user before Skyly finished. No provider failure occurred.*"
+    )
+
+
+def _persist_cancelled_turn(session, *, message: str = 'Task cancelled.') -> None:
+    """Record a user-initiated cancel as the turn's terminal state.
+
+    cancel_stream() usually writes the marker first; the worker may still unwind
+    here later, so the marker is appended only when none is already persisted.
+    """
+    _materialize_pending_user_turn_before_error(session)
+    session.active_stream_id = None
+    session.pending_user_message = None
+    session.pending_attachments = []
+    session.pending_started_at = None
+    if _session_has_cancel_marker(session):
+        return
+    session.messages.append({
+        'role': 'assistant',
+        'content': _cancelled_turn_content(message),
+        '_error': True,
+        'provider_details': str(message or 'Task cancelled.').strip(),
+        'provider_details_label': 'Cancellation details',
+        'timestamp': int(time.time()),
+    })
+
+
+def _cleanup_ephemeral_cancelled_turn(session) -> None:
+    """Reset in-flight stream fields, then delete the transient /btw session file unsaved."""
+    _reset = {'active_stream_id': None, 'pending_user_message': None,
+              'pending_attachments': [], 'pending_started_at': None}
+    for _attr, _value in _reset.items():
+        setattr(session, _attr, _value)
+    try:
+        from pathlib import Path
+        Path(session.path).unlink(missing_ok=True)
+    except Exception:
+        logger.debug("Failed to clean up ephemeral cancelled session", exc_info=True)
+
+
+def _finalize_cancelled_turn(session, *, ephemeral: bool = False, message: str = 'Task cancelled.') -> None:
+    """Finish a cancelled turn: discard ephemeral sessions, persist and save regular ones."""
+    if not ephemeral:
+        _persist_cancelled_turn(session, message=message)
+        try:
+            session.save()
+        except Exception:
+            logger.debug("Failed to persist cancelled turn", exc_info=True)
+        return
+    _cleanup_ephemeral_cancelled_turn(session)
+
+
 def _aiagent_import_error_detail() -> str:
     """Return a multi-line diagnostic string for the "AIAgent not available" path.
 
@@ -2277,6 +2397,8 @@ def _agent_status_callback(kind, message):
         # TD1: set thread-local env context so concurrent sessions don't clobber globals
         # Check for pre-flight cancel (user cancelled before agent even started)
         if cancel_event.is_set():
+            with _agent_lock:
+                _finalize_cancelled_turn(s, ephemeral=ephemeral, message='Task cancelled before start.')
             put('cancel', {'message': 'Cancelled before start'})
             return
 
@@ -2996,6 +3118,8 @@ def on_tool_complete(tool_call_id, name, args, function_result):
                         agent.interrupt("Cancelled before start")
                     except Exception:
                         logger.debug("Failed to interrupt agent before start")
+                    with _agent_lock:
+                        _finalize_cancelled_turn(s, ephemeral=ephemeral, message='Task cancelled before start.')
                     put('cancel', {'message': 'Cancelled by user'})
                     return
 
@@ -3097,6 +3221,30 @@ def _periodic_checkpoint():
                 task_id=session_id,
                 persist_user_message=msg_text,
             )
+            if cancel_event.is_set():
+                if _checkpoint_stop is not None:
+                    _checkpoint_stop.set()
+                if _ckpt_thread is not None:
+                    _ckpt_thread.join(timeout=15)
+                if ephemeral:
+                    _cleanup_ephemeral_cancelled_turn(s)
+                else:
+                    with _agent_lock:
+                        _finalize_cancelled_turn(s, ephemeral=False)
+                        try:
+                            append_turn_journal_event_for_stream(
+                                s.session_id,
+                                stream_id,
+                                {
+                                    "event": "interrupted",
+                                    "created_at": time.time(),
+                                    "reason": "cancelled",
+                                },
+                            )
+                        except Exception:
+                            logger.debug("Failed to append cancelled turn journal event", exc_info=True)
+                put('cancel', {'message': 'Cancelled by user'})
+                return
             # ── Ephemeral mode (/btw): deliver answer, skip persistence, cleanup ──
             if ephemeral:
                 _answer = ''
@@ -3122,8 +3270,41 @@ def _periodic_checkpoint():
                 _checkpoint_stop.set()
             if _ckpt_thread is not None:
                 _ckpt_thread.join(timeout=15)
+            if cancel_event.is_set():
+                with _agent_lock:
+                    _finalize_cancelled_turn(s, ephemeral=False)
+                    try:
+                        append_turn_journal_event_for_stream(
+                            s.session_id,
+                            stream_id,
+                            {
+                                "event": "interrupted",
+                                "created_at": time.time(),
+                                "reason": "cancelled",
+                            },
+                        )
+                    except Exception:
+                        logger.debug("Failed to append cancelled turn journal event", exc_info=True)
+                put('cancel', {'message': 'Cancelled by user'})
+                return
             with _agent_lock:
                 _result_messages = result.get('messages') or _previous_context_messages
+                if cancel_event.is_set():
+                    _finalize_cancelled_turn(s, ephemeral=False)
+                    try:
+                        append_turn_journal_event_for_stream(
+                            s.session_id,
+                            stream_id,
+                            {
+                                "event": "interrupted",
+                                "created_at": time.time(),
+                                "reason": "cancelled",
+                            },
+                        )
+                    except Exception:
+                        logger.debug("Failed to append cancelled turn journal event", exc_info=True)
+                    put('cancel', {'message': 'Cancelled by user'})
+                    return
                 _next_context_messages = _restore_reasoning_metadata(
                     _previous_context_messages,
                     _result_messages,
@@ -3162,6 +3343,23 @@ def _periodic_checkpoint():
                 )
                 # _token_sent tracks whether on_token() was called (any streamed text)
                 if not _assistant_added and not _token_sent:
+                    if cancel_event.is_set():
+                        _finalize_cancelled_turn(s, ephemeral=ephemeral)
+                        if not ephemeral:
+                            try:
+                                append_turn_journal_event_for_stream(
+                                    s.session_id,
+                                    stream_id,
+                                    {
+                                        "event": "interrupted",
+                                        "created_at": time.time(),
+                                        "reason": "cancelled",
+                                    },
+                                )
+                            except Exception:
+                                logger.debug("Failed to append cancelled turn journal event", exc_info=True)
+                        put('cancel', {'message': 'Cancelled by user'})
+                        return
                     _last_err = getattr(agent, '_last_error', None) or result.get('error') or ''
                     _err_str = str(_last_err) if _last_err else ''
                     _classification = _classify_provider_error(
@@ -3313,6 +3511,10 @@ def _periodic_checkpoint():
                         }
                         if _error_payload.get('details'):
                             _error_message['provider_details'] = _error_payload['details']
+                        if _err_type == 'cancelled':
+                            _error_message['provider_details_label'] = 'Cancellation details'
+                        elif _err_type == 'interrupted':
+                            _error_message['provider_details_label'] = 'Interruption details'
                         s.messages.append(_error_message)
                         try:
                             s.save()
@@ -3600,7 +3802,39 @@ def _periodic_checkpoint():
                             )
                         except Exception:
                             logger.debug("Failed to append assistant_started turn journal event", exc_info=True)
+                if cancel_event.is_set():
+                    _finalize_cancelled_turn(s, ephemeral=False)
+                    try:
+                        append_turn_journal_event_for_stream(
+                            s.session_id,
+                            stream_id,
+                            {
+                                "event": "interrupted",
+                                "created_at": time.time(),
+                                "reason": "cancelled",
+                            },
+                        )
+                    except Exception:
+                        logger.debug("Failed to append cancelled turn journal event", exc_info=True)
+                    put('cancel', {'message': 'Cancelled by user'})
+                    return
                 s.save()
+                if cancel_event.is_set():
+                    _finalize_cancelled_turn(s, ephemeral=False)
+                    try:
+                        append_turn_journal_event_for_stream(
+                            s.session_id,
+                            stream_id,
+                            {
+                                "event": "interrupted",
+                                "created_at": time.time(),
+                                "reason": "cancelled",
+                            },
+                        )
+                    except Exception:
+                        logger.debug("Failed to append cancelled turn journal event", exc_info=True)
+                    put('cancel', {'message': 'Cancelled by user'})
+                    return
                 if not ephemeral:
                     try:
                         append_turn_journal_event_for_stream(
@@ -3857,12 +4091,38 @@ def _periodic_checkpoint():
             err_str = _stripped
         _exc_lower = err_str.lower()
         _classification = _classify_provider_error(err_str, e)
+        if cancel_event.is_set():
+            if s is not None:
+                if _checkpoint_stop is not None:
+                    _checkpoint_stop.set()
+                if _ckpt_thread is not None:
+                    _ckpt_thread.join(timeout=15)
+                _lock_ctx = _agent_lock if _agent_lock is not None else contextlib.nullcontext()
+                with _lock_ctx:
+                    _finalize_cancelled_turn(s, ephemeral=ephemeral)
+                    if not ephemeral:
+                        try:
+                            append_turn_journal_event_for_stream(
+                                s.session_id,
+                                stream_id,
+                                {
+                                    "event": "interrupted",
+                                    "created_at": time.time(),
+                                    "reason": "cancelled",
+                                },
+                            )
+                        except Exception:
+                            logger.debug("Failed to append cancelled turn journal event", exc_info=True)
+            put('cancel', {'message': 'Cancelled by user'})
+            return
         _exc_is_quota = _classification['type'] == 'quota_exhausted'
         # Exception quota text still includes: 'more credits' in _exc_lower, 'can only afford' in _exc_lower, 'fewer max_tokens' in _exc_lower.
         # Rate-limit detection remains guarded as: (not _exc_is_quota).
         _exc_is_rate_limit = (_classification['type'] == 'rate_limit') and (not _exc_is_quota)
         _exc_is_auth = _classification['type'] == 'auth_mismatch'  # detects '401' and 'unauthorized' via _classify_provider_error.
         _exc_is_not_found = _classification['type'] == 'model_not_found'  # detects '404', 'not found', 'does not exist', and 'invalid model'.
+        _exc_is_cancelled = _classification['type'] == 'cancelled'
+        _exc_is_interrupted = _classification['type'] == 'interrupted'
 
         # The user hint still points to Settings / `hermes model` from _classify_provider_error().
         if _exc_is_quota:
@@ -3955,6 +4215,10 @@ def _periodic_checkpoint():
             _exc_label, _exc_type, _exc_hint = (
                 _classification['label'], _classification['type'], _classification['hint'],
             )
+        elif _exc_is_cancelled or _exc_is_interrupted:
+            _exc_label, _exc_type, _exc_hint = (
+                _classification['label'], _classification['type'], _classification['hint'],
+            )
         else:
             _exc_label, _exc_type, _exc_hint = 'Error', 'error', ''
 
@@ -3982,6 +4246,10 @@ def _periodic_checkpoint():
                 }
                 if _error_payload.get('details'):
                     _error_message['provider_details'] = _error_payload['details']
+                if _exc_type == 'cancelled':
+                    _error_message['provider_details_label'] = 'Cancellation details'
+                elif _exc_type == 'interrupted':
+                    _error_message['provider_details_label'] = 'Interruption details'
                 s.messages.append(_error_message)
                 try:
                     s.save()
@@ -4185,13 +4453,12 @@ def cancel_stream(stream_id: str) -> bool:
         except Exception:
             logger.debug("Failed to clear clarify prompt during cancel")
 
-        # Put a cancel sentinel into the queue so the SSE handler wakes up
+        # Capture the queue while the stream still exists, but do not emit the
+        # terminal cancel event until the session cleanup below confirms the turn
+        # is still active. Otherwise a late Stop click can race with a successful
+        # worker save and show cancel in the client while persistence says done.
         q = streams.get(stream_id)
-        if q:
-            try:
-                q.put_nowait(('cancel', {'message': 'Cancelled by user'}))
-            except Exception:
-                logger.debug("Failed to put cancel event to queue")
+        _emit_cancel_event = True
 
         # ── Eager session lock release (fixes #653) ──────────────────────────
         # Pop stream state now so the 409 guard in routes.py sees the session
@@ -4241,6 +4508,16 @@ def cancel_stream(stream_id: str) -> bool:
         with _get_session_agent_lock(_cancel_session_id):
             try:
                 _cs = get_session(_cancel_session_id)
+                if not isinstance(getattr(_cs, 'messages', None), list):
+                    _cs.messages = []
+                if (getattr(_cs, 'active_stream_id', None) != stream_id
+                        and not getattr(_cs, 'pending_user_message', None)):
+                    # The worker won the race and already finalized this turn.
+                    # Do not append a contradictory cancel marker or emit a
+                    # terminal cancel event after the client may have received
+                    # the successful done payload.
+                    _emit_cancel_event = False
+                    return True
                 # ── Preserve the user's typed message before clearing pending state (#1298) ──
                 # The agent's internal messages list (where the user message was appended at
                 # the start of run_conversation()) may not have been merged back into
@@ -4334,7 +4611,27 @@ def cancel_stream(stream_id: str) -> bool:
                 # reasoning-only or tool-only stream produced NO partial message).
                 _has_reasoning = bool(_cancel_reasoning and _cancel_reasoning.strip())
                 _has_tools = bool(_cancel_tool_calls)
-                if _stripped or _has_reasoning or _has_tools:
+                _cancel_marker_exists = _session_has_cancel_marker(_cs)
+                _cancel_marker_idx = len(_cs.messages)
+                if _cancel_marker_exists:
+                    for _idx in range(len(_cs.messages) - 1, -1, -1):
+                        _m = _cs.messages[_idx]
+                        if not isinstance(_m, dict) or _m.get('role') != 'assistant':
+                            continue
+                        _content = str(_m.get('content') or '').strip().lower()
+                        if 'task cancelled' in _content or 'task canceled' in _content or 'response interrupted' in _content:
+                            _cancel_marker_idx = _idx
+                            break
+                _partial_already_present = False
+                if _stripped:
+                    for _m in _cs.messages:
+                        if not isinstance(_m, dict) or _m.get('role') != 'assistant' or _m.get('_error'):
+                            continue
+                        _existing = str(_m.get('content') or '').strip()
+                        if _existing and (_stripped in _existing or _existing in _stripped):
+                            _partial_already_present = True
+                            break
+                if (_stripped or _has_reasoning or _has_tools) and not _partial_already_present:
                     _partial_msg: dict = {
                         'role': 'assistant',
                         'content': _stripped,  # may be empty for reasoning/tool-only turns
@@ -4361,18 +4658,27 @@ def cancel_stream(stream_id: str) -> bool:
                         # alongside the regular tool_calls path.
                         # (Opus pre-release review pass 2 of v0.50.251.)
                         _partial_msg['_partial_tool_calls'] = list(_cancel_tool_calls)
-                    _cs.messages.append(_partial_msg)
+                    _cs.messages.insert(_cancel_marker_idx, _partial_msg)
                 # Cancel marker — flagged _error=True so it is stripped from conversation
                 # history on the next turn (prevents model from seeing "Task cancelled."
                 # as a prior assistant reply).
-                _cs.messages.append({
-                    'role': 'assistant',
-                    'content': '*Task cancelled.*',
-                    '_error': True,
-                    'timestamp': int(time.time()),
-                })
+                if not _cancel_marker_exists:
+                    _cs.messages.append({
+                        'role': 'assistant',
+                        'content': _cancelled_turn_content('Task cancelled.'),
+                        '_error': True,
+                        'provider_details': 'Task cancelled.',
+                        'provider_details_label': 'Cancellation details',
+                        'timestamp': int(time.time()),
+                    })
                 _cs.save()
             except Exception:
                 logger.debug("Failed to clear session state on cancel for %s", _cancel_session_id)
 
+    if _emit_cancel_event and q:
+        try:
+            q.put_nowait(('cancel', {'message': 'Cancelled by user'}))
+        except Exception:
+            logger.debug("Failed to put cancel event to queue")
+
     return True
diff --git a/static/messages.js b/static/messages.js
index fc7c6d975f..83ad09cb0f 100644
--- a/static/messages.js
+++ b/static/messages.js
@@ -1228,11 +1228,14 @@ function attachLiveStream(activeSid, streamId, uploaded=[], options={}){
           const isQuotaExhausted=d.type==='quota_exhausted';
           const isAuthMismatch=d.type==='auth_mismatch';
           const isModelNotFound=d.type==='model_not_found';
+          const isCancelled=d.type==='cancelled';
+          const isInterrupted=d.type==='interrupted';
           const isNoResponse=d.type==='no_response'||d.type==='silent_failure';
-          const label=isQuotaExhausted?'Out of credits':isRateLimit?'Rate limit reached':isAuthMismatch?(typeof t==='function'?t('provider_mismatch_label'):'Provider mismatch'):isModelNotFound?(typeof t==='function'?t('model_not_found_label'):'Model not found'):isNoResponse?'No response received':'Error';
+          const label=isCancelled?'Task cancelled':isInterrupted?'Response interrupted':isQuotaExhausted?'Out of credits':isRateLimit?'Rate limit reached':isAuthMismatch?(typeof t==='function'?t('provider_mismatch_label'):'Provider mismatch'):isModelNotFound?(typeof t==='function'?t('model_not_found_label'):'Model not found'):isNoResponse?'No response from provider':'Error';
           const hint=d.hint?`\n\n*${d.hint}*`:'';
           const details=d.details?String(d.details).replace(/```/g,'`\u200b``'):'';
-          S.messages.push({role:'assistant',content:`**${label}:** ${d.message}${hint}`,provider_details:details});
+          const detailsLabel=isCancelled?'Cancellation details':isInterrupted?'Interruption details':undefined;
+          S.messages.push({role:'assistant',content:`**${label}:** ${d.message}${hint}`,provider_details:details,provider_details_label:detailsLabel});
         }catch(_){
           S.messages.push({role:'assistant',content:'**Error:** An error occurred. Check server logs.'});
         }
@@ -1323,7 +1326,7 @@ function attachLiveStream(activeSid, streamId, uploaded=[], options={}){
           // Fallback to local cancel message if API fails
           if(S.session&&S.session.session_id===activeSid){
             clearLiveToolCards();if(!assistantText)removeThinking();
-            S.messages.push({role:'assistant',content:'*Task cancelled.*'});renderMessages({preserveScroll:true});
+            S.messages.push({role:'assistant',content:'**Task cancelled:** Task cancelled.\n\n*The run was cancelled by the user before Skyly finished. No provider failure occurred.*',provider_details:'Task cancelled.',provider_details_label:'Cancellation details',_error:true});renderMessages({preserveScroll:true});
             _markSessionViewed(activeSid, S.messages.length);
           }
         }
diff --git a/static/ui.js b/static/ui.js
index d071d93ed9..1cbb21c58d 100644
--- a/static/ui.js
+++ b/static/ui.js
@@ -5013,7 +5013,8 @@ function renderMessages(options){
     }
     let bodyHtml = isUser ? _renderUserFencedBlocks(displayContent) : renderMd(_stripXmlToolCallsDisplay(String(displayContent)));
     if(!isUser&&m.provider_details){
-      bodyHtml += `<details class="provider-error-details"><summary>Provider details</summary><pre><code>${esc(String(m.provider_details))}</code></pre></details>`;
+      const summary=m.provider_details_label||'Provider details';
+      bodyHtml += `<details class="provider-error-details"><summary>${esc(String(summary))}</summary><pre><code>${esc(String(m.provider_details))}</code></pre></details>`;
     }
     const statusHtml = (!isUser&&m._statusCard) ? _statusCardHtml(m._statusCard) : '';
     const isEditableUser=isUser&&rawIdx===lastUserRawIdx;
diff --git a/tests/test_cancelled_turn_status.py b/tests/test_cancelled_turn_status.py
new file mode 100644
index 0000000000..b241674c4d
--- /dev/null
+++ b/tests/test_cancelled_turn_status.py
@@ -0,0 +1,167 @@
+"""Regression tests for accurate cancelled/interrupted turn status.
+
+A user pressing Stop/Cancel must not be shown provider-empty guidance like
+"No response from provider". Provider-empty remains valid only when there was
+no explicit cancel/interruption signal.
+"""
+from __future__ import annotations
+
+import pathlib
+
+from api.streaming import _cancelled_turn_content, _classify_provider_error, _finalize_cancelled_turn
+
+REPO_ROOT = pathlib.Path(__file__).parent.parent.resolve()
+
+
+def _read(rel_path: str) -> str:
+    return REPO_ROOT.joinpath(rel_path).read_text(encoding="utf-8")
+
+
+class _DummySession:
+    """Minimal session stand-in whose save() only counts calls."""
+    def __init__(self, path: str = ''):
+        self.path, self.messages, self.saved = path, [], 0
+        # Pending-turn fields start populated so tests can observe them being cleared.
+        self.active_stream_id = 'stream-1'
+        self.pending_user_message = 'hello'
+        self.pending_attachments = ['a.txt']
+        self.pending_started_at = 123
+
+    def save(self, *args, **kwargs):
+        self.saved += 1
+
+
+class TestCancelledTurnClassification:
+    def test_user_cancelled_error_is_not_provider_no_response(self):
+        result = _classify_provider_error("Cancelled by user", Exception("Cancelled by user"))
+        # Explicit user-cancel text must map to the 'cancelled' type and its own hint.
+        assert result["type"] == "cancelled"
+        assert result["label"] == "Task cancelled"
+        assert "provider returned no content" not in result.get("hint", "").lower()
+        assert "rate limit" not in result.get("hint", "").lower()
+        assert "no provider failure" in result.get("hint", "").lower()
+
+    def test_interrupted_or_aborted_error_is_not_provider_no_response(self):
+        for text in (
+            "Interrupted by user",
+            "Operation aborted before provider response completed",
+            "AbortError: request was aborted",
+        ):
+            result = _classify_provider_error(text, RuntimeError(text))
+            assert result["type"] == "interrupted", text
+            assert result["label"] == "Response interrupted", text
+            assert "provider returned no content" not in result.get("hint", "").lower()
+
+    def test_provider_empty_response_still_uses_no_response(self):
+        result = _classify_provider_error("", None, silent_failure=True)
+        # Empty error text with silent_failure=True must still classify as 'no_response'.
+        assert result["type"] == "no_response"
+        assert result["label"] == "No response from provider"
+        assert "provider returned no content" in result.get("hint", "").lower()
+
+
+class TestCancelledTurnFinalizer:
+    def test_persistent_cancel_finalizer_clears_pending_and_saves_cancel_marker(self):
+        session = _DummySession()
+
+        _finalize_cancelled_turn(session, ephemeral=False)
+        # Pending state is cleared, save() ran once, and the marker is the last message.
+        assert session.active_stream_id is None
+        assert session.pending_user_message is None
+        assert session.pending_attachments == []
+        assert session.pending_started_at is None
+        assert session.saved == 1
+        assert session.messages[-1]['content'] == _cancelled_turn_content('Task cancelled.')
+        assert '**Task cancelled:** Task cancelled.' in session.messages[-1]['content']
+        assert 'No provider failure occurred' in session.messages[-1]['content']
+        assert session.messages[-1]['provider_details'] == 'Task cancelled.'
+        assert session.messages[-1]['provider_details_label'] == 'Cancellation details'
+        assert session.messages[-1]['_error'] is True
+
+    def test_ephemeral_cancel_finalizer_unlinks_temp_session_without_saving_error_marker(self, tmp_path):
+        temp_session = tmp_path / 'btw-session.json'
+        temp_session.write_text('{}', encoding='utf-8')
+        session = _DummySession(str(temp_session))
+
+        _finalize_cancelled_turn(session, ephemeral=True)
+        # Ephemeral path: state cleared and file deleted, with no save() and no marker.
+        assert session.active_stream_id is None
+        assert session.pending_user_message is None
+        assert session.pending_attachments == []
+        assert session.pending_started_at is None
+        assert session.saved == 0
+        assert session.messages == []
+        assert not temp_session.exists()
+
+    # Source-text check: ui.js must read provider_details_label with a default summary.
+    def test_message_renderer_allows_non_provider_details_label(self):
+        src = _read("static/ui.js")
+        assert "provider_details_label||'Provider details'" in src
+        assert "provider-error-details" in src
+
+
+class TestCancelledTurnPersistenceGuards:
+    def test_silent_failure_path_checks_cancel_event_before_persisting_provider_error(self):
+        src = _read("api/streaming.py")
+        silent_idx = src.find("# ── Detect silent agent failure")
+        assert silent_idx != -1, "silent-failure block not found"
+        apperror_idx = src.find("put('apperror', _error_payload)", silent_idx)
+        assert apperror_idx != -1, "silent-failure apperror emission not found"
+        block = src[silent_idx:apperror_idx]
+        # The guard must sit between the silent-failure marker and its apperror emit.
+        assert "cancel_event.is_set()" in block, (
+            "When a user cancels and the interrupted agent returns no assistant text, "
+            "the silent-failure path must not persist a provider no_response error."
+        )
+        assert "cancelled" in block.lower(), (
+            "The cancellation guard should persist/report a cancelled turn, not silently drop state."
+        )
+
+    def test_exception_path_classifies_after_cancel_event_before_generic_error(self):
+        src = _read("api/streaming.py")
+        except_idx = src.find("print('[webui] stream error:")
+        assert except_idx != -1, "stream exception handler not found"
+        classify_idx = src.find("_classify_provider_error", except_idx)
+        generic_idx = src.find("_exc_label, _exc_type, _exc_hint = 'Error', 'error', ''", except_idx)
+        assert classify_idx != -1 and generic_idx != -1
+        block = src[except_idx:generic_idx]
+        # The slice runs from the stream-error print to the generic 'Error' fallback.
+        assert "cancel_event.is_set()" in block, (
+            "Exception handling must distinguish user-cancelled/aborted runs before generic errors."
+        )
+        assert "cancelled" in block.lower() or "interrupted" in block.lower()
+        assert "provider_details_label" in src
+        assert "Cancellation details" in src
+        assert "Interruption details" in src
+
+    def test_post_run_cancel_guard_runs_before_normal_success_merge(self):
+        src = _read("api/streaming.py")
+        run_idx = src.find("result = agent.run_conversation(")
+        merge_idx = src.find("_result_messages = result.get", run_idx)
+        assert run_idx != -1 and merge_idx != -1, "run/merge path not found"
+        block = src[run_idx:merge_idx]
+        # The cancel guard must appear between run_conversation() and the result merge.
+        assert "cancel_event.is_set()" in block, (
+            "If cancellation arrives after tokens streamed but before run_conversation returns, "
+            "the worker must emit/persist cancel before normal merge/save/completed handling."
+        )
+        assert "put('cancel'" in block
+        assert "_cleanup_ephemeral_cancelled_turn" in block or "_finalize_cancelled_turn" in block, (
+            "Ephemeral cancels must clean up their temporary session before returning."
+        )
+        assert "return" in block
+
+    def test_frontend_has_cancelled_and_interrupted_labels_for_apperror_fallbacks(self):
+        src = _read("static/messages.js")
+        start = src.find("source.addEventListener('apperror'")
+        end = src.find("source.addEventListener('warning'", start)
+        assert start != -1 and end != -1, "apperror handler not found"
+        block = src[start:end]
+        # The handler text must carry both new types, their labels, and the detail labels.
+        assert "d.type==='cancelled'" in block or 'd.type==="cancelled"' in block
+        assert "d.type==='interrupted'" in block or 'd.type==="interrupted"' in block
+        assert "Task cancelled" in block
+        assert "Response interrupted" in block
+        assert "No response from provider" in block
+        assert "Cancellation details" in block
+        assert "Interruption details" in block
diff --git a/tests/test_issue1361_cancel_data_loss.py b/tests/test_issue1361_cancel_data_loss.py
index 09fc77f0de..817f02e5a1 100644
--- a/tests/test_issue1361_cancel_data_loss.py
+++ b/tests/test_issue1361_cancel_data_loss.py
@@ -441,3 +441,75 @@ def test_materialize_helper_called_immediately_before_error_path_clears():
         f"found {sites_with_helper}. PR #1760 / #1361 regression — re-wire the "
         f"helper at the error-branch clear sites in api/streaming.py."
     )
+
+
+
+class TestCancelStreamIdempotentWithWorkerFinalizer:
+    """The worker and explicit cancel endpoint can both finalize the same turn."""
+
+    def test_cancel_stream_does_not_duplicate_existing_worker_cancel_marker(self):
+        sid = "test_1361_idempotent"
+        stream_id = "stream_idempotent"
+        _make_session(
+            session_id=sid,
+            messages=[
+                {'role': 'user', 'content': 'Help me debug this', 'timestamp': 100},
+                {'role': 'assistant', 'content': '**Task cancelled:** Task cancelled.\n\n*The run was cancelled by the user before Skyly finished. No provider failure occurred.*', '_error': True, 'timestamp': 101},
+            ],
+        )
+        _setup_cancel_state(sid, stream_id)
+        config.STREAM_PARTIAL_TEXT[stream_id] = "partial text before cancel"
+
+        cancel_stream(stream_id)
+
+        msgs = models.SESSIONS[sid].messages
+        cancel_markers = [
+            m for m in msgs
+            if isinstance(m, dict)
+            and m.get('role') == 'assistant'
+            and 'task cancelled' in str(m.get('content') or '').lower()
+        ]
+        partial_idx = next(
+            i for i, m in enumerate(msgs)
+            if isinstance(m, dict) and m.get('_partial') and m.get('content') == 'partial text before cancel'
+        )
+        marker_idx = next(i for i, m in enumerate(msgs) if m in cancel_markers)
+
+        assert len(cancel_markers) == 1
+        assert partial_idx < marker_idx
+
+    def test_late_cancel_after_worker_finalized_does_not_add_cancel_marker(self):
+        sid = "test_1361_late_done"
+        stream_id = "stream_late_done"
+        s = Session(
+            session_id=sid,
+            title="Done Session",
+            messages=[
+                {'role': 'user', 'content': 'finish normally', 'timestamp': 100},
+                {'role': 'assistant', 'content': 'done normally', 'timestamp': 101},
+            ],
+        )
+        s.active_stream_id = None
+        s.pending_user_message = None
+        s.pending_attachments = []
+        s.pending_started_at = None
+        s.save()
+        models.SESSIONS[sid] = s
+
+        q = queue.Queue()
+        config.STREAMS[stream_id] = q
+        config.CANCEL_FLAGS[stream_id] = threading.Event()
+        mock_agent = Mock()
+        mock_agent.session_id = sid
+        mock_agent.interrupt = Mock()
+        config.AGENT_INSTANCES[stream_id] = mock_agent
+        config.STREAM_PARTIAL_TEXT[stream_id] = 'stale partial snapshot'
+
+        assert cancel_stream(stream_id) is True
+
+        msgs = models.SESSIONS[sid].messages
+        assert msgs == [
+            {'role': 'user', 'content': 'finish normally', 'timestamp': 100},
+            {'role': 'assistant', 'content': 'done normally', 'timestamp': 101},
+        ]
+        assert q.empty(), "late cancel must not emit a terminal cancel event after done"
diff --git a/tests/test_issue893_cancel_preserves_partial.py b/tests/test_issue893_cancel_preserves_partial.py
index df36f6aa0e..37c79a2385 100644
--- a/tests/test_issue893_cancel_preserves_partial.py
+++ b/tests/test_issue893_cancel_preserves_partial.py
@@ -3,7 +3,7 @@
 assistant content rather than discarding it.
 
 Before this fix, clicking Stop Generation threw away all streamed text. The
-session was saved with only '*Task cancelled.*' appended, so the user lost
+session was saved with only a cancellation marker appended, so the user lost
 whatever the agent had produced up to that point.
 
 After this fix:
@@ -118,7 +118,7 @@ def interrupt(self, _): pass
         assert any('Python is a high-level programming language' in c for c in msg_contents), (
             f"Partial text not found in session messages: {msg_contents}"
         )
-        assert any('*Task cancelled.*' in c for c in msg_contents), (
+        assert any('Task cancelled:' in c for c in msg_contents), (
             "Cancel marker missing from session messages"
         )
         # Partial message should NOT have _error=True (it's real content)
@@ -127,8 +127,9 @@ def interrupt(self, _): pass
         assert partial_msg.get('_partial') is True
         assert not partial_msg.get('_error')
         # Cancel marker should have _error=True
-        cancel_msg = next(m for m in saved.messages if '*Task cancelled.*' in m.get('content', ''))
+        cancel_msg = next(m for m in saved.messages if 'Task cancelled:' in m.get('content', ''))
         assert cancel_msg.get('_error') is True
+        assert cancel_msg.get('provider_details_label') == 'Cancellation details'
 
     def test_cancel_stream_with_no_partial_text_still_saves_cancel_marker(self, tmp_path, monkeypatch):
         """If no tokens were streamed before cancel, only the cancel marker is saved."""
@@ -168,7 +169,7 @@ def interrupt(self, _): pass
 
         saved = Session.load('sess_nopartial')
         msg_contents = [m.get('content', '') for m in saved.messages]
-        assert any('*Task cancelled.*' in c for c in msg_contents)
+        assert any('Task cancelled:' in c for c in msg_contents)
         # No extra partial message when there was nothing streamed
         assert not any(m.get('_partial') for m in saved.messages), (
             "Should not add partial message when no tokens were streamed"
diff --git a/tests/test_pr1341_context_window_persistence.py b/tests/test_pr1341_context_window_persistence.py
index 2311250c11..bcbf99c30f 100644
--- a/tests/test_pr1341_context_window_persistence.py
+++ b/tests/test_pr1341_context_window_persistence.py
@@ -38,11 +38,12 @@ def test_streaming_persists_context_fields_on_session_before_save():
     # Save call follows shortly after
     save_call = src.find("\n                s.save()", block_start)
     assert save_call != -1, "s.save() not found after the post-merge marker"
-    # Limit bumped to 8200 by turn-journal lifecycle events: the block now also
-    # records `assistant_started` immediately before the durable final save.
+    # Limit bumped to 9000 by cancellation finalization guards: the block now also
+    # checks for a late user cancel immediately before the durable final save,
+    # preventing a race that would otherwise save/emit a completed turn after Stop.
     # The context_length fallback is still a single focused resolver call with
     # arg-prep scaffold and commentary explaining the failure mode it prevents.
-    assert save_call - block_start < 8200, (
+    assert save_call - block_start < 9000, (
         "s.save() should be close to the post-merge marker — block expanded unexpectedly. "
         "If you've added a new pre-save mutation block here, bump this limit."
     )
diff --git a/tests/test_sprint36.py b/tests/test_sprint36.py
index a5c2cbfe78..e9317fad3d 100644
--- a/tests/test_sprint36.py
+++ b/tests/test_sprint36.py
@@ -212,16 +212,14 @@ def test_cancel_marker_flagged_as_error_to_skip_in_api_history():
     _error: True so _sanitize_messages_for_api() strips it from the
     conversation_history sent to the agent on the next user message.
 
-    Without this flag, the LLM sees "*Task cancelled.*" as a prior assistant
+    Without this flag, the LLM sees "Task cancelled" as a prior assistant
     turn and may reference it in subsequent responses ("As I mentioned, I was
     cancelled...") — a behavioral regression introduced when this PR started
     persisting the marker to the session.
     """
     src = read("api/streaming.py")
-    idx = src.find("'content': '*Task cancelled.*'")
-    if idx == -1:
-        idx = src.find('"content": "*Task cancelled.*"')
-    assert idx != -1, "cancel marker content string not found in cancel_stream()"
+    idx = src.find("'content': _cancelled_turn_content(message)")
+    assert idx != -1, "cancel marker content writer not found in cancel_stream()"
 
     # Walk back to the start of the dict literal (opening brace)
     brace_open = src.rfind("{", 0, idx)

From 112eadc209613fd358b9080b71ae4a05717e3b8b Mon Sep 17 00:00:00 2001
From: Jordan SkyLF <jordan@skylinkfiber.net>
Date: Tue, 12 May 2026 15:43:36 -0700
Subject: [PATCH 02/28] fix: address cancelled turn review feedback

- classify string-only CancelledError payloads as cancelled
- centralize cancel marker substring matching
- add targeted regression coverage
---
 api/streaming.py                    | 12 +++++++-----
 tests/test_cancelled_turn_status.py | 20 +++++++++++++++++++-
 2 files changed, 26 insertions(+), 6 deletions(-)

diff --git a/api/streaming.py b/api/streaming.py
index 9bd0480404..fea387d813 100644
--- a/api/streaming.py
+++ b/api/streaming.py
@@ -127,6 +127,9 @@ def _clarify_timeout_seconds(default: int = 120) -> int:
         return default
 
 
+_CANCEL_MARKER_PATTERNS = ('task cancelled', 'task canceled', 'response interrupted')
+
+
 def _classify_provider_error(err_str: str, exc=None, *, silent_failure: bool = False) -> dict:
     """Classify provider/agent failure text for WebUI apperror UX.
 
@@ -143,7 +146,8 @@ def _classify_provider_error(err_str: str, exc=None, *, silent_failure: bool = F
         or 'user canceled' in _err_lower
         or 'task cancelled' in _err_lower
         or 'task canceled' in _err_lower
-        or (exc is not None and type(exc).__name__ in ('CancelledError', 'CanceledError'))
+        or 'cancellederror' in _err_lower
+        or (exc is not None and _exc_name in ('CancelledError', 'CanceledError'))
     )
     _is_interrupted = (
         not _is_cancelled
@@ -267,9 +271,7 @@ def _session_has_cancel_marker(session) -> bool:
                     parts.append(str(part.get('text') or part.get('content') or ''))
             text = '\n'.join(parts)
         normalized = text.strip().lower()
-        if 'task cancelled' in normalized or 'task canceled' in normalized:
-            return True
-        if 'response interrupted' in normalized:
+        if any(pattern in normalized for pattern in _CANCEL_MARKER_PATTERNS):
             return True
     return False
 
@@ -4619,7 +4621,7 @@ def cancel_stream(stream_id: str) -> bool:
                         if not isinstance(_m, dict) or _m.get('role') != 'assistant':
                             continue
                         _content = str(_m.get('content') or '').strip().lower()
-                        if 'task cancelled' in _content or 'task canceled' in _content or 'response interrupted' in _content:
+                        if any(pattern in _content for pattern in _CANCEL_MARKER_PATTERNS):
                             _cancel_marker_idx = _idx
                             break
                 _partial_already_present = False
diff --git a/tests/test_cancelled_turn_status.py b/tests/test_cancelled_turn_status.py
index b241674c4d..3e9b7c501c 100644
--- a/tests/test_cancelled_turn_status.py
+++ b/tests/test_cancelled_turn_status.py
@@ -8,7 +8,12 @@
 
 import pathlib
 
-from api.streaming import _cancelled_turn_content, _classify_provider_error, _finalize_cancelled_turn
+from api.streaming import (
+    _CANCEL_MARKER_PATTERNS,
+    _cancelled_turn_content,
+    _classify_provider_error,
+    _finalize_cancelled_turn,
+)
 
 REPO_ROOT = pathlib.Path(__file__).parent.parent.resolve()
 
@@ -41,6 +46,13 @@ def test_user_cancelled_error_is_not_provider_no_response(self):
         assert "rate limit" not in result.get("hint", "").lower()
         assert "no provider failure" in result.get("hint", "").lower()
 
+    def test_string_only_cancelled_error_repr_is_cancelled(self):
+        result = _classify_provider_error("<CancelledError>", None, silent_failure=True)
+
+        assert result["type"] == "cancelled"
+        assert result["label"] == "Task cancelled"
+        assert "provider returned no content" not in result.get("hint", "").lower()
+
     def test_interrupted_or_aborted_error_is_not_provider_no_response(self):
         for text in (
             "Interrupted by user",
@@ -101,6 +113,12 @@ def test_message_renderer_allows_non_provider_details_label(self):
 
 
 class TestCancelledTurnPersistenceGuards:
+    def test_cancel_marker_patterns_are_centralized_for_dedupe(self):
+        assert _CANCEL_MARKER_PATTERNS == ('task cancelled', 'task canceled', 'response interrupted')
+        src = _read("api/streaming.py")
+        assert "any(pattern in normalized for pattern in _CANCEL_MARKER_PATTERNS)" in src
+        assert "any(pattern in _content for pattern in _CANCEL_MARKER_PATTERNS)" in src
+
     def test_silent_failure_path_checks_cancel_event_before_persisting_provider_error(self):
         src = _read("api/streaming.py")
         silent_idx = src.find("# ── Detect silent agent failure")

From bc3f4e54a6541877b609de37eb5800a2a3c9cb6d Mon Sep 17 00:00:00 2001
From: Lucas Coutinho <lrclucas@gmail.com>
Date: Wed, 13 May 2026 00:25:41 -0300
Subject: [PATCH 03/28] Cache PBKDF2 password hash to eliminate ~1s overhead on
 every HTTP request
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

get_password_hash() computes PBKDF2-SHA256 with 600k iterations to
hash the HERMES_WEBUI_PASSWORD env var.  This is called on nearly every
HTTP request via check_auth -> is_auth_enabled -> get_password_hash.

Before: ~1s of PBKDF2 per request, regardless of how many times the
same env-var value has already been hashed.  A page load hitting 5+
API endpoints would burn 5+ seconds purely on password hashing.

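After: compute once on first call, cache the hex result in a module-
level variable.  Subsequent calls are a single global-variable read
(~50ns).  The env var is immutable for the process lifetime, so there
is nothing to invalidate on that path.

Note: the settings.json fallback (including a None result when no
password is configured) is cached the same way, and this patch adds no
invalidation hook.  A password_hash written to settings.json after the
first call is therefore not returned by get_password_hash() until the
process restarts.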

Thread-safe: double-checked locking ensures that under a burst of
concurrent requests only one thread computes PBKDF2, while the fast
path (after initialisation) requires zero locks.

Security analysis: zero regression.  The hash is derived from a static
env var and a static signing key — both already readable from process
memory.  Caching does not introduce any new disclosure or replay
vector.  PBKDF2 is still used for the initial computation and for
verify_password() on login.

AI: deepseek/deepseek-v4-flash
---
 api/auth.py                            |  44 ++++-
 tests/test_auth_password_hash_cache.py | 236 +++++++++++++++++++++++++
 2 files changed, 274 insertions(+), 6 deletions(-)
 create mode 100644 tests/test_auth_password_hash_cache.py

diff --git a/api/auth.py b/api/auth.py
index 73303f0126..42c1cde362 100644
--- a/api/auth.py
+++ b/api/auth.py
@@ -11,6 +11,7 @@
 import os
 import secrets
 import tempfile
+import threading
 import time
 
 from api.config import STATE_DIR, load_settings
@@ -210,14 +211,45 @@ def _hash_password(password):
     return dk.hex()
 
 
+_AUTH_HASH_LOCK = threading.Lock()
+_AUTH_HASH_COMPUTED: bool = False
+_AUTH_HASH_CACHE: str | None = None
+
+
 def get_password_hash() -> str | None:
     """Return the active password hash, or None if auth is disabled.
-    Priority: env var > settings.json."""
-    env_pw = os.getenv('HERMES_WEBUI_PASSWORD', '').strip()
-    if env_pw:
-        return _hash_password(env_pw)
-    settings = load_settings()
-    return settings.get('password_hash') or None
+    Priority: env var > settings.json.
+
+    The hash is computed once and cached for the lifetime of the process.
+    PBKDF2-600k takes ~1 s and is called on nearly every HTTP request via
+    check_auth → is_auth_enabled, so caching avoids wasting a full second
+    of CPU per request after the first one.
+
+    Thread-safe: double-checked locking ensures that under a burst of
+    concurrent requests only one thread computes PBKDF2, while the fast
+    path (after initialisation) requires zero locks.
+    """
+    global _AUTH_HASH_COMPUTED, _AUTH_HASH_CACHE
+
+    # Fast path — no lock needed once cache is populated.
+    if _AUTH_HASH_COMPUTED:
+        return _AUTH_HASH_CACHE
+
+    with _AUTH_HASH_LOCK:
+        # Re-check inside lock — another thread may have populated while
+        # we were waiting to acquire.
+        if _AUTH_HASH_COMPUTED:
+            return _AUTH_HASH_CACHE
+
+        env_pw = os.getenv('HERMES_WEBUI_PASSWORD', '').strip()
+        if env_pw:
+            result = _hash_password(env_pw)
+        else:
+            result = load_settings().get('password_hash') or None
+
+        _AUTH_HASH_CACHE = result
+        _AUTH_HASH_COMPUTED = True
+        return result
 
 
 def is_auth_enabled() -> bool:
diff --git a/tests/test_auth_password_hash_cache.py b/tests/test_auth_password_hash_cache.py
new file mode 100644
index 0000000000..d3a1c687c9
--- /dev/null
+++ b/tests/test_auth_password_hash_cache.py
@@ -0,0 +1,236 @@
+"""
+Tests for get_password_hash() caching (env-var path).
+
+get_password_hash() calls PBKDF2-SHA256 with 600k iterations, which takes
+~1 second per invocation.  When HERMES_WEBUI_PASSWORD is set via env var,
+the hash never changes during the process lifetime, so the result should
+be computed once and cached.
+
+Performance regression: without caching, every HTTP request pays ~1s for
+PBKDF2 (check_auth -> is_auth_enabled -> get_password_hash), causing
+multi-second API response times.
+
+Thread-safety: under a burst of concurrent requests, only one thread must
+compute PBKDF2.  Double-checked locking ensures the others wait and receive
+the cached result.
+"""
+import importlib
+import os
+import sys
+import threading
+import time
+import unittest
+from pathlib import Path
+
+# Isolate state dir from production
+import tempfile
+_TEST_STATE = Path(tempfile.mkdtemp())
+os.environ["HERMES_WEBUI_STATE_DIR"] = str(_TEST_STATE)
+
+sys.path.insert(0, str(Path(__file__).parent.parent))
+
+# Ensure a clean module state
+for mod in list(sys.modules.keys()):
+    if 'api.auth' in mod or 'api.config' in mod:
+        del sys.modules[mod]
+
+import api.auth as auth
+
+
+class TestPasswordHashCache(unittest.TestCase):
+    """Verify that get_password_hash() caches after first computation."""
+
+    def setUp(self):
+        # Reset the module-level cache state
+        auth._AUTH_HASH_LOCK = threading.Lock()
+        auth._AUTH_HASH_COMPUTED = False
+        auth._AUTH_HASH_CACHE = None
+        # Clear the env var before each test so a dirty environment
+        # doesn't cascade across test boundaries
+        os.environ.pop('HERMES_WEBUI_PASSWORD', None)
+
+    def _set_env_pw(self, pw: str) -> None:
+        os.environ['HERMES_WEBUI_PASSWORD'] = pw
+
+    def test_first_call_returns_hash(self):
+        """First call with env var set should return a hex hash string."""
+        self._set_env_pw("hunter2")
+        h = auth.get_password_hash()
+        self.assertIsNotNone(h)
+        self.assertIsInstance(h, str)
+        assert h is not None  # narrow type for type checker
+        self.assertGreater(len(h), 10)
+
+    def test_cache_flag_set_after_first_call(self):
+        """_AUTH_HASH_COMPUTED should be True after first call."""
+        self._set_env_pw("test-password")
+        self.assertFalse(auth._AUTH_HASH_COMPUTED)
+        auth.get_password_hash()
+        self.assertTrue(auth._AUTH_HASH_COMPUTED)
+
+    def test_cache_hit_is_order_of_magnitude_faster(self):
+        """Second invocation must be >>10x faster than the first (sub-millisecond vs ~1s)."""
+        self._set_env_pw("a-fairly-long-password-for-benchmarking")
+        t0 = time.perf_counter()
+        first = auth.get_password_hash()
+        t_first = time.perf_counter() - t0
+        t0 = time.perf_counter()
+        second = auth.get_password_hash()
+        t_second = time.perf_counter() - t0
+        self.assertEqual(first, second,
+                         "Cached hash must match the original")
+        self.assertLess(t_second, t_first / 10,
+                        f"Cache hit ({t_second*1000:.1f}ms) should be "
+                        f">10x faster than first call ({t_first*1000:.1f}ms)")
+
+    def test_subsequent_calls_return_same_hash(self):
+        """Multiple calls after caching should all return the identical hash."""
+        self._set_env_pw("consistent-password")
+        hashes = [auth.get_password_hash() for _ in range(10)]
+        self.assertTrue(all(h == hashes[0] for h in hashes),
+                        "All cached calls must return the same hash")
+
+    def test_cache_lifetime_is_process_lifetime(self):
+        """Cached value persists for the lifetime of the process."""
+        self._set_env_pw("persistent-password")
+        first = auth.get_password_hash()
+        # The env var could change between calls — cache must still
+        # return the original value.
+        os.environ['HERMES_WEBUI_PASSWORD'] = 'different-password'
+        second = auth.get_password_hash()
+        self.assertEqual(first, second,
+                         "Cache must return the original hash even if "
+                         "the env var changes (process-lifetime semantics)")
+
+    def test_multiple_calls_no_env_var(self):
+        """When env var is unset, get_password_hash must still work.
+
+        This exercises the settings.json fallback path. The test state
+        dir is fresh, so no settings file exists — the result should
+        be None (auth disabled).
+        """
+        # Ensure no env var
+        os.environ.pop('HERMES_WEBUI_PASSWORD', None)
+        h = auth.get_password_hash()
+        self.assertIsNone(h, "With no env var and no settings file, "
+                             "hash should be None")
+        self.assertTrue(auth._AUTH_HASH_COMPUTED)
+
+    def test_cache_returns_none_when_disabled(self):
+        """Once computed as None (no password), cache must keep returning None."""
+        os.environ.pop('HERMES_WEBUI_PASSWORD', None)
+        h1 = auth.get_password_hash()
+        h2 = auth.get_password_hash()
+        self.assertIsNone(h1)
+        self.assertIsNone(h2)
+
+    def test_cache_independent_of_settings_file(self):
+        """Env-var path must not read or depend on settings.json.
+
+        The query count on settings.json before caching is acceptable;
+        after caching it must not touch settings at all.
+        """
+        # Force a hash via env var, then cache it
+        self._set_env_pw("env-only")
+        auth.get_password_hash()
+
+        # Tamper with the settings load — after caching this should not
+        # matter because settings.json is only read inside
+        # get_password_hash when COMPUTED is False.
+        _original_load = auth.load_settings
+        try:
+            auth.load_settings = lambda: {"password_hash": "evil"}
+            cached = auth.get_password_hash()
+            self.assertIsNotNone(cached)
+            # The hash should NOT come from the tampered settings
+            self.assertNotEqual(cached, "evil",
+                                "Cached env-var hash must not be replaced "
+                                "by a settings.json value")
+        finally:
+            auth.load_settings = _original_load
+
+
+class TestPasswordHashCacheConcurrency(unittest.TestCase):
+    """Verify thread-safety: concurrent burst must not duplicate PBKDF2."""
+
+    def setUp(self):
+        auth._AUTH_HASH_LOCK = threading.Lock()
+        auth._AUTH_HASH_COMPUTED = False
+        auth._AUTH_HASH_CACHE = None
+        os.environ.pop('HERMES_WEBUI_PASSWORD', None)
+
+    def _set_env_pw(self, pw: str) -> None:
+        os.environ['HERMES_WEBUI_PASSWORD'] = pw
+
+    def test_concurrent_burst_only_computes_once(self):
+        """Under a burst of N concurrent requests, PBKDF2 runs exactly once.
+
+        Each thread records how many times _hash_password was invoked
+        (via a monkey-patched wrapper).  After all threads finish, the
+        counter must be exactly 1 and all results identical.
+        """
+        self._set_env_pw("burst-test-password")
+
+        call_count = 0
+        count_lock = threading.Lock()
+
+        original_hash = auth._hash_password
+        def counting_hash(pw):
+            nonlocal call_count
+            with count_lock:
+                call_count += 1
+            return original_hash(pw)
+        auth._hash_password = counting_hash
+        try:
+            results: list = []
+            results_lock = threading.Lock()
+
+            def worker():
+                r = auth.get_password_hash()
+                with results_lock:
+                    results.append(r)
+
+            threads = [threading.Thread(target=worker) for _ in range(8)]
+            t0 = time.perf_counter()
+            for t in threads:
+                t.start()
+            for t in threads:
+                t.join()
+            elapsed = time.perf_counter() - t0
+
+            self.assertEqual(call_count, 1,
+                             f"Expected 1 PBKDF2 call, got {call_count}. "
+                             "Threads are racing on cache population.")
+            self.assertEqual(len(set(results)), 1,
+                             "All threads must see the same hash")
+            # Elapsed time should be ~1s (one PBKDF2), not ~8s (serial).
+            # Use a generous 3× bound for slow machines.
+            self.assertLess(elapsed, 3.0,
+                            f"Burst took {elapsed:.1f}s — threads are likely "
+                            f"running PBKDF2 serially under the lock.")
+        finally:
+            auth._hash_password = original_hash
+
+    def test_concurrent_burst_with_no_env_var(self):
+        """Concurrent calls with no env var must all return None."""
+        os.environ.pop('HERMES_WEBUI_PASSWORD', None)
+        results: list = []
+        results_lock = threading.Lock()
+
+        def worker():
+            r = auth.get_password_hash()
+            with results_lock:
+                results.append(r)
+
+        threads = [threading.Thread(target=worker) for _ in range(5)]
+        for t in threads:
+            t.start()
+        for t in threads:
+            t.join()
+
+        self.assertTrue(all(r is None for r in results),
+                        "All threads must see None when auth is disabled")
+
+
+if __name__ == "__main__":
+    unittest.main()

From 7acbb3d99d6f6da7f5eb2b58801fe585bbdcd5e5 Mon Sep 17 00:00:00 2001
From: Lucas Coutinho <lrclucas@gmail.com>
Date: Wed, 13 May 2026 00:54:50 -0300
Subject: [PATCH 04/28] Cache PBKDF2 password hash to eliminate ~1s overhead on
 every HTTP request
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

get_password_hash() computes PBKDF2-SHA256 with 600k iterations to
hash the HERMES_WEBUI_PASSWORD env var.  This is called on nearly every
HTTP request via check_auth -> is_auth_enabled -> get_password_hash.

Before: ~1s of PBKDF2 per request, regardless of how many times the
same env-var value has already been hashed.  A page load hitting 5+
API endpoints would burn 5+ seconds purely on password hashing.

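After: compute once on first call, cache the hex result in a module-
level variable.  Subsequent calls are a single global-variable read
(~50ns).  The env var is immutable for the process lifetime, so there
is nothing to invalidate on that path.

Note: the settings.json fallback (including a None result when no
password is configured) is cached the same way, and no invalidation
hook exists.  A password_hash written to settings.json after the first
call is therefore not returned by get_password_hash() until the process
restarts.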

Thread-safe: double-checked locking ensures that under a burst of
concurrent requests only one thread computes PBKDF2, while the fast
path (after initialisation) requires zero locks.

10 unit tests covering all branches, cache-lifetime semantics, and
concurrent burst safety (8 threads, exactly 1 PBKDF2 call).
Test isolation: reloads only api.auth via importlib.reload, leaving
api.config untouched so test_pytest_state_isolation.py is unaffected.

Security analysis: zero regression.  The hash is derived from a static
env var and a static signing key — both already readable from process
memory.  Caching does not introduce any new disclosure or replay
vector.  PBKDF2 is still used for the initial computation and for
verify_password() on login.

AI: deepseek/deepseek-v4-flash
---
 tests/test_auth_password_hash_cache.py | 20 +++++++++++++-------
 1 file changed, 13 insertions(+), 7 deletions(-)

diff --git a/tests/test_auth_password_hash_cache.py b/tests/test_auth_password_hash_cache.py
index d3a1c687c9..00b5be16a4 100644
--- a/tests/test_auth_password_hash_cache.py
+++ b/tests/test_auth_password_hash_cache.py
@@ -22,19 +22,25 @@
 import unittest
 from pathlib import Path
 
-# Isolate state dir from production
+# Isolate state dir from production — only affects the auth module reload.
+# We deliberately do NOT delete api.config from sys.modules (unlike some
+# sibling test files that need a fresh config import).  Deleting api.config
+# would change its module-level STATE_DIR global and leak into all
+# subsequently collected tests (breaking test_pytest_state_isolation.py).
 import tempfile
 _TEST_STATE = Path(tempfile.mkdtemp())
 os.environ["HERMES_WEBUI_STATE_DIR"] = str(_TEST_STATE)
 
 sys.path.insert(0, str(Path(__file__).parent.parent))
 
-# Ensure a clean module state
-for mod in list(sys.modules.keys()):
-    if 'api.auth' in mod or 'api.config' in mod:
-        del sys.modules[mod]
-
-import api.auth as auth
+# Force a fresh import of the auth module so it picks up the isolated env var.
+# The auth module re-executes `from api.config import STATE_DIR, load_settings`
+# at import time, but api.config is already in sys.modules — Python just
+# rebinds the names from the existing module, keeping the conftest STATE_DIR
+# untouched.
+import api.auth
+importlib.reload(api.auth)
+auth = api.auth
 
 
 class TestPasswordHashCache(unittest.TestCase):

From a49c0fbf8bbe65d1bba7353ea481aa378c7adfce Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?=E7=8E=8B=E6=B5=A9=E7=94=9F?= <wanghs2@lenovo.com>
Date: Wed, 13 May 2026 14:42:03 +0800
Subject: [PATCH 05/28] fix(ui): Fix custom models not being displayed in the
 model configuration list - Fix custom models not being shown - Fix custom
 models that are not Ollama models being passed through the Ollama model
 processing function, which replaced the hyphen '-' in the model name with a
 space " " and lowercased the last letter

---
 static/ui.js | 131 ++++++++++++++++++++++++++++++++++++++++++++-------
 1 file changed, 115 insertions(+), 16 deletions(-)

diff --git a/static/ui.js b/static/ui.js
index d071d93ed9..0548b73870 100644
--- a/static/ui.js
+++ b/static/ui.js
@@ -764,26 +764,67 @@ async function populateModelDropdown(){
     const _modelsRes=await fetch(new URL('api/models',document.baseURI||location.href).href,{credentials:'include'});
     if(_redirectIfUnauth(_modelsRes)) return;
     const data=await _modelsRes.json();
-    if(!data.groups||!data.groups.length) return; // keep HTML defaults
     // Store active provider globally so the send path can warn on mismatch
     window._activeProvider=data.active_provider||null;
     // Store default model so newSession() can apply it (#872).
     // Per-page-load — not synced across browser tabs.
     window._defaultModel=data.default_model||null;
     window._configuredModelBadges=data.configured_model_badges||{};
+
+    const _synthGroupsFromConfigured=()=>{
+      const badgeMap=window._configuredModelBadges||{};
+      const grouped=new Map();
+      const addModel=(providerId,modelId)=>{
+        const pid=String(providerId||'configured').trim()||'configured';
+        const mid=String(modelId||'').trim();
+        if(!mid) return;
+        if(!grouped.has(pid)) grouped.set(pid,[]);
+        const arr=grouped.get(pid);
+        if(arr.some(m=>m.id===mid)) return;
+        arr.push({id:mid,label:getModelLabel(mid)});
+      };
+
+      for(const [modelId,badge] of Object.entries(badgeMap)){
+        const mid=String(modelId||'').trim();
+        // Prefer canonical IDs only; skip derived aliases such as
+        // @provider:model and provider/model to avoid noisy duplicates.
+        if(!mid||mid.startsWith('@')||mid.includes('/')) continue;
+        const provider=(badge&&badge.provider)||'configured';
+        addModel(provider,mid);
+      }
+
+      if(grouped.size===0&&data&&data.default_model){
+        addModel(data.active_provider||'configured',data.default_model);
+      }
+
+      const groups=[];
+      for(const [providerId,models] of grouped.entries()){
+        const display=(String(providerId).startsWith('custom:')
+          ? String(providerId).slice('custom:'.length)
+          : String(providerId))||'Configured';
+        groups.push({provider:display,provider_id:providerId,models});
+      }
+      return groups;
+    };
+
+    const groups=(Array.isArray(data.groups)&&data.groups.length)
+      ? data.groups
+      : _synthGroupsFromConfigured();
+
+    if(!groups.length) return; // no server groups and no configured fallback
     // Clear existing options
     sel.innerHTML='';
     _dynamicModelLabels={};
-    for(const g of data.groups){
+    for(const g of groups){
       const og=document.createElement('optgroup');
       og.label=g.provider;
       if(g.provider_id) og.dataset.provider=g.provider_id;
-      for(const m of g.models){
+      for(const m of (Array.isArray(g.models)?g.models:[])){
         const opt=document.createElement('option');
         opt.value=m.id;
         opt.textContent=m.label;
         og.appendChild(opt);
-        _dynamicModelLabels[m.id]=m.label;
+        _dynamicModelLabels[m.id]=m.id;
       }
       // Hydrate the label map from extra_models too (the catalog tail that
       // doesn't render as <option> entries when the picker is capped — see
@@ -793,7 +834,7 @@ async function populateModelDropdown(){
       // instead of falling back to the bare ID. #1567.
       if(Array.isArray(g.extra_models)){
         for(const m of g.extra_models){
-          if(m && m.id) _dynamicModelLabels[m.id]=m.label||m.id;
+          if(m && m.id) _dynamicModelLabels[m.id]=m.id;
         }
       }
       sel.appendChild(og);
@@ -1033,11 +1074,19 @@ function renderModelDropdown(){
     if(child.tagName==='OPTGROUP'){
       const providerId=child.dataset&&child.dataset.provider?child.dataset.provider:'';
       for(const opt of Array.from(child.children)){
-        _modelData.push({value:opt.value,name:esc(opt.textContent||getModelLabel(opt.value)),id:esc(opt.value),group:child.label||'',badge:_getConfiguredModelBadge(opt.value,_badgeMap,providerId)});
+        const rawValue=String(opt.value||'');
+        const displayName=rawValue.startsWith('@custom:')
+          ? getModelLabel(rawValue)
+          : (opt.textContent||getModelLabel(rawValue));
+        _modelData.push({value:opt.value,name:esc(displayName),id:esc(opt.value),group:child.label||'',badge:_getConfiguredModelBadge(opt.value,_badgeMap,providerId)});
       }
     }
     if(child.tagName==='OPTION'){
-      _modelData.push({value:child.value,name:esc(child.textContent||getModelLabel(child.value)),id:esc(child.value),group:'',badge:_getConfiguredModelBadge(child.value,_badgeMap)});
+      const rawValue=String(child.value||'');
+      const displayName=rawValue.startsWith('@custom:')
+        ? getModelLabel(rawValue)
+        : (child.textContent||getModelLabel(rawValue));
+      _modelData.push({value:child.value,name:esc(displayName),id:esc(child.value),group:'',badge:_getConfiguredModelBadge(child.value,_badgeMap)});
     }
   }
   const _existingConfiguredKeys=new Set(_modelData.map(existing=>_normalizeConfiguredModelKey(existing.value)));
@@ -1091,8 +1140,32 @@ function renderModelDropdown(){
       }
     }
     const matches=(m)=>!term||found.has(m.value);
-    const configuredModels=_modelData
-      .filter(m=>m.badge&&matches(m))
+    const configuredCandidates=_modelData
+      .filter(m=>m.badge&&matches(m));
+    const configuredBySemanticKey=new Map();
+    const _configuredProviderKey=(m)=>String((m&&m.badge&&m.badge.provider)||_providerFromModelValue(m&&m.value)||'').toLowerCase();
+    const _configuredModelKey=(m)=>_normalizeConfiguredModelKey(m&&m.value||'');
+    const _configuredDisplayPriority=(m)=>{
+      // Prefer plain IDs over provider-qualified aliases for readability.
+      const v=String((m&&m.value)||'');
+      if(v.startsWith('@')) return 0;
+      if(v.includes('/')) return 1;
+      return 2;
+    };
+    for(const candidate of configuredCandidates){
+      const semanticKey=`${_configuredProviderKey(candidate)}::${_configuredModelKey(candidate)}`;
+      const existing=configuredBySemanticKey.get(semanticKey);
+      if(!existing){
+        configuredBySemanticKey.set(semanticKey,candidate);
+        continue;
+      }
+      const candidatePriority=_configuredDisplayPriority(candidate);
+      const existingPriority=_configuredDisplayPriority(existing);
+      if(candidatePriority>existingPriority){
+        configuredBySemanticKey.set(semanticKey,candidate);
+      }
+    }
+    const configuredModels=[...configuredBySemanticKey.values()]
       .sort((a,b)=>{
         const configuredRankA=_configuredRank(a.badge);
         const configuredRankB=_configuredRank(b.badge);
@@ -1112,17 +1185,28 @@ function renderModelDropdown(){
       configuredHeading.className='model-group';
       configuredHeading.textContent=t('model_group_configured')||'Configured';
       dd.appendChild(configuredHeading);
+      // To display the original ID, build badgeKeyMap: badge object -> original key
+      const badgeKeyMap = new Map();
+      for(const [k, v] of Object.entries(_badgeMap)){
+        badgeKeyMap.set(v, k);
+      }
       for(const m of configuredModels){
         const row=document.createElement('div');
         row.className='model-opt'+(m.value===sel.value?' active':'');
-        // Add provider info to badge label (e.g., "Primary (jingdong)")
-        let badgeLabel=m.badge?(m.badge.label||'Configured'):'';
-        if(m.badge&&m.badge.provider){
-          const providerName=m.badge.provider.replace(/^custom:/,'').split('/')[0];
-          badgeLabel+=` (${providerName})`;
+        let badgeLabel = '';
+        let modelName = m.name;
+        if (m.badge) {
+          // Use the badge's original key directly (i.e. the ID from config.yaml)
+          const rawId = badgeKeyMap.get(m.badge) || m.value || m.badge.label || 'Configured';
+          badgeLabel = rawId;
+          modelName = rawId; // model-opt-name shows the original ID directly
+          if(m.badge.provider){
+            const providerName=m.badge.provider.replace(/^custom:/,'').split('/')[0];
+            badgeLabel += ` (${providerName})`;
+          }
         }
         const badgeHtml=m.badge?`<span class="model-opt-badge model-opt-badge--${esc(m.badge.role||'configured')}">${esc(badgeLabel)}</span>`:'';
-        row.innerHTML=`<div class="model-opt-top"><span class="model-opt-name">${m.name}</span>${badgeHtml}</div><span class="model-opt-id">${m.id}</span>`;
+        row.innerHTML=`<div class="model-opt-top"><span class="model-opt-name">${esc(modelName)}</span>${badgeHtml}</div><span class="model-opt-id">${m.id}</span>`;
         row.onclick=()=>selectModelFromDropdown(m.value);
         dd.appendChild(row);
       }
@@ -2086,6 +2170,17 @@ function _fmtOllamaLabel(mid){
 
 function getModelLabel(modelId){
   if(!modelId) return 'Unknown';
+  const rawId=String(modelId||'');
+  // Preserve custom gateway model IDs exactly as configured.
+  // Examples:
+  //   @custom:ai_gateway:Qwen3.6-35B-A3B -> Qwen3.6-35B-A3B
+  //   @custom:qwen397b-64k               -> qwen397b-64k
+  if(rawId.startsWith('@custom:')){
+    const rest=rawId.slice('@custom:'.length);
+    if(rest.includes(':')) return rest.slice(rest.lastIndexOf(':')+1)||rawId;
+    if(rest.includes('/')) return rest.split('/').pop()||rawId;
+    return rest||rawId;
+  }
   // Check dynamic labels first, then fall back to splitting the ID
   if(_dynamicModelLabels[modelId]) return _dynamicModelLabels[modelId];
   // Static fallback for common models
@@ -2096,11 +2191,15 @@ function getModelLabel(modelId){
   // Strip @provider: prefix if present (e.g. @ollama-cloud:kimi-k2.6)
   if (_last.startsWith('@') && _last.includes(':')) _last = _last.split(':').slice(1).join(':');
   const looksLikeOllamaTag = /^[a-z0-9][\w.-]*:[\w.-]+$/i.test(_last);
+  const atProvider=(rawId.startsWith('@')&&rawId.includes(':'))
+    ? rawId.slice(1,rawId.indexOf(':')).toLowerCase()
+    : '';
+  const allowOllamaFormat=!atProvider||atProvider.startsWith('ollama');
   // Narrow: only apply Ollama formatter to IDs with explicit @ollama prefix or colon-tag format.
   // Avoids reformatting bare provider model IDs like claude-sonnet-4-6 or gpt-4o.
   const looksLikeBareOllamaId = modelId.startsWith('@ollama') || looksLikeOllamaTag;
   const ollamaLabel = _fmtOllamaLabel(_last);
-  if ((modelId.startsWith('ollama/') || modelId.startsWith('@ollama') || looksLikeOllamaTag || looksLikeBareOllamaId) && ollamaLabel !== _last) {
+  if (allowOllamaFormat && (modelId.startsWith('ollama/') || modelId.startsWith('@ollama') || looksLikeOllamaTag || looksLikeBareOllamaId) && ollamaLabel !== _last) {
     return ollamaLabel;
   }
   return _last || 'Unknown';

From de3dba3c6211c4b2ce90dcfc0b46d277eb5380ce Mon Sep 17 00:00:00 2001
From: dobby-d-elf <dobby.the.agent@gmail.com>
Date: Tue, 12 May 2026 21:58:22 -0600
Subject: [PATCH 06/28] feat: soften sweep edges and widen band for Activity
 animation

---
 static/style.css | 28 ++++++++++++++++++++++++++++
 1 file changed, 28 insertions(+)

diff --git a/static/style.css b/static/style.css
index 9866171825..86bb189142 100644
--- a/static/style.css
+++ b/static/style.css
@@ -1883,6 +1883,34 @@ body.resizing .sidebar{transition:none!important;}
 .tool-call-group:not(.tool-call-group-collapsed) .tool-call-group-chevron{transform:rotate(90deg);}
 .tool-call-group-body{display:block;padding-left:var(--space-3);}
 .tool-call-group.tool-call-group-collapsed .tool-call-group-body{display:none;}
+.tool-call-group-label{font-weight:600;color:var(--muted);position:relative;display:inline-block;overflow:hidden;}
+.tool-call-group[data-live-tool-call-group="1"] .tool-call-group-label{
+  color:var(--accent);
+}
+/* Sweep overlay — a bg-colored bar with a narrow transparent gap moves across, hiding text under the solid parts */
+@keyframes _tool-shimmer-sweep{
+  from{-webkit-mask-position:100% 0;mask-position:100% 0;}
+  to{-webkit-mask-position:-200% 0;mask-position:-200% 0;}
+}
+.tool-call-group[data-live-tool-call-group="1"] .tool-call-group-label::after{
+  content:"";
+  position:absolute;inset:0;
+  background-color:var(--bg);
+  pointer-events:none;
+  -webkit-mask-image:linear-gradient(90deg,rgba(0,0,0,0) 0%,rgba(0,0,0,0) 43%,rgba(0,0,0,0.6) 47%,rgba(0,0,0,1) 50%,rgba(0,0,0,0.6) 53%,rgba(0,0,0,0) 57%,rgba(0,0,0,0) 100%);
+          mask-image:linear-gradient(90deg,rgba(0,0,0,0) 0%,rgba(0,0,0,0) 43%,rgba(0,0,0,0.6) 47%,rgba(0,0,0,1) 50%,rgba(0,0,0,0.6) 53%,rgba(0,0,0,0) 57%,rgba(0,0,0,0) 100%);
+  -webkit-mask-size:250% 100%;
+          mask-size:250% 100%;
+  -webkit-mask-repeat:no-repeat;
+          mask-repeat:no-repeat;
+  animation:_tool-shimmer-sweep 2s linear infinite;
+}
+/* Fallback for browsers without CSS mask support */
+@supports not (mask-image:none){
+  .tool-call-group[data-live-tool-call-group="1"] .tool-call-group-label::after{
+    display:none;
+  }
+}
 .tool-card{background:var(--surface-subtle);border:1px solid var(--border-subtle);border-radius:var(--radius-card);margin:2px 0;overflow:hidden;transition:border-color .15s,background-color .15s;}
 .tool-card:hover{border-color:var(--border-muted);background:var(--surface-subtle-hover);}
 .tool-card-running{border-color:var(--accent-bg-strong);background:var(--accent-bg);}

From 7b263cea03bee28c7d0e70e502bad028b87127a6 Mon Sep 17 00:00:00 2001
From: dobby-d-elf <dobby.the.agent@gmail.com>
Date: Wed, 13 May 2026 07:40:21 -0600
Subject: [PATCH 07/28] style: narrow Activity sweep band and speed up animation

---
 static/style.css | 7 +++----
 1 file changed, 3 insertions(+), 4 deletions(-)

diff --git a/static/style.css b/static/style.css
index 86bb189142..10cdcea2d7 100644
--- a/static/style.css
+++ b/static/style.css
@@ -1877,7 +1877,6 @@ body.resizing .sidebar{transition:none!important;}
 .tool-call-group{margin:4px 0 4px var(--msg-rail);max-width:var(--msg-max);border-left:1px solid var(--border-subtle);}
 .tool-call-group-summary{width:100%;display:flex;align-items:center;gap:var(--space-2);padding:var(--space-1) var(--space-3);border:0;background:transparent;color:var(--muted);cursor:pointer;text-align:left;font:inherit;font-size:var(--font-size-xs);line-height:1.4;border-radius:var(--radius-card);}
 .tool-call-group-summary:hover{background:var(--surface-subtle-hover);color:var(--text);}
-.tool-call-group-label{font-weight:600;color:var(--muted);}
 .tool-call-group-duration{margin-left:auto;opacity:.62;font-variant-numeric:tabular-nums;white-space:nowrap;}
 .tool-call-group-chevron{opacity:.45;display:inline-flex;transition:transform .16s ease;}
 .tool-call-group:not(.tool-call-group-collapsed) .tool-call-group-chevron{transform:rotate(90deg);}
@@ -1897,13 +1896,13 @@ body.resizing .sidebar{transition:none!important;}
   position:absolute;inset:0;
   background-color:var(--bg);
   pointer-events:none;
-  -webkit-mask-image:linear-gradient(90deg,rgba(0,0,0,0) 0%,rgba(0,0,0,0) 43%,rgba(0,0,0,0.6) 47%,rgba(0,0,0,1) 50%,rgba(0,0,0,0.6) 53%,rgba(0,0,0,0) 57%,rgba(0,0,0,0) 100%);
-          mask-image:linear-gradient(90deg,rgba(0,0,0,0) 0%,rgba(0,0,0,0) 43%,rgba(0,0,0,0.6) 47%,rgba(0,0,0,1) 50%,rgba(0,0,0,0.6) 53%,rgba(0,0,0,0) 57%,rgba(0,0,0,0) 100%);
+  -webkit-mask-image:linear-gradient(90deg,rgba(0,0,0,0) 0%,rgba(0,0,0,0) 47%,rgba(0,0,0,1) 49%,rgba(0,0,0,1) 51%,rgba(0,0,0,0) 53%,rgba(0,0,0,0) 100%);
+          mask-image:linear-gradient(90deg,rgba(0,0,0,0) 0%,rgba(0,0,0,0) 47%,rgba(0,0,0,1) 49%,rgba(0,0,0,1) 51%,rgba(0,0,0,0) 53%,rgba(0,0,0,0) 100%);
   -webkit-mask-size:250% 100%;
           mask-size:250% 100%;
   -webkit-mask-repeat:no-repeat;
           mask-repeat:no-repeat;
-  animation:_tool-shimmer-sweep 2s linear infinite;
+  animation:_tool-shimmer-sweep 1.5s linear infinite;
 }
 /* Fallback for browsers without CSS mask support */
 @supports not (mask-image:none){

From e6e91e4973db52b44dcce050e87c3b8c21f2a97d Mon Sep 17 00:00:00 2001
From: Lucas Coutinho <lrclucas@gmail.com>
Date: Wed, 13 May 2026 10:40:23 -0300
Subject: [PATCH 08/28] fix(auth): thread-safe login rate limiter, PBKDF2 key
 separation, and migration path
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Concurrent failed logins raced on _login_attempts because no lock guarded
the dict. Add _LOGIN_ATTEMPTS_LOCK and wrap both _check_login_rate() and
_record_login_attempt() with it.

Extract _load_key() to de-duplicate key file I/O. Add _pbkdf2_key() that
loads .pbkdf2_key (separate from .signing_key) so PBKDF2 and HMAC signing
no longer share a key — key reuse across cryptographic primitives is unsafe.

Update _hash_password() to use _pbkdf2_key() as its default salt, with an
optional *salt* kwarg so verify_password() can try the legacy .signing_key
salt during transparent migration. When the old hash matches, save_settings()
re-hashes with _pbkdf2_key() and _invalidate_password_hash_cache() ensures
the next request sees the upgraded hash without a restart.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
---
 api/auth.py | 109 ++++++++++++++++++++++++++++++++++++++--------------
 1 file changed, 81 insertions(+), 28 deletions(-)

diff --git a/api/auth.py b/api/auth.py
index 42c1cde362..5f95bfaee1 100644
--- a/api/auth.py
+++ b/api/auth.py
@@ -155,58 +155,80 @@ def _save_login_attempts(attempts: dict[str, list[float]]) -> None:
 
 
 _login_attempts = _load_login_attempts()  # ip -> [timestamp, ...]
+_LOGIN_ATTEMPTS_LOCK = threading.Lock()
 
 
 def _check_login_rate(ip: str) -> bool:
-    """Return True if the IP is allowed to attempt login."""
-    now = time.time()
-    attempts = _login_attempts.get(ip, [])
-    # Prune old attempts
-    attempts = [t for t in attempts if now - t < _LOGIN_WINDOW]
-    if attempts:
-        _login_attempts[ip] = attempts
-    else:
-        _login_attempts.pop(ip, None)
-    _save_login_attempts(_login_attempts)
-    return len(attempts) < _LOGIN_MAX_ATTEMPTS
+    """Return True if the IP is allowed to attempt login (thread-safe)."""
+    with _LOGIN_ATTEMPTS_LOCK:
+        now = time.time()
+        attempts = _login_attempts.get(ip, [])
+        # Prune old attempts
+        attempts = [t for t in attempts if now - t < _LOGIN_WINDOW]
+        if attempts:
+            _login_attempts[ip] = attempts
+        else:
+            _login_attempts.pop(ip, None)
+        _save_login_attempts(_login_attempts)
+        return len(attempts) < _LOGIN_MAX_ATTEMPTS
 
 
 def _record_login_attempt(ip: str) -> None:
-    now = time.time()
-    attempts = _login_attempts.get(ip, [])
-    attempts.append(now)
-    _login_attempts[ip] = attempts
-    _save_login_attempts(_login_attempts)
+    """Record a login attempt for rate limiting (thread-safe)."""
+    with _LOGIN_ATTEMPTS_LOCK:
+        now = time.time()
+        attempts = _login_attempts.get(ip, [])
+        attempts.append(now)
+        _login_attempts[ip] = attempts
+        _save_login_attempts(_login_attempts)
 
 
-def _signing_key():
-    """Return a random signing key, generating and persisting one on first call."""
-    key_file = STATE_DIR / '.signing_key'
+def _load_key(filename: str) -> bytes:
+    """Load a 32-byte key from STATE_DIR, generating and persisting one if missing."""
+    key_file = STATE_DIR / filename
     try:
         if key_file.exists():
             raw = key_file.read_bytes()
             if len(raw) >= 32:
                 return raw[:32]
     except Exception:
-        logger.debug("Failed to read or access signing key file, using in-memory key")
-    # Generate a new random key
+        logger.debug("Failed to read key %s", filename)
     key = secrets.token_bytes(32)
     try:
         STATE_DIR.mkdir(parents=True, exist_ok=True)
         key_file.write_bytes(key)
         key_file.chmod(0o600)
     except Exception:
-        logger.debug("Failed to persist signing key, using in-memory key only")
+        logger.debug("Failed to persist key %s", filename)
     return key
 
 
-def _hash_password(password):
+def _pbkdf2_key() -> bytes:
+    """Salt for password hashing (PBKDF2). Persisted so password hashes remain
+    valid across restarts. Separate from _signing_key to avoid key reuse across
+    different cryptographic primitives."""
+    return _load_key('.pbkdf2_key')
+
+
+def _signing_key() -> bytes:
+    """HMAC key for session signing. Persisted so signed cookies remain
+    valid across restarts."""
+    return _load_key('.signing_key')
+
+
+def _hash_password(password, *, salt: bytes | None = None) -> str:
     """PBKDF2-SHA256 with 600k iterations (OWASP recommendation).
-    Salt is the persisted random signing key, which is secret and unique per
+    Salt is the persisted PBKDF2 key, which is secret and unique per
     installation. This keeps the stored hash format a plain hex string
     (no format change to settings.json) while replacing the predictable
-    STATE_DIR-derived salt from the original implementation."""
-    salt = _signing_key()
+    STATE_DIR-derived salt from the original implementation.
+
+    The *salt* parameter exists solely to support transparent migration
+    of password hashes that were computed with a different key (e.g. the
+    old `.signing_key`). Normal callers should never pass it.
+    """
+    if salt is None:
+        salt = _pbkdf2_key()
     dk = hashlib.pbkdf2_hmac('sha256', password.encode(), salt, 600_000)
     return dk.hex()
 
@@ -216,6 +238,15 @@ def _hash_password(password):
 _AUTH_HASH_CACHE: str | None = None
 
 
+def _invalidate_password_hash_cache() -> None:
+    """Invalidate the in-process password hash cache so the next call to
+    get_password_hash() re-reads from settings.json or the env var."""
+    global _AUTH_HASH_COMPUTED, _AUTH_HASH_CACHE
+    with _AUTH_HASH_LOCK:
+        _AUTH_HASH_COMPUTED = False
+        _AUTH_HASH_CACHE = None
+
+
 def get_password_hash() -> str | None:
     """Return the active password hash, or None if auth is disabled.
     Priority: env var > settings.json.
@@ -258,11 +289,33 @@ def is_auth_enabled() -> bool:
 
 
 def verify_password(plain) -> bool:
-    """Verify a plaintext password against the stored hash."""
+    """Verify a plaintext password against the stored hash.
+
+    Supports transparent migration of password hashes that were computed
+    with the old `.signing_key` salt.  When the two keys differ and the
+    legacy-salted hash matches, the password is transparently re-hashed
+    with the current `.pbkdf2_key` and persisted to settings.json.
+    """
     expected = get_password_hash()
     if not expected:
         return False
-    return hmac.compare_digest(_hash_password(plain), expected)
+    # Fast path: current PBKDF2 key
+    if hmac.compare_digest(_hash_password(plain), expected):
+        return True
+    # Migration: some hashes were computed with `.signing_key` before the
+    # PBKDF2 key was separated.  Try the legacy salt; if it matches,
+    # transparently upgrade so the next login uses the fast path.
+    legacy_salt = _signing_key()
+    current_salt = _pbkdf2_key()
+    if legacy_salt != current_salt:
+        if hmac.compare_digest(_hash_password(plain, salt=legacy_salt), expected):
+            from api.config import save_settings
+
+            save_settings({'_set_password': plain})
+            _invalidate_password_hash_cache()
+            get_password_hash()
+            return True
+    return False
 
 
 def create_session() -> str:

From a60c222e76f2aac8c7178c4419f8a5c7dec47f86 Mon Sep 17 00:00:00 2001
From: dobby-d-elf <dobby.the.agent@gmail.com>
Date: Wed, 13 May 2026 08:07:52 -0600
Subject: [PATCH 09/28] Version A: tune Activity sweep animation

---
 static/style.css                | 12 +++++++++---
 static/ui.js                    | 18 ++++++++++++++++--
 tests/test_ui_card_animation.py |  7 +++++++
 3 files changed, 32 insertions(+), 5 deletions(-)

diff --git a/static/style.css b/static/style.css
index 10cdcea2d7..ae41414714 100644
--- a/static/style.css
+++ b/static/style.css
@@ -1896,13 +1896,19 @@ body.resizing .sidebar{transition:none!important;}
   position:absolute;inset:0;
   background-color:var(--bg);
   pointer-events:none;
-  -webkit-mask-image:linear-gradient(90deg,rgba(0,0,0,0) 0%,rgba(0,0,0,0) 47%,rgba(0,0,0,1) 49%,rgba(0,0,0,1) 51%,rgba(0,0,0,0) 53%,rgba(0,0,0,0) 100%);
-          mask-image:linear-gradient(90deg,rgba(0,0,0,0) 0%,rgba(0,0,0,0) 47%,rgba(0,0,0,1) 49%,rgba(0,0,0,1) 51%,rgba(0,0,0,0) 53%,rgba(0,0,0,0) 100%);
+  -webkit-mask-image:linear-gradient(90deg,rgba(0,0,0,0) 0%,rgba(0,0,0,0) 45.5%,rgba(0,0,0,.12) 46.5%,rgba(0,0,0,.32) 47.5%,rgba(0,0,0,.58) 48.5%,rgba(0,0,0,.8) 49%,rgba(0,0,0,.8) 51%,rgba(0,0,0,.58) 51.5%,rgba(0,0,0,.32) 52.5%,rgba(0,0,0,.12) 53.5%,rgba(0,0,0,0) 54.5%,rgba(0,0,0,0) 100%);
+          mask-image:linear-gradient(90deg,rgba(0,0,0,0) 0%,rgba(0,0,0,0) 45.5%,rgba(0,0,0,.12) 46.5%,rgba(0,0,0,.32) 47.5%,rgba(0,0,0,.58) 48.5%,rgba(0,0,0,.8) 49%,rgba(0,0,0,.8) 51%,rgba(0,0,0,.58) 51.5%,rgba(0,0,0,.32) 52.5%,rgba(0,0,0,.12) 53.5%,rgba(0,0,0,0) 54.5%,rgba(0,0,0,0) 100%);
   -webkit-mask-size:250% 100%;
           mask-size:250% 100%;
   -webkit-mask-repeat:no-repeat;
           mask-repeat:no-repeat;
-  animation:_tool-shimmer-sweep 1.5s linear infinite;
+  animation:_tool-shimmer-sweep 2.0s linear infinite;
+}
+@media (prefers-reduced-motion: reduce){
+  .tool-call-group[data-live-tool-call-group="1"] .tool-call-group-label::after{
+    animation:none;
+    display:none;
+  }
 }
 /* Fallback for browsers without CSS mask support */
 @supports not (mask-image:none){
diff --git a/static/ui.js b/static/ui.js
index d071d93ed9..fe2a305a61 100644
--- a/static/ui.js
+++ b/static/ui.js
@@ -6284,6 +6284,20 @@ function _thinkingMarkup(text=''){
     ? `<div class="thinking-card${openClass}"><div class="thinking-card-header" onclick="this.parentElement.classList.toggle('open')"><span class="thinking-card-icon">${li('lightbulb',14)}</span><span class="thinking-card-label">${t('thinking')}</span><span class="thinking-card-toggle">${li('chevron-right',12)}</span></div><div class="thinking-card-body"><pre>${esc(String(clean).trim())}</pre></div></div>`
     : `<div class="thinking"><div class="dot"></div><div class="dot"></div><div class="dot"></div></div>`;
 }
+function _renderThinkingInto(row,text=''){
+  if(!row) return;
+  const clean=_sanitizeThinkingDisplayText(text);
+  if(!clean){
+    row.innerHTML=_thinkingMarkup(text);
+    return;
+  }
+  const pre=row.querySelector('.thinking-card-body pre');
+  if(pre){
+    pre.textContent=clean;
+    return;
+  }
+  row.innerHTML=_thinkingMarkup(text);
+}
 function finalizeThinkingCard(){
   // Guard: only finalize thinking card if we're looking at the session that started it.
   // Without this check, switching tabs while a stream is running causes finalizeThinkingCard
@@ -6365,7 +6379,7 @@ function appendThinking(text=''){
       else blocks.appendChild(row);
     }
     row.className=(text&&String(text).trim())?'assistant-segment thinking-card-row':'assistant-segment';
-    row.innerHTML=_thinkingMarkup(text);
+    _renderThinkingInto(row,text);
     scrollIfPinned();
     // Auto-scroll the thinking card body to bottom if the user is watching
     // (scroll pinned). If the user scrolled up to read history, leave it alone.
@@ -6394,7 +6408,7 @@ function appendThinking(text=''){
     row.setAttribute('data-thinking-active','1');
     body.insertBefore(row, body.firstChild);
   }
-  row.innerHTML=_thinkingMarkup(text);
+  _renderThinkingInto(row,text);
   _syncToolCallGroupSummary(group);
   scrollIfPinned();
   if(_scrollPinned){
diff --git a/tests/test_ui_card_animation.py b/tests/test_ui_card_animation.py
index c698230d6f..3af47bc0d3 100644
--- a/tests/test_ui_card_animation.py
+++ b/tests/test_ui_card_animation.py
@@ -44,6 +44,13 @@ def test_tool_card_toggle_uses_same_chevron_icon_markup_as_thinking_card():
     assert "<div class=\"thinking-card\"><div class=\"thinking-card-header\" onclick=\"this.parentElement.classList.toggle('open')\"><span class=\"thinking-card-icon\">" in UI_JS
 
 
+def test_live_thinking_updates_existing_card_body_in_place():
+    assert "function _renderThinkingInto(row,text='')" in UI_JS
+    assert "row.querySelector('.thinking-card-body pre')" in UI_JS
+    assert "pre.textContent=clean" in UI_JS
+    assert "_renderThinkingInto(row,text);" in UI_JS
+
+
 def test_thinking_card_uses_panel_chrome_with_gold_palette():
     # Canonical thinking-card rule lives in the consolidated block (border-radius
     # tightened from 10px → 8px as part of the "quieter card" design pass).

From 3640cd8edf3135c3130ce3be0dbf8b666317c0d7 Mon Sep 17 00:00:00 2001
From: dobby-d-elf <dobby.the.agent@gmail.com>
Date: Wed, 13 May 2026 08:08:49 -0600
Subject: [PATCH 10/28] Version B: use gold Activity highlight sweep

---
 static/style.css | 41 ++++++++++++++++++-----------------------
 1 file changed, 18 insertions(+), 23 deletions(-)

diff --git a/static/style.css b/static/style.css
index ae41414714..7332250232 100644
--- a/static/style.css
+++ b/static/style.css
@@ -1884,36 +1884,31 @@ body.resizing .sidebar{transition:none!important;}
 .tool-call-group.tool-call-group-collapsed .tool-call-group-body{display:none;}
 .tool-call-group-label{font-weight:600;color:var(--muted);position:relative;display:inline-block;overflow:hidden;}
 .tool-call-group[data-live-tool-call-group="1"] .tool-call-group-label{
-  color:var(--accent);
+  color:var(--muted);
+  background-image:linear-gradient(90deg,var(--muted) 0%,var(--muted) 45.5%,color-mix(in srgb,var(--accent) 12%,var(--muted)) 46.5%,color-mix(in srgb,var(--accent) 32%,var(--muted)) 47.5%,color-mix(in srgb,var(--accent) 58%,var(--muted)) 48.5%,var(--accent) 49%,var(--accent) 51%,color-mix(in srgb,var(--accent) 58%,var(--muted)) 51.5%,color-mix(in srgb,var(--accent) 32%,var(--muted)) 52.5%,color-mix(in srgb,var(--accent) 12%,var(--muted)) 53.5%,var(--muted) 54.5%,var(--muted) 100%);
+  background-size:250% 100%;
+  background-repeat:no-repeat;
+  -webkit-background-clip:text;
+          background-clip:text;
+  -webkit-text-fill-color:transparent;
+  animation:_tool-shimmer-sweep 2.0s linear infinite;
 }
-/* Sweep overlay — a bg-colored bar with a narrow transparent gap moves across, hiding text under the solid parts */
 @keyframes _tool-shimmer-sweep{
-  from{-webkit-mask-position:100% 0;mask-position:100% 0;}
-  to{-webkit-mask-position:-200% 0;mask-position:-200% 0;}
-}
-.tool-call-group[data-live-tool-call-group="1"] .tool-call-group-label::after{
-  content:"";
-  position:absolute;inset:0;
-  background-color:var(--bg);
-  pointer-events:none;
-  -webkit-mask-image:linear-gradient(90deg,rgba(0,0,0,0) 0%,rgba(0,0,0,0) 45.5%,rgba(0,0,0,.12) 46.5%,rgba(0,0,0,.32) 47.5%,rgba(0,0,0,.58) 48.5%,rgba(0,0,0,.8) 49%,rgba(0,0,0,.8) 51%,rgba(0,0,0,.58) 51.5%,rgba(0,0,0,.32) 52.5%,rgba(0,0,0,.12) 53.5%,rgba(0,0,0,0) 54.5%,rgba(0,0,0,0) 100%);
-          mask-image:linear-gradient(90deg,rgba(0,0,0,0) 0%,rgba(0,0,0,0) 45.5%,rgba(0,0,0,.12) 46.5%,rgba(0,0,0,.32) 47.5%,rgba(0,0,0,.58) 48.5%,rgba(0,0,0,.8) 49%,rgba(0,0,0,.8) 51%,rgba(0,0,0,.58) 51.5%,rgba(0,0,0,.32) 52.5%,rgba(0,0,0,.12) 53.5%,rgba(0,0,0,0) 54.5%,rgba(0,0,0,0) 100%);
-  -webkit-mask-size:250% 100%;
-          mask-size:250% 100%;
-  -webkit-mask-repeat:no-repeat;
-          mask-repeat:no-repeat;
-  animation:_tool-shimmer-sweep 2.0s linear infinite;
+  from{background-position:100% 0;}
+  to{background-position:-200% 0;}
 }
 @media (prefers-reduced-motion: reduce){
-  .tool-call-group[data-live-tool-call-group="1"] .tool-call-group-label::after{
+  .tool-call-group[data-live-tool-call-group="1"] .tool-call-group-label{
     animation:none;
-    display:none;
+    background-image:none;
+    -webkit-text-fill-color:currentColor;
   }
 }
-/* Fallback for browsers without CSS mask support */
-@supports not (mask-image:none){
-  .tool-call-group[data-live-tool-call-group="1"] .tool-call-group-label::after{
-    display:none;
+/* Fallback for browsers without clipped text backgrounds */
+@supports not ((background-clip:text) or (-webkit-background-clip:text)){
+  .tool-call-group[data-live-tool-call-group="1"] .tool-call-group-label{
+    background-image:none;
+    -webkit-text-fill-color:currentColor;
   }
 }
 .tool-card{background:var(--surface-subtle);border:1px solid var(--border-subtle);border-radius:var(--radius-card);margin:2px 0;overflow:hidden;transition:border-color .15s,background-color .15s;}

From 720e69cb83a23a3695401ee283c9dbee5b658b3f Mon Sep 17 00:00:00 2001
From: Lucas Coutinho <lrclucas@gmail.com>
Date: Wed, 13 May 2026 11:13:23 -0300
Subject: [PATCH 11/28] fix(auth): cache signing and PBKDF2 keys in memory,
 remove migration side-effect call

---
 api/auth.py | 23 ++++++++++++++---------
 1 file changed, 14 insertions(+), 9 deletions(-)

diff --git a/api/auth.py b/api/auth.py
index 5f95bfaee1..30c785c0a8 100644
--- a/api/auth.py
+++ b/api/auth.py
@@ -203,17 +203,22 @@ def _load_key(filename: str) -> bytes:
     return key
 
 
+_PBKDF2_KEY_CACHE: bytes | None = None
+_SIGNING_KEY_CACHE: bytes | None = None
+
+
 def _pbkdf2_key() -> bytes:
-    """Salt for password hashing (PBKDF2). Persisted so password hashes remain
-    valid across restarts. Separate from _signing_key to avoid key reuse across
-    different cryptographic primitives."""
-    return _load_key('.pbkdf2_key')
+    global _PBKDF2_KEY_CACHE
+    if _PBKDF2_KEY_CACHE is None:
+        _PBKDF2_KEY_CACHE = _load_key('.pbkdf2_key')
+    return _PBKDF2_KEY_CACHE
 
 
 def _signing_key() -> bytes:
-    """HMAC key for session signing. Persisted so signed cookies remain
-    valid across restarts."""
-    return _load_key('.signing_key')
+    global _SIGNING_KEY_CACHE
+    if _SIGNING_KEY_CACHE is None:
+        _SIGNING_KEY_CACHE = _load_key('.signing_key')
+    return _SIGNING_KEY_CACHE
 
 
 def _hash_password(password, *, salt: bytes | None = None) -> str:
@@ -312,8 +317,8 @@ def verify_password(plain) -> bool:
             from api.config import save_settings
 
             save_settings({'_set_password': plain})
-            _invalidate_password_hash_cache()
-            get_password_hash()
+            # Cache invalidated inside save_settings(); the next call to
+            # get_password_hash() will re-read and warm the cache automatically.
             return True
     return False
 

From a183378a0511a96a26cced58b75330e36f3abae0 Mon Sep 17 00:00:00 2001
From: dobby-d-elf <dobby.the.agent@gmail.com>
Date: Wed, 13 May 2026 08:17:12 -0600
Subject: [PATCH 12/28] Refine version B Activity highlight sweep

---
 static/style.css | 38 +++++++++++++++++++++++---------------
 static/ui.js     |  1 +
 2 files changed, 24 insertions(+), 15 deletions(-)

diff --git a/static/style.css b/static/style.css
index 7332250232..881b51f0e9 100644
--- a/static/style.css
+++ b/static/style.css
@@ -1885,30 +1885,38 @@ body.resizing .sidebar{transition:none!important;}
 .tool-call-group-label{font-weight:600;color:var(--muted);position:relative;display:inline-block;overflow:hidden;}
 .tool-call-group[data-live-tool-call-group="1"] .tool-call-group-label{
   color:var(--muted);
-  background-image:linear-gradient(90deg,var(--muted) 0%,var(--muted) 45.5%,color-mix(in srgb,var(--accent) 12%,var(--muted)) 46.5%,color-mix(in srgb,var(--accent) 32%,var(--muted)) 47.5%,color-mix(in srgb,var(--accent) 58%,var(--muted)) 48.5%,var(--accent) 49%,var(--accent) 51%,color-mix(in srgb,var(--accent) 58%,var(--muted)) 51.5%,color-mix(in srgb,var(--accent) 32%,var(--muted)) 52.5%,color-mix(in srgb,var(--accent) 12%,var(--muted)) 53.5%,var(--muted) 54.5%,var(--muted) 100%);
-  background-size:250% 100%;
-  background-repeat:no-repeat;
+}
+@keyframes _tool-shimmer-sweep{
+  from{-webkit-mask-position:100% 0;mask-position:100% 0;}
+  to{-webkit-mask-position:-200% 0;mask-position:-200% 0;}
+}
+.tool-call-group[data-live-tool-call-group="1"] .tool-call-group-label::after{
+  content:attr(data-sweep-label);
+  position:absolute;inset:0;
+  color:var(--accent);
+  background-image:linear-gradient(90deg,var(--accent) 0%,var(--accent) 48.5%,color-mix(in srgb,var(--accent) 88%,#000) 49%,color-mix(in srgb,var(--accent) 88%,#000) 51%,var(--accent) 51.5%,var(--accent) 100%);
   -webkit-background-clip:text;
           background-clip:text;
   -webkit-text-fill-color:transparent;
+  pointer-events:none;
+  -webkit-mask-image:linear-gradient(90deg,rgba(0,0,0,0) 0%,rgba(0,0,0,0) 45.5%,rgba(0,0,0,.12) 46.5%,rgba(0,0,0,.32) 47.5%,rgba(0,0,0,.58) 48.5%,rgba(0,0,0,.8) 49%,rgba(0,0,0,.8) 51%,rgba(0,0,0,.58) 51.5%,rgba(0,0,0,.32) 52.5%,rgba(0,0,0,.12) 53.5%,rgba(0,0,0,0) 54.5%,rgba(0,0,0,0) 100%);
+          mask-image:linear-gradient(90deg,rgba(0,0,0,0) 0%,rgba(0,0,0,0) 45.5%,rgba(0,0,0,.12) 46.5%,rgba(0,0,0,.32) 47.5%,rgba(0,0,0,.58) 48.5%,rgba(0,0,0,.8) 49%,rgba(0,0,0,.8) 51%,rgba(0,0,0,.58) 51.5%,rgba(0,0,0,.32) 52.5%,rgba(0,0,0,.12) 53.5%,rgba(0,0,0,0) 54.5%,rgba(0,0,0,0) 100%);
+  -webkit-mask-size:250% 100%;
+          mask-size:250% 100%;
+  -webkit-mask-repeat:no-repeat;
+          mask-repeat:no-repeat;
   animation:_tool-shimmer-sweep 2.0s linear infinite;
 }
-@keyframes _tool-shimmer-sweep{
-  from{background-position:100% 0;}
-  to{background-position:-200% 0;}
-}
 @media (prefers-reduced-motion: reduce){
-  .tool-call-group[data-live-tool-call-group="1"] .tool-call-group-label{
+  .tool-call-group[data-live-tool-call-group="1"] .tool-call-group-label::after{
     animation:none;
-    background-image:none;
-    -webkit-text-fill-color:currentColor;
+    display:none;
   }
 }
-/* Fallback for browsers without clipped text backgrounds */
-@supports not ((background-clip:text) or (-webkit-background-clip:text)){
-  .tool-call-group[data-live-tool-call-group="1"] .tool-call-group-label{
-    background-image:none;
-    -webkit-text-fill-color:currentColor;
+/* Fallback for browsers without CSS mask support */
+@supports not ((mask-image:linear-gradient(#000,#000)) or (-webkit-mask-image:linear-gradient(#000,#000))){
+  .tool-call-group[data-live-tool-call-group="1"] .tool-call-group-label::after{
+    display:none;
   }
 }
 .tool-card{background:var(--surface-subtle);border:1px solid var(--border-subtle);border-radius:var(--radius-card);margin:2px 0;overflow:hidden;transition:border-color .15s,background-color .15s;}
diff --git a/static/ui.js b/static/ui.js
index fe2a305a61..0358b33df1 100644
--- a/static/ui.js
+++ b/static/ui.js
@@ -5515,6 +5515,7 @@ function _syncToolCallGroupSummary(group){
   if(label){
     if(toolCount) label.textContent=`Activity: ${toolCount} tool${toolCount===1?'':'s'}`;
     else label.textContent='Activity';
+    label.setAttribute('data-sweep-label', label.textContent);
   }
   if(durationEl){
     if(group.getAttribute('data-live-tool-call-group')==='1'){

From f6a5fc28851d57d4981a35cb537fb45c7d3c6e36 Mon Sep 17 00:00:00 2001
From: dobby-d-elf <dobby.the.agent@gmail.com>
Date: Wed, 13 May 2026 09:21:59 -0600
Subject: [PATCH 13/28] Widen version B Activity highlight sweep

---
 static/style.css | 8 +++++---
 1 file changed, 5 insertions(+), 3 deletions(-)

diff --git a/static/style.css b/static/style.css
index 881b51f0e9..abd65bca54 100644
--- a/static/style.css
+++ b/static/style.css
@@ -1891,16 +1891,18 @@ body.resizing .sidebar{transition:none!important;}
   to{-webkit-mask-position:-200% 0;mask-position:-200% 0;}
 }
 .tool-call-group[data-live-tool-call-group="1"] .tool-call-group-label::after{
+  --activity-sweep-highlight:linear-gradient(90deg,var(--accent) 0%,var(--accent) 48%,color-mix(in srgb,var(--accent) 88%,#000) 48.7%,color-mix(in srgb,var(--accent) 88%,#000) 51.3%,var(--accent) 52%,var(--accent) 100%);
+  --activity-sweep-mask:linear-gradient(90deg,rgba(0,0,0,0) 0%,rgba(0,0,0,0) 44%,rgba(0,0,0,.12) 45.3%,rgba(0,0,0,.32) 46.7%,rgba(0,0,0,.58) 48%,rgba(0,0,0,.8) 48.7%,rgba(0,0,0,.8) 51.3%,rgba(0,0,0,.58) 52%,rgba(0,0,0,.32) 53.3%,rgba(0,0,0,.12) 54.7%,rgba(0,0,0,0) 56%,rgba(0,0,0,0) 100%);
   content:attr(data-sweep-label);
   position:absolute;inset:0;
   color:var(--accent);
-  background-image:linear-gradient(90deg,var(--accent) 0%,var(--accent) 48.5%,color-mix(in srgb,var(--accent) 88%,#000) 49%,color-mix(in srgb,var(--accent) 88%,#000) 51%,var(--accent) 51.5%,var(--accent) 100%);
+  background-image:var(--activity-sweep-highlight);
   -webkit-background-clip:text;
           background-clip:text;
   -webkit-text-fill-color:transparent;
   pointer-events:none;
-  -webkit-mask-image:linear-gradient(90deg,rgba(0,0,0,0) 0%,rgba(0,0,0,0) 45.5%,rgba(0,0,0,.12) 46.5%,rgba(0,0,0,.32) 47.5%,rgba(0,0,0,.58) 48.5%,rgba(0,0,0,.8) 49%,rgba(0,0,0,.8) 51%,rgba(0,0,0,.58) 51.5%,rgba(0,0,0,.32) 52.5%,rgba(0,0,0,.12) 53.5%,rgba(0,0,0,0) 54.5%,rgba(0,0,0,0) 100%);
-          mask-image:linear-gradient(90deg,rgba(0,0,0,0) 0%,rgba(0,0,0,0) 45.5%,rgba(0,0,0,.12) 46.5%,rgba(0,0,0,.32) 47.5%,rgba(0,0,0,.58) 48.5%,rgba(0,0,0,.8) 49%,rgba(0,0,0,.8) 51%,rgba(0,0,0,.58) 51.5%,rgba(0,0,0,.32) 52.5%,rgba(0,0,0,.12) 53.5%,rgba(0,0,0,0) 54.5%,rgba(0,0,0,0) 100%);
+  -webkit-mask-image:var(--activity-sweep-mask);
+          mask-image:var(--activity-sweep-mask);
   -webkit-mask-size:250% 100%;
           mask-size:250% 100%;
   -webkit-mask-repeat:no-repeat;

From 8ca29618fea7dea575c48da30ea61e0150260d28 Mon Sep 17 00:00:00 2001
From: Lucas Coutinho <lrclucas@gmail.com>
Date: Wed, 13 May 2026 12:27:27 -0300
Subject: [PATCH 14/28] fix(auth): tighten except to OSError, add type hints,
 fix test imports

---
 api/auth.py                            | 6 +++---
 tests/test_auth_password_hash_cache.py | 3 +--
 2 files changed, 4 insertions(+), 5 deletions(-)

diff --git a/api/auth.py b/api/auth.py
index 30c785c0a8..b725724fc7 100644
--- a/api/auth.py
+++ b/api/auth.py
@@ -191,14 +191,14 @@ def _load_key(filename: str) -> bytes:
             raw = key_file.read_bytes()
             if len(raw) >= 32:
                 return raw[:32]
-    except Exception:
+    except OSError:
         logger.debug("Failed to read key %s", filename)
     key = secrets.token_bytes(32)
     try:
         STATE_DIR.mkdir(parents=True, exist_ok=True)
         key_file.write_bytes(key)
         key_file.chmod(0o600)
-    except Exception:
+    except OSError:
         logger.debug("Failed to persist key %s", filename)
     return key
 
@@ -293,7 +293,7 @@ def is_auth_enabled() -> bool:
     return get_password_hash() is not None
 
 
-def verify_password(plain) -> bool:
+def verify_password(plain: str) -> bool:
     """Verify a plaintext password against the stored hash.
 
     Supports transparent migration of password hashes that were computed
diff --git a/tests/test_auth_password_hash_cache.py b/tests/test_auth_password_hash_cache.py
index 00b5be16a4..a74ebbd4b1 100644
--- a/tests/test_auth_password_hash_cache.py
+++ b/tests/test_auth_password_hash_cache.py
@@ -17,6 +17,7 @@
 import importlib
 import os
 import sys
+import tempfile
 import threading
 import time
 import unittest
@@ -27,7 +28,6 @@
 # sibling test files that need a fresh config import).  Deleting api.config
 # would change its module-level STATE_DIR global and leak into all
 # subsequently collected tests (breaking test_pytest_state_isolation.py).
-import tempfile
 _TEST_STATE = Path(tempfile.mkdtemp())
 os.environ["HERMES_WEBUI_STATE_DIR"] = str(_TEST_STATE)
 
@@ -64,7 +64,6 @@ def test_first_call_returns_hash(self):
         h = auth.get_password_hash()
         self.assertIsNotNone(h)
         self.assertIsInstance(h, str)
-        assert h is not None  # narrow type for type checker
         self.assertGreater(len(h), 10)
 
     def test_cache_flag_set_after_first_call(self):

From 978dbc15d8eb3a5903baad9a0f6b18094677ab11 Mon Sep 17 00:00:00 2001
From: Lucas Coutinho <lrclucas@gmail.com>
Date: Wed, 13 May 2026 12:48:35 -0300
Subject: [PATCH 15/28] fix(auth): correct misleading cache invalidation
 comment in verify_password()

---
 api/auth.py | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/api/auth.py b/api/auth.py
index b725724fc7..a2b5fb7662 100644
--- a/api/auth.py
+++ b/api/auth.py
@@ -317,8 +317,9 @@ def verify_password(plain: str) -> bool:
             from api.config import save_settings
 
             save_settings({'_set_password': plain})
-            # Cache invalidated inside save_settings(); the next call to
-            # get_password_hash() will re-read and warm the cache automatically.
+            # Password re-hashed and persisted to disk using the current salt.
+            # Cache invalidation is handled by fix 2/3 (#2192) which adds the
+            # _invalidate_password_hash_cache() call inside save_settings().
             return True
     return False
 

From 2bcf411519b79cdaa7c997a69ac9de0ffb880def Mon Sep 17 00:00:00 2001
From: Lucas Coutinho <lrclucas@gmail.com>
Date: Wed, 13 May 2026 14:08:37 -0300
Subject: [PATCH 16/28] fix(auth): invalidate password hash cache in
 save_settings() on password change

---
 api/config.py | 9 +++++++++
 1 file changed, 9 insertions(+)

diff --git a/api/config.py b/api/config.py
index d44c10396e..b0f8860485 100644
--- a/api/config.py
+++ b/api/config.py
@@ -4039,15 +4039,18 @@ def save_settings(settings: dict) -> dict:
     theme_was_explicit = False
     skin_was_explicit = False
     # Handle _set_password: hash and store as password_hash
+    _password_changed = False
     raw_pw = settings.pop("_set_password", None)
     if raw_pw and isinstance(raw_pw, str) and raw_pw.strip():
         # Use PBKDF2 from auth module (600k iterations) -- never raw SHA-256
         from api.auth import _hash_password
 
         current["password_hash"] = _hash_password(raw_pw.strip())
+        _password_changed = True
     # Handle _clear_password: explicitly disable auth
     if settings.pop("_clear_password", False):
         current["password_hash"] = None
+        _password_changed = True
     for k, v in settings.items():
         if k in _SETTINGS_ALLOWED_KEYS:
             if k == "theme":
@@ -4089,6 +4092,12 @@ def save_settings(settings: dict) -> dict:
         json.dumps(persisted, ensure_ascii=False, indent=2),
         encoding="utf-8",
     )
+    # Invalidate the in-memory password hash cache so the next call to
+    # get_password_hash() picks up the new value from disk immediately.
+    if _password_changed:
+        from api.auth import _invalidate_password_hash_cache
+
+        _invalidate_password_hash_cache()
     # Update runtime defaults so new sessions use them immediately
     global DEFAULT_WORKSPACE
     if "default_workspace" in current:

From 3daa12ceb0ee6a0c4066f241be3659af5d56d2ae Mon Sep 17 00:00:00 2001
From: Lucas Coutinho <lrclucas@gmail.com>
Date: Wed, 13 May 2026 14:09:39 -0300
Subject: [PATCH 17/28] test(auth): add cache invalidation regression tests for
 save_settings()

---
 tests/test_auth_password_cache.py | 73 +++++++++++++++++++++++++++++++
 1 file changed, 73 insertions(+)
 create mode 100644 tests/test_auth_password_cache.py

diff --git a/tests/test_auth_password_cache.py b/tests/test_auth_password_cache.py
new file mode 100644
index 0000000000..7090271e7d
--- /dev/null
+++ b/tests/test_auth_password_cache.py
@@ -0,0 +1,73 @@
+"""
+Tests for the password hash cache invalidation hook.
+
+Verifies that changing the password via save_settings() takes effect
+immediately in the running process — without a restart.
+
+Regression: before the invalidation hook was added to save_settings(),
+_AUTH_HASH_COMPUTED stayed True and get_password_hash() returned the
+stale hash from before the UI password change.
+"""
+import os
+import pathlib
+import tempfile
+import unittest
+
+_TEST_STATE = pathlib.Path(tempfile.mkdtemp())
+os.environ["HERMES_WEBUI_STATE_DIR"] = str(_TEST_STATE)
+
+import sys
+sys.path.insert(0, str(pathlib.Path(__file__).parent.parent))
+
+import importlib
+
+auth = importlib.import_module("api.auth")
+config = importlib.import_module("api.config")
+
+
+def _reset_cache():
+    auth._invalidate_password_hash_cache()
+
+
+class TestPasswordCacheInvalidation(unittest.TestCase):
+
+    def setUp(self):
+        _reset_cache()
+        # Ensure no env-var password interferes
+        os.environ.pop("HERMES_WEBUI_PASSWORD", None)
+
+    def tearDown(self):
+        _reset_cache()
+        os.environ.pop("HERMES_WEBUI_PASSWORD", None)
+
+    def test_set_password_takes_effect_without_restart(self):
+        config.save_settings({"_set_password": "first"})
+        self.assertTrue(auth.verify_password("first"))
+
+        config.save_settings({"_set_password": "second"})
+        # Cache must be invalidated; old password must no longer verify
+        self.assertFalse(auth.verify_password("first"),
+                         "stale hash still accepted after password change — cache not invalidated")
+        self.assertTrue(auth.verify_password("second"))
+
+    def test_clear_password_takes_effect_without_restart(self):
+        config.save_settings({"_set_password": "secret"})
+        self.assertTrue(auth.is_auth_enabled())
+
+        config.save_settings({"_clear_password": True})
+        # Cache must be invalidated; auth must be disabled immediately
+        self.assertFalse(auth.is_auth_enabled(),
+                         "auth still enabled after clear — cache not invalidated")
+        self.assertFalse(auth.verify_password("secret"))
+
+    def test_cache_repopulates_after_invalidation(self):
+        config.save_settings({"_set_password": "pw"})
+        # Warm the cache
+        auth.get_password_hash()
+        # Invalidate and warm again — must reflect current settings.json
+        _reset_cache()
+        self.assertTrue(auth.verify_password("pw"))
+
+
+if __name__ == "__main__":
+    unittest.main()

From 07a5fe0838e69347945dc17abf16d8fe672103f1 Mon Sep 17 00:00:00 2001
From: Lucas Coutinho <lrclucas@gmail.com>
Date: Wed, 13 May 2026 10:42:13 -0300
Subject: [PATCH 18/28] fix(auth): HMAC length migration bridge and restore
 Secure cookie heuristic

HMAC length: create_session() now emits a full 64-char HMAC-SHA256 hex
digest instead of the truncated 32-char form. verify_session() accepts
both lengths during a transition window so existing sessions survive the
upgrade without a forced global logout. The legacy 32-char branch can be
removed once the default 30-day session TTL has elapsed.

Secure flag: introduce _is_secure_context(handler) to encapsulate the
env-var override and heuristic. Restores the getpeercert / X-Forwarded-Proto
heuristic that was present before this refactor, keeping the env-var
override (HERMES_WEBUI_SECURE) on top for proxy deployments that need
explicit control. The bare `return False` stub that the previous commit
left in place silently broke Secure-cookie delivery for all reverse-proxy
users who never set the env var.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
---
 api/auth.py | 38 +++++++++++++++++++++++++++++++++-----
 1 file changed, 33 insertions(+), 5 deletions(-)

diff --git a/api/auth.py b/api/auth.py
index a2b5fb7662..b5343e6a7c 100644
--- a/api/auth.py
+++ b/api/auth.py
@@ -329,7 +329,7 @@ def create_session() -> str:
     token = secrets.token_hex(32)
     _sessions[token] = time.time() + _resolve_session_ttl()
     _save_sessions(_sessions)
-    sig = hmac.new(_signing_key(), token.encode(), hashlib.sha256).hexdigest()[:32]
+    sig = hmac.new(_signing_key(), token.encode(), hashlib.sha256).hexdigest()
     return f"{token}.{sig}"
 
 
@@ -349,8 +349,14 @@ def verify_session(cookie_value) -> bool:
         return False
     _prune_expired_sessions()  # lazy cleanup on every verification attempt
     token, sig = cookie_value.rsplit('.', 1)
-    expected_sig = hmac.new(_signing_key(), token.encode(), hashlib.sha256).hexdigest()[:32]
-    if not hmac.compare_digest(sig, expected_sig):
+    full_sig = hmac.new(_signing_key(), token.encode(), hashlib.sha256).hexdigest()
+    # Accept both new (64-char) and legacy (32-char truncated) signatures so
+    # existing sessions survive the upgrade without a forced global logout.
+    # The legacy branch can be removed once session TTLs have expired (~30 days).
+    valid = hmac.compare_digest(sig, full_sig) or (
+        len(sig) == 32 and hmac.compare_digest(sig, full_sig[:32])
+    )
+    if not valid:
         return False
     expiry = _sessions.get(token)
     if not expiry or time.time() > expiry:
@@ -433,6 +439,29 @@ def check_auth(handler, parsed) -> bool:
     return False
 
 
+def _is_secure_context(handler=None) -> bool:
+    """Return True if cookies should carry the Secure flag.
+
+    Behaviour is overridable via HERMES_WEBUI_SECURE env var for
+    reverse-proxy setups where TLS terminates at a frontend proxy
+    (nginx, Cloudflare, etc.) and Python only sees plain HTTP.
+    1/true/yes → force Secure on; 0/false/no → force Secure off.
+    When unset, fall back to heuristics: direct TLS socket (getpeercert)
+    or X-Forwarded-Proto header from the request.
+    """
+    env = os.getenv('HERMES_WEBUI_SECURE', '').strip().lower()
+    if env in ('1', 'true', 'yes'):
+        return True
+    if env in ('0', 'false', 'no'):
+        return False
+    if handler is not None:
+        if getattr(handler.request, 'getpeercert', None) is not None:
+            return True
+        if handler.headers.get('X-Forwarded-Proto', '') == 'https':
+            return True
+    return False
+
+
 def set_auth_cookie(handler, cookie_value) -> None:
     """Set the auth cookie on the response."""
     cookie = http.cookies.SimpleCookie()
@@ -441,8 +470,7 @@ def set_auth_cookie(handler, cookie_value) -> None:
     cookie[COOKIE_NAME]['samesite'] = 'Lax'
     cookie[COOKIE_NAME]['path'] = '/'
     cookie[COOKIE_NAME]['max-age'] = str(_resolve_session_ttl())
-    # Set Secure flag when connection is HTTPS
-    if getattr(handler.request, 'getpeercert', None) is not None or handler.headers.get('X-Forwarded-Proto', '') == 'https':
+    if _is_secure_context(handler):
         cookie[COOKIE_NAME]['secure'] = True
     handler.send_header('Set-Cookie', cookie[COOKIE_NAME].OutputString())
 

From 9921bbb4127b206f5bfe3a8ed6edee2a64ff13f8 Mon Sep 17 00:00:00 2001
From: Lucas Coutinho <lrclucas@gmail.com>
Date: Wed, 13 May 2026 11:14:51 -0300
Subject: [PATCH 19/28] docs(auth): add X-Forwarded-Proto trust warning to
 _is_secure_context()

---
 api/auth.py | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/api/auth.py b/api/auth.py
index b5343e6a7c..ded55ebece 100644
--- a/api/auth.py
+++ b/api/auth.py
@@ -448,6 +448,12 @@ def _is_secure_context(handler=None) -> bool:
     1/true/yes → force Secure on; 0/false/no → force Secure off.
     When unset, fall back to heuristics: direct TLS socket (getpeercert)
     or X-Forwarded-Proto header from the request.
+
+    .. warning::
+       The ``X-Forwarded-Proto`` header is only trustworthy when a
+       reverse proxy (nginx, Cloudflare, etc.) is deployed in front
+       of the application.  Without a proxy, any client can forge the
+       header and cause the Secure flag to be set on plain HTTP.
     """
     env = os.getenv('HERMES_WEBUI_SECURE', '').strip().lower()
     if env in ('1', 'true', 'yes'):

From 7e6f7372d52a49d40d14e9b447c22bc2f046d70e Mon Sep 17 00:00:00 2001
From: Lucas Coutinho <lrclucas@gmail.com>
Date: Wed, 13 May 2026 12:29:08 -0300
Subject: [PATCH 20/28] fix(auth): add type hint to verify_session()

---
 api/auth.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/api/auth.py b/api/auth.py
index ded55ebece..0c9c70646e 100644
--- a/api/auth.py
+++ b/api/auth.py
@@ -343,7 +343,7 @@ def _prune_expired_sessions():
         _save_sessions(_sessions)
 
 
-def verify_session(cookie_value) -> bool:
+def verify_session(cookie_value: str) -> bool:
     """Verify a signed session cookie. Returns True if valid and not expired."""
     if not cookie_value or '.' not in cookie_value:
         return False

From b734d95bc0035209437b592407be7e2a7570fc33 Mon Sep 17 00:00:00 2001
From: Lucas Coutinho <lrclucas@gmail.com>
Date: Wed, 13 May 2026 12:57:29 -0300
Subject: [PATCH 21/28] =?UTF-8?q?test(auth):=20add=20regression=20tests=20?=
 =?UTF-8?q?for=20HMAC=20migration=20bridge=20(32=E2=86=9264=20char)?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 tests/test_auth_sessions.py | 42 +++++++++++++++++++++++++++++++++++++
 1 file changed, 42 insertions(+)

diff --git a/tests/test_auth_sessions.py b/tests/test_auth_sessions.py
index 9e95dedf4e..725e336be3 100644
--- a/tests/test_auth_sessions.py
+++ b/tests/test_auth_sessions.py
@@ -130,6 +130,48 @@ def test_invalidate_unknown_token_is_safe(self):
         # Should not raise
 
 
+class TestHmacMigrationBridge(unittest.TestCase):
+    """Verify the 32→64-char HMAC migration bridge in verify_session().
+
+    When create_session() was changed from hexdigest()[:32] to hexdigest(),
+    existing session cookies with 32-char signatures needed to remain valid.
+    These tests lock down the dual-length acceptance so a future refactor
+    doesn't accidentally drop it.
+
+    These can be removed once session TTLs have expired (~30 days from the
+    deploy date of fix 3/3).
+    """
+
+    def setUp(self):
+        auth._sessions.clear()
+
+    def test_legacy_truncated_sig_still_validates(self):
+        """A cookie signed with the old 32-char truncation must still verify.
+
+        Simulates a session created by a pre-upgrade build where
+        hexdigest()[:32] was used.  After upgrade to full 64-char HMAC,
+        this cookie must still be accepted (migration bridge).
+        """
+        token = auth.secrets.token_hex(32)
+        auth._sessions[token] = time.time() + 3600
+        legacy_sig = auth.hmac.new(
+            auth._signing_key(), token.encode(), auth.hashlib.sha256
+        ).hexdigest()[:32]
+        cookie = f"{token}.{legacy_sig}"
+        self.assertTrue(auth.verify_session(cookie))
+
+    def test_full_sig_rejects_forged_prefix(self):
+        """A forged 32-char sig that is NOT the HMAC prefix must be rejected.
+
+        Ensures the legacy 32-char branch still compares against the real
+        HMAC prefix instead of accepting any signature of the right length.
+        """
+        token = auth.secrets.token_hex(32)
+        auth._sessions[token] = time.time() + 3600
+        forged = "a" * 32
+        self.assertFalse(auth.verify_session(f"{token}.{forged}"))
+
+
 if __name__ == "__main__":
     unittest.main()
 

From 2a96fb4a5bd15a46c74dfa30cde8c1b36798bdeb Mon Sep 17 00:00:00 2001
From: Lucas Coutinho <lrclucas@gmail.com>
Date: Wed, 13 May 2026 14:19:09 -0300
Subject: [PATCH 22/28] fix(auth): update HMAC sig length assertion to 64 chars
 and rebase on PR1

---
 tests/test_sprint29.py | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/tests/test_sprint29.py b/tests/test_sprint29.py
index 55c9b9e8b6..351bac5efd 100644
--- a/tests/test_sprint29.py
+++ b/tests/test_sprint29.py
@@ -472,13 +472,13 @@ def test_secure_flag_not_set_for_plain_http(self, webui_server):
 
 
 class TestHMACLength:
-    def test_session_token_sig_is_32_chars(self):
-        """Session cookie signature must be 32 hex chars (128-bit), not 16."""
+    def test_session_token_sig_is_64_chars(self):
+        """Session cookie signature must be 64 hex chars (256-bit), not 32."""
         from api.auth import create_session
         cookie = create_session()
         token, sig = cookie.rsplit('.', 1)
-        assert len(sig) == 32, \
-            f"Expected 32-char signature (128-bit), got {len(sig)}: {sig}"
+        assert len(sig) == 64, \
+            f"Expected 64-char signature (SHA-256), got {len(sig)}: {sig}"
 
     def test_verify_session_rejects_old_16char_sig(self):
         """A cookie with a 16-char sig must fail verification."""

From 11d968748314da9376aaeac1389cc058d3de4577 Mon Sep 17 00:00:00 2001
From: dobby-d-elf <dobby.the.agent@gmail.com>
Date: Wed, 13 May 2026 13:02:07 -0600
Subject: [PATCH 23/28] Polish version B Activity highlight sweep

---
 static/style.css | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/static/style.css b/static/style.css
index abd65bca54..8bd471a2ba 100644
--- a/static/style.css
+++ b/static/style.css
@@ -1887,12 +1887,12 @@ body.resizing .sidebar{transition:none!important;}
   color:var(--muted);
 }
 @keyframes _tool-shimmer-sweep{
-  from{-webkit-mask-position:100% 0;mask-position:100% 0;}
-  to{-webkit-mask-position:-200% 0;mask-position:-200% 0;}
+  0%{-webkit-mask-position:100% 0;mask-position:100% 0;}
+  100%{-webkit-mask-position:-200% 0;mask-position:-200% 0;}
 }
 .tool-call-group[data-live-tool-call-group="1"] .tool-call-group-label::after{
-  --activity-sweep-highlight:linear-gradient(90deg,var(--accent) 0%,var(--accent) 48%,color-mix(in srgb,var(--accent) 88%,#000) 48.7%,color-mix(in srgb,var(--accent) 88%,#000) 51.3%,var(--accent) 52%,var(--accent) 100%);
-  --activity-sweep-mask:linear-gradient(90deg,rgba(0,0,0,0) 0%,rgba(0,0,0,0) 44%,rgba(0,0,0,.12) 45.3%,rgba(0,0,0,.32) 46.7%,rgba(0,0,0,.58) 48%,rgba(0,0,0,.8) 48.7%,rgba(0,0,0,.8) 51.3%,rgba(0,0,0,.58) 52%,rgba(0,0,0,.32) 53.3%,rgba(0,0,0,.12) 54.7%,rgba(0,0,0,0) 56%,rgba(0,0,0,0) 100%);
+  --activity-sweep-highlight:linear-gradient(90deg,var(--accent) 0%,var(--accent) 45.2%,color-mix(in srgb,var(--accent) 90%,#000) 46.5%,color-mix(in srgb,var(--accent) 90%,#000) 53.5%,var(--accent) 55%,var(--accent) 100%);
+  --activity-sweep-mask:linear-gradient(90deg,rgba(0,0,0,0) 0%,rgba(0,0,0,0) 38%,rgba(0,0,0,.18) 40.8%,rgba(0,0,0,.46) 43.6%,rgba(0,0,0,.72) 46.5%,rgba(0,0,0,.9) 53.5%,rgba(0,0,0,.52) 55.8%,rgba(0,0,0,.28) 58.2%,rgba(0,0,0,.1) 60.4%,rgba(0,0,0,0) 62%,rgba(0,0,0,0) 100%);
   content:attr(data-sweep-label);
   position:absolute;inset:0;
   color:var(--accent);
@@ -1907,7 +1907,7 @@ body.resizing .sidebar{transition:none!important;}
           mask-size:250% 100%;
   -webkit-mask-repeat:no-repeat;
           mask-repeat:no-repeat;
-  animation:_tool-shimmer-sweep 2.0s linear infinite;
+  animation:_tool-shimmer-sweep 3s cubic-bezier(.45,0,.55,1) infinite;
 }
 @media (prefers-reduced-motion: reduce){
   .tool-call-group[data-live-tool-call-group="1"] .tool-call-group-label::after{

From 1e17760a04cd0e17d23e473e917cf91c713318c1 Mon Sep 17 00:00:00 2001
From: Michael Lam <Michaelyklam1@gmail.com>
Date: Wed, 13 May 2026 12:13:37 -0700
Subject: [PATCH 24/28] Fix opencode-go provider overlap routing

Closes #1894
---
 api/config.py                            |  19 ++-
 tests/test_issue1894_provider_overlap.py | 157 +++++++++++++++++++++++
 2 files changed, 174 insertions(+), 2 deletions(-)
 create mode 100644 tests/test_issue1894_provider_overlap.py

diff --git a/api/config.py b/api/config.py
index 42d6357419..20d7c73add 100644
--- a/api/config.py
+++ b/api/config.py
@@ -1576,10 +1576,25 @@ def resolve_model_provider(model_id: str) -> tuple:
         and not config_provider.startswith('custom:')
     )
     _default_model = model_cfg.get('default') if isinstance(model_cfg, dict) else None
+    # Owns model if it appears in the static catalog for the configured provider.
+    _provider_models_set: set[str] = set()
+    if (
+        config_provider is not None
+        and config_provider in _PROVIDER_MODELS
+        and isinstance(_PROVIDER_MODELS[config_provider], list)
+    ):
+        _provider_models_set = {
+            m.get('id', '') for m in _PROVIDER_MODELS[config_provider]
+            if isinstance(m, dict) and isinstance(m.get('id'), str)
+        }
     _skip_custom_providers = (
         _is_explicit_non_custom_provider
-        and _default_model is not None
-        and model_id == _default_model
+        and (
+            # Guard 1: model is the configured default (existing behaviour).
+            (_default_model is not None and model_id == _default_model)
+            # Guard 2: model is owned by the configured non-custom provider.
+            or model_id in _provider_models_set
+        )
     )
     custom_providers = cfg.get('custom_providers', [])
     if isinstance(custom_providers, list) and not _skip_custom_providers:
diff --git a/tests/test_issue1894_provider_overlap.py b/tests/test_issue1894_provider_overlap.py
new file mode 100644
index 0000000000..af1440016d
--- /dev/null
+++ b/tests/test_issue1894_provider_overlap.py
@@ -0,0 +1,157 @@
+# Copyright 2025 the Hermes WebUI contributors
+# SPDX-License-Identifier: MIT
+
+# noqa: N801
+
+# Regression tests for GitHub issue #1894.
+#
+# Symptom: when the WebUI's configured provider (e.g. `opencode-go`) and a
+# `custom_providers[]` entry both expose the same bare model id (e.g.
+# `deepseek-v4-pro`), the resolver was routing to `custom:<name>` instead of
+# the configured `opencode-go` endpoint.
+#
+# Root cause: `resolve_model_provider()` in `api/config.py` guarded the custom-
+# provider skip only when `model_id == model.default`.  If `model.default`
+# was a different model (e.g. `glm-5.1`), the overlap was not detected and
+# `deepseek-v4-pro` was matched against `custom_providers[]` first, routing
+# the WebUI to the wrong endpoint.
+#
+# Fix: widen the guard so an explicit non-custom provider wins for any model
+# it owns in `_PROVIDER_MODELS[config_provider]`.
+
+from api.config import resolve_model_provider, model_with_provider_context
+
+
+def _apply_config_overrides(cfg_module, overrides):
+    old_model = cfg_module.cfg.get('model')
+    cfg_module.cfg['model'] = {
+        'provider': 'opencode-go',
+        'default': 'glm-5.1',          # intentionally != the overlapping model
+        **overrides,
+    }
+    old_custom = cfg_module.cfg.get('custom_providers')
+    return old_model, old_custom
+
+
+def _restore_config(cfg_module, old_model, old_custom):
+    if old_model is None:
+        cfg_module.cfg.pop('model', None)
+    else:
+        cfg_module.cfg['model'] = old_model
+    if old_custom is None:
+        cfg_module.cfg.pop('custom_providers', None)
+    else:
+        cfg_module.cfg['custom_providers'] = old_custom
+
+
+# ---------------------------------------------------------------------------
+# Case 1 — overlap: selected non-custom provider should win
+# ---------------------------------------------------------------------------
+
+def test_selected_opencode_go_wins_over_custom_provider_overlap():
+    # opencode-go and a custom DeepSeek-compatible endpoint both serve
+    # deepseek-v4-pro.  With opencode-go configured as the active provider,
+    # selection of deepseek-v4-pro must route to opencode-go, not to the
+    # custom endpoint.
+    import api.config as cfg_mod
+    old_model, old_custom = _apply_config_overrides(cfg_mod, {
+        'base_url': 'https://api.opencode.ai/go/v1',
+    })
+    cfg_mod.cfg['custom_providers'] = [{
+        'name': 'ds2api',
+        'base_url': 'http://ds2api:5001/v1/',
+        'models': {'deepseek-v4-pro': {}},
+    }]
+    try:
+        # model_with_provider_context strips the prefix when config_provider
+        # equals the selected provider — deepseek-v4-pro is passed bare.
+        wrapped = model_with_provider_context('deepseek-v4-pro', 'opencode-go')
+        model, provider, base_url = resolve_model_provider(wrapped)
+        assert provider == 'opencode-go', (
+            f'Expected provider=opencode-go, got provider={provider!r}. '
+            f'WebUI was routed to custom provider instead.'
+        )
+        assert base_url == 'https://api.opencode.ai/go/v1', (
+            f'Expected base_url from opencode-go config, got {base_url!r}'
+        )
+        assert model == 'deepseek-v4-pro'
+    finally:
+        _restore_config(cfg_mod, old_model, old_custom)
+
+
+def test_selected_opencode_go_wins_direct_resolve():
+    # Same scenario but bypassing model_with_provider_context to test the
+    # resolver path directly with a bare model id.
+    import api.config as cfg_mod
+    old_model, old_custom = _apply_config_overrides(cfg_mod, {
+        'base_url': 'https://api.opencode.ai/go/v1',
+    })
+    cfg_mod.cfg['custom_providers'] = [{
+        'name': 'ds2api',
+        'base_url': 'http://ds2api:5001/v1/',
+        'models': {'deepseek-v4-pro': {}},
+    }]
+    try:
+        model, provider, base_url = resolve_model_provider('deepseek-v4-pro')
+        assert provider == 'opencode-go', (
+            f'Expected provider=opencode-go, got provider={provider!r}'
+        )
+        assert base_url == 'https://api.opencode.ai/go/v1'
+    finally:
+        _restore_config(cfg_mod, old_model, old_custom)
+
+
+# ---------------------------------------------------------------------------
+# Case 2 — custom-only model: custom provider routing must stay intact
+# ---------------------------------------------------------------------------
+
+def test_custom_only_model_still_routes_to_custom_provider():
+    # A model that exists only in a custom provider must still be routed
+    # correctly when no explicit provider prefix is given.
+    import api.config as cfg_mod
+    old_model, old_custom = _apply_config_overrides(cfg_mod, {
+        'base_url': 'https://api.opencode.ai/go/v1',
+    })
+    cfg_mod.cfg['custom_providers'] = [{
+        'name': 'ds2api',
+        'base_url': 'http://ds2api:5001/v1/',
+        'models': {'my-private-model': {}},
+    }]
+    try:
+        model, provider, base_url = resolve_model_provider('my-private-model')
+        assert provider == 'custom:ds2api', (
+            f'Expected provider=custom:ds2api, got provider={provider!r}'
+        )
+        assert base_url == 'http://ds2api:5001/v1/'
+    finally:
+        _restore_config(cfg_mod, old_model, old_custom)
+
+
+# ---------------------------------------------------------------------------
+# Case 3 — explicit custom provider selection still works
+# ---------------------------------------------------------------------------
+
+def test_explicit_custom_provider_selection_intact():
+    # @custom:<name>:<model> syntax must not be swallowed by the new guard.
+    model, provider, base_url = resolve_model_provider('@custom:ds2api:deepseek-v4-pro')
+    assert provider == 'custom:ds2api', f'Expected provider=custom:ds2api, got {provider!r}'
+    assert model == 'deepseek-v4-pro'
+
+
+# ---------------------------------------------------------------------------
+# Case 4 — existing suffix syntax is preserved
+# ---------------------------------------------------------------------------
+
+def test_openrouter_suffix_still_works():
+    import api.config as cfg_mod
+    old_model, old_custom = _apply_config_overrides(cfg_mod, {
+        'provider': 'anthropic',          # non-openrouter so prefix is needed
+        'default': 'claude-sonnet-4.6',
+    })
+    try:
+        wrapped = model_with_provider_context('tencent/hy3-preview:free', 'openrouter')
+        model, provider, _ = resolve_model_provider(wrapped)
+        assert provider == 'openrouter'
+        assert model == 'tencent/hy3-preview:free'
+    finally:
+        _restore_config(cfg_mod, old_model, old_custom)
\ No newline at end of file

From fe4689e280b5900007f905ab921391e68b0ec631 Mon Sep 17 00:00:00 2001
From: Lucas Coutinho <lrclucas@gmail.com>
Date: Wed, 13 May 2026 16:17:44 -0300
Subject: [PATCH 25/28] test(auth): merge invalidation tests into hash cache
 test file, remove duplicate

---
 tests/test_auth_password_cache.py      | 73 --------------------------
 tests/test_auth_password_hash_cache.py | 52 ++++++++++++++++++
 2 files changed, 52 insertions(+), 73 deletions(-)
 delete mode 100644 tests/test_auth_password_cache.py

diff --git a/tests/test_auth_password_cache.py b/tests/test_auth_password_cache.py
deleted file mode 100644
index 7090271e7d..0000000000
--- a/tests/test_auth_password_cache.py
+++ /dev/null
@@ -1,73 +0,0 @@
-"""
-Tests for the password hash cache invalidation hook.
-
-Verifies that changing the password via save_settings() takes effect
-immediately in the running process — without a restart.
-
-Regression: before the invalidation hook was added to save_settings(),
-_AUTH_HASH_COMPUTED stayed True and get_password_hash() returned the
-stale hash from before the UI password change.
-"""
-import os
-import pathlib
-import tempfile
-import unittest
-
-_TEST_STATE = pathlib.Path(tempfile.mkdtemp())
-os.environ["HERMES_WEBUI_STATE_DIR"] = str(_TEST_STATE)
-
-import sys
-sys.path.insert(0, str(pathlib.Path(__file__).parent.parent))
-
-import importlib
-
-auth = importlib.import_module("api.auth")
-config = importlib.import_module("api.config")
-
-
-def _reset_cache():
-    auth._invalidate_password_hash_cache()
-
-
-class TestPasswordCacheInvalidation(unittest.TestCase):
-
-    def setUp(self):
-        _reset_cache()
-        # Ensure no env-var password interferes
-        os.environ.pop("HERMES_WEBUI_PASSWORD", None)
-
-    def tearDown(self):
-        _reset_cache()
-        os.environ.pop("HERMES_WEBUI_PASSWORD", None)
-
-    def test_set_password_takes_effect_without_restart(self):
-        config.save_settings({"_set_password": "first"})
-        self.assertTrue(auth.verify_password("first"))
-
-        config.save_settings({"_set_password": "second"})
-        # Cache must be invalidated; old password must no longer verify
-        self.assertFalse(auth.verify_password("first"),
-                         "stale hash still accepted after password change — cache not invalidated")
-        self.assertTrue(auth.verify_password("second"))
-
-    def test_clear_password_takes_effect_without_restart(self):
-        config.save_settings({"_set_password": "secret"})
-        self.assertTrue(auth.is_auth_enabled())
-
-        config.save_settings({"_clear_password": True})
-        # Cache must be invalidated; auth must be disabled immediately
-        self.assertFalse(auth.is_auth_enabled(),
-                         "auth still enabled after clear — cache not invalidated")
-        self.assertFalse(auth.verify_password("secret"))
-
-    def test_cache_repopulates_after_invalidation(self):
-        config.save_settings({"_set_password": "pw"})
-        # Warm the cache
-        auth.get_password_hash()
-        # Invalidate and warm again — must reflect current settings.json
-        _reset_cache()
-        self.assertTrue(auth.verify_password("pw"))
-
-
-if __name__ == "__main__":
-    unittest.main()
diff --git a/tests/test_auth_password_hash_cache.py b/tests/test_auth_password_hash_cache.py
index a74ebbd4b1..fe0f9e1093 100644
--- a/tests/test_auth_password_hash_cache.py
+++ b/tests/test_auth_password_hash_cache.py
@@ -42,6 +42,8 @@
 importlib.reload(api.auth)
 auth = api.auth
 
+import api.config as config
+
 
 class TestPasswordHashCache(unittest.TestCase):
     """Verify that get_password_hash() caches after first computation."""
@@ -237,5 +239,55 @@ def worker():
                         "All threads must see None when auth is disabled")
 
 
+class TestPasswordCacheInvalidation(unittest.TestCase):
+    """Verify that save_settings() invalidates the password hash cache.
+
+    Changing the password via the Settings panel must take effect immediately
+    in the running process — without a restart.
+    """
+
+    def setUp(self):
+        auth._AUTH_HASH_LOCK = threading.Lock()
+        auth._AUTH_HASH_COMPUTED = False
+        auth._AUTH_HASH_CACHE = None
+        os.environ.pop('HERMES_WEBUI_PASSWORD', None)
+        # Start with a clean settings.json so write tests are isolated
+        self._sf = config.SETTINGS_FILE
+        self._backup = None
+        if self._sf.exists():
+            self._backup = self._sf.read_text(encoding='utf-8')
+            self._sf.unlink()
+
+    def tearDown(self):
+        if self._backup is not None:
+            self._sf.write_text(self._backup, encoding='utf-8')
+        auth._invalidate_password_hash_cache()
+        os.environ.pop('HERMES_WEBUI_PASSWORD', None)
+
+    def test_set_password_takes_effect_without_restart(self):
+        config.save_settings({"_set_password": "first"})
+        self.assertTrue(auth.verify_password("first"))
+
+        config.save_settings({"_set_password": "second"})
+        self.assertFalse(auth.verify_password("first"),
+                         "stale hash still accepted after password change")
+        self.assertTrue(auth.verify_password("second"))
+
+    def test_clear_password_takes_effect_without_restart(self):
+        config.save_settings({"_set_password": "secret"})
+        self.assertTrue(auth.is_auth_enabled())
+
+        config.save_settings({"_clear_password": True})
+        self.assertFalse(auth.is_auth_enabled(),
+                         "auth still enabled after clear")
+        self.assertFalse(auth.verify_password("secret"))
+
+    def test_cache_repopulates_after_invalidation(self):
+        config.save_settings({"_set_password": "pw"})
+        auth.get_password_hash()
+        auth._invalidate_password_hash_cache()
+        self.assertTrue(auth.verify_password("pw"))
+
+
 if __name__ == "__main__":
     unittest.main()

From 43f86d038e2d4538c19df74dc6d88202e3760cb1 Mon Sep 17 00:00:00 2001
From: Hermes Agent <agent@nesquena-hermes.local>
Date: Wed, 13 May 2026 20:45:44 +0000
Subject: [PATCH 26/28] =?UTF-8?q?stage-350:=20fix=20#2178=20CI=20=E2=80=94?=
 =?UTF-8?q?=20update=20Ollama=20test=20assertion=20to=20match=20new=20allo?=
 =?UTF-8?q?wOllamaFormat=20guard?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

PR #2178 added an 'allowOllamaFormat' guard (resolves to false for non-ollama
@-provider prefixes like '@custom:ai_gateway') to stop the ollama label
formatter from reformatting custom-provider model IDs with dashes. The
existing test asserted on the pre-PR code shape and didn't pick up the new
guard.

Updated the assertion to match the actual post-PR code at static/ui.js:2202,
with an extended assertion message explaining the bug class the guard fixes
(bare custom-provider model IDs like 'Qwen3.6-35B-A3B' had hyphens stripped
to spaces + last letter lowercased by the formatter).
---
 tests/test_ollama_model_chip_label_regression.py | 9 +++++++--
 1 file changed, 7 insertions(+), 2 deletions(-)

diff --git a/tests/test_ollama_model_chip_label_regression.py b/tests/test_ollama_model_chip_label_regression.py
index 51a1a5d1f5..ca024a589b 100644
--- a/tests/test_ollama_model_chip_label_regression.py
+++ b/tests/test_ollama_model_chip_label_regression.py
@@ -40,9 +40,14 @@ def test_get_model_label_formats_bare_ollama_ids():
         "to avoid reformatting generic bare model IDs."
     )
     assert "const ollamaLabel = _fmtOllamaLabel(_last);" in src
-    assert "if ((modelId.startsWith('ollama/') || modelId.startsWith('@ollama') || looksLikeOllamaTag || looksLikeBareOllamaId) && ollamaLabel !== _last) {" in src, (
+    assert "if (allowOllamaFormat && (modelId.startsWith('ollama/') || modelId.startsWith('@ollama') || looksLikeOllamaTag || looksLikeBareOllamaId) && ollamaLabel !== _last) {" in src, (
         "Ollama-tagged ids like 'kimi-k2.6:3b' should still pass through _fmtOllamaLabel() "
-        "when the formatter produces a friendlier label."
+        "when the formatter produces a friendlier label, but ONLY when the resolved "
+        "atProvider is empty or starts with 'ollama' (allowOllamaFormat guard added in "
+        "PR #2178 to stop reformatting non-ollama custom-provider models like "
+        "'custom:ai_gateway/Qwen3.6-35B-A3B'). The guard fixes the bug where bare "
+        "custom-provider model IDs containing dashes had their hyphens stripped to "
+        "spaces and their last letter lowercased by the ollama formatter."
     )
 
 

From 66ffc7d44b8bb5130ad321017c1d9a65d84bd160 Mon Sep 17 00:00:00 2001
From: Hermes Agent <agent@nesquena-hermes.local>
Date: Wed, 13 May 2026 20:46:45 +0000
Subject: [PATCH 27/28] =?UTF-8?q?docs:=20CHANGELOG=20stage-350=20=E2=80=94?=
 =?UTF-8?q?=20close=20v0.51.56,=20open=20Unreleased=20for=207-PR=20medium-?=
 =?UTF-8?q?risk=20batch?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 CHANGELOG.md | 20 ++++++++++++++++++++
 1 file changed, 20 insertions(+)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index 99d907a584..e1c80ccafb 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -2,6 +2,26 @@
 
 ## [Unreleased]
 
+### Fixed
+
+- **PR #2191** by @lucasrc (auth refactor 1/3) — Thread-safe login rate limiter (new `_LOGIN_ATTEMPTS_LOCK`) + PBKDF2 key separation (new `_pbkdf2_key()` reading `.pbkdf2_key` separately from `_signing_key()` reading `.signing_key` — previously both shared `.signing_key`, a key-reuse anti-pattern across HMAC and PBKDF2 primitives) + transparent migration in `verify_password()` that re-salts legacy hashes with the new key on next successful login. 241-line regression suite covering the lock + migration paths. Split from earlier #2167 per maintainer review request.
+
+- **PR #2192** by @lucasrc (auth refactor 2/3, depends on #2191) — Invalidate password-hash cache when password changes via the Settings panel. The PR #2191 cache lives for the process lifetime, but `save_settings({'_set_password': ...})` could mutate `settings.json.password_hash` without telling the auth module — leaving the cache stale and verifying against the old password until restart. Now `save_settings()` calls `_invalidate_password_hash_cache()` on both `_set_password` and `_clear_password` paths. 52-line regression suite + `verify_password()` simplified to rely on the new hook instead of doing the invalidation itself.
+
+- **PR #2193** by @lucasrc (auth refactor 3/3, independent of #2191/2) — Full 64-char HMAC-SHA256 session signatures with upgrade migration bridge. `create_session()` now emits the full digest instead of the previous `[:32]` truncated form; `verify_session()` accepts both lengths during a transition window so existing sessions survive the upgrade without a forced global logout. Restored the `_is_secure_context(handler)` heuristic (getpeercert + X-Forwarded-Proto) that the original #2167 had dropped — adds an `HERMES_WEBUI_SECURE` env-var override on top of the auto-detect. 42-line regression suite covering both signature lengths + Secure-cookie env-var override.
+
+- **PR #2151** by @Jordan-SkyLF — Cancelled chat turns are no longer reported as provider/no-content failures. Classifies user/client cancellation, interruption/abort, provider-empty/no-content, and provider/rate/quota errors separately in streaming error handling. Persists cancelled turns as `_error` assistant markers with verbose copy and a `Cancellation details` disclosure, so reloads match the live UI. Adds race/idempotency guards so worker finalization and `/api/chat/cancel` do not duplicate cancel markers, late Stop clicks after a completed worker save do not emit contradictory cancel events (`_emit_cancel_event = False` short-circuits the terminal event when the writeback is stale), and partial streamed text/reasoning/tool-call metadata is still preserved on real cancellation. Stage-350 maintainer resolution merged this PR's cancel-handler guard with #2136's `_stream_writeback_is_current()` ownership check — both correct guards now coexist on the cancel path.
+
+- **PR #2178** by @hualong1009 — Custom-provider models now display correctly in the model configuration list, and bare custom-provider model IDs containing dashes (e.g. `Qwen3.6-35B-A3B`) no longer have their hyphens stripped to spaces + last letter lowercased by the Ollama label formatter. Adds an `allowOllamaFormat` guard derived from `atProvider` (the `@<provider>` prefix on the model id, if any): the Ollama formatter only runs when `atProvider` is empty or starts with `ollama`. For `@custom:ai_gateway:Qwen3.6-35B-A3B` and similar non-ollama @-provider model IDs, the formatter is suppressed and the model badge label preserves the original casing/punctuation. Stage-350 maintainer fix updated `tests/test_ollama_model_chip_label_regression.py` to assert on the new `allowOllamaFormat &&` guard prefix (the original test asserted on the pre-PR code shape and was failing CI).
+
+- **PR #2204** by @Michaelyklam (closes #1894) — `resolve_model_provider()` now prefers the configured non-custom provider when it owns a requested bare model id (i.e. the id appears in that provider's `_PROVIDER_MODELS` catalog), even when a named custom provider also advertises the same model. Pre-fix, the custom-provider skip applied only when the requested model equalled `model.default`, so `model="deepseek-v4-pro"` under `provider="opencode-go"` with a different default (e.g. `glm-5.1`) could route to a `custom_providers[]` entry (`custom:<name>`) that happened to advertise the same model rather than the configured opencode-go provider. Custom-provider routing for custom-only models is preserved. 157-line regression suite covering the opencode-go/deepseek-v4-pro overlap and explicit provider/suffix parsing.
+
+### Added
+
+- **PR #2203** by @dobby-d-elf — Animates the "Activity: X tools" composer footer text while the LLM is using tools — subtle shimmer gradient that stops when tool-calling completes. Highlight color follows the active theme. Reduced-motion and mask-support fallbacks render plain muted Activity text unchanged in unsupported or `prefers-reduced-motion` environments. Also fixes a small flickering/unclickable first "Thinking" block when the user clicks it while the model is still streaming reasoning into it (unrelated to the animation but right next to it on screen).
+
+## [v0.51.56] — 2026-05-13 — Release AF (stage-349 — Tier 1 safe slice — reasoning_content whitelist + fork-from-here absolute index + Firefox sidebar scroll + provisional session titles)
+
 ### Added
 
 - **PR #2202** by @Jordan-SkyLF — Early session titles on chat start. Pre-fix, new conversations sat as "Untitled" until later title generation completed. Now `/api/chat/start` derives a provisional title from the first user prompt and returns it in the response, so the sidebar and topbar sync immediately. Later SSE title refinements replace the provisional via one guarded helper (only when the current title is still known-default/provisional). Manual/custom user titles are protected via exact-normalized-match detection, so user-renamed prefix titles are never treated as automatic placeholders. 167-line regression suite in `tests/test_early_session_title.py` covering default/eager/manual title behavior, chat-start response shape, JS wiring, and manual-prefix protection.

From 7209e89ef4e20d813b9004e3097ad45ea75a3e07 Mon Sep 17 00:00:00 2001
From: Hermes Agent <agent@nesquena-hermes.local>
Date: Wed, 13 May 2026 21:11:01 +0000
Subject: [PATCH 28/28] =?UTF-8?q?stage-350:=20apply=20Opus=20SHOULD-FIX=20?=
 =?UTF-8?q?=E2=80=94=20tighten=20=5Fpartial=5Falready=5Fpresent=20dedup=20?=
 =?UTF-8?q?scope?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Opus flagged that PR #2151's cancel-handler partial-dedup loop used a
substring check that was too broad: any short prior assistant reply
('OK', 'Here is the answer:') would dedup a longer new partial containing
it, silently dropping the partial and resurrecting the #893 data-loss bug.

Tightened to only dedup against actual prior _partial=True markers with
exact (whitespace-stripped) content match. Three new regression tests
added (short-non-partial-prefix-does-not-dedup, exact-partial-match-still-
dedups, same-content-non-partial-does-not-dedup).

10/10 partial-cancel tests pass after the fix. Also updated CHANGELOG with
the conflict-resolution notes for #2151 vs #2136 and the #2178 test-fix.
---
 CHANGELOG.md                                  |   8 ++
 api/streaming.py                              |  14 ++-
 .../test_issue893_cancel_preserves_partial.py | 112 ++++++++++++++++++
 3 files changed, 131 insertions(+), 3 deletions(-)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index e1c80ccafb..48e58a6e0a 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -20,6 +20,14 @@
 
 - **PR #2203** by @dobby-d-elf — Animates the "Activity: X tools" composer footer text while the LLM is using tools — subtle shimmer gradient that stops when tool-calling completes. Highlight color follows the active theme. Reduced-motion and mask-support fallbacks render plain muted Activity text unchanged in unsupported or `prefers-reduced-motion` environments. Also fixes a small flickering/unclickable first "Thinking" block when the user clicks it while the model is still streaming reasoning into it (unrelated to the animation but right next to it on screen).
 
+### Stage-350 maintainer fixes
+
+- **`api/streaming.py:_partial_already_present` dedup scope tightening** — Opus SHOULD-FIX-pre-merge on PR #2151. The dedup loop that prevents double-writing a `_partial` marker on `cancel_stream` re-entry used a substring check (`_stripped in _existing or _existing in _stripped`) against any prior assistant message — too broad. Any short prior assistant reply like "OK" or "Here is the answer:" would be a substring of many later partial bodies and could silently drop the new partial, resurrecting the #893 data-loss bug on long sessions. Tightened to: only dedup against actual prior `_partial=True` markers, with exact (whitespace-stripped) content match. Three new regression tests added: (a) short prior non-partial reply does NOT dedup a longer new partial that contains it, (b) exact-content match against a prior `_partial` marker DOES still dedup (re-entry safety), (c) prior assistant message with same content but NOT marked `_partial` does NOT dedup (it's from a completed earlier turn). 10/10 partial-cancel tests pass after the fix.
+
+- **`api/streaming.py` cancel-handler conflict resolution between #2151 and the already-shipped #2136** — Resolved a semantic merge conflict on the cancel handler. Both PRs added stale-stream ownership guards at the same site. Kept #2136's `_stream_writeback_is_current()` check as the strictly-stronger condition (it also catches the case where the stream rotated to a new stream with a new pending_user_message — #2151's standalone check would have let that case fall through). Adopted #2151's `_emit_cancel_event = False` semantic on the same path so the terminal cancel SSE event isn't emitted in addition to skipping the writeback (otherwise a successful done payload already delivered to the client would be contradicted by a late cancel event). 55/55 tests across both PR suites pass after the resolution.
+
+- **`tests/test_ollama_model_chip_label_regression.py` updated to match PR #2178's new `allowOllamaFormat` guard** — The existing static-source test asserted on the pre-PR string and was failing CI. Updated the assertion to require the new `allowOllamaFormat &&` guard prefix, with an extended assertion message explaining the bug class (`Qwen3.6-35B-A3B`-shaped bare custom-provider model IDs had hyphens stripped to spaces + last letter lowercased by the ollama formatter pre-fix).
+
 ## [v0.51.56] — 2026-05-13 — Release AF (stage-349 — Tier 1 safe slice — reasoning_content whitelist + fork-from-here absolute index + Firefox sidebar scroll + provisional session titles)
 
 ### Added
diff --git a/api/streaming.py b/api/streaming.py
index d8c7576a18..91dbc738c0 100644
--- a/api/streaming.py
+++ b/api/streaming.py
@@ -4669,10 +4669,18 @@ def cancel_stream(stream_id: str) -> bool:
                 _partial_already_present = False
                 if _stripped:
                     for _m in _cs.messages:
-                        if not isinstance(_m, dict) or _m.get('role') != 'assistant' or _m.get('_error'):
+                        # Stage-350 Opus SHOULD-FIX (#2151): only dedup
+                        # against prior messages flagged _partial, using an
+                        # exact whitespace-stripped content match. The original
+                        # substring check (`_stripped in _existing or
+                        # _existing in _stripped`) was too broad — any short
+                        # prior assistant reply (e.g. "OK", "Here is the
+                        # answer:") becomes a substring of many later partial
+                        # bodies and could silently drop the new partial,
+                        # resurrecting the #893 data-loss bug on long sessions.
+                        if not isinstance(_m, dict) or not _m.get('_partial'):
                             continue
-                        _existing = str(_m.get('content') or '').strip()
-                        if _existing and (_stripped in _existing or _existing in _stripped):
+                        if str(_m.get('content') or '').strip() == _stripped:
                             _partial_already_present = True
                             break
                 if (_stripped or _has_reasoning or _has_tools) and not _partial_already_present:
diff --git a/tests/test_issue893_cancel_preserves_partial.py b/tests/test_issue893_cancel_preserves_partial.py
index 37c79a2385..05e3deb563 100644
--- a/tests/test_issue893_cancel_preserves_partial.py
+++ b/tests/test_issue893_cancel_preserves_partial.py
@@ -295,3 +295,115 @@ def test_partial_message_included_in_api_sanitization(self):
         assert not any('Task cancelled' in c for c in contents), (
             "Cancel marker with _error=True must be stripped from API context"
         )
+
+    def test_short_prior_assistant_reply_does_not_dedup_new_partial(self):
+        '''Stage-350 Opus SHOULD-FIX (#2151 follow-up): the partial-dedup loop
+        in cancel_stream must only dedup against actual prior _partial markers
+        with exact content match, not via substring containment against any
+        prior assistant reply.
+
+        The original substring check (`_stripped in _existing or _existing in
+        _stripped`) was too broad — a short prior assistant reply like "OK" or
+        "Here is the answer:" would be a substring of many later partial bodies
+        and silently drop the new partial, resurrecting the #893 data-loss bug.
+        '''
+        from api.models import Session
+
+        # Build a session that already has a short prior assistant reply
+        s = Session(session_id='sess_short_prior', title='Test')
+        s.messages = [
+            {'role': 'user', 'content': 'Question one'},
+            {'role': 'assistant', 'content': 'OK'},  # short reply, NOT _partial
+            {'role': 'user', 'content': 'Question two — please answer fully'},
+        ]
+
+        # Simulate what cancel_stream does for the dedup check.
+        # The new partial is "OK, let me think about this carefully..."
+        # — "OK" appears as a substring of this. Under the OLD substring
+        # check, this would have set _partial_already_present=True and
+        # dropped the new partial. Under the NEW exact-match-against-_partial
+        # check, no prior _partial exists, so the loop should NOT short-circuit.
+
+        new_partial_content = 'OK, let me think about this carefully...'
+        _stripped = new_partial_content.strip()
+
+        # Inline copy of the dedup loop in api.streaming.cancel_stream (keep in sync):
+        _partial_already_present = False
+        if _stripped:
+            for _m in s.messages:
+                if not isinstance(_m, dict) or not _m.get('_partial'):
+                    continue
+                if str(_m.get('content') or '').strip() == _stripped:
+                    _partial_already_present = True
+                    break
+
+        assert _partial_already_present is False, (
+            "Tightened dedup must NOT consider 'OK' (a non-partial prior "
+            "assistant reply) as deduping a longer new partial that contains it. "
+            "Without the tightening, the substring check `_stripped in _existing "
+            "or _existing in _stripped` would have falsely matched."
+        )
+
+    def test_exact_partial_match_still_dedups(self):
+        '''Stage-350 Opus SHOULD-FIX (#2151 follow-up): the tighter dedup
+        still correctly deduplicates a partial that is being persisted twice
+        with exactly the same content (e.g. cancel_stream re-entered for the
+        same stream id after STREAMS_LOCK is released).
+        '''
+        from api.models import Session
+
+        s = Session(session_id='sess_exact_dedup', title='Test')
+        s.messages = [
+            {'role': 'user', 'content': 'Hello'},
+            # Prior _partial marker with exactly the same content as the incoming one
+            {'role': 'assistant', 'content': 'Partial reply text', '_partial': True},
+        ]
+
+        _stripped = 'Partial reply text'  # incoming partial body, already stripped
+
+        _partial_already_present = False  # inline copy of cancel_stream's dedup loop
+        if _stripped:
+            for _m in s.messages:
+                if not isinstance(_m, dict) or not _m.get('_partial'):
+                    continue
+                if str(_m.get('content') or '').strip() == _stripped:
+                    _partial_already_present = True
+                    break
+
+        assert _partial_already_present is True, (
+            "Exact-content match against a prior _partial marker must still "
+            "dedup so cancel_stream re-entry doesn't double-write the partial."
+        )
+
+    def test_non_partial_assistant_with_same_content_does_not_dedup(self):
+        '''Stage-350 Opus SHOULD-FIX (#2151 follow-up): even if a prior
+        assistant message has exactly the same content, if it isn't marked
+        _partial, it does NOT dedup the new partial. This is correct: the
+        prior message was a completed turn earlier in the same session,
+        and the new _partial belongs to the current cancelled stream.
+        '''
+        from api.models import Session
+
+        s = Session(session_id='sess_nondiluted', title='Test')
+        s.messages = [
+            {'role': 'user', 'content': 'Hello'},
+            # Same content but NOT _partial — this is a completed prior turn
+            {'role': 'assistant', 'content': 'Hi there'},
+        ]
+
+        _stripped = 'Hi there'  # incoming partial body, identical to the prior reply
+
+        _partial_already_present = False  # inline copy of cancel_stream's dedup loop
+        if _stripped:
+            for _m in s.messages:
+                if not isinstance(_m, dict) or not _m.get('_partial'):
+                    continue
+                if str(_m.get('content') or '').strip() == _stripped:
+                    _partial_already_present = True
+                    break
+
+        assert _partial_already_present is False, (
+            "A prior assistant message with same content but NOT _partial "
+            "must not dedup the new partial — it's from a completed earlier turn."
+        )
+