From e4d16e93c76de884c9f831c00aa4944b29bd348e Mon Sep 17 00:00:00 2001 From: Jordan SkyLF Date: Tue, 12 May 2026 12:53:28 -0700 Subject: [PATCH 01/28] fix: clarify cancelled chat turn status --- api/streaming.py | 334 +++++++++++++++++- static/messages.js | 9 +- static/ui.js | 3 +- tests/test_cancelled_turn_status.py | 167 +++++++++ tests/test_issue1361_cancel_data_loss.py | 72 ++++ .../test_issue893_cancel_preserves_partial.py | 9 +- .../test_pr1341_context_window_persistence.py | 7 +- tests/test_sprint36.py | 8 +- 8 files changed, 579 insertions(+), 30 deletions(-) create mode 100644 tests/test_cancelled_turn_status.py diff --git a/api/streaming.py b/api/streaming.py index 8db29e25e3..9bd0480404 100644 --- a/api/streaming.py +++ b/api/streaming.py @@ -136,6 +136,40 @@ def _classify_provider_error(err_str: str, exc=None, *, silent_failure: bool = F err_str = str(err_str or '') _err_lower = err_str.lower() _exc_name = type(exc).__name__ if exc is not None else '' + _is_cancelled = ( + 'cancelled by user' in _err_lower + or 'canceled by user' in _err_lower + or 'user cancelled' in _err_lower + or 'user canceled' in _err_lower + or 'task cancelled' in _err_lower + or 'task canceled' in _err_lower + or (exc is not None and type(exc).__name__ in ('CancelledError', 'CanceledError')) + ) + _is_interrupted = ( + not _is_cancelled + and ( + 'interrupted by user' in _err_lower + or 'response interrupted' in _err_lower + or 'operation interrupted' in _err_lower + or 'operation was interrupted' in _err_lower + or 'operation aborted' in _err_lower + or 'request was aborted' in _err_lower + or 'aborterror' in _err_lower + or (exc is not None and type(exc).__name__ in ('KeyboardInterrupt', 'AbortError')) + ) + ) + if _is_cancelled: + return { + 'label': 'Task cancelled', + 'type': 'cancelled', + 'hint': 'The run was cancelled by the user before Skyly finished. No provider failure occurred.', + } + if _is_interrupted: + return { + 'label': 'Response interrupted', + 'type': 'interrupted', + 'hint': 'The run stopped before a provider response completed. If you did not cancel it, try again.', + } _is_quota = _is_quota_error_text(err_str) _is_auth = ( not _is_quota and ( @@ -213,6 +247,92 @@ def _provider_error_payload(message: str, err_type: str, hint: str = '') -> dict return payload +def _session_has_cancel_marker(session) -> bool: + """Return True if a visible cancel/interrupted marker is already persisted.""" + for msg in reversed(getattr(session, 'messages', None) or []): + if not isinstance(msg, dict): + continue + if msg.get('role') == 'user': + return False + if msg.get('role') != 'assistant': + continue + content = msg.get('content') + text = '' + if isinstance(content, str): + text = content + elif isinstance(content, list): + parts = [] + for part in content: + if isinstance(part, dict): + parts.append(str(part.get('text') or part.get('content') or '')) + text = '\n'.join(parts) + normalized = text.strip().lower() + if 'task cancelled' in normalized or 'task canceled' in normalized: + return True + if 'response interrupted' in normalized: + return True + return False + + +def _cancelled_turn_content(message: str = 'Task cancelled.') -> str: + """Return cancelled-turn copy matching the verbose provider-error layout.""" + _message = str(message or 'Task cancelled.').strip() + if not _message.endswith('.'): + _message += '.' + return ( + f"**Task cancelled:** {_message}\n\n" + "*The run was cancelled by the user before Skyly finished. No provider failure occurred.*" + ) + + +def _persist_cancelled_turn(session, *, message: str = 'Task cancelled.') -> None: + """Persist a user-cancelled terminal state without provider-error wording. + + cancel_stream() usually writes this marker first, but the streaming thread can + later unwind through the silent-failure or exception path. Those paths must + not append a misleading provider no-response error after an explicit cancel. + """ + _materialize_pending_user_turn_before_error(session) + session.active_stream_id = None + session.pending_user_message = None + session.pending_attachments = [] + session.pending_started_at = None + if not _session_has_cancel_marker(session): + session.messages.append({ + 'role': 'assistant', + 'content': _cancelled_turn_content(message), + '_error': True, + 'provider_details': str(message or 'Task cancelled.').strip(), + 'provider_details_label': 'Cancellation details', + 'timestamp': int(time.time()), + }) + + +def _cleanup_ephemeral_cancelled_turn(session) -> None: + """Remove transient /btw session state after a cancel without saving it.""" + session.active_stream_id = None + session.pending_user_message = None + session.pending_attachments = [] + session.pending_started_at = None + try: + import pathlib + pathlib.Path(session.path).unlink(missing_ok=True) + except Exception: + logger.debug("Failed to clean up ephemeral cancelled session", exc_info=True) + + +def _finalize_cancelled_turn(session, *, ephemeral: bool = False, message: str = 'Task cancelled.') -> None: + """Finalize a cancelled turn for persistent or ephemeral sessions.""" + if ephemeral: + _cleanup_ephemeral_cancelled_turn(session) + return + _persist_cancelled_turn(session, message=message) + try: + session.save() + except Exception: + logger.debug("Failed to persist cancelled turn", exc_info=True) + + def _aiagent_import_error_detail() -> str: """Return a multi-line diagnostic string for the "AIAgent not available" path. @@ -2277,6 +2397,8 @@ def _agent_status_callback(kind, message): # TD1: set thread-local env context so concurrent sessions don't clobber globals # Check for pre-flight cancel (user cancelled before agent even started) if cancel_event.is_set(): + with _agent_lock: + _finalize_cancelled_turn(s, ephemeral=ephemeral, message='Task cancelled before start.') put('cancel', {'message': 'Cancelled before start'}) return @@ -2996,6 +3118,8 @@ def on_tool_complete(tool_call_id, name, args, function_result): agent.interrupt("Cancelled before start") except Exception: logger.debug("Failed to interrupt agent before start") + with _agent_lock: + _finalize_cancelled_turn(s, ephemeral=ephemeral, message='Task cancelled before start.') put('cancel', {'message': 'Cancelled by user'}) return @@ -3097,6 +3221,30 @@ def _periodic_checkpoint(): task_id=session_id, persist_user_message=msg_text, ) + if cancel_event.is_set(): + if _checkpoint_stop is not None: + _checkpoint_stop.set() + if _ckpt_thread is not None: + _ckpt_thread.join(timeout=15) + if ephemeral: + _cleanup_ephemeral_cancelled_turn(s) + else: + with _agent_lock: + _finalize_cancelled_turn(s, ephemeral=False) + try: + append_turn_journal_event_for_stream( + s.session_id, + stream_id, + { + "event": "interrupted", + "created_at": time.time(), + "reason": "cancelled", + }, + ) + except Exception: + logger.debug("Failed to append cancelled turn journal event", exc_info=True) + put('cancel', {'message': 'Cancelled by user'}) + return # ── Ephemeral mode (/btw): deliver answer, skip persistence, cleanup ── if ephemeral: _answer = '' @@ -3122,8 +3270,41 @@ def _periodic_checkpoint(): _checkpoint_stop.set() if _ckpt_thread is not None: _ckpt_thread.join(timeout=15) + if cancel_event.is_set(): + with _agent_lock: + _finalize_cancelled_turn(s, ephemeral=False) + try: + append_turn_journal_event_for_stream( + s.session_id, + stream_id, + { + "event": "interrupted", + "created_at": time.time(), + "reason": "cancelled", + }, + ) + except Exception: + logger.debug("Failed to append cancelled turn journal event", exc_info=True) + put('cancel', {'message': 'Cancelled by user'}) + return with _agent_lock: _result_messages = result.get('messages') or _previous_context_messages + if cancel_event.is_set(): + _finalize_cancelled_turn(s, ephemeral=False) + try: + append_turn_journal_event_for_stream( + s.session_id, + stream_id, + { + "event": "interrupted", + "created_at": time.time(), + "reason": "cancelled", + }, + ) + except Exception: + logger.debug("Failed to append cancelled turn journal event", exc_info=True) + put('cancel', {'message': 'Cancelled by user'}) + return _next_context_messages = _restore_reasoning_metadata( _previous_context_messages, _result_messages, @@ -3162,6 +3343,23 @@ def _periodic_checkpoint(): ) # _token_sent tracks whether on_token() was called (any streamed text) if not _assistant_added and not _token_sent: + if cancel_event.is_set(): + _finalize_cancelled_turn(s, ephemeral=ephemeral) + if not ephemeral: + try: + append_turn_journal_event_for_stream( + s.session_id, + stream_id, + { + "event": "interrupted", + "created_at": time.time(), + "reason": "cancelled", + }, + ) + except Exception: + logger.debug("Failed to append cancelled turn journal event", exc_info=True) + put('cancel', {'message': 'Cancelled by user'}) + return _last_err = getattr(agent, '_last_error', None) or result.get('error') or '' _err_str = str(_last_err) if _last_err else '' _classification = _classify_provider_error( @@ -3313,6 +3511,10 @@ def _periodic_checkpoint(): } if _error_payload.get('details'): _error_message['provider_details'] = _error_payload['details'] + if _err_type == 'cancelled': + _error_message['provider_details_label'] = 'Cancellation details' + elif _err_type == 'interrupted': + _error_message['provider_details_label'] = 'Interruption details' s.messages.append(_error_message) try: s.save() @@ -3600,7 +3802,39 @@ def _periodic_checkpoint(): ) except Exception: logger.debug("Failed to append assistant_started turn journal event", exc_info=True) + if cancel_event.is_set(): + _finalize_cancelled_turn(s, ephemeral=False) + try: + append_turn_journal_event_for_stream( + s.session_id, + stream_id, + { + "event": "interrupted", + "created_at": time.time(), + "reason": "cancelled", + }, + ) + except Exception: + logger.debug("Failed to append cancelled turn journal event", exc_info=True) + put('cancel', {'message': 'Cancelled by user'}) + return s.save() + if cancel_event.is_set(): + _finalize_cancelled_turn(s, ephemeral=False) + try: + append_turn_journal_event_for_stream( + s.session_id, + stream_id, + { + "event": "interrupted", + "created_at": time.time(), + "reason": "cancelled", + }, + ) + except Exception: + logger.debug("Failed to append cancelled turn journal event", exc_info=True) + put('cancel', {'message': 'Cancelled by user'}) + return if not ephemeral: try: append_turn_journal_event_for_stream( @@ -3857,12 +4091,38 @@ def _periodic_checkpoint(): err_str = _stripped _exc_lower = err_str.lower() _classification = _classify_provider_error(err_str, e) + if cancel_event.is_set(): + if s is not None: + if _checkpoint_stop is not None: + _checkpoint_stop.set() + if _ckpt_thread is not None: + _ckpt_thread.join(timeout=15) + _lock_ctx = _agent_lock if _agent_lock is not None else contextlib.nullcontext() + with _lock_ctx: + _finalize_cancelled_turn(s, ephemeral=ephemeral) + if not ephemeral: + try: + append_turn_journal_event_for_stream( + s.session_id, + stream_id, + { + "event": "interrupted", + "created_at": time.time(), + "reason": "cancelled", + }, + ) + except Exception: + logger.debug("Failed to append cancelled turn journal event", exc_info=True) + put('cancel', {'message': 'Cancelled by user'}) + return _exc_is_quota = _classification['type'] == 'quota_exhausted' # Exception quota text still includes: 'more credits' in _exc_lower, 'can only afford' in _exc_lower, 'fewer max_tokens' in _exc_lower. # Rate-limit detection remains guarded as: (not _exc_is_quota). _exc_is_rate_limit = (_classification['type'] == 'rate_limit') and (not _exc_is_quota) _exc_is_auth = _classification['type'] == 'auth_mismatch' # detects '401' and 'unauthorized' via _classify_provider_error. _exc_is_not_found = _classification['type'] == 'model_not_found' # detects '404', 'not found', 'does not exist', and 'invalid model'. + _exc_is_cancelled = _classification['type'] == 'cancelled' + _exc_is_interrupted = _classification['type'] == 'interrupted' # The user hint still points to Settings / `hermes model` from _classify_provider_error(). if _exc_is_quota: @@ -3955,6 +4215,10 @@ def _periodic_checkpoint(): _exc_label, _exc_type, _exc_hint = ( _classification['label'], _classification['type'], _classification['hint'], ) + elif _exc_is_cancelled or _exc_is_interrupted: + _exc_label, _exc_type, _exc_hint = ( + _classification['label'], _classification['type'], _classification['hint'], + ) else: _exc_label, _exc_type, _exc_hint = 'Error', 'error', '' @@ -3982,6 +4246,10 @@ def _periodic_checkpoint(): } if _error_payload.get('details'): _error_message['provider_details'] = _error_payload['details'] + if _exc_type == 'cancelled': + _error_message['provider_details_label'] = 'Cancellation details' + elif _exc_type == 'interrupted': + _error_message['provider_details_label'] = 'Interruption details' s.messages.append(_error_message) try: s.save() @@ -4185,13 +4453,12 @@ def cancel_stream(stream_id: str) -> bool: except Exception: logger.debug("Failed to clear clarify prompt during cancel") - # Put a cancel sentinel into the queue so the SSE handler wakes up + # Capture the queue while the stream still exists, but do not emit the + # terminal cancel event until the session cleanup below confirms the turn + # is still active. Otherwise a late Stop click can race with a successful + # worker save and show cancel in the client while persistence says done. q = streams.get(stream_id) - if q: - try: - q.put_nowait(('cancel', {'message': 'Cancelled by user'})) - except Exception: - logger.debug("Failed to put cancel event to queue") + _emit_cancel_event = True # ── Eager session lock release (fixes #653) ────────────────────────── # Pop stream state now so the 409 guard in routes.py sees the session @@ -4241,6 +4508,16 @@ def cancel_stream(stream_id: str) -> bool: with _get_session_agent_lock(_cancel_session_id): try: _cs = get_session(_cancel_session_id) + if not isinstance(getattr(_cs, 'messages', None), list): + _cs.messages = [] + if (getattr(_cs, 'active_stream_id', None) != stream_id + and not getattr(_cs, 'pending_user_message', None)): + # The worker won the race and already finalized this turn. + # Do not append a contradictory cancel marker or emit a + # terminal cancel event after the client may have received + # the successful done payload. + _emit_cancel_event = False + return True # ── Preserve the user's typed message before clearing pending state (#1298) ── # The agent's internal messages list (where the user message was appended at # the start of run_conversation()) may not have been merged back into @@ -4334,7 +4611,27 @@ def cancel_stream(stream_id: str) -> bool: # reasoning-only or tool-only stream produced NO partial message). _has_reasoning = bool(_cancel_reasoning and _cancel_reasoning.strip()) _has_tools = bool(_cancel_tool_calls) - if _stripped or _has_reasoning or _has_tools: + _cancel_marker_exists = _session_has_cancel_marker(_cs) + _cancel_marker_idx = len(_cs.messages) + if _cancel_marker_exists: + for _idx in range(len(_cs.messages) - 1, -1, -1): + _m = _cs.messages[_idx] + if not isinstance(_m, dict) or _m.get('role') != 'assistant': + continue + _content = str(_m.get('content') or '').strip().lower() + if 'task cancelled' in _content or 'task canceled' in _content or 'response interrupted' in _content: + _cancel_marker_idx = _idx + break + _partial_already_present = False + if _stripped: + for _m in _cs.messages: + if not isinstance(_m, dict) or _m.get('role') != 'assistant' or _m.get('_error'): + continue + _existing = str(_m.get('content') or '').strip() + if _existing and (_stripped in _existing or _existing in _stripped): + _partial_already_present = True + break + if (_stripped or _has_reasoning or _has_tools) and not _partial_already_present: _partial_msg: dict = { 'role': 'assistant', 'content': _stripped, # may be empty for reasoning/tool-only turns @@ -4361,18 +4658,27 @@ def cancel_stream(stream_id: str) -> bool: # alongside the regular tool_calls path. # (Opus pre-release review pass 2 of v0.50.251.) _partial_msg['_partial_tool_calls'] = list(_cancel_tool_calls) - _cs.messages.append(_partial_msg) + _cs.messages.insert(_cancel_marker_idx, _partial_msg) # Cancel marker — flagged _error=True so it is stripped from conversation # history on the next turn (prevents model from seeing "Task cancelled." # as a prior assistant reply). - _cs.messages.append({ - 'role': 'assistant', - 'content': '*Task cancelled.*', - '_error': True, - 'timestamp': int(time.time()), - }) + if not _cancel_marker_exists: + _cs.messages.append({ + 'role': 'assistant', + 'content': _cancelled_turn_content('Task cancelled.'), + '_error': True, + 'provider_details': 'Task cancelled.', + 'provider_details_label': 'Cancellation details', + 'timestamp': int(time.time()), + }) _cs.save() except Exception: logger.debug("Failed to clear session state on cancel for %s", _cancel_session_id) + if _emit_cancel_event and q: + try: + q.put_nowait(('cancel', {'message': 'Cancelled by user'})) + except Exception: + logger.debug("Failed to put cancel event to queue") + return True diff --git a/static/messages.js b/static/messages.js index fc7c6d975f..83ad09cb0f 100644 --- a/static/messages.js +++ b/static/messages.js @@ -1228,11 +1228,14 @@ function attachLiveStream(activeSid, streamId, uploaded=[], options={}){ const isQuotaExhausted=d.type==='quota_exhausted'; const isAuthMismatch=d.type==='auth_mismatch'; const isModelNotFound=d.type==='model_not_found'; + const isCancelled=d.type==='cancelled'; + const isInterrupted=d.type==='interrupted'; const isNoResponse=d.type==='no_response'||d.type==='silent_failure'; - const label=isQuotaExhausted?'Out of credits':isRateLimit?'Rate limit reached':isAuthMismatch?(typeof t==='function'?t('provider_mismatch_label'):'Provider mismatch'):isModelNotFound?(typeof t==='function'?t('model_not_found_label'):'Model not found'):isNoResponse?'No response received':'Error'; + const label=isCancelled?'Task cancelled':isInterrupted?'Response interrupted':isQuotaExhausted?'Out of credits':isRateLimit?'Rate limit reached':isAuthMismatch?(typeof t==='function'?t('provider_mismatch_label'):'Provider mismatch'):isModelNotFound?(typeof t==='function'?t('model_not_found_label'):'Model not found'):isNoResponse?'No response from provider':'Error'; const hint=d.hint?`\n\n*${d.hint}*`:''; const details=d.details?String(d.details).replace(/```/g,'`\u200b``'):''; - S.messages.push({role:'assistant',content:`**${label}:** ${d.message}${hint}`,provider_details:details}); + const detailsLabel=isCancelled?'Cancellation details':isInterrupted?'Interruption details':undefined; + S.messages.push({role:'assistant',content:`**${label}:** ${d.message}${hint}`,provider_details:details,provider_details_label:detailsLabel}); }catch(_){ S.messages.push({role:'assistant',content:'**Error:** An error occurred. Check server logs.'}); } @@ -1323,7 +1326,7 @@ function attachLiveStream(activeSid, streamId, uploaded=[], options={}){ // Fallback to local cancel message if API fails if(S.session&&S.session.session_id===activeSid){ clearLiveToolCards();if(!assistantText)removeThinking(); - S.messages.push({role:'assistant',content:'*Task cancelled.*'});renderMessages({preserveScroll:true}); + S.messages.push({role:'assistant',content:'**Task cancelled:** Task cancelled.\n\n*The run was cancelled by the user before Skyly finished. No provider failure occurred.*',provider_details:'Task cancelled.',provider_details_label:'Cancellation details',_error:true});renderMessages({preserveScroll:true}); _markSessionViewed(activeSid, S.messages.length); } } diff --git a/static/ui.js b/static/ui.js index d071d93ed9..1cbb21c58d 100644 --- a/static/ui.js +++ b/static/ui.js @@ -5013,7 +5013,8 @@ function renderMessages(options){ } let bodyHtml = isUser ? _renderUserFencedBlocks(displayContent) : renderMd(_stripXmlToolCallsDisplay(String(displayContent))); if(!isUser&&m.provider_details){ - bodyHtml += `
Provider details
${esc(String(m.provider_details))}
`; + const summary=m.provider_details_label||'Provider details'; + bodyHtml += `
${esc(String(summary))}
${esc(String(m.provider_details))}
`; } const statusHtml = (!isUser&&m._statusCard) ? _statusCardHtml(m._statusCard) : ''; const isEditableUser=isUser&&rawIdx===lastUserRawIdx; diff --git a/tests/test_cancelled_turn_status.py b/tests/test_cancelled_turn_status.py new file mode 100644 index 0000000000..b241674c4d --- /dev/null +++ b/tests/test_cancelled_turn_status.py @@ -0,0 +1,167 @@ +"""Regression tests for accurate cancelled/interrupted turn status. + +A user pressing Stop/Cancel must not be shown provider-empty guidance like +"No response from provider". Provider-empty remains valid only when there was +no explicit cancel/interruption signal. +""" +from __future__ import annotations + +import pathlib + +from api.streaming import _cancelled_turn_content, _classify_provider_error, _finalize_cancelled_turn + +REPO_ROOT = pathlib.Path(__file__).parent.parent.resolve() + + +def _read(rel_path: str) -> str: + return (REPO_ROOT / rel_path).read_text(encoding="utf-8") + + +class _DummySession: + def __init__(self, path: str = ''): + self.path = path + self.messages = [] + self.active_stream_id = 'stream-1' + self.pending_user_message = 'hello' + self.pending_attachments = ['a.txt'] + self.pending_started_at = 123 + self.saved = 0 + + def save(self, *args, **kwargs): + self.saved += 1 + + +class TestCancelledTurnClassification: + def test_user_cancelled_error_is_not_provider_no_response(self): + result = _classify_provider_error("Cancelled by user", Exception("Cancelled by user")) + + assert result["type"] == "cancelled" + assert result["label"] == "Task cancelled" + assert "provider returned no content" not in result.get("hint", "").lower() + assert "rate limit" not in result.get("hint", "").lower() + assert "no provider failure" in result.get("hint", "").lower() + + def test_interrupted_or_aborted_error_is_not_provider_no_response(self): + for text in ( + "Interrupted by user", + "Operation aborted before provider response completed", + "AbortError: request was aborted", + ): + result = _classify_provider_error(text, RuntimeError(text)) + assert result["type"] == "interrupted", text + assert result["label"] == "Response interrupted", text + assert "provider returned no content" not in result.get("hint", "").lower() + + def test_provider_empty_response_still_uses_no_response(self): + result = _classify_provider_error("", None, silent_failure=True) + + assert result["type"] == "no_response" + assert result["label"] == "No response from provider" + assert "provider returned no content" in result.get("hint", "").lower() + + +class TestCancelledTurnFinalizer: + def test_persistent_cancel_finalizer_clears_pending_and_saves_cancel_marker(self): + session = _DummySession() + + _finalize_cancelled_turn(session, ephemeral=False) + + assert session.active_stream_id is None + assert session.pending_user_message is None + assert session.pending_attachments == [] + assert session.pending_started_at is None + assert session.saved == 1 + assert session.messages[-1]['content'] == _cancelled_turn_content('Task cancelled.') + assert '**Task cancelled:** Task cancelled.' in session.messages[-1]['content'] + assert 'No provider failure occurred' in session.messages[-1]['content'] + assert session.messages[-1]['provider_details'] == 'Task cancelled.' + assert session.messages[-1]['provider_details_label'] == 'Cancellation details' + assert session.messages[-1]['_error'] is True + + def test_ephemeral_cancel_finalizer_unlinks_temp_session_without_saving_error_marker(self, tmp_path): + temp_session = tmp_path / 'btw-session.json' + temp_session.write_text('{}', encoding='utf-8') + session = _DummySession(str(temp_session)) + + _finalize_cancelled_turn(session, ephemeral=True) + + assert session.active_stream_id is None + assert session.pending_user_message is None + assert session.pending_attachments == [] + assert session.pending_started_at is None + assert session.saved == 0 + assert session.messages == [] + assert not temp_session.exists() + + + def test_message_renderer_allows_non_provider_details_label(self): + src = _read("static/ui.js") + assert "provider_details_label||'Provider details'" in src + assert "provider-error-details" in src + + +class TestCancelledTurnPersistenceGuards: + def test_silent_failure_path_checks_cancel_event_before_persisting_provider_error(self): + src = _read("api/streaming.py") + silent_idx = src.find("# ── Detect silent agent failure") + assert silent_idx != -1, "silent-failure block not found" + apperror_idx = src.find("put('apperror', _error_payload)", silent_idx) + assert apperror_idx != -1, "silent-failure apperror emission not found" + block = src[silent_idx:apperror_idx] + + assert "cancel_event.is_set()" in block, ( + "When a user cancels and the interrupted agent returns no assistant text, " + "the silent-failure path must not persist a provider no_response error." + ) + assert "cancelled" in block.lower(), ( + "The cancellation guard should persist/report a cancelled turn, not silently drop state." + ) + + def test_exception_path_classifies_after_cancel_event_before_generic_error(self): + src = _read("api/streaming.py") + except_idx = src.find("print('[webui] stream error:") + assert except_idx != -1, "stream exception handler not found" + classify_idx = src.find("_classify_provider_error", except_idx) + generic_idx = src.find("_exc_label, _exc_type, _exc_hint = 'Error', 'error', ''", except_idx) + assert classify_idx != -1 and generic_idx != -1 + block = src[except_idx:generic_idx] + + assert "cancel_event.is_set()" in block, ( + "Exception handling must distinguish user-cancelled/aborted runs before generic errors." + ) + assert "cancelled" in block.lower() or "interrupted" in block.lower() + assert "provider_details_label" in src + assert "Cancellation details" in src + assert "Interruption details" in src + + def test_post_run_cancel_guard_runs_before_normal_success_merge(self): + src = _read("api/streaming.py") + run_idx = src.find("result = agent.run_conversation(") + merge_idx = src.find("_result_messages = result.get", run_idx) + assert run_idx != -1 and merge_idx != -1, "run/merge path not found" + block = src[run_idx:merge_idx] + + assert "cancel_event.is_set()" in block, ( + "If cancellation arrives after tokens streamed but before run_conversation returns, " + "the worker must emit/persist cancel before normal merge/save/completed handling." + ) + assert "put('cancel'" in block + assert "_cleanup_ephemeral_cancelled_turn" in block or "_finalize_cancelled_turn" in block, ( + "Ephemeral cancels must clean up their temporary session before returning." + ) + assert "return" in block + + def test_frontend_has_cancelled_and_interrupted_labels_for_apperror_fallbacks(self): + src = _read("static/messages.js") + start = src.find("source.addEventListener('apperror'") + end = src.find("source.addEventListener('warning'", start) + assert start != -1 and end != -1, "apperror handler not found" + block = src[start:end] + + assert "d.type==='cancelled'" in block or 'd.type==="cancelled"' in block + assert "d.type==='interrupted'" in block or 'd.type==="interrupted"' in block + assert "Task cancelled" in block + assert "Response interrupted" in block + assert "No response from provider" in block + assert "Cancellation details" in block + assert "Interruption details" in block diff --git a/tests/test_issue1361_cancel_data_loss.py b/tests/test_issue1361_cancel_data_loss.py index 09fc77f0de..817f02e5a1 100644 --- a/tests/test_issue1361_cancel_data_loss.py +++ b/tests/test_issue1361_cancel_data_loss.py @@ -441,3 +441,75 @@ def test_materialize_helper_called_immediately_before_error_path_clears(): f"found {sites_with_helper}. PR #1760 / #1361 regression — re-wire the " f"helper at the error-branch clear sites in api/streaming.py." ) + + + +class TestCancelStreamIdempotentWithWorkerFinalizer: + """The worker and explicit cancel endpoint can both finalize the same turn.""" + + def test_cancel_stream_does_not_duplicate_existing_worker_cancel_marker(self): + sid = "test_1361_idempotent" + stream_id = "stream_idempotent" + _make_session( + session_id=sid, + messages=[ + {'role': 'user', 'content': 'Help me debug this', 'timestamp': 100}, + {'role': 'assistant', 'content': '**Task cancelled:** Task cancelled.\n\n*The run was cancelled by the user before Skyly finished. No provider failure occurred.*', '_error': True, 'timestamp': 101}, + ], + ) + _setup_cancel_state(sid, stream_id) + config.STREAM_PARTIAL_TEXT[stream_id] = "partial text before cancel" + + cancel_stream(stream_id) + + msgs = models.SESSIONS[sid].messages + cancel_markers = [ + m for m in msgs + if isinstance(m, dict) + and m.get('role') == 'assistant' + and 'task cancelled' in str(m.get('content') or '').lower() + ] + partial_idx = next( + i for i, m in enumerate(msgs) + if isinstance(m, dict) and m.get('_partial') and m.get('content') == 'partial text before cancel' + ) + marker_idx = next(i for i, m in enumerate(msgs) if m in cancel_markers) + + assert len(cancel_markers) == 1 + assert partial_idx < marker_idx + + def test_late_cancel_after_worker_finalized_does_not_add_cancel_marker(self): + sid = "test_1361_late_done" + stream_id = "stream_late_done" + s = Session( + session_id=sid, + title="Done Session", + messages=[ + {'role': 'user', 'content': 'finish normally', 'timestamp': 100}, + {'role': 'assistant', 'content': 'done normally', 'timestamp': 101}, + ], + ) + s.active_stream_id = None + s.pending_user_message = None + s.pending_attachments = [] + s.pending_started_at = None + s.save() + models.SESSIONS[sid] = s + + q = queue.Queue() + config.STREAMS[stream_id] = q + config.CANCEL_FLAGS[stream_id] = threading.Event() + mock_agent = Mock() + mock_agent.session_id = sid + mock_agent.interrupt = Mock() + config.AGENT_INSTANCES[stream_id] = mock_agent + config.STREAM_PARTIAL_TEXT[stream_id] = 'stale partial snapshot' + + assert cancel_stream(stream_id) is True + + msgs = models.SESSIONS[sid].messages + assert msgs == [ + {'role': 'user', 'content': 'finish normally', 'timestamp': 100}, + {'role': 'assistant', 'content': 'done normally', 'timestamp': 101}, + ] + assert q.empty(), "late cancel must not emit a terminal cancel event after done" diff --git a/tests/test_issue893_cancel_preserves_partial.py b/tests/test_issue893_cancel_preserves_partial.py index df36f6aa0e..37c79a2385 100644 --- a/tests/test_issue893_cancel_preserves_partial.py +++ b/tests/test_issue893_cancel_preserves_partial.py @@ -3,7 +3,7 @@ assistant content rather than discarding it. Before this fix, clicking Stop Generation threw away all streamed text. The -session was saved with only '*Task cancelled.*' appended, so the user lost +session was saved with only a cancellation marker appended, so the user lost whatever the agent had produced up to that point. After this fix: @@ -118,7 +118,7 @@ def interrupt(self, _): pass assert any('Python is a high-level programming language' in c for c in msg_contents), ( f"Partial text not found in session messages: {msg_contents}" ) - assert any('*Task cancelled.*' in c for c in msg_contents), ( + assert any('Task cancelled:' in c for c in msg_contents), ( "Cancel marker missing from session messages" ) # Partial message should NOT have _error=True (it's real content) @@ -127,8 +127,9 @@ def interrupt(self, _): pass assert partial_msg.get('_partial') is True assert not partial_msg.get('_error') # Cancel marker should have _error=True - cancel_msg = next(m for m in saved.messages if '*Task cancelled.*' in m.get('content', '')) + cancel_msg = next(m for m in saved.messages if 'Task cancelled:' in m.get('content', '')) assert cancel_msg.get('_error') is True + assert cancel_msg.get('provider_details_label') == 'Cancellation details' def test_cancel_stream_with_no_partial_text_still_saves_cancel_marker(self, tmp_path, monkeypatch): """If no tokens were streamed before cancel, only the cancel marker is saved.""" @@ -168,7 +169,7 @@ def interrupt(self, _): pass saved = Session.load('sess_nopartial') msg_contents = [m.get('content', '') for m in saved.messages] - assert any('*Task cancelled.*' in c for c in msg_contents) + assert any('Task cancelled:' in c for c in msg_contents) # No extra partial message when there was nothing streamed assert not any(m.get('_partial') for m in saved.messages), ( "Should not add partial message when no tokens were streamed" diff --git a/tests/test_pr1341_context_window_persistence.py b/tests/test_pr1341_context_window_persistence.py index 2311250c11..bcbf99c30f 100644 --- a/tests/test_pr1341_context_window_persistence.py +++ b/tests/test_pr1341_context_window_persistence.py @@ -38,11 +38,12 @@ def test_streaming_persists_context_fields_on_session_before_save(): # Save call follows shortly after save_call = src.find("\n s.save()", block_start) assert save_call != -1, "s.save() not found after the post-merge marker" - # Limit bumped to 8200 by turn-journal lifecycle events: the block now also - # records `assistant_started` immediately before the durable final save. + # Limit bumped to 9000 by cancellation finalization guards: the block now also + # checks for a late user cancel immediately before the durable final save, + # preventing a race that would otherwise save/emit a completed turn after Stop. # The context_length fallback is still a single focused resolver call with # arg-prep scaffold and commentary explaining the failure mode it prevents. - assert save_call - block_start < 8200, ( + assert save_call - block_start < 9000, ( "s.save() should be close to the post-merge marker — block expanded unexpectedly. " "If you've added a new pre-save mutation block here, bump this limit." ) diff --git a/tests/test_sprint36.py b/tests/test_sprint36.py index a5c2cbfe78..e9317fad3d 100644 --- a/tests/test_sprint36.py +++ b/tests/test_sprint36.py @@ -212,16 +212,14 @@ def test_cancel_marker_flagged_as_error_to_skip_in_api_history(): _error: True so _sanitize_messages_for_api() strips it from the conversation_history sent to the agent on the next user message. - Without this flag, the LLM sees "*Task cancelled.*" as a prior assistant + Without this flag, the LLM sees "Task cancelled" as a prior assistant turn and may reference it in subsequent responses ("As I mentioned, I was cancelled...") — a behavioral regression introduced when this PR started persisting the marker to the session. """ src = read("api/streaming.py") - idx = src.find("'content': '*Task cancelled.*'") - if idx == -1: - idx = src.find('"content": "*Task cancelled.*"') - assert idx != -1, "cancel marker content string not found in cancel_stream()" + idx = src.find("'content': _cancelled_turn_content(message)") + assert idx != -1, "cancel marker content writer not found in cancel_stream()" # Walk back to the start of the dict literal (opening brace) brace_open = src.rfind("{", 0, idx) From 112eadc209613fd358b9080b71ae4a05717e3b8b Mon Sep 17 00:00:00 2001 From: Jordan SkyLF Date: Tue, 12 May 2026 15:43:36 -0700 Subject: [PATCH 02/28] fix: address cancelled turn review feedback - classify string-only CancelledError payloads as cancelled - centralize cancel marker substring matching - add targeted regression coverage --- api/streaming.py | 12 +++++++----- tests/test_cancelled_turn_status.py | 20 +++++++++++++++++++- 2 files changed, 26 insertions(+), 6 deletions(-) diff --git a/api/streaming.py b/api/streaming.py index 9bd0480404..fea387d813 100644 --- a/api/streaming.py +++ b/api/streaming.py @@ -127,6 +127,9 @@ def _clarify_timeout_seconds(default: int = 120) -> int: return default +_CANCEL_MARKER_PATTERNS = ('task cancelled', 'task canceled', 'response interrupted') + + def _classify_provider_error(err_str: str, exc=None, *, silent_failure: bool = False) -> dict: """Classify provider/agent failure text for WebUI apperror UX. @@ -143,7 +146,8 @@ def _classify_provider_error(err_str: str, exc=None, *, silent_failure: bool = F or 'user canceled' in _err_lower or 'task cancelled' in _err_lower or 'task canceled' in _err_lower - or (exc is not None and type(exc).__name__ in ('CancelledError', 'CanceledError')) + or 'cancellederror' in _err_lower + or (exc is not None and _exc_name in ('CancelledError', 'CanceledError')) ) _is_interrupted = ( not _is_cancelled @@ -267,9 +271,7 @@ def _session_has_cancel_marker(session) -> bool: parts.append(str(part.get('text') or part.get('content') or '')) text = '\n'.join(parts) normalized = text.strip().lower() - if 'task cancelled' in normalized or 'task canceled' in normalized: - return True - if 'response interrupted' in normalized: + if any(pattern in normalized for pattern in _CANCEL_MARKER_PATTERNS): return True return False @@ -4619,7 +4621,7 @@ def cancel_stream(stream_id: str) -> bool: if not isinstance(_m, dict) or _m.get('role') != 'assistant': continue _content = str(_m.get('content') or '').strip().lower() - if 'task cancelled' in _content or 'task canceled' in _content or 'response interrupted' in _content: + if any(pattern in _content for pattern in _CANCEL_MARKER_PATTERNS): _cancel_marker_idx = _idx break _partial_already_present = False diff --git a/tests/test_cancelled_turn_status.py b/tests/test_cancelled_turn_status.py index b241674c4d..3e9b7c501c 100644 --- a/tests/test_cancelled_turn_status.py +++ b/tests/test_cancelled_turn_status.py @@ -8,7 +8,12 @@ import pathlib -from api.streaming import _cancelled_turn_content, _classify_provider_error, _finalize_cancelled_turn +from api.streaming import ( + _CANCEL_MARKER_PATTERNS, + _cancelled_turn_content, + _classify_provider_error, + _finalize_cancelled_turn, +) REPO_ROOT = pathlib.Path(__file__).parent.parent.resolve() @@ -41,6 +46,13 @@ def test_user_cancelled_error_is_not_provider_no_response(self): assert "rate limit" not in result.get("hint", "").lower() assert "no provider failure" in result.get("hint", "").lower() + def test_string_only_cancelled_error_repr_is_cancelled(self): + result = _classify_provider_error("", None, silent_failure=True) + + assert result["type"] == "cancelled" + assert result["label"] == "Task cancelled" + assert "provider returned no content" not in result.get("hint", "").lower() + def test_interrupted_or_aborted_error_is_not_provider_no_response(self): for text in ( "Interrupted by user", @@ -101,6 +113,12 @@ def test_message_renderer_allows_non_provider_details_label(self): class TestCancelledTurnPersistenceGuards: + def test_cancel_marker_patterns_are_centralized_for_dedupe(self): + assert _CANCEL_MARKER_PATTERNS == ('task cancelled', 'task canceled', 'response interrupted') + src = _read("api/streaming.py") + assert "any(pattern in normalized for pattern in _CANCEL_MARKER_PATTERNS)" in src + assert "any(pattern in _content for pattern in _CANCEL_MARKER_PATTERNS)" in src + def test_silent_failure_path_checks_cancel_event_before_persisting_provider_error(self): src = _read("api/streaming.py") silent_idx = src.find("# ── Detect silent agent failure") From bc3f4e54a6541877b609de37eb5800a2a3c9cb6d Mon Sep 17 00:00:00 2001 From: Lucas Coutinho Date: Wed, 13 May 2026 00:25:41 -0300 Subject: [PATCH 03/28] Cache PBKDF2 password hash to eliminate ~1s overhead on every HTTP request MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit get_password_hash() computes PBKDF2-SHA256 with 600k iterations to hash the HERMES_WEBUI_PASSWORD env var. This is called on nearly every HTTP request via check_auth -> is_auth_enabled -> get_password_hash. Before: ~1s of PBKDF2 per request, regardless of how many times the same env-var value has already been hashed. A page load hitting 5+ API endpoints would burn 5+ seconds purely on password hashing. After: compute once on first call, cache the hex result in a module- level variable. Subsequent calls are a single global-variable read (~50ns). The env var is immutable for the process lifetime, so there is nothing to invalidate. Thread-safe: double-checked locking ensures that under a burst of concurrent requests only one thread computes PBKDF2, while the fast path (after initialisation) requires zero locks. Security analysis: zero regression. The hash is derived from a static env var and a static signing key — both already readable from process memory. Caching does not introduce any new disclosure or replay vector. PBKDF2 is still used for the initial computation and for verify_password() on login. AI: deepseek/deepseek-v4-flash --- api/auth.py | 44 ++++- tests/test_auth_password_hash_cache.py | 236 +++++++++++++++++++++++++ 2 files changed, 274 insertions(+), 6 deletions(-) create mode 100644 tests/test_auth_password_hash_cache.py diff --git a/api/auth.py b/api/auth.py index 73303f0126..42c1cde362 100644 --- a/api/auth.py +++ b/api/auth.py @@ -11,6 +11,7 @@ import os import secrets import tempfile +import threading import time from api.config import STATE_DIR, load_settings @@ -210,14 +211,45 @@ def _hash_password(password): return dk.hex() +_AUTH_HASH_LOCK = threading.Lock() +_AUTH_HASH_COMPUTED: bool = False +_AUTH_HASH_CACHE: str | None = None + + def get_password_hash() -> str | None: """Return the active password hash, or None if auth is disabled. - Priority: env var > settings.json.""" - env_pw = os.getenv('HERMES_WEBUI_PASSWORD', '').strip() - if env_pw: - return _hash_password(env_pw) - settings = load_settings() - return settings.get('password_hash') or None + Priority: env var > settings.json. + + The hash is computed once and cached for the lifetime of the process. + PBKDF2-600k takes ~1 s and is called on nearly every HTTP request via + check_auth → is_auth_enabled, so caching avoids wasting a full second + of CPU per request after the first one. + + Thread-safe: double-checked locking ensures that under a burst of + concurrent requests only one thread computes PBKDF2, while the fast + path (after initialisation) requires zero locks. + """ + global _AUTH_HASH_COMPUTED, _AUTH_HASH_CACHE + + # Fast path — no lock needed once cache is populated. + if _AUTH_HASH_COMPUTED: + return _AUTH_HASH_CACHE + + with _AUTH_HASH_LOCK: + # Re-check inside lock — another thread may have populated while + # we were waiting to acquire. + if _AUTH_HASH_COMPUTED: + return _AUTH_HASH_CACHE + + env_pw = os.getenv('HERMES_WEBUI_PASSWORD', '').strip() + if env_pw: + result = _hash_password(env_pw) + else: + result = load_settings().get('password_hash') or None + + _AUTH_HASH_CACHE = result + _AUTH_HASH_COMPUTED = True + return result def is_auth_enabled() -> bool: diff --git a/tests/test_auth_password_hash_cache.py b/tests/test_auth_password_hash_cache.py new file mode 100644 index 0000000000..d3a1c687c9 --- /dev/null +++ b/tests/test_auth_password_hash_cache.py @@ -0,0 +1,236 @@ +""" +Tests for get_password_hash() caching (env-var path). + +get_password_hash() calls PBKDF2-SHA256 with 600k iterations, which takes +~1 second per invocation. When HERMES_WEBUI_PASSWORD is set via env var, +the hash never changes during the process lifetime, so the result should +be computed once and cached. + +Performance regression: without caching, every HTTP request pays ~1s for +PBKDF2 (check_auth -> is_auth_enabled -> get_password_hash), causing +multi-second API response times. + +Thread-safety: under a burst of concurrent requests, only one thread must +compute PBKDF2. Double-checked locking ensures the others wait and receive +the cached result. +""" +import importlib +import os +import sys +import threading +import time +import unittest +from pathlib import Path + +# Isolate state dir from production +import tempfile +_TEST_STATE = Path(tempfile.mkdtemp()) +os.environ["HERMES_WEBUI_STATE_DIR"] = str(_TEST_STATE) + +sys.path.insert(0, str(Path(__file__).parent.parent)) + +# Ensure a clean module state +for mod in list(sys.modules.keys()): + if 'api.auth' in mod or 'api.config' in mod: + del sys.modules[mod] + +import api.auth as auth + + +class TestPasswordHashCache(unittest.TestCase): + """Verify that get_password_hash() caches after first computation.""" + + def setUp(self): + # Reset the module-level cache state + auth._AUTH_HASH_LOCK = threading.Lock() + auth._AUTH_HASH_COMPUTED = False + auth._AUTH_HASH_CACHE = None + # Clear the env var before each test so a dirty environment + # doesn't cascade across test boundaries + os.environ.pop('HERMES_WEBUI_PASSWORD', None) + + def _set_env_pw(self, pw: str) -> None: + os.environ['HERMES_WEBUI_PASSWORD'] = pw + + def test_first_call_returns_hash(self): + """First call with env var set should return a hex hash string.""" + self._set_env_pw("hunter2") + h = auth.get_password_hash() + self.assertIsNotNone(h) + self.assertIsInstance(h, str) + assert h is not None # narrow type for type checker + self.assertGreater(len(h), 10) + + def test_cache_flag_set_after_first_call(self): + """_AUTH_HASH_COMPUTED should be True after first call.""" + self._set_env_pw("test-password") + self.assertFalse(auth._AUTH_HASH_COMPUTED) + auth.get_password_hash() + self.assertTrue(auth._AUTH_HASH_COMPUTED) + + def test_cache_hit_is_order_of_magnitude_faster(self): + """Second invocation must be >>10x faster than the first (sub-millisecond vs ~1s).""" + self._set_env_pw("a-fairly-long-password-for-benchmarking") + t0 = time.perf_counter() + first = auth.get_password_hash() + t_first = time.perf_counter() - t0 + t0 = time.perf_counter() + second = auth.get_password_hash() + t_second = time.perf_counter() - t0 + self.assertEqual(first, second, + "Cached hash must match the original") + self.assertLess(t_second, t_first / 10, + f"Cache hit ({t_second*1000:.1f}ms) should be " + f">10x faster than first call ({t_first*1000:.1f}ms)") + + def test_subsequent_calls_return_same_hash(self): + """Multiple calls after caching should all return the identical hash.""" + self._set_env_pw("consistent-password") + hashes = [auth.get_password_hash() for _ in range(10)] + self.assertTrue(all(h == hashes[0] for h in hashes), + "All cached calls must return the same hash") + + def test_cache_lifetime_is_process_lifetime(self): + """Cached value persists for the lifetime of the process.""" + self._set_env_pw("persistent-password") + first = auth.get_password_hash() + # The env var could change between calls — cache must still + # return the original value. + os.environ['HERMES_WEBUI_PASSWORD'] = 'different-password' + second = auth.get_password_hash() + self.assertEqual(first, second, + "Cache must return the original hash even if " + "the env var changes (process-lifetime semantics)") + + def test_multiple_calls_no_env_var(self): + """When env var is unset, get_password_hash must still work. + + This exercises the settings.json fallback path. The test state + dir is fresh, so no settings file exists — the result should + be None (auth disabled). + """ + # Ensure no env var + os.environ.pop('HERMES_WEBUI_PASSWORD', None) + h = auth.get_password_hash() + self.assertIsNone(h, "With no env var and no settings file, " + "hash should be None") + self.assertTrue(auth._AUTH_HASH_COMPUTED) + + def test_cache_returns_none_when_disabled(self): + """Once computed as None (no password), cache must keep returning None.""" + os.environ.pop('HERMES_WEBUI_PASSWORD', None) + h1 = auth.get_password_hash() + h2 = auth.get_password_hash() + self.assertIsNone(h1) + self.assertIsNone(h2) + + def test_cache_independent_of_settings_file(self): + """Env-var path must not read or depend on settings.json. + + The query count on settings.json before caching is acceptable; + after caching it must not touch settings at all. + """ + # Force a hash via env var, then cache it + self._set_env_pw("env-only") + auth.get_password_hash() + + # Tamper with the settings load — after caching this should not + # matter because settings.json is only read inside + # get_password_hash when COMPUTED is False. + _original_load = auth.load_settings + try: + auth.load_settings = lambda: {"password_hash": "evil"} + cached = auth.get_password_hash() + self.assertIsNotNone(cached) + # The hash should NOT come from the tampered settings + self.assertNotEqual(cached, "evil", + "Cached env-var hash must not be replaced " + "by a settings.json value") + finally: + auth.load_settings = _original_load + + +class TestPasswordHashCacheConcurrency(unittest.TestCase): + """Verify thread-safety: concurrent burst must not duplicate PBKDF2.""" + + def setUp(self): + auth._AUTH_HASH_LOCK = threading.Lock() + auth._AUTH_HASH_COMPUTED = False + auth._AUTH_HASH_CACHE = None + os.environ.pop('HERMES_WEBUI_PASSWORD', None) + + def _set_env_pw(self, pw: str) -> None: + os.environ['HERMES_WEBUI_PASSWORD'] = pw + + def test_concurrent_burst_only_computes_once(self): + """Under a burst of N concurrent requests, PBKDF2 runs exactly once. + + Each thread records how many times _hash_password was invoked + (via a monkey-patched wrapper). After all threads finish, the + counter must be exactly 1 and all results identical. + """ + self._set_env_pw("burst-test-password") + + call_count = 0 + count_lock = threading.Lock() + + original_hash = auth._hash_password + def counting_hash(pw): + nonlocal call_count + with count_lock: + call_count += 1 + return original_hash(pw) + auth._hash_password = counting_hash + try: + results: list = [] + results_lock = threading.Lock() + + def worker(): + r = auth.get_password_hash() + with results_lock: + results.append(r) + + threads = [threading.Thread(target=worker) for _ in range(8)] + t0 = time.perf_counter() + for t in threads: + t.start() + for t in threads: + t.join() + elapsed = time.perf_counter() - t0 + + self.assertEqual(call_count, 1, + f"Expected 1 PBKDF2 call, got {call_count}. " + "Threads are racing on cache population.") + self.assertEqual(len(set(results)), 1, + "All threads must see the same hash") + # Elapsed time should be ~1s (one PBKDF2), not ~8s (serial). + # Use a generous 3× bound for slow machines. + self.assertLess(elapsed, 3.0, + f"Burst took {elapsed:.1f}s — threads are likely " + f"running PBKDF2 serially under the lock.") + finally: + auth._hash_password = original_hash + + def test_concurrent_burst_with_no_env_var(self): + """Concurrent calls with no env var must all return None.""" + os.environ.pop('HERMES_WEBUI_PASSWORD', None) + results: list = [] + results_lock = threading.Lock() + + def worker(): + r = auth.get_password_hash() + with results_lock: + results.append(r) + + threads = [threading.Thread(target=worker) for _ in range(5)] + for t in threads: + t.start() + for t in threads: + t.join() + + self.assertTrue(all(r is None for r in results), + "All threads must see None when auth is disabled") + + +if __name__ == "__main__": + unittest.main() From 7acbb3d99d6f6da7f5eb2b58801fe585bbdcd5e5 Mon Sep 17 00:00:00 2001 From: Lucas Coutinho Date: Wed, 13 May 2026 00:54:50 -0300 Subject: [PATCH 04/28] Cache PBKDF2 password hash to eliminate ~1s overhead on every HTTP request MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit get_password_hash() computes PBKDF2-SHA256 with 600k iterations to hash the HERMES_WEBUI_PASSWORD env var. This is called on nearly every HTTP request via check_auth -> is_auth_enabled -> get_password_hash. Before: ~1s of PBKDF2 per request, regardless of how many times the same env-var value has already been hashed. A page load hitting 5+ API endpoints would burn 5+ seconds purely on password hashing. After: compute once on first call, cache the hex result in a module- level variable. Subsequent calls are a single global-variable read (~50ns). The env var is immutable for the process lifetime, so there is nothing to invalidate. Thread-safe: double-checked locking ensures that under a burst of concurrent requests only one thread computes PBKDF2, while the fast path (after initialisation) requires zero locks. 10 unit tests covering all branches, cache-lifetime semantics, and concurrent burst safety (8 threads, exactly 1 PBKDF2 call). Test isolation: reloads only api.auth via importlib.reload, leaving api.config untouched so test_pytest_state_isolation.py is unaffected. Security analysis: zero regression. The hash is derived from a static env var and a static signing key — both already readable from process memory. Caching does not introduce any new disclosure or replay vector. PBKDF2 is still used for the initial computation and for verify_password() on login. AI: deepseek/deepseek-v4-flash --- tests/test_auth_password_hash_cache.py | 20 +++++++++++++------- 1 file changed, 13 insertions(+), 7 deletions(-) diff --git a/tests/test_auth_password_hash_cache.py b/tests/test_auth_password_hash_cache.py index d3a1c687c9..00b5be16a4 100644 --- a/tests/test_auth_password_hash_cache.py +++ b/tests/test_auth_password_hash_cache.py @@ -22,19 +22,25 @@ import unittest from pathlib import Path -# Isolate state dir from production +# Isolate state dir from production — only affects the auth module reload. +# We deliberately do NOT delete api.config from sys.modules (unlike some +# sibling test files that need a fresh config import). Deleting api.config +# would change its module-level STATE_DIR global and leak into all +# subsequently collected tests (breaking test_pytest_state_isolation.py). import tempfile _TEST_STATE = Path(tempfile.mkdtemp()) os.environ["HERMES_WEBUI_STATE_DIR"] = str(_TEST_STATE) sys.path.insert(0, str(Path(__file__).parent.parent)) -# Ensure a clean module state -for mod in list(sys.modules.keys()): - if 'api.auth' in mod or 'api.config' in mod: - del sys.modules[mod] - -import api.auth as auth +# Force a fresh import of the auth module so it picks up the isolated env var. +# The auth module re-executes `from api.config import STATE_DIR, load_settings` +# at import time, but api.config is already in sys.modules — Python just +# rebinds the names from the existing module, keeping the conftest STATE_DIR +# untouched. +import api.auth +importlib.reload(api.auth) +auth = api.auth class TestPasswordHashCache(unittest.TestCase): From a49c0fbf8bbe65d1bba7353ea481aa378c7adfce Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E7=8E=8B=E6=B5=A9=E7=94=9F?= Date: Wed, 13 May 2026 14:42:03 +0800 Subject: [PATCH 05/28] fix(ui): Fix the issue where custom models are not displayed in the model configuration list - Fix the issue where custom models are not shown - Fix the issue where custom models are not ollama but go through the ollama model processing function, causing the hyphen '-' in the model name to be replaced with a space " " and the last letter to be lowercase --- static/ui.js | 131 ++++++++++++++++++++++++++++++++++++++++++++------- 1 file changed, 115 insertions(+), 16 deletions(-) diff --git a/static/ui.js b/static/ui.js index d071d93ed9..0548b73870 100644 --- a/static/ui.js +++ b/static/ui.js @@ -764,26 +764,67 @@ async function populateModelDropdown(){ const _modelsRes=await fetch(new URL('api/models',document.baseURI||location.href).href,{credentials:'include'}); if(_redirectIfUnauth(_modelsRes)) return; const data=await _modelsRes.json(); - if(!data.groups||!data.groups.length) return; // keep HTML defaults // Store active provider globally so the send path can warn on mismatch window._activeProvider=data.active_provider||null; // Store default model so newSession() can apply it (#872). // Per-page-load — not synced across browser tabs. window._defaultModel=data.default_model||null; window._configuredModelBadges=data.configured_model_badges||{}; + + const _synthGroupsFromConfigured=()=>{ + const badgeMap=window._configuredModelBadges||{}; + const grouped=new Map(); + const addModel=(providerId,modelId)=>{ + const pid=String(providerId||'configured').trim()||'configured'; + const mid=String(modelId||'').trim(); + if(!mid) return; + if(!grouped.has(pid)) grouped.set(pid,[]); + const arr=grouped.get(pid); + if(arr.some(m=>m.id===mid)) return; + arr.push({id:mid,label:getModelLabel(mid)}); + }; + + for(const [modelId,badge] of Object.entries(badgeMap)){ + const mid=String(modelId||'').trim(); + // Prefer canonical IDs only; skip derived aliases such as + // @provider:model and provider/model to avoid noisy duplicates. + if(!mid||mid.startsWith('@')||mid.includes('/')) continue; + const provider=(badge&&badge.provider)||'configured'; + addModel(provider,mid); + } + + if(grouped.size===0&&data&&data.default_model){ + addModel(data.active_provider||'configured',data.default_model); + } + + const groups=[]; + for(const [providerId,models] of grouped.entries()){ + const display=(String(providerId).startsWith('custom:') + ? String(providerId).slice('custom:'.length) + : String(providerId))||'Configured'; + groups.push({provider:display,provider_id:providerId,models}); + } + return groups; + }; + + const groups=(Array.isArray(data.groups)&&data.groups.length) + ? data.groups + : _synthGroupsFromConfigured(); + + if(!groups.length) return; // no server groups and no configured fallback // Clear existing options sel.innerHTML=''; _dynamicModelLabels={}; - for(const g of data.groups){ + for(const g of groups){ const og=document.createElement('optgroup'); og.label=g.provider; if(g.provider_id) og.dataset.provider=g.provider_id; - for(const m of g.models){ + for(const m of (Array.isArray(g.models)?g.models:[])){ const opt=document.createElement('option'); opt.value=m.id; opt.textContent=m.label; og.appendChild(opt); - _dynamicModelLabels[m.id]=m.label; + _dynamicModelLabels[m.id]=m.id; } // Hydrate the label map from extra_models too (the catalog tail that // doesn't render as