diff --git a/CHANGELOG.md b/CHANGELOG.md index 99d907a584..48e58a6e0a 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -2,6 +2,34 @@ ## [Unreleased] +### Fixed + +- **PR #2191** by @lucasrc (auth refactor 1/3) — Thread-safe login rate limiter (new `_LOGIN_ATTEMPTS_LOCK`) + PBKDF2 key separation (new `_pbkdf2_key()` reading `.pbkdf2_key` separately from `_signing_key()` reading `.signing_key` — previously both shared `.signing_key`, a key-reuse anti-pattern across HMAC and PBKDF2 primitives) + transparent migration in `verify_password()` that re-salts legacy hashes with the new key on next successful login. 241-line regression suite covering the lock + migration paths. Split from earlier #2167 per maintainer review request. + +- **PR #2192** by @lucasrc (auth refactor 2/3, depends on #2191) — Invalidate password-hash cache when password changes via the Settings panel. The PR #2191 cache lives for the process lifetime, but `save_settings({'_set_password': ...})` could mutate `settings.json.password_hash` without telling the auth module — leaving the cache stale and verifying against the old password until restart. Now `save_settings()` calls `_invalidate_password_hash_cache()` on both `_set_password` and `_clear_password` paths. 52-line regression suite + `verify_password()` simplified to rely on the new hook instead of doing the invalidation itself. + +- **PR #2193** by @lucasrc (auth refactor 3/3, independent of #2191/2) — Full 64-char HMAC-SHA256 session signatures with upgrade migration bridge. `create_session()` now emits the full digest instead of the previous `[:32]` truncated form; `verify_session()` accepts both lengths during a transition window so existing sessions survive the upgrade without a forced global logout. Restored the `_is_secure_context(handler)` heuristic (getpeercert + X-Forwarded-Proto) that the original #2167 had dropped — adds an `HERMES_WEBUI_SECURE` env-var override on top of the auto-detect. 42-line regression suite covering both signature lengths + Secure-cookie env-var override. + +- **PR #2151** by @Jordan-SkyLF — Cancelled chat turns are no longer reported as provider/no-content failures. Classifies user/client cancellation, interruption/abort, provider-empty/no-content, and provider/rate/quota errors separately in streaming error handling. Persists cancelled turns as `_error` assistant markers with verbose copy and a `Cancellation details` disclosure, so reloads match the live UI. Adds race/idempotency guards so worker finalization and `/api/chat/cancel` do not duplicate cancel markers, late Stop clicks after a completed worker save do not emit contradictory cancel events (`_emit_cancel_event = False` short-circuits the terminal event when the writeback is stale), and partial streamed text/reasoning/tool-call metadata is still preserved on real cancellation. Stage-350 maintainer resolution merged this PR's cancel-handler guard with #2136's `_stream_writeback_is_current()` ownership check — both correct guards now coexist on the cancel path. + +- **PR #2178** by @hualong1009 — Custom-provider models now display correctly in the model configuration list, and bare custom-provider model IDs containing dashes (e.g. `Qwen3.6-35B-A3B`) no longer have their hyphens stripped to spaces + last letter lowercased by the Ollama label formatter. Adds an `allowOllamaFormat` guard derived from `atProvider` (the `@` prefix on the model id, if any): the Ollama formatter only runs when `atProvider` is empty or starts with `ollama`. For `@custom:ai_gateway:Qwen3.6-35B-A3B` and similar non-ollama @-provider model IDs, the formatter is suppressed and the model badge label preserves the original casing/punctuation. Stage-350 maintainer fix updated `tests/test_ollama_model_chip_label_regression.py` to assert on the new `allowOllamaFormat &&` guard prefix (the original test asserted on the pre-PR code shape and was failing CI). + +- **PR #2204** by @Michaelyklam (closes #1894) — `resolve_model_provider()` now prefers the configured non-custom provider when it owns a requested bare model id, even when a named custom provider also advertises the same model. Pre-fix, `model="deepseek-v4-pro"` under `provider="opencode-go"` could route to a sibling `custom_providers["opencode-go"]` entry that happened to advertise the same model rather than the canonical opencode-go provider. Custom-provider routing for custom-only models is preserved. 157-line regression suite covering the opencode-go/deepseek-v4-pro overlap and explicit provider/suffix parsing. + +### Added + +- **PR #2203** by @dobby-d-elf — Animates the "Activity: X tools" composer footer text while the LLM is using tools — subtle shimmer gradient that stops when tool-calling completes. Highlight color follows the active theme. Reduced-motion and mask-support fallbacks render plain muted Activity text unchanged in unsupported or `prefers-reduced-motion` environments. Also fixes a small flickering/unclickable first "Thinking" block when the user clicks it while the model is still streaming reasoning into it (unrelated to the animation but right next to it on screen). + +### Stage-350 maintainer fixes + +- **`api/streaming.py:_partial_already_present` dedup scope tightening** — Opus SHOULD-FIX-pre-merge on PR #2151. The dedup loop that prevents double-writing a `_partial` marker on `cancel_stream` re-entry used a substring check (`_stripped in _existing or _existing in _stripped`) against any prior assistant message — too broad. Any short prior assistant reply like "OK" or "Here is the answer:" would be a substring of many later partial bodies and could silently drop the new partial, resurrecting the #893 data-loss bug on long sessions. Tightened to: only dedup against actual prior `_partial=True` markers, with exact (whitespace-stripped) content match. Three new regression tests added: (a) short prior non-partial reply does NOT dedup a longer new partial that contains it, (b) exact-content match against a prior `_partial` marker DOES still dedup (re-entry safety), (c) prior assistant message with same content but NOT marked `_partial` does NOT dedup (it's from a completed earlier turn). 10/10 partial-cancel tests pass after the fix. + +- **`api/streaming.py` cancel-handler conflict resolution between #2151 and the already-shipped #2136** — Resolved a semantic merge conflict on the cancel handler. Both PRs added stale-stream ownership guards at the same site. Kept #2136's `_stream_writeback_is_current()` check as the strictly-stronger condition (it also catches the case where the stream rotated to a new stream with a new pending_user_message — #2151's standalone check would have let that case fall through). Adopted #2151's `_emit_cancel_event = False` semantic on the same path so the terminal cancel SSE event isn't emitted in addition to skipping the writeback (otherwise a successful done payload already delivered to the client would be contradicted by a late cancel event). 55/55 tests across both PR suites pass after the resolution. + +- **`tests/test_ollama_model_chip_label_regression.py` updated to match PR #2178's new `allowOllamaFormat` guard** — The existing static-source test asserted on the pre-PR string and was failing CI. Updated the assertion to require the new `allowOllamaFormat &&` guard prefix, with an extended docstring explaining the bug class (`Qwen3.6-35B-A3B`-shaped bare custom-provider model IDs had hyphens stripped to spaces + last letter lowercased by the ollama formatter pre-fix). + +## [v0.51.56] — 2026-05-13 — Release AF (stage-349 — Tier 1 safe slice — reasoning_content whitelist + fork-from-here absolute index + Firefox sidebar scroll + provisional session titles) + ### Added - **PR #2202** by @Jordan-SkyLF — Early session titles on chat start. Pre-fix, new conversations sat as "Untitled" until later title generation completed. Now `/api/chat/start` derives a provisional title from the first user prompt and returns it in the response, so the sidebar and topbar sync immediately. Later SSE title refinements replace the provisional via one guarded helper (only when the current title is still known-default/provisional). Manual/custom user titles are protected via exact-normalized-match detection, so user-renamed prefix titles are never treated as automatic placeholders. 167-line regression suite in `tests/test_early_session_title.py` covering default/eager/manual title behavior, chat-start response shape, JS wiring, and manual-prefix protection. diff --git a/api/auth.py b/api/auth.py index 73303f0126..0c9c70646e 100644 --- a/api/auth.py +++ b/api/auth.py @@ -11,6 +11,7 @@ import os import secrets import tempfile +import threading import time from api.config import STATE_DIR, load_settings @@ -154,70 +155,137 @@ def _save_login_attempts(attempts: dict[str, list[float]]) -> None: _login_attempts = _load_login_attempts() # ip -> [timestamp, ...] +_LOGIN_ATTEMPTS_LOCK = threading.Lock() def _check_login_rate(ip: str) -> bool: - """Return True if the IP is allowed to attempt login.""" - now = time.time() - attempts = _login_attempts.get(ip, []) - # Prune old attempts - attempts = [t for t in attempts if now - t < _LOGIN_WINDOW] - if attempts: - _login_attempts[ip] = attempts - else: - _login_attempts.pop(ip, None) - _save_login_attempts(_login_attempts) - return len(attempts) < _LOGIN_MAX_ATTEMPTS + """Return True if the IP is allowed to attempt login (thread-safe).""" + with _LOGIN_ATTEMPTS_LOCK: + now = time.time() + attempts = _login_attempts.get(ip, []) + # Prune old attempts + attempts = [t for t in attempts if now - t < _LOGIN_WINDOW] + if attempts: + _login_attempts[ip] = attempts + else: + _login_attempts.pop(ip, None) + _save_login_attempts(_login_attempts) + return len(attempts) < _LOGIN_MAX_ATTEMPTS def _record_login_attempt(ip: str) -> None: - now = time.time() - attempts = _login_attempts.get(ip, []) - attempts.append(now) - _login_attempts[ip] = attempts - _save_login_attempts(_login_attempts) + """Record a login attempt for rate limiting (thread-safe).""" + with _LOGIN_ATTEMPTS_LOCK: + now = time.time() + attempts = _login_attempts.get(ip, []) + attempts.append(now) + _login_attempts[ip] = attempts + _save_login_attempts(_login_attempts) -def _signing_key(): - """Return a random signing key, generating and persisting one on first call.""" - key_file = STATE_DIR / '.signing_key' +def _load_key(filename: str) -> bytes: + """Load a 32-byte key from STATE_DIR, generating and persisting one if missing.""" + key_file = STATE_DIR / filename try: if key_file.exists(): raw = key_file.read_bytes() if len(raw) >= 32: return raw[:32] - except Exception: - logger.debug("Failed to read or access signing key file, using in-memory key") - # Generate a new random key + except OSError: + logger.debug("Failed to read key %s", filename) key = secrets.token_bytes(32) try: STATE_DIR.mkdir(parents=True, exist_ok=True) key_file.write_bytes(key) key_file.chmod(0o600) - except Exception: - logger.debug("Failed to persist signing key, using in-memory key only") + except OSError: + logger.debug("Failed to persist key %s", filename) return key -def _hash_password(password): +_PBKDF2_KEY_CACHE: bytes | None = None +_SIGNING_KEY_CACHE: bytes | None = None + + +def _pbkdf2_key() -> bytes: + global _PBKDF2_KEY_CACHE + if _PBKDF2_KEY_CACHE is None: + _PBKDF2_KEY_CACHE = _load_key('.pbkdf2_key') + return _PBKDF2_KEY_CACHE + + +def _signing_key() -> bytes: + global _SIGNING_KEY_CACHE + if _SIGNING_KEY_CACHE is None: + _SIGNING_KEY_CACHE = _load_key('.signing_key') + return _SIGNING_KEY_CACHE + + +def _hash_password(password, *, salt: bytes | None = None) -> str: """PBKDF2-SHA256 with 600k iterations (OWASP recommendation). - Salt is the persisted random signing key, which is secret and unique per + Salt is the persisted PBKDF2 key, which is secret and unique per installation. This keeps the stored hash format a plain hex string (no format change to settings.json) while replacing the predictable - STATE_DIR-derived salt from the original implementation.""" - salt = _signing_key() + STATE_DIR-derived salt from the original implementation. + + The *salt* parameter exists solely to support transparent migration + of password hashes that were computed with a different key (e.g. the + old `.signing_key`). Normal callers should never pass it. + """ + if salt is None: + salt = _pbkdf2_key() dk = hashlib.pbkdf2_hmac('sha256', password.encode(), salt, 600_000) return dk.hex() +_AUTH_HASH_LOCK = threading.Lock() +_AUTH_HASH_COMPUTED: bool = False +_AUTH_HASH_CACHE: str | None = None + + +def _invalidate_password_hash_cache() -> None: + """Invalidate the in-process password hash cache so the next call to + get_password_hash() re-reads from settings.json or the env var.""" + global _AUTH_HASH_COMPUTED, _AUTH_HASH_CACHE + with _AUTH_HASH_LOCK: + _AUTH_HASH_COMPUTED = False + _AUTH_HASH_CACHE = None + + def get_password_hash() -> str | None: """Return the active password hash, or None if auth is disabled. - Priority: env var > settings.json.""" - env_pw = os.getenv('HERMES_WEBUI_PASSWORD', '').strip() - if env_pw: - return _hash_password(env_pw) - settings = load_settings() - return settings.get('password_hash') or None + Priority: env var > settings.json. + + The hash is computed once and cached for the lifetime of the process. + PBKDF2-600k takes ~1 s and is called on nearly every HTTP request via + check_auth → is_auth_enabled, so caching avoids wasting a full second + of CPU per request after the first one. + + Thread-safe: double-checked locking ensures that under a burst of + concurrent requests only one thread computes PBKDF2, while the fast + path (after initialisation) requires zero locks. + """ + global _AUTH_HASH_COMPUTED, _AUTH_HASH_CACHE + + # Fast path — no lock needed once cache is populated. + if _AUTH_HASH_COMPUTED: + return _AUTH_HASH_CACHE + + with _AUTH_HASH_LOCK: + # Re-check inside lock — another thread may have populated while + # we were waiting to acquire. + if _AUTH_HASH_COMPUTED: + return _AUTH_HASH_CACHE + + env_pw = os.getenv('HERMES_WEBUI_PASSWORD', '').strip() + if env_pw: + result = _hash_password(env_pw) + else: + result = load_settings().get('password_hash') or None + + _AUTH_HASH_CACHE = result + _AUTH_HASH_COMPUTED = True + return result def is_auth_enabled() -> bool: @@ -225,12 +293,35 @@ def is_auth_enabled() -> bool: return get_password_hash() is not None -def verify_password(plain) -> bool: - """Verify a plaintext password against the stored hash.""" +def verify_password(plain: str) -> bool: + """Verify a plaintext password against the stored hash. + + Supports transparent migration of password hashes that were computed + with the old `.signing_key` salt. When the two keys differ and the + legacy-salted hash matches, the password is transparently re-hashed + with the current `.pbkdf2_key` and persisted to settings.json. + """ expected = get_password_hash() if not expected: return False - return hmac.compare_digest(_hash_password(plain), expected) + # Fast path: current PBKDF2 key + if hmac.compare_digest(_hash_password(plain), expected): + return True + # Migration: some hashes were computed with `.signing_key` before the + # PBKDF2 key was separated. Try the legacy salt; if it matches, + # transparently upgrade so the next login uses the fast path. + legacy_salt = _signing_key() + current_salt = _pbkdf2_key() + if legacy_salt != current_salt: + if hmac.compare_digest(_hash_password(plain, salt=legacy_salt), expected): + from api.config import save_settings + + save_settings({'_set_password': plain}) + # Password re-hashed and persisted to disk using the current salt. + # Cache invalidation is handled by fix 2/3 (#2192) which adds the + # _invalidate_password_hash_cache() call inside save_settings(). + return True + return False def create_session() -> str: @@ -238,7 +329,7 @@ def create_session() -> str: token = secrets.token_hex(32) _sessions[token] = time.time() + _resolve_session_ttl() _save_sessions(_sessions) - sig = hmac.new(_signing_key(), token.encode(), hashlib.sha256).hexdigest()[:32] + sig = hmac.new(_signing_key(), token.encode(), hashlib.sha256).hexdigest() return f"{token}.{sig}" @@ -252,14 +343,20 @@ def _prune_expired_sessions(): _save_sessions(_sessions) -def verify_session(cookie_value) -> bool: +def verify_session(cookie_value: str) -> bool: """Verify a signed session cookie. Returns True if valid and not expired.""" if not cookie_value or '.' not in cookie_value: return False _prune_expired_sessions() # lazy cleanup on every verification attempt token, sig = cookie_value.rsplit('.', 1) - expected_sig = hmac.new(_signing_key(), token.encode(), hashlib.sha256).hexdigest()[:32] - if not hmac.compare_digest(sig, expected_sig): + full_sig = hmac.new(_signing_key(), token.encode(), hashlib.sha256).hexdigest() + # Accept both new (64-char) and legacy (32-char truncated) signatures so + # existing sessions survive the upgrade without a forced global logout. + # The legacy branch can be removed once session TTLs have expired (~30 days). + valid = hmac.compare_digest(sig, full_sig) or ( + len(sig) == 32 and hmac.compare_digest(sig, full_sig[:32]) + ) + if not valid: return False expiry = _sessions.get(token) if not expiry or time.time() > expiry: @@ -342,6 +439,35 @@ def check_auth(handler, parsed) -> bool: return False +def _is_secure_context(handler=None) -> bool: + """Return True if cookies should carry the Secure flag. + + Behaviour is overridable via HERMES_WEBUI_SECURE env var for + reverse-proxy setups where TLS terminates at a frontend proxy + (nginx, Cloudflare, etc.) and Python only sees plain HTTP. + 1/true/yes → force Secure on; 0/false/no → force Secure off. + When unset, fall back to heuristics: direct TLS socket (getpeercert) + or X-Forwarded-Proto header from the request. + + .. warning:: + The ``X-Forwarded-Proto`` header is only trustworthy when a + reverse proxy (nginx, Cloudflare, etc.) is deployed in front + of the application. Without a proxy, any client can forge the + header and cause the Secure flag to be set on plain HTTP. + """ + env = os.getenv('HERMES_WEBUI_SECURE', '').strip().lower() + if env in ('1', 'true', 'yes'): + return True + if env in ('0', 'false', 'no'): + return False + if handler is not None: + if getattr(handler.request, 'getpeercert', None) is not None: + return True + if handler.headers.get('X-Forwarded-Proto', '') == 'https': + return True + return False + + def set_auth_cookie(handler, cookie_value) -> None: """Set the auth cookie on the response.""" cookie = http.cookies.SimpleCookie() @@ -350,8 +476,7 @@ def set_auth_cookie(handler, cookie_value) -> None: cookie[COOKIE_NAME]['samesite'] = 'Lax' cookie[COOKIE_NAME]['path'] = '/' cookie[COOKIE_NAME]['max-age'] = str(_resolve_session_ttl()) - # Set Secure flag when connection is HTTPS - if getattr(handler.request, 'getpeercert', None) is not None or handler.headers.get('X-Forwarded-Proto', '') == 'https': + if _is_secure_context(handler): cookie[COOKIE_NAME]['secure'] = True handler.send_header('Set-Cookie', cookie[COOKIE_NAME].OutputString()) diff --git a/api/config.py b/api/config.py index 42d6357419..a6f7daaf4f 100644 --- a/api/config.py +++ b/api/config.py @@ -1576,10 +1576,25 @@ def resolve_model_provider(model_id: str) -> tuple: and not config_provider.startswith('custom:') ) _default_model = model_cfg.get('default') if isinstance(model_cfg, dict) else None + # Owns model if it appears in the static catalog for the configured provider. + _provider_models_set: set[str] = set() + if ( + config_provider is not None + and config_provider in _PROVIDER_MODELS + and isinstance(_PROVIDER_MODELS[config_provider], list) + ): + _provider_models_set = { + m.get('id', '') for m in _PROVIDER_MODELS[config_provider] + if isinstance(m, dict) and isinstance(m.get('id'), str) + } _skip_custom_providers = ( _is_explicit_non_custom_provider - and _default_model is not None - and model_id == _default_model + and ( + # Guard 1: model is the configured default (existing behaviour). + (_default_model is not None and model_id == _default_model) + # Guard 2: model is owned by the configured non-custom provider. + or model_id in _provider_models_set + ) ) custom_providers = cfg.get('custom_providers', []) if isinstance(custom_providers, list) and not _skip_custom_providers: @@ -4042,15 +4057,18 @@ def save_settings(settings: dict) -> dict: theme_was_explicit = False skin_was_explicit = False # Handle _set_password: hash and store as password_hash + _password_changed = False raw_pw = settings.pop("_set_password", None) if raw_pw and isinstance(raw_pw, str) and raw_pw.strip(): # Use PBKDF2 from auth module (600k iterations) -- never raw SHA-256 from api.auth import _hash_password current["password_hash"] = _hash_password(raw_pw.strip()) + _password_changed = True # Handle _clear_password: explicitly disable auth if settings.pop("_clear_password", False): current["password_hash"] = None + _password_changed = True for k, v in settings.items(): if k in _SETTINGS_ALLOWED_KEYS: if k == "theme": @@ -4092,6 +4110,12 @@ def save_settings(settings: dict) -> dict: json.dumps(persisted, ensure_ascii=False, indent=2), encoding="utf-8", ) + # Invalidate the in-memory password hash cache so the next call to + # get_password_hash() picks up the new value from disk immediately. + if _password_changed: + from api.auth import _invalidate_password_hash_cache + + _invalidate_password_hash_cache() # Update runtime defaults so new sessions use them immediately global DEFAULT_WORKSPACE if "default_workspace" in current: diff --git a/api/streaming.py b/api/streaming.py index e2ee3aabef..91dbc738c0 100644 --- a/api/streaming.py +++ b/api/streaming.py @@ -127,6 +127,9 @@ def _clarify_timeout_seconds(default: int = 120) -> int: return default +_CANCEL_MARKER_PATTERNS = ('task cancelled', 'task canceled', 'response interrupted') + + def _classify_provider_error(err_str: str, exc=None, *, silent_failure: bool = False) -> dict: """Classify provider/agent failure text for WebUI apperror UX. @@ -136,6 +139,41 @@ def _classify_provider_error(err_str: str, exc=None, *, silent_failure: bool = F err_str = str(err_str or '') _err_lower = err_str.lower() _exc_name = type(exc).__name__ if exc is not None else '' + _is_cancelled = ( + 'cancelled by user' in _err_lower + or 'canceled by user' in _err_lower + or 'user cancelled' in _err_lower + or 'user canceled' in _err_lower + or 'task cancelled' in _err_lower + or 'task canceled' in _err_lower + or 'cancellederror' in _err_lower + or (exc is not None and _exc_name in ('CancelledError', 'CanceledError')) + ) + _is_interrupted = ( + not _is_cancelled + and ( + 'interrupted by user' in _err_lower + or 'response interrupted' in _err_lower + or 'operation interrupted' in _err_lower + or 'operation was interrupted' in _err_lower + or 'operation aborted' in _err_lower + or 'request was aborted' in _err_lower + or 'aborterror' in _err_lower + or (exc is not None and type(exc).__name__ in ('KeyboardInterrupt', 'AbortError')) + ) + ) + if _is_cancelled: + return { + 'label': 'Task cancelled', + 'type': 'cancelled', + 'hint': 'The run was cancelled by the user before Skyly finished. No provider failure occurred.', + } + if _is_interrupted: + return { + 'label': 'Response interrupted', + 'type': 'interrupted', + 'hint': 'The run stopped before a provider response completed. If you did not cancel it, try again.', + } _is_quota = _is_quota_error_text(err_str) _is_auth = ( not _is_quota and ( @@ -213,6 +251,90 @@ def _provider_error_payload(message: str, err_type: str, hint: str = '') -> dict return payload +def _session_has_cancel_marker(session) -> bool: + """Return True if a visible cancel/interrupted marker is already persisted.""" + for msg in reversed(getattr(session, 'messages', None) or []): + if not isinstance(msg, dict): + continue + if msg.get('role') == 'user': + return False + if msg.get('role') != 'assistant': + continue + content = msg.get('content') + text = '' + if isinstance(content, str): + text = content + elif isinstance(content, list): + parts = [] + for part in content: + if isinstance(part, dict): + parts.append(str(part.get('text') or part.get('content') or '')) + text = '\n'.join(parts) + normalized = text.strip().lower() + if any(pattern in normalized for pattern in _CANCEL_MARKER_PATTERNS): + return True + return False + + +def _cancelled_turn_content(message: str = 'Task cancelled.') -> str: + """Return cancelled-turn copy matching the verbose provider-error layout.""" + _message = str(message or 'Task cancelled.').strip() + if not _message.endswith('.'): + _message += '.' + return ( + f"**Task cancelled:** {_message}\n\n" + "*The run was cancelled by the user before Skyly finished. No provider failure occurred.*" + ) + + +def _persist_cancelled_turn(session, *, message: str = 'Task cancelled.') -> None: + """Persist a user-cancelled terminal state without provider-error wording. + + cancel_stream() usually writes this marker first, but the streaming thread can + later unwind through the silent-failure or exception path. Those paths must + not append a misleading provider no-response error after an explicit cancel. + """ + _materialize_pending_user_turn_before_error(session) + session.active_stream_id = None + session.pending_user_message = None + session.pending_attachments = [] + session.pending_started_at = None + if not _session_has_cancel_marker(session): + session.messages.append({ + 'role': 'assistant', + 'content': _cancelled_turn_content(message), + '_error': True, + 'provider_details': str(message or 'Task cancelled.').strip(), + 'provider_details_label': 'Cancellation details', + 'timestamp': int(time.time()), + }) + + +def _cleanup_ephemeral_cancelled_turn(session) -> None: + """Remove transient /btw session state after a cancel without saving it.""" + session.active_stream_id = None + session.pending_user_message = None + session.pending_attachments = [] + session.pending_started_at = None + try: + import pathlib + pathlib.Path(session.path).unlink(missing_ok=True) + except Exception: + logger.debug("Failed to clean up ephemeral cancelled session", exc_info=True) + + +def _finalize_cancelled_turn(session, *, ephemeral: bool = False, message: str = 'Task cancelled.') -> None: + """Finalize a cancelled turn for persistent or ephemeral sessions.""" + if ephemeral: + _cleanup_ephemeral_cancelled_turn(session) + return + _persist_cancelled_turn(session, message=message) + try: + session.save() + except Exception: + logger.debug("Failed to persist cancelled turn", exc_info=True) + + def _aiagent_import_error_detail() -> str: """Return a multi-line diagnostic string for the "AIAgent not available" path. @@ -2287,6 +2409,8 @@ def _agent_status_callback(kind, message): # TD1: set thread-local env context so concurrent sessions don't clobber globals # Check for pre-flight cancel (user cancelled before agent even started) if cancel_event.is_set(): + with _agent_lock: + _finalize_cancelled_turn(s, ephemeral=ephemeral, message='Task cancelled before start.') put('cancel', {'message': 'Cancelled before start'}) return @@ -3006,6 +3130,8 @@ def on_tool_complete(tool_call_id, name, args, function_result): agent.interrupt("Cancelled before start") except Exception: logger.debug("Failed to interrupt agent before start") + with _agent_lock: + _finalize_cancelled_turn(s, ephemeral=ephemeral, message='Task cancelled before start.') put('cancel', {'message': 'Cancelled by user'}) return @@ -3107,6 +3233,30 @@ def _periodic_checkpoint(): task_id=session_id, persist_user_message=msg_text, ) + if cancel_event.is_set(): + if _checkpoint_stop is not None: + _checkpoint_stop.set() + if _ckpt_thread is not None: + _ckpt_thread.join(timeout=15) + if ephemeral: + _cleanup_ephemeral_cancelled_turn(s) + else: + with _agent_lock: + _finalize_cancelled_turn(s, ephemeral=False) + try: + append_turn_journal_event_for_stream( + s.session_id, + stream_id, + { + "event": "interrupted", + "created_at": time.time(), + "reason": "cancelled", + }, + ) + except Exception: + logger.debug("Failed to append cancelled turn journal event", exc_info=True) + put('cancel', {'message': 'Cancelled by user'}) + return # ── Ephemeral mode (/btw): deliver answer, skip persistence, cleanup ── if ephemeral: _answer = '' @@ -3132,6 +3282,23 @@ def _periodic_checkpoint(): _checkpoint_stop.set() if _ckpt_thread is not None: _ckpt_thread.join(timeout=15) + if cancel_event.is_set(): + with _agent_lock: + _finalize_cancelled_turn(s, ephemeral=False) + try: + append_turn_journal_event_for_stream( + s.session_id, + stream_id, + { + "event": "interrupted", + "created_at": time.time(), + "reason": "cancelled", + }, + ) + except Exception: + logger.debug("Failed to append cancelled turn journal event", exc_info=True) + put('cancel', {'message': 'Cancelled by user'}) + return with _agent_lock: if not ephemeral and not _stream_writeback_is_current(s, stream_id): logger.info( @@ -3142,6 +3309,22 @@ def _periodic_checkpoint(): ) return _result_messages = result.get('messages') or _previous_context_messages + if cancel_event.is_set(): + _finalize_cancelled_turn(s, ephemeral=False) + try: + append_turn_journal_event_for_stream( + s.session_id, + stream_id, + { + "event": "interrupted", + "created_at": time.time(), + "reason": "cancelled", + }, + ) + except Exception: + logger.debug("Failed to append cancelled turn journal event", exc_info=True) + put('cancel', {'message': 'Cancelled by user'}) + return _next_context_messages = _restore_reasoning_metadata( _previous_context_messages, _result_messages, @@ -3180,6 +3363,23 @@ def _periodic_checkpoint(): ) # _token_sent tracks whether on_token() was called (any streamed text) if not _assistant_added and not _token_sent: + if cancel_event.is_set(): + _finalize_cancelled_turn(s, ephemeral=ephemeral) + if not ephemeral: + try: + append_turn_journal_event_for_stream( + s.session_id, + stream_id, + { + "event": "interrupted", + "created_at": time.time(), + "reason": "cancelled", + }, + ) + except Exception: + logger.debug("Failed to append cancelled turn journal event", exc_info=True) + put('cancel', {'message': 'Cancelled by user'}) + return _last_err = getattr(agent, '_last_error', None) or result.get('error') or '' _err_str = str(_last_err) if _last_err else '' _classification = _classify_provider_error( @@ -3331,6 +3531,10 @@ def _periodic_checkpoint(): } if _error_payload.get('details'): _error_message['provider_details'] = _error_payload['details'] + if _err_type == 'cancelled': + _error_message['provider_details_label'] = 'Cancellation details' + elif _err_type == 'interrupted': + _error_message['provider_details_label'] = 'Interruption details' s.messages.append(_error_message) try: s.save() @@ -3620,7 +3824,39 @@ def _periodic_checkpoint(): ) except Exception: logger.debug("Failed to append assistant_started turn journal event", exc_info=True) + if cancel_event.is_set(): + _finalize_cancelled_turn(s, ephemeral=False) + try: + append_turn_journal_event_for_stream( + s.session_id, + stream_id, + { + "event": "interrupted", + "created_at": time.time(), + "reason": "cancelled", + }, + ) + except Exception: + logger.debug("Failed to append cancelled turn journal event", exc_info=True) + put('cancel', {'message': 'Cancelled by user'}) + return s.save() + if cancel_event.is_set(): + _finalize_cancelled_turn(s, ephemeral=False) + try: + append_turn_journal_event_for_stream( + s.session_id, + stream_id, + { + "event": "interrupted", + "created_at": time.time(), + "reason": "cancelled", + }, + ) + except Exception: + logger.debug("Failed to append cancelled turn journal event", exc_info=True) + put('cancel', {'message': 'Cancelled by user'}) + return if not ephemeral: try: append_turn_journal_event_for_stream( @@ -3877,12 +4113,38 @@ def _periodic_checkpoint(): err_str = _stripped _exc_lower = err_str.lower() _classification = _classify_provider_error(err_str, e) + if cancel_event.is_set(): + if s is not None: + if _checkpoint_stop is not None: + _checkpoint_stop.set() + if _ckpt_thread is not None: + _ckpt_thread.join(timeout=15) + _lock_ctx = _agent_lock if _agent_lock is not None else contextlib.nullcontext() + with _lock_ctx: + _finalize_cancelled_turn(s, ephemeral=ephemeral) + if not ephemeral: + try: + append_turn_journal_event_for_stream( + s.session_id, + stream_id, + { + "event": "interrupted", + "created_at": time.time(), + "reason": "cancelled", + }, + ) + except Exception: + logger.debug("Failed to append cancelled turn journal event", exc_info=True) + put('cancel', {'message': 'Cancelled by user'}) + return _exc_is_quota = _classification['type'] == 'quota_exhausted' # Exception quota text still includes: 'more credits' in _exc_lower, 'can only afford' in _exc_lower, 'fewer max_tokens' in _exc_lower. # Rate-limit detection remains guarded as: (not _exc_is_quota). _exc_is_rate_limit = (_classification['type'] == 'rate_limit') and (not _exc_is_quota) _exc_is_auth = _classification['type'] == 'auth_mismatch' # detects '401' and 'unauthorized' via _classify_provider_error. _exc_is_not_found = _classification['type'] == 'model_not_found' # detects '404', 'not found', 'does not exist', and 'invalid model'. + _exc_is_cancelled = _classification['type'] == 'cancelled' + _exc_is_interrupted = _classification['type'] == 'interrupted' # The user hint still points to Settings / `hermes model` from _classify_provider_error(). if _exc_is_quota: @@ -3983,6 +4245,10 @@ def _periodic_checkpoint(): _exc_label, _exc_type, _exc_hint = ( _classification['label'], _classification['type'], _classification['hint'], ) + elif _exc_is_cancelled or _exc_is_interrupted: + _exc_label, _exc_type, _exc_hint = ( + _classification['label'], _classification['type'], _classification['hint'], + ) else: _exc_label, _exc_type, _exc_hint = 'Error', 'error', '' @@ -4018,6 +4284,10 @@ def _periodic_checkpoint(): } if _error_payload.get('details'): _error_message['provider_details'] = _error_payload['details'] + if _exc_type == 'cancelled': + _error_message['provider_details_label'] = 'Cancellation details' + elif _exc_type == 'interrupted': + _error_message['provider_details_label'] = 'Interruption details' s.messages.append(_error_message) try: s.save() @@ -4221,13 +4491,12 @@ def cancel_stream(stream_id: str) -> bool: except Exception: logger.debug("Failed to clear clarify prompt during cancel") - # Put a cancel sentinel into the queue so the SSE handler wakes up + # Capture the queue while the stream still exists, but do not emit the + # terminal cancel event until the session cleanup below confirms the turn + # is still active. Otherwise a late Stop click can race with a successful + # worker save and show cancel in the client while persistence says done. q = streams.get(stream_id) - if q: - try: - q.put_nowait(('cancel', {'message': 'Cancelled by user'})) - except Exception: - logger.debug("Failed to put cancel event to queue") + _emit_cancel_event = True # ── Eager session lock release (fixes #653) ────────────────────────── # Pop stream state now so the 409 guard in routes.py sees the session @@ -4277,13 +4546,21 @@ def cancel_stream(stream_id: str) -> bool: with _get_session_agent_lock(_cancel_session_id): try: _cs = get_session(_cancel_session_id) + if not isinstance(getattr(_cs, 'messages', None), list): + _cs.messages = [] if not _stream_writeback_is_current(_cs, stream_id): + # The stream has rotated to a different stream id (newer + # turn started, or the worker already finalized this one). + # Skip the cancel-marker append AND suppress the terminal + # cancel event so we don't contradict a possibly-already- + # delivered done payload (#2151 + #2154 / PR #2136). logger.info( "Skipping stale cancel writeback for session %s stream %s; active_stream_id=%s", _cancel_session_id, stream_id, getattr(_cs, 'active_stream_id', None), ) + _emit_cancel_event = False return True # ── Preserve the user's typed message before clearing pending state (#1298) ── # The agent's internal messages list (where the user message was appended at @@ -4378,7 +4655,35 @@ def cancel_stream(stream_id: str) -> bool: # reasoning-only or tool-only stream produced NO partial message). _has_reasoning = bool(_cancel_reasoning and _cancel_reasoning.strip()) _has_tools = bool(_cancel_tool_calls) - if _stripped or _has_reasoning or _has_tools: + _cancel_marker_exists = _session_has_cancel_marker(_cs) + _cancel_marker_idx = len(_cs.messages) + if _cancel_marker_exists: + for _idx in range(len(_cs.messages) - 1, -1, -1): + _m = _cs.messages[_idx] + if not isinstance(_m, dict) or _m.get('role') != 'assistant': + continue + _content = str(_m.get('content') or '').strip().lower() + if any(pattern in _content for pattern in _CANCEL_MARKER_PATTERNS): + _cancel_marker_idx = _idx + break + _partial_already_present = False + if _stripped: + for _m in _cs.messages: + # Stage-350 Opus SHOULD-FIX (#2151): only dedup + # against actual prior _partial markers from the + # same stream, with exact content match. The original + # substring check (`_stripped in _existing or + # _existing in _stripped`) was too broad — any short + # prior assistant reply (e.g. "OK", "Here is the + # answer:") becomes a substring of many later partial + # bodies and could silently drop the new partial, + # resurrecting the #893 data-loss bug on long sessions. + if not isinstance(_m, dict) or not _m.get('_partial'): + continue + if str(_m.get('content') or '').strip() == _stripped: + _partial_already_present = True + break + if (_stripped or _has_reasoning or _has_tools) and not _partial_already_present: _partial_msg: dict = { 'role': 'assistant', 'content': _stripped, # may be empty for reasoning/tool-only turns @@ -4405,18 +4710,27 @@ def cancel_stream(stream_id: str) -> bool: # alongside the regular tool_calls path. # (Opus pre-release review pass 2 of v0.50.251.) _partial_msg['_partial_tool_calls'] = list(_cancel_tool_calls) - _cs.messages.append(_partial_msg) + _cs.messages.insert(_cancel_marker_idx, _partial_msg) # Cancel marker — flagged _error=True so it is stripped from conversation # history on the next turn (prevents model from seeing "Task cancelled." # as a prior assistant reply). - _cs.messages.append({ - 'role': 'assistant', - 'content': '*Task cancelled.*', - '_error': True, - 'timestamp': int(time.time()), - }) + if not _cancel_marker_exists: + _cs.messages.append({ + 'role': 'assistant', + 'content': _cancelled_turn_content('Task cancelled.'), + '_error': True, + 'provider_details': 'Task cancelled.', + 'provider_details_label': 'Cancellation details', + 'timestamp': int(time.time()), + }) _cs.save() except Exception: logger.debug("Failed to clear session state on cancel for %s", _cancel_session_id) + if _emit_cancel_event and q: + try: + q.put_nowait(('cancel', {'message': 'Cancelled by user'})) + except Exception: + logger.debug("Failed to put cancel event to queue") + return True diff --git a/static/messages.js b/static/messages.js index 4ee2e1eb6e..b424ad3619 100644 --- a/static/messages.js +++ b/static/messages.js @@ -1285,11 +1285,14 @@ function attachLiveStream(activeSid, streamId, uploaded=[], options={}){ const isQuotaExhausted=d.type==='quota_exhausted'; const isAuthMismatch=d.type==='auth_mismatch'; const isModelNotFound=d.type==='model_not_found'; + const isCancelled=d.type==='cancelled'; + const isInterrupted=d.type==='interrupted'; const isNoResponse=d.type==='no_response'||d.type==='silent_failure'; - const label=isQuotaExhausted?'Out of credits':isRateLimit?'Rate limit reached':isAuthMismatch?(typeof t==='function'?t('provider_mismatch_label'):'Provider mismatch'):isModelNotFound?(typeof t==='function'?t('model_not_found_label'):'Model not found'):isNoResponse?'No response received':'Error'; + const label=isCancelled?'Task cancelled':isInterrupted?'Response interrupted':isQuotaExhausted?'Out of credits':isRateLimit?'Rate limit reached':isAuthMismatch?(typeof t==='function'?t('provider_mismatch_label'):'Provider mismatch'):isModelNotFound?(typeof t==='function'?t('model_not_found_label'):'Model not found'):isNoResponse?'No response from provider':'Error'; const hint=d.hint?`\n\n*${d.hint}*`:''; const details=d.details?String(d.details).replace(/```/g,'`\u200b``'):''; - S.messages.push({role:'assistant',content:`**${label}:** ${d.message}${hint}`,provider_details:details}); + const detailsLabel=isCancelled?'Cancellation details':isInterrupted?'Interruption details':undefined; + S.messages.push({role:'assistant',content:`**${label}:** ${d.message}${hint}`,provider_details:details,provider_details_label:detailsLabel}); }catch(_){ S.messages.push({role:'assistant',content:'**Error:** An error occurred. Check server logs.'}); } @@ -1380,7 +1383,7 @@ function attachLiveStream(activeSid, streamId, uploaded=[], options={}){ // Fallback to local cancel message if API fails if(S.session&&S.session.session_id===activeSid){ clearLiveToolCards();if(!assistantText)removeThinking(); - S.messages.push({role:'assistant',content:'*Task cancelled.*'});renderMessages({preserveScroll:true}); + S.messages.push({role:'assistant',content:'**Task cancelled:** Task cancelled.\n\n*The run was cancelled by the user before Skyly finished. No provider failure occurred.*',provider_details:'Task cancelled.',provider_details_label:'Cancellation details',_error:true});renderMessages({preserveScroll:true}); _markSessionViewed(activeSid, S.messages.length); } } diff --git a/static/style.css b/static/style.css index 6b6f55ed51..67b9b6f56f 100644 --- a/static/style.css +++ b/static/style.css @@ -1881,12 +1881,50 @@ body.resizing .sidebar{transition:none!important;} .tool-call-group{margin:4px 0 4px var(--msg-rail);max-width:var(--msg-max);border-left:1px solid var(--border-subtle);} .tool-call-group-summary{width:100%;display:flex;align-items:center;gap:var(--space-2);padding:var(--space-1) var(--space-3);border:0;background:transparent;color:var(--muted);cursor:pointer;text-align:left;font:inherit;font-size:var(--font-size-xs);line-height:1.4;border-radius:var(--radius-card);} .tool-call-group-summary:hover{background:var(--surface-subtle-hover);color:var(--text);} -.tool-call-group-label{font-weight:600;color:var(--muted);} .tool-call-group-duration{margin-left:auto;opacity:.62;font-variant-numeric:tabular-nums;white-space:nowrap;} .tool-call-group-chevron{opacity:.45;display:inline-flex;transition:transform .16s ease;} .tool-call-group:not(.tool-call-group-collapsed) .tool-call-group-chevron{transform:rotate(90deg);} .tool-call-group-body{display:block;padding-left:var(--space-3);} .tool-call-group.tool-call-group-collapsed .tool-call-group-body{display:none;} +.tool-call-group-label{font-weight:600;color:var(--muted);position:relative;display:inline-block;overflow:hidden;} +.tool-call-group[data-live-tool-call-group="1"] .tool-call-group-label{ + color:var(--muted); +} +@keyframes _tool-shimmer-sweep{ + 0%{-webkit-mask-position:100% 0;mask-position:100% 0;} + 100%{-webkit-mask-position:-200% 0;mask-position:-200% 0;} +} +.tool-call-group[data-live-tool-call-group="1"] .tool-call-group-label::after{ + --activity-sweep-highlight:linear-gradient(90deg,var(--accent) 0%,var(--accent) 45.2%,color-mix(in srgb,var(--accent) 90%,#000) 46.5%,color-mix(in srgb,var(--accent) 90%,#000) 53.5%,var(--accent) 55%,var(--accent) 100%); + --activity-sweep-mask:linear-gradient(90deg,rgba(0,0,0,0) 0%,rgba(0,0,0,0) 38%,rgba(0,0,0,.18) 40.8%,rgba(0,0,0,.46) 43.6%,rgba(0,0,0,.72) 46.5%,rgba(0,0,0,.9) 53.5%,rgba(0,0,0,.52) 55.8%,rgba(0,0,0,.28) 58.2%,rgba(0,0,0,.1) 60.4%,rgba(0,0,0,0) 62%,rgba(0,0,0,0) 100%); + content:attr(data-sweep-label); + position:absolute;inset:0; + color:var(--accent); + background-image:var(--activity-sweep-highlight); + -webkit-background-clip:text; + background-clip:text; + -webkit-text-fill-color:transparent; + pointer-events:none; + -webkit-mask-image:var(--activity-sweep-mask); + mask-image:var(--activity-sweep-mask); + -webkit-mask-size:250% 100%; + mask-size:250% 100%; + -webkit-mask-repeat:no-repeat; + mask-repeat:no-repeat; + animation:_tool-shimmer-sweep 3s cubic-bezier(.45,0,.55,1) infinite; +} +@media (prefers-reduced-motion: reduce){ + .tool-call-group[data-live-tool-call-group="1"] .tool-call-group-label::after{ + animation:none; + display:none; + } +} +/* Fallback for browsers without CSS mask support */ +@supports not ((mask-image:linear-gradient(#000,#000)) or (-webkit-mask-image:linear-gradient(#000,#000))){ + .tool-call-group[data-live-tool-call-group="1"] .tool-call-group-label::after{ + display:none; + } +} .tool-card{background:var(--surface-subtle);border:1px solid var(--border-subtle);border-radius:var(--radius-card);margin:2px 0;overflow:hidden;transition:border-color .15s,background-color .15s;} .tool-card:hover{border-color:var(--border-muted);background:var(--surface-subtle-hover);} .tool-card-running{border-color:var(--accent-bg-strong);background:var(--accent-bg);} diff --git a/static/ui.js b/static/ui.js index 6d557ccaf9..d0f27aa8f8 100644 --- a/static/ui.js +++ b/static/ui.js @@ -764,26 +764,67 @@ async function populateModelDropdown(){ const _modelsRes=await fetch(new URL('api/models',document.baseURI||location.href).href,{credentials:'include'}); if(_redirectIfUnauth(_modelsRes)) return; const data=await _modelsRes.json(); - if(!data.groups||!data.groups.length) return; // keep HTML defaults // Store active provider globally so the send path can warn on mismatch window._activeProvider=data.active_provider||null; // Store default model so newSession() can apply it (#872). // Per-page-load — not synced across browser tabs. window._defaultModel=data.default_model||null; window._configuredModelBadges=data.configured_model_badges||{}; + + const _synthGroupsFromConfigured=()=>{ + const badgeMap=window._configuredModelBadges||{}; + const grouped=new Map(); + const addModel=(providerId,modelId)=>{ + const pid=String(providerId||'configured').trim()||'configured'; + const mid=String(modelId||'').trim(); + if(!mid) return; + if(!grouped.has(pid)) grouped.set(pid,[]); + const arr=grouped.get(pid); + if(arr.some(m=>m.id===mid)) return; + arr.push({id:mid,label:getModelLabel(mid)}); + }; + + for(const [modelId,badge] of Object.entries(badgeMap)){ + const mid=String(modelId||'').trim(); + // Prefer canonical IDs only; skip derived aliases such as + // @provider:model and provider/model to avoid noisy duplicates. + if(!mid||mid.startsWith('@')||mid.includes('/')) continue; + const provider=(badge&&badge.provider)||'configured'; + addModel(provider,mid); + } + + if(grouped.size===0&&data&&data.default_model){ + addModel(data.active_provider||'configured',data.default_model); + } + + const groups=[]; + for(const [providerId,models] of grouped.entries()){ + const display=(String(providerId).startsWith('custom:') + ? String(providerId).slice('custom:'.length) + : String(providerId))||'Configured'; + groups.push({provider:display,provider_id:providerId,models}); + } + return groups; + }; + + const groups=(Array.isArray(data.groups)&&data.groups.length) + ? data.groups + : _synthGroupsFromConfigured(); + + if(!groups.length) return; // no server groups and no configured fallback // Clear existing options sel.innerHTML=''; _dynamicModelLabels={}; - for(const g of data.groups){ + for(const g of groups){ const og=document.createElement('optgroup'); og.label=g.provider; if(g.provider_id) og.dataset.provider=g.provider_id; - for(const m of g.models){ + for(const m of (Array.isArray(g.models)?g.models:[])){ const opt=document.createElement('option'); opt.value=m.id; opt.textContent=m.label; og.appendChild(opt); - _dynamicModelLabels[m.id]=m.label; + _dynamicModelLabels[m.id]=m.id; } // Hydrate the label map from extra_models too (the catalog tail that // doesn't render as